mirror of
https://github.com/ilri/csv-metadata-quality.git
synced 2024-11-28 00:28:18 +01:00
Compare commits
3 Commits
47b03c49ba
...
20a2cce34b
Author | SHA1 | Date | |
---|---|---|---|
20a2cce34b | |||
d661ffe439 | |||
45a310387a |
@ -4,6 +4,13 @@ All notable changes to this project will be documented in this file.
|
|||||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||||
|
|
||||||
|
## Unreleased
|
||||||
|
### Fixed
|
||||||
|
- Fixed regex so we don't run the invalid multi-value separator fix on
|
||||||
|
`dcterms.bibliographicCitation` fields
|
||||||
|
- Fixed regex so we run the comma space fix on `dcterms.bibliographicCitation`
|
||||||
|
fields
|
||||||
|
|
||||||
## [0.6.1] - 2023-02-23
|
## [0.6.1] - 2023-02-23
|
||||||
### Fixed
|
### Fixed
|
||||||
- Missing region check should ignore subregion field, if it exists
|
- Missing region check should ignore subregion field, if it exists
|
||||||
|
@ -102,7 +102,7 @@ def run(argv):
|
|||||||
# Fix: missing space after comma. Only run on author and citation
|
# Fix: missing space after comma. Only run on author and citation
|
||||||
# fields for now, as this problem is mostly an issue in names.
|
# fields for now, as this problem is mostly an issue in names.
|
||||||
if args.unsafe_fixes:
|
if args.unsafe_fixes:
|
||||||
match = re.match(r"^.*?(author|citation).*$", column)
|
match = re.match(r"^.*?(author|[Cc]itation).*$", column)
|
||||||
if match is not None:
|
if match is not None:
|
||||||
df[column] = df[column].apply(fix.comma_space, field_name=column)
|
df[column] = df[column].apply(fix.comma_space, field_name=column)
|
||||||
|
|
||||||
@ -126,7 +126,7 @@ def run(argv):
|
|||||||
# Fix: invalid and unnecessary multi-value separators. Skip the title
|
# Fix: invalid and unnecessary multi-value separators. Skip the title
|
||||||
# and abstract fields because "|" is used to indicate something like
|
# and abstract fields because "|" is used to indicate something like
|
||||||
# a subtitle.
|
# a subtitle. Citation fields are skipped as well.
|
||||||
match = re.match(r"^.*?(abstract|title).*$", column)
|
match = re.match(r"^.*?(abstract|[Cc]itation|title).*$", column)
|
||||||
if match is None:
|
if match is None:
|
||||||
df[column] = df[column].apply(fix.separators, field_name=column)
|
df[column] = df[column].apply(fix.separators, field_name=column)
|
||||||
# Run whitespace fix again after fixing invalid separators
|
# Run whitespace fix again after fixing invalid separators
|
||||||
|
Loading…
Reference in New Issue
Block a user