mirror of
https://github.com/ilri/csv-metadata-quality.git
synced 2024-11-29 09:08:26 +01:00
Compare commits
2 Commits
82261f7fe0
...
c95261f522
Author | SHA1 | Date | |
---|---|---|---|
c95261f522 | |||
787fa9e8d9 |
@ -16,6 +16,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|||||||
well as dcterms.bibliographicCitation) in `experimental.correct_language()`
|
well as dcterms.bibliographicCitation) in `experimental.correct_language()`
|
||||||
- Regular expression to match dc.title and dcterms.title, but
|
- Regular expression to match dc.title and dcterms.title, but
|
||||||
ignore dc.title.alternative in `check.duplicate_items()`
|
ignore dc.title.alternative in `check.duplicate_items()`
|
||||||
|
- Missing field name in `fix.newlines()` output
|
||||||
|
|
||||||
## [0.4.7] - 2021-03-17
|
## [0.4.7] - 2021-03-17
|
||||||
### Changed
|
### Changed
|
||||||
|
@ -89,7 +89,7 @@ def run(argv):
|
|||||||
|
|
||||||
# Fix: newlines
|
# Fix: newlines
|
||||||
if args.unsafe_fixes:
|
if args.unsafe_fixes:
|
||||||
df[column] = df[column].apply(fix.newlines)
|
df[column] = df[column].apply(fix.newlines, field_name=column)
|
||||||
|
|
||||||
# Fix: missing space after comma. Only run on author and citation
|
# Fix: missing space after comma. Only run on author and citation
|
||||||
# fields for now, as this problem is mostly an issue in names.
|
# fields for now, as this problem is mostly an issue in names.
|
||||||
|
@ -180,7 +180,7 @@ def duplicates(field, field_name):
|
|||||||
return new_field
|
return new_field
|
||||||
|
|
||||||
|
|
||||||
def newlines(field):
|
def newlines(field, field_name):
|
||||||
"""Fix newlines.
|
"""Fix newlines.
|
||||||
|
|
||||||
Single metadata values should not span multiple lines because this is not
|
Single metadata values should not span multiple lines because this is not
|
||||||
@ -205,7 +205,7 @@ def newlines(field):
|
|||||||
match = re.findall(r"\n", field)
|
match = re.findall(r"\n", field)
|
||||||
|
|
||||||
if match:
|
if match:
|
||||||
print(f"{Fore.GREEN}Removing newline: {Fore.RESET}{field}")
|
print(f"{Fore.GREEN}Removing newline ({field_name}): {Fore.RESET}{field}")
|
||||||
field = field.replace("\n", "")
|
field = field.replace("\n", "")
|
||||||
|
|
||||||
return field
|
return field
|
||||||
|
@ -76,8 +76,9 @@ def test_fix_newlines():
|
|||||||
|
|
||||||
value = """Ken
|
value = """Ken
|
||||||
ya"""
|
ya"""
|
||||||
|
field_name = "dcterms.subject"
|
||||||
|
|
||||||
assert fix.newlines(value) == "Kenya"
|
assert fix.newlines(value, field_name) == "Kenya"
|
||||||
|
|
||||||
|
|
||||||
def test_fix_comma_space():
|
def test_fix_comma_space():
|
||||||
|
Loading…
Reference in New Issue
Block a user