1
0
mirror of https://github.com/ilri/csv-metadata-quality.git synced 2024-11-29 09:08:26 +01:00

Compare commits

...

2 Commits

Author SHA1 Message Date
c95261f522
CHANGELOG.md: Add note about fix.newlines
All checks were successful
continuous-integration/drone/push Build is passing
2021-10-08 14:37:12 +03:00
787fa9e8d9
Add field name to fix.newlines output 2021-10-08 14:36:43 +03:00
4 changed files with 6 additions and 4 deletions

View File

@@ -16,6 +16,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
well as dcterms.bibliographicCitation) in `experimental.correct_language()` well as dcterms.bibliographicCitation) in `experimental.correct_language()`
- Regular expression to match dc.title and dcterms.title, but - Regular expression to match dc.title and dcterms.title, but
ignore dc.title.alternative `check.duplicate_items()` ignore dc.title.alternative `check.duplicate_items()`
- Missing field name in `fix.newlines()` output
## [0.4.7] - 2021-03-17 ## [0.4.7] - 2021-03-17
### Changed ### Changed

View File

@@ -89,7 +89,7 @@ def run(argv):
# Fix: newlines # Fix: newlines
if args.unsafe_fixes: if args.unsafe_fixes:
df[column] = df[column].apply(fix.newlines) df[column] = df[column].apply(fix.newlines, field_name=column)
# Fix: missing space after comma. Only run on author and citation # Fix: missing space after comma. Only run on author and citation
# fields for now, as this problem is mostly an issue in names. # fields for now, as this problem is mostly an issue in names.

View File

@@ -180,7 +180,7 @@ def duplicates(field, field_name):
return new_field return new_field
def newlines(field): def newlines(field, field_name):
"""Fix newlines. """Fix newlines.
Single metadata values should not span multiple lines because this is not Single metadata values should not span multiple lines because this is not
@@ -205,7 +205,7 @@ def newlines(field):
match = re.findall(r"\n", field) match = re.findall(r"\n", field)
if match: if match:
print(f"{Fore.GREEN}Removing newline: {Fore.RESET}{field}") print(f"{Fore.GREEN}Removing newline ({field_name}): {Fore.RESET}{field}")
field = field.replace("\n", "") field = field.replace("\n", "")
return field return field

View File

@@ -76,8 +76,9 @@ def test_fix_newlines():
value = """Ken value = """Ken
ya""" ya"""
field_name = "dcterms.subject"
assert fix.newlines(value) == "Kenya" assert fix.newlines(value, field_name) == "Kenya"
def test_fix_comma_space(): def test_fix_comma_space():