mirror of
https://github.com/ilri/csv-metadata-quality.git
synced 2025-01-24 11:13:22 +01:00
Add field name to fix.newlines output
This commit is contained in:
parent
82261f7fe0
commit
787fa9e8d9
@@ -89,7 +89,7 @@ def run(argv):
|
||||
|
||||
# Fix: newlines
|
||||
if args.unsafe_fixes:
|
||||
df[column] = df[column].apply(fix.newlines)
|
||||
df[column] = df[column].apply(fix.newlines, field_name=column)
|
||||
|
||||
# Fix: missing space after comma. Only run on author and citation
|
||||
# fields for now, as this problem is mostly an issue in names.
|
||||
|
@@ -180,7 +180,7 @@ def duplicates(field, field_name):
|
||||
return new_field
|
||||
|
||||
|
||||
def newlines(field):
|
||||
def newlines(field, field_name):
|
||||
"""Fix newlines.
|
||||
|
||||
Single metadata values should not span multiple lines because this is not
|
||||
@@ -205,7 +205,7 @@ def newlines(field):
|
||||
match = re.findall(r"\n", field)
|
||||
|
||||
if match:
|
||||
print(f"{Fore.GREEN}Removing newline: {Fore.RESET}{field}")
|
||||
print(f"{Fore.GREEN}Removing newline ({field_name}): {Fore.RESET}{field}")
|
||||
field = field.replace("\n", "")
|
||||
|
||||
return field
|
||||
|
@@ -76,8 +76,9 @@ def test_fix_newlines():
|
||||
|
||||
value = """Ken
|
||||
ya"""
|
||||
field_name = "dcterms.subject"
|
||||
|
||||
assert fix.newlines(value) == "Kenya"
|
||||
assert fix.newlines(value, field_name) == "Kenya"
|
||||
|
||||
|
||||
def test_fix_comma_space():
|
||||
|
Loading…
x
Reference in New Issue
Block a user