1
0
mirror of https://github.com/ilri/csv-metadata-quality.git synced 2025-05-08 06:06:00 +02:00

Add field name to fix.newlines output

This commit is contained in:
2021-10-08 14:36:23 +03:00
parent 82261f7fe0
commit 787fa9e8d9
3 changed files with 5 additions and 4 deletions

View File

@@ -89,7 +89,7 @@ def run(argv):
# Fix: newlines
if args.unsafe_fixes:
-df[column] = df[column].apply(fix.newlines)
+df[column] = df[column].apply(fix.newlines, field_name=column)
# Fix: missing space after comma. Only run on author and citation
# fields for now, as this problem is mostly an issue in names.

View File

@@ -180,7 +180,7 @@ def duplicates(field, field_name):
return new_field
-def newlines(field):
+def newlines(field, field_name):
"""Fix newlines.
Single metadata values should not span multiple lines because this is not
@@ -205,7 +205,7 @@ def newlines(field):
match = re.findall(r"\n", field)
if match:
-print(f"{Fore.GREEN}Removing newline: {Fore.RESET}{field}")
+print(f"{Fore.GREEN}Removing newline ({field_name}): {Fore.RESET}{field}")
field = field.replace("\n", "")
return field