From 787fa9e8d955f6581443b78d4b288184dc1fddf3 Mon Sep 17 00:00:00 2001 From: Alan Orth Date: Fri, 8 Oct 2021 14:36:23 +0300 Subject: [PATCH] Add field name to fix.newlines output --- csv_metadata_quality/app.py | 2 +- csv_metadata_quality/fix.py | 4 ++-- tests/test_fix.py | 3 ++- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/csv_metadata_quality/app.py b/csv_metadata_quality/app.py index 715921c..7c1c7cf 100644 --- a/csv_metadata_quality/app.py +++ b/csv_metadata_quality/app.py @@ -89,7 +89,7 @@ def run(argv): # Fix: newlines if args.unsafe_fixes: - df[column] = df[column].apply(fix.newlines) + df[column] = df[column].apply(fix.newlines, field_name=column) # Fix: missing space after comma. Only run on author and citation # fields for now, as this problem is mostly an issue in names. diff --git a/csv_metadata_quality/fix.py b/csv_metadata_quality/fix.py index deb7da0..0a0adb3 100755 --- a/csv_metadata_quality/fix.py +++ b/csv_metadata_quality/fix.py @@ -180,7 +180,7 @@ def duplicates(field, field_name): return new_field -def newlines(field): +def newlines(field, field_name): """Fix newlines. Single metadata values should not span multiple lines because this is not @@ -205,7 +205,7 @@ def newlines(field): match = re.findall(r"\n", field) if match: - print(f"{Fore.GREEN}Removing newline: {Fore.RESET}{field}") + print(f"{Fore.GREEN}Removing newline ({field_name}): {Fore.RESET}{field}") field = field.replace("\n", "") return field diff --git a/tests/test_fix.py b/tests/test_fix.py index 9ecba75..ad7ec42 100644 --- a/tests/test_fix.py +++ b/tests/test_fix.py @@ -76,8 +76,9 @@ def test_fix_newlines(): value = """Ken ya""" + field_name = "dcterms.subject" - assert fix.newlines(value) == "Kenya" + assert fix.newlines(value, field_name) == "Kenya" def test_fix_comma_space():