From 10612cf8911f289f140f51fa7186ae4eed4e602f Mon Sep 17 00:00:00 2001 From: Alan Orth Date: Sun, 14 Mar 2021 21:01:21 +0200 Subject: [PATCH] Remove checks for invalid multi-value separators MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that I no longer treat the fix for these as "unsafe" I don't actually need to check for them—I can just fix them when I see them. --- csv_metadata_quality/app.py | 3 --- csv_metadata_quality/check.py | 36 ---------------------------- tests/test_check.py | 44 ----------------------------------- 3 files changed, 83 deletions(-) diff --git a/csv_metadata_quality/app.py b/csv_metadata_quality/app.py index e0ad9b9..263ad7f 100644 --- a/csv_metadata_quality/app.py +++ b/csv_metadata_quality/app.py @@ -104,9 +104,6 @@ def run(argv): # Fix: unnecessary Unicode df[column] = df[column].apply(fix.unnecessary_unicode) - # Check: invalid and unnecessary multi-value separators - df[column] = df[column].apply(check.separators, field_name=column) - # Check: suspicious characters df[column] = df[column].apply(check.suspicious_characters, field_name=column) diff --git a/csv_metadata_quality/check.py b/csv_metadata_quality/check.py index add1c5a..89b261c 100755 --- a/csv_metadata_quality/check.py +++ b/csv_metadata_quality/check.py @@ -58,42 +58,6 @@ def isbn(field): return field -def separators(field, field_name): - """Check for invalid and unnecessary multi-value separators, for example: - - value|value - value|||value - value||value|| - - Prints the field with the invalid multi-value separator. 
- """ - - # Skip fields with missing values - if pd.isna(field): - return - - # Try to split multi-value field on "||" separator - for value in field.split("||"): - # Check if the current value is blank - if value == "": - print( - f"{Fore.RED}Unnecessary multi-value separator ({field_name}): {Fore.RESET}{field}" - ) - - continue - - # After splitting, see if there are any remaining "|" characters - match = re.findall(r"^.*?\|.*$", value) - - # Check if there was a match - if match: - print( - f"{Fore.RED}Invalid multi-value separator ({field_name}): {Fore.RESET}{field}" - ) - - return field - - def date(field, field_name): """Check if a date is valid. diff --git a/tests/test_check.py b/tests/test_check.py index 725a4d3..db20485 100644 --- a/tests/test_check.py +++ b/tests/test_check.py @@ -47,50 +47,6 @@ def test_check_valid_isbn(): assert result == value -def test_check_invalid_separators(capsys): - """Test checking invalid multi-value separators.""" - - value = "Alan|Orth" - - field_name = "dc.contributor.author" - - check.separators(value, field_name) - - captured = capsys.readouterr() - assert ( - captured.out - == f"{Fore.RED}Invalid multi-value separator ({field_name}): {Fore.RESET}{value}\n" - ) - - -def test_check_unnecessary_separators(capsys): - """Test checking unnecessary multi-value separators.""" - - field = "Alan||Orth||" - - field_name = "dc.contributor.author" - - check.separators(field, field_name) - - captured = capsys.readouterr() - assert ( - captured.out - == f"{Fore.RED}Unnecessary multi-value separator ({field_name}): {Fore.RESET}{field}\n" - ) - - -def test_check_valid_separators(): - """Test checking valid multi-value separators.""" - - value = "Alan||Orth" - - field_name = "dc.contributor.author" - - result = check.separators(value, field_name) - - assert result == value - - def test_check_missing_date(capsys): """Test checking missing date."""