From ed5612fbcf5d70eea992eec9d05f45e2d1aa2db9 Mon Sep 17 00:00:00 2001 From: Alan Orth Date: Wed, 21 Aug 2019 15:31:12 +0300 Subject: [PATCH] Add column name to output in date checks This makes it easier to understand where the error is in case a CSV has multiple date fields, for example: Missing date (dc.date.issued). Missing date (dc.date.issued[]). If you have 126 items and you get 126 "Missing date" messages then it's likely that 100 of the items have dates in one field, and the others have dates in another field. --- csv_metadata_quality/app.py | 2 +- csv_metadata_quality/check.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/csv_metadata_quality/app.py b/csv_metadata_quality/app.py index 097f930..f977c05 100644 --- a/csv_metadata_quality/app.py +++ b/csv_metadata_quality/app.py @@ -84,7 +84,7 @@ def run(argv): # Check: invalid date match = re.match(r'^.*?date.*$', column) if match is not None: - df[column] = df[column].apply(check.date) + df[column] = df[column].apply(check.date, field_name=column) # Check: filename extension if column == 'filename': diff --git a/csv_metadata_quality/check.py b/csv_metadata_quality/check.py index 67ba1b5..083d49d 100755 --- a/csv_metadata_quality/check.py +++ b/csv_metadata_quality/check.py @@ -75,7 +75,7 @@ def separators(field): return field -def date(field): +def date(field, field_name): """Check if a date is valid. 
In DSpace the issue date is usually 1990, 1990-01, or 1990-01-01, but it @@ -88,7 +88,7 @@ def date(field): from datetime import datetime if pd.isna(field): - print(f'Missing date.') + print(f'Missing date ({field_name}).') return @@ -97,7 +97,7 @@ def date(field): # We don't allow multi-value date fields if len(multiple_dates) > 1: - print(f'Multiple dates not allowed: {field}') + print(f'Multiple dates not allowed ({field_name}): {field}') return field @@ -123,7 +123,7 @@ def date(field): return field except ValueError: - print(f'Invalid date: {field}') + print(f'Invalid date ({field_name}): {field}') return field