1
0
mirror of https://github.com/ilri/csv-metadata-quality.git synced 2024-12-22 04:02:19 +01:00

Add column name to output in date checks

This makes it easier to understand where the error is in case a CSV
has multiple date fields, for example:

    Missing date (dc.date.issued).
    Missing date (dc.date.issued[]).

If you have 126 items and you get 126 "Missing date" messages, then
it's likely that 100 of the items have dates in one field and the
others have dates in another field.
This commit is contained in:
Alan Orth 2019-08-21 15:31:12 +03:00
parent 3247495cee
commit ed5612fbcf
Signed by: alanorth
GPG Key ID: 0FB860CC9C45B1B9
2 changed files with 5 additions and 5 deletions

View File

@ -84,7 +84,7 @@ def run(argv):
# Check: invalid date
match = re.match(r'^.*?date.*$', column)
if match is not None:
df[column] = df[column].apply(check.date)
df[column] = df[column].apply(check.date, field_name=column)
# Check: filename extension
if column == 'filename':

View File

@ -75,7 +75,7 @@ def separators(field):
return field
def date(field):
def date(field, field_name):
"""Check if a date is valid.
In DSpace the issue date is usually 1990, 1990-01, or 1990-01-01, but it
@ -88,7 +88,7 @@ def date(field):
from datetime import datetime
if pd.isna(field):
print(f'Missing date.')
print(f'Missing date ({field_name}).')
return
@ -97,7 +97,7 @@ def date(field):
# We don't allow multi-value date fields
if len(multiple_dates) > 1:
print(f'Multiple dates not allowed: {field}')
print(f'Multiple dates not allowed ({field_name}): {field}')
return field
@ -123,7 +123,7 @@ def date(field):
return field
except ValueError:
print(f'Invalid date: {field}')
print(f'Invalid date ({field_name}): {field}')
return field