mirror of
https://github.com/ilri/csv-metadata-quality.git
synced 2025-01-24 11:13:22 +01:00
Add column name to output in date checks
This makes it easier to understand where the error is when a CSV has multiple date fields, for example: Missing date (dc.date.issued). Missing date (dc.date.issued[]). If you have 126 items and you get 126 "Missing date" messages, then it's likely that 100 of the items have dates in one field and the others have dates in another field.
This commit is contained in:
parent
3247495cee
commit
ed5612fbcf
@ -84,7 +84,7 @@ def run(argv):
|
||||
# Check: invalid date
|
||||
match = re.match(r'^.*?date.*$', column)
|
||||
if match is not None:
|
||||
df[column] = df[column].apply(check.date)
|
||||
df[column] = df[column].apply(check.date, field_name=column)
|
||||
|
||||
# Check: filename extension
|
||||
if column == 'filename':
|
||||
|
@ -75,7 +75,7 @@ def separators(field):
|
||||
return field
|
||||
|
||||
|
||||
def date(field):
|
||||
def date(field, field_name):
|
||||
"""Check if a date is valid.
|
||||
|
||||
In DSpace the issue date is usually 1990, 1990-01, or 1990-01-01, but it
|
||||
@ -88,7 +88,7 @@ def date(field):
|
||||
from datetime import datetime
|
||||
|
||||
if pd.isna(field):
|
||||
print(f'Missing date.')
|
||||
print(f'Missing date ({field_name}).')
|
||||
|
||||
return
|
||||
|
||||
@ -97,7 +97,7 @@ def date(field):
|
||||
|
||||
# We don't allow multi-value date fields
|
||||
if len(multiple_dates) > 1:
|
||||
print(f'Multiple dates not allowed: {field}')
|
||||
print(f'Multiple dates not allowed ({field_name}): {field}')
|
||||
|
||||
return field
|
||||
|
||||
@ -123,7 +123,7 @@ def date(field):
|
||||
|
||||
return field
|
||||
except ValueError:
|
||||
print(f'Invalid date: {field}')
|
||||
print(f'Invalid date ({field_name}): {field}')
|
||||
|
||||
return field
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user