mirror of
https://github.com/ilri/csv-metadata-quality.git
synced 2024-12-22 12:12:18 +01:00
Add column name to output in date checks
This makes it easier to understand where the error is in case a CSV has multiple date fields, for example: "Missing date (dc.date.issued)." and "Missing date (dc.date.issued[]).". If you have 126 items and you get 126 "Missing date" messages, then it's likely that 100 of the items have dates in one field and the others have dates in another field.
This commit is contained in:
parent
3247495cee
commit
ed5612fbcf
@ -84,7 +84,7 @@ def run(argv):
|
|||||||
# Check: invalid date
|
# Check: invalid date
|
||||||
match = re.match(r'^.*?date.*$', column)
|
match = re.match(r'^.*?date.*$', column)
|
||||||
if match is not None:
|
if match is not None:
|
||||||
df[column] = df[column].apply(check.date)
|
df[column] = df[column].apply(check.date, field_name=column)
|
||||||
|
|
||||||
# Check: filename extension
|
# Check: filename extension
|
||||||
if column == 'filename':
|
if column == 'filename':
|
||||||
|
@ -75,7 +75,7 @@ def separators(field):
|
|||||||
return field
|
return field
|
||||||
|
|
||||||
|
|
||||||
def date(field):
|
def date(field, field_name):
|
||||||
"""Check if a date is valid.
|
"""Check if a date is valid.
|
||||||
|
|
||||||
In DSpace the issue date is usually 1990, 1990-01, or 1990-01-01, but it
|
In DSpace the issue date is usually 1990, 1990-01, or 1990-01-01, but it
|
||||||
@ -88,7 +88,7 @@ def date(field):
|
|||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
if pd.isna(field):
|
if pd.isna(field):
|
||||||
print(f'Missing date.')
|
print(f'Missing date ({field_name}).')
|
||||||
|
|
||||||
return
|
return
|
||||||
|
|
||||||
@ -97,7 +97,7 @@ def date(field):
|
|||||||
|
|
||||||
# We don't allow multi-value date fields
|
# We don't allow multi-value date fields
|
||||||
if len(multiple_dates) > 1:
|
if len(multiple_dates) > 1:
|
||||||
print(f'Multiple dates not allowed: {field}')
|
print(f'Multiple dates not allowed ({field_name}): {field}')
|
||||||
|
|
||||||
return field
|
return field
|
||||||
|
|
||||||
@ -123,7 +123,7 @@ def date(field):
|
|||||||
|
|
||||||
return field
|
return field
|
||||||
except ValueError:
|
except ValueError:
|
||||||
print(f'Invalid date: {field}')
|
print(f'Invalid date ({field_name}): {field}')
|
||||||
|
|
||||||
return field
|
return field
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user