1
0
mirror of https://github.com/ilri/csv-metadata-quality.git synced 2024-11-16 02:57:04 +01:00

csv_metadata_quality/app.py: Use regex in column match

Match any column that has "issn" or "isbn" in its name rather than
comparing against one explicit name, as the column is dc.identifier.issn
now, but will be cg.issn in the future if CG Core v2 happens.
This commit is contained in:
Alan Orth 2019-07-28 17:27:20 +03:00
parent 4687e2f5fa
commit e88d35ace3
Signed by: alanorth
GPG Key ID: 0FB860CC9C45B1B9

View File

@@ -17,10 +17,14 @@ def main():
# Run invalid multi-value separator check on all columns # Run invalid multi-value separator check on all columns
df[column] = df[column].apply(check.separators) df[column] = df[column].apply(check.separators)
if column == 'dc.identifier.issn': # check if column is an issn column like dc.identifier.issn
match = re.match(r'^.*?issn.*$', column)
if match is not None:
df[column] = df[column].apply(check.issn) df[column] = df[column].apply(check.issn)
if column == 'dc.identifier.isbn': # check if column is an isbn column like dc.identifier.isbn
match = re.match(r'^.*?isbn.*$', column)
if match is not None:
df[column] = df[column].apply(check.isbn) df[column] = df[column].apply(check.isbn)
# check if column is a date column like dc.date.issued # check if column is a date column like dc.date.issued