1
0
mirror of https://github.com/ilri/csv-metadata-quality.git synced 2024-11-17 19:47:03 +01:00

csv_metadata_quality/app.py: skip abstract in separator check

Also skip the abstract field in the separator check. It is rare for an
abstract to contain a "|", and when one is present it is more likely
to be intentional than an invalid multi-value separator.
This commit is contained in:
Alan Orth 2023-02-13 10:37:33 +03:00
parent fbb625be5c
commit 8db1e36a6d
Signed by: alanorth
GPG Key ID: 0FB860CC9C45B1B9

View File

@@ -124,9 +124,9 @@ def run(argv):
df[column] = df[column].apply(fix.unnecessary_unicode) df[column] = df[column].apply(fix.unnecessary_unicode)
# Fix: invalid and unnecessary multi-value separators. Skip the title # Fix: invalid and unnecessary multi-value separators. Skip the title
# field because sometimes "|" is used to indicate something like a # and abstract fields because "|" is used to indicate something like
# subtitle. # a subtitle.
match = re.match(r"^.*?title.*$", column) match = re.match(r"^.*?(abstract|title).*$", column)
if match is None: if match is None:
df[column] = df[column].apply(fix.separators, field_name=column) df[column] = df[column].apply(fix.separators, field_name=column)
# Run whitespace fix again after fixing invalid separators # Run whitespace fix again after fixing invalid separators