1
0
mirror of https://github.com/ilri/csv-metadata-quality.git synced 2024-11-17 19:47:03 +01:00

Don't fix multi-value separators on citations

This commit is contained in:
Alan Orth 2023-03-10 16:12:30 +03:00
parent 47b03c49ba
commit 45a310387a
Signed by: alanorth
GPG Key ID: 0FB860CC9C45B1B9

View File

@@ -126,7 +126,7 @@ def run(argv):
# Fix: invalid and unnecessary multi-value separators. Skip the title,
# abstract, and citation fields because "|" can be legitimate content
# there (for example, to indicate something like a subtitle).
match = re.match(r"^.*?(abstract|title).*$", column)
match = re.match(r"^.*?(abstract|[Cc]itation|title).*$", column)
if match is None:
df[column] = df[column].apply(fix.separators, field_name=column)
# Run whitespace fix again after fixing invalid separators