1
0
mirror of https://github.com/ilri/csv-metadata-quality.git synced 2024-12-30 16:04:29 +01:00

csv_metadata_quality/app.py: disable whitespace fix on abstracts

The whitespace fix is too aggressive on abstracts. If people paste in
text from a PDF there are often newlines, and most of the time this is
what they want.
This commit is contained in:
Alan Orth 2023-02-07 16:48:40 +03:00
parent d5afbad788
commit 545bb8cd0c
Signed by: alanorth
GPG Key ID: 0FB860CC9C45B1B9

View File

@ -90,11 +90,13 @@ def run(argv):
continue
if args.unsafe_fixes:
match = re.match(r"^.*?abstract.*$", column)
if match is None:
# Fix: whitespace
df[column] = df[column].apply(fix.whitespace, field_name=column)
# Fix: newlines
if args.unsafe_fixes:
df[column] = df[column].apply(fix.newlines, field_name=column)
# Fix: missing space after comma. Only run on author and citation