mirror of
https://github.com/ilri/csv-metadata-quality.git
synced 2024-11-17 11:37:03 +01:00
csv_metadata_quality/app.py: disable whitespace and newline fixes on abstracts
The whitespace fix is too aggressive on abstracts. When people paste in text from a PDF it often contains newlines, and most of the time they want to keep them.
This commit is contained in:
parent
d5afbad788
commit
545bb8cd0c
@ -90,12 +90,14 @@ def run(argv):
|
||||
|
||||
continue
|
||||
|
||||
# Fix: whitespace
|
||||
df[column] = df[column].apply(fix.whitespace, field_name=column)
|
||||
|
||||
# Fix: newlines
|
||||
if args.unsafe_fixes:
|
||||
df[column] = df[column].apply(fix.newlines, field_name=column)
|
||||
match = re.match(r"^.*?abstract.*$", column)
|
||||
if match is None:
|
||||
# Fix: whitespace
|
||||
df[column] = df[column].apply(fix.whitespace, field_name=column)
|
||||
|
||||
# Fix: newlines
|
||||
df[column] = df[column].apply(fix.newlines, field_name=column)
|
||||
|
||||
# Fix: missing space after comma. Only run on author and citation
|
||||
# fields for now, as this problem is mostly an issue in names.
|
||||
|
Loading…
Reference in New Issue
Block a user