mirror of
https://github.com/ilri/csv-metadata-quality.git
synced 2024-11-17 19:47:03 +01:00
csv_metadata_quality/app.py: skip newline fix on description
The description field, like the abstract, often contains free-form text with too many legitimate newlines for them to be corrected automatically.
This commit is contained in:
parent
1491e1edb0
commit
e2d46e9495
@@ -90,7 +90,9 @@ def run(argv):
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
if args.unsafe_fixes:
|
if args.unsafe_fixes:
|
||||||
match = re.match(r"^.*?abstract.*$", column)
|
# Skip whitespace and newline fixes on abstracts and descriptions
|
||||||
|
# because there are too many with legitimate multi-line metadata.
|
||||||
|
match = re.match(r"^.*?(abstract|description).*$", column)
|
||||||
if match is None:
|
if match is None:
|
||||||
# Fix: whitespace
|
# Fix: whitespace
|
||||||
df[column] = df[column].apply(fix.whitespace, field_name=column)
|
df[column] = df[column].apply(fix.whitespace, field_name=column)
|
||||||
|
Loading…
Reference in New Issue
Block a user