mirror of
https://github.com/ilri/csv-metadata-quality.git
synced 2024-12-21 11:42:20 +01:00
csv_metadata_quality/app.py: skip newline fix on description
The description field often contains free-form text, like the abstract, and has too many legitimate newlines for them to be corrected automatically.
This commit is contained in:
parent
1491e1edb0
commit
e2d46e9495
@@ -90,7 +90,9 @@ def run(argv):
|
||||
continue
|
||||
|
||||
if args.unsafe_fixes:
|
||||
match = re.match(r"^.*?abstract.*$", column)
|
||||
# Skip whitespace and newline fixes on abstracts and descriptions
|
||||
# because there are too many with legitimate multi-line metadata.
|
||||
match = re.match(r"^.*?(abstract|description).*$", column)
|
||||
if match is None:
|
||||
# Fix: whitespace
|
||||
df[column] = df[column].apply(fix.whitespace, field_name=column)
|
||||
|
Loading…
Reference in New Issue
Block a user