mirror of
https://github.com/ilri/csv-metadata-quality.git
synced 2025-05-09 22:56:01 +02:00
Add fix for missing space after commas
This happens in names very often, for example in the contributor and citation fields. I will limit this to those fields for now and hide this fix behind the "unsafe fixes" option until I test it more.
This commit is contained in:
@@ -55,6 +55,13 @@ def run(argv):
|
||||
if args.unsafe_fixes:
|
||||
df[column] = df[column].apply(fix.newlines)
|
||||
|
||||
# Fix: missing space after comma. Only run on author and citation
|
||||
# fields for now, as this problem is mostly an issue in names.
|
||||
if args.unsafe_fixes:
|
||||
match = re.match(r'^.*?(author|citation).*$', column)
|
||||
if match is not None:
|
||||
df[column] = df[column].apply(fix.comma_space, field_name=column)
|
||||
|
||||
# Fix: unnecessary Unicode
|
||||
df[column] = df[column].apply(fix.unnecessary_unicode)
|
||||
|
||||
|
Reference in New Issue
Block a user