1
0
mirror of https://github.com/ilri/csv-metadata-quality.git synced 2024-12-22 12:12:18 +01:00

csv_metadata_quality/app.py: adjust mojibake check

If unsafe fixes (-u) are enabled then we don't need to do the check
first before actually fixing them. Doing the check first creates
extra output that needs to be reviewed by the user.
This commit is contained in:
Alan Orth 2021-12-05 15:18:35 +02:00
parent a7c3be280d
commit 999cc65097
Signed by: alanorth
GPG Key ID: 0FB860CC9C45B1B9

View File

@@ -109,12 +109,11 @@ def run(argv):
# Check: suspicious characters # Check: suspicious characters
df[column].apply(check.suspicious_characters, field_name=column) df[column].apply(check.suspicious_characters, field_name=column)
# Check: mojibake # Fix: mojibake. If unsafe fixes are not enabled then we only check.
df[column].apply(check.mojibake, field_name=column)
# Fix: mojibake
if args.unsafe_fixes: if args.unsafe_fixes:
df[column] = df[column].apply(fix.mojibake, field_name=column) df[column] = df[column].apply(fix.mojibake, field_name=column)
else:
df[column].apply(check.mojibake, field_name=column)
# Fix: invalid and unnecessary multi-value separators # Fix: invalid and unnecessary multi-value separators
df[column] = df[column].apply(fix.separators, field_name=column) df[column] = df[column].apply(fix.separators, field_name=column)