From 999cc650971ae61eb48778f0b1f7e53127a2c292 Mon Sep 17 00:00:00 2001 From: Alan Orth Date: Sun, 5 Dec 2021 15:18:35 +0200 Subject: [PATCH] csv_metadata_quality/app.py: adjust mojibake check If unsafe fixes (-u) are enabled then we don't need to do the check first before actually fixing them. Doing the check first creates extra output that needs to be reviewed by the user. --- csv_metadata_quality/app.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/csv_metadata_quality/app.py b/csv_metadata_quality/app.py index 7c1c7cf..3c60296 100644 --- a/csv_metadata_quality/app.py +++ b/csv_metadata_quality/app.py @@ -109,12 +109,11 @@ def run(argv): # Check: suspicious characters df[column].apply(check.suspicious_characters, field_name=column) - # Check: mojibake - df[column].apply(check.mojibake, field_name=column) - - # Fix: mojibake + # Fix: mojibake. If unsafe fixes are not enabled then we only check. if args.unsafe_fixes: df[column] = df[column].apply(fix.mojibake, field_name=column) + else: + df[column].apply(check.mojibake, field_name=column) # Fix: invalid and unnecessary multi-value separators df[column] = df[column].apply(fix.separators, field_name=column)