From 1f76247353dc3c3885c3f4d98b543662c4de2d38 Mon Sep 17 00:00:00 2001 From: Alan Orth Date: Fri, 2 Sep 2022 10:35:04 +0300 Subject: [PATCH] csv_metadata_quality/app.py: rework exclude/skip Instead of processing the excludes inside the for column loop we do it once before and then only need to check if the current column is in the list. --- csv_metadata_quality/app.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/csv_metadata_quality/app.py b/csv_metadata_quality/app.py index 5c01096..37c2c4c 100644 --- a/csv_metadata_quality/app.py +++ b/csv_metadata_quality/app.py @@ -76,19 +76,19 @@ def run(argv): # Read all fields as strings so dates don't get converted from 1998 to 1998.0 df = pd.read_csv(args.input_file, dtype=str) - for column in df.columns: - # Check if the user requested to skip any fields - if args.exclude_fields: - skip = False - # Split the list of excludes on ',' so we can test exact matches - # rather than fuzzy matches with regexes or "if word in string" - for exclude in args.exclude_fields.split(","): - if column == exclude and skip is False: - skip = True - if skip: - print(f"{Fore.YELLOW}Skipping {Fore.RESET}{column}") + # Check if the user requested to skip any fields + if args.exclude_fields: + # Split the list of excluded fields on ',' into a list. Note that the + # user should be careful not to include spaces here. + exclude = args.exclude_fields.split(",") + else: + exclude = list() - continue + for column in df.columns: + if column in exclude: + print(f"{Fore.YELLOW}Skipping {Fore.RESET}{column}") + + continue # Fix: whitespace df[column] = df[column].apply(fix.whitespace, field_name=column)