From 1f76247353dc3c3885c3f4d98b543662c4de2d38 Mon Sep 17 00:00:00 2001
From: Alan Orth <alan.orth@gmail.com>
Date: Fri, 2 Sep 2022 10:35:04 +0300
Subject: [PATCH] csv_metadata_quality/app.py: rework exclude/skip

Instead of processing the excludes inside the per-column for loop,
split them once before the loop so that each iteration only needs to
check whether the current column is in the list.
---
 csv_metadata_quality/app.py | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/csv_metadata_quality/app.py b/csv_metadata_quality/app.py
index 5c01096..37c2c4c 100644
--- a/csv_metadata_quality/app.py
+++ b/csv_metadata_quality/app.py
@@ -76,19 +76,19 @@ def run(argv):
     # Read all fields as strings so dates don't get converted from 1998 to 1998.0
     df = pd.read_csv(args.input_file, dtype=str)
 
-    for column in df.columns:
-        # Check if the user requested to skip any fields
-        if args.exclude_fields:
-            skip = False
-            # Split the list of excludes on ',' so we can test exact matches
-            # rather than fuzzy matches with regexes or "if word in string"
-            for exclude in args.exclude_fields.split(","):
-                if column == exclude and skip is False:
-                    skip = True
-            if skip:
-                print(f"{Fore.YELLOW}Skipping {Fore.RESET}{column}")
+    # Check if the user requested to skip any fields
+    if args.exclude_fields:
+        # Split the list of excluded fields on ',' into a list. Note that the
+        # user should be careful to not include spaces here.
+        exclude = args.exclude_fields.split(",")
+    else:
+        exclude = list()
 
-                continue
+    for column in df.columns:
+        if column in exclude:
+            print(f"{Fore.YELLOW}Skipping {Fore.RESET}{column}")
+
+            continue
 
         # Fix: whitespace
         df[column] = df[column].apply(fix.whitespace, field_name=column)