mirror of
https://github.com/ilri/csv-metadata-quality.git
synced 2024-12-21 11:42:20 +01:00
csv_metadata_quality/app.py: rework exclude/skip
Instead of processing the excludes inside the "for column" loop, we now do it once beforehand and then only need to check whether the current column is in the list.
This commit is contained in:
parent
2e489fc921
commit
1f76247353
@ -76,19 +76,19 @@ def run(argv):
|
|||||||
# Read all fields as strings so dates don't get converted from 1998 to 1998.0
|
# Read all fields as strings so dates don't get converted from 1998 to 1998.0
|
||||||
df = pd.read_csv(args.input_file, dtype=str)
|
df = pd.read_csv(args.input_file, dtype=str)
|
||||||
|
|
||||||
for column in df.columns:
|
# Check if the user requested to skip any fields
|
||||||
# Check if the user requested to skip any fields
|
if args.exclude_fields:
|
||||||
if args.exclude_fields:
|
# Split the list of excluded fields on ',' into a list. Note that the
|
||||||
skip = False
|
# user should be careful to not include spaces here.
|
||||||
# Split the list of excludes on ',' so we can test exact matches
|
exclude = args.exclude_fields.split(",")
|
||||||
# rather than fuzzy matches with regexes or "if word in string"
|
else:
|
||||||
for exclude in args.exclude_fields.split(","):
|
exclude = list()
|
||||||
if column == exclude and skip is False:
|
|
||||||
skip = True
|
|
||||||
if skip:
|
|
||||||
print(f"{Fore.YELLOW}Skipping {Fore.RESET}{column}")
|
|
||||||
|
|
||||||
continue
|
for column in df.columns:
|
||||||
|
if column in exclude:
|
||||||
|
print(f"{Fore.YELLOW}Skipping {Fore.RESET}{column}")
|
||||||
|
|
||||||
|
continue
|
||||||
|
|
||||||
# Fix: whitespace
|
# Fix: whitespace
|
||||||
df[column] = df[column].apply(fix.whitespace, field_name=column)
|
df[column] = df[column].apply(fix.whitespace, field_name=column)
|
||||||
|
Loading…
Reference in New Issue
Block a user