Improve exclude function

When a user explicitly requests that a field be excluded with -x, we skip that field in most checks. Until now, that did not include the item-based checks that use a transposed DataFrame, because we don't know the metadata field names (labels) until we iterate over them. Now the excludes are respected for item-based checks as well.
2025-07-02 20:47:25 +02:00 · 2022-09-02 15:59:22 +03:00
parent 1f76247353
commit 040e56fc76
6 changed files with 54 additions and 24 deletions
--- a/csv_metadata_quality/app.py
+++ b/csv_metadata_quality/app.py
@@ -200,20 +200,22 @@ def run(argv):
    # should rename column in this for loop...
    for column in df_transposed.columns:
        # Check: citation DOI
-        check.citation_doi(df_transposed[column])
+        check.citation_doi(df_transposed[column], exclude)

        # Check: title in citation
-        check.title_in_citation(df_transposed[column])
+        check.title_in_citation(df_transposed[column], exclude)

        if args.unsafe_fixes:
            # Fix: countries match regions
-            df_transposed[column] = fix.countries_match_regions(df_transposed[column])
+            df_transposed[column] = fix.countries_match_regions(
+                df_transposed[column], exclude
+            )
        else:
            # Check: countries match regions
-            check.countries_match_regions(df_transposed[column])
+            check.countries_match_regions(df_transposed[column], exclude)

        if args.experimental_checks:
-            experimental.correct_language(df_transposed[column])
+            experimental.correct_language(df_transposed[column], exclude)

    # Transpose the DataFrame back before writing. This is probably wasteful to
    # do every time since we technically only need to do it if we've done the