csv_metadata_quality/app.py: Add ability to skip fields

The user may want to skip the checking and fixing of certain fields in the input file.
2025-07-28 00:38:02 +02:00 · 2019-08-27 00:10:07 +03:00
parent bd984f3db5
commit 113e7cd8b6
1 changed file with 14 additions and 0 deletions
--- a/csv_metadata_quality/app.py
+++ b/csv_metadata_quality/app.py
@@ -15,6 +15,7 @@ def parse_args(argv):
    parser.add_argument('--output-file', '-o', help='Path to output file (always CSV).', required=True, type=argparse.FileType('w', encoding='UTF-8'))
    parser.add_argument('--unsafe-fixes', '-u', help='Perform unsafe fixes.', action='store_true')
    parser.add_argument('--version', '-V', action='version', version=f'CSV Metadata Quality v{VERSION}')
+    parser.add_argument('--exclude-fields', '-x', help='Comma-separated list of fields to skip, for example: dc.contributor.author,dc.identifier.citation')
    args = parser.parse_args()

    return args
@@ -34,6 +35,19 @@ def run(argv):
    df = pd.read_csv(args.input_file, dtype=str)

    for column in df.columns.values.tolist():
+        # Check if the user requested to skip any fields
+        if args.exclude_fields:
+            skip = False
+            # Split the list of excludes on ',' so we can test exact matches
+            # rather than fuzzy matches with regexes or "if word in string"
+            for exclude in args.exclude_fields.split(','):
+                if column == exclude and skip is False:
+                    skip = True
+            if skip:
+                print(f'Skipping {column}')
+
+                continue
+
        # Fix: whitespace
        df[column] = df[column].apply(fix.whitespace)