diff --git a/csv_metadata_quality/app.py b/csv_metadata_quality/app.py
index 0bcc61f..0278c59 100644
--- a/csv_metadata_quality/app.py
+++ b/csv_metadata_quality/app.py
@@ -121,10 +121,14 @@ def run(argv):
         # Fix: unnecessary Unicode
         df[column] = df[column].apply(fix.unnecessary_unicode)
 
-        # Fix: invalid and unnecessary multi-value separators
-        df[column] = df[column].apply(fix.separators, field_name=column)
-        # Run whitespace fix again after fixing invalid separators
-        df[column] = df[column].apply(fix.whitespace, field_name=column)
+        # Fix: invalid and unnecessary multi-value separators. Skip the title
+        # field because sometimes "|" is used to indicate something like a
+        # subtitle.
+        match = re.match(r"^.*?title.*$", column)
+        if match is None:
+            df[column] = df[column].apply(fix.separators, field_name=column)
+            # Run whitespace fix again after fixing invalid separators
+            df[column] = df[column].apply(fix.whitespace, field_name=column)
 
         # Fix: duplicate metadata values
         df[column] = df[column].apply(fix.duplicates, field_name=column)