diff --git a/csv_metadata_quality/fix.py b/csv_metadata_quality/fix.py index 0a0adb3..2e9cd85 100755 --- a/csv_metadata_quality/fix.py +++ b/csv_metadata_quality/fix.py @@ -104,6 +104,7 @@ def unnecessary_unicode(field): Replaces unnecessary Unicode characters like: - Soft hyphen (U+00AD) → hyphen - No-break space (U+00A0) → space + - Thin space (U+2009) → space Return string with characters removed or replaced. """ @@ -148,6 +149,16 @@ def unnecessary_unicode(field): ) field = re.sub(pattern, "-", field) + # Check for thin spaces (U+2009) + pattern = re.compile(r"\u2009") + match = re.findall(pattern, field) + + if match: + print( + f"{Fore.GREEN}Replacing unnecessary Unicode (U+2009): {Fore.RESET}{field}" + ) + field = re.sub(pattern, " ", field) + return field