1
0
mirror of https://github.com/ilri/csv-metadata-quality.git synced 2024-11-25 15:18:19 +01:00

csv_metadata_quality/fix.py: fix thin spaces
All checks were successful
continuous-integration/drone/push Build is passing

Replace thin spaces with normal spaces. Sometimes I see these get
mishandled on Windows machines and they end up as "?" or so.
This commit is contained in:
Alan Orth 2021-12-09 23:22:53 +02:00
parent cef6c66b30
commit 95015febbd
Signed by: alanorth
GPG Key ID: 0FB860CC9C45B1B9

View File

@@ -104,6 +104,7 @@ def unnecessary_unicode(field):
Replaces unnecessary Unicode characters like: Replaces unnecessary Unicode characters like:
- Soft hyphen (U+00AD) hyphen - Soft hyphen (U+00AD) hyphen
- No-break space (U+00A0) space - No-break space (U+00A0) space
- Thin space (U+2009) space
Return string with characters removed or replaced. Return string with characters removed or replaced.
""" """
@@ -148,6 +149,16 @@ def unnecessary_unicode(field):
) )
field = re.sub(pattern, "-", field) field = re.sub(pattern, "-", field)
# Check for thin spaces (U+2009)
pattern = re.compile(r"\u2009")
match = re.findall(pattern, field)
if match:
print(
f"{Fore.GREEN}Replacing unnecessary Unicode (U+2009): {Fore.RESET}{field}"
)
field = re.sub(pattern, " ", field)
return field return field