mirror of
https://github.com/ilri/csv-metadata-quality.git
synced 2024-11-25 15:18:19 +01:00
csv_metadata_quality/fix.py: fix thin spaces
All checks were successful
continuous-integration/drone/push Build is passing
All checks were successful
continuous-integration/drone/push Build is passing
Replace thin spaces with normal spaces. Sometimes I see these get mishandled on Windows machines and they end up as "?" or similar.
This commit is contained in:
parent
cef6c66b30
commit
95015febbd
@ -104,6 +104,7 @@ def unnecessary_unicode(field):
|
|||||||
Replaces unnecessary Unicode characters like:
|
Replaces unnecessary Unicode characters like:
|
||||||
- Soft hyphen (U+00AD) → hyphen
|
- Soft hyphen (U+00AD) → hyphen
|
||||||
- No-break space (U+00A0) → space
|
- No-break space (U+00A0) → space
|
||||||
|
- Thin space (U+2009) → space
|
||||||
|
|
||||||
Return string with characters removed or replaced.
|
Return string with characters removed or replaced.
|
||||||
"""
|
"""
|
||||||
@ -148,6 +149,16 @@ def unnecessary_unicode(field):
|
|||||||
)
|
)
|
||||||
field = re.sub(pattern, "-", field)
|
field = re.sub(pattern, "-", field)
|
||||||
|
|
||||||
|
# Check for thin spaces (U+2009)
|
||||||
|
pattern = re.compile(r"\u2009")
|
||||||
|
match = re.findall(pattern, field)
|
||||||
|
|
||||||
|
if match:
|
||||||
|
print(
|
||||||
|
f"{Fore.GREEN}Replacing unnecessary Unicode (U+2009): {Fore.RESET}{field}"
|
||||||
|
)
|
||||||
|
field = re.sub(pattern, " ", field)
|
||||||
|
|
||||||
return field
|
return field
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user