1
0
mirror of https://github.com/ilri/csv-metadata-quality.git synced 2025-05-21 03:21:56 +02:00

Add utility function to check normalization

Python's built-in unicodedata module includes the is_normalized()
function starting with Python 3.8. The new is_nfc() utility function
in csv_metadata_quality.util lets us perform the same NFC check on
earlier Python versions.

See: https://docs.python.org/3/library/unicodedata.html
This commit is contained in:
2020-01-15 12:17:52 +02:00
parent 550ce7fb7e
commit 365ecda324
2 changed files with 16 additions and 2 deletions
csv_metadata_quality

@ -212,7 +212,7 @@ def normalize_unicode(field, field_name):
Return normalized string.
"""
from unicodedata import is_normalized
from csv_metadata_quality.util import is_nfc
from unicodedata import normalize
# Skip fields with missing values
@ -220,7 +220,7 @@ def normalize_unicode(field, field_name):
return
# Check if the current string is using normalized Unicode (NFC)
if not is_normalized("NFC", field):
if not is_nfc(field):
print(f"Normalizing Unicode ({field_name}): {field}")
field = normalize("NFC", field)