1
0
mirror of https://github.com/ilri/csv-metadata-quality.git synced 2025-05-08 06:06:00 +02:00

Fix references to ISO 639

It turns out that ISO 639-1 defines the two-letter codes and ISO 639-2
defines the three-letter codes, aka alpha2 and alpha3 respectively.

See: https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes
This commit is contained in:
2019-09-11 16:36:53 +03:00
parent b5899001b7
commit d9fc09f121
3 changed files with 15 additions and 15 deletions

View File

@@ -165,7 +165,7 @@ def suspicious_characters(field, field_name):
def language(field):
"""Check if a language is valid ISO 639-2 or ISO 639-3.
"""Check if a language is valid ISO 639-1 or ISO 639-2.
Prints the value if it is invalid.
"""
@@ -182,15 +182,15 @@ def language(field):
for value in field.split("||"):
# After splitting, check if language value is 2 or 3 characters so we
# can check it against ISO 639-2 or ISO 639-3 accordingly.
# can check it against ISO 639-1 or ISO 639-2 accordingly.
if len(value) == 2:
if not languages.get(alpha_2=value):
print(f"Invalid ISO 639-2 language: {value}")
print(f"Invalid ISO 639-1 language: {value}")
pass
elif len(value) == 3:
if not languages.get(alpha_3=value):
print(f"Invalid ISO 639-3 language: {value}")
print(f"Invalid ISO 639-2 language: {value}")
pass
else: