mirror of
https://github.com/ilri/csv-metadata-quality.git
synced 2024-12-22 04:02:19 +01:00
More ISO 639-1 and ISO 639-3 fixes
ISO 639-1 uses two-letter codes and ISO 639-3 uses three-letter codes. Technically there are ISO 639-2/T and ISO 639-2/B, which also use three-letter codes, but those are not supported by the pycountry library, so I won't even worry about them. See: https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes
This commit is contained in:
parent
ddbe970342
commit
86d4623fd3
@ -165,7 +165,7 @@ def suspicious_characters(field, field_name):
|
||||
|
||||
|
||||
def language(field):
|
||||
"""Check if a language is valid ISO 639-1 or ISO 639-2.
|
||||
"""Check if a language is valid ISO 639-1 (alpha 2) or ISO 639-3 (alpha 3).
|
||||
|
||||
Prints the value if it is invalid.
|
||||
"""
|
||||
@ -182,7 +182,7 @@ def language(field):
|
||||
for value in field.split("||"):
|
||||
|
||||
# After splitting, check if language value is 2 or 3 characters so we
|
||||
# can check it against ISO 639-1 or ISO 639-2 accordingly.
|
||||
# can check it against ISO 639-1 or ISO 639-3 accordingly.
|
||||
if len(value) == 2:
|
||||
if not languages.get(alpha_2=value):
|
||||
print(f"Invalid ISO 639-1 language: {value}")
|
||||
@ -190,7 +190,7 @@ def language(field):
|
||||
pass
|
||||
elif len(value) == 3:
|
||||
if not languages.get(alpha_3=value):
|
||||
print(f"Invalid ISO 639-2 language: {value}")
|
||||
print(f"Invalid ISO 639-3 language: {value}")
|
||||
|
||||
pass
|
||||
else:
|
||||
|
@ -129,7 +129,7 @@ def test_check_suspicious_characters(capsys):
|
||||
|
||||
|
||||
def test_check_valid_iso639_1_language():
|
||||
'''Test valid ISO 639-1 language.'''
|
||||
'''Test valid ISO 639-1 (alpha 2) language.'''
|
||||
|
||||
value = 'ja'
|
||||
|
||||
@ -138,8 +138,8 @@ def test_check_valid_iso639_1_language():
|
||||
assert result == value
|
||||
|
||||
|
||||
def test_check_valid_iso639_2_language():
|
||||
'''Test invalid ISO 639-2 language.'''
|
||||
def test_check_valid_iso639_3_language():
|
||||
'''Test valid ISO 639-3 (alpha 3) language.'''
|
||||
|
||||
value = 'eng'
|
||||
|
||||
@ -149,7 +149,7 @@ def test_check_valid_iso639_2_language():
|
||||
|
||||
|
||||
def test_check_invalid_iso639_1_language(capsys):
|
||||
'''Test invalid ISO 639-1 language.'''
|
||||
'''Test invalid ISO 639-1 (alpha 2) language.'''
|
||||
|
||||
value = 'jp'
|
||||
|
||||
@ -159,15 +159,15 @@ def test_check_invalid_iso639_1_language(capsys):
|
||||
assert captured.out == f'Invalid ISO 639-1 language: {value}\n'
|
||||
|
||||
|
||||
def test_check_invalid_iso639_2_language(capsys):
|
||||
'''Test invalid ISO 639-2 language.'''
|
||||
def test_check_invalid_iso639_3_language(capsys):
|
||||
'''Test invalid ISO 639-3 (alpha 3) language.'''
|
||||
|
||||
value = 'chi'
|
||||
|
||||
check.language(value)
|
||||
|
||||
captured = capsys.readouterr()
|
||||
assert captured.out == f'Invalid ISO 639-2 language: {value}\n'
|
||||
assert captured.out == f'Invalid ISO 639-3 language: {value}\n'
|
||||
|
||||
|
||||
def test_check_invalid_language(capsys):
|
||||
|
Loading…
Reference in New Issue
Block a user