1
0
mirror of https://github.com/ilri/csv-metadata-quality.git synced 2024-11-22 13:55:03 +01:00

Fix references to ISO 639

It turns out that ISO 639-1 defines the two-letter codes and ISO 639-2
defines the three-letter codes, aka alpha2 and alpha3 respectively.

See: https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes
This commit is contained in:
Alan Orth 2019-09-11 16:36:53 +03:00
parent b5899001b7
commit d9fc09f121
Signed by: alanorth
GPG Key ID: 0FB860CC9C45B1B9
3 changed files with 15 additions and 15 deletions

View File

@ -6,7 +6,7 @@ Requires Python 3.6 or greater. CSV and Excel support comes from the [Pandas](ht
## Functionality ## Functionality
- Validate dates, ISSNs, ISBNs, and multi-value separators ("||") - Validate dates, ISSNs, ISBNs, and multi-value separators ("||")
- Validate languages against ISO 639-2 and ISO 639-3 - Validate languages against ISO 639-1 (alpha2) and ISO 639-2 (alpha3)
- Validate subjects against the AGROVOC REST API (see the `--agrovoc-fields` option) - Validate subjects against the AGROVOC REST API (see the `--agrovoc-fields` option)
- Fix leading, trailing, and excessive (ie, more than one) whitespace - Fix leading, trailing, and excessive (ie, more than one) whitespace
- Fix invalid multi-value separators (`|`) using `--unsafe-fixes` - Fix invalid multi-value separators (`|`) using `--unsafe-fixes`

View File

@ -165,7 +165,7 @@ def suspicious_characters(field, field_name):
def language(field): def language(field):
"""Check if a language is valid ISO 639-2 or ISO 639-3. """Check if a language is valid ISO 639-1 or ISO 639-2.
Prints the value if it is invalid. Prints the value if it is invalid.
""" """
@ -182,15 +182,15 @@ def language(field):
for value in field.split("||"): for value in field.split("||"):
# After splitting, check if language value is 2 or 3 characters so we # After splitting, check if language value is 2 or 3 characters so we
# can check it against ISO 639-2 or ISO 639-3 accordingly. # can check it against ISO 639-1 or ISO 639-2 accordingly.
if len(value) == 2: if len(value) == 2:
if not languages.get(alpha_2=value): if not languages.get(alpha_2=value):
print(f"Invalid ISO 639-2 language: {value}") print(f"Invalid ISO 639-1 language: {value}")
pass pass
elif len(value) == 3: elif len(value) == 3:
if not languages.get(alpha_3=value): if not languages.get(alpha_3=value):
print(f"Invalid ISO 639-3 language: {value}") print(f"Invalid ISO 639-2 language: {value}")
pass pass
else: else:

View File

@ -128,8 +128,8 @@ def test_check_suspicious_characters(capsys):
assert captured.out == f'Suspicious character ({field_name}): ˆt\n' assert captured.out == f'Suspicious character ({field_name}): ˆt\n'
def test_check_valid_iso639_2_language(): def test_check_valid_iso639_1_language():
'''Test valid ISO 639-2 language.''' '''Test valid ISO 639-1 language.'''
value = 'ja' value = 'ja'
@ -138,8 +138,8 @@ def test_check_valid_iso639_2_language():
assert result == value assert result == value
def test_check_valid_iso639_3_language(): def test_check_valid_iso639_2_language():
'''Test valid ISO 639-3 language.''' '''Test valid ISO 639-2 language.'''
value = 'eng' value = 'eng'
@ -148,26 +148,26 @@ def test_check_valid_iso639_3_language():
assert result == value assert result == value
def test_check_invalid_iso639_2_language(capsys): def test_check_invalid_iso639_1_language(capsys):
'''Test invalid ISO 639-2 language.''' '''Test invalid ISO 639-1 language.'''
value = 'jp' value = 'jp'
check.language(value) check.language(value)
captured = capsys.readouterr() captured = capsys.readouterr()
assert captured.out == f'Invalid ISO 639-2 language: {value}\n' assert captured.out == f'Invalid ISO 639-1 language: {value}\n'
def test_check_invalid_iso639_3_language(capsys): def test_check_invalid_iso639_2_language(capsys):
'''Test invalid ISO 639-3 language.''' '''Test invalid ISO 639-2 language.'''
value = 'chi' value = 'chi'
check.language(value) check.language(value)
captured = capsys.readouterr() captured = capsys.readouterr()
assert captured.out == f'Invalid ISO 639-3 language: {value}\n' assert captured.out == f'Invalid ISO 639-2 language: {value}\n'
def test_check_invalid_language(capsys): def test_check_invalid_language(capsys):