From d9fc09f121459467f140d3a71de015d399d9933c Mon Sep 17 00:00:00 2001 From: Alan Orth Date: Wed, 11 Sep 2019 16:36:53 +0300 Subject: [PATCH] Fix references to ISO 639 It turns out that ISO 639-1 is the two-letter codes, and ISO 639-2 is the three-letter codes, aka alpha2 and alpha3. See: https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes --- README.md | 2 +- csv_metadata_quality/check.py | 8 ++++---- tests/test_check.py | 20 ++++++++++---------- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index b3723c4..f98bbad 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ Requires Python 3.6 or greater. CSV and Excel support comes from the [Pandas](ht ## Functionality - Validate dates, ISSNs, ISBNs, and multi-value separators ("||") -- Validate languages against ISO 639-2 and ISO 639-3 +- Validate languages against ISO 639-1 (alpha2) and ISO 639-2 (alpha3) - Validate subjects against the AGROVOC REST API (see the `--agrovoc-fields` option) - Fix leading, trailing, and excessive (ie, more than one) whitespace - Fix invalid multi-value separators (`|`) using `--unsafe-fixes` diff --git a/csv_metadata_quality/check.py b/csv_metadata_quality/check.py index 2c8a336..44fa134 100755 --- a/csv_metadata_quality/check.py +++ b/csv_metadata_quality/check.py @@ -165,7 +165,7 @@ def suspicious_characters(field, field_name): def language(field): - """Check if a language is valid ISO 639-2 or ISO 639-3. + """Check if a language is valid ISO 639-1 or ISO 639-2. Prints the value if it is invalid. """ @@ -182,15 +182,15 @@ def language(field): for value in field.split("||"): # After splitting, check if language value is 2 or 3 characters so we - # can check it against ISO 639-2 or ISO 639-3 accordingly. + # can check it against ISO 639-1 or ISO 639-2 accordingly. 
if len(value) == 2: if not languages.get(alpha_2=value): - print(f"Invalid ISO 639-2 language: {value}") + print(f"Invalid ISO 639-1 language: {value}") pass elif len(value) == 3: if not languages.get(alpha_3=value): - print(f"Invalid ISO 639-3 language: {value}") + print(f"Invalid ISO 639-2 language: {value}") pass else: diff --git a/tests/test_check.py b/tests/test_check.py index a525c11..f3c8151 100644 --- a/tests/test_check.py +++ b/tests/test_check.py @@ -128,8 +128,8 @@ def test_check_suspicious_characters(capsys): assert captured.out == f'Suspicious character ({field_name}): ˆt\n' -def test_check_valid_iso639_2_language(): - '''Test valid ISO 639-2 language.''' +def test_check_valid_iso639_1_language(): + '''Test valid ISO 639-1 language.''' value = 'ja' @@ -138,8 +138,8 @@ def test_check_valid_iso639_2_language(): assert result == value -def test_check_valid_iso639_3_language(): - '''Test invalid ISO 639-3 language.''' +def test_check_valid_iso639_2_language(): + '''Test valid ISO 639-2 language.''' value = 'eng' @@ -148,26 +148,26 @@ def test_check_valid_iso639_3_language(): assert result == value -def test_check_invalid_iso639_2_language(capsys): - '''Test invalid ISO 639-2 language.''' +def test_check_invalid_iso639_1_language(capsys): + '''Test invalid ISO 639-1 language.''' value = 'jp' check.language(value) captured = capsys.readouterr() - assert captured.out == f'Invalid ISO 639-2 language: {value}\n' + assert captured.out == f'Invalid ISO 639-1 language: {value}\n' -def test_check_invalid_iso639_3_language(capsys): - '''Test invalid ISO 639-3 language.''' +def test_check_invalid_iso639_2_language(capsys): + '''Test invalid ISO 639-2 language.''' value = 'chi' check.language(value) captured = capsys.readouterr() - assert captured.out == f'Invalid ISO 639-3 language: {value}\n' + assert captured.out == f'Invalid ISO 639-2 language: {value}\n' def test_check_invalid_language(capsys):