1
0
mirror of https://github.com/ilri/csv-metadata-quality.git synced 2025-05-09 14:46:00 +02:00

Add support for validating languages

Will validate against ISO 639-2 or ISO 639-3 depending on how long
the language field is. Otherwise will return that the language is
invalid.

Does not currently have any support for generic values like "Other".
This commit is contained in:
2019-07-29 18:59:42 +03:00
parent 1978fa7b48
commit a36454a3ac
5 changed files with 117 additions and 15 deletions

View File

@ -116,3 +116,56 @@ def test_check_suspicious_characters(capsys):
captured = capsys.readouterr()
assert captured.out == f'Suspicious character: {value}\n'
def test_check_valid_iso639_2_language():
'''Test valid ISO 639-2 language.'''
value = 'ja'
result = check.language(value)
assert result == value
def test_check_valid_iso639_3_language():
'''Test invalid ISO 639-3 language.'''
value = 'eng'
result = check.language(value)
assert result == value
def test_check_invalid_iso639_2_language(capsys):
'''Test invalid ISO 639-2 language.'''
value = 'jp'
check.language(value)
captured = capsys.readouterr()
assert captured.out == f'Invalid ISO 639-2 language: {value}\n'
def test_check_invalid_iso639_3_language(capsys):
'''Test invalid ISO 639-3 language.'''
value = 'chi'
check.language(value)
captured = capsys.readouterr()
assert captured.out == f'Invalid ISO 639-3 language: {value}\n'
def test_check_invalid_language(capsys):
'''Test invalid language.'''
value = 'Span'
check.language(value)
captured = capsys.readouterr()
assert captured.out == f'Invalid language: {value}\n'