mirror of
https://github.com/ilri/csv-metadata-quality.git
synced 2025-05-09 14:46:00 +02:00
Add support for validating languages
Values are validated against ISO 639-2 or ISO 639-3 depending on the length of the language field (two or three characters); a value of any other length is reported as an invalid language. There is currently no support for generic values like "Other".
This commit is contained in:
@ -116,3 +116,56 @@ def test_check_suspicious_characters(capsys):
|
||||
|
||||
captured = capsys.readouterr()
|
||||
assert captured.out == f'Suspicious character: {value}\n'
|
||||
|
||||
|
||||
def test_check_valid_iso639_2_language():
    '''A valid two-letter language code is returned unchanged.

    NOTE(review): two-letter codes such as 'ja' are formally ISO 639-1;
    this test keeps the "ISO 639-2" label used in its name.
    '''

    language = 'ja'

    assert check.language(language) == language
|
||||
|
||||
|
||||
def test_check_valid_iso639_3_language():
    '''Test valid ISO 639-3 language.

    The three-letter code 'eng' is expected to be returned unchanged by
    check.language.
    '''

    value = 'eng'

    result = check.language(value)

    assert result == value
|
||||
|
||||
|
||||
def test_check_invalid_iso639_2_language(capsys):
    '''An unrecognised two-letter code is reported on stdout.

    check.language is expected to print an "Invalid ISO 639-2 language"
    message for the value 'jp'.
    '''

    value = 'jp'
    expected = f'Invalid ISO 639-2 language: {value}\n'

    check.language(value)

    assert capsys.readouterr().out == expected
|
||||
|
||||
|
||||
def test_check_invalid_iso639_3_language(capsys):
    '''An unrecognised three-letter code is reported on stdout.

    check.language is expected to print an "Invalid ISO 639-3 language"
    message for the value 'chi'.
    '''

    value = 'chi'
    expected = f'Invalid ISO 639-3 language: {value}\n'

    check.language(value)

    assert capsys.readouterr().out == expected
|
||||
|
||||
|
||||
def test_check_invalid_language(capsys):
    '''A value that is neither two nor three characters long is rejected.

    check.language is expected to print a generic "Invalid language"
    message for the four-character value 'Span'.
    '''

    value = 'Span'
    expected = f'Invalid language: {value}\n'

    check.language(value)

    assert capsys.readouterr().out == expected
|
||||
|
Reference in New Issue
Block a user