mirror of
https://github.com/ilri/csv-metadata-quality.git
synced 2024-11-25 23:28:18 +01:00
Add mojibake to data/test.csv and tests
This commit is contained in:
parent
898bb412c3
commit
39a4b1a487
@ -32,3 +32,4 @@ Unnecessary multi-value separator,2021-01-03,0378-5955||,,,,,,,
|
||||
Invalid SPDX license identifier,2021-03-11,,,,,,,CC-BY,
|
||||
Duplicate Title,2021-03-17,,,,,,,,Report
|
||||
Duplicate Title,2021-03-17,,,,,,,,Report
|
||||
Mojibake,2021-03-18,,,,CIAT PublicaÃ§ao,,,,Report
|
||||
|
|
@ -339,3 +339,29 @@ def test_check_duplicate_item(capsys):
|
||||
captured.out
|
||||
== f"{Fore.YELLOW}Possible duplicate (dc.title): {Fore.RESET}{item_title}\n"
|
||||
)
|
||||
|
||||
|
||||
def test_check_no_mojibake():
    """Test that a correctly encoded string is not flagged as mojibake.

    The field value contains a legitimate non-ASCII character, and the
    check is expected to hand back ``None`` for it.
    """

    field = "CIAT Publicaçao"
    field_name = "dcterms.isPartOf"

    result = check.mojibake(field, field_name)

    # None is a singleton: compare by identity, not equality.
    assert result is None
|
||||
|
||||
|
||||
def test_check_mojibake(capsys):
    """Test that a string containing mojibake triggers an encoding warning.

    The warning is expected on stdout, naming the field and echoing the
    offending value unchanged.
    """

    # "Publicaçao" with the UTF-8 bytes of "ç" mis-decoded as two separate
    # characters (U+00C3, U+00A7). Spelled with escapes so that editors or
    # tooling that "repair" text cannot silently turn the fixture into the
    # clean string, which would make this test meaningless.
    field = "CIAT Publica\u00c3\u00a7ao"
    field_name = "dcterms.isPartOf"

    # Only the printed output matters here; the return value is not used.
    check.mojibake(field, field_name)

    captured = capsys.readouterr()
    assert (
        captured.out
        == f"{Fore.YELLOW}Possible encoding issue ({field_name}): {Fore.RESET}{field}\n"
    )
|
||||
|
@ -108,3 +108,12 @@ def test_fix_decomposed_unicode():
|
||||
field_name = "dc.contributor.author"
|
||||
|
||||
assert fix.normalize_unicode(value, field_name) == "Ouédraogo, Mathieu"
|
||||
|
||||
|
||||
def test_fix_mojibake():
    """Test that a string with mojibake is repaired to the intended text."""

    # Mis-decoded form of "Publicaçao" (U+00C3, U+00A7 in place of "ç").
    # Written with escapes so the broken input survives re-encoding by
    # editors and tooling; with a clean input this test would pass even if
    # the fix did nothing.
    field = "CIAT Publica\u00c3\u00a7ao"
    field_name = "dcterms.isPartOf"

    assert fix.mojibake(field, field_name) == "CIAT Publicaçao"
|
||||
|
Loading…
Reference in New Issue
Block a user