From 39a4b1a487453cdd33949ca00c478640e25f7e5d Mon Sep 17 00:00:00 2001
From: Alan Orth
Date: Fri, 19 Mar 2021 10:28:33 +0200
Subject: [PATCH] Add mojibake to data/test.csv and tests

---
 data/test.csv       |  1 +
 tests/test_check.py | 26 ++++++++++++++++++++++++++
 tests/test_fix.py   |  9 +++++++++
 3 files changed, 36 insertions(+)

diff --git a/data/test.csv b/data/test.csv
index 8eca26a..8874a2b 100644
--- a/data/test.csv
+++ b/data/test.csv
@@ -32,3 +32,4 @@ Unnecessary multi-value separator,2021-01-03,0378-5955||,,,,,,,
 Invalid SPDX license identifier,2021-03-11,,,,,,,CC-BY,
 Duplicate Title,2021-03-17,,,,,,,,Report
 Duplicate Title,2021-03-17,,,,,,,,Report
+Mojibake,2021-03-18,,,,CIAT PublicaÃ§ao,,,,Report
diff --git a/tests/test_check.py b/tests/test_check.py
index 1f4c6ea..aeeaca2 100644
--- a/tests/test_check.py
+++ b/tests/test_check.py
@@ -339,3 +339,29 @@ def test_check_duplicate_item(capsys):
         captured.out
         == f"{Fore.YELLOW}Possible duplicate (dc.title): {Fore.RESET}{item_title}\n"
     )
+
+
+def test_check_no_mojibake():
+    """Test string with no mojibake."""
+
+    field = "CIAT Publicaçao"
+    field_name = "dcterms.isPartOf"
+
+    result = check.mojibake(field, field_name)
+
+    assert result is None
+
+
+def test_check_mojibake(capsys):
+    """Test string with mojibake."""
+
+    field = "CIAT PublicaÃ§ao"
+    field_name = "dcterms.isPartOf"
+
+    check.mojibake(field, field_name)
+
+    captured = capsys.readouterr()
+    assert (
+        captured.out
+        == f"{Fore.YELLOW}Possible encoding issue ({field_name}): {Fore.RESET}{field}\n"
+    )
diff --git a/tests/test_fix.py b/tests/test_fix.py
index b53bb1a..4155301 100644
--- a/tests/test_fix.py
+++ b/tests/test_fix.py
@@ -108,3 +108,12 @@ def test_fix_decomposed_unicode():
     field_name = "dc.contributor.author"
 
     assert fix.normalize_unicode(value, field_name) == "Ouédraogo, Mathieu"
+
+
+def test_fix_mojibake():
+    """Test fixing a string with mojibake."""
+
+    field = "CIAT PublicaÃ§ao"
+    field_name = "dcterms.isPartOf"
+
+    assert fix.mojibake(field, field_name) == "CIAT Publicaçao"