diff --git a/tests/test_check.py b/tests/test_check.py index 0ad968f..45b644b 100644 --- a/tests/test_check.py +++ b/tests/test_check.py @@ -4,20 +4,20 @@ import pandas as pd def test_check_invalid_issn(capsys): - '''Test checking invalid ISSN.''' + """Test checking invalid ISSN.""" - value = '2321-2302' + value = "2321-2302" check.issn(value) captured = capsys.readouterr() - assert captured.out == f'Invalid ISSN: {value}\n' + assert captured.out == f"Invalid ISSN: {value}\n" def test_check_valid_issn(): - '''Test checking valid ISSN.''' + """Test checking valid ISSN.""" - value = '0024-9319' + value = "0024-9319" result = check.issn(value) @@ -25,20 +25,20 @@ def test_check_valid_issn(): def test_check_invalid_isbn(capsys): - '''Test checking invalid ISBN.''' + """Test checking invalid ISBN.""" - value = '99921-58-10-6' + value = "99921-58-10-6" check.isbn(value) captured = capsys.readouterr() - assert captured.out == f'Invalid ISBN: {value}\n' + assert captured.out == f"Invalid ISBN: {value}\n" def test_check_valid_isbn(): - '''Test checking valid ISBN.''' + """Test checking valid ISBN.""" - value = '99921-58-10-7' + value = "99921-58-10-7" result = check.isbn(value) @@ -46,20 +46,20 @@ def test_check_valid_isbn(): def test_check_invalid_separators(capsys): - '''Test checking invalid multi-value separators.''' + """Test checking invalid multi-value separators.""" - value = 'Alan|Orth' + value = "Alan|Orth" check.separators(value) captured = capsys.readouterr() - assert captured.out == f'Invalid multi-value separator: {value}\n' + assert captured.out == f"Invalid multi-value separator: {value}\n" def test_check_valid_separators(): - '''Test checking valid multi-value separators.''' + """Test checking valid multi-value separators.""" - value = 'Alan||Orth' + value = "Alan||Orth" result = check.separators(value) @@ -67,50 +67,50 @@ def test_check_valid_separators(): def test_check_missing_date(capsys): - '''Test checking missing date.''' + """Test checking missing date.""" value = None - field_name = 'dc.date.issued' + field_name = "dc.date.issued" check.date(value, field_name) captured = capsys.readouterr() - assert captured.out == f'Missing date ({field_name}).\n' + assert captured.out == f"Missing date ({field_name}).\n" def test_check_multiple_dates(capsys): - '''Test checking multiple dates.''' + """Test checking multiple dates.""" - value = '1990||1991' + value = "1990||1991" - field_name = 'dc.date.issued' + field_name = "dc.date.issued" check.date(value, field_name) captured = capsys.readouterr() - assert captured.out == f'Multiple dates not allowed ({field_name}): {value}\n' + assert captured.out == f"Multiple dates not allowed ({field_name}): {value}\n" def test_check_invalid_date(capsys): - '''Test checking invalid ISO8601 date.''' + """Test checking invalid ISO8601 date.""" - value = '1990-0' + value = "1990-0" - field_name = 'dc.date.issued' + field_name = "dc.date.issued" check.date(value, field_name) captured = capsys.readouterr() - assert captured.out == f'Invalid date ({field_name}): {value}\n' + assert captured.out == f"Invalid date ({field_name}): {value}\n" def test_check_valid_date(): - '''Test checking valid ISO8601 date.''' + """Test checking valid ISO8601 date.""" - value = '1990' + value = "1990" - field_name = 'dc.date.issued' + field_name = "dc.date.issued" result = check.date(value, field_name) @@ -118,22 +118,22 @@ def test_check_valid_date(): def test_check_suspicious_characters(capsys): - '''Test checking for suspicious characters.''' + """Test checking for suspicious characters.""" - value = 'foreˆt' + value = "foreˆt" - field_name = 'dc.contributor.author' + field_name = "dc.contributor.author" check.suspicious_characters(value, field_name) captured = capsys.readouterr() - assert captured.out == f'Suspicious character ({field_name}): ˆt\n' + assert captured.out == f"Suspicious character ({field_name}): ˆt\n" def test_check_valid_iso639_1_language(): - '''Test valid ISO 639-1 (alpha 2) language.''' + """Test valid ISO 639-1 (alpha 2) language.""" - value = 'ja' + value = "ja" result = check.language(value) @@ -141,9 +141,9 @@ def test_check_valid_iso639_1_language(): def test_check_valid_iso639_3_language(): - '''Test valid ISO 639-3 (alpha 3) language.''' + """Test valid ISO 639-3 (alpha 3) language.""" - value = 'eng' + value = "eng" result = check.language(value) @@ -151,55 +151,55 @@ def test_check_valid_iso639_3_language(): def test_check_invalid_iso639_1_language(capsys): - '''Test invalid ISO 639-1 (alpha 2) language.''' + """Test invalid ISO 639-1 (alpha 2) language.""" - value = 'jp' + value = "jp" check.language(value) captured = capsys.readouterr() - assert captured.out == f'Invalid ISO 639-1 language: {value}\n' + assert captured.out == f"Invalid ISO 639-1 language: {value}\n" def test_check_invalid_iso639_3_language(capsys): - '''Test invalid ISO 639-3 (alpha 3) language.''' + """Test invalid ISO 639-3 (alpha 3) language.""" - value = 'chi' + value = "chi" check.language(value) captured = capsys.readouterr() - assert captured.out == f'Invalid ISO 639-3 language: {value}\n' + assert captured.out == f"Invalid ISO 639-3 language: {value}\n" def test_check_invalid_language(capsys): - '''Test invalid language.''' + """Test invalid language.""" - value = 'Span' + value = "Span" check.language(value) captured = capsys.readouterr() - assert captured.out == f'Invalid language: {value}\n' + assert captured.out == f"Invalid language: {value}\n" def test_check_invalid_agrovoc(capsys): - '''Test invalid AGROVOC subject.''' + """Test invalid AGROVOC subject.""" - value = 'FOREST' - field_name = 'dc.subject' + value = "FOREST" + field_name = "dc.subject" check.agrovoc(value, field_name) captured = capsys.readouterr() - assert captured.out == f'Invalid AGROVOC ({field_name}): {value}\n' + assert captured.out == f"Invalid AGROVOC ({field_name}): {value}\n" def test_check_valid_agrovoc(): - '''Test valid AGROVOC subject.''' + """Test valid AGROVOC subject.""" - value = 'FORESTS' - field_name = 'dc.subject' + value = "FORESTS" + field_name = "dc.subject" result = check.agrovoc(value, field_name) @@ -207,20 +207,20 @@ def test_check_valid_agrovoc(): def test_check_uncommon_filename_extension(capsys): - '''Test uncommon filename extension.''' + """Test uncommon filename extension.""" - value = 'file.pdf.lck' + value = "file.pdf.lck" check.filename_extension(value) captured = capsys.readouterr() - assert captured.out == f'Filename with uncommon extension: {value}\n' + assert captured.out == f"Filename with uncommon extension: {value}\n" def test_check_common_filename_extension(): - '''Test common filename extension.''' + """Test common filename extension.""" - value = 'file.pdf' + value = "file.pdf" result = check.filename_extension(value) @@ -228,45 +228,51 @@ def test_check_common_filename_extension(): def test_check_incorrect_iso_639_1_language(capsys): - '''Test incorrect ISO 639-1 language, as determined by comparing the item's language field with the actual language predicted in the item's title.''' + """Test incorrect ISO 639-1 language, as determined by comparing the item's language field with the actual language predicted in the item's title.""" - title = 'A randomised vaccine field trial in Kenya demonstrates protection against wildebeest-associated malignant catarrhal fever in cattle' - language = 'es' + title = "A randomised vaccine field trial in Kenya demonstrates protection against wildebeest-associated malignant catarrhal fever in cattle" + language = "es" # Create a dictionary to mimic Pandas series - row = {'dc.title': title, 'dc.language.iso': language} + row = {"dc.title": title, "dc.language.iso": language} series = pd.Series(row) experimental.correct_language(series) captured = capsys.readouterr() - assert captured.out == f'Possibly incorrect language {language} (detected en): {title}\n' + assert ( + captured.out + == f"Possibly incorrect language {language} (detected en): {title}\n" + ) def test_check_incorrect_iso_639_3_language(capsys): - '''Test incorrect ISO 639-3 language, as determined by comparing the item's language field with the actual language predicted in the item's title.''' + """Test incorrect ISO 639-3 language, as determined by comparing the item's language field with the actual language predicted in the item's title.""" - title = 'A randomised vaccine field trial in Kenya demonstrates protection against wildebeest-associated malignant catarrhal fever in cattle' - language = 'spa' + title = "A randomised vaccine field trial in Kenya demonstrates protection against wildebeest-associated malignant catarrhal fever in cattle" + language = "spa" # Create a dictionary to mimic Pandas series - row = {'dc.title': title, 'dc.language.iso': language} + row = {"dc.title": title, "dc.language.iso": language} series = pd.Series(row) experimental.correct_language(series) captured = capsys.readouterr() - assert captured.out == f'Possibly incorrect language {language} (detected eng): {title}\n' + assert ( + captured.out + == f"Possibly incorrect language {language} (detected eng): {title}\n" + ) def test_check_correct_iso_639_1_language(): - '''Test correct ISO 639-1 language, as determined by comparing the item's language field with the actual language predicted in the item's title.''' + """Test correct ISO 639-1 language, as determined by comparing the item's language field with the actual language predicted in the item's title.""" - title = 'A randomised vaccine field trial in Kenya demonstrates protection against wildebeest-associated malignant catarrhal fever in cattle' - language = 'en' + title = "A randomised vaccine field trial in Kenya demonstrates protection against wildebeest-associated malignant catarrhal fever in cattle" + language = "en" # Create a dictionary to mimic Pandas series - row = {'dc.title': title, 'dc.language.iso': language} + row = {"dc.title": title, "dc.language.iso": language} series = pd.Series(row) result = experimental.correct_language(series) @@ -275,13 +281,13 @@ def test_check_correct_iso_639_1_language(): def test_check_correct_iso_639_3_language(): - '''Test correct ISO 639-3 language, as determined by comparing the item's language field with the actual language predicted in the item's title.''' + """Test correct ISO 639-3 language, as determined by comparing the item's language field with the actual language predicted in the item's title.""" - title = 'A randomised vaccine field trial in Kenya demonstrates protection against wildebeest-associated malignant catarrhal fever in cattle' - language = 'eng' + title = "A randomised vaccine field trial in Kenya demonstrates protection against wildebeest-associated malignant catarrhal fever in cattle" + language = "eng" # Create a dictionary to mimic Pandas series - row = {'dc.title': title, 'dc.language.iso': language} + row = {"dc.title": title, "dc.language.iso": language} series = pd.Series(row) result = experimental.correct_language(series) diff --git a/tests/test_fix.py b/tests/test_fix.py index a589870..e6df205 100644 --- a/tests/test_fix.py +++ b/tests/test_fix.py @@ -2,67 +2,67 @@ import csv_metadata_quality.fix as fix def test_fix_leading_whitespace(): - '''Test fixing leading whitespace.''' + """Test fixing leading whitespace.""" - value = ' Alan' + value = " Alan" - assert fix.whitespace(value) == 'Alan' + assert fix.whitespace(value) == "Alan" def test_fix_trailing_whitespace(): - '''Test fixing trailing whitespace.''' + """Test fixing trailing whitespace.""" - value = 'Alan ' + value = "Alan " - assert fix.whitespace(value) == 'Alan' + assert fix.whitespace(value) == "Alan" def test_fix_excessive_whitespace(): - '''Test fixing excessive whitespace.''' + """Test fixing excessive whitespace.""" - value = 'Alan Orth' + value = "Alan Orth" - assert fix.whitespace(value) == 'Alan Orth' + assert fix.whitespace(value) == "Alan Orth" def test_fix_invalid_separators(): - '''Test fixing invalid multi-value separators.''' + """Test fixing invalid multi-value separators.""" - value = 'Alan|Orth' + value = "Alan|Orth" - assert fix.separators(value) == 'Alan||Orth' + assert fix.separators(value) == "Alan||Orth" def test_fix_unnecessary_unicode(): - '''Test fixing unnecessary Unicode.''' + """Test fixing unnecessary Unicode.""" - value = 'Alan​ Orth' + value = "Alan​ Orth" - assert fix.unnecessary_unicode(value) == 'Alan Orth' + assert fix.unnecessary_unicode(value) == "Alan Orth" def test_fix_duplicates(): - '''Test fixing duplicate metadata values.''' + """Test fixing duplicate metadata values.""" - value = 'Kenya||Kenya' + value = "Kenya||Kenya" - assert fix.duplicates(value) == 'Kenya' + assert fix.duplicates(value) == "Kenya" def test_fix_newlines(): - '''Test fixing newlines.''' + """Test fixing newlines.""" - value = '''Ken -ya''' + value = """Ken +ya""" - assert fix.newlines(value) == 'Kenya' + assert fix.newlines(value) == "Kenya" def test_fix_comma_space(): - '''Test adding space after comma.''' + """Test adding space after comma.""" - value = 'Orth,Alan S.' + value = "Orth,Alan S." - field_name = 'dc.contributor.author' + field_name = "dc.contributor.author" - assert fix.comma_space(value, field_name) == 'Orth, Alan S.' + assert fix.comma_space(value, field_name) == "Orth, Alan S."