1
0
mirror of https://github.com/ilri/csv-metadata-quality.git synced 2024-11-22 13:55:03 +01:00

Reformat tests with black

This commit is contained in:
Alan Orth 2019-09-26 14:02:51 +03:00
parent e7c220039b
commit 604bd5bda6
Signed by: alanorth
GPG Key ID: 0FB860CC9C45B1B9
2 changed files with 108 additions and 102 deletions

View File

@@ -4,20 +4,20 @@ import pandas as pd
def test_check_invalid_issn(capsys): def test_check_invalid_issn(capsys):
'''Test checking invalid ISSN.''' """Test checking invalid ISSN."""
value = '2321-2302' value = "2321-2302"
check.issn(value) check.issn(value)
captured = capsys.readouterr() captured = capsys.readouterr()
assert captured.out == f'Invalid ISSN: {value}\n' assert captured.out == f"Invalid ISSN: {value}\n"
def test_check_valid_issn(): def test_check_valid_issn():
'''Test checking valid ISSN.''' """Test checking valid ISSN."""
value = '0024-9319' value = "0024-9319"
result = check.issn(value) result = check.issn(value)
@@ -25,20 +25,20 @@ def test_check_valid_issn():
def test_check_invalid_isbn(capsys): def test_check_invalid_isbn(capsys):
'''Test checking invalid ISBN.''' """Test checking invalid ISBN."""
value = '99921-58-10-6' value = "99921-58-10-6"
check.isbn(value) check.isbn(value)
captured = capsys.readouterr() captured = capsys.readouterr()
assert captured.out == f'Invalid ISBN: {value}\n' assert captured.out == f"Invalid ISBN: {value}\n"
def test_check_valid_isbn(): def test_check_valid_isbn():
'''Test checking valid ISBN.''' """Test checking valid ISBN."""
value = '99921-58-10-7' value = "99921-58-10-7"
result = check.isbn(value) result = check.isbn(value)
@@ -46,20 +46,20 @@ def test_check_valid_isbn():
def test_check_invalid_separators(capsys): def test_check_invalid_separators(capsys):
'''Test checking invalid multi-value separators.''' """Test checking invalid multi-value separators."""
value = 'Alan|Orth' value = "Alan|Orth"
check.separators(value) check.separators(value)
captured = capsys.readouterr() captured = capsys.readouterr()
assert captured.out == f'Invalid multi-value separator: {value}\n' assert captured.out == f"Invalid multi-value separator: {value}\n"
def test_check_valid_separators(): def test_check_valid_separators():
'''Test checking valid multi-value separators.''' """Test checking valid multi-value separators."""
value = 'Alan||Orth' value = "Alan||Orth"
result = check.separators(value) result = check.separators(value)
@@ -67,50 +67,50 @@ def test_check_valid_separators():
def test_check_missing_date(capsys): def test_check_missing_date(capsys):
'''Test checking missing date.''' """Test checking missing date."""
value = None value = None
field_name = 'dc.date.issued' field_name = "dc.date.issued"
check.date(value, field_name) check.date(value, field_name)
captured = capsys.readouterr() captured = capsys.readouterr()
assert captured.out == f'Missing date ({field_name}).\n' assert captured.out == f"Missing date ({field_name}).\n"
def test_check_multiple_dates(capsys): def test_check_multiple_dates(capsys):
'''Test checking multiple dates.''' """Test checking multiple dates."""
value = '1990||1991' value = "1990||1991"
field_name = 'dc.date.issued' field_name = "dc.date.issued"
check.date(value, field_name) check.date(value, field_name)
captured = capsys.readouterr() captured = capsys.readouterr()
assert captured.out == f'Multiple dates not allowed ({field_name}): {value}\n' assert captured.out == f"Multiple dates not allowed ({field_name}): {value}\n"
def test_check_invalid_date(capsys): def test_check_invalid_date(capsys):
'''Test checking invalid ISO8601 date.''' """Test checking invalid ISO8601 date."""
value = '1990-0' value = "1990-0"
field_name = 'dc.date.issued' field_name = "dc.date.issued"
check.date(value, field_name) check.date(value, field_name)
captured = capsys.readouterr() captured = capsys.readouterr()
assert captured.out == f'Invalid date ({field_name}): {value}\n' assert captured.out == f"Invalid date ({field_name}): {value}\n"
def test_check_valid_date(): def test_check_valid_date():
'''Test checking valid ISO8601 date.''' """Test checking valid ISO8601 date."""
value = '1990' value = "1990"
field_name = 'dc.date.issued' field_name = "dc.date.issued"
result = check.date(value, field_name) result = check.date(value, field_name)
@@ -118,22 +118,22 @@ def test_check_valid_date():
def test_check_suspicious_characters(capsys): def test_check_suspicious_characters(capsys):
'''Test checking for suspicious characters.''' """Test checking for suspicious characters."""
value = 'foreˆt' value = "foreˆt"
field_name = 'dc.contributor.author' field_name = "dc.contributor.author"
check.suspicious_characters(value, field_name) check.suspicious_characters(value, field_name)
captured = capsys.readouterr() captured = capsys.readouterr()
assert captured.out == f'Suspicious character ({field_name}): ˆt\n' assert captured.out == f"Suspicious character ({field_name}): ˆt\n"
def test_check_valid_iso639_1_language(): def test_check_valid_iso639_1_language():
'''Test valid ISO 639-1 (alpha 2) language.''' """Test valid ISO 639-1 (alpha 2) language."""
value = 'ja' value = "ja"
result = check.language(value) result = check.language(value)
@@ -141,9 +141,9 @@ def test_check_valid_iso639_1_language():
def test_check_valid_iso639_3_language(): def test_check_valid_iso639_3_language():
'''Test valid ISO 639-3 (alpha 3) language.''' """Test valid ISO 639-3 (alpha 3) language."""
value = 'eng' value = "eng"
result = check.language(value) result = check.language(value)
@@ -151,55 +151,55 @@ def test_check_valid_iso639_3_language():
def test_check_invalid_iso639_1_language(capsys): def test_check_invalid_iso639_1_language(capsys):
'''Test invalid ISO 639-1 (alpha 2) language.''' """Test invalid ISO 639-1 (alpha 2) language."""
value = 'jp' value = "jp"
check.language(value) check.language(value)
captured = capsys.readouterr() captured = capsys.readouterr()
assert captured.out == f'Invalid ISO 639-1 language: {value}\n' assert captured.out == f"Invalid ISO 639-1 language: {value}\n"
def test_check_invalid_iso639_3_language(capsys): def test_check_invalid_iso639_3_language(capsys):
'''Test invalid ISO 639-3 (alpha 3) language.''' """Test invalid ISO 639-3 (alpha 3) language."""
value = 'chi' value = "chi"
check.language(value) check.language(value)
captured = capsys.readouterr() captured = capsys.readouterr()
assert captured.out == f'Invalid ISO 639-3 language: {value}\n' assert captured.out == f"Invalid ISO 639-3 language: {value}\n"
def test_check_invalid_language(capsys): def test_check_invalid_language(capsys):
'''Test invalid language.''' """Test invalid language."""
value = 'Span' value = "Span"
check.language(value) check.language(value)
captured = capsys.readouterr() captured = capsys.readouterr()
assert captured.out == f'Invalid language: {value}\n' assert captured.out == f"Invalid language: {value}\n"
def test_check_invalid_agrovoc(capsys): def test_check_invalid_agrovoc(capsys):
'''Test invalid AGROVOC subject.''' """Test invalid AGROVOC subject."""
value = 'FOREST' value = "FOREST"
field_name = 'dc.subject' field_name = "dc.subject"
check.agrovoc(value, field_name) check.agrovoc(value, field_name)
captured = capsys.readouterr() captured = capsys.readouterr()
assert captured.out == f'Invalid AGROVOC ({field_name}): {value}\n' assert captured.out == f"Invalid AGROVOC ({field_name}): {value}\n"
def test_check_valid_agrovoc(): def test_check_valid_agrovoc():
'''Test valid AGROVOC subject.''' """Test valid AGROVOC subject."""
value = 'FORESTS' value = "FORESTS"
field_name = 'dc.subject' field_name = "dc.subject"
result = check.agrovoc(value, field_name) result = check.agrovoc(value, field_name)
@@ -207,20 +207,20 @@ def test_check_valid_agrovoc():
def test_check_uncommon_filename_extension(capsys): def test_check_uncommon_filename_extension(capsys):
'''Test uncommon filename extension.''' """Test uncommon filename extension."""
value = 'file.pdf.lck' value = "file.pdf.lck"
check.filename_extension(value) check.filename_extension(value)
captured = capsys.readouterr() captured = capsys.readouterr()
assert captured.out == f'Filename with uncommon extension: {value}\n' assert captured.out == f"Filename with uncommon extension: {value}\n"
def test_check_common_filename_extension(): def test_check_common_filename_extension():
'''Test common filename extension.''' """Test common filename extension."""
value = 'file.pdf' value = "file.pdf"
result = check.filename_extension(value) result = check.filename_extension(value)
@@ -228,45 +228,51 @@ def test_check_common_filename_extension():
def test_check_incorrect_iso_639_1_language(capsys): def test_check_incorrect_iso_639_1_language(capsys):
'''Test incorrect ISO 639-1 language, as determined by comparing the item's language field with the actual language predicted in the item's title.''' """Test incorrect ISO 639-1 language, as determined by comparing the item's language field with the actual language predicted in the item's title."""
title = 'A randomised vaccine field trial in Kenya demonstrates protection against wildebeest-associated malignant catarrhal fever in cattle' title = "A randomised vaccine field trial in Kenya demonstrates protection against wildebeest-associated malignant catarrhal fever in cattle"
language = 'es' language = "es"
# Create a dictionary to mimic Pandas series # Create a dictionary to mimic Pandas series
row = {'dc.title': title, 'dc.language.iso': language} row = {"dc.title": title, "dc.language.iso": language}
series = pd.Series(row) series = pd.Series(row)
experimental.correct_language(series) experimental.correct_language(series)
captured = capsys.readouterr() captured = capsys.readouterr()
assert captured.out == f'Possibly incorrect language {language} (detected en): {title}\n' assert (
captured.out
== f"Possibly incorrect language {language} (detected en): {title}\n"
)
def test_check_incorrect_iso_639_3_language(capsys): def test_check_incorrect_iso_639_3_language(capsys):
'''Test incorrect ISO 639-3 language, as determined by comparing the item's language field with the actual language predicted in the item's title.''' """Test incorrect ISO 639-3 language, as determined by comparing the item's language field with the actual language predicted in the item's title."""
title = 'A randomised vaccine field trial in Kenya demonstrates protection against wildebeest-associated malignant catarrhal fever in cattle' title = "A randomised vaccine field trial in Kenya demonstrates protection against wildebeest-associated malignant catarrhal fever in cattle"
language = 'spa' language = "spa"
# Create a dictionary to mimic Pandas series # Create a dictionary to mimic Pandas series
row = {'dc.title': title, 'dc.language.iso': language} row = {"dc.title": title, "dc.language.iso": language}
series = pd.Series(row) series = pd.Series(row)
experimental.correct_language(series) experimental.correct_language(series)
captured = capsys.readouterr() captured = capsys.readouterr()
assert captured.out == f'Possibly incorrect language {language} (detected eng): {title}\n' assert (
captured.out
== f"Possibly incorrect language {language} (detected eng): {title}\n"
)
def test_check_correct_iso_639_1_language(): def test_check_correct_iso_639_1_language():
'''Test correct ISO 639-1 language, as determined by comparing the item's language field with the actual language predicted in the item's title.''' """Test correct ISO 639-1 language, as determined by comparing the item's language field with the actual language predicted in the item's title."""
title = 'A randomised vaccine field trial in Kenya demonstrates protection against wildebeest-associated malignant catarrhal fever in cattle' title = "A randomised vaccine field trial in Kenya demonstrates protection against wildebeest-associated malignant catarrhal fever in cattle"
language = 'en' language = "en"
# Create a dictionary to mimic Pandas series # Create a dictionary to mimic Pandas series
row = {'dc.title': title, 'dc.language.iso': language} row = {"dc.title": title, "dc.language.iso": language}
series = pd.Series(row) series = pd.Series(row)
result = experimental.correct_language(series) result = experimental.correct_language(series)
@@ -275,13 +281,13 @@ def test_check_correct_iso_639_1_language():
def test_check_correct_iso_639_3_language(): def test_check_correct_iso_639_3_language():
'''Test correct ISO 639-3 language, as determined by comparing the item's language field with the actual language predicted in the item's title.''' """Test correct ISO 639-3 language, as determined by comparing the item's language field with the actual language predicted in the item's title."""
title = 'A randomised vaccine field trial in Kenya demonstrates protection against wildebeest-associated malignant catarrhal fever in cattle' title = "A randomised vaccine field trial in Kenya demonstrates protection against wildebeest-associated malignant catarrhal fever in cattle"
language = 'eng' language = "eng"
# Create a dictionary to mimic Pandas series # Create a dictionary to mimic Pandas series
row = {'dc.title': title, 'dc.language.iso': language} row = {"dc.title": title, "dc.language.iso": language}
series = pd.Series(row) series = pd.Series(row)
result = experimental.correct_language(series) result = experimental.correct_language(series)

View File

@@ -2,67 +2,67 @@ import csv_metadata_quality.fix as fix
def test_fix_leading_whitespace(): def test_fix_leading_whitespace():
'''Test fixing leading whitespace.''' """Test fixing leading whitespace."""
value = ' Alan' value = " Alan"
assert fix.whitespace(value) == 'Alan' assert fix.whitespace(value) == "Alan"
def test_fix_trailing_whitespace(): def test_fix_trailing_whitespace():
'''Test fixing trailing whitespace.''' """Test fixing trailing whitespace."""
value = 'Alan ' value = "Alan "
assert fix.whitespace(value) == 'Alan' assert fix.whitespace(value) == "Alan"
def test_fix_excessive_whitespace(): def test_fix_excessive_whitespace():
'''Test fixing excessive whitespace.''' """Test fixing excessive whitespace."""
value = 'Alan  Orth' value = "Alan  Orth"
assert fix.whitespace(value) == 'Alan Orth' assert fix.whitespace(value) == "Alan Orth"
def test_fix_invalid_separators(): def test_fix_invalid_separators():
'''Test fixing invalid multi-value separators.''' """Test fixing invalid multi-value separators."""
value = 'Alan|Orth' value = "Alan|Orth"
assert fix.separators(value) == 'Alan||Orth' assert fix.separators(value) == "Alan||Orth"
def test_fix_unnecessary_unicode(): def test_fix_unnecessary_unicode():
'''Test fixing unnecessary Unicode.''' """Test fixing unnecessary Unicode."""
value = 'Alan Orth' value = "Alan Orth"
assert fix.unnecessary_unicode(value) == 'Alan Orth' assert fix.unnecessary_unicode(value) == "Alan Orth"
def test_fix_duplicates(): def test_fix_duplicates():
'''Test fixing duplicate metadata values.''' """Test fixing duplicate metadata values."""
value = 'Kenya||Kenya' value = "Kenya||Kenya"
assert fix.duplicates(value) == 'Kenya' assert fix.duplicates(value) == "Kenya"
def test_fix_newlines(): def test_fix_newlines():
'''Test fixing newlines.''' """Test fixing newlines."""
value = '''Ken value = """Ken
ya''' ya"""
assert fix.newlines(value) == 'Kenya' assert fix.newlines(value) == "Kenya"
def test_fix_comma_space(): def test_fix_comma_space():
'''Test adding space after comma.''' """Test adding space after comma."""
value = 'Orth,Alan S.' value = "Orth,Alan S."
field_name = 'dc.contributor.author' field_name = "dc.contributor.author"
assert fix.comma_space(value, field_name) == 'Orth, Alan S.' assert fix.comma_space(value, field_name) == "Orth, Alan S."