1
0
mirror of https://github.com/ilri/csv-metadata-quality.git synced 2025-01-24 11:13:22 +01:00

Reformat tests with black

This commit is contained in:
Alan Orth 2019-09-26 14:02:51 +03:00
parent e7c220039b
commit 604bd5bda6
Signed by: alanorth
GPG Key ID: 0FB860CC9C45B1B9
2 changed files with 108 additions and 102 deletions

View File

@@ -4,20 +4,20 @@ import pandas as pd
def test_check_invalid_issn(capsys):
'''Test checking invalid ISSN.'''
"""Test checking invalid ISSN."""
value = '2321-2302'
value = "2321-2302"
check.issn(value)
captured = capsys.readouterr()
assert captured.out == f'Invalid ISSN: {value}\n'
assert captured.out == f"Invalid ISSN: {value}\n"
def test_check_valid_issn():
'''Test checking valid ISSN.'''
"""Test checking valid ISSN."""
value = '0024-9319'
value = "0024-9319"
result = check.issn(value)
@@ -25,20 +25,20 @@ def test_check_valid_issn():
def test_check_invalid_isbn(capsys):
'''Test checking invalid ISBN.'''
"""Test checking invalid ISBN."""
value = '99921-58-10-6'
value = "99921-58-10-6"
check.isbn(value)
captured = capsys.readouterr()
assert captured.out == f'Invalid ISBN: {value}\n'
assert captured.out == f"Invalid ISBN: {value}\n"
def test_check_valid_isbn():
'''Test checking valid ISBN.'''
"""Test checking valid ISBN."""
value = '99921-58-10-7'
value = "99921-58-10-7"
result = check.isbn(value)
@@ -46,20 +46,20 @@ def test_check_valid_isbn():
def test_check_invalid_separators(capsys):
'''Test checking invalid multi-value separators.'''
"""Test checking invalid multi-value separators."""
value = 'Alan|Orth'
value = "Alan|Orth"
check.separators(value)
captured = capsys.readouterr()
assert captured.out == f'Invalid multi-value separator: {value}\n'
assert captured.out == f"Invalid multi-value separator: {value}\n"
def test_check_valid_separators():
'''Test checking valid multi-value separators.'''
"""Test checking valid multi-value separators."""
value = 'Alan||Orth'
value = "Alan||Orth"
result = check.separators(value)
@@ -67,50 +67,50 @@ def test_check_valid_separators():
def test_check_missing_date(capsys):
'''Test checking missing date.'''
"""Test checking missing date."""
value = None
field_name = 'dc.date.issued'
field_name = "dc.date.issued"
check.date(value, field_name)
captured = capsys.readouterr()
assert captured.out == f'Missing date ({field_name}).\n'
assert captured.out == f"Missing date ({field_name}).\n"
def test_check_multiple_dates(capsys):
'''Test checking multiple dates.'''
"""Test checking multiple dates."""
value = '1990||1991'
value = "1990||1991"
field_name = 'dc.date.issued'
field_name = "dc.date.issued"
check.date(value, field_name)
captured = capsys.readouterr()
assert captured.out == f'Multiple dates not allowed ({field_name}): {value}\n'
assert captured.out == f"Multiple dates not allowed ({field_name}): {value}\n"
def test_check_invalid_date(capsys):
'''Test checking invalid ISO8601 date.'''
"""Test checking invalid ISO8601 date."""
value = '1990-0'
value = "1990-0"
field_name = 'dc.date.issued'
field_name = "dc.date.issued"
check.date(value, field_name)
captured = capsys.readouterr()
assert captured.out == f'Invalid date ({field_name}): {value}\n'
assert captured.out == f"Invalid date ({field_name}): {value}\n"
def test_check_valid_date():
'''Test checking valid ISO8601 date.'''
"""Test checking valid ISO8601 date."""
value = '1990'
value = "1990"
field_name = 'dc.date.issued'
field_name = "dc.date.issued"
result = check.date(value, field_name)
@@ -118,22 +118,22 @@ def test_check_valid_date():
def test_check_suspicious_characters(capsys):
'''Test checking for suspicious characters.'''
"""Test checking for suspicious characters."""
value = 'foreˆt'
value = "foreˆt"
field_name = 'dc.contributor.author'
field_name = "dc.contributor.author"
check.suspicious_characters(value, field_name)
captured = capsys.readouterr()
assert captured.out == f'Suspicious character ({field_name}): ˆt\n'
assert captured.out == f"Suspicious character ({field_name}): ˆt\n"
def test_check_valid_iso639_1_language():
'''Test valid ISO 639-1 (alpha 2) language.'''
"""Test valid ISO 639-1 (alpha 2) language."""
value = 'ja'
value = "ja"
result = check.language(value)
@@ -141,9 +141,9 @@ def test_check_valid_iso639_1_language():
def test_check_valid_iso639_3_language():
'''Test valid ISO 639-3 (alpha 3) language.'''
"""Test valid ISO 639-3 (alpha 3) language."""
value = 'eng'
value = "eng"
result = check.language(value)
@@ -151,55 +151,55 @@ def test_check_valid_iso639_3_language():
def test_check_invalid_iso639_1_language(capsys):
'''Test invalid ISO 639-1 (alpha 2) language.'''
"""Test invalid ISO 639-1 (alpha 2) language."""
value = 'jp'
value = "jp"
check.language(value)
captured = capsys.readouterr()
assert captured.out == f'Invalid ISO 639-1 language: {value}\n'
assert captured.out == f"Invalid ISO 639-1 language: {value}\n"
def test_check_invalid_iso639_3_language(capsys):
'''Test invalid ISO 639-3 (alpha 3) language.'''
"""Test invalid ISO 639-3 (alpha 3) language."""
value = 'chi'
value = "chi"
check.language(value)
captured = capsys.readouterr()
assert captured.out == f'Invalid ISO 639-3 language: {value}\n'
assert captured.out == f"Invalid ISO 639-3 language: {value}\n"
def test_check_invalid_language(capsys):
'''Test invalid language.'''
"""Test invalid language."""
value = 'Span'
value = "Span"
check.language(value)
captured = capsys.readouterr()
assert captured.out == f'Invalid language: {value}\n'
assert captured.out == f"Invalid language: {value}\n"
def test_check_invalid_agrovoc(capsys):
'''Test invalid AGROVOC subject.'''
"""Test invalid AGROVOC subject."""
value = 'FOREST'
field_name = 'dc.subject'
value = "FOREST"
field_name = "dc.subject"
check.agrovoc(value, field_name)
captured = capsys.readouterr()
assert captured.out == f'Invalid AGROVOC ({field_name}): {value}\n'
assert captured.out == f"Invalid AGROVOC ({field_name}): {value}\n"
def test_check_valid_agrovoc():
'''Test valid AGROVOC subject.'''
"""Test valid AGROVOC subject."""
value = 'FORESTS'
field_name = 'dc.subject'
value = "FORESTS"
field_name = "dc.subject"
result = check.agrovoc(value, field_name)
@@ -207,20 +207,20 @@ def test_check_valid_agrovoc():
def test_check_uncommon_filename_extension(capsys):
'''Test uncommon filename extension.'''
"""Test uncommon filename extension."""
value = 'file.pdf.lck'
value = "file.pdf.lck"
check.filename_extension(value)
captured = capsys.readouterr()
assert captured.out == f'Filename with uncommon extension: {value}\n'
assert captured.out == f"Filename with uncommon extension: {value}\n"
def test_check_common_filename_extension():
'''Test common filename extension.'''
"""Test common filename extension."""
value = 'file.pdf'
value = "file.pdf"
result = check.filename_extension(value)
@@ -228,45 +228,51 @@ def test_check_common_filename_extension():
def test_check_incorrect_iso_639_1_language(capsys):
'''Test incorrect ISO 639-1 language, as determined by comparing the item's language field with the actual language predicted in the item's title.'''
"""Test incorrect ISO 639-1 language, as determined by comparing the item's language field with the actual language predicted in the item's title."""
title = 'A randomised vaccine field trial in Kenya demonstrates protection against wildebeest-associated malignant catarrhal fever in cattle'
language = 'es'
title = "A randomised vaccine field trial in Kenya demonstrates protection against wildebeest-associated malignant catarrhal fever in cattle"
language = "es"
# Create a dictionary to mimic Pandas series
row = {'dc.title': title, 'dc.language.iso': language}
row = {"dc.title": title, "dc.language.iso": language}
series = pd.Series(row)
experimental.correct_language(series)
captured = capsys.readouterr()
assert captured.out == f'Possibly incorrect language {language} (detected en): {title}\n'
assert (
captured.out
== f"Possibly incorrect language {language} (detected en): {title}\n"
)
def test_check_incorrect_iso_639_3_language(capsys):
'''Test incorrect ISO 639-3 language, as determined by comparing the item's language field with the actual language predicted in the item's title.'''
"""Test incorrect ISO 639-3 language, as determined by comparing the item's language field with the actual language predicted in the item's title."""
title = 'A randomised vaccine field trial in Kenya demonstrates protection against wildebeest-associated malignant catarrhal fever in cattle'
language = 'spa'
title = "A randomised vaccine field trial in Kenya demonstrates protection against wildebeest-associated malignant catarrhal fever in cattle"
language = "spa"
# Create a dictionary to mimic Pandas series
row = {'dc.title': title, 'dc.language.iso': language}
row = {"dc.title": title, "dc.language.iso": language}
series = pd.Series(row)
experimental.correct_language(series)
captured = capsys.readouterr()
assert captured.out == f'Possibly incorrect language {language} (detected eng): {title}\n'
assert (
captured.out
== f"Possibly incorrect language {language} (detected eng): {title}\n"
)
def test_check_correct_iso_639_1_language():
'''Test correct ISO 639-1 language, as determined by comparing the item's language field with the actual language predicted in the item's title.'''
"""Test correct ISO 639-1 language, as determined by comparing the item's language field with the actual language predicted in the item's title."""
title = 'A randomised vaccine field trial in Kenya demonstrates protection against wildebeest-associated malignant catarrhal fever in cattle'
language = 'en'
title = "A randomised vaccine field trial in Kenya demonstrates protection against wildebeest-associated malignant catarrhal fever in cattle"
language = "en"
# Create a dictionary to mimic Pandas series
row = {'dc.title': title, 'dc.language.iso': language}
row = {"dc.title": title, "dc.language.iso": language}
series = pd.Series(row)
result = experimental.correct_language(series)
@@ -275,13 +281,13 @@ def test_check_correct_iso_639_1_language():
def test_check_correct_iso_639_3_language():
'''Test correct ISO 639-3 language, as determined by comparing the item's language field with the actual language predicted in the item's title.'''
"""Test correct ISO 639-3 language, as determined by comparing the item's language field with the actual language predicted in the item's title."""
title = 'A randomised vaccine field trial in Kenya demonstrates protection against wildebeest-associated malignant catarrhal fever in cattle'
language = 'eng'
title = "A randomised vaccine field trial in Kenya demonstrates protection against wildebeest-associated malignant catarrhal fever in cattle"
language = "eng"
# Create a dictionary to mimic Pandas series
row = {'dc.title': title, 'dc.language.iso': language}
row = {"dc.title": title, "dc.language.iso": language}
series = pd.Series(row)
result = experimental.correct_language(series)

View File

@@ -2,67 +2,67 @@ import csv_metadata_quality.fix as fix
def test_fix_leading_whitespace():
'''Test fixing leading whitespace.'''
"""Test fixing leading whitespace."""
value = ' Alan'
value = " Alan"
assert fix.whitespace(value) == 'Alan'
assert fix.whitespace(value) == "Alan"
def test_fix_trailing_whitespace():
'''Test fixing trailing whitespace.'''
"""Test fixing trailing whitespace."""
value = 'Alan '
value = "Alan "
assert fix.whitespace(value) == 'Alan'
assert fix.whitespace(value) == "Alan"
def test_fix_excessive_whitespace():
'''Test fixing excessive whitespace.'''
"""Test fixing excessive whitespace."""
value = 'Alan  Orth'
value = "Alan  Orth"
assert fix.whitespace(value) == 'Alan Orth'
assert fix.whitespace(value) == "Alan Orth"
def test_fix_invalid_separators():
'''Test fixing invalid multi-value separators.'''
"""Test fixing invalid multi-value separators."""
value = 'Alan|Orth'
value = "Alan|Orth"
assert fix.separators(value) == 'Alan||Orth'
assert fix.separators(value) == "Alan||Orth"
def test_fix_unnecessary_unicode():
'''Test fixing unnecessary Unicode.'''
"""Test fixing unnecessary Unicode."""
value = 'Alan Orth'
value = "Alan Orth"
assert fix.unnecessary_unicode(value) == 'Alan Orth'
assert fix.unnecessary_unicode(value) == "Alan Orth"
def test_fix_duplicates():
'''Test fixing duplicate metadata values.'''
"""Test fixing duplicate metadata values."""
value = 'Kenya||Kenya'
value = "Kenya||Kenya"
assert fix.duplicates(value) == 'Kenya'
assert fix.duplicates(value) == "Kenya"
def test_fix_newlines():
'''Test fixing newlines.'''
"""Test fixing newlines."""
value = '''Ken
ya'''
value = """Ken
ya"""
assert fix.newlines(value) == 'Kenya'
assert fix.newlines(value) == "Kenya"
def test_fix_comma_space():
'''Test adding space after comma.'''
"""Test adding space after comma."""
value = 'Orth,Alan S.'
value = "Orth,Alan S."
field_name = 'dc.contributor.author'
field_name = "dc.contributor.author"
assert fix.comma_space(value, field_name) == 'Orth, Alan S.'
assert fix.comma_space(value, field_name) == "Orth, Alan S."