mirror of
https://github.com/ilri/csv-metadata-quality.git
synced 2024-12-22 12:12:18 +01:00
Reformat tests with black
This commit is contained in:
parent
e7c220039b
commit
604bd5bda6
@ -4,20 +4,20 @@ import pandas as pd
|
||||
|
||||
|
||||
def test_check_invalid_issn(capsys):
|
||||
'''Test checking invalid ISSN.'''
|
||||
"""Test checking invalid ISSN."""
|
||||
|
||||
value = '2321-2302'
|
||||
value = "2321-2302"
|
||||
|
||||
check.issn(value)
|
||||
|
||||
captured = capsys.readouterr()
|
||||
assert captured.out == f'Invalid ISSN: {value}\n'
|
||||
assert captured.out == f"Invalid ISSN: {value}\n"
|
||||
|
||||
|
||||
def test_check_valid_issn():
|
||||
'''Test checking valid ISSN.'''
|
||||
"""Test checking valid ISSN."""
|
||||
|
||||
value = '0024-9319'
|
||||
value = "0024-9319"
|
||||
|
||||
result = check.issn(value)
|
||||
|
||||
@ -25,20 +25,20 @@ def test_check_valid_issn():
|
||||
|
||||
|
||||
def test_check_invalid_isbn(capsys):
|
||||
'''Test checking invalid ISBN.'''
|
||||
"""Test checking invalid ISBN."""
|
||||
|
||||
value = '99921-58-10-6'
|
||||
value = "99921-58-10-6"
|
||||
|
||||
check.isbn(value)
|
||||
|
||||
captured = capsys.readouterr()
|
||||
assert captured.out == f'Invalid ISBN: {value}\n'
|
||||
assert captured.out == f"Invalid ISBN: {value}\n"
|
||||
|
||||
|
||||
def test_check_valid_isbn():
|
||||
'''Test checking valid ISBN.'''
|
||||
"""Test checking valid ISBN."""
|
||||
|
||||
value = '99921-58-10-7'
|
||||
value = "99921-58-10-7"
|
||||
|
||||
result = check.isbn(value)
|
||||
|
||||
@ -46,20 +46,20 @@ def test_check_valid_isbn():
|
||||
|
||||
|
||||
def test_check_invalid_separators(capsys):
|
||||
'''Test checking invalid multi-value separators.'''
|
||||
"""Test checking invalid multi-value separators."""
|
||||
|
||||
value = 'Alan|Orth'
|
||||
value = "Alan|Orth"
|
||||
|
||||
check.separators(value)
|
||||
|
||||
captured = capsys.readouterr()
|
||||
assert captured.out == f'Invalid multi-value separator: {value}\n'
|
||||
assert captured.out == f"Invalid multi-value separator: {value}\n"
|
||||
|
||||
|
||||
def test_check_valid_separators():
|
||||
'''Test checking valid multi-value separators.'''
|
||||
"""Test checking valid multi-value separators."""
|
||||
|
||||
value = 'Alan||Orth'
|
||||
value = "Alan||Orth"
|
||||
|
||||
result = check.separators(value)
|
||||
|
||||
@ -67,50 +67,50 @@ def test_check_valid_separators():
|
||||
|
||||
|
||||
def test_check_missing_date(capsys):
|
||||
'''Test checking missing date.'''
|
||||
"""Test checking missing date."""
|
||||
|
||||
value = None
|
||||
|
||||
field_name = 'dc.date.issued'
|
||||
field_name = "dc.date.issued"
|
||||
|
||||
check.date(value, field_name)
|
||||
|
||||
captured = capsys.readouterr()
|
||||
assert captured.out == f'Missing date ({field_name}).\n'
|
||||
assert captured.out == f"Missing date ({field_name}).\n"
|
||||
|
||||
|
||||
def test_check_multiple_dates(capsys):
|
||||
'''Test checking multiple dates.'''
|
||||
"""Test checking multiple dates."""
|
||||
|
||||
value = '1990||1991'
|
||||
value = "1990||1991"
|
||||
|
||||
field_name = 'dc.date.issued'
|
||||
field_name = "dc.date.issued"
|
||||
|
||||
check.date(value, field_name)
|
||||
|
||||
captured = capsys.readouterr()
|
||||
assert captured.out == f'Multiple dates not allowed ({field_name}): {value}\n'
|
||||
assert captured.out == f"Multiple dates not allowed ({field_name}): {value}\n"
|
||||
|
||||
|
||||
def test_check_invalid_date(capsys):
|
||||
'''Test checking invalid ISO8601 date.'''
|
||||
"""Test checking invalid ISO8601 date."""
|
||||
|
||||
value = '1990-0'
|
||||
value = "1990-0"
|
||||
|
||||
field_name = 'dc.date.issued'
|
||||
field_name = "dc.date.issued"
|
||||
|
||||
check.date(value, field_name)
|
||||
|
||||
captured = capsys.readouterr()
|
||||
assert captured.out == f'Invalid date ({field_name}): {value}\n'
|
||||
assert captured.out == f"Invalid date ({field_name}): {value}\n"
|
||||
|
||||
|
||||
def test_check_valid_date():
|
||||
'''Test checking valid ISO8601 date.'''
|
||||
"""Test checking valid ISO8601 date."""
|
||||
|
||||
value = '1990'
|
||||
value = "1990"
|
||||
|
||||
field_name = 'dc.date.issued'
|
||||
field_name = "dc.date.issued"
|
||||
|
||||
result = check.date(value, field_name)
|
||||
|
||||
@ -118,22 +118,22 @@ def test_check_valid_date():
|
||||
|
||||
|
||||
def test_check_suspicious_characters(capsys):
|
||||
'''Test checking for suspicious characters.'''
|
||||
"""Test checking for suspicious characters."""
|
||||
|
||||
value = 'foreˆt'
|
||||
value = "foreˆt"
|
||||
|
||||
field_name = 'dc.contributor.author'
|
||||
field_name = "dc.contributor.author"
|
||||
|
||||
check.suspicious_characters(value, field_name)
|
||||
|
||||
captured = capsys.readouterr()
|
||||
assert captured.out == f'Suspicious character ({field_name}): ˆt\n'
|
||||
assert captured.out == f"Suspicious character ({field_name}): ˆt\n"
|
||||
|
||||
|
||||
def test_check_valid_iso639_1_language():
|
||||
'''Test valid ISO 639-1 (alpha 2) language.'''
|
||||
"""Test valid ISO 639-1 (alpha 2) language."""
|
||||
|
||||
value = 'ja'
|
||||
value = "ja"
|
||||
|
||||
result = check.language(value)
|
||||
|
||||
@ -141,9 +141,9 @@ def test_check_valid_iso639_1_language():
|
||||
|
||||
|
||||
def test_check_valid_iso639_3_language():
|
||||
'''Test valid ISO 639-3 (alpha 3) language.'''
|
||||
"""Test valid ISO 639-3 (alpha 3) language."""
|
||||
|
||||
value = 'eng'
|
||||
value = "eng"
|
||||
|
||||
result = check.language(value)
|
||||
|
||||
@ -151,55 +151,55 @@ def test_check_valid_iso639_3_language():
|
||||
|
||||
|
||||
def test_check_invalid_iso639_1_language(capsys):
|
||||
'''Test invalid ISO 639-1 (alpha 2) language.'''
|
||||
"""Test invalid ISO 639-1 (alpha 2) language."""
|
||||
|
||||
value = 'jp'
|
||||
value = "jp"
|
||||
|
||||
check.language(value)
|
||||
|
||||
captured = capsys.readouterr()
|
||||
assert captured.out == f'Invalid ISO 639-1 language: {value}\n'
|
||||
assert captured.out == f"Invalid ISO 639-1 language: {value}\n"
|
||||
|
||||
|
||||
def test_check_invalid_iso639_3_language(capsys):
|
||||
'''Test invalid ISO 639-3 (alpha 3) language.'''
|
||||
"""Test invalid ISO 639-3 (alpha 3) language."""
|
||||
|
||||
value = 'chi'
|
||||
value = "chi"
|
||||
|
||||
check.language(value)
|
||||
|
||||
captured = capsys.readouterr()
|
||||
assert captured.out == f'Invalid ISO 639-3 language: {value}\n'
|
||||
assert captured.out == f"Invalid ISO 639-3 language: {value}\n"
|
||||
|
||||
|
||||
def test_check_invalid_language(capsys):
|
||||
'''Test invalid language.'''
|
||||
"""Test invalid language."""
|
||||
|
||||
value = 'Span'
|
||||
value = "Span"
|
||||
|
||||
check.language(value)
|
||||
|
||||
captured = capsys.readouterr()
|
||||
assert captured.out == f'Invalid language: {value}\n'
|
||||
assert captured.out == f"Invalid language: {value}\n"
|
||||
|
||||
|
||||
def test_check_invalid_agrovoc(capsys):
|
||||
'''Test invalid AGROVOC subject.'''
|
||||
"""Test invalid AGROVOC subject."""
|
||||
|
||||
value = 'FOREST'
|
||||
field_name = 'dc.subject'
|
||||
value = "FOREST"
|
||||
field_name = "dc.subject"
|
||||
|
||||
check.agrovoc(value, field_name)
|
||||
|
||||
captured = capsys.readouterr()
|
||||
assert captured.out == f'Invalid AGROVOC ({field_name}): {value}\n'
|
||||
assert captured.out == f"Invalid AGROVOC ({field_name}): {value}\n"
|
||||
|
||||
|
||||
def test_check_valid_agrovoc():
|
||||
'''Test valid AGROVOC subject.'''
|
||||
"""Test valid AGROVOC subject."""
|
||||
|
||||
value = 'FORESTS'
|
||||
field_name = 'dc.subject'
|
||||
value = "FORESTS"
|
||||
field_name = "dc.subject"
|
||||
|
||||
result = check.agrovoc(value, field_name)
|
||||
|
||||
@ -207,20 +207,20 @@ def test_check_valid_agrovoc():
|
||||
|
||||
|
||||
def test_check_uncommon_filename_extension(capsys):
|
||||
'''Test uncommon filename extension.'''
|
||||
"""Test uncommon filename extension."""
|
||||
|
||||
value = 'file.pdf.lck'
|
||||
value = "file.pdf.lck"
|
||||
|
||||
check.filename_extension(value)
|
||||
|
||||
captured = capsys.readouterr()
|
||||
assert captured.out == f'Filename with uncommon extension: {value}\n'
|
||||
assert captured.out == f"Filename with uncommon extension: {value}\n"
|
||||
|
||||
|
||||
def test_check_common_filename_extension():
|
||||
'''Test common filename extension.'''
|
||||
"""Test common filename extension."""
|
||||
|
||||
value = 'file.pdf'
|
||||
value = "file.pdf"
|
||||
|
||||
result = check.filename_extension(value)
|
||||
|
||||
@ -228,45 +228,51 @@ def test_check_common_filename_extension():
|
||||
|
||||
|
||||
def test_check_incorrect_iso_639_1_language(capsys):
|
||||
'''Test incorrect ISO 639-1 language, as determined by comparing the item's language field with the actual language predicted in the item's title.'''
|
||||
"""Test incorrect ISO 639-1 language, as determined by comparing the item's language field with the actual language predicted in the item's title."""
|
||||
|
||||
title = 'A randomised vaccine field trial in Kenya demonstrates protection against wildebeest-associated malignant catarrhal fever in cattle'
|
||||
language = 'es'
|
||||
title = "A randomised vaccine field trial in Kenya demonstrates protection against wildebeest-associated malignant catarrhal fever in cattle"
|
||||
language = "es"
|
||||
|
||||
# Create a dictionary to mimic Pandas series
|
||||
row = {'dc.title': title, 'dc.language.iso': language}
|
||||
row = {"dc.title": title, "dc.language.iso": language}
|
||||
series = pd.Series(row)
|
||||
|
||||
experimental.correct_language(series)
|
||||
|
||||
captured = capsys.readouterr()
|
||||
assert captured.out == f'Possibly incorrect language {language} (detected en): {title}\n'
|
||||
assert (
|
||||
captured.out
|
||||
== f"Possibly incorrect language {language} (detected en): {title}\n"
|
||||
)
|
||||
|
||||
|
||||
def test_check_incorrect_iso_639_3_language(capsys):
|
||||
'''Test incorrect ISO 639-3 language, as determined by comparing the item's language field with the actual language predicted in the item's title.'''
|
||||
"""Test incorrect ISO 639-3 language, as determined by comparing the item's language field with the actual language predicted in the item's title."""
|
||||
|
||||
title = 'A randomised vaccine field trial in Kenya demonstrates protection against wildebeest-associated malignant catarrhal fever in cattle'
|
||||
language = 'spa'
|
||||
title = "A randomised vaccine field trial in Kenya demonstrates protection against wildebeest-associated malignant catarrhal fever in cattle"
|
||||
language = "spa"
|
||||
|
||||
# Create a dictionary to mimic Pandas series
|
||||
row = {'dc.title': title, 'dc.language.iso': language}
|
||||
row = {"dc.title": title, "dc.language.iso": language}
|
||||
series = pd.Series(row)
|
||||
|
||||
experimental.correct_language(series)
|
||||
|
||||
captured = capsys.readouterr()
|
||||
assert captured.out == f'Possibly incorrect language {language} (detected eng): {title}\n'
|
||||
assert (
|
||||
captured.out
|
||||
== f"Possibly incorrect language {language} (detected eng): {title}\n"
|
||||
)
|
||||
|
||||
|
||||
def test_check_correct_iso_639_1_language():
|
||||
'''Test correct ISO 639-1 language, as determined by comparing the item's language field with the actual language predicted in the item's title.'''
|
||||
"""Test correct ISO 639-1 language, as determined by comparing the item's language field with the actual language predicted in the item's title."""
|
||||
|
||||
title = 'A randomised vaccine field trial in Kenya demonstrates protection against wildebeest-associated malignant catarrhal fever in cattle'
|
||||
language = 'en'
|
||||
title = "A randomised vaccine field trial in Kenya demonstrates protection against wildebeest-associated malignant catarrhal fever in cattle"
|
||||
language = "en"
|
||||
|
||||
# Create a dictionary to mimic Pandas series
|
||||
row = {'dc.title': title, 'dc.language.iso': language}
|
||||
row = {"dc.title": title, "dc.language.iso": language}
|
||||
series = pd.Series(row)
|
||||
|
||||
result = experimental.correct_language(series)
|
||||
@ -275,13 +281,13 @@ def test_check_correct_iso_639_1_language():
|
||||
|
||||
|
||||
def test_check_correct_iso_639_3_language():
|
||||
'''Test correct ISO 639-3 language, as determined by comparing the item's language field with the actual language predicted in the item's title.'''
|
||||
"""Test correct ISO 639-3 language, as determined by comparing the item's language field with the actual language predicted in the item's title."""
|
||||
|
||||
title = 'A randomised vaccine field trial in Kenya demonstrates protection against wildebeest-associated malignant catarrhal fever in cattle'
|
||||
language = 'eng'
|
||||
title = "A randomised vaccine field trial in Kenya demonstrates protection against wildebeest-associated malignant catarrhal fever in cattle"
|
||||
language = "eng"
|
||||
|
||||
# Create a dictionary to mimic Pandas series
|
||||
row = {'dc.title': title, 'dc.language.iso': language}
|
||||
row = {"dc.title": title, "dc.language.iso": language}
|
||||
series = pd.Series(row)
|
||||
|
||||
result = experimental.correct_language(series)
|
||||
|
@ -2,67 +2,67 @@ import csv_metadata_quality.fix as fix
|
||||
|
||||
|
||||
def test_fix_leading_whitespace():
|
||||
'''Test fixing leading whitespace.'''
|
||||
"""Test fixing leading whitespace."""
|
||||
|
||||
value = ' Alan'
|
||||
value = " Alan"
|
||||
|
||||
assert fix.whitespace(value) == 'Alan'
|
||||
assert fix.whitespace(value) == "Alan"
|
||||
|
||||
|
||||
def test_fix_trailing_whitespace():
|
||||
'''Test fixing trailing whitespace.'''
|
||||
"""Test fixing trailing whitespace."""
|
||||
|
||||
value = 'Alan '
|
||||
value = "Alan "
|
||||
|
||||
assert fix.whitespace(value) == 'Alan'
|
||||
assert fix.whitespace(value) == "Alan"
|
||||
|
||||
|
||||
def test_fix_excessive_whitespace():
|
||||
'''Test fixing excessive whitespace.'''
|
||||
"""Test fixing excessive whitespace."""
|
||||
|
||||
value = 'Alan Orth'
|
||||
value = "Alan Orth"
|
||||
|
||||
assert fix.whitespace(value) == 'Alan Orth'
|
||||
assert fix.whitespace(value) == "Alan Orth"
|
||||
|
||||
|
||||
def test_fix_invalid_separators():
|
||||
'''Test fixing invalid multi-value separators.'''
|
||||
"""Test fixing invalid multi-value separators."""
|
||||
|
||||
value = 'Alan|Orth'
|
||||
value = "Alan|Orth"
|
||||
|
||||
assert fix.separators(value) == 'Alan||Orth'
|
||||
assert fix.separators(value) == "Alan||Orth"
|
||||
|
||||
|
||||
def test_fix_unnecessary_unicode():
|
||||
'''Test fixing unnecessary Unicode.'''
|
||||
"""Test fixing unnecessary Unicode."""
|
||||
|
||||
value = 'Alan Orth'
|
||||
value = "Alan Orth"
|
||||
|
||||
assert fix.unnecessary_unicode(value) == 'Alan Orth'
|
||||
assert fix.unnecessary_unicode(value) == "Alan Orth"
|
||||
|
||||
|
||||
def test_fix_duplicates():
|
||||
'''Test fixing duplicate metadata values.'''
|
||||
"""Test fixing duplicate metadata values."""
|
||||
|
||||
value = 'Kenya||Kenya'
|
||||
value = "Kenya||Kenya"
|
||||
|
||||
assert fix.duplicates(value) == 'Kenya'
|
||||
assert fix.duplicates(value) == "Kenya"
|
||||
|
||||
|
||||
def test_fix_newlines():
|
||||
'''Test fixing newlines.'''
|
||||
"""Test fixing newlines."""
|
||||
|
||||
value = '''Ken
|
||||
ya'''
|
||||
value = """Ken
|
||||
ya"""
|
||||
|
||||
assert fix.newlines(value) == 'Kenya'
|
||||
assert fix.newlines(value) == "Kenya"
|
||||
|
||||
|
||||
def test_fix_comma_space():
|
||||
'''Test adding space after comma.'''
|
||||
"""Test adding space after comma."""
|
||||
|
||||
value = 'Orth,Alan S.'
|
||||
value = "Orth,Alan S."
|
||||
|
||||
field_name = 'dc.contributor.author'
|
||||
field_name = "dc.contributor.author"
|
||||
|
||||
assert fix.comma_space(value, field_name) == 'Orth, Alan S.'
|
||||
assert fix.comma_space(value, field_name) == "Orth, Alan S."
|
||||
|
Loading…
Reference in New Issue
Block a user