mirror of
https://github.com/ilri/csv-metadata-quality.git
synced 2025-05-09 14:46:00 +02:00
Improve exclude function
When a user explicitly requests that a field be excluded with -x, we skip that field in most checks. Until now, that did not include the item-based checks that use a transposed dataframe, because we don't know the metadata field names (labels) until we iterate over them. Now the excludes are respected for item-based checks as well.
This commit is contained in:
@ -403,8 +403,9 @@ def test_check_doi_field():
|
||||
# the citation and a DOI field.
|
||||
d = {"cg.identifier.doi": doi, "dcterms.bibliographicCitation": citation}
|
||||
series = pd.Series(data=d)
|
||||
exclude = list()
|
||||
|
||||
result = check.citation_doi(series)
|
||||
result = check.citation_doi(series, exclude)
|
||||
|
||||
assert result == None
|
||||
|
||||
@ -413,13 +414,14 @@ def test_check_doi_only_in_citation(capsys):
|
||||
"""Test an item with a DOI in its citation, but no DOI field."""
|
||||
|
||||
citation = "Orth, A. 2021. Testing all the things. doi: 10.1186/1743-422X-9-218"
|
||||
exclude = list()
|
||||
|
||||
# Emulate a column in a transposed dataframe (which is just a series), with
|
||||
# an empty DOI field and a citation containing a DOI.
|
||||
d = {"cg.identifier.doi": None, "dcterms.bibliographicCitation": citation}
|
||||
series = pd.Series(data=d)
|
||||
|
||||
check.citation_doi(series)
|
||||
check.citation_doi(series, exclude)
|
||||
|
||||
captured = capsys.readouterr()
|
||||
assert (
|
||||
@ -433,13 +435,14 @@ def test_title_in_citation():
|
||||
|
||||
title = "Testing all the things"
|
||||
citation = "Orth, A. 2021. Testing all the things."
|
||||
exclude = list()
|
||||
|
||||
# Emulate a column in a transposed dataframe (which is just a series), with
|
||||
# the title and citation.
|
||||
d = {"dc.title": title, "dcterms.bibliographicCitation": citation}
|
||||
series = pd.Series(data=d)
|
||||
|
||||
result = check.title_in_citation(series)
|
||||
result = check.title_in_citation(series, exclude)
|
||||
|
||||
assert result == None
|
||||
|
||||
@ -449,13 +452,14 @@ def test_title_not_in_citation(capsys):
|
||||
|
||||
title = "Testing all the things"
|
||||
citation = "Orth, A. 2021. Testing all teh things."
|
||||
exclude = list()
|
||||
|
||||
# Emulate a column in a transposed dataframe (which is just a series), with
|
||||
# the title and citation.
|
||||
d = {"dc.title": title, "dcterms.bibliographicCitation": citation}
|
||||
series = pd.Series(data=d)
|
||||
|
||||
check.title_in_citation(series)
|
||||
check.title_in_citation(series, exclude)
|
||||
|
||||
captured = capsys.readouterr()
|
||||
assert (
|
||||
@ -469,12 +473,13 @@ def test_country_matches_region():
|
||||
|
||||
country = "Kenya"
|
||||
region = "Eastern Africa"
|
||||
exclude = list()
|
||||
|
||||
# Emulate a column in a transposed dataframe (which is just a series)
|
||||
d = {"cg.coverage.country": country, "cg.coverage.region": region}
|
||||
series = pd.Series(data=d)
|
||||
|
||||
result = check.countries_match_regions(series)
|
||||
result = check.countries_match_regions(series, exclude)
|
||||
|
||||
assert result == None
|
||||
|
||||
@ -486,6 +491,7 @@ def test_country_not_matching_region(capsys):
|
||||
country = "Kenya"
|
||||
region = ""
|
||||
missing_region = "Eastern Africa"
|
||||
exclude = list()
|
||||
|
||||
# Emulate a column in a transposed dataframe (which is just a series)
|
||||
d = {
|
||||
@ -495,7 +501,7 @@ def test_country_not_matching_region(capsys):
|
||||
}
|
||||
series = pd.Series(data=d)
|
||||
|
||||
check.countries_match_regions(series)
|
||||
check.countries_match_regions(series, exclude)
|
||||
|
||||
captured = capsys.readouterr()
|
||||
assert (
|
||||
|
@ -131,6 +131,7 @@ def test_fix_country_not_matching_region():
|
||||
country = "Kenya"
|
||||
region = ""
|
||||
missing_region = "Eastern Africa"
|
||||
exclude = list()
|
||||
|
||||
# Emulate a column in a transposed dataframe (which is just a series)
|
||||
d = {
|
||||
@ -140,7 +141,7 @@ def test_fix_country_not_matching_region():
|
||||
}
|
||||
series = pd.Series(data=d)
|
||||
|
||||
result = fix.countries_match_regions(series)
|
||||
result = fix.countries_match_regions(series, exclude)
|
||||
|
||||
# Emulate the correct series we are expecting
|
||||
d_correct = {
|
||||
|
Reference in New Issue
Block a user