Improve exclude function

When a user explicitly requests that a field be excluded with -x, we skip that field in most checks. Until now, that did not include the item-based checks that use a transposed dataframe, because we don't know the metadata field names (labels) until we iterate over them. Now the excludes are respected for item-based checks as well.
2025-07-02 04:27:24 +02:00 · 2022-09-02 15:59:22 +03:00
parent 1f76247353
commit 040e56fc76
6 changed files with 54 additions and 24 deletions
--- a/tests/test_check.py
+++ b/tests/test_check.py
@ -403,8 +403,9 @@ def test_check_doi_field():
    # the citation and a DOI field.
    d = {"cg.identifier.doi": doi, "dcterms.bibliographicCitation": citation}
    series = pd.Series(data=d)
+    exclude = list()

-    result = check.citation_doi(series)
+    result = check.citation_doi(series, exclude)

    assert result == None

@ -413,13 +414,14 @@ def test_check_doi_only_in_citation(capsys):
    """Test an item with a DOI in its citation, but no DOI field."""

    citation = "Orth, A. 2021. Testing all the things. doi: 10.1186/1743-422X-9-218"
+    exclude = list()

    # Emulate a column in a transposed dataframe (which is just a series), with
    # an empty DOI field and a citation containing a DOI.
    d = {"cg.identifier.doi": None, "dcterms.bibliographicCitation": citation}
    series = pd.Series(data=d)

-    check.citation_doi(series)
+    check.citation_doi(series, exclude)

    captured = capsys.readouterr()
    assert (
@ -433,13 +435,14 @@ def test_title_in_citation():

    title = "Testing all the things"
    citation = "Orth, A. 2021. Testing all the things."
+    exclude = list()

    # Emulate a column in a transposed dataframe (which is just a series), with
    # the title and citation.
    d = {"dc.title": title, "dcterms.bibliographicCitation": citation}
    series = pd.Series(data=d)

-    result = check.title_in_citation(series)
+    result = check.title_in_citation(series, exclude)

    assert result == None

@ -449,13 +452,14 @@ def test_title_not_in_citation(capsys):

    title = "Testing all the things"
    citation = "Orth, A. 2021. Testing all teh things."
+    exclude = list()

    # Emulate a column in a transposed dataframe (which is just a series), with
    # the title and citation.
    d = {"dc.title": title, "dcterms.bibliographicCitation": citation}
    series = pd.Series(data=d)

-    check.title_in_citation(series)
+    check.title_in_citation(series, exclude)

    captured = capsys.readouterr()
    assert (
@ -469,12 +473,13 @@ def test_country_matches_region():

    country = "Kenya"
    region = "Eastern Africa"
+    exclude = list()

    # Emulate a column in a transposed dataframe (which is just a series)
    d = {"cg.coverage.country": country, "cg.coverage.region": region}
    series = pd.Series(data=d)

-    result = check.countries_match_regions(series)
+    result = check.countries_match_regions(series, exclude)

    assert result == None

@ -486,6 +491,7 @@ def test_country_not_matching_region(capsys):
    country = "Kenya"
    region = ""
    missing_region = "Eastern Africa"
+    exclude = list()

    # Emulate a column in a transposed dataframe (which is just a series)
    d = {
@ -495,7 +501,7 @@ def test_country_not_matching_region(capsys):
    }
    series = pd.Series(data=d)

-    check.countries_match_regions(series)
+    check.countries_match_regions(series, exclude)

    captured = capsys.readouterr()
    assert (
--- a/tests/test_fix.py
+++ b/tests/test_fix.py
@ -131,6 +131,7 @@ def test_fix_country_not_matching_region():
    country = "Kenya"
    region = ""
    missing_region = "Eastern Africa"
+    exclude = list()

    # Emulate a column in a transposed dataframe (which is just a series)
    d = {
@ -140,7 +141,7 @@ def test_fix_country_not_matching_region():
    }
    series = pd.Series(data=d)

-    result = fix.countries_match_regions(series)
+    result = fix.countries_match_regions(series, exclude)

    # Emulate the correct series we are expecting
    d_correct = {