1
0
mirror of https://github.com/ilri/csv-metadata-quality.git synced 2025-05-29 06:20:09 +02:00

Add check for missing DOIs

Sometimes an editor includes a DOI in the citation field, but does
not add a standalone DOI field.
This commit is contained in:
2021-10-06 21:25:39 +03:00
parent 831ce979c3
commit 8a27fb2589
4 changed files with 127 additions and 38 deletions
csv_metadata_quality
data
tests

@ -367,3 +367,44 @@ def test_check_mojibake(capsys):
captured.out
== f"{Fore.YELLOW}Possible encoding issue ({field_name}): {Fore.RESET}{field}\n"
)
def test_check_doi_field():
    """Test an item that has both a DOI field and a DOI in its citation.

    The series carries a populated ``cg.identifier.doi`` value alongside a
    citation mentioning the same DOI; the test asserts that
    ``check.citation_doi`` returns ``None`` for it.
    """

    doi = "https://doi.org/10.1186/1743-422X-9-218"
    citation = "Orth, A. 2021. Testing all the things. doi: 10.1186/1743-422X-9-218"

    # Emulate a column in a transposed dataframe (which is just a series), with
    # a populated DOI field and a citation containing that DOI.
    d = {
        "cg.identifier.doi": doi,
        "dcterms.bibliographicCitation": citation,
    }

    series = pd.Series(data=d)

    result = check.citation_doi(series)

    # None is a singleton, so compare by identity rather than equality.
    assert result is None
def test_check_doi_only_in_citation(capsys):
    """Test an item whose citation mentions a DOI while its DOI field is empty.

    Runs ``check.citation_doi`` on such a series and compares the text
    captured from stdout against the expected warning line.
    """

    citation = "Orth, A. 2021. Testing all the things. doi: 10.1186/1743-422X-9-218"

    # A single column of a transposed dataframe is just a series, so build one
    # directly: no DOI value, and a citation that contains a DOI.
    item = pd.Series(
        data={
            "cg.identifier.doi": None,
            "dcterms.bibliographicCitation": citation,
        }
    )

    check.citation_doi(item)

    expected = f"{Fore.YELLOW}DOI in citation, but missing a DOI field: {Fore.RESET}{citation}\n"
    printed = capsys.readouterr().out

    assert printed == expected