1
0
mirror of https://github.com/ilri/csv-metadata-quality.git synced 2024-11-29 00:58:19 +01:00

Compare commits

..

9 Commits

Author SHA1 Message Date
72fe38972e
Update requirements
All checks were successful
continuous-integration/drone/push Build is passing
Generated with poetry export:

    $ poetry export --without-hashes -f requirements.txt > requirements.txt
    $ poetry export --without-hashes --dev -f requirements.txt > requirements-dev.txt

I am trying `--without-hashes` to work around an error on pip install
when running in CI:

    ERROR: In --require-hashes mode, all requirements must have their versions pinned with ==.
2021-12-05 16:29:37 +02:00
04232d0ede
poetry.lock: run poetry update 2021-12-05 16:29:09 +02:00
f5fa33bbc6
CHANGELOG.md: add title in citation note 2021-12-05 16:23:39 +02:00
1b978159c1
data/text.csv: Add data for title in citation test 2021-12-05 16:23:06 +02:00
4d5696c4cb
csv_metadata_quality/check.py: update title in citation check
Initialize the titles and citations before the for loop so we can
access them later. This makes it easier to check if the item actua-
lly has a citation.
2021-12-05 16:21:44 +02:00
e02678cd7c
tests/test_check.py: add tests for title in citation 2021-12-05 16:01:11 +02:00
01b4354a14
tests/test_check.py: fix comment 2021-12-05 15:58:25 +02:00
3b40a68279
Add check for title in citation
This checks if the item title exists in the citation. If it is not
present it could just be missing, or could have minor differences
in the whitespace, accents, etc.
2021-12-05 15:52:42 +02:00
999cc65097
csv_metadata_quality/app.py: adjust mojibake check
If unsafe fixes (-u) are enabled then we don't need to do the check
first before actually fixing them. Doing the check first creates e-
tra output that needs to be reviewed by the user.
2021-12-05 15:18:35 +02:00
8 changed files with 126 additions and 49 deletions

View File

@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## Unreleased ## Unreleased
### Added ### Added
- Ability to check for, and fix, "mojibake" characters using [ftfy](https://github.com/LuminosoInsight/python-ftfy) - Ability to check for, and fix, "mojibake" characters using [ftfy](https://github.com/LuminosoInsight/python-ftfy)
- Ability to check if the item's title exists in the citation
### Updated ### Updated
- Python dependencies - Python dependencies

View File

@ -109,12 +109,11 @@ def run(argv):
# Check: suspicious characters # Check: suspicious characters
df[column].apply(check.suspicious_characters, field_name=column) df[column].apply(check.suspicious_characters, field_name=column)
# Check: mojibake # Fix: mojibake. If unsafe fixes are not enabled then we only check.
df[column].apply(check.mojibake, field_name=column)
# Fix: mojibake
if args.unsafe_fixes: if args.unsafe_fixes:
df[column] = df[column].apply(fix.mojibake, field_name=column) df[column] = df[column].apply(fix.mojibake, field_name=column)
else:
df[column].apply(check.mojibake, field_name=column)
# Fix: invalid and unnecessary multi-value separators # Fix: invalid and unnecessary multi-value separators
df[column] = df[column].apply(fix.separators, field_name=column) df[column] = df[column].apply(fix.separators, field_name=column)
@ -195,6 +194,9 @@ def run(argv):
# Check: citation DOI # Check: citation DOI
check.citation_doi(df_transposed[column]) check.citation_doi(df_transposed[column])
# Check: title in citation
check.title_in_citation(df_transposed[column])
if args.experimental_checks: if args.experimental_checks:
experimental.correct_language(df_transposed[column]) experimental.correct_language(df_transposed[column])

View File

@ -410,3 +410,40 @@ def citation_doi(row):
) )
return return
def title_in_citation(row):
"""Check for the scenario where an item's title is missing from its cita-
tion. This could mean that it is missing entirely, or perhaps just exists
in a different format (whitespace, accents, etc).
Function prints a warning if the title does not appear in the citation.
"""
# Initialize some variables at global scope so that we can set them in the
# loop scope below and still be able to access them afterwards.
title = ""
citation = ""
# Iterate over the labels of the current row's values to get the names of
# the title and citation columns. Then we check if the title is present in
# the citation.
for label in row.axes[0]:
# Skip fields with missing values
if pd.isna(row[label]):
continue
# Find the name of the title column
match = re.match(r"^(dc|dcterms)\.title.*$", label)
if match is not None:
title = row[label]
# Find the name of the citation column
match = re.match(r"^.*?[cC]itation.*$", label)
if match is not None:
citation = row[label]
if citation != "":
if title not in citation:
print(f"{Fore.YELLOW}Title is not present in citation: {Fore.RESET}{title}")
return

View File

@ -33,4 +33,5 @@ Invalid SPDX license identifier,2021-03-11,,,,,,,CC-BY,,,
Duplicate Title,2021-03-17,,,,,,,,Report,, Duplicate Title,2021-03-17,,,,,,,,Report,,
Duplicate Title,2021-03-17,,,,,,,,Report,, Duplicate Title,2021-03-17,,,,,,,,Report,,
Mojibake,2021-03-18,,,,Publicaçao CIAT,,,,Report,, Mojibake,2021-03-18,,,,Publicaçao CIAT,,,,Report,,
"DOI in citation, but missing cg.identifier.doi",2021-10-06,,,,,,,,,"Orth, A. 2021. Testing all the things. doi: 10.1186/1743-422X-9-218", "DOI in citation, but missing cg.identifier.doi",2021-10-06,,,,,,,,,"Orth, A. 2021. DOI in citation, but missing cg.identifier.doi. doi: 10.1186/1743-422X-9-218",
Title missing from citation,2021-12-05,,,,,,,,,"Orth, A. 2021. Title missing f rom citation.",

1 dc.title dcterms.issued dc.identifier.issn dc.identifier.isbn dcterms.language dcterms.subject cg.coverage.country filename dcterms.license dcterms.type dcterms.bibliographicCitation cg.identifier.doi
33 Duplicate Title 2021-03-17 Report
34 Mojibake 2021-03-18 Publicaçao CIAT Report
35 DOI in citation, but missing cg.identifier.doi 2021-10-06 Orth, A. 2021. Testing all the things. doi: 10.1186/1743-422X-9-218 Orth, A. 2021. DOI in citation, but missing cg.identifier.doi. doi: 10.1186/1743-422X-9-218
36 Title missing from citation 2021-12-05 Orth, A. 2021. Title missing f rom citation.
37

70
poetry.lock generated
View File

@ -153,7 +153,7 @@ python-versions = "*"
[[package]] [[package]]
name = "charset-normalizer" name = "charset-normalizer"
version = "2.0.8" version = "2.0.9"
description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet."
category = "main" category = "main"
optional = false optional = false
@ -315,7 +315,7 @@ python-versions = "*"
[[package]] [[package]]
name = "ipython" name = "ipython"
version = "7.30.0" version = "7.30.1"
description = "IPython: Productive Interactive Computing" description = "IPython: Productive Interactive Computing"
category = "dev" category = "dev"
optional = false optional = false
@ -514,7 +514,7 @@ future = "*"
[[package]] [[package]]
name = "parso" name = "parso"
version = "0.8.2" version = "0.8.3"
description = "A Python Parser" description = "A Python Parser"
category = "dev" category = "dev"
optional = false optional = false
@ -793,7 +793,7 @@ test = ["black (==20.8b1)", "flake8", "flake8-comprehensions", "flake8-polyfill"
[[package]] [[package]]
name = "rich" name = "rich"
version = "10.14.0" version = "10.15.2"
description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal" description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal"
category = "dev" category = "dev"
optional = false optional = false
@ -893,7 +893,7 @@ test = ["pytest"]
[[package]] [[package]]
name = "typed-ast" name = "typed-ast"
version = "1.5.0" version = "1.5.1"
description = "a fork of Python 2 and 3 ast modules with type comment support" description = "a fork of Python 2 and 3 ast modules with type comment support"
category = "dev" category = "dev"
optional = false optional = false
@ -901,7 +901,7 @@ python-versions = ">=3.6"
[[package]] [[package]]
name = "typing-extensions" name = "typing-extensions"
version = "4.0.0" version = "4.0.1"
description = "Backported and Experimental Type Hints for Python 3.6+" description = "Backported and Experimental Type Hints for Python 3.6+"
category = "main" category = "main"
optional = false optional = false
@ -1010,8 +1010,8 @@ certifi = [
{file = "certifi-2021.10.8.tar.gz", hash = "sha256:78884e7c1d4b00ce3cea67b44566851c4343c120abd683433ce934a68ea58872"}, {file = "certifi-2021.10.8.tar.gz", hash = "sha256:78884e7c1d4b00ce3cea67b44566851c4343c120abd683433ce934a68ea58872"},
] ]
charset-normalizer = [ charset-normalizer = [
{file = "charset-normalizer-2.0.8.tar.gz", hash = "sha256:735e240d9a8506778cd7a453d97e817e536bb1fc29f4f6961ce297b9c7a917b0"}, {file = "charset-normalizer-2.0.9.tar.gz", hash = "sha256:b0b883e8e874edfdece9c28f314e3dd5badf067342e42fb162203335ae61aa2c"},
{file = "charset_normalizer-2.0.8-py3-none-any.whl", hash = "sha256:83fcdeb225499d6344c8f7f34684c2981270beacc32ede2e669e94f7fa544405"}, {file = "charset_normalizer-2.0.9-py3-none-any.whl", hash = "sha256:1eecaa09422db5be9e29d7fc65664e6c33bd06f9ced7838578ba40d58bdf3721"},
] ]
click = [ click = [
{file = "click-8.0.3-py3-none-any.whl", hash = "sha256:353f466495adaeb40b6b5f592f9f91cb22372351c84caeb068132442a4518ef3"}, {file = "click-8.0.3-py3-none-any.whl", hash = "sha256:353f466495adaeb40b6b5f592f9f91cb22372351c84caeb068132442a4518ef3"},
@ -1116,8 +1116,8 @@ iniconfig = [
{file = "iniconfig-1.1.1.tar.gz", hash = "sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32"}, {file = "iniconfig-1.1.1.tar.gz", hash = "sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32"},
] ]
ipython = [ ipython = [
{file = "ipython-7.30.0-py3-none-any.whl", hash = "sha256:c8f3e07aefb9cf9e067f39686f035ce09b27a1ee602116a3030b91b6fc138ee4"}, {file = "ipython-7.30.1-py3-none-any.whl", hash = "sha256:fc60ef843e0863dd4e24ab2bb5698f071031332801ecf8d1aeb4fb622056545c"},
{file = "ipython-7.30.0.tar.gz", hash = "sha256:d41f8e80b99690122400f9b2069b12f670246a1b4cc5d332bd6c4e2500e6d6fb"}, {file = "ipython-7.30.1.tar.gz", hash = "sha256:cb6aef731bf708a7727ab6cde8df87f0281b1427d41e65d62d4b68934fa54e97"},
] ]
isodate = [ isodate = [
{file = "isodate-0.6.0-py2.py3-none-any.whl", hash = "sha256:aa4d33c06640f5352aca96e4b81afd8ab3b47337cc12089822d6f322ac772c81"}, {file = "isodate-0.6.0-py2.py3-none-any.whl", hash = "sha256:aa4d33c06640f5352aca96e4b81afd8ab3b47337cc12089822d6f322ac772c81"},
@ -1226,8 +1226,8 @@ parsedatetime = [
{file = "parsedatetime-2.4.tar.gz", hash = "sha256:3d817c58fb9570d1eec1dd46fa9448cd644eeed4fb612684b02dfda3a79cb84b"}, {file = "parsedatetime-2.4.tar.gz", hash = "sha256:3d817c58fb9570d1eec1dd46fa9448cd644eeed4fb612684b02dfda3a79cb84b"},
] ]
parso = [ parso = [
{file = "parso-0.8.2-py2.py3-none-any.whl", hash = "sha256:a8c4922db71e4fdb90e0d0bc6e50f9b273d3397925e5e60a717e719201778d22"}, {file = "parso-0.8.3-py2.py3-none-any.whl", hash = "sha256:c001d4636cd3aecdaf33cbb40aebb59b094be2a74c556778ef5576c175e19e75"},
{file = "parso-0.8.2.tar.gz", hash = "sha256:12b83492c6239ce32ff5eed6d3639d6a536170723c6f3f1506869f1ace413398"}, {file = "parso-0.8.3.tar.gz", hash = "sha256:8c07be290bb59f03588915921e29e8a50002acaf2cdc5fa0e0114f91709fafa0"},
] ]
pathspec = [ pathspec = [
{file = "pathspec-0.9.0-py2.py3-none-any.whl", hash = "sha256:7d15c4ddb0b5c802d161efc417ec1a2558ea2653c2e8ad9c19098201dc1c993a"}, {file = "pathspec-0.9.0-py2.py3-none-any.whl", hash = "sha256:7d15c4ddb0b5c802d161efc417ec1a2558ea2653c2e8ad9c19098201dc1c993a"},
@ -1371,8 +1371,8 @@ requests-cache = [
{file = "requests_cache-0.6.4-py2.py3-none-any.whl", hash = "sha256:1102daa13a804abe23fad62d694e7dee58d6063a35d94bf6e8c9821e22e5a78b"}, {file = "requests_cache-0.6.4-py2.py3-none-any.whl", hash = "sha256:1102daa13a804abe23fad62d694e7dee58d6063a35d94bf6e8c9821e22e5a78b"},
] ]
rich = [ rich = [
{file = "rich-10.14.0-py3-none-any.whl", hash = "sha256:ab9cbfd7a3802d8c6f0fa91e974630e2a69447972dcbb9dfe9b01016dd95e38e"}, {file = "rich-10.15.2-py3-none-any.whl", hash = "sha256:43b2c6ad51f46f6c94992aee546f1c177719f4e05aff8f5ea4d2efae3ebdac89"},
{file = "rich-10.14.0.tar.gz", hash = "sha256:8bfe4546d56b4131298d3a9e571a0742de342f1593770bd0d4707299f772a0af"}, {file = "rich-10.15.2.tar.gz", hash = "sha256:1dded089b79dd042b3ab5cd63439a338e16652001f0c16e73acdcf4997ad772d"},
] ]
six = [ six = [
{file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"},
@ -1431,29 +1431,29 @@ traitlets = [
{file = "traitlets-5.1.1.tar.gz", hash = "sha256:059f456c5a7c1c82b98c2e8c799f39c9b8128f6d0d46941ee118daace9eb70c7"}, {file = "traitlets-5.1.1.tar.gz", hash = "sha256:059f456c5a7c1c82b98c2e8c799f39c9b8128f6d0d46941ee118daace9eb70c7"},
] ]
typed-ast = [ typed-ast = [
{file = "typed_ast-1.5.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:7b310a207ee9fde3f46ba327989e6cba4195bc0c8c70a158456e7b10233e6bed"}, {file = "typed_ast-1.5.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5d8314c92414ce7481eee7ad42b353943679cf6f30237b5ecbf7d835519e1212"},
{file = "typed_ast-1.5.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:52ca2b2b524d770bed7a393371a38e91943f9160a190141e0df911586066ecda"}, {file = "typed_ast-1.5.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b53ae5de5500529c76225d18eeb060efbcec90ad5e030713fe8dab0fb4531631"},
{file = "typed_ast-1.5.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:14fed8820114a389a2b7e91624db5f85f3f6682fda09fe0268a59aabd28fe5f5"}, {file = "typed_ast-1.5.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:24058827d8f5d633f97223f5148a7d22628099a3d2efe06654ce872f46f07cdb"},
{file = "typed_ast-1.5.0-cp310-cp310-win_amd64.whl", hash = "sha256:65c81abbabda7d760df7304d843cc9dbe7ef5d485504ca59a46ae2d1731d2428"}, {file = "typed_ast-1.5.1-cp310-cp310-win_amd64.whl", hash = "sha256:a6d495c1ef572519a7bac9534dbf6d94c40e5b6a608ef41136133377bba4aa08"},
{file = "typed_ast-1.5.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:37ba2ab65a0028b1a4f2b61a8fe77f12d242731977d274a03d68ebb751271508"}, {file = "typed_ast-1.5.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:de4ecae89c7d8b56169473e08f6bfd2df7f95015591f43126e4ea7865928677e"},
{file = "typed_ast-1.5.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:49af5b8f6f03ed1eb89ee06c1d7c2e7c8e743d720c3746a5857609a1abc94c94"}, {file = "typed_ast-1.5.1-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:256115a5bc7ea9e665c6314ed6671ee2c08ca380f9d5f130bd4d2c1f5848d695"},
{file = "typed_ast-1.5.0-cp36-cp36m-win_amd64.whl", hash = "sha256:e4374a76e61399a173137e7984a1d7e356038cf844f24fd8aea46c8029a2f712"}, {file = "typed_ast-1.5.1-cp36-cp36m-win_amd64.whl", hash = "sha256:7c42707ab981b6cf4b73490c16e9d17fcd5227039720ca14abe415d39a173a30"},
{file = "typed_ast-1.5.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:ea517c2bb11c5e4ba7a83a91482a2837041181d57d3ed0749a6c382a2b6b7086"}, {file = "typed_ast-1.5.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:71dcda943a471d826ea930dd449ac7e76db7be778fcd722deb63642bab32ea3f"},
{file = "typed_ast-1.5.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:51040bf45aacefa44fa67fb9ebcd1f2bec73182b99a532c2394eea7dabd18e24"}, {file = "typed_ast-1.5.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:4f30a2bcd8e68adbb791ce1567fdb897357506f7ea6716f6bbdd3053ac4d9471"},
{file = "typed_ast-1.5.0-cp37-cp37m-win_amd64.whl", hash = "sha256:806e0c7346b9b4af8c62d9a29053f484599921a4448c37fbbcbbf15c25138570"}, {file = "typed_ast-1.5.1-cp37-cp37m-win_amd64.whl", hash = "sha256:ca9e8300d8ba0b66d140820cf463438c8e7b4cdc6fd710c059bfcfb1531d03fb"},
{file = "typed_ast-1.5.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:a67fd5914603e2165e075f1b12f5a8356bfb9557e8bfb74511108cfbab0f51ed"}, {file = "typed_ast-1.5.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9caaf2b440efb39ecbc45e2fabde809cbe56272719131a6318fd9bf08b58e2cb"},
{file = "typed_ast-1.5.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:224afecb8b39739f5c9562794a7c98325cb9d972712e1a98b6989a4720219541"}, {file = "typed_ast-1.5.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:c9bcad65d66d594bffab8575f39420fe0ee96f66e23c4d927ebb4e24354ec1af"},
{file = "typed_ast-1.5.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:155b74b078be842d2eb630dd30a280025eca0a5383c7d45853c27afee65f278f"}, {file = "typed_ast-1.5.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:591bc04e507595887160ed7aa8d6785867fb86c5793911be79ccede61ae96f4d"},
{file = "typed_ast-1.5.0-cp38-cp38-win_amd64.whl", hash = "sha256:361b9e5d27bd8e3ccb6ea6ad6c4f3c0be322a1a0f8177db6d56264fa0ae40410"}, {file = "typed_ast-1.5.1-cp38-cp38-win_amd64.whl", hash = "sha256:a80d84f535642420dd17e16ae25bb46c7f4c16ee231105e7f3eb43976a89670a"},
{file = "typed_ast-1.5.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:618912cbc7e17b4aeba86ffe071698c6e2d292acbd6d1d5ec1ee724b8c4ae450"}, {file = "typed_ast-1.5.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:38cf5c642fa808300bae1281460d4f9b7617cf864d4e383054a5ef336e344d32"},
{file = "typed_ast-1.5.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:7e6731044f748340ef68dcadb5172a4b1f40847a2983fe3983b2a66445fbc8e6"}, {file = "typed_ast-1.5.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5b6ab14c56bc9c7e3c30228a0a0b54b915b1579613f6e463ba6f4eb1382e7fd4"},
{file = "typed_ast-1.5.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:e8a9b9c87801cecaad3b4c2b8876387115d1a14caa602c1618cedbb0cb2a14e6"}, {file = "typed_ast-1.5.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:a2b8d7007f6280e36fa42652df47087ac7b0a7d7f09f9468f07792ba646aac2d"},
{file = "typed_ast-1.5.0-cp39-cp39-win_amd64.whl", hash = "sha256:ec184dfb5d3d11e82841dbb973e7092b75f306b625fad7b2e665b64c5d60ab3f"}, {file = "typed_ast-1.5.1-cp39-cp39-win_amd64.whl", hash = "sha256:b6d17f37f6edd879141e64a5db17b67488cfeffeedad8c5cec0392305e9bc775"},
{file = "typed_ast-1.5.0.tar.gz", hash = "sha256:ff4ad88271aa7a55f19b6a161ed44e088c393846d954729549e3cde8257747bb"}, {file = "typed_ast-1.5.1.tar.gz", hash = "sha256:484137cab8ecf47e137260daa20bafbba5f4e3ec7fda1c1e69ab299b75fa81c5"},
] ]
typing-extensions = [ typing-extensions = [
{file = "typing_extensions-4.0.0-py3-none-any.whl", hash = "sha256:829704698b22e13ec9eaf959122315eabb370b0884400e9818334d8b677023d9"}, {file = "typing_extensions-4.0.1-py3-none-any.whl", hash = "sha256:7f001e5ac290a0c0401508864c7ec868be4e701886d5b573a9528ed3973d9d3b"},
{file = "typing_extensions-4.0.0.tar.gz", hash = "sha256:2cdf80e4e04866a9b3689a51869016d36db0814d84b8d8a568d22781d45d27ed"}, {file = "typing_extensions-4.0.1.tar.gz", hash = "sha256:4ca091dea149f945ec56afb48dae714f21e8692ef22a395223bcd328961b6a0e"},
] ]
url-normalize = [ url-normalize = [
{file = "url-normalize-1.4.3.tar.gz", hash = "sha256:d23d3a070ac52a67b83a1c59a0e68f8608d1cd538783b401bc9de2c0fac999b2"}, {file = "url-normalize-1.4.3.tar.gz", hash = "sha256:d23d3a070ac52a67b83a1c59a0e68f8608d1cd538783b401bc9de2c0fac999b2"},

View File

@ -9,7 +9,7 @@ babel==2.9.1; python_version >= "2.7" and python_full_version < "3.0.0" or pytho
backcall==0.2.0; python_version >= "3.7" and python_version < "4.0" backcall==0.2.0; python_version >= "3.7" and python_version < "4.0"
black==21.11b1; python_full_version >= "3.6.2" black==21.11b1; python_full_version >= "3.6.2"
certifi==2021.10.8; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version >= "3.6" certifi==2021.10.8; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version >= "3.6"
charset-normalizer==2.0.8; python_full_version >= "3.6.0" and python_version >= "3.6" charset-normalizer==2.0.9; python_full_version >= "3.6.0" and python_version >= "3.6"
click==8.0.3; python_version >= "3.6" and python_full_version >= "3.6.2" click==8.0.3; python_version >= "3.6" and python_full_version >= "3.6.2"
colorama==0.4.4; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.5.0") colorama==0.4.4; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.5.0")
commonmark==0.9.1; python_full_version >= "3.6.2" and python_full_version < "4.0.0" and (python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.4.0") commonmark==0.9.1; python_full_version >= "3.6.2" and python_full_version < "4.0.0" and (python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.4.0")
@ -24,7 +24,7 @@ greenlet==1.1.2; python_version >= "3" and python_full_version < "3.0.0" or pyth
idna==3.3; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version >= "3.6" idna==3.3; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version >= "3.6"
importlib-metadata==4.8.2; python_full_version >= "3.6.2" and python_version < "3.8" and python_version >= "3.6" and (python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.6") and (python_version >= "3.6" and python_full_version < "3.0.0" and python_version < "3.8" or python_full_version >= "3.6.0" and python_version < "3.8" and python_version >= "3.6") importlib-metadata==4.8.2; python_full_version >= "3.6.2" and python_version < "3.8" and python_version >= "3.6" and (python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.6") and (python_version >= "3.6" and python_full_version < "3.0.0" and python_version < "3.8" or python_full_version >= "3.6.0" and python_version < "3.8" and python_version >= "3.6")
iniconfig==1.1.1; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.6" iniconfig==1.1.1; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.6"
ipython==7.30.0; python_version >= "3.7" and python_version < "4.0" ipython==7.30.1; python_version >= "3.7" and python_version < "4.0"
isodate==0.6.0 isodate==0.6.0
isort==5.10.1; python_full_version >= "3.6.1" and python_version < "4.0" isort==5.10.1; python_full_version >= "3.6.1" and python_version < "4.0"
itsdangerous==2.0.1; python_version >= "3.6" itsdangerous==2.0.1; python_version >= "3.6"
@ -40,7 +40,7 @@ openpyxl==3.0.9; python_version >= "3.6"
packaging==21.3; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.6" packaging==21.3; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.6"
pandas==1.3.4; python_full_version >= "3.7.1" pandas==1.3.4; python_full_version >= "3.7.1"
parsedatetime==2.4 parsedatetime==2.4
parso==0.8.2; python_version >= "3.7" and python_version < "4.0" parso==0.8.3; python_version >= "3.7" and python_version < "4.0"
pathspec==0.9.0; python_full_version >= "3.6.2" pathspec==0.9.0; python_full_version >= "3.6.2"
pexpect==4.8.0; python_version >= "3.7" and python_version < "4.0" and sys_platform != "win32" pexpect==4.8.0; python_version >= "3.7" and python_version < "4.0" and sys_platform != "win32"
pickleshare==0.7.5; python_version >= "3.7" and python_version < "4.0" pickleshare==0.7.5; python_version >= "3.7" and python_version < "4.0"
@ -65,7 +65,7 @@ pytz==2021.3; python_full_version >= "3.7.1"
regex==2021.11.10; python_full_version >= "3.6.2" regex==2021.11.10; python_full_version >= "3.6.2"
requests-cache==0.6.4; python_version >= "3.6" requests-cache==0.6.4; python_version >= "3.6"
requests==2.26.0; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.6.0") requests==2.26.0; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.6.0")
rich==10.14.0; python_full_version >= "3.6.2" and python_full_version < "4.0.0" and (python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.4.0") rich==10.15.2; python_full_version >= "3.6.2" and python_full_version < "4.0.0" and (python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.4.0")
six==1.16.0; python_full_version >= "3.7.1" and python_version >= "3.6" and (python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.3.0") six==1.16.0; python_full_version >= "3.7.1" and python_version >= "3.6" and (python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.3.0")
spdx-license-list==0.5.2 spdx-license-list==0.5.2
sqlalchemy==1.4.22; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.6.0") sqlalchemy==1.4.22; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.6.0")
@ -73,8 +73,8 @@ text-unidecode==1.3; python_version >= "3.6"
toml==0.10.2; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.6" toml==0.10.2; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.6"
tomli==1.2.2; python_version >= "3.6" and python_full_version >= "3.6.2" tomli==1.2.2; python_version >= "3.6" and python_full_version >= "3.6.2"
traitlets==5.1.1; python_version >= "3.7" and python_version < "4.0" traitlets==5.1.1; python_version >= "3.7" and python_version < "4.0"
typed-ast==1.5.0; python_version < "3.8" and implementation_name == "cpython" and python_full_version >= "3.6.2" and python_version >= "3.6" typed-ast==1.5.1; python_version < "3.8" and implementation_name == "cpython" and python_full_version >= "3.6.2" and python_version >= "3.6"
typing-extensions==4.0.0 typing-extensions==4.0.1
url-normalize==1.4.3; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version >= "3.6" url-normalize==1.4.3; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version >= "3.6"
urllib3==1.26.7; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version < "4" and python_version >= "3.6" urllib3==1.26.7; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version < "4" and python_version >= "3.6"
wcwidth==0.2.5; python_version >= "3.7" and python_version < "4.0" and python_full_version >= "3.6.2" wcwidth==0.2.5; python_version >= "3.7" and python_version < "4.0" and python_full_version >= "3.6.2"

View File

@ -1,5 +1,5 @@
certifi==2021.10.8; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version >= "3.6" certifi==2021.10.8; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version >= "3.6"
charset-normalizer==2.0.8; python_full_version >= "3.6.0" and python_version >= "3.6" charset-normalizer==2.0.9; python_full_version >= "3.6.0" and python_version >= "3.6"
colorama==0.4.4; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.5.0") colorama==0.4.4; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.5.0")
ftfy==5.9; python_version >= "3.5" ftfy==5.9; python_version >= "3.5"
greenlet==1.1.2; python_version >= "3" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version >= "3" greenlet==1.1.2; python_version >= "3" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version >= "3"
@ -18,7 +18,7 @@ requests==2.26.0; (python_version >= "2.7" and python_full_version < "3.0.0") or
six==1.16.0; python_full_version >= "3.7.1" and python_version >= "3.6" six==1.16.0; python_full_version >= "3.7.1" and python_version >= "3.6"
spdx-license-list==0.5.2 spdx-license-list==0.5.2
sqlalchemy==1.4.22; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.6.0") sqlalchemy==1.4.22; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.6.0")
typing-extensions==4.0.0; python_version >= "3.6" and python_full_version < "3.0.0" and python_version < "3.8" or python_full_version >= "3.6.0" and python_version < "3.8" and python_version >= "3.6" typing-extensions==4.0.1; python_version >= "3.6" and python_full_version < "3.0.0" and python_version < "3.8" or python_full_version >= "3.6.0" and python_version < "3.8" and python_version >= "3.6"
url-normalize==1.4.3; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version >= "3.6" url-normalize==1.4.3; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version >= "3.6"
urllib3==1.26.7; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version < "4" and python_version >= "3.6" urllib3==1.26.7; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version < "4" and python_version >= "3.6"
wcwidth==0.2.5; python_version >= "3.5" wcwidth==0.2.5; python_version >= "3.5"

View File

@ -376,7 +376,7 @@ def test_check_doi_field():
citation = "Orth, A. 2021. Testing all the things. doi: 10.1186/1743-422X-9-218" citation = "Orth, A. 2021. Testing all the things. doi: 10.1186/1743-422X-9-218"
# Emulate a column in a transposed dataframe (which is just a series), with # Emulate a column in a transposed dataframe (which is just a series), with
# the citation and an empty DOI field. # the citation and a DOI field.
d = {"cg.identifier.doi": doi, "dcterms.bibliographicCitation": citation} d = {"cg.identifier.doi": doi, "dcterms.bibliographicCitation": citation}
series = pd.Series(data=d) series = pd.Series(data=d)
@ -402,3 +402,39 @@ def test_check_doi_only_in_citation(capsys):
captured.out captured.out
== f"{Fore.YELLOW}DOI in citation, but missing a DOI field: {Fore.RESET}{citation}\n" == f"{Fore.YELLOW}DOI in citation, but missing a DOI field: {Fore.RESET}{citation}\n"
) )
def test_title_in_citation():
"""Test an item with its title in the citation."""
title = "Testing all the things"
citation = "Orth, A. 2021. Testing all the things."
# Emulate a column in a transposed dataframe (which is just a series), with
# the title and citation.
d = {"dc.title": title, "dcterms.bibliographicCitation": citation}
series = pd.Series(data=d)
result = check.title_in_citation(series)
assert result == None
def test_title_not_in_citation(capsys):
"""Test an item with its title missing from the citation."""
title = "Testing all the things"
citation = "Orth, A. 2021. Testing all teh things."
# Emulate a column in a transposed dataframe (which is just a series), with
# the title and citation.
d = {"dc.title": title, "dcterms.bibliographicCitation": citation}
series = pd.Series(data=d)
check.title_in_citation(series)
captured = capsys.readouterr()
assert (
captured.out
== f"{Fore.YELLOW}Title is not present in citation: {Fore.RESET}{title}\n"
)