mirror of
https://github.com/ilri/csv-metadata-quality.git
synced 2024-11-29 00:58:19 +01:00
Compare commits
9 Commits
a7c3be280d
...
72fe38972e
Author | SHA1 | Date | |
---|---|---|---|
72fe38972e | |||
04232d0ede | |||
f5fa33bbc6 | |||
1b978159c1 | |||
4d5696c4cb | |||
e02678cd7c | |||
01b4354a14 | |||
3b40a68279 | |||
999cc65097 |
@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
## Unreleased
|
||||
### Added
|
||||
- Ability to check for, and fix, "mojibake" characters using [ftfy](https://github.com/LuminosoInsight/python-ftfy)
|
||||
- Ability to check if the item's title exists in the citation
|
||||
|
||||
### Updated
|
||||
- Python dependencies
|
||||
|
@ -109,12 +109,11 @@ def run(argv):
|
||||
# Check: suspicious characters
|
||||
df[column].apply(check.suspicious_characters, field_name=column)
|
||||
|
||||
# Check: mojibake
|
||||
df[column].apply(check.mojibake, field_name=column)
|
||||
|
||||
# Fix: mojibake
|
||||
# Fix: mojibake. If unsafe fixes are not enabled then we only check.
|
||||
if args.unsafe_fixes:
|
||||
df[column] = df[column].apply(fix.mojibake, field_name=column)
|
||||
else:
|
||||
df[column].apply(check.mojibake, field_name=column)
|
||||
|
||||
# Fix: invalid and unnecessary multi-value separators
|
||||
df[column] = df[column].apply(fix.separators, field_name=column)
|
||||
@ -195,6 +194,9 @@ def run(argv):
|
||||
# Check: citation DOI
|
||||
check.citation_doi(df_transposed[column])
|
||||
|
||||
# Check: title in citation
|
||||
check.title_in_citation(df_transposed[column])
|
||||
|
||||
if args.experimental_checks:
|
||||
experimental.correct_language(df_transposed[column])
|
||||
|
||||
|
@ -410,3 +410,40 @@ def citation_doi(row):
|
||||
)
|
||||
|
||||
return
|
||||
|
||||
|
||||
def title_in_citation(row):
    """Warn when an item's title does not appear verbatim in its citation.

    The title may be missing from the citation entirely, or it may exist in a
    slightly different form (whitespace, accents, etc). Both cases are
    reported, because the comparison is an exact substring test.

    Args:
        row: a pandas Series whose index labels are metadata field names, for
            example a single column of a transposed DataFrame.

    Returns:
        None. A warning is printed if the row has a citation that does not
        contain the title.
    """
    # Defaults for rows that have no title or citation field (or whose values
    # are missing), so the comparison below can be skipped safely.
    title = ""
    citation = ""

    for label in row.axes[0]:
        value = row[label]

        # Skip fields with missing values
        if pd.isna(value):
            continue

        # Match only the title element itself, optionally followed by a
        # bracketed qualifier (for example a language tag). Qualified elements
        # such as dc.title.alternative are excluded on purpose: they would
        # otherwise overwrite the real title and cause false warnings.
        if re.match(r"^(dc|dcterms)\.title(\[[^\]]*\])?$", label) is not None:
            title = value

        # Any field with "citation" in its name is treated as the citation.
        if re.match(r"^.*?[cC]itation.*$", label) is not None:
            citation = value

    # Nothing to compare unless both values are present. An empty title is
    # trivially a substring of any citation, so it never produces a warning.
    if citation == "" or title == "":
        return

    if title not in citation:
        print(f"{Fore.YELLOW}Title is not present in citation: {Fore.RESET}{title}")

    return
|
||||
|
@ -33,4 +33,5 @@ Invalid SPDX license identifier,2021-03-11,,,,,,,CC-BY,,,
|
||||
Duplicate Title,2021-03-17,,,,,,,,Report,,
|
||||
Duplicate Title,2021-03-17,,,,,,,,Report,,
|
||||
Mojibake,2021-03-18,,,,Publicaçao CIAT,,,,Report,,
|
||||
"DOI in citation, but missing cg.identifier.doi",2021-10-06,,,,,,,,,"Orth, A. 2021. Testing all the things. doi: 10.1186/1743-422X-9-218",
|
||||
"DOI in citation, but missing cg.identifier.doi",2021-10-06,,,,,,,,,"Orth, A. 2021. DOI in citation, but missing cg.identifier.doi. doi: 10.1186/1743-422X-9-218",
|
||||
Title missing from citation,2021-12-05,,,,,,,,,"Orth, A. 2021. Title missing f rom citation.",
|
||||
|
|
70
poetry.lock
generated
70
poetry.lock
generated
@ -153,7 +153,7 @@ python-versions = "*"
|
||||
|
||||
[[package]]
|
||||
name = "charset-normalizer"
|
||||
version = "2.0.8"
|
||||
version = "2.0.9"
|
||||
description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet."
|
||||
category = "main"
|
||||
optional = false
|
||||
@ -315,7 +315,7 @@ python-versions = "*"
|
||||
|
||||
[[package]]
|
||||
name = "ipython"
|
||||
version = "7.30.0"
|
||||
version = "7.30.1"
|
||||
description = "IPython: Productive Interactive Computing"
|
||||
category = "dev"
|
||||
optional = false
|
||||
@ -514,7 +514,7 @@ future = "*"
|
||||
|
||||
[[package]]
|
||||
name = "parso"
|
||||
version = "0.8.2"
|
||||
version = "0.8.3"
|
||||
description = "A Python Parser"
|
||||
category = "dev"
|
||||
optional = false
|
||||
@ -793,7 +793,7 @@ test = ["black (==20.8b1)", "flake8", "flake8-comprehensions", "flake8-polyfill"
|
||||
|
||||
[[package]]
|
||||
name = "rich"
|
||||
version = "10.14.0"
|
||||
version = "10.15.2"
|
||||
description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal"
|
||||
category = "dev"
|
||||
optional = false
|
||||
@ -893,7 +893,7 @@ test = ["pytest"]
|
||||
|
||||
[[package]]
|
||||
name = "typed-ast"
|
||||
version = "1.5.0"
|
||||
version = "1.5.1"
|
||||
description = "a fork of Python 2 and 3 ast modules with type comment support"
|
||||
category = "dev"
|
||||
optional = false
|
||||
@ -901,7 +901,7 @@ python-versions = ">=3.6"
|
||||
|
||||
[[package]]
|
||||
name = "typing-extensions"
|
||||
version = "4.0.0"
|
||||
version = "4.0.1"
|
||||
description = "Backported and Experimental Type Hints for Python 3.6+"
|
||||
category = "main"
|
||||
optional = false
|
||||
@ -1010,8 +1010,8 @@ certifi = [
|
||||
{file = "certifi-2021.10.8.tar.gz", hash = "sha256:78884e7c1d4b00ce3cea67b44566851c4343c120abd683433ce934a68ea58872"},
|
||||
]
|
||||
charset-normalizer = [
|
||||
{file = "charset-normalizer-2.0.8.tar.gz", hash = "sha256:735e240d9a8506778cd7a453d97e817e536bb1fc29f4f6961ce297b9c7a917b0"},
|
||||
{file = "charset_normalizer-2.0.8-py3-none-any.whl", hash = "sha256:83fcdeb225499d6344c8f7f34684c2981270beacc32ede2e669e94f7fa544405"},
|
||||
{file = "charset-normalizer-2.0.9.tar.gz", hash = "sha256:b0b883e8e874edfdece9c28f314e3dd5badf067342e42fb162203335ae61aa2c"},
|
||||
{file = "charset_normalizer-2.0.9-py3-none-any.whl", hash = "sha256:1eecaa09422db5be9e29d7fc65664e6c33bd06f9ced7838578ba40d58bdf3721"},
|
||||
]
|
||||
click = [
|
||||
{file = "click-8.0.3-py3-none-any.whl", hash = "sha256:353f466495adaeb40b6b5f592f9f91cb22372351c84caeb068132442a4518ef3"},
|
||||
@ -1116,8 +1116,8 @@ iniconfig = [
|
||||
{file = "iniconfig-1.1.1.tar.gz", hash = "sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32"},
|
||||
]
|
||||
ipython = [
|
||||
{file = "ipython-7.30.0-py3-none-any.whl", hash = "sha256:c8f3e07aefb9cf9e067f39686f035ce09b27a1ee602116a3030b91b6fc138ee4"},
|
||||
{file = "ipython-7.30.0.tar.gz", hash = "sha256:d41f8e80b99690122400f9b2069b12f670246a1b4cc5d332bd6c4e2500e6d6fb"},
|
||||
{file = "ipython-7.30.1-py3-none-any.whl", hash = "sha256:fc60ef843e0863dd4e24ab2bb5698f071031332801ecf8d1aeb4fb622056545c"},
|
||||
{file = "ipython-7.30.1.tar.gz", hash = "sha256:cb6aef731bf708a7727ab6cde8df87f0281b1427d41e65d62d4b68934fa54e97"},
|
||||
]
|
||||
isodate = [
|
||||
{file = "isodate-0.6.0-py2.py3-none-any.whl", hash = "sha256:aa4d33c06640f5352aca96e4b81afd8ab3b47337cc12089822d6f322ac772c81"},
|
||||
@ -1226,8 +1226,8 @@ parsedatetime = [
|
||||
{file = "parsedatetime-2.4.tar.gz", hash = "sha256:3d817c58fb9570d1eec1dd46fa9448cd644eeed4fb612684b02dfda3a79cb84b"},
|
||||
]
|
||||
parso = [
|
||||
{file = "parso-0.8.2-py2.py3-none-any.whl", hash = "sha256:a8c4922db71e4fdb90e0d0bc6e50f9b273d3397925e5e60a717e719201778d22"},
|
||||
{file = "parso-0.8.2.tar.gz", hash = "sha256:12b83492c6239ce32ff5eed6d3639d6a536170723c6f3f1506869f1ace413398"},
|
||||
{file = "parso-0.8.3-py2.py3-none-any.whl", hash = "sha256:c001d4636cd3aecdaf33cbb40aebb59b094be2a74c556778ef5576c175e19e75"},
|
||||
{file = "parso-0.8.3.tar.gz", hash = "sha256:8c07be290bb59f03588915921e29e8a50002acaf2cdc5fa0e0114f91709fafa0"},
|
||||
]
|
||||
pathspec = [
|
||||
{file = "pathspec-0.9.0-py2.py3-none-any.whl", hash = "sha256:7d15c4ddb0b5c802d161efc417ec1a2558ea2653c2e8ad9c19098201dc1c993a"},
|
||||
@ -1371,8 +1371,8 @@ requests-cache = [
|
||||
{file = "requests_cache-0.6.4-py2.py3-none-any.whl", hash = "sha256:1102daa13a804abe23fad62d694e7dee58d6063a35d94bf6e8c9821e22e5a78b"},
|
||||
]
|
||||
rich = [
|
||||
{file = "rich-10.14.0-py3-none-any.whl", hash = "sha256:ab9cbfd7a3802d8c6f0fa91e974630e2a69447972dcbb9dfe9b01016dd95e38e"},
|
||||
{file = "rich-10.14.0.tar.gz", hash = "sha256:8bfe4546d56b4131298d3a9e571a0742de342f1593770bd0d4707299f772a0af"},
|
||||
{file = "rich-10.15.2-py3-none-any.whl", hash = "sha256:43b2c6ad51f46f6c94992aee546f1c177719f4e05aff8f5ea4d2efae3ebdac89"},
|
||||
{file = "rich-10.15.2.tar.gz", hash = "sha256:1dded089b79dd042b3ab5cd63439a338e16652001f0c16e73acdcf4997ad772d"},
|
||||
]
|
||||
six = [
|
||||
{file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"},
|
||||
@ -1431,29 +1431,29 @@ traitlets = [
|
||||
{file = "traitlets-5.1.1.tar.gz", hash = "sha256:059f456c5a7c1c82b98c2e8c799f39c9b8128f6d0d46941ee118daace9eb70c7"},
|
||||
]
|
||||
typed-ast = [
|
||||
{file = "typed_ast-1.5.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:7b310a207ee9fde3f46ba327989e6cba4195bc0c8c70a158456e7b10233e6bed"},
|
||||
{file = "typed_ast-1.5.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:52ca2b2b524d770bed7a393371a38e91943f9160a190141e0df911586066ecda"},
|
||||
{file = "typed_ast-1.5.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:14fed8820114a389a2b7e91624db5f85f3f6682fda09fe0268a59aabd28fe5f5"},
|
||||
{file = "typed_ast-1.5.0-cp310-cp310-win_amd64.whl", hash = "sha256:65c81abbabda7d760df7304d843cc9dbe7ef5d485504ca59a46ae2d1731d2428"},
|
||||
{file = "typed_ast-1.5.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:37ba2ab65a0028b1a4f2b61a8fe77f12d242731977d274a03d68ebb751271508"},
|
||||
{file = "typed_ast-1.5.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:49af5b8f6f03ed1eb89ee06c1d7c2e7c8e743d720c3746a5857609a1abc94c94"},
|
||||
{file = "typed_ast-1.5.0-cp36-cp36m-win_amd64.whl", hash = "sha256:e4374a76e61399a173137e7984a1d7e356038cf844f24fd8aea46c8029a2f712"},
|
||||
{file = "typed_ast-1.5.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:ea517c2bb11c5e4ba7a83a91482a2837041181d57d3ed0749a6c382a2b6b7086"},
|
||||
{file = "typed_ast-1.5.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:51040bf45aacefa44fa67fb9ebcd1f2bec73182b99a532c2394eea7dabd18e24"},
|
||||
{file = "typed_ast-1.5.0-cp37-cp37m-win_amd64.whl", hash = "sha256:806e0c7346b9b4af8c62d9a29053f484599921a4448c37fbbcbbf15c25138570"},
|
||||
{file = "typed_ast-1.5.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:a67fd5914603e2165e075f1b12f5a8356bfb9557e8bfb74511108cfbab0f51ed"},
|
||||
{file = "typed_ast-1.5.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:224afecb8b39739f5c9562794a7c98325cb9d972712e1a98b6989a4720219541"},
|
||||
{file = "typed_ast-1.5.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:155b74b078be842d2eb630dd30a280025eca0a5383c7d45853c27afee65f278f"},
|
||||
{file = "typed_ast-1.5.0-cp38-cp38-win_amd64.whl", hash = "sha256:361b9e5d27bd8e3ccb6ea6ad6c4f3c0be322a1a0f8177db6d56264fa0ae40410"},
|
||||
{file = "typed_ast-1.5.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:618912cbc7e17b4aeba86ffe071698c6e2d292acbd6d1d5ec1ee724b8c4ae450"},
|
||||
{file = "typed_ast-1.5.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:7e6731044f748340ef68dcadb5172a4b1f40847a2983fe3983b2a66445fbc8e6"},
|
||||
{file = "typed_ast-1.5.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:e8a9b9c87801cecaad3b4c2b8876387115d1a14caa602c1618cedbb0cb2a14e6"},
|
||||
{file = "typed_ast-1.5.0-cp39-cp39-win_amd64.whl", hash = "sha256:ec184dfb5d3d11e82841dbb973e7092b75f306b625fad7b2e665b64c5d60ab3f"},
|
||||
{file = "typed_ast-1.5.0.tar.gz", hash = "sha256:ff4ad88271aa7a55f19b6a161ed44e088c393846d954729549e3cde8257747bb"},
|
||||
{file = "typed_ast-1.5.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5d8314c92414ce7481eee7ad42b353943679cf6f30237b5ecbf7d835519e1212"},
|
||||
{file = "typed_ast-1.5.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b53ae5de5500529c76225d18eeb060efbcec90ad5e030713fe8dab0fb4531631"},
|
||||
{file = "typed_ast-1.5.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:24058827d8f5d633f97223f5148a7d22628099a3d2efe06654ce872f46f07cdb"},
|
||||
{file = "typed_ast-1.5.1-cp310-cp310-win_amd64.whl", hash = "sha256:a6d495c1ef572519a7bac9534dbf6d94c40e5b6a608ef41136133377bba4aa08"},
|
||||
{file = "typed_ast-1.5.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:de4ecae89c7d8b56169473e08f6bfd2df7f95015591f43126e4ea7865928677e"},
|
||||
{file = "typed_ast-1.5.1-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:256115a5bc7ea9e665c6314ed6671ee2c08ca380f9d5f130bd4d2c1f5848d695"},
|
||||
{file = "typed_ast-1.5.1-cp36-cp36m-win_amd64.whl", hash = "sha256:7c42707ab981b6cf4b73490c16e9d17fcd5227039720ca14abe415d39a173a30"},
|
||||
{file = "typed_ast-1.5.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:71dcda943a471d826ea930dd449ac7e76db7be778fcd722deb63642bab32ea3f"},
|
||||
{file = "typed_ast-1.5.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:4f30a2bcd8e68adbb791ce1567fdb897357506f7ea6716f6bbdd3053ac4d9471"},
|
||||
{file = "typed_ast-1.5.1-cp37-cp37m-win_amd64.whl", hash = "sha256:ca9e8300d8ba0b66d140820cf463438c8e7b4cdc6fd710c059bfcfb1531d03fb"},
|
||||
{file = "typed_ast-1.5.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9caaf2b440efb39ecbc45e2fabde809cbe56272719131a6318fd9bf08b58e2cb"},
|
||||
{file = "typed_ast-1.5.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:c9bcad65d66d594bffab8575f39420fe0ee96f66e23c4d927ebb4e24354ec1af"},
|
||||
{file = "typed_ast-1.5.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:591bc04e507595887160ed7aa8d6785867fb86c5793911be79ccede61ae96f4d"},
|
||||
{file = "typed_ast-1.5.1-cp38-cp38-win_amd64.whl", hash = "sha256:a80d84f535642420dd17e16ae25bb46c7f4c16ee231105e7f3eb43976a89670a"},
|
||||
{file = "typed_ast-1.5.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:38cf5c642fa808300bae1281460d4f9b7617cf864d4e383054a5ef336e344d32"},
|
||||
{file = "typed_ast-1.5.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5b6ab14c56bc9c7e3c30228a0a0b54b915b1579613f6e463ba6f4eb1382e7fd4"},
|
||||
{file = "typed_ast-1.5.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:a2b8d7007f6280e36fa42652df47087ac7b0a7d7f09f9468f07792ba646aac2d"},
|
||||
{file = "typed_ast-1.5.1-cp39-cp39-win_amd64.whl", hash = "sha256:b6d17f37f6edd879141e64a5db17b67488cfeffeedad8c5cec0392305e9bc775"},
|
||||
{file = "typed_ast-1.5.1.tar.gz", hash = "sha256:484137cab8ecf47e137260daa20bafbba5f4e3ec7fda1c1e69ab299b75fa81c5"},
|
||||
]
|
||||
typing-extensions = [
|
||||
{file = "typing_extensions-4.0.0-py3-none-any.whl", hash = "sha256:829704698b22e13ec9eaf959122315eabb370b0884400e9818334d8b677023d9"},
|
||||
{file = "typing_extensions-4.0.0.tar.gz", hash = "sha256:2cdf80e4e04866a9b3689a51869016d36db0814d84b8d8a568d22781d45d27ed"},
|
||||
{file = "typing_extensions-4.0.1-py3-none-any.whl", hash = "sha256:7f001e5ac290a0c0401508864c7ec868be4e701886d5b573a9528ed3973d9d3b"},
|
||||
{file = "typing_extensions-4.0.1.tar.gz", hash = "sha256:4ca091dea149f945ec56afb48dae714f21e8692ef22a395223bcd328961b6a0e"},
|
||||
]
|
||||
url-normalize = [
|
||||
{file = "url-normalize-1.4.3.tar.gz", hash = "sha256:d23d3a070ac52a67b83a1c59a0e68f8608d1cd538783b401bc9de2c0fac999b2"},
|
||||
|
@ -9,7 +9,7 @@ babel==2.9.1; python_version >= "2.7" and python_full_version < "3.0.0" or pytho
|
||||
backcall==0.2.0; python_version >= "3.7" and python_version < "4.0"
|
||||
black==21.11b1; python_full_version >= "3.6.2"
|
||||
certifi==2021.10.8; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version >= "3.6"
|
||||
charset-normalizer==2.0.8; python_full_version >= "3.6.0" and python_version >= "3.6"
|
||||
charset-normalizer==2.0.9; python_full_version >= "3.6.0" and python_version >= "3.6"
|
||||
click==8.0.3; python_version >= "3.6" and python_full_version >= "3.6.2"
|
||||
colorama==0.4.4; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.5.0")
|
||||
commonmark==0.9.1; python_full_version >= "3.6.2" and python_full_version < "4.0.0" and (python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.4.0")
|
||||
@ -24,7 +24,7 @@ greenlet==1.1.2; python_version >= "3" and python_full_version < "3.0.0" or pyth
|
||||
idna==3.3; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version >= "3.6"
|
||||
importlib-metadata==4.8.2; python_full_version >= "3.6.2" and python_version < "3.8" and python_version >= "3.6" and (python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.6") and (python_version >= "3.6" and python_full_version < "3.0.0" and python_version < "3.8" or python_full_version >= "3.6.0" and python_version < "3.8" and python_version >= "3.6")
|
||||
iniconfig==1.1.1; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.6"
|
||||
ipython==7.30.0; python_version >= "3.7" and python_version < "4.0"
|
||||
ipython==7.30.1; python_version >= "3.7" and python_version < "4.0"
|
||||
isodate==0.6.0
|
||||
isort==5.10.1; python_full_version >= "3.6.1" and python_version < "4.0"
|
||||
itsdangerous==2.0.1; python_version >= "3.6"
|
||||
@ -40,7 +40,7 @@ openpyxl==3.0.9; python_version >= "3.6"
|
||||
packaging==21.3; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.6"
|
||||
pandas==1.3.4; python_full_version >= "3.7.1"
|
||||
parsedatetime==2.4
|
||||
parso==0.8.2; python_version >= "3.7" and python_version < "4.0"
|
||||
parso==0.8.3; python_version >= "3.7" and python_version < "4.0"
|
||||
pathspec==0.9.0; python_full_version >= "3.6.2"
|
||||
pexpect==4.8.0; python_version >= "3.7" and python_version < "4.0" and sys_platform != "win32"
|
||||
pickleshare==0.7.5; python_version >= "3.7" and python_version < "4.0"
|
||||
@ -65,7 +65,7 @@ pytz==2021.3; python_full_version >= "3.7.1"
|
||||
regex==2021.11.10; python_full_version >= "3.6.2"
|
||||
requests-cache==0.6.4; python_version >= "3.6"
|
||||
requests==2.26.0; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.6.0")
|
||||
rich==10.14.0; python_full_version >= "3.6.2" and python_full_version < "4.0.0" and (python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.4.0")
|
||||
rich==10.15.2; python_full_version >= "3.6.2" and python_full_version < "4.0.0" and (python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.4.0")
|
||||
six==1.16.0; python_full_version >= "3.7.1" and python_version >= "3.6" and (python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.3.0")
|
||||
spdx-license-list==0.5.2
|
||||
sqlalchemy==1.4.22; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.6.0")
|
||||
@ -73,8 +73,8 @@ text-unidecode==1.3; python_version >= "3.6"
|
||||
toml==0.10.2; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.6"
|
||||
tomli==1.2.2; python_version >= "3.6" and python_full_version >= "3.6.2"
|
||||
traitlets==5.1.1; python_version >= "3.7" and python_version < "4.0"
|
||||
typed-ast==1.5.0; python_version < "3.8" and implementation_name == "cpython" and python_full_version >= "3.6.2" and python_version >= "3.6"
|
||||
typing-extensions==4.0.0
|
||||
typed-ast==1.5.1; python_version < "3.8" and implementation_name == "cpython" and python_full_version >= "3.6.2" and python_version >= "3.6"
|
||||
typing-extensions==4.0.1
|
||||
url-normalize==1.4.3; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version >= "3.6"
|
||||
urllib3==1.26.7; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version < "4" and python_version >= "3.6"
|
||||
wcwidth==0.2.5; python_version >= "3.7" and python_version < "4.0" and python_full_version >= "3.6.2"
|
||||
|
@ -1,5 +1,5 @@
|
||||
certifi==2021.10.8; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version >= "3.6"
|
||||
charset-normalizer==2.0.8; python_full_version >= "3.6.0" and python_version >= "3.6"
|
||||
charset-normalizer==2.0.9; python_full_version >= "3.6.0" and python_version >= "3.6"
|
||||
colorama==0.4.4; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.5.0")
|
||||
ftfy==5.9; python_version >= "3.5"
|
||||
greenlet==1.1.2; python_version >= "3" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version >= "3"
|
||||
@ -18,7 +18,7 @@ requests==2.26.0; (python_version >= "2.7" and python_full_version < "3.0.0") or
|
||||
six==1.16.0; python_full_version >= "3.7.1" and python_version >= "3.6"
|
||||
spdx-license-list==0.5.2
|
||||
sqlalchemy==1.4.22; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.6.0")
|
||||
typing-extensions==4.0.0; python_version >= "3.6" and python_full_version < "3.0.0" and python_version < "3.8" or python_full_version >= "3.6.0" and python_version < "3.8" and python_version >= "3.6"
|
||||
typing-extensions==4.0.1; python_version >= "3.6" and python_full_version < "3.0.0" and python_version < "3.8" or python_full_version >= "3.6.0" and python_version < "3.8" and python_version >= "3.6"
|
||||
url-normalize==1.4.3; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version >= "3.6"
|
||||
urllib3==1.26.7; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version < "4" and python_version >= "3.6"
|
||||
wcwidth==0.2.5; python_version >= "3.5"
|
||||
|
@ -376,7 +376,7 @@ def test_check_doi_field():
|
||||
citation = "Orth, A. 2021. Testing all the things. doi: 10.1186/1743-422X-9-218"
|
||||
|
||||
# Emulate a column in a transposed dataframe (which is just a series), with
|
||||
# the citation and an empty DOI field.
|
||||
# the citation and a DOI field.
|
||||
d = {"cg.identifier.doi": doi, "dcterms.bibliographicCitation": citation}
|
||||
series = pd.Series(data=d)
|
||||
|
||||
@ -402,3 +402,39 @@ def test_check_doi_only_in_citation(capsys):
|
||||
captured.out
|
||||
== f"{Fore.YELLOW}DOI in citation, but missing a DOI field: {Fore.RESET}{citation}\n"
|
||||
)
|
||||
|
||||
|
||||
def test_title_in_citation():
    """Test an item with its title in the citation."""

    title = "Testing all the things"
    citation = "Orth, A. 2021. Testing all the things."

    # Emulate a column in a transposed dataframe (which is just a series), with
    # the title and citation.
    d = {"dc.title": title, "dcterms.bibliographicCitation": citation}
    series = pd.Series(data=d)

    result = check.title_in_citation(series)

    # None is a singleton, so compare by identity rather than by equality.
    assert result is None
|
||||
|
||||
|
||||
def test_title_not_in_citation(capsys):
    """Verify that a warning is printed when the citation lacks the title."""

    title = "Testing all the things"
    # The citation deliberately misspells the title ("teh") so that the
    # substring comparison fails.
    mismatched_citation = "Orth, A. 2021. Testing all teh things."

    # A single column of a transposed dataframe is just a series keyed by
    # field name, so build one directly with the title and citation.
    item = pd.Series(
        data={
            "dc.title": title,
            "dcterms.bibliographicCitation": mismatched_citation,
        }
    )

    check.title_in_citation(item)

    printed = capsys.readouterr().out
    expected = f"{Fore.YELLOW}Title is not present in citation: {Fore.RESET}{title}\n"
    assert printed == expected
|
||||
|
Loading…
Reference in New Issue
Block a user