1
0
mirror of https://github.com/ilri/csv-metadata-quality.git synced 2024-11-28 16:48:20 +01:00

Compare commits

...

7 Commits

Author SHA1 Message Date
cc34db7ff8
Version 0.5.0
All checks were successful
continuous-integration/drone/push Build is passing
2021-12-08 15:29:46 +02:00
b79e07b814
CHANGELOG.md: Add note about countries without regions 2021-12-08 15:21:45 +02:00
865b950c33
Update requirements
Generated with poetry export:

    $ poetry export --without-hashes -f requirements.txt > requirements.txt
    $ poetry export --without-hashes --dev -f requirements.txt > requirements-dev.txt

I am trying `--without-hashes` to work around an error on pip install
when running in CI:

    ERROR: In --require-hashes mode, all requirements must have their versions pinned with ==.
2021-12-08 15:20:22 +02:00
6f269ca6b1
poetry.lock: run poetry update 2021-12-08 15:19:49 +02:00
120e8cf09f
tests/test_check.py: add checks for countries without regions 2021-12-08 15:18:50 +02:00
a4eb79f625
data/test.csv: add data for countries without regions check 2021-12-08 15:17:55 +02:00
ccc2a73456
Add check for countries without matching regions
If we have country "Kenya" we should have region "Eastern Africa"
according to the UN M.49 geolocation scheme.
2021-12-08 15:02:20 +02:00
11 changed files with 380 additions and 132 deletions

View File

@ -4,10 +4,12 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## Unreleased
## [0.5.0] - 2021-12-08
### Added
- Ability to check for, and fix, "mojibake" characters using [ftfy](https://github.com/LuminosoInsight/python-ftfy)
- Ability to check if the item's title exists in the citation
- Ability to check if an item has countries, but no matching regions (only
suggests missing regions if there is a region field in the CSV)
### Updated
- Python dependencies

View File

@ -197,6 +197,9 @@ def run(argv):
# Check: title in citation
check.title_in_citation(df_transposed[column])
# Check: countries match regions
check.countries_match_regions(df_transposed[column])
if args.experimental_checks:
experimental.correct_language(df_transposed[column])

View File

@ -4,6 +4,7 @@ import os
import re
from datetime import datetime, timedelta
import country_converter as coco
import pandas as pd
import requests
import requests_cache
@ -447,3 +448,74 @@ def title_in_citation(row):
print(f"{Fore.YELLOW}Title is not present in citation: {Fore.RESET}{title}")
return
def countries_match_regions(row):
"""Check for the scenario where an item has country coverage metadata, but
does not have the corresponding region metadata. For example, an item that
has country coverage "Kenya" should also have region "Eastern Africa" acc-
ording to the UN M.49 classification scheme.
See: https://unstats.un.org/unsd/methodology/m49/
Function prints a warning if the appropriate region is not present.
"""
# Initialize some variables at global scope so that we can set them in the
# loop scope below and still be able to access them afterwards.
country_column_name = ""
region_column_name = ""
title_column_name = ""
# Iterate over the labels of the current row's values to get the names of
# the title and citation columns. Then we check if the title is present in
# the citation.
for label in row.axes[0]:
# Find the name of the country column
match = re.match(r"^.*?country.*$", label)
if match is not None:
country_column_name = label
# Find the name of the region column
match = re.match(r"^.*?region.*$", label)
if match is not None:
region_column_name = label
# Find the name of the title column
match = re.match(r"^(dc|dcterms)\.title.*$", label)
if match is not None:
title_column_name = label
# Make sure we found the country and region columns
if country_column_name != "" and region_column_name != "":
# If we don't have any countries then we should return early before
# suggesting regions.
if row[country_column_name] is not None:
countries = row[country_column_name].split("||")
else:
return
if row[region_column_name] is not None:
regions = row[region_column_name].split("||")
else:
regions = list()
# An empty list for our regions so we can keep track for all countries
missing_regions = list()
for country in countries:
# Look up the UN M.49 regions for this country code. CoCo seems to
# only list the direct region, ie Western Africa, rather than all
# the parent regions ("Sub-Saharan Africa", "Africa", "World")
un_region = coco.convert(names=country, to="UNRegion")
if un_region not in regions:
if un_region not in missing_regions:
missing_regions.append(un_region)
if len(missing_regions) > 0:
for missing_region in missing_regions:
print(
f"{Fore.YELLOW}Missing region ({missing_region}): {Fore.RESET}{row[title_column_name]}"
)
return

View File

@ -1,3 +1,3 @@
# SPDX-License-Identifier: GPL-3.0-only
VERSION = "0.4.8-dev"
VERSION = "0.5.0"

View File

@ -1,37 +1,38 @@
dc.title,dcterms.issued,dc.identifier.issn,dc.identifier.isbn,dcterms.language,dcterms.subject,cg.coverage.country,filename,dcterms.license,dcterms.type,dcterms.bibliographicCitation,cg.identifier.doi
Leading space,2019-07-29,,,,,,,,,,
Trailing space ,2019-07-29,,,,,,,,,,
Excessive space,2019-07-29,,,,,,,,,,
Miscellaenous ||whitespace | issues ,2019-07-29,,,,,,,,,,
Duplicate||Duplicate,2019-07-29,,,,,,,,,,
Invalid ISSN,2019-07-29,2321-2302,,,,,,,,,
Invalid ISBN,2019-07-29,,978-0-306-40615-6,,,,,,,,
Multiple valid ISSNs,2019-07-29,0378-5955||0024-9319,,,,,,,,,
Multiple valid ISBNs,2019-07-29,,99921-58-10-7||978-0-306-40615-7,,,,,,,,
Invalid date,2019-07-260,,,,,,,,,,
Multiple dates,2019-07-26||2019-01-10,,,,,,,,,,
Invalid multi-value separator,2019-07-29,0378-5955|0024-9319,,,,,,,,,
Unnecessary Unicode,2019-07-29,,,,,,,,,,
Suspicious character||foreˆt,2019-07-29,,,,,,,,,,
Invalid ISO 639-1 (alpha 2) language,2019-07-29,,,jp,,,,,,,
Invalid ISO 639-3 (alpha 3) language,2019-07-29,,,chi,,,,,,,
Invalid language,2019-07-29,,,Span,,,,,,,
Invalid AGROVOC subject,2019-07-29,,,,FOREST,,,,,,
dc.title,dcterms.issued,dc.identifier.issn,dc.identifier.isbn,dcterms.language,dcterms.subject,cg.coverage.country,filename,dcterms.license,dcterms.type,dcterms.bibliographicCitation,cg.identifier.doi,cg.coverage.region
Leading space,2019-07-29,,,,,,,,,,,
Trailing space ,2019-07-29,,,,,,,,,,,
Excessive space,2019-07-29,,,,,,,,,,,
Miscellaenous ||whitespace | issues ,2019-07-29,,,,,,,,,,,
Duplicate||Duplicate,2019-07-29,,,,,,,,,,,
Invalid ISSN,2019-07-29,2321-2302,,,,,,,,,,
Invalid ISBN,2019-07-29,,978-0-306-40615-6,,,,,,,,,
Multiple valid ISSNs,2019-07-29,0378-5955||0024-9319,,,,,,,,,,
Multiple valid ISBNs,2019-07-29,,99921-58-10-7||978-0-306-40615-7,,,,,,,,,
Invalid date,2019-07-260,,,,,,,,,,,
Multiple dates,2019-07-26||2019-01-10,,,,,,,,,,,
Invalid multi-value separator,2019-07-29,0378-5955|0024-9319,,,,,,,,,,
Unnecessary Unicode,2019-07-29,,,,,,,,,,,
Suspicious character||foreˆt,2019-07-29,,,,,,,,,,,
Invalid ISO 639-1 (alpha 2) language,2019-07-29,,,jp,,,,,,,,
Invalid ISO 639-3 (alpha 3) language,2019-07-29,,,chi,,,,,,,,
Invalid language,2019-07-29,,,Span,,,,,,,,
Invalid AGROVOC subject,2019-07-29,,,,FOREST,,,,,,,
Newline (LF),2019-07-30,,,,"TANZA
NIA",,,,,,
Missing date,,,,,,,,,,,
Invalid country,2019-08-01,,,,,KENYAA,,,,,
Uncommon filename extension,2019-08-10,,,,,,file.pdf.lck,,,,
Unneccesary unicode (U+002D + U+00AD),2019-08-10,,978-­92-­9043-­823-­6,,,,,,,,
"Missing space,after comma",2019-08-27,,,,,,,,,,
Incorrect ISO 639-1 language,2019-09-26,,,es,,,,,,,
Incorrect ISO 639-3 language,2019-09-26,,,spa,,,,,,,
Composéd Unicode,2020-01-14,,,,,,,,,,
Decomposéd Unicode,2020-01-14,,,,,,,,,,
Unnecessary multi-value separator,2021-01-03,0378-5955||,,,,,,,,,
Invalid SPDX license identifier,2021-03-11,,,,,,,CC-BY,,,
Duplicate Title,2021-03-17,,,,,,,,Report,,
Duplicate Title,2021-03-17,,,,,,,,Report,,
Mojibake,2021-03-18,,,,Publicaçao CIAT,,,,Report,,
"DOI in citation, but missing cg.identifier.doi",2021-10-06,,,,,,,,,"Orth, A. 2021. DOI in citation, but missing cg.identifier.doi. doi: 10.1186/1743-422X-9-218",
Title missing from citation,2021-12-05,,,,,,,,,"Orth, A. 2021. Title missing f rom citation.",
NIA",,,,,,,
Missing date,,,,,,,,,,,,
Invalid country,2019-08-01,,,,,KENYAA,,,,,,
Uncommon filename extension,2019-08-10,,,,,,file.pdf.lck,,,,,
Unneccesary unicode (U+002D + U+00AD),2019-08-10,,978-­92-­9043-­823-­6,,,,,,,,,
"Missing space,after comma",2019-08-27,,,,,,,,,,,
Incorrect ISO 639-1 language,2019-09-26,,,es,,,,,,,,
Incorrect ISO 639-3 language,2019-09-26,,,spa,,,,,,,,
Composéd Unicode,2020-01-14,,,,,,,,,,,
Decomposéd Unicode,2020-01-14,,,,,,,,,,,
Unnecessary multi-value separator,2021-01-03,0378-5955||,,,,,,,,,,
Invalid SPDX license identifier,2021-03-11,,,,,,,CC-BY,,,,
Duplicate Title,2021-03-17,,,,,,,,Report,,,
Duplicate Title,2021-03-17,,,,,,,,Report,,,
Mojibake,2021-03-18,,,,Publicaçao CIAT,,,,Report,,,
"DOI in citation, but missing cg.identifier.doi",2021-10-06,,,,,,,,,"Orth, A. 2021. DOI in citation, but missing cg.identifier.doi. doi: 10.1186/1743-422X-9-218",,
Title missing from citation,2021-12-05,,,,,,,,,"Orth, A. 2021. Title missing f rom citation.",,
Country missing region,2021-12-08,,,,,Kenya,,,,,,

1 dc.title dcterms.issued dc.identifier.issn dc.identifier.isbn dcterms.language dcterms.subject cg.coverage.country filename dcterms.license dcterms.type dcterms.bibliographicCitation cg.identifier.doi cg.coverage.region
2 Leading space 2019-07-29
3 Trailing space 2019-07-29
4 Excessive space 2019-07-29
5 Miscellaenous ||whitespace | issues 2019-07-29
6 Duplicate||Duplicate 2019-07-29
7 Invalid ISSN 2019-07-29 2321-2302
8 Invalid ISBN 2019-07-29 978-0-306-40615-6
9 Multiple valid ISSNs 2019-07-29 0378-5955||0024-9319
10 Multiple valid ISBNs 2019-07-29 99921-58-10-7||978-0-306-40615-7
11 Invalid date 2019-07-260
12 Multiple dates 2019-07-26||2019-01-10
13 Invalid multi-value separator 2019-07-29 0378-5955|0024-9319
14 Unnecessary Unicode​ 2019-07-29
15 Suspicious character||foreˆt 2019-07-29
16 Invalid ISO 639-1 (alpha 2) language 2019-07-29 jp
17 Invalid ISO 639-3 (alpha 3) language 2019-07-29 chi
18 Invalid language 2019-07-29 Span
19 Invalid AGROVOC subject 2019-07-29 FOREST
20 Newline (LF) 2019-07-30 TANZA NIA
21 Missing date
22 Invalid country 2019-08-01 KENYAA
23 Uncommon filename extension 2019-08-10 file.pdf.lck
24 Unneccesary unicode (U+002D + U+00AD) 2019-08-10 978-­92-­9043-­823-­6
25 Missing space,after comma 2019-08-27
26 Incorrect ISO 639-1 language 2019-09-26 es
27 Incorrect ISO 639-3 language 2019-09-26 spa
28 Composéd Unicode 2020-01-14
29 Decomposéd Unicode 2020-01-14
30 Unnecessary multi-value separator 2021-01-03 0378-5955||
31 Invalid SPDX license identifier 2021-03-11 CC-BY
32 Duplicate Title 2021-03-17 Report
33 Duplicate Title 2021-03-17 Report
34 Mojibake 2021-03-18 Publicaçao CIAT Report
35 DOI in citation, but missing cg.identifier.doi 2021-10-06 Orth, A. 2021. DOI in citation, but missing cg.identifier.doi. doi: 10.1186/1743-422X-9-218
36 Title missing from citation 2021-12-05 Orth, A. 2021. Title missing f rom citation.
37 Country missing region 2021-12-08 Kenya
38

82
poetry.lock generated
View File

@ -117,7 +117,7 @@ python-versions = "*"
[[package]]
name = "black"
version = "21.11b1"
version = "21.12b0"
description = "The uncompromising code formatter."
category = "dev"
optional = false
@ -128,7 +128,6 @@ click = ">=7.1.2"
mypy-extensions = ">=0.4.3"
pathspec = ">=0.9.0,<1"
platformdirs = ">=2"
regex = ">=2021.4.4"
tomli = ">=0.2.6,<2.0.0"
typed-ast = {version = ">=1.4.2", markers = "python_version < \"3.8\" and implementation_name == \"cpython\""}
typing-extensions = [
@ -193,6 +192,17 @@ python-versions = "*"
[package.extras]
test = ["flake8 (==3.7.8)", "hypothesis (==3.55.3)"]
[[package]]
name = "country-converter"
version = "0.7.4"
description = "The country converter (coco) - a Python package for converting country names between different classifications schemes."
category = "main"
optional = false
python-versions = "*"
[package.dependencies]
pandas = ">=1.0"
[[package]]
name = "csvkit"
version = "1.0.6"
@ -745,14 +755,6 @@ category = "main"
optional = false
python-versions = "*"
[[package]]
name = "regex"
version = "2021.11.10"
description = "Alternative regular expression module, to replace re."
category = "dev"
optional = false
python-versions = "*"
[[package]]
name = "requests"
version = "2.26.0"
@ -962,7 +964,7 @@ testing = ["pytest (>=4.6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytes
[metadata]
lock-version = "1.1"
python-versions = "^3.7.1"
content-hash = "8616f204bfc9c5ddacb519c62d02f8d09c8c5df6a668e2ad38768f2b9ec9414b"
content-hash = "2e53197acdff785ea46adcb5ca22650efdecc6693a434a0f56a4f2b958442a78"
[metadata.files]
agate = [
@ -1002,8 +1004,8 @@ backcall = [
{file = "backcall-0.2.0.tar.gz", hash = "sha256:5cbdbf27be5e7cfadb448baf0aa95508f91f2bbc6c6437cd9cd06e2a4c215e1e"},
]
black = [
{file = "black-21.11b1-py3-none-any.whl", hash = "sha256:802c6c30b637b28645b7fde282ed2569c0cd777dbe493a41b6a03c1d903f99ac"},
{file = "black-21.11b1.tar.gz", hash = "sha256:a042adbb18b3262faad5aff4e834ff186bb893f95ba3a8013f09de1e5569def2"},
{file = "black-21.12b0-py3-none-any.whl", hash = "sha256:a615e69ae185e08fdd73e4715e260e2479c861b5740057fde6e8b4e3b7dd589f"},
{file = "black-21.12b0.tar.gz", hash = "sha256:77b80f693a569e2e527958459634f18df9b0ba2625ba4e0c2d5da5be42e6f2b3"},
]
certifi = [
{file = "certifi-2021.10.8-py2.py3-none-any.whl", hash = "sha256:d62a0163eb4c2344ac042ab2bdf75399a71a2d8c7d47eac2e2ee91b9d6339569"},
@ -1025,6 +1027,9 @@ commonmark = [
{file = "commonmark-0.9.1-py2.py3-none-any.whl", hash = "sha256:da2f38c92590f83de410ba1a3cbceafbc74fee9def35f9251ba9a971d6d66fd9"},
{file = "commonmark-0.9.1.tar.gz", hash = "sha256:452f9dc859be7f06631ddcb328b6919c67984aca654e5fefb3914d54691aed60"},
]
country-converter = [
{file = "country_converter-0.7.4.tar.gz", hash = "sha256:0291cc91c4a4efe7f128a11c8c6e4cb761f7fea7cde2517f8677c7c56da334d3"},
]
csvkit = [
{file = "csvkit-1.0.6-py2.py3-none-any.whl", hash = "sha256:4c99390a09f7311bebcd9409ba042023c8649045b356d4d8f207f6ebe76cc27f"},
{file = "csvkit-1.0.6.tar.gz", hash = "sha256:c8f761b5605cc978a7515a3d6a9e7ceec49a08eeefd7ad78480dea5f8bf80d35"},
@ -1311,57 +1316,6 @@ pytz = [
{file = "pytz-2021.3-py2.py3-none-any.whl", hash = "sha256:3672058bc3453457b622aab7a1c3bfd5ab0bdae451512f6cf25f64ed37f5b87c"},
{file = "pytz-2021.3.tar.gz", hash = "sha256:acad2d8b20a1af07d4e4c9d2e9285c5ed9104354062f275f3fcd88dcef4f1326"},
]
regex = [
{file = "regex-2021.11.10-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:9345b6f7ee578bad8e475129ed40123d265464c4cfead6c261fd60fc9de00bcf"},
{file = "regex-2021.11.10-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:416c5f1a188c91e3eb41e9c8787288e707f7d2ebe66e0a6563af280d9b68478f"},
{file = "regex-2021.11.10-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e0538c43565ee6e703d3a7c3bdfe4037a5209250e8502c98f20fea6f5fdf2965"},
{file = "regex-2021.11.10-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7ee1227cf08b6716c85504aebc49ac827eb88fcc6e51564f010f11a406c0a667"},
{file = "regex-2021.11.10-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6650f16365f1924d6014d2ea770bde8555b4a39dc9576abb95e3cd1ff0263b36"},
{file = "regex-2021.11.10-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:30ab804ea73972049b7a2a5c62d97687d69b5a60a67adca07eb73a0ddbc9e29f"},
{file = "regex-2021.11.10-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:68a067c11463de2a37157930d8b153005085e42bcb7ad9ca562d77ba7d1404e0"},
{file = "regex-2021.11.10-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:162abfd74e88001d20cb73ceaffbfe601469923e875caf9118333b1a4aaafdc4"},
{file = "regex-2021.11.10-cp310-cp310-win32.whl", hash = "sha256:98ba568e8ae26beb726aeea2273053c717641933836568c2a0278a84987b2a1a"},
{file = "regex-2021.11.10-cp310-cp310-win_amd64.whl", hash = "sha256:780b48456a0f0ba4d390e8b5f7c661fdd218934388cde1a974010a965e200e12"},
{file = "regex-2021.11.10-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:dba70f30fd81f8ce6d32ddeef37d91c8948e5d5a4c63242d16a2b2df8143aafc"},
{file = "regex-2021.11.10-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e1f54b9b4b6c53369f40028d2dd07a8c374583417ee6ec0ea304e710a20f80a0"},
{file = "regex-2021.11.10-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fbb9dc00e39f3e6c0ef48edee202f9520dafb233e8b51b06b8428cfcb92abd30"},
{file = "regex-2021.11.10-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:666abff54e474d28ff42756d94544cdfd42e2ee97065857413b72e8a2d6a6345"},
{file = "regex-2021.11.10-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5537f71b6d646f7f5f340562ec4c77b6e1c915f8baae822ea0b7e46c1f09b733"},
{file = "regex-2021.11.10-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ed2e07c6a26ed4bea91b897ee2b0835c21716d9a469a96c3e878dc5f8c55bb23"},
{file = "regex-2021.11.10-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:ca5f18a75e1256ce07494e245cdb146f5a9267d3c702ebf9b65c7f8bd843431e"},
{file = "regex-2021.11.10-cp36-cp36m-win32.whl", hash = "sha256:93a5051fcf5fad72de73b96f07d30bc29665697fb8ecdfbc474f3452c78adcf4"},
{file = "regex-2021.11.10-cp36-cp36m-win_amd64.whl", hash = "sha256:b483c9d00a565633c87abd0aaf27eb5016de23fed952e054ecc19ce32f6a9e7e"},
{file = "regex-2021.11.10-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:fff55f3ce50a3ff63ec8e2a8d3dd924f1941b250b0aac3d3d42b687eeff07a8e"},
{file = "regex-2021.11.10-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e32d2a2b02ccbef10145df9135751abea1f9f076e67a4e261b05f24b94219e36"},
{file = "regex-2021.11.10-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:53db2c6be8a2710b359bfd3d3aa17ba38f8aa72a82309a12ae99d3c0c3dcd74d"},
{file = "regex-2021.11.10-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2207ae4f64ad3af399e2d30dde66f0b36ae5c3129b52885f1bffc2f05ec505c8"},
{file = "regex-2021.11.10-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d5ca078bb666c4a9d1287a379fe617a6dccd18c3e8a7e6c7e1eb8974330c626a"},
{file = "regex-2021.11.10-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dd33eb9bdcfbabab3459c9ee651d94c842bc8a05fabc95edf4ee0c15a072495e"},
{file = "regex-2021.11.10-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:05b7d6d7e64efe309972adab77fc2af8907bb93217ec60aa9fe12a0dad35874f"},
{file = "regex-2021.11.10-cp37-cp37m-win32.whl", hash = "sha256:e71255ba42567d34a13c03968736c5d39bb4a97ce98188fafb27ce981115beec"},
{file = "regex-2021.11.10-cp37-cp37m-win_amd64.whl", hash = "sha256:07856afef5ffcc052e7eccf3213317fbb94e4a5cd8177a2caa69c980657b3cb4"},
{file = "regex-2021.11.10-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ba05430e819e58544e840a68b03b28b6d328aff2e41579037e8bab7653b37d83"},
{file = "regex-2021.11.10-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:7f301b11b9d214f83ddaf689181051e7f48905568b0c7017c04c06dfd065e244"},
{file = "regex-2021.11.10-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4aaa4e0705ef2b73dd8e36eeb4c868f80f8393f5f4d855e94025ce7ad8525f50"},
{file = "regex-2021.11.10-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:788aef3549f1924d5c38263104dae7395bf020a42776d5ec5ea2b0d3d85d6646"},
{file = "regex-2021.11.10-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f8af619e3be812a2059b212064ea7a640aff0568d972cd1b9e920837469eb3cb"},
{file = "regex-2021.11.10-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:85bfa6a5413be0ee6c5c4a663668a2cad2cbecdee367630d097d7823041bdeec"},
{file = "regex-2021.11.10-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f23222527b307970e383433daec128d769ff778d9b29343fb3496472dc20dabe"},
{file = "regex-2021.11.10-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:da1a90c1ddb7531b1d5ff1e171b4ee61f6345119be7351104b67ff413843fe94"},
{file = "regex-2021.11.10-cp38-cp38-win32.whl", hash = "sha256:0617383e2fe465732af4509e61648b77cbe3aee68b6ac8c0b6fe934db90be5cc"},
{file = "regex-2021.11.10-cp38-cp38-win_amd64.whl", hash = "sha256:a3feefd5e95871872673b08636f96b61ebef62971eab044f5124fb4dea39919d"},
{file = "regex-2021.11.10-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:f7f325be2804246a75a4f45c72d4ce80d2443ab815063cdf70ee8fb2ca59ee1b"},
{file = "regex-2021.11.10-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:537ca6a3586931b16a85ac38c08cc48f10fc870a5b25e51794c74df843e9966d"},
{file = "regex-2021.11.10-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eef2afb0fd1747f33f1ee3e209bce1ed582d1896b240ccc5e2697e3275f037c7"},
{file = "regex-2021.11.10-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:432bd15d40ed835a51617521d60d0125867f7b88acf653e4ed994a1f8e4995dc"},
{file = "regex-2021.11.10-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b43c2b8a330a490daaef5a47ab114935002b13b3f9dc5da56d5322ff218eeadb"},
{file = "regex-2021.11.10-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:962b9a917dd7ceacbe5cd424556914cb0d636001e393b43dc886ba31d2a1e449"},
{file = "regex-2021.11.10-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fa8c626d6441e2d04b6ee703ef2d1e17608ad44c7cb75258c09dd42bacdfc64b"},
{file = "regex-2021.11.10-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:3c5fb32cc6077abad3bbf0323067636d93307c9fa93e072771cf9a64d1c0f3ef"},
{file = "regex-2021.11.10-cp39-cp39-win32.whl", hash = "sha256:3b5df18db1fccd66de15aa59c41e4f853b5df7550723d26aa6cb7f40e5d9da5a"},
{file = "regex-2021.11.10-cp39-cp39-win_amd64.whl", hash = "sha256:83ee89483672b11f8952b158640d0c0ff02dc43d9cb1b70c1564b49abe92ce29"},
{file = "regex-2021.11.10.tar.gz", hash = "sha256:f341ee2df0999bfdf7a95e448075effe0db212a59387de1a70690e4acb03d4c6"},
]
requests = [
{file = "requests-2.26.0-py2.py3-none-any.whl", hash = "sha256:6c1246513ecd5ecd4528a0906f910e8f0f9c6b8ec72030dc9fd154dc1a6efd24"},
{file = "requests-2.26.0.tar.gz", hash = "sha256:b8aa58f8cf793ffd8782d3d8cb19e66ef36f7aba4353eec859e74678b01b07a7"},

View File

@ -1,6 +1,6 @@
[tool.poetry]
name = "csv-metadata-quality"
version = "0.4.8-dev"
version = "0.5.0"
description="A simple, but opinionated CSV quality checking and fixing pipeline for CSVs in the DSpace ecosystem."
authors = ["Alan Orth <alan.orth@gmail.com>"]
license="GPL-3.0-only"
@ -23,6 +23,7 @@ colorama = "^0.4.4"
spdx-license-list = "^0.5.2"
ftfy = "^5.9"
SQLAlchemy = ">=1.3.3,<1.4.23"
country-converter = "^0.7.4"
[tool.poetry.dev-dependencies]
pytest = "^6.1.1"

View File

@ -7,12 +7,13 @@ atomicwrites==1.4.0; python_version >= "3.6" and python_full_version < "3.0.0" a
attrs==21.2.0; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "3.6"
babel==2.9.1; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.4.0"
backcall==0.2.0; python_version >= "3.7" and python_version < "4.0"
black==21.11b1; python_full_version >= "3.6.2"
black==21.12b0; python_full_version >= "3.6.2"
certifi==2021.10.8; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version >= "3.6"
charset-normalizer==2.0.9; python_full_version >= "3.6.0" and python_version >= "3.6"
click==8.0.3; python_version >= "3.6" and python_full_version >= "3.6.2"
colorama==0.4.4; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.5.0")
commonmark==0.9.1; python_full_version >= "3.6.2" and python_full_version < "4.0.0" and (python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.4.0")
country-converter==0.7.4
csvkit==1.0.6
dbfread==2.0.7
decorator==5.1.0; python_version >= "3.7" and python_version < "4.0"
@ -62,7 +63,6 @@ python-slugify==5.0.2; python_version >= "3.6"
python-stdnum==1.17
pytimeparse==1.1.8
pytz==2021.3; python_full_version >= "3.7.1"
regex==2021.11.10; python_full_version >= "3.6.2"
requests-cache==0.6.4; python_version >= "3.6"
requests==2.26.0; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.6.0")
rich==10.15.2; python_full_version >= "3.6.2" and python_full_version < "4.0.0" and (python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.4.0")

View File

@ -1,26 +1,201 @@
certifi==2021.10.8; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version >= "3.6"
charset-normalizer==2.0.9; python_full_version >= "3.6.0" and python_version >= "3.6"
colorama==0.4.4; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.5.0")
ftfy==5.9; python_version >= "3.5"
greenlet==1.1.2; python_version >= "3" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version >= "3"
idna==3.3; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version >= "3.6"
importlib-metadata==4.8.2; python_version >= "3.6" and python_full_version < "3.0.0" and python_version < "3.8" or python_full_version >= "3.6.0" and python_version < "3.8" and python_version >= "3.6"
itsdangerous==2.0.1; python_version >= "3.6"
langid==1.1.6
numpy==1.21.1
pandas==1.3.4; python_full_version >= "3.7.1"
pycountry==19.8.18
python-dateutil==2.8.2; python_full_version >= "3.7.1"
python-stdnum==1.17
pytz==2021.3; python_full_version >= "3.7.1"
requests-cache==0.6.4; python_version >= "3.6"
requests==2.26.0; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.6.0")
six==1.16.0; python_full_version >= "3.7.1" and python_version >= "3.6"
spdx-license-list==0.5.2
sqlalchemy==1.4.22; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.6.0")
typing-extensions==4.0.1; python_version >= "3.6" and python_full_version < "3.0.0" and python_version < "3.8" or python_full_version >= "3.6.0" and python_version < "3.8" and python_version >= "3.6"
url-normalize==1.4.3; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version >= "3.6"
urllib3==1.26.7; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version < "4" and python_version >= "3.6"
wcwidth==0.2.5; python_version >= "3.5"
xlrd==1.2.0; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.4.0")
zipp==3.6.0; python_version >= "3.6" and python_full_version < "3.0.0" and python_version < "3.8" or python_full_version >= "3.6.0" and python_version < "3.8" and python_version >= "3.6"
certifi==2021.10.8; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version >= "3.6" \
--hash=sha256:d62a0163eb4c2344ac042ab2bdf75399a71a2d8c7d47eac2e2ee91b9d6339569 \
--hash=sha256:78884e7c1d4b00ce3cea67b44566851c4343c120abd683433ce934a68ea58872
charset-normalizer==2.0.9; python_full_version >= "3.6.0" and python_version >= "3.6" \
--hash=sha256:b0b883e8e874edfdece9c28f314e3dd5badf067342e42fb162203335ae61aa2c \
--hash=sha256:1eecaa09422db5be9e29d7fc65664e6c33bd06f9ced7838578ba40d58bdf3721
colorama==0.4.4; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.5.0") \
--hash=sha256:9f47eda37229f68eee03b24b9748937c7dc3868f906e8ba69fbcbdd3bc5dc3e2 \
--hash=sha256:5941b2b48a20143d2267e95b1c2a7603ce057ee39fd88e7329b0c292aa16869b
country-converter==0.7.4 \
--hash=sha256:0291cc91c4a4efe7f128a11c8c6e4cb761f7fea7cde2517f8677c7c56da334d3
ftfy==5.9; python_version >= "3.5" \
--hash=sha256:8c4fb2863c0b82eae2ab3cf353d9ade268dfbde863d322f78d6a9fd5cefb31e9
greenlet==1.1.2; python_version >= "3" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version >= "3" \
--hash=sha256:58df5c2a0e293bf665a51f8a100d3e9956febfbf1d9aaf8c0677cf70218910c6 \
--hash=sha256:aec52725173bd3a7b56fe91bc56eccb26fbdff1386ef123abb63c84c5b43b63a \
--hash=sha256:833e1551925ed51e6b44c800e71e77dacd7e49181fdc9ac9a0bf3714d515785d \
--hash=sha256:aa5b467f15e78b82257319aebc78dd2915e4c1436c3c0d1ad6f53e47ba6e2713 \
--hash=sha256:40b951f601af999a8bf2ce8c71e8aaa4e8c6f78ff8afae7b808aae2dc50d4c40 \
--hash=sha256:95e69877983ea39b7303570fa6760f81a3eec23d0e3ab2021b7144b94d06202d \
--hash=sha256:356b3576ad078c89a6107caa9c50cc14e98e3a6c4874a37c3e0273e4baf33de8 \
--hash=sha256:8639cadfda96737427330a094476d4c7a56ac03de7265622fcf4cfe57c8ae18d \
--hash=sha256:97e5306482182170ade15c4b0d8386ded995a07d7cc2ca8f27958d34d6736497 \
--hash=sha256:e6a36bb9474218c7a5b27ae476035497a6990e21d04c279884eb10d9b290f1b1 \
--hash=sha256:abb7a75ed8b968f3061327c433a0fbd17b729947b400747c334a9c29a9af6c58 \
--hash=sha256:14d4f3cd4e8b524ae9b8aa567858beed70c392fdec26dbdb0a8a418392e71708 \
--hash=sha256:17ff94e7a83aa8671a25bf5b59326ec26da379ace2ebc4411d690d80a7fbcf23 \
--hash=sha256:9f3cba480d3deb69f6ee2c1825060177a22c7826431458c697df88e6aeb3caee \
--hash=sha256:fa877ca7f6b48054f847b61d6fa7bed5cebb663ebc55e018fda12db09dcc664c \
--hash=sha256:7cbd7574ce8e138bda9df4efc6bf2ab8572c9aff640d8ecfece1b006b68da963 \
--hash=sha256:903bbd302a2378f984aef528f76d4c9b1748f318fe1294961c072bdc7f2ffa3e \
--hash=sha256:049fe7579230e44daef03a259faa24511d10ebfa44f69411d99e6a184fe68073 \
--hash=sha256:dd0b1e9e891f69e7675ba5c92e28b90eaa045f6ab134ffe70b52e948aa175b3c \
--hash=sha256:7418b6bfc7fe3331541b84bb2141c9baf1ec7132a7ecd9f375912eca810e714e \
--hash=sha256:f9d29ca8a77117315101425ec7ec2a47a22ccf59f5593378fc4077ac5b754fce \
--hash=sha256:21915eb821a6b3d9d8eefdaf57d6c345b970ad722f856cd71739493ce003ad08 \
--hash=sha256:eff9d20417ff9dcb0d25e2defc2574d10b491bf2e693b4e491914738b7908168 \
--hash=sha256:32ca72bbc673adbcfecb935bb3fb1b74e663d10a4b241aaa2f5a75fe1d1f90aa \
--hash=sha256:f0214eb2a23b85528310dad848ad2ac58e735612929c8072f6093f3585fd342d \
--hash=sha256:b92e29e58bef6d9cfd340c72b04d74c4b4e9f70c9fa7c78b674d1fec18896dc4 \
--hash=sha256:fdcec0b8399108577ec290f55551d926d9a1fa6cad45882093a7a07ac5ec147b \
--hash=sha256:93f81b134a165cc17123626ab8da2e30c0455441d4ab5576eed73a64c025b25c \
--hash=sha256:1e12bdc622676ce47ae9abbf455c189e442afdde8818d9da983085df6312e7a1 \
--hash=sha256:8c790abda465726cfb8bb08bd4ca9a5d0a7bd77c7ac1ca1b839ad823b948ea28 \
--hash=sha256:f276df9830dba7a333544bd41070e8175762a7ac20350786b322b714b0e654f5 \
--hash=sha256:64e6175c2e53195278d7388c454e0b30997573f3f4bd63697f88d855f7a6a1fc \
--hash=sha256:b11548073a2213d950c3f671aa88e6f83cda6e2fb97a8b6317b1b5b33d850e06 \
--hash=sha256:9633b3034d3d901f0a46b7939f8c4d64427dfba6bbc5a36b1a67364cf148a1b0 \
--hash=sha256:eb6ea6da4c787111adf40f697b4e58732ee0942b5d3bd8f435277643329ba627 \
--hash=sha256:f3acda1924472472ddd60c29e5b9db0cec629fbe3c5c5accb74d6d6d14773478 \
--hash=sha256:e859fcb4cbe93504ea18008d1df98dee4f7766db66c435e4882ab35cf70cac43 \
--hash=sha256:00e44c8afdbe5467e4f7b5851be223be68adb4272f44696ee71fe46b7036a711 \
--hash=sha256:ec8c433b3ab0419100bd45b47c9c8551248a5aee30ca5e9d399a0b57ac04651b \
--hash=sha256:288c6a76705dc54fba69fbcb59904ae4ad768b4c768839b8ca5fdadec6dd8cfd \
--hash=sha256:8d2f1fb53a421b410751887eb4ff21386d119ef9cde3797bf5e7ed49fb51a3b3 \
--hash=sha256:166eac03e48784a6a6e0e5f041cfebb1ab400b394db188c48b3a84737f505b67 \
--hash=sha256:572e1787d1460da79590bf44304abbc0a2da944ea64ec549188fa84d89bba7ab \
--hash=sha256:be5f425ff1f5f4b3c1e33ad64ab994eed12fc284a6ea71c5243fd564502ecbe5 \
--hash=sha256:b1692f7d6bc45e3200844be0dba153612103db241691088626a33ff1f24a0d88 \
--hash=sha256:7227b47e73dedaa513cdebb98469705ef0d66eb5a1250144468e9c3097d6b59b \
--hash=sha256:7ff61ff178250f9bb3cd89752df0f1dd0e27316a8bd1465351652b1b4a4cdfd3 \
--hash=sha256:f70a9e237bb792c7cc7e44c531fd48f5897961701cdaa06cf22fc14965c496cf \
--hash=sha256:013d61294b6cd8fe3242932c1c5e36e5d1db2c8afb58606c5a67efce62c1f5fd \
--hash=sha256:e30f5ea4ae2346e62cedde8794a56858a67b878dd79f7df76a0767e356b1744a
idna==3.3; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version >= "3.6" \
--hash=sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff \
--hash=sha256:9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d
importlib-metadata==4.8.2; python_version >= "3.6" and python_full_version < "3.0.0" and python_version < "3.8" or python_full_version >= "3.6.0" and python_version < "3.8" and python_version >= "3.6" \
--hash=sha256:53ccfd5c134223e497627b9815d5030edf77d2ed573922f7a0b8f8bb81a1c100 \
--hash=sha256:75bdec14c397f528724c1bfd9709d660b33a4d2e77387a3358f20b848bb5e5fb
itsdangerous==2.0.1; python_version >= "3.6" \
--hash=sha256:5174094b9637652bdb841a3029700391451bd092ba3db90600dea710ba28e97c \
--hash=sha256:9e724d68fc22902a1435351f84c3fb8623f303fffcc566a4cb952df8c572cff0
langid==1.1.6 \
--hash=sha256:044bcae1912dab85c33d8e98f2811b8f4ff1213e5e9a9e9510137b84da2cb293
numpy==1.21.1 \
--hash=sha256:38e8648f9449a549a7dfe8d8755a5979b45b3538520d1e735637ef28e8c2dc50 \
--hash=sha256:fd7d7409fa643a91d0a05c7554dd68aa9c9bb16e186f6ccfe40d6e003156e33a \
--hash=sha256:a75b4498b1e93d8b700282dc8e655b8bd559c0904b3910b144646dbbbc03e062 \
--hash=sha256:1412aa0aec3e00bc23fbb8664d76552b4efde98fb71f60737c83efbac24112f1 \
--hash=sha256:e46ceaff65609b5399163de5893d8f2a82d3c77d5e56d976c8b5fb01faa6b671 \
--hash=sha256:c6a2324085dd52f96498419ba95b5777e40b6bcbc20088fddb9e8cbb58885e8e \
--hash=sha256:73101b2a1fef16602696d133db402a7e7586654682244344b8329cdcbbb82172 \
--hash=sha256:7a708a79c9a9d26904d1cca8d383bf869edf6f8e7650d85dbc77b041e8c5a0f8 \
--hash=sha256:95b995d0c413f5d0428b3f880e8fe1660ff9396dcd1f9eedbc311f37b5652e16 \
--hash=sha256:635e6bd31c9fb3d475c8f44a089569070d10a9ef18ed13738b03049280281267 \
--hash=sha256:4a3d5fb89bfe21be2ef47c0614b9c9c707b7362386c9a3ff1feae63e0267ccb6 \
--hash=sha256:8a326af80e86d0e9ce92bcc1e65c8ff88297de4fa14ee936cb2293d414c9ec63 \
--hash=sha256:791492091744b0fe390a6ce85cc1bf5149968ac7d5f0477288f78c89b385d9af \
--hash=sha256:0318c465786c1f63ac05d7c4dbcecd4d2d7e13f0959b01b534ea1e92202235c5 \
--hash=sha256:9a513bd9c1551894ee3d31369f9b07460ef223694098cf27d399513415855b68 \
--hash=sha256:91c6f5fc58df1e0a3cc0c3a717bb3308ff850abdaa6d2d802573ee2b11f674a8 \
--hash=sha256:978010b68e17150db8765355d1ccdd450f9fc916824e8c4e35ee620590e234cd \
--hash=sha256:9749a40a5b22333467f02fe11edc98f022133ee1bfa8ab99bda5e5437b831214 \
--hash=sha256:d7a4aeac3b94af92a9373d6e77b37691b86411f9745190d2c351f410ab3a791f \
--hash=sha256:d9e7912a56108aba9b31df688a4c4f5cb0d9d3787386b87d504762b6754fbb1b \
--hash=sha256:25b40b98ebdd272bc3020935427a4530b7d60dfbe1ab9381a39147834e985eac \
--hash=sha256:8a92c5aea763d14ba9d6475803fc7904bda7decc2a0a68153f587ad82941fec1 \
--hash=sha256:05a0f648eb28bae4bcb204e6fd14603de2908de982e761a2fc78efe0f19e96e1 \
--hash=sha256:f01f28075a92eede918b965e86e8f0ba7b7797a95aa8d35e1cc8821f5fc3ad6a \
--hash=sha256:88c0b89ad1cc24a5efbb99ff9ab5db0f9a86e9cc50240177a571fbe9c2860ac2 \
--hash=sha256:01721eefe70544d548425a07c80be8377096a54118070b8a62476866d5208e33 \
--hash=sha256:2d4d1de6e6fb3d28781c73fbde702ac97f03d79e4ffd6598b880b2d95d62ead4 \
--hash=sha256:dff4af63638afcc57a3dfb9e4b26d434a7a602d225b42d746ea7fe2edf1342fd
pandas==1.3.4; python_full_version >= "3.7.1" \
--hash=sha256:9707bdc1ea9639c886b4d3be6e2a45812c1ac0c2080f94c31b71c9fa35556f9b \
--hash=sha256:c2f44425594ae85e119459bb5abb0748d76ef01d9c08583a667e3339e134218e \
--hash=sha256:372d72a3d8a5f2dbaf566a5fa5fa7f230842ac80f29a931fb4b071502cf86b9a \
--hash=sha256:d99d2350adb7b6c3f7f8f0e5dfb7d34ff8dd4bc0a53e62c445b7e43e163fce63 \
--hash=sha256:4acc28364863127bca1029fb72228e6f473bb50c32e77155e80b410e2068eeac \
--hash=sha256:c2646458e1dce44df9f71a01dc65f7e8fa4307f29e5c0f2f92c97f47a5bf22f5 \
--hash=sha256:5298a733e5bfbb761181fd4672c36d0c627320eb999c59c65156c6a90c7e1b4f \
--hash=sha256:22808afb8f96e2269dcc5b846decacb2f526dd0b47baebc63d913bf847317c8f \
--hash=sha256:b528e126c13816a4374e56b7b18bfe91f7a7f6576d1aadba5dee6a87a7f479ae \
--hash=sha256:fe48e4925455c964db914b958f6e7032d285848b7538a5e1b19aeb26ffaea3ec \
--hash=sha256:eaca36a80acaacb8183930e2e5ad7f71539a66805d6204ea88736570b2876a7b \
--hash=sha256:42493f8ae67918bf129869abea8204df899902287a7f5eaf596c8e54e0ac7ff4 \
--hash=sha256:a388960f979665b447f0847626e40f99af8cf191bce9dc571d716433130cb3a7 \
--hash=sha256:5ba0aac1397e1d7b654fccf263a4798a9e84ef749866060d19e577e927d66e1b \
--hash=sha256:f567e972dce3bbc3a8076e0b675273b4a9e8576ac629149cf8286ee13c259ae5 \
--hash=sha256:c1aa4de4919358c5ef119f6377bc5964b3a7023c23e845d9db7d9016fa0c5b1c \
--hash=sha256:dd324f8ee05925ee85de0ea3f0d66e1362e8c80799eb4eb04927d32335a3e44a \
--hash=sha256:d47750cf07dee6b55d8423471be70d627314277976ff2edd1381f02d52dbadf9 \
--hash=sha256:2d1dc09c0013d8faa7474574d61b575f9af6257ab95c93dcf33a14fd8d2c1bab \
--hash=sha256:10e10a2527db79af6e830c3d5842a4d60383b162885270f8cffc15abca4ba4a9 \
--hash=sha256:35c77609acd2e4d517da41bae0c11c70d31c87aae8dd1aabd2670906c6d2c143 \
--hash=sha256:003ba92db58b71a5f8add604a17a059f3068ef4e8c0c365b088468d0d64935fd \
--hash=sha256:a51528192755f7429c5bcc9e80832c517340317c861318fea9cea081b57c9afd \
--hash=sha256:a2aa18d3f0b7d538e21932f637fbfe8518d085238b429e4790a35e1e44a96ffc
pycountry==19.8.18 \
--hash=sha256:3c57aa40adcf293d59bebaffbe60d8c39976fba78d846a018dc0c2ec9c6cb3cb
python-dateutil==2.8.2; python_full_version >= "3.7.1" \
--hash=sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86 \
--hash=sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9
python-stdnum==1.17 \
--hash=sha256:374e2b5e13912ccdbf50b0b23fca2c3e0531174805c32d74e145f37756328340 \
--hash=sha256:a46e6cf9652807314d369b654b255c86a59f93d18be2834f3d567ed1a346c547
pytz==2021.3; python_full_version >= "3.7.1" \
--hash=sha256:3672058bc3453457b622aab7a1c3bfd5ab0bdae451512f6cf25f64ed37f5b87c \
--hash=sha256:acad2d8b20a1af07d4e4c9d2e9285c5ed9104354062f275f3fcd88dcef4f1326
requests-cache==0.6.4; python_version >= "3.6" \
--hash=sha256:dd9120a4ab7b8128cba9b6b120d8b5560d566a3cd0f828cced3d3fd60a42ec40 \
--hash=sha256:1102daa13a804abe23fad62d694e7dee58d6063a35d94bf6e8c9821e22e5a78b
requests==2.26.0; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.6.0") \
--hash=sha256:6c1246513ecd5ecd4528a0906f910e8f0f9c6b8ec72030dc9fd154dc1a6efd24 \
--hash=sha256:b8aa58f8cf793ffd8782d3d8cb19e66ef36f7aba4353eec859e74678b01b07a7
six==1.16.0; python_full_version >= "3.7.1" and python_version >= "3.6" \
--hash=sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254 \
--hash=sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926
spdx-license-list==0.5.2 \
--hash=sha256:1b338470c7b403dbecceca563a316382c7977516128ca6c1e8f7078e3ed6e7b0 \
--hash=sha256:952996f72ab807972dc2278bb9b91e5294767211e51f09aad9c0e2ff5b82a31b
sqlalchemy==1.4.22; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.6.0") \
--hash=sha256:488608953385d6c127d2dcbc4b11f8d7f2f30b89f6bd27c01b042253d985cc2f \
--hash=sha256:5d856cc50fd26fc8dd04892ed5a5a3d7eeb914fea2c2e484183e2d84c14926e0 \
--hash=sha256:a00d9c6d3a8afe1d1681cd8a5266d2f0ed684b0b44bada2ca82403b9e8b25d39 \
--hash=sha256:5908ea6c652a050d768580d01219c98c071e71910ab8e7b42c02af4010608397 \
--hash=sha256:b7fb937c720847879c7402fe300cfdb2aeff22349fa4ea3651bca4e2d6555939 \
--hash=sha256:9bfe882d5a1bbde0245dca0bd48da0976bd6634cf2041d2fdf0417c5463e40e5 \
--hash=sha256:eedd76f135461cf237534a6dc0d1e0f6bb88a1dc193678fab48a11d223462da5 \
--hash=sha256:6a16c7c4452293da5143afa3056680db2d187b380b3ef4d470d4e29885720de3 \
--hash=sha256:44d23ea797a5e0be71bc5454b9ae99158ea0edc79e2393c6e9a2354de88329c0 \
--hash=sha256:a5e14cb0c0a4ac095395f24575a0e7ab5d1be27f5f9347f1762f21505e3ba9f1 \
--hash=sha256:bc34a007e604091ca3a4a057525efc4cefd2b7fe970f44d20b9cfa109ab1bddb \
--hash=sha256:756f5d2f5b92d27450167247fb574b09c4cd192a3f8c2e493b3e518a204ee543 \
--hash=sha256:9fcbb4b4756b250ed19adc5e28c005b8ed56fdb5c21efa24c6822c0575b4964d \
--hash=sha256:09dbb4bc01a734ccddbf188deb2a69aede4b3c153a72b6d5c6900be7fb2945b1 \
--hash=sha256:f028ef6a1d828bc754852a022b2160e036202ac8658a6c7d34875aafd14a9a15 \
--hash=sha256:68393d3fd31469845b6ba11f5b4209edbea0b58506be0e077aafbf9aa2e21e11 \
--hash=sha256:891927a49b2363a4199763a9d436d97b0b42c65922a4ea09025600b81a00d17e \
--hash=sha256:fd2102a8f8a659522719ed73865dff3d3cc76eb0833039dc473e0ad3041d04be \
--hash=sha256:4014978de28163cd8027434916a92d0f5bb1a3a38dff5e8bf8bff4d9372a9117 \
--hash=sha256:f814d80844969b0d22ea63663da4de5ca1c434cfbae226188901e5d368792c17 \
--hash=sha256:d09a760b0a045b4d799102ae7965b5491ccf102123f14b2a8cc6c01d1021a2d9 \
--hash=sha256:26daa429f039e29b1e523bf763bfab17490556b974c77b5ca7acb545b9230e9a \
--hash=sha256:12bac5fa1a6ea870bdccb96fe01610641dd44ebe001ed91ef7fcd980e9702db5 \
--hash=sha256:39b5d36ab71f73c068cdcf70c38075511de73616e6c7fdd112d6268c2704d9f5 \
--hash=sha256:5102b9face693e8b2db3b2539c7e1a5d9a5b4dc0d79967670626ffd2f710d6e6 \
--hash=sha256:c9373ef67a127799027091fa53449125351a8c943ddaa97bec4e99271dbb21f4 \
--hash=sha256:36a089dc604032d41343d86290ce85d4e6886012eea73faa88001260abf5ff81 \
--hash=sha256:b48148ceedfb55f764562e04c00539bb9ea72bf07820ca15a594a9a049ff6b0e \
--hash=sha256:1fdae7d980a2fa617d119d0dc13ecb5c23cc63a8b04ffcb5298f2c59d86851e9 \
--hash=sha256:ec1be26cdccd60d180359a527d5980d959a26269a2c7b1b327a1eea0cab37ed8
typing-extensions==4.0.1; python_version >= "3.6" and python_full_version < "3.0.0" and python_version < "3.8" or python_full_version >= "3.6.0" and python_version < "3.8" and python_version >= "3.6" \
--hash=sha256:7f001e5ac290a0c0401508864c7ec868be4e701886d5b573a9528ed3973d9d3b \
--hash=sha256:4ca091dea149f945ec56afb48dae714f21e8692ef22a395223bcd328961b6a0e
url-normalize==1.4.3; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version >= "3.6" \
--hash=sha256:d23d3a070ac52a67b83a1c59a0e68f8608d1cd538783b401bc9de2c0fac999b2 \
--hash=sha256:ec3c301f04e5bb676d333a7fa162fa977ad2ca04b7e652bfc9fac4e405728eed
urllib3==1.26.7; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version < "4" and python_version >= "3.6" \
--hash=sha256:c4fdf4019605b6e5423637e01bc9fe4daef873709a7973e195ceba0a62bbc844 \
--hash=sha256:4987c65554f7a2dbf30c18fd48778ef124af6fab771a377103da0585e2336ece
wcwidth==0.2.5; python_version >= "3.5" \
--hash=sha256:beb4802a9cebb9144e99086eff703a642a13d6a0052920003a230f3294bbe784 \
--hash=sha256:c4d647b99872929fdb7bdcaa4fbe7f01413ed3d98077df798530e5b04f116c83
xlrd==1.2.0; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.4.0") \
--hash=sha256:e551fb498759fa3a5384a94ccd4c3c02eb7c00ea424426e212ac0c57be9dfbde \
--hash=sha256:546eb36cee8db40c3eaa46c351e67ffee6eeb5fa2650b71bc4c758a29a1b29b2
zipp==3.6.0; python_version >= "3.6" and python_full_version < "3.0.0" and python_version < "3.8" or python_full_version >= "3.6.0" and python_version < "3.8" and python_version >= "3.6" \
--hash=sha256:9fe5ea21568a0a70e50f273397638d39b03353731e6cbbb3fd8502a33fec40bc \
--hash=sha256:71c644c5369f4a6e07636f0aa966270449561fcea2e3d6747b8d23efaa9d7832

View File

@ -14,7 +14,7 @@ install_requires = [
setuptools.setup(
name="csv-metadata-quality",
version="0.4.8-dev",
version="0.5.0",
author="Alan Orth",
author_email="aorth@mjanja.ch",
description="A simple, but opinionated CSV quality checking and fixing pipeline for CSVs in the DSpace ecosystem.",

View File

@ -438,3 +438,43 @@ def test_title_not_in_citation(capsys):
captured.out
== f"{Fore.YELLOW}Title is not present in citation: {Fore.RESET}{title}\n"
)
def test_country_matches_region():
"""Test an item with regions matching its country list."""
country = "Kenya"
region = "Eastern Africa"
# Emulate a column in a transposed dataframe (which is just a series)
d = {"cg.coverage.country": country, "cg.coverage.region": region}
series = pd.Series(data=d)
result = check.countries_match_regions(series)
assert result == None
def test_country_not_matching_region(capsys):
"""Test an item with regions not matching its country list."""
title = "Testing an item with no matching region."
country = "Kenya"
region = ""
missing_region = "Eastern Africa"
# Emulate a column in a transposed dataframe (which is just a series)
d = {
"dc.title": title,
"cg.coverage.country": country,
"cg.coverage.region": region,
}
series = pd.Series(data=d)
check.countries_match_regions(series)
captured = capsys.readouterr()
assert (
captured.out
== f"{Fore.YELLOW}Missing region ({missing_region}): {Fore.RESET}{title}\n"
)