mirror of
https://github.com/ilri/csv-metadata-quality.git
synced 2025-07-13 17:52:22 +02:00
Compare commits
8 Commits
72fe38972e
...
v0.5.0
Author | SHA1 | Date | |
---|---|---|---|
cc34db7ff8
|
|||
b79e07b814
|
|||
865b950c33
|
|||
6f269ca6b1
|
|||
120e8cf09f
|
|||
a4eb79f625
|
|||
ccc2a73456
|
|||
ad33195ba3
|
@ -4,10 +4,12 @@ All notable changes to this project will be documented in this file.
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||
|
||||
## Unreleased
|
||||
## [0.5.0] - 2021-12-08
|
||||
### Added
|
||||
- Ability to check for, and fix, "mojibake" characters using [ftfy](https://github.com/LuminosoInsight/python-ftfy)
|
||||
- Ability to check if the item's title exists in the citation
|
||||
- Ability to check if an item has countries, but no matching regions (only
|
||||
suggests missing regions if there is a region field in the CSV)
|
||||
|
||||
### Updated
|
||||
- Python dependencies
|
||||
|
@ -1,4 +1,11 @@
|
||||
# DSpace CSV Metadata Quality Checker  [![Build Status](https://ci.mjanja.ch/api/badges/alanorth/csv-metadata-quality/status.svg)](https://ci.mjanja.ch/alanorth/csv-metadata-quality)
|
||||
<h1 align="center">DSpace CSV Metadata Quality Checker</h1>
|
||||
|
||||
<p align="center">
|
||||
<a href="https://ci.mjanja.ch/alanorth/csv-metadata-quality"><img alt="Build Status" src="https://ci.mjanja.ch/api/badges/alanorth/csv-metadata-quality/status.svg"></a>
|
||||
<a href="https://github.com/ilri/csv-metadata-quality/actions"><img alt="Build and Test" src="https://github.com/ilri/csv-metadata-quality/workflows/Build%20and%20Test/badge.svg"></a>
|
||||
<a href="https://github.com/psf/black"><img alt="Code style: black" src="https://img.shields.io/badge/code%20style-black-000000.svg"></a>
|
||||
</p>
|
||||
|
||||
A simple, but opinionated metadata quality checker and fixer designed to work with CSVs in the DSpace ecosystem (though it could theoretically work on any CSV that uses Dublin Core fields as columns). The implementation is essentially a pipeline of checks and fixes that begins with splitting multi-value fields on the standard DSpace "||" separator, trimming leading/trailing whitespace, and then proceeding to more specialized cases like ISSNs, ISBNs, languages, unnecessary Unicode, AGROVOC terms, etc.
|
||||
|
||||
Requires Python 3.7.1 or greater (3.8+ recommended). CSV and Excel support comes from the [Pandas](https://pandas.pydata.org/) library, though your mileage may vary with Excel because this is much less tested.
|
||||
|
@ -197,6 +197,9 @@ def run(argv):
|
||||
# Check: title in citation
|
||||
check.title_in_citation(df_transposed[column])
|
||||
|
||||
# Check: countries match regions
|
||||
check.countries_match_regions(df_transposed[column])
|
||||
|
||||
if args.experimental_checks:
|
||||
experimental.correct_language(df_transposed[column])
|
||||
|
||||
|
@ -4,6 +4,7 @@ import os
|
||||
import re
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
import country_converter as coco
|
||||
import pandas as pd
|
||||
import requests
|
||||
import requests_cache
|
||||
@ -447,3 +448,74 @@ def title_in_citation(row):
|
||||
print(f"{Fore.YELLOW}Title is not present in citation: {Fore.RESET}{title}")
|
||||
|
||||
return
|
||||
|
||||
|
||||
def countries_match_regions(row):
    """Warn when an item's countries lack their corresponding UN M.49 regions.

    An item that has country coverage metadata should also carry the region
    each country belongs to. For example, an item with country coverage
    "Kenya" should also have region "Eastern Africa" according to the UN M.49
    classification scheme.

    See: https://unstats.un.org/unsd/methodology/m49/

    The check only runs when the row has both a country column and a region
    column; otherwise there is nowhere to suggest a region for.

    Args:
        row: a pandas Series for one item, indexed by column name.
            Multi-value fields are separated by "||".

    Returns:
        None. One warning is printed per distinct missing region.
    """
    # Initialize the column names at function scope so that we can set them
    # in the loop below and still be able to access them afterwards.
    country_column_name = ""
    region_column_name = ""
    title_column_name = ""

    # Iterate over the labels of the current row's values to get the names of
    # the country, region, and title columns. If several labels match the
    # same pattern, the last one wins.
    for label in row.axes[0]:
        # Find the name of the country column
        if re.match(r"^.*?country.*$", label) is not None:
            country_column_name = label

        # Find the name of the region column
        if re.match(r"^.*?region.*$", label) is not None:
            region_column_name = label

        # Find the name of the title column
        if re.match(r"^(dc|dcterms)\.title.*$", label) is not None:
            title_column_name = label

    # Make sure we found the country and region columns
    if country_column_name == "" or region_column_name == "":
        return

    # If we don't have any countries then we should return early before
    # suggesting regions. pd.isna() covers both None and NaN, so a missing
    # value never reaches .split() below.
    country_value = row[country_column_name]
    if pd.isna(country_value):
        return

    countries = country_value.split("||")

    region_value = row[region_column_name]
    regions = [] if pd.isna(region_value) else region_value.split("||")

    # The title is only used to label the warning, so a CSV without a title
    # column must not make the check fail with a KeyError.
    title = row[title_column_name] if title_column_name != "" else ""

    # Keep track of missing regions across all countries, without duplicates
    # and in the order they were first seen.
    missing_regions = []

    for country in countries:
        # Look up the UN M.49 regions for this country code. CoCo seems to
        # only list the direct region, ie Western Africa, rather than all
        # the parent regions ("Sub-Saharan Africa", "Africa", "World")
        un_region = coco.convert(names=country, to="UNRegion")

        # CoCo returns the placeholder "not found" (its default not_found
        # value) for names it cannot resolve. That is an invalid country,
        # not a missing region, so don't report it here.
        if un_region == "not found":
            continue

        if un_region not in regions and un_region not in missing_regions:
            missing_regions.append(un_region)

    for missing_region in missing_regions:
        print(
            f"{Fore.YELLOW}Missing region ({missing_region}): {Fore.RESET}{title}"
        )

    return
|
||||
|
@ -1,3 +1,3 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-only
|
||||
|
||||
VERSION = "0.4.8-dev"
|
||||
VERSION = "0.5.0"
|
||||
|
@ -1,37 +1,38 @@
|
||||
dc.title,dcterms.issued,dc.identifier.issn,dc.identifier.isbn,dcterms.language,dcterms.subject,cg.coverage.country,filename,dcterms.license,dcterms.type,dcterms.bibliographicCitation,cg.identifier.doi
|
||||
Leading space,2019-07-29,,,,,,,,,,
|
||||
Trailing space ,2019-07-29,,,,,,,,,,
|
||||
Excessive space,2019-07-29,,,,,,,,,,
|
||||
Miscellaenous ||whitespace | issues ,2019-07-29,,,,,,,,,,
|
||||
Duplicate||Duplicate,2019-07-29,,,,,,,,,,
|
||||
Invalid ISSN,2019-07-29,2321-2302,,,,,,,,,
|
||||
Invalid ISBN,2019-07-29,,978-0-306-40615-6,,,,,,,,
|
||||
Multiple valid ISSNs,2019-07-29,0378-5955||0024-9319,,,,,,,,,
|
||||
Multiple valid ISBNs,2019-07-29,,99921-58-10-7||978-0-306-40615-7,,,,,,,,
|
||||
Invalid date,2019-07-260,,,,,,,,,,
|
||||
Multiple dates,2019-07-26||2019-01-10,,,,,,,,,,
|
||||
Invalid multi-value separator,2019-07-29,0378-5955|0024-9319,,,,,,,,,
|
||||
Unnecessary Unicode,2019-07-29,,,,,,,,,,
|
||||
Suspicious character||foreˆt,2019-07-29,,,,,,,,,,
|
||||
Invalid ISO 639-1 (alpha 2) language,2019-07-29,,,jp,,,,,,,
|
||||
Invalid ISO 639-3 (alpha 3) language,2019-07-29,,,chi,,,,,,,
|
||||
Invalid language,2019-07-29,,,Span,,,,,,,
|
||||
Invalid AGROVOC subject,2019-07-29,,,,FOREST,,,,,,
|
||||
dc.title,dcterms.issued,dc.identifier.issn,dc.identifier.isbn,dcterms.language,dcterms.subject,cg.coverage.country,filename,dcterms.license,dcterms.type,dcterms.bibliographicCitation,cg.identifier.doi,cg.coverage.region
|
||||
Leading space,2019-07-29,,,,,,,,,,,
|
||||
Trailing space ,2019-07-29,,,,,,,,,,,
|
||||
Excessive space,2019-07-29,,,,,,,,,,,
|
||||
Miscellaenous ||whitespace | issues ,2019-07-29,,,,,,,,,,,
|
||||
Duplicate||Duplicate,2019-07-29,,,,,,,,,,,
|
||||
Invalid ISSN,2019-07-29,2321-2302,,,,,,,,,,
|
||||
Invalid ISBN,2019-07-29,,978-0-306-40615-6,,,,,,,,,
|
||||
Multiple valid ISSNs,2019-07-29,0378-5955||0024-9319,,,,,,,,,,
|
||||
Multiple valid ISBNs,2019-07-29,,99921-58-10-7||978-0-306-40615-7,,,,,,,,,
|
||||
Invalid date,2019-07-260,,,,,,,,,,,
|
||||
Multiple dates,2019-07-26||2019-01-10,,,,,,,,,,,
|
||||
Invalid multi-value separator,2019-07-29,0378-5955|0024-9319,,,,,,,,,,
|
||||
Unnecessary Unicode,2019-07-29,,,,,,,,,,,
|
||||
Suspicious character||foreˆt,2019-07-29,,,,,,,,,,,
|
||||
Invalid ISO 639-1 (alpha 2) language,2019-07-29,,,jp,,,,,,,,
|
||||
Invalid ISO 639-3 (alpha 3) language,2019-07-29,,,chi,,,,,,,,
|
||||
Invalid language,2019-07-29,,,Span,,,,,,,,
|
||||
Invalid AGROVOC subject,2019-07-29,,,,FOREST,,,,,,,
|
||||
Newline (LF),2019-07-30,,,,"TANZA
|
||||
NIA",,,,,,
|
||||
Missing date,,,,,,,,,,,
|
||||
Invalid country,2019-08-01,,,,,KENYAA,,,,,
|
||||
Uncommon filename extension,2019-08-10,,,,,,file.pdf.lck,,,,
|
||||
Unneccesary unicode (U+002D + U+00AD),2019-08-10,,978-92-9043-823-6,,,,,,,,
|
||||
"Missing space,after comma",2019-08-27,,,,,,,,,,
|
||||
Incorrect ISO 639-1 language,2019-09-26,,,es,,,,,,,
|
||||
Incorrect ISO 639-3 language,2019-09-26,,,spa,,,,,,,
|
||||
Composéd Unicode,2020-01-14,,,,,,,,,,
|
||||
Decomposéd Unicode,2020-01-14,,,,,,,,,,
|
||||
Unnecessary multi-value separator,2021-01-03,0378-5955||,,,,,,,,,
|
||||
Invalid SPDX license identifier,2021-03-11,,,,,,,CC-BY,,,
|
||||
Duplicate Title,2021-03-17,,,,,,,,Report,,
|
||||
Duplicate Title,2021-03-17,,,,,,,,Report,,
|
||||
Mojibake,2021-03-18,,,,Publicaçao CIAT,,,,Report,,
|
||||
"DOI in citation, but missing cg.identifier.doi",2021-10-06,,,,,,,,,"Orth, A. 2021. DOI in citation, but missing cg.identifier.doi. doi: 10.1186/1743-422X-9-218",
|
||||
Title missing from citation,2021-12-05,,,,,,,,,"Orth, A. 2021. Title missing f rom citation.",
|
||||
NIA",,,,,,,
|
||||
Missing date,,,,,,,,,,,,
|
||||
Invalid country,2019-08-01,,,,,KENYAA,,,,,,
|
||||
Uncommon filename extension,2019-08-10,,,,,,file.pdf.lck,,,,,
|
||||
Unneccesary unicode (U+002D + U+00AD),2019-08-10,,978-92-9043-823-6,,,,,,,,,
|
||||
"Missing space,after comma",2019-08-27,,,,,,,,,,,
|
||||
Incorrect ISO 639-1 language,2019-09-26,,,es,,,,,,,,
|
||||
Incorrect ISO 639-3 language,2019-09-26,,,spa,,,,,,,,
|
||||
Composéd Unicode,2020-01-14,,,,,,,,,,,
|
||||
Decomposéd Unicode,2020-01-14,,,,,,,,,,,
|
||||
Unnecessary multi-value separator,2021-01-03,0378-5955||,,,,,,,,,,
|
||||
Invalid SPDX license identifier,2021-03-11,,,,,,,CC-BY,,,,
|
||||
Duplicate Title,2021-03-17,,,,,,,,Report,,,
|
||||
Duplicate Title,2021-03-17,,,,,,,,Report,,,
|
||||
Mojibake,2021-03-18,,,,Publicaçao CIAT,,,,Report,,,
|
||||
"DOI in citation, but missing cg.identifier.doi",2021-10-06,,,,,,,,,"Orth, A. 2021. DOI in citation, but missing cg.identifier.doi. doi: 10.1186/1743-422X-9-218",,
|
||||
Title missing from citation,2021-12-05,,,,,,,,,"Orth, A. 2021. Title missing f rom citation.",,
|
||||
Country missing region,2021-12-08,,,,,Kenya,,,,,,
|
||||
|
|
82
poetry.lock
generated
82
poetry.lock
generated
@ -117,7 +117,7 @@ python-versions = "*"
|
||||
|
||||
[[package]]
|
||||
name = "black"
|
||||
version = "21.11b1"
|
||||
version = "21.12b0"
|
||||
description = "The uncompromising code formatter."
|
||||
category = "dev"
|
||||
optional = false
|
||||
@ -128,7 +128,6 @@ click = ">=7.1.2"
|
||||
mypy-extensions = ">=0.4.3"
|
||||
pathspec = ">=0.9.0,<1"
|
||||
platformdirs = ">=2"
|
||||
regex = ">=2021.4.4"
|
||||
tomli = ">=0.2.6,<2.0.0"
|
||||
typed-ast = {version = ">=1.4.2", markers = "python_version < \"3.8\" and implementation_name == \"cpython\""}
|
||||
typing-extensions = [
|
||||
@ -193,6 +192,17 @@ python-versions = "*"
|
||||
[package.extras]
|
||||
test = ["flake8 (==3.7.8)", "hypothesis (==3.55.3)"]
|
||||
|
||||
[[package]]
|
||||
name = "country-converter"
|
||||
version = "0.7.4"
|
||||
description = "The country converter (coco) - a Python package for converting country names between different classifications schemes."
|
||||
category = "main"
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
|
||||
[package.dependencies]
|
||||
pandas = ">=1.0"
|
||||
|
||||
[[package]]
|
||||
name = "csvkit"
|
||||
version = "1.0.6"
|
||||
@ -745,14 +755,6 @@ category = "main"
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
|
||||
[[package]]
|
||||
name = "regex"
|
||||
version = "2021.11.10"
|
||||
description = "Alternative regular expression module, to replace re."
|
||||
category = "dev"
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
|
||||
[[package]]
|
||||
name = "requests"
|
||||
version = "2.26.0"
|
||||
@ -962,7 +964,7 @@ testing = ["pytest (>=4.6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytes
|
||||
[metadata]
|
||||
lock-version = "1.1"
|
||||
python-versions = "^3.7.1"
|
||||
content-hash = "8616f204bfc9c5ddacb519c62d02f8d09c8c5df6a668e2ad38768f2b9ec9414b"
|
||||
content-hash = "2e53197acdff785ea46adcb5ca22650efdecc6693a434a0f56a4f2b958442a78"
|
||||
|
||||
[metadata.files]
|
||||
agate = [
|
||||
@ -1002,8 +1004,8 @@ backcall = [
|
||||
{file = "backcall-0.2.0.tar.gz", hash = "sha256:5cbdbf27be5e7cfadb448baf0aa95508f91f2bbc6c6437cd9cd06e2a4c215e1e"},
|
||||
]
|
||||
black = [
|
||||
{file = "black-21.11b1-py3-none-any.whl", hash = "sha256:802c6c30b637b28645b7fde282ed2569c0cd777dbe493a41b6a03c1d903f99ac"},
|
||||
{file = "black-21.11b1.tar.gz", hash = "sha256:a042adbb18b3262faad5aff4e834ff186bb893f95ba3a8013f09de1e5569def2"},
|
||||
{file = "black-21.12b0-py3-none-any.whl", hash = "sha256:a615e69ae185e08fdd73e4715e260e2479c861b5740057fde6e8b4e3b7dd589f"},
|
||||
{file = "black-21.12b0.tar.gz", hash = "sha256:77b80f693a569e2e527958459634f18df9b0ba2625ba4e0c2d5da5be42e6f2b3"},
|
||||
]
|
||||
certifi = [
|
||||
{file = "certifi-2021.10.8-py2.py3-none-any.whl", hash = "sha256:d62a0163eb4c2344ac042ab2bdf75399a71a2d8c7d47eac2e2ee91b9d6339569"},
|
||||
@ -1025,6 +1027,9 @@ commonmark = [
|
||||
{file = "commonmark-0.9.1-py2.py3-none-any.whl", hash = "sha256:da2f38c92590f83de410ba1a3cbceafbc74fee9def35f9251ba9a971d6d66fd9"},
|
||||
{file = "commonmark-0.9.1.tar.gz", hash = "sha256:452f9dc859be7f06631ddcb328b6919c67984aca654e5fefb3914d54691aed60"},
|
||||
]
|
||||
country-converter = [
|
||||
{file = "country_converter-0.7.4.tar.gz", hash = "sha256:0291cc91c4a4efe7f128a11c8c6e4cb761f7fea7cde2517f8677c7c56da334d3"},
|
||||
]
|
||||
csvkit = [
|
||||
{file = "csvkit-1.0.6-py2.py3-none-any.whl", hash = "sha256:4c99390a09f7311bebcd9409ba042023c8649045b356d4d8f207f6ebe76cc27f"},
|
||||
{file = "csvkit-1.0.6.tar.gz", hash = "sha256:c8f761b5605cc978a7515a3d6a9e7ceec49a08eeefd7ad78480dea5f8bf80d35"},
|
||||
@ -1311,57 +1316,6 @@ pytz = [
|
||||
{file = "pytz-2021.3-py2.py3-none-any.whl", hash = "sha256:3672058bc3453457b622aab7a1c3bfd5ab0bdae451512f6cf25f64ed37f5b87c"},
|
||||
{file = "pytz-2021.3.tar.gz", hash = "sha256:acad2d8b20a1af07d4e4c9d2e9285c5ed9104354062f275f3fcd88dcef4f1326"},
|
||||
]
|
||||
regex = [
|
||||
{file = "regex-2021.11.10-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:9345b6f7ee578bad8e475129ed40123d265464c4cfead6c261fd60fc9de00bcf"},
|
||||
{file = "regex-2021.11.10-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:416c5f1a188c91e3eb41e9c8787288e707f7d2ebe66e0a6563af280d9b68478f"},
|
||||
{file = "regex-2021.11.10-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e0538c43565ee6e703d3a7c3bdfe4037a5209250e8502c98f20fea6f5fdf2965"},
|
||||
{file = "regex-2021.11.10-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7ee1227cf08b6716c85504aebc49ac827eb88fcc6e51564f010f11a406c0a667"},
|
||||
{file = "regex-2021.11.10-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6650f16365f1924d6014d2ea770bde8555b4a39dc9576abb95e3cd1ff0263b36"},
|
||||
{file = "regex-2021.11.10-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:30ab804ea73972049b7a2a5c62d97687d69b5a60a67adca07eb73a0ddbc9e29f"},
|
||||
{file = "regex-2021.11.10-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:68a067c11463de2a37157930d8b153005085e42bcb7ad9ca562d77ba7d1404e0"},
|
||||
{file = "regex-2021.11.10-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:162abfd74e88001d20cb73ceaffbfe601469923e875caf9118333b1a4aaafdc4"},
|
||||
{file = "regex-2021.11.10-cp310-cp310-win32.whl", hash = "sha256:98ba568e8ae26beb726aeea2273053c717641933836568c2a0278a84987b2a1a"},
|
||||
{file = "regex-2021.11.10-cp310-cp310-win_amd64.whl", hash = "sha256:780b48456a0f0ba4d390e8b5f7c661fdd218934388cde1a974010a965e200e12"},
|
||||
{file = "regex-2021.11.10-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:dba70f30fd81f8ce6d32ddeef37d91c8948e5d5a4c63242d16a2b2df8143aafc"},
|
||||
{file = "regex-2021.11.10-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e1f54b9b4b6c53369f40028d2dd07a8c374583417ee6ec0ea304e710a20f80a0"},
|
||||
{file = "regex-2021.11.10-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fbb9dc00e39f3e6c0ef48edee202f9520dafb233e8b51b06b8428cfcb92abd30"},
|
||||
{file = "regex-2021.11.10-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:666abff54e474d28ff42756d94544cdfd42e2ee97065857413b72e8a2d6a6345"},
|
||||
{file = "regex-2021.11.10-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5537f71b6d646f7f5f340562ec4c77b6e1c915f8baae822ea0b7e46c1f09b733"},
|
||||
{file = "regex-2021.11.10-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ed2e07c6a26ed4bea91b897ee2b0835c21716d9a469a96c3e878dc5f8c55bb23"},
|
||||
{file = "regex-2021.11.10-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:ca5f18a75e1256ce07494e245cdb146f5a9267d3c702ebf9b65c7f8bd843431e"},
|
||||
{file = "regex-2021.11.10-cp36-cp36m-win32.whl", hash = "sha256:93a5051fcf5fad72de73b96f07d30bc29665697fb8ecdfbc474f3452c78adcf4"},
|
||||
{file = "regex-2021.11.10-cp36-cp36m-win_amd64.whl", hash = "sha256:b483c9d00a565633c87abd0aaf27eb5016de23fed952e054ecc19ce32f6a9e7e"},
|
||||
{file = "regex-2021.11.10-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:fff55f3ce50a3ff63ec8e2a8d3dd924f1941b250b0aac3d3d42b687eeff07a8e"},
|
||||
{file = "regex-2021.11.10-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e32d2a2b02ccbef10145df9135751abea1f9f076e67a4e261b05f24b94219e36"},
|
||||
{file = "regex-2021.11.10-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:53db2c6be8a2710b359bfd3d3aa17ba38f8aa72a82309a12ae99d3c0c3dcd74d"},
|
||||
{file = "regex-2021.11.10-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2207ae4f64ad3af399e2d30dde66f0b36ae5c3129b52885f1bffc2f05ec505c8"},
|
||||
{file = "regex-2021.11.10-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d5ca078bb666c4a9d1287a379fe617a6dccd18c3e8a7e6c7e1eb8974330c626a"},
|
||||
{file = "regex-2021.11.10-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dd33eb9bdcfbabab3459c9ee651d94c842bc8a05fabc95edf4ee0c15a072495e"},
|
||||
{file = "regex-2021.11.10-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:05b7d6d7e64efe309972adab77fc2af8907bb93217ec60aa9fe12a0dad35874f"},
|
||||
{file = "regex-2021.11.10-cp37-cp37m-win32.whl", hash = "sha256:e71255ba42567d34a13c03968736c5d39bb4a97ce98188fafb27ce981115beec"},
|
||||
{file = "regex-2021.11.10-cp37-cp37m-win_amd64.whl", hash = "sha256:07856afef5ffcc052e7eccf3213317fbb94e4a5cd8177a2caa69c980657b3cb4"},
|
||||
{file = "regex-2021.11.10-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ba05430e819e58544e840a68b03b28b6d328aff2e41579037e8bab7653b37d83"},
|
||||
{file = "regex-2021.11.10-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:7f301b11b9d214f83ddaf689181051e7f48905568b0c7017c04c06dfd065e244"},
|
||||
{file = "regex-2021.11.10-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4aaa4e0705ef2b73dd8e36eeb4c868f80f8393f5f4d855e94025ce7ad8525f50"},
|
||||
{file = "regex-2021.11.10-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:788aef3549f1924d5c38263104dae7395bf020a42776d5ec5ea2b0d3d85d6646"},
|
||||
{file = "regex-2021.11.10-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f8af619e3be812a2059b212064ea7a640aff0568d972cd1b9e920837469eb3cb"},
|
||||
{file = "regex-2021.11.10-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:85bfa6a5413be0ee6c5c4a663668a2cad2cbecdee367630d097d7823041bdeec"},
|
||||
{file = "regex-2021.11.10-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f23222527b307970e383433daec128d769ff778d9b29343fb3496472dc20dabe"},
|
||||
{file = "regex-2021.11.10-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:da1a90c1ddb7531b1d5ff1e171b4ee61f6345119be7351104b67ff413843fe94"},
|
||||
{file = "regex-2021.11.10-cp38-cp38-win32.whl", hash = "sha256:0617383e2fe465732af4509e61648b77cbe3aee68b6ac8c0b6fe934db90be5cc"},
|
||||
{file = "regex-2021.11.10-cp38-cp38-win_amd64.whl", hash = "sha256:a3feefd5e95871872673b08636f96b61ebef62971eab044f5124fb4dea39919d"},
|
||||
{file = "regex-2021.11.10-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:f7f325be2804246a75a4f45c72d4ce80d2443ab815063cdf70ee8fb2ca59ee1b"},
|
||||
{file = "regex-2021.11.10-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:537ca6a3586931b16a85ac38c08cc48f10fc870a5b25e51794c74df843e9966d"},
|
||||
{file = "regex-2021.11.10-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eef2afb0fd1747f33f1ee3e209bce1ed582d1896b240ccc5e2697e3275f037c7"},
|
||||
{file = "regex-2021.11.10-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:432bd15d40ed835a51617521d60d0125867f7b88acf653e4ed994a1f8e4995dc"},
|
||||
{file = "regex-2021.11.10-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b43c2b8a330a490daaef5a47ab114935002b13b3f9dc5da56d5322ff218eeadb"},
|
||||
{file = "regex-2021.11.10-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:962b9a917dd7ceacbe5cd424556914cb0d636001e393b43dc886ba31d2a1e449"},
|
||||
{file = "regex-2021.11.10-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fa8c626d6441e2d04b6ee703ef2d1e17608ad44c7cb75258c09dd42bacdfc64b"},
|
||||
{file = "regex-2021.11.10-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:3c5fb32cc6077abad3bbf0323067636d93307c9fa93e072771cf9a64d1c0f3ef"},
|
||||
{file = "regex-2021.11.10-cp39-cp39-win32.whl", hash = "sha256:3b5df18db1fccd66de15aa59c41e4f853b5df7550723d26aa6cb7f40e5d9da5a"},
|
||||
{file = "regex-2021.11.10-cp39-cp39-win_amd64.whl", hash = "sha256:83ee89483672b11f8952b158640d0c0ff02dc43d9cb1b70c1564b49abe92ce29"},
|
||||
{file = "regex-2021.11.10.tar.gz", hash = "sha256:f341ee2df0999bfdf7a95e448075effe0db212a59387de1a70690e4acb03d4c6"},
|
||||
]
|
||||
requests = [
|
||||
{file = "requests-2.26.0-py2.py3-none-any.whl", hash = "sha256:6c1246513ecd5ecd4528a0906f910e8f0f9c6b8ec72030dc9fd154dc1a6efd24"},
|
||||
{file = "requests-2.26.0.tar.gz", hash = "sha256:b8aa58f8cf793ffd8782d3d8cb19e66ef36f7aba4353eec859e74678b01b07a7"},
|
||||
|
@ -1,6 +1,6 @@
|
||||
[tool.poetry]
|
||||
name = "csv-metadata-quality"
|
||||
version = "0.4.8-dev"
|
||||
version = "0.5.0"
|
||||
description="A simple, but opinionated CSV quality checking and fixing pipeline for CSVs in the DSpace ecosystem."
|
||||
authors = ["Alan Orth <alan.orth@gmail.com>"]
|
||||
license="GPL-3.0-only"
|
||||
@ -23,6 +23,7 @@ colorama = "^0.4.4"
|
||||
spdx-license-list = "^0.5.2"
|
||||
ftfy = "^5.9"
|
||||
SQLAlchemy = ">=1.3.3,<1.4.23"
|
||||
country-converter = "^0.7.4"
|
||||
|
||||
[tool.poetry.dev-dependencies]
|
||||
pytest = "^6.1.1"
|
||||
|
@ -7,12 +7,13 @@ atomicwrites==1.4.0; python_version >= "3.6" and python_full_version < "3.0.0" a
|
||||
attrs==21.2.0; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "3.6"
|
||||
babel==2.9.1; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.4.0"
|
||||
backcall==0.2.0; python_version >= "3.7" and python_version < "4.0"
|
||||
black==21.11b1; python_full_version >= "3.6.2"
|
||||
black==21.12b0; python_full_version >= "3.6.2"
|
||||
certifi==2021.10.8; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version >= "3.6"
|
||||
charset-normalizer==2.0.9; python_full_version >= "3.6.0" and python_version >= "3.6"
|
||||
click==8.0.3; python_version >= "3.6" and python_full_version >= "3.6.2"
|
||||
colorama==0.4.4; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.5.0")
|
||||
commonmark==0.9.1; python_full_version >= "3.6.2" and python_full_version < "4.0.0" and (python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.4.0")
|
||||
country-converter==0.7.4
|
||||
csvkit==1.0.6
|
||||
dbfread==2.0.7
|
||||
decorator==5.1.0; python_version >= "3.7" and python_version < "4.0"
|
||||
@ -62,7 +63,6 @@ python-slugify==5.0.2; python_version >= "3.6"
|
||||
python-stdnum==1.17
|
||||
pytimeparse==1.1.8
|
||||
pytz==2021.3; python_full_version >= "3.7.1"
|
||||
regex==2021.11.10; python_full_version >= "3.6.2"
|
||||
requests-cache==0.6.4; python_version >= "3.6"
|
||||
requests==2.26.0; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.6.0")
|
||||
rich==10.15.2; python_full_version >= "3.6.2" and python_full_version < "4.0.0" and (python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.4.0")
|
||||
|
227
requirements.txt
227
requirements.txt
@ -1,26 +1,201 @@
|
||||
certifi==2021.10.8; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version >= "3.6"
|
||||
charset-normalizer==2.0.9; python_full_version >= "3.6.0" and python_version >= "3.6"
|
||||
colorama==0.4.4; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.5.0")
|
||||
ftfy==5.9; python_version >= "3.5"
|
||||
greenlet==1.1.2; python_version >= "3" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version >= "3"
|
||||
idna==3.3; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version >= "3.6"
|
||||
importlib-metadata==4.8.2; python_version >= "3.6" and python_full_version < "3.0.0" and python_version < "3.8" or python_full_version >= "3.6.0" and python_version < "3.8" and python_version >= "3.6"
|
||||
itsdangerous==2.0.1; python_version >= "3.6"
|
||||
langid==1.1.6
|
||||
numpy==1.21.1
|
||||
pandas==1.3.4; python_full_version >= "3.7.1"
|
||||
pycountry==19.8.18
|
||||
python-dateutil==2.8.2; python_full_version >= "3.7.1"
|
||||
python-stdnum==1.17
|
||||
pytz==2021.3; python_full_version >= "3.7.1"
|
||||
requests-cache==0.6.4; python_version >= "3.6"
|
||||
requests==2.26.0; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.6.0")
|
||||
six==1.16.0; python_full_version >= "3.7.1" and python_version >= "3.6"
|
||||
spdx-license-list==0.5.2
|
||||
sqlalchemy==1.4.22; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.6.0")
|
||||
typing-extensions==4.0.1; python_version >= "3.6" and python_full_version < "3.0.0" and python_version < "3.8" or python_full_version >= "3.6.0" and python_version < "3.8" and python_version >= "3.6"
|
||||
url-normalize==1.4.3; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version >= "3.6"
|
||||
urllib3==1.26.7; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version < "4" and python_version >= "3.6"
|
||||
wcwidth==0.2.5; python_version >= "3.5"
|
||||
xlrd==1.2.0; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.4.0")
|
||||
zipp==3.6.0; python_version >= "3.6" and python_full_version < "3.0.0" and python_version < "3.8" or python_full_version >= "3.6.0" and python_version < "3.8" and python_version >= "3.6"
|
||||
certifi==2021.10.8; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version >= "3.6" \
|
||||
--hash=sha256:d62a0163eb4c2344ac042ab2bdf75399a71a2d8c7d47eac2e2ee91b9d6339569 \
|
||||
--hash=sha256:78884e7c1d4b00ce3cea67b44566851c4343c120abd683433ce934a68ea58872
|
||||
charset-normalizer==2.0.9; python_full_version >= "3.6.0" and python_version >= "3.6" \
|
||||
--hash=sha256:b0b883e8e874edfdece9c28f314e3dd5badf067342e42fb162203335ae61aa2c \
|
||||
--hash=sha256:1eecaa09422db5be9e29d7fc65664e6c33bd06f9ced7838578ba40d58bdf3721
|
||||
colorama==0.4.4; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.5.0") \
|
||||
--hash=sha256:9f47eda37229f68eee03b24b9748937c7dc3868f906e8ba69fbcbdd3bc5dc3e2 \
|
||||
--hash=sha256:5941b2b48a20143d2267e95b1c2a7603ce057ee39fd88e7329b0c292aa16869b
|
||||
country-converter==0.7.4 \
|
||||
--hash=sha256:0291cc91c4a4efe7f128a11c8c6e4cb761f7fea7cde2517f8677c7c56da334d3
|
||||
ftfy==5.9; python_version >= "3.5" \
|
||||
--hash=sha256:8c4fb2863c0b82eae2ab3cf353d9ade268dfbde863d322f78d6a9fd5cefb31e9
|
||||
greenlet==1.1.2; python_version >= "3" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version >= "3" \
|
||||
--hash=sha256:58df5c2a0e293bf665a51f8a100d3e9956febfbf1d9aaf8c0677cf70218910c6 \
|
||||
--hash=sha256:aec52725173bd3a7b56fe91bc56eccb26fbdff1386ef123abb63c84c5b43b63a \
|
||||
--hash=sha256:833e1551925ed51e6b44c800e71e77dacd7e49181fdc9ac9a0bf3714d515785d \
|
||||
--hash=sha256:aa5b467f15e78b82257319aebc78dd2915e4c1436c3c0d1ad6f53e47ba6e2713 \
|
||||
--hash=sha256:40b951f601af999a8bf2ce8c71e8aaa4e8c6f78ff8afae7b808aae2dc50d4c40 \
|
||||
--hash=sha256:95e69877983ea39b7303570fa6760f81a3eec23d0e3ab2021b7144b94d06202d \
|
||||
--hash=sha256:356b3576ad078c89a6107caa9c50cc14e98e3a6c4874a37c3e0273e4baf33de8 \
|
||||
--hash=sha256:8639cadfda96737427330a094476d4c7a56ac03de7265622fcf4cfe57c8ae18d \
|
||||
--hash=sha256:97e5306482182170ade15c4b0d8386ded995a07d7cc2ca8f27958d34d6736497 \
|
||||
--hash=sha256:e6a36bb9474218c7a5b27ae476035497a6990e21d04c279884eb10d9b290f1b1 \
|
||||
--hash=sha256:abb7a75ed8b968f3061327c433a0fbd17b729947b400747c334a9c29a9af6c58 \
|
||||
--hash=sha256:14d4f3cd4e8b524ae9b8aa567858beed70c392fdec26dbdb0a8a418392e71708 \
|
||||
--hash=sha256:17ff94e7a83aa8671a25bf5b59326ec26da379ace2ebc4411d690d80a7fbcf23 \
|
||||
--hash=sha256:9f3cba480d3deb69f6ee2c1825060177a22c7826431458c697df88e6aeb3caee \
|
||||
--hash=sha256:fa877ca7f6b48054f847b61d6fa7bed5cebb663ebc55e018fda12db09dcc664c \
|
||||
--hash=sha256:7cbd7574ce8e138bda9df4efc6bf2ab8572c9aff640d8ecfece1b006b68da963 \
|
||||
--hash=sha256:903bbd302a2378f984aef528f76d4c9b1748f318fe1294961c072bdc7f2ffa3e \
|
||||
--hash=sha256:049fe7579230e44daef03a259faa24511d10ebfa44f69411d99e6a184fe68073 \
|
||||
--hash=sha256:dd0b1e9e891f69e7675ba5c92e28b90eaa045f6ab134ffe70b52e948aa175b3c \
|
||||
--hash=sha256:7418b6bfc7fe3331541b84bb2141c9baf1ec7132a7ecd9f375912eca810e714e \
|
||||
--hash=sha256:f9d29ca8a77117315101425ec7ec2a47a22ccf59f5593378fc4077ac5b754fce \
|
||||
--hash=sha256:21915eb821a6b3d9d8eefdaf57d6c345b970ad722f856cd71739493ce003ad08 \
|
||||
--hash=sha256:eff9d20417ff9dcb0d25e2defc2574d10b491bf2e693b4e491914738b7908168 \
|
||||
--hash=sha256:32ca72bbc673adbcfecb935bb3fb1b74e663d10a4b241aaa2f5a75fe1d1f90aa \
|
||||
--hash=sha256:f0214eb2a23b85528310dad848ad2ac58e735612929c8072f6093f3585fd342d \
|
||||
--hash=sha256:b92e29e58bef6d9cfd340c72b04d74c4b4e9f70c9fa7c78b674d1fec18896dc4 \
|
||||
--hash=sha256:fdcec0b8399108577ec290f55551d926d9a1fa6cad45882093a7a07ac5ec147b \
|
||||
--hash=sha256:93f81b134a165cc17123626ab8da2e30c0455441d4ab5576eed73a64c025b25c \
|
||||
--hash=sha256:1e12bdc622676ce47ae9abbf455c189e442afdde8818d9da983085df6312e7a1 \
|
||||
--hash=sha256:8c790abda465726cfb8bb08bd4ca9a5d0a7bd77c7ac1ca1b839ad823b948ea28 \
|
||||
--hash=sha256:f276df9830dba7a333544bd41070e8175762a7ac20350786b322b714b0e654f5 \
|
||||
--hash=sha256:64e6175c2e53195278d7388c454e0b30997573f3f4bd63697f88d855f7a6a1fc \
|
||||
--hash=sha256:b11548073a2213d950c3f671aa88e6f83cda6e2fb97a8b6317b1b5b33d850e06 \
|
||||
--hash=sha256:9633b3034d3d901f0a46b7939f8c4d64427dfba6bbc5a36b1a67364cf148a1b0 \
|
||||
--hash=sha256:eb6ea6da4c787111adf40f697b4e58732ee0942b5d3bd8f435277643329ba627 \
|
||||
--hash=sha256:f3acda1924472472ddd60c29e5b9db0cec629fbe3c5c5accb74d6d6d14773478 \
|
||||
--hash=sha256:e859fcb4cbe93504ea18008d1df98dee4f7766db66c435e4882ab35cf70cac43 \
|
||||
--hash=sha256:00e44c8afdbe5467e4f7b5851be223be68adb4272f44696ee71fe46b7036a711 \
|
||||
--hash=sha256:ec8c433b3ab0419100bd45b47c9c8551248a5aee30ca5e9d399a0b57ac04651b \
|
||||
--hash=sha256:288c6a76705dc54fba69fbcb59904ae4ad768b4c768839b8ca5fdadec6dd8cfd \
|
||||
--hash=sha256:8d2f1fb53a421b410751887eb4ff21386d119ef9cde3797bf5e7ed49fb51a3b3 \
|
||||
--hash=sha256:166eac03e48784a6a6e0e5f041cfebb1ab400b394db188c48b3a84737f505b67 \
|
||||
--hash=sha256:572e1787d1460da79590bf44304abbc0a2da944ea64ec549188fa84d89bba7ab \
|
||||
--hash=sha256:be5f425ff1f5f4b3c1e33ad64ab994eed12fc284a6ea71c5243fd564502ecbe5 \
|
||||
--hash=sha256:b1692f7d6bc45e3200844be0dba153612103db241691088626a33ff1f24a0d88 \
|
||||
--hash=sha256:7227b47e73dedaa513cdebb98469705ef0d66eb5a1250144468e9c3097d6b59b \
|
||||
--hash=sha256:7ff61ff178250f9bb3cd89752df0f1dd0e27316a8bd1465351652b1b4a4cdfd3 \
|
||||
--hash=sha256:f70a9e237bb792c7cc7e44c531fd48f5897961701cdaa06cf22fc14965c496cf \
|
||||
--hash=sha256:013d61294b6cd8fe3242932c1c5e36e5d1db2c8afb58606c5a67efce62c1f5fd \
|
||||
--hash=sha256:e30f5ea4ae2346e62cedde8794a56858a67b878dd79f7df76a0767e356b1744a
|
||||
idna==3.3; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version >= "3.6" \
|
||||
--hash=sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff \
|
||||
--hash=sha256:9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d
|
||||
importlib-metadata==4.8.2; python_version >= "3.6" and python_full_version < "3.0.0" and python_version < "3.8" or python_full_version >= "3.6.0" and python_version < "3.8" and python_version >= "3.6" \
|
||||
--hash=sha256:53ccfd5c134223e497627b9815d5030edf77d2ed573922f7a0b8f8bb81a1c100 \
|
||||
--hash=sha256:75bdec14c397f528724c1bfd9709d660b33a4d2e77387a3358f20b848bb5e5fb
|
||||
itsdangerous==2.0.1; python_version >= "3.6" \
|
||||
--hash=sha256:5174094b9637652bdb841a3029700391451bd092ba3db90600dea710ba28e97c \
|
||||
--hash=sha256:9e724d68fc22902a1435351f84c3fb8623f303fffcc566a4cb952df8c572cff0
|
||||
langid==1.1.6 \
|
||||
--hash=sha256:044bcae1912dab85c33d8e98f2811b8f4ff1213e5e9a9e9510137b84da2cb293
|
||||
numpy==1.21.1 \
|
||||
--hash=sha256:38e8648f9449a549a7dfe8d8755a5979b45b3538520d1e735637ef28e8c2dc50 \
|
||||
--hash=sha256:fd7d7409fa643a91d0a05c7554dd68aa9c9bb16e186f6ccfe40d6e003156e33a \
|
||||
--hash=sha256:a75b4498b1e93d8b700282dc8e655b8bd559c0904b3910b144646dbbbc03e062 \
|
||||
--hash=sha256:1412aa0aec3e00bc23fbb8664d76552b4efde98fb71f60737c83efbac24112f1 \
|
||||
--hash=sha256:e46ceaff65609b5399163de5893d8f2a82d3c77d5e56d976c8b5fb01faa6b671 \
|
||||
--hash=sha256:c6a2324085dd52f96498419ba95b5777e40b6bcbc20088fddb9e8cbb58885e8e \
|
||||
--hash=sha256:73101b2a1fef16602696d133db402a7e7586654682244344b8329cdcbbb82172 \
|
||||
--hash=sha256:7a708a79c9a9d26904d1cca8d383bf869edf6f8e7650d85dbc77b041e8c5a0f8 \
|
||||
--hash=sha256:95b995d0c413f5d0428b3f880e8fe1660ff9396dcd1f9eedbc311f37b5652e16 \
|
||||
--hash=sha256:635e6bd31c9fb3d475c8f44a089569070d10a9ef18ed13738b03049280281267 \
|
||||
--hash=sha256:4a3d5fb89bfe21be2ef47c0614b9c9c707b7362386c9a3ff1feae63e0267ccb6 \
|
||||
--hash=sha256:8a326af80e86d0e9ce92bcc1e65c8ff88297de4fa14ee936cb2293d414c9ec63 \
|
||||
--hash=sha256:791492091744b0fe390a6ce85cc1bf5149968ac7d5f0477288f78c89b385d9af \
|
||||
--hash=sha256:0318c465786c1f63ac05d7c4dbcecd4d2d7e13f0959b01b534ea1e92202235c5 \
|
||||
--hash=sha256:9a513bd9c1551894ee3d31369f9b07460ef223694098cf27d399513415855b68 \
|
||||
--hash=sha256:91c6f5fc58df1e0a3cc0c3a717bb3308ff850abdaa6d2d802573ee2b11f674a8 \
|
||||
--hash=sha256:978010b68e17150db8765355d1ccdd450f9fc916824e8c4e35ee620590e234cd \
|
||||
--hash=sha256:9749a40a5b22333467f02fe11edc98f022133ee1bfa8ab99bda5e5437b831214 \
|
||||
--hash=sha256:d7a4aeac3b94af92a9373d6e77b37691b86411f9745190d2c351f410ab3a791f \
|
||||
--hash=sha256:d9e7912a56108aba9b31df688a4c4f5cb0d9d3787386b87d504762b6754fbb1b \
|
||||
--hash=sha256:25b40b98ebdd272bc3020935427a4530b7d60dfbe1ab9381a39147834e985eac \
|
||||
--hash=sha256:8a92c5aea763d14ba9d6475803fc7904bda7decc2a0a68153f587ad82941fec1 \
|
||||
--hash=sha256:05a0f648eb28bae4bcb204e6fd14603de2908de982e761a2fc78efe0f19e96e1 \
|
||||
--hash=sha256:f01f28075a92eede918b965e86e8f0ba7b7797a95aa8d35e1cc8821f5fc3ad6a \
|
||||
--hash=sha256:88c0b89ad1cc24a5efbb99ff9ab5db0f9a86e9cc50240177a571fbe9c2860ac2 \
|
||||
--hash=sha256:01721eefe70544d548425a07c80be8377096a54118070b8a62476866d5208e33 \
|
||||
--hash=sha256:2d4d1de6e6fb3d28781c73fbde702ac97f03d79e4ffd6598b880b2d95d62ead4 \
|
||||
--hash=sha256:dff4af63638afcc57a3dfb9e4b26d434a7a602d225b42d746ea7fe2edf1342fd
|
||||
pandas==1.3.4; python_full_version >= "3.7.1" \
|
||||
--hash=sha256:9707bdc1ea9639c886b4d3be6e2a45812c1ac0c2080f94c31b71c9fa35556f9b \
|
||||
--hash=sha256:c2f44425594ae85e119459bb5abb0748d76ef01d9c08583a667e3339e134218e \
|
||||
--hash=sha256:372d72a3d8a5f2dbaf566a5fa5fa7f230842ac80f29a931fb4b071502cf86b9a \
|
||||
--hash=sha256:d99d2350adb7b6c3f7f8f0e5dfb7d34ff8dd4bc0a53e62c445b7e43e163fce63 \
|
||||
--hash=sha256:4acc28364863127bca1029fb72228e6f473bb50c32e77155e80b410e2068eeac \
|
||||
--hash=sha256:c2646458e1dce44df9f71a01dc65f7e8fa4307f29e5c0f2f92c97f47a5bf22f5 \
|
||||
--hash=sha256:5298a733e5bfbb761181fd4672c36d0c627320eb999c59c65156c6a90c7e1b4f \
|
||||
--hash=sha256:22808afb8f96e2269dcc5b846decacb2f526dd0b47baebc63d913bf847317c8f \
|
||||
--hash=sha256:b528e126c13816a4374e56b7b18bfe91f7a7f6576d1aadba5dee6a87a7f479ae \
|
||||
--hash=sha256:fe48e4925455c964db914b958f6e7032d285848b7538a5e1b19aeb26ffaea3ec \
|
||||
--hash=sha256:eaca36a80acaacb8183930e2e5ad7f71539a66805d6204ea88736570b2876a7b \
|
||||
--hash=sha256:42493f8ae67918bf129869abea8204df899902287a7f5eaf596c8e54e0ac7ff4 \
|
||||
--hash=sha256:a388960f979665b447f0847626e40f99af8cf191bce9dc571d716433130cb3a7 \
|
||||
--hash=sha256:5ba0aac1397e1d7b654fccf263a4798a9e84ef749866060d19e577e927d66e1b \
|
||||
--hash=sha256:f567e972dce3bbc3a8076e0b675273b4a9e8576ac629149cf8286ee13c259ae5 \
|
||||
--hash=sha256:c1aa4de4919358c5ef119f6377bc5964b3a7023c23e845d9db7d9016fa0c5b1c \
|
||||
--hash=sha256:dd324f8ee05925ee85de0ea3f0d66e1362e8c80799eb4eb04927d32335a3e44a \
|
||||
--hash=sha256:d47750cf07dee6b55d8423471be70d627314277976ff2edd1381f02d52dbadf9 \
|
||||
--hash=sha256:2d1dc09c0013d8faa7474574d61b575f9af6257ab95c93dcf33a14fd8d2c1bab \
|
||||
--hash=sha256:10e10a2527db79af6e830c3d5842a4d60383b162885270f8cffc15abca4ba4a9 \
|
||||
--hash=sha256:35c77609acd2e4d517da41bae0c11c70d31c87aae8dd1aabd2670906c6d2c143 \
|
||||
--hash=sha256:003ba92db58b71a5f8add604a17a059f3068ef4e8c0c365b088468d0d64935fd \
|
||||
--hash=sha256:a51528192755f7429c5bcc9e80832c517340317c861318fea9cea081b57c9afd \
|
||||
--hash=sha256:a2aa18d3f0b7d538e21932f637fbfe8518d085238b429e4790a35e1e44a96ffc
|
||||
pycountry==19.8.18 \
|
||||
--hash=sha256:3c57aa40adcf293d59bebaffbe60d8c39976fba78d846a018dc0c2ec9c6cb3cb
|
||||
python-dateutil==2.8.2; python_full_version >= "3.7.1" \
|
||||
--hash=sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86 \
|
||||
--hash=sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9
|
||||
python-stdnum==1.17 \
|
||||
--hash=sha256:374e2b5e13912ccdbf50b0b23fca2c3e0531174805c32d74e145f37756328340 \
|
||||
--hash=sha256:a46e6cf9652807314d369b654b255c86a59f93d18be2834f3d567ed1a346c547
|
||||
pytz==2021.3; python_full_version >= "3.7.1" \
|
||||
--hash=sha256:3672058bc3453457b622aab7a1c3bfd5ab0bdae451512f6cf25f64ed37f5b87c \
|
||||
--hash=sha256:acad2d8b20a1af07d4e4c9d2e9285c5ed9104354062f275f3fcd88dcef4f1326
|
||||
requests-cache==0.6.4; python_version >= "3.6" \
|
||||
--hash=sha256:dd9120a4ab7b8128cba9b6b120d8b5560d566a3cd0f828cced3d3fd60a42ec40 \
|
||||
--hash=sha256:1102daa13a804abe23fad62d694e7dee58d6063a35d94bf6e8c9821e22e5a78b
|
||||
requests==2.26.0; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.6.0") \
|
||||
--hash=sha256:6c1246513ecd5ecd4528a0906f910e8f0f9c6b8ec72030dc9fd154dc1a6efd24 \
|
||||
--hash=sha256:b8aa58f8cf793ffd8782d3d8cb19e66ef36f7aba4353eec859e74678b01b07a7
|
||||
six==1.16.0; python_full_version >= "3.7.1" and python_version >= "3.6" \
|
||||
--hash=sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254 \
|
||||
--hash=sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926
|
||||
spdx-license-list==0.5.2 \
|
||||
--hash=sha256:1b338470c7b403dbecceca563a316382c7977516128ca6c1e8f7078e3ed6e7b0 \
|
||||
--hash=sha256:952996f72ab807972dc2278bb9b91e5294767211e51f09aad9c0e2ff5b82a31b
|
||||
sqlalchemy==1.4.22; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.6.0") \
|
||||
--hash=sha256:488608953385d6c127d2dcbc4b11f8d7f2f30b89f6bd27c01b042253d985cc2f \
|
||||
--hash=sha256:5d856cc50fd26fc8dd04892ed5a5a3d7eeb914fea2c2e484183e2d84c14926e0 \
|
||||
--hash=sha256:a00d9c6d3a8afe1d1681cd8a5266d2f0ed684b0b44bada2ca82403b9e8b25d39 \
|
||||
--hash=sha256:5908ea6c652a050d768580d01219c98c071e71910ab8e7b42c02af4010608397 \
|
||||
--hash=sha256:b7fb937c720847879c7402fe300cfdb2aeff22349fa4ea3651bca4e2d6555939 \
|
||||
--hash=sha256:9bfe882d5a1bbde0245dca0bd48da0976bd6634cf2041d2fdf0417c5463e40e5 \
|
||||
--hash=sha256:eedd76f135461cf237534a6dc0d1e0f6bb88a1dc193678fab48a11d223462da5 \
|
||||
--hash=sha256:6a16c7c4452293da5143afa3056680db2d187b380b3ef4d470d4e29885720de3 \
|
||||
--hash=sha256:44d23ea797a5e0be71bc5454b9ae99158ea0edc79e2393c6e9a2354de88329c0 \
|
||||
--hash=sha256:a5e14cb0c0a4ac095395f24575a0e7ab5d1be27f5f9347f1762f21505e3ba9f1 \
|
||||
--hash=sha256:bc34a007e604091ca3a4a057525efc4cefd2b7fe970f44d20b9cfa109ab1bddb \
|
||||
--hash=sha256:756f5d2f5b92d27450167247fb574b09c4cd192a3f8c2e493b3e518a204ee543 \
|
||||
--hash=sha256:9fcbb4b4756b250ed19adc5e28c005b8ed56fdb5c21efa24c6822c0575b4964d \
|
||||
--hash=sha256:09dbb4bc01a734ccddbf188deb2a69aede4b3c153a72b6d5c6900be7fb2945b1 \
|
||||
--hash=sha256:f028ef6a1d828bc754852a022b2160e036202ac8658a6c7d34875aafd14a9a15 \
|
||||
--hash=sha256:68393d3fd31469845b6ba11f5b4209edbea0b58506be0e077aafbf9aa2e21e11 \
|
||||
--hash=sha256:891927a49b2363a4199763a9d436d97b0b42c65922a4ea09025600b81a00d17e \
|
||||
--hash=sha256:fd2102a8f8a659522719ed73865dff3d3cc76eb0833039dc473e0ad3041d04be \
|
||||
--hash=sha256:4014978de28163cd8027434916a92d0f5bb1a3a38dff5e8bf8bff4d9372a9117 \
|
||||
--hash=sha256:f814d80844969b0d22ea63663da4de5ca1c434cfbae226188901e5d368792c17 \
|
||||
--hash=sha256:d09a760b0a045b4d799102ae7965b5491ccf102123f14b2a8cc6c01d1021a2d9 \
|
||||
--hash=sha256:26daa429f039e29b1e523bf763bfab17490556b974c77b5ca7acb545b9230e9a \
|
||||
--hash=sha256:12bac5fa1a6ea870bdccb96fe01610641dd44ebe001ed91ef7fcd980e9702db5 \
|
||||
--hash=sha256:39b5d36ab71f73c068cdcf70c38075511de73616e6c7fdd112d6268c2704d9f5 \
|
||||
--hash=sha256:5102b9face693e8b2db3b2539c7e1a5d9a5b4dc0d79967670626ffd2f710d6e6 \
|
||||
--hash=sha256:c9373ef67a127799027091fa53449125351a8c943ddaa97bec4e99271dbb21f4 \
|
||||
--hash=sha256:36a089dc604032d41343d86290ce85d4e6886012eea73faa88001260abf5ff81 \
|
||||
--hash=sha256:b48148ceedfb55f764562e04c00539bb9ea72bf07820ca15a594a9a049ff6b0e \
|
||||
--hash=sha256:1fdae7d980a2fa617d119d0dc13ecb5c23cc63a8b04ffcb5298f2c59d86851e9 \
|
||||
--hash=sha256:ec1be26cdccd60d180359a527d5980d959a26269a2c7b1b327a1eea0cab37ed8
|
||||
typing-extensions==4.0.1; python_version >= "3.6" and python_full_version < "3.0.0" and python_version < "3.8" or python_full_version >= "3.6.0" and python_version < "3.8" and python_version >= "3.6" \
|
||||
--hash=sha256:7f001e5ac290a0c0401508864c7ec868be4e701886d5b573a9528ed3973d9d3b \
|
||||
--hash=sha256:4ca091dea149f945ec56afb48dae714f21e8692ef22a395223bcd328961b6a0e
|
||||
url-normalize==1.4.3; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version >= "3.6" \
|
||||
--hash=sha256:d23d3a070ac52a67b83a1c59a0e68f8608d1cd538783b401bc9de2c0fac999b2 \
|
||||
--hash=sha256:ec3c301f04e5bb676d333a7fa162fa977ad2ca04b7e652bfc9fac4e405728eed
|
||||
urllib3==1.26.7; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version < "4" and python_version >= "3.6" \
|
||||
--hash=sha256:c4fdf4019605b6e5423637e01bc9fe4daef873709a7973e195ceba0a62bbc844 \
|
||||
--hash=sha256:4987c65554f7a2dbf30c18fd48778ef124af6fab771a377103da0585e2336ece
|
||||
wcwidth==0.2.5; python_version >= "3.5" \
|
||||
--hash=sha256:beb4802a9cebb9144e99086eff703a642a13d6a0052920003a230f3294bbe784 \
|
||||
--hash=sha256:c4d647b99872929fdb7bdcaa4fbe7f01413ed3d98077df798530e5b04f116c83
|
||||
xlrd==1.2.0; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.4.0") \
|
||||
--hash=sha256:e551fb498759fa3a5384a94ccd4c3c02eb7c00ea424426e212ac0c57be9dfbde \
|
||||
--hash=sha256:546eb36cee8db40c3eaa46c351e67ffee6eeb5fa2650b71bc4c758a29a1b29b2
|
||||
zipp==3.6.0; python_version >= "3.6" and python_full_version < "3.0.0" and python_version < "3.8" or python_full_version >= "3.6.0" and python_version < "3.8" and python_version >= "3.6" \
|
||||
--hash=sha256:9fe5ea21568a0a70e50f273397638d39b03353731e6cbbb3fd8502a33fec40bc \
|
||||
--hash=sha256:71c644c5369f4a6e07636f0aa966270449561fcea2e3d6747b8d23efaa9d7832
|
||||
|
2
setup.py
2
setup.py
@ -14,7 +14,7 @@ install_requires = [
|
||||
|
||||
setuptools.setup(
|
||||
name="csv-metadata-quality",
|
||||
version="0.4.8-dev",
|
||||
version="0.5.0",
|
||||
author="Alan Orth",
|
||||
author_email="aorth@mjanja.ch",
|
||||
description="A simple, but opinionated CSV quality checking and fixing pipeline for CSVs in the DSpace ecosystem.",
|
||||
|
@ -438,3 +438,43 @@ def test_title_not_in_citation(capsys):
|
||||
captured.out
|
||||
== f"{Fore.YELLOW}Title is not present in citation: {Fore.RESET}{title}\n"
|
||||
)
|
||||
|
||||
|
||||
def test_country_matches_region():
    """Test an item with regions matching its country list.

    Builds a series holding a country and a region, passes it to
    ``check.countries_match_regions`` and expects ``None`` back.
    """

    country = "Kenya"
    region = "Eastern Africa"

    # Emulate a column in a transposed dataframe (which is just a series)
    d = {"cg.coverage.country": country, "cg.coverage.region": region}
    series = pd.Series(data=d)

    result = check.countries_match_regions(series)

    # Compare against the None singleton by identity rather than equality
    # (PEP 8); an object with a permissive __eq__ cannot pass by accident.
    assert result is None
|
||||
|
||||
|
||||
def test_country_not_matching_region(capsys):
    """Test that a country with an empty region field reports the missing region.

    The item has a title, a country and an empty region value; the text
    captured from stdout must equal the expected "Missing region" message.
    """

    title = "Testing an item with no matching region."
    missing_region = "Eastern Africa"

    # A column of a transposed dataframe is just a series, so build one directly
    item = pd.Series(
        data={
            "dc.title": title,
            "cg.coverage.country": "Kenya",
            "cg.coverage.region": "",
        }
    )

    check.countries_match_regions(item)

    expected = f"{Fore.YELLOW}Missing region ({missing_region}): {Fore.RESET}{title}\n"
    printed = capsys.readouterr().out

    assert printed == expected
|
||||
|
Reference in New Issue
Block a user