1
0
mirror of https://github.com/ilri/csv-metadata-quality.git synced 2025-07-05 14:03:27 +02:00

18 Commits

Author SHA1 Message Date
8ee5e2e306 setup.py: denote that Python 3.10 works
All checks were successful
continuous-integration/drone/push Build is passing
I have been using Python 3.10 for months, and already added it to
the CI builds.
2022-01-29 16:08:01 +03:00
490701f244 Run more CLI tests in CI
All checks were successful
continuous-integration/drone/push Build is passing
2021-12-24 14:47:25 +02:00
e1b270cf83 CHANGELOG.md: add note about dropping invalid AGROVOC values
All checks were successful
continuous-integration/drone/push Build is passing
2021-12-23 12:47:42 +02:00
b7efe2de40 data/test.csv: update invalid AGROVOC entry
Now that we can drop invalid AGROVOC values we should have a valid
value and an invalid value here. Depending on how the checker is
invoked we will either print a warning or drop the invalid value.
2021-12-23 12:45:38 +02:00
c43095139a tests/test_check.py: add tests for dropping invalid AGROVOC 2021-12-23 12:44:32 +02:00
a7727b8431 Add support for dropping invalid AGROVOC terms
Requires --agrovoc-fields <field.name> to do the actual validation,
and -d to drop invalid ones.
2021-12-23 12:43:55 +02:00
7763a021c5 csv_metadata_quality/fix.py: sort imports with isort
All checks were successful
continuous-integration/drone/push Build is passing
2021-12-15 23:15:02 +02:00
3c12ef3f66 Update requirements
Generated with poetry export:

    $ poetry export --without-hashes -f requirements.txt > requirements.txt
    $ poetry export --without-hashes --dev -f requirements.txt > requirements-dev.txt

I am trying `--without-hashes` to work around an error on pip install
when running in CI:

    ERROR: In --require-hashes mode, all requirements must have their versions pinned with ==.
2021-12-15 23:11:44 +02:00
aee2438e94 poetry.lock: run poetry update 2021-12-15 23:10:27 +02:00
a351ba9706 CHANGELOG.md: add notes about ftfy 2021-12-15 22:09:01 +02:00
e4faf114dc csv_metadata_quality/util.py: update for ftfy 6.0
The sequence_weirdness() heuristic is deprecated. Now we should use
is_bad().

See: https://ftfy.readthedocs.io/en/v6.0/heuristic.html
See: https://github.com/rspeer/python-ftfy/blob/master/CHANGELOG.md#version-60-april-2-2021
2021-12-15 21:58:07 +02:00
ff49a80432 csv_metadata_quality/fix.py: configure ftfy
Don't replace smart quotes in ftfy. If our text has them we should
keep them.
2021-12-15 21:51:51 +02:00
8b15154285 pyproject.toml: use ftfy 6.0
Lots of improvements here! Improvements to heuristics and a new way
to configure which fixes get applied.

See: https://github.com/rspeer/python-ftfy/blob/master/CHANGELOG.md#version-60-april-2-2021
2021-12-15 21:48:56 +02:00
5854f8e865 CHANGELOG.md: add note about unnecessary Unicode 2021-12-15 13:56:31 +02:00
e7322efadd csv_metadata_quality/app.py: move unnecessary Unicode fix
We actually want to do this after we try to fix mojibake with ftfy.
These "unnecessary" Unicode characters could actually help ftfy in
some cases because often times they indicate that some character
from another encoding was there before (like an accent, dash, or
smart quote).
2021-12-15 13:53:25 +02:00
95015febbd csv_metadata_quality/fix.py: fix thin spaces
All checks were successful
continuous-integration/drone/push Build is passing
Replace thin spaces with normal spaces. Sometimes I see these get
mis handled on Windows machines and they end up as "?" or so.
2021-12-09 23:22:53 +02:00
cef6c66b30 CHANGELOG.md: start next changes 2021-12-09 23:21:58 +02:00
9905e183ea Bump version to 0.6.0-dev 2021-12-09 23:21:30 +02:00
15 changed files with 231 additions and 276 deletions

View File

@ -13,7 +13,16 @@ steps:
- pip install -r requirements-dev.txt - pip install -r requirements-dev.txt
- pytest - pytest
- python setup.py install - python setup.py install
- csv-metadata-quality -i data/test.csv -o /tmp/test.csv -e -u --agrovoc-fields dcterms.subject,cg.coverage.country # Basic test
- csv-metadata-quality -i data/test.csv -o /tmp/test.csv
# Test with unsafe fixes
- csv-metadata-quality -i data/test.csv -o /tmp/test.csv -u
# Test with experimental checks
- csv-metadata-quality -i data/test.csv -o /tmp/test.csv -e
# Test with AGROVOC validation
- csv-metadata-quality -i data/test.csv -o /tmp/test.csv --agrovoc-fields dcterms.subject
# Test with AGROVOC validation (and dropping invalid)
- csv-metadata-quality -i data/test.csv -o /tmp/test.csv --agrovoc-fields dcterms.subject -d
--- ---
kind: pipeline kind: pipeline
@ -30,7 +39,16 @@ steps:
- pip install -r requirements-dev.txt - pip install -r requirements-dev.txt
- pytest - pytest
- python setup.py install - python setup.py install
- csv-metadata-quality -i data/test.csv -o /tmp/test.csv -e -u --agrovoc-fields dcterms.subject,cg.coverage.country # Basic test
- csv-metadata-quality -i data/test.csv -o /tmp/test.csv
# Test with unsafe fixes
- csv-metadata-quality -i data/test.csv -o /tmp/test.csv -u
# Test with experimental checks
- csv-metadata-quality -i data/test.csv -o /tmp/test.csv -e
# Test with AGROVOC validation
- csv-metadata-quality -i data/test.csv -o /tmp/test.csv --agrovoc-fields dcterms.subject
# Test with AGROVOC validation (and dropping invalid)
- csv-metadata-quality -i data/test.csv -o /tmp/test.csv --agrovoc-fields dcterms.subject -d
--- ---
kind: pipeline kind: pipeline
@ -47,7 +65,16 @@ steps:
- pip install -r requirements-dev.txt - pip install -r requirements-dev.txt
- pytest - pytest
- python setup.py install - python setup.py install
- csv-metadata-quality -i data/test.csv -o /tmp/test.csv -e -u --agrovoc-fields dcterms.subject,cg.coverage.country # Basic test
- csv-metadata-quality -i data/test.csv -o /tmp/test.csv
# Test with unsafe fixes
- csv-metadata-quality -i data/test.csv -o /tmp/test.csv -u
# Test with experimental checks
- csv-metadata-quality -i data/test.csv -o /tmp/test.csv -e
# Test with AGROVOC validation
- csv-metadata-quality -i data/test.csv -o /tmp/test.csv --agrovoc-fields dcterms.subject
# Test with AGROVOC validation (and dropping invalid)
- csv-metadata-quality -i data/test.csv -o /tmp/test.csv --agrovoc-fields dcterms.subject -d
--- ---
kind: pipeline kind: pipeline
@ -64,6 +91,15 @@ steps:
- pip install -r requirements-dev.txt - pip install -r requirements-dev.txt
- pytest - pytest
- python setup.py install - python setup.py install
- csv-metadata-quality -i data/test.csv -o /tmp/test.csv -e -u --agrovoc-fields dcterms.subject,cg.coverage.country # Basic test
- csv-metadata-quality -i data/test.csv -o /tmp/test.csv
# Test with unsafe fixes
- csv-metadata-quality -i data/test.csv -o /tmp/test.csv -u
# Test with experimental checks
- csv-metadata-quality -i data/test.csv -o /tmp/test.csv -e
# Test with AGROVOC validation
- csv-metadata-quality -i data/test.csv -o /tmp/test.csv --agrovoc-fields dcterms.subject
# Test with AGROVOC validation (and dropping invalid)
- csv-metadata-quality -i data/test.csv -o /tmp/test.csv --agrovoc-fields dcterms.subject -d
# vim: ts=2 sw=2 et # vim: ts=2 sw=2 et

View File

@ -38,4 +38,13 @@ jobs:
- name: Test CLI - name: Test CLI
run: | run: |
python setup.py install python setup.py install
csv-metadata-quality -i data/test.csv -o /tmp/test.csv -e -u --agrovoc-fields dcterms.subject,cg.coverage.country # Basic test
csv-metadata-quality -i data/test.csv -o /tmp/test.csv
# Test with unsafe fixes
csv-metadata-quality -i data/test.csv -o /tmp/test.csv -u
# Test with experimental checks
csv-metadata-quality -i data/test.csv -o /tmp/test.csv -e
# Test with AGROVOC validation
csv-metadata-quality -i data/test.csv -o /tmp/test.csv --agrovoc-fields dcterms.subject
# Test with AGROVOC validation (and dropping invalid)
csv-metadata-quality -i data/test.csv -o /tmp/test.csv --agrovoc-fields dcterms.subject -d

View File

@ -4,6 +4,20 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## Unreleased
### Changed
- Perform fix for "unnecessary" Unicode characters after we try to fix encoding
issues with ftfy
- ftfy heuristics to use `is_bad()` instead of `sequence_weirdness()`
- ftfy `fix_text()` to *not* change “smart quotes” to "ASCII quotes"
### Updated
- Python dependencies
### Added
- Ability to drop invalid AGROVOC values with `-d` when checking AGROVOC values
with `-a <field.name>`
## [0.5.0] - 2021-12-08 ## [0.5.0] - 2021-12-08
### Added ### Added
- Ability to check for, and fix, "mojibake" characters using [ftfy](https://github.com/LuminosoInsight/python-ftfy) - Ability to check for, and fix, "mojibake" characters using [ftfy](https://github.com/LuminosoInsight/python-ftfy)

View File

@ -21,6 +21,12 @@ def parse_args(argv):
"-a", "-a",
help="Comma-separated list of fields to validate against AGROVOC, for example: dcterms.subject,cg.coverage.country", help="Comma-separated list of fields to validate against AGROVOC, for example: dcterms.subject,cg.coverage.country",
) )
parser.add_argument(
"--drop-invalid-agrovoc",
"-d",
help="After validating metadata values against AGROVOC, drop invalid values.",
action="store_true",
)
parser.add_argument( parser.add_argument(
"--experimental-checks", "--experimental-checks",
"-e", "-e",
@ -103,9 +109,6 @@ def run(argv):
if args.unsafe_fixes: if args.unsafe_fixes:
df[column] = df[column].apply(fix.normalize_unicode, field_name=column) df[column] = df[column].apply(fix.normalize_unicode, field_name=column)
# Fix: unnecessary Unicode
df[column] = df[column].apply(fix.unnecessary_unicode)
# Check: suspicious characters # Check: suspicious characters
df[column].apply(check.suspicious_characters, field_name=column) df[column].apply(check.suspicious_characters, field_name=column)
@ -115,6 +118,9 @@ def run(argv):
else: else:
df[column].apply(check.mojibake, field_name=column) df[column].apply(check.mojibake, field_name=column)
# Fix: unnecessary Unicode
df[column] = df[column].apply(fix.unnecessary_unicode)
# Fix: invalid and unnecessary multi-value separators # Fix: invalid and unnecessary multi-value separators
df[column] = df[column].apply(fix.separators, field_name=column) df[column] = df[column].apply(fix.separators, field_name=column)
# Run whitespace fix again after fixing invalid separators # Run whitespace fix again after fixing invalid separators
@ -123,12 +129,14 @@ def run(argv):
# Fix: duplicate metadata values # Fix: duplicate metadata values
df[column] = df[column].apply(fix.duplicates, field_name=column) df[column] = df[column].apply(fix.duplicates, field_name=column)
# Check: invalid AGROVOC subject # Check: invalid AGROVOC subject and optionally drop them
if args.agrovoc_fields: if args.agrovoc_fields:
# Identify fields the user wants to validate against AGROVOC # Identify fields the user wants to validate against AGROVOC
for field in args.agrovoc_fields.split(","): for field in args.agrovoc_fields.split(","):
if column == field: if column == field:
df[column].apply(check.agrovoc, field_name=column) df[column] = df[column].apply(
check.agrovoc, field_name=column, drop=args.drop_invalid_agrovoc
)
# Check: invalid language # Check: invalid language
match = re.match(r"^.*?language.*$", column) match = re.match(r"^.*?language.*$", column)

View File

@ -188,7 +188,7 @@ def language(field):
return return
def agrovoc(field, field_name): def agrovoc(field, field_name, drop):
"""Check subject terms against AGROVOC REST API. """Check subject terms against AGROVOC REST API.
Function constructor expects the field as well as the field name because Function constructor expects the field as well as the field name because
@ -219,6 +219,9 @@ def agrovoc(field, field_name):
# prune old cache entries # prune old cache entries
requests_cache.remove_expired_responses() requests_cache.remove_expired_responses()
# Initialize an empty list to hold the validated AGROVOC values
values = list()
# Try to split multi-value field on "||" separator # Try to split multi-value field on "||" separator
for value in field.split("||"): for value in field.split("||"):
request_url = "http://agrovoc.uniroma2.it/agrovoc/rest/v1/agrovoc/search" request_url = "http://agrovoc.uniroma2.it/agrovoc/rest/v1/agrovoc/search"
@ -231,9 +234,25 @@ def agrovoc(field, field_name):
# check if there are any results # check if there are any results
if len(data["results"]) == 0: if len(data["results"]) == 0:
print(f"{Fore.RED}Invalid AGROVOC ({field_name}): {Fore.RESET}{value}") if drop:
print(
f"{Fore.GREEN}Dropping invalid AGROVOC ({field_name}): {Fore.RESET}{value}"
)
else:
print(
f"{Fore.RED}Invalid AGROVOC ({field_name}): {Fore.RESET}{value}"
)
return # value is invalid AGROVOC, but we are not dropping
values.append(value)
else:
# value is valid AGROVOC so save it
values.append(value)
# Create a new field consisting of all values joined with "||"
new_field = "||".join(values)
return new_field
def filename_extension(field): def filename_extension(field):

View File

@ -5,7 +5,7 @@ from unicodedata import normalize
import pandas as pd import pandas as pd
from colorama import Fore from colorama import Fore
from ftfy import fix_text from ftfy import TextFixerConfig, fix_text
from csv_metadata_quality.util import is_mojibake, is_nfc from csv_metadata_quality.util import is_mojibake, is_nfc
@ -104,6 +104,7 @@ def unnecessary_unicode(field):
Replaces unnecessary Unicode characters like: Replaces unnecessary Unicode characters like:
- Soft hyphen (U+00AD) → hyphen - Soft hyphen (U+00AD) → hyphen
- No-break space (U+00A0) → space - No-break space (U+00A0) → space
- Thin space (U+2009) → space
Return string with characters removed or replaced. Return string with characters removed or replaced.
""" """
@ -148,6 +149,16 @@ def unnecessary_unicode(field):
) )
field = re.sub(pattern, "-", field) field = re.sub(pattern, "-", field)
# Check for thin spaces (U+2009)
pattern = re.compile(r"\u2009")
match = re.findall(pattern, field)
if match:
print(
f"{Fore.GREEN}Replacing unnecessary Unicode (U+2009): {Fore.RESET}{field}"
)
field = re.sub(pattern, " ", field)
return field return field
@ -269,9 +280,12 @@ def mojibake(field, field_name):
if pd.isna(field): if pd.isna(field):
return field return field
# We don't want ftfy to change “smart quotes” to "ASCII quotes"
config = TextFixerConfig(uncurl_quotes=False)
if is_mojibake(field): if is_mojibake(field):
print(f"{Fore.GREEN}Fixing encoding issue ({field_name}): {Fore.RESET}{field}") print(f"{Fore.GREEN}Fixing encoding issue ({field_name}): {Fore.RESET}{field}")
return fix_text(field) return fix_text(field, config)
else: else:
return field return field

View File

@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-3.0-only # SPDX-License-Identifier: GPL-3.0-only
from ftfy.badness import sequence_weirdness from ftfy.badness import is_bad
def is_nfc(field): def is_nfc(field):
@ -38,7 +38,7 @@ def is_mojibake(field):
Return boolean. Return boolean.
""" """
if not sequence_weirdness(field): if not is_bad(field):
# Nothing weird, should be okay # Nothing weird, should be okay
return False return False
try: try:

View File

@ -1,3 +1,3 @@
# SPDX-License-Identifier: GPL-3.0-only # SPDX-License-Identifier: GPL-3.0-only
VERSION = "0.5.0" VERSION = "0.6.0-dev"

View File

@ -16,7 +16,7 @@ Suspicious character||foreˆt,2019-07-29,,,,,,,,,,,
Invalid ISO 639-1 (alpha 2) language,2019-07-29,,,jp,,,,,,,, Invalid ISO 639-1 (alpha 2) language,2019-07-29,,,jp,,,,,,,,
Invalid ISO 639-3 (alpha 3) language,2019-07-29,,,chi,,,,,,,, Invalid ISO 639-3 (alpha 3) language,2019-07-29,,,chi,,,,,,,,
Invalid language,2019-07-29,,,Span,,,,,,,, Invalid language,2019-07-29,,,Span,,,,,,,,
Invalid AGROVOC subject,2019-07-29,,,,FOREST,,,,,,, Invalid AGROVOC subject,2019-07-29,,,,LIVESTOCK||FOREST,,,,,,,
Newline (LF),2019-07-30,,,,"TANZA Newline (LF),2019-07-30,,,,"TANZA
NIA",,,,,,, NIA",,,,,,,
Missing date,,,,,,,,,,,, Missing date,,,,,,,,,,,,

1 dc.title dcterms.issued dc.identifier.issn dc.identifier.isbn dcterms.language dcterms.subject cg.coverage.country filename dcterms.license dcterms.type dcterms.bibliographicCitation cg.identifier.doi cg.coverage.region
16 Invalid ISO 639-1 (alpha 2) language 2019-07-29 jp
17 Invalid ISO 639-3 (alpha 3) language 2019-07-29 chi
18 Invalid language 2019-07-29 Span
19 Invalid AGROVOC subject 2019-07-29 FOREST LIVESTOCK||FOREST
20 Newline (LF) 2019-07-30 TANZA NIA
21 Missing date
22 Invalid country 2019-08-01 KENYAA

86
poetry.lock generated
View File

@ -262,15 +262,18 @@ pyflakes = ">=2.3.0,<2.4.0"
[[package]] [[package]]
name = "ftfy" name = "ftfy"
version = "5.9" version = "6.0.3"
description = "Fixes some problems with Unicode text after the fact" description = "Fixes some problems with Unicode text after the fact"
category = "main" category = "main"
optional = false optional = false
python-versions = ">=3.5" python-versions = ">=3.6"
[package.dependencies] [package.dependencies]
wcwidth = "*" wcwidth = "*"
[package.extras]
docs = ["furo", "sphinx"]
[[package]] [[package]]
name = "future" name = "future"
version = "0.18.2" version = "0.18.2"
@ -357,7 +360,7 @@ test = ["nose (>=0.10.1)", "requests", "testpath", "pygments", "nbformat", "ipyk
[[package]] [[package]]
name = "isodate" name = "isodate"
version = "0.6.0" version = "0.6.1"
description = "An ISO 8601 date/time/duration parser and formatter" description = "An ISO 8601 date/time/duration parser and formatter"
category = "dev" category = "dev"
optional = false optional = false
@ -492,7 +495,7 @@ pyparsing = ">=2.0.2,<3.0.5 || >3.0.5"
[[package]] [[package]]
name = "pandas" name = "pandas"
version = "1.3.4" version = "1.3.5"
description = "Powerful data structures for data analysis, time series, and statistics" description = "Powerful data structures for data analysis, time series, and statistics"
category = "main" category = "main"
optional = false optional = false
@ -598,7 +601,7 @@ python-versions = "*"
[[package]] [[package]]
name = "prompt-toolkit" name = "prompt-toolkit"
version = "3.0.23" version = "3.0.24"
description = "Library for building powerful interactive command lines in Python" description = "Library for building powerful interactive command lines in Python"
category = "dev" category = "dev"
optional = false optional = false
@ -795,7 +798,7 @@ test = ["black (==20.8b1)", "flake8", "flake8-comprehensions", "flake8-polyfill"
[[package]] [[package]]
name = "rich" name = "rich"
version = "10.15.2" version = "10.16.1"
description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal" description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal"
category = "dev" category = "dev"
optional = false optional = false
@ -876,7 +879,7 @@ python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*"
[[package]] [[package]]
name = "tomli" name = "tomli"
version = "1.2.2" version = "1.2.3"
description = "A lil' TOML parser" description = "A lil' TOML parser"
category = "dev" category = "dev"
optional = false optional = false
@ -964,7 +967,7 @@ testing = ["pytest (>=4.6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytes
[metadata] [metadata]
lock-version = "1.1" lock-version = "1.1"
python-versions = "^3.7.1" python-versions = "^3.7.1"
content-hash = "2e53197acdff785ea46adcb5ca22650efdecc6693a434a0f56a4f2b958442a78" content-hash = "a2ac1525ab60dc0758408867f5d651b2a3e40eb6a090b9eb7fad4a885a7965af"
[metadata.files] [metadata.files]
agate = [ agate = [
@ -1051,7 +1054,7 @@ flake8 = [
{file = "flake8-3.9.2.tar.gz", hash = "sha256:07528381786f2a6237b061f6e96610a4167b226cb926e2aa2b6b1d78057c576b"}, {file = "flake8-3.9.2.tar.gz", hash = "sha256:07528381786f2a6237b061f6e96610a4167b226cb926e2aa2b6b1d78057c576b"},
] ]
ftfy = [ ftfy = [
{file = "ftfy-5.9.tar.gz", hash = "sha256:8c4fb2863c0b82eae2ab3cf353d9ade268dfbde863d322f78d6a9fd5cefb31e9"}, {file = "ftfy-6.0.3.tar.gz", hash = "sha256:ba71121a9c8d7790d3e833c6c1021143f3e5c4118293ec3afb5d43ed9ca8e72b"},
] ]
future = [ future = [
{file = "future-0.18.2.tar.gz", hash = "sha256:b1bead90b70cf6ec3f0710ae53a525360fa360d306a86583adc6bf83a4db537d"}, {file = "future-0.18.2.tar.gz", hash = "sha256:b1bead90b70cf6ec3f0710ae53a525360fa360d306a86583adc6bf83a4db537d"},
@ -1125,8 +1128,8 @@ ipython = [
{file = "ipython-7.30.1.tar.gz", hash = "sha256:cb6aef731bf708a7727ab6cde8df87f0281b1427d41e65d62d4b68934fa54e97"}, {file = "ipython-7.30.1.tar.gz", hash = "sha256:cb6aef731bf708a7727ab6cde8df87f0281b1427d41e65d62d4b68934fa54e97"},
] ]
isodate = [ isodate = [
{file = "isodate-0.6.0-py2.py3-none-any.whl", hash = "sha256:aa4d33c06640f5352aca96e4b81afd8ab3b47337cc12089822d6f322ac772c81"}, {file = "isodate-0.6.1-py2.py3-none-any.whl", hash = "sha256:0751eece944162659049d35f4f549ed815792b38793f07cf73381c1c87cbed96"},
{file = "isodate-0.6.0.tar.gz", hash = "sha256:2e364a3d5759479cdb2d37cce6b9376ea504db2ff90252a2e5b7cc89cc9ff2d8"}, {file = "isodate-0.6.1.tar.gz", hash = "sha256:48c5881de7e8b0a0d648cb024c8062dc84e7b840ed81e864c7614fd3c127bde9"},
] ]
isort = [ isort = [
{file = "isort-5.10.1-py3-none-any.whl", hash = "sha256:6f62d78e2f89b4500b080fe3a81690850cd254227f27f75c3a0c491a1f351ba7"}, {file = "isort-5.10.1-py3-none-any.whl", hash = "sha256:6f62d78e2f89b4500b080fe3a81690850cd254227f27f75c3a0c491a1f351ba7"},
@ -1201,30 +1204,31 @@ packaging = [
{file = "packaging-21.3.tar.gz", hash = "sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb"}, {file = "packaging-21.3.tar.gz", hash = "sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb"},
] ]
pandas = [ pandas = [
{file = "pandas-1.3.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:9707bdc1ea9639c886b4d3be6e2a45812c1ac0c2080f94c31b71c9fa35556f9b"}, {file = "pandas-1.3.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:62d5b5ce965bae78f12c1c0df0d387899dd4211ec0bdc52822373f13a3a022b9"},
{file = "pandas-1.3.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c2f44425594ae85e119459bb5abb0748d76ef01d9c08583a667e3339e134218e"}, {file = "pandas-1.3.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:adfeb11be2d54f275142c8ba9bf67acee771b7186a5745249c7d5a06c670136b"},
{file = "pandas-1.3.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:372d72a3d8a5f2dbaf566a5fa5fa7f230842ac80f29a931fb4b071502cf86b9a"}, {file = "pandas-1.3.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:60a8c055d58873ad81cae290d974d13dd479b82cbb975c3e1fa2cf1920715296"},
{file = "pandas-1.3.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d99d2350adb7b6c3f7f8f0e5dfb7d34ff8dd4bc0a53e62c445b7e43e163fce63"}, {file = "pandas-1.3.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fd541ab09e1f80a2a1760032d665f6e032d8e44055d602d65eeea6e6e85498cb"},
{file = "pandas-1.3.4-cp310-cp310-win_amd64.whl", hash = "sha256:4acc28364863127bca1029fb72228e6f473bb50c32e77155e80b410e2068eeac"}, {file = "pandas-1.3.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2651d75b9a167cc8cc572cf787ab512d16e316ae00ba81874b560586fa1325e0"},
{file = "pandas-1.3.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:c2646458e1dce44df9f71a01dc65f7e8fa4307f29e5c0f2f92c97f47a5bf22f5"}, {file = "pandas-1.3.5-cp310-cp310-win_amd64.whl", hash = "sha256:aaf183a615ad790801fa3cf2fa450e5b6d23a54684fe386f7e3208f8b9bfbef6"},
{file = "pandas-1.3.4-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5298a733e5bfbb761181fd4672c36d0c627320eb999c59c65156c6a90c7e1b4f"}, {file = "pandas-1.3.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:344295811e67f8200de2390093aeb3c8309f5648951b684d8db7eee7d1c81fb7"},
{file = "pandas-1.3.4-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:22808afb8f96e2269dcc5b846decacb2f526dd0b47baebc63d913bf847317c8f"}, {file = "pandas-1.3.5-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:552020bf83b7f9033b57cbae65589c01e7ef1544416122da0c79140c93288f56"},
{file = "pandas-1.3.4-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b528e126c13816a4374e56b7b18bfe91f7a7f6576d1aadba5dee6a87a7f479ae"}, {file = "pandas-1.3.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5cce0c6bbeb266b0e39e35176ee615ce3585233092f685b6a82362523e59e5b4"},
{file = "pandas-1.3.4-cp37-cp37m-win32.whl", hash = "sha256:fe48e4925455c964db914b958f6e7032d285848b7538a5e1b19aeb26ffaea3ec"}, {file = "pandas-1.3.5-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7d28a3c65463fd0d0ba8bbb7696b23073efee0510783340a44b08f5e96ffce0c"},
{file = "pandas-1.3.4-cp37-cp37m-win_amd64.whl", hash = "sha256:eaca36a80acaacb8183930e2e5ad7f71539a66805d6204ea88736570b2876a7b"}, {file = "pandas-1.3.5-cp37-cp37m-win32.whl", hash = "sha256:a62949c626dd0ef7de11de34b44c6475db76995c2064e2d99c6498c3dba7fe58"},
{file = "pandas-1.3.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:42493f8ae67918bf129869abea8204df899902287a7f5eaf596c8e54e0ac7ff4"}, {file = "pandas-1.3.5-cp37-cp37m-win_amd64.whl", hash = "sha256:8025750767e138320b15ca16d70d5cdc1886e8f9cc56652d89735c016cd8aea6"},
{file = "pandas-1.3.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a388960f979665b447f0847626e40f99af8cf191bce9dc571d716433130cb3a7"}, {file = "pandas-1.3.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:fe95bae4e2d579812865db2212bb733144e34d0c6785c0685329e5b60fcb85dd"},
{file = "pandas-1.3.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5ba0aac1397e1d7b654fccf263a4798a9e84ef749866060d19e577e927d66e1b"}, {file = "pandas-1.3.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5f261553a1e9c65b7a310302b9dbac31cf0049a51695c14ebe04e4bfd4a96f02"},
{file = "pandas-1.3.4-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f567e972dce3bbc3a8076e0b675273b4a9e8576ac629149cf8286ee13c259ae5"}, {file = "pandas-1.3.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8b6dbec5f3e6d5dc80dcfee250e0a2a652b3f28663492f7dab9a24416a48ac39"},
{file = "pandas-1.3.4-cp38-cp38-win32.whl", hash = "sha256:c1aa4de4919358c5ef119f6377bc5964b3a7023c23e845d9db7d9016fa0c5b1c"}, {file = "pandas-1.3.5-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d3bc49af96cd6285030a64779de5b3688633a07eb75c124b0747134a63f4c05f"},
{file = "pandas-1.3.4-cp38-cp38-win_amd64.whl", hash = "sha256:dd324f8ee05925ee85de0ea3f0d66e1362e8c80799eb4eb04927d32335a3e44a"}, {file = "pandas-1.3.5-cp38-cp38-win32.whl", hash = "sha256:b6b87b2fb39e6383ca28e2829cddef1d9fc9e27e55ad91ca9c435572cdba51bf"},
{file = "pandas-1.3.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:d47750cf07dee6b55d8423471be70d627314277976ff2edd1381f02d52dbadf9"}, {file = "pandas-1.3.5-cp38-cp38-win_amd64.whl", hash = "sha256:a395692046fd8ce1edb4c6295c35184ae0c2bbe787ecbe384251da609e27edcb"},
{file = "pandas-1.3.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2d1dc09c0013d8faa7474574d61b575f9af6257ab95c93dcf33a14fd8d2c1bab"}, {file = "pandas-1.3.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:bd971a3f08b745a75a86c00b97f3007c2ea175951286cdda6abe543e687e5f2f"},
{file = "pandas-1.3.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:10e10a2527db79af6e830c3d5842a4d60383b162885270f8cffc15abca4ba4a9"}, {file = "pandas-1.3.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:37f06b59e5bc05711a518aa10beaec10942188dccb48918bb5ae602ccbc9f1a0"},
{file = "pandas-1.3.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:35c77609acd2e4d517da41bae0c11c70d31c87aae8dd1aabd2670906c6d2c143"}, {file = "pandas-1.3.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2c21778a688d3712d35710501f8001cdbf96eb70a7c587a3d5613573299fdca6"},
{file = "pandas-1.3.4-cp39-cp39-win32.whl", hash = "sha256:003ba92db58b71a5f8add604a17a059f3068ef4e8c0c365b088468d0d64935fd"}, {file = "pandas-1.3.5-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3345343206546545bc26a05b4602b6a24385b5ec7c75cb6059599e3d56831da2"},
{file = "pandas-1.3.4-cp39-cp39-win_amd64.whl", hash = "sha256:a51528192755f7429c5bcc9e80832c517340317c861318fea9cea081b57c9afd"}, {file = "pandas-1.3.5-cp39-cp39-win32.whl", hash = "sha256:c69406a2808ba6cf580c2255bcf260b3f214d2664a3a4197d0e640f573b46fd3"},
{file = "pandas-1.3.4.tar.gz", hash = "sha256:a2aa18d3f0b7d538e21932f637fbfe8518d085238b429e4790a35e1e44a96ffc"}, {file = "pandas-1.3.5-cp39-cp39-win_amd64.whl", hash = "sha256:32e1a26d5ade11b547721a72f9bfc4bd113396947606e00d5b4a5b79b3dcb006"},
{file = "pandas-1.3.5.tar.gz", hash = "sha256:1e4285f5de1012de20ca46b188ccf33521bff61ba5c5ebd78b4fb28e5416a9f1"},
] ]
parsedatetime = [ parsedatetime = [
{file = "parsedatetime-2.4-py2-none-any.whl", hash = "sha256:9ee3529454bf35c40a77115f5a596771e59e1aee8c53306f346c461b8e913094"}, {file = "parsedatetime-2.4-py2-none-any.whl", hash = "sha256:9ee3529454bf35c40a77115f5a596771e59e1aee8c53306f346c461b8e913094"},
@ -1259,8 +1263,8 @@ pprintpp = [
{file = "pprintpp-0.4.0.tar.gz", hash = "sha256:ea826108e2c7f49dc6d66c752973c3fc9749142a798d6b254e1e301cfdbc6403"}, {file = "pprintpp-0.4.0.tar.gz", hash = "sha256:ea826108e2c7f49dc6d66c752973c3fc9749142a798d6b254e1e301cfdbc6403"},
] ]
prompt-toolkit = [ prompt-toolkit = [
{file = "prompt_toolkit-3.0.23-py3-none-any.whl", hash = "sha256:5f29d62cb7a0ecacfa3d8ceea05a63cd22500543472d64298fc06ddda906b25d"}, {file = "prompt_toolkit-3.0.24-py3-none-any.whl", hash = "sha256:e56f2ff799bacecd3e88165b1e2f5ebf9bcd59e80e06d395fa0cc4b8bd7bb506"},
{file = "prompt_toolkit-3.0.23.tar.gz", hash = "sha256:7053aba00895473cb357819358ef33f11aa97e4ac83d38efb123e5649ceeecaf"}, {file = "prompt_toolkit-3.0.24.tar.gz", hash = "sha256:1bb05628c7d87b645974a1bad3f17612be0c29fa39af9f7688030163f680bad6"},
] ]
ptyprocess = [ ptyprocess = [
{file = "ptyprocess-0.7.0-py2.py3-none-any.whl", hash = "sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35"}, {file = "ptyprocess-0.7.0-py2.py3-none-any.whl", hash = "sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35"},
@ -1325,8 +1329,8 @@ requests-cache = [
{file = "requests_cache-0.6.4-py2.py3-none-any.whl", hash = "sha256:1102daa13a804abe23fad62d694e7dee58d6063a35d94bf6e8c9821e22e5a78b"}, {file = "requests_cache-0.6.4-py2.py3-none-any.whl", hash = "sha256:1102daa13a804abe23fad62d694e7dee58d6063a35d94bf6e8c9821e22e5a78b"},
] ]
rich = [ rich = [
{file = "rich-10.15.2-py3-none-any.whl", hash = "sha256:43b2c6ad51f46f6c94992aee546f1c177719f4e05aff8f5ea4d2efae3ebdac89"}, {file = "rich-10.16.1-py3-none-any.whl", hash = "sha256:bbe04dd6ac09e4b00d22cb1051aa127beaf6e16c3d8687b026e96d3fca6aad52"},
{file = "rich-10.15.2.tar.gz", hash = "sha256:1dded089b79dd042b3ab5cd63439a338e16652001f0c16e73acdcf4997ad772d"}, {file = "rich-10.16.1.tar.gz", hash = "sha256:4949e73de321784ef6664ebbc854ac82b20ff60b2865097b93f3b9b41e30da27"},
] ]
six = [ six = [
{file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"},
@ -1377,8 +1381,8 @@ toml = [
{file = "toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"}, {file = "toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"},
] ]
tomli = [ tomli = [
{file = "tomli-1.2.2-py3-none-any.whl", hash = "sha256:f04066f68f5554911363063a30b108d2b5a5b1a010aa8b6132af78489fe3aade"}, {file = "tomli-1.2.3-py3-none-any.whl", hash = "sha256:e3069e4be3ead9668e21cb9b074cd948f7b3113fd9c8bba083f48247aab8b11c"},
{file = "tomli-1.2.2.tar.gz", hash = "sha256:c6ce0015eb38820eaf32b5db832dbc26deb3dd427bd5f6556cf0acac2c214fee"}, {file = "tomli-1.2.3.tar.gz", hash = "sha256:05b6166bff487dc068d322585c7ea4ef78deed501cc124060e0f238e89a9231f"},
] ]
traitlets = [ traitlets = [
{file = "traitlets-5.1.1-py3-none-any.whl", hash = "sha256:2d313cc50a42cd6c277e7d7dc8d4d7fedd06a2c215f78766ae7b1a66277e0033"}, {file = "traitlets-5.1.1-py3-none-any.whl", hash = "sha256:2d313cc50a42cd6c277e7d7dc8d4d7fedd06a2c215f78766ae7b1a66277e0033"},

View File

@ -1,6 +1,6 @@
[tool.poetry] [tool.poetry]
name = "csv-metadata-quality" name = "csv-metadata-quality"
version = "0.5.0" version = "0.6.0-dev"
description="A simple, but opinionated CSV quality checking and fixing pipeline for CSVs in the DSpace ecosystem." description="A simple, but opinionated CSV quality checking and fixing pipeline for CSVs in the DSpace ecosystem."
authors = ["Alan Orth <alan.orth@gmail.com>"] authors = ["Alan Orth <alan.orth@gmail.com>"]
license="GPL-3.0-only" license="GPL-3.0-only"
@ -21,7 +21,7 @@ pycountry = "^19.8.18"
langid = "^1.1.6" langid = "^1.1.6"
colorama = "^0.4.4" colorama = "^0.4.4"
spdx-license-list = "^0.5.2" spdx-license-list = "^0.5.2"
ftfy = "^5.9" ftfy = "^6.0"
SQLAlchemy = ">=1.3.3,<1.4.23" SQLAlchemy = ">=1.3.3,<1.4.23"
country-converter = "^0.7.4" country-converter = "^0.7.4"

View File

@ -19,14 +19,14 @@ dbfread==2.0.7
decorator==5.1.0; python_version >= "3.7" and python_version < "4.0" decorator==5.1.0; python_version >= "3.7" and python_version < "4.0"
et-xmlfile==1.1.0; python_version >= "3.6" et-xmlfile==1.1.0; python_version >= "3.6"
flake8==3.9.2; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.5.0") flake8==3.9.2; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.5.0")
ftfy==5.9; python_version >= "3.5" ftfy==6.0.3; python_version >= "3.6"
future==0.18.2; python_version >= "2.6" and python_full_version < "3.0.0" or python_full_version >= "3.3.0" future==0.18.2; python_version >= "2.6" and python_full_version < "3.0.0" or python_full_version >= "3.3.0"
greenlet==1.1.2; python_version >= "3" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version >= "3" greenlet==1.1.2; python_version >= "3" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version >= "3"
idna==3.3; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version >= "3.6" idna==3.3; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version >= "3.6"
importlib-metadata==4.8.2; python_full_version >= "3.6.2" and python_version < "3.8" and python_version >= "3.6" and (python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.6") and (python_version >= "3.6" and python_full_version < "3.0.0" and python_version < "3.8" or python_full_version >= "3.6.0" and python_version < "3.8" and python_version >= "3.6") importlib-metadata==4.8.2; python_full_version >= "3.6.2" and python_version < "3.8" and python_version >= "3.6" and (python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.6") and (python_version >= "3.6" and python_full_version < "3.0.0" and python_version < "3.8" or python_full_version >= "3.6.0" and python_version < "3.8" and python_version >= "3.6")
iniconfig==1.1.1; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.6" iniconfig==1.1.1; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.6"
ipython==7.30.1; python_version >= "3.7" and python_version < "4.0" ipython==7.30.1; python_version >= "3.7" and python_version < "4.0"
isodate==0.6.0 isodate==0.6.1
isort==5.10.1; python_full_version >= "3.6.1" and python_version < "4.0" isort==5.10.1; python_full_version >= "3.6.1" and python_version < "4.0"
itsdangerous==2.0.1; python_version >= "3.6" itsdangerous==2.0.1; python_version >= "3.6"
jedi==0.18.1; python_version >= "3.7" and python_version < "4.0" jedi==0.18.1; python_version >= "3.7" and python_version < "4.0"
@ -39,7 +39,7 @@ numpy==1.21.1
olefile==0.46; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" olefile==0.46; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.4.0"
openpyxl==3.0.9; python_version >= "3.6" openpyxl==3.0.9; python_version >= "3.6"
packaging==21.3; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.6" packaging==21.3; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.6"
pandas==1.3.4; python_full_version >= "3.7.1" pandas==1.3.5; python_full_version >= "3.7.1"
parsedatetime==2.4 parsedatetime==2.4
parso==0.8.3; python_version >= "3.7" and python_version < "4.0" parso==0.8.3; python_version >= "3.7" and python_version < "4.0"
pathspec==0.9.0; python_full_version >= "3.6.2" pathspec==0.9.0; python_full_version >= "3.6.2"
@ -48,7 +48,7 @@ pickleshare==0.7.5; python_version >= "3.7" and python_version < "4.0"
platformdirs==2.4.0; python_version >= "3.6" and python_full_version >= "3.6.2" platformdirs==2.4.0; python_version >= "3.6" and python_full_version >= "3.6.2"
pluggy==1.0.0; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.6" pluggy==1.0.0; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.6"
pprintpp==0.4.0; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" pprintpp==0.4.0; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.4.0"
prompt-toolkit==3.0.23; python_version >= "3.7" and python_version < "4.0" and python_full_version >= "3.6.2" prompt-toolkit==3.0.24; python_version >= "3.7" and python_version < "4.0" and python_full_version >= "3.6.2"
ptyprocess==0.7.0; python_version >= "3.7" and python_version < "4.0" and sys_platform != "win32" ptyprocess==0.7.0; python_version >= "3.7" and python_version < "4.0" and sys_platform != "win32"
py==1.11.0; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "3.6" py==1.11.0; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "3.6"
pycodestyle==2.7.0; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" pycodestyle==2.7.0; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0"
@ -65,13 +65,13 @@ pytimeparse==1.1.8
pytz==2021.3; python_full_version >= "3.7.1" pytz==2021.3; python_full_version >= "3.7.1"
requests-cache==0.6.4; python_version >= "3.6" requests-cache==0.6.4; python_version >= "3.6"
requests==2.26.0; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.6.0") requests==2.26.0; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.6.0")
rich==10.15.2; python_full_version >= "3.6.2" and python_full_version < "4.0.0" and (python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.4.0") rich==10.16.1; python_full_version >= "3.6.2" and python_full_version < "4.0.0" and (python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.4.0")
six==1.16.0; python_full_version >= "3.7.1" and python_version >= "3.6" and (python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.3.0") six==1.16.0; python_full_version >= "3.7.1" and python_version >= "3.6" and (python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.3.0")
spdx-license-list==0.5.2 spdx-license-list==0.5.2
sqlalchemy==1.4.22; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.6.0") sqlalchemy==1.4.22; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.6.0")
text-unidecode==1.3; python_version >= "3.6" text-unidecode==1.3; python_version >= "3.6"
toml==0.10.2; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.6" toml==0.10.2; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.6"
tomli==1.2.2; python_version >= "3.6" and python_full_version >= "3.6.2" tomli==1.2.3; python_version >= "3.6" and python_full_version >= "3.6.2"
traitlets==5.1.1; python_version >= "3.7" and python_version < "4.0" traitlets==5.1.1; python_version >= "3.7" and python_version < "4.0"
typed-ast==1.5.1; python_version < "3.8" and implementation_name == "cpython" and python_full_version >= "3.6.2" and python_version >= "3.6" typed-ast==1.5.1; python_version < "3.8" and implementation_name == "cpython" and python_full_version >= "3.6.2" and python_version >= "3.6"
typing-extensions==4.0.1 typing-extensions==4.0.1

View File

@ -1,201 +1,27 @@
certifi==2021.10.8; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version >= "3.6" \ certifi==2021.10.8; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version >= "3.6"
--hash=sha256:d62a0163eb4c2344ac042ab2bdf75399a71a2d8c7d47eac2e2ee91b9d6339569 \ charset-normalizer==2.0.9; python_full_version >= "3.6.0" and python_version >= "3.6"
--hash=sha256:78884e7c1d4b00ce3cea67b44566851c4343c120abd683433ce934a68ea58872 colorama==0.4.4; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.5.0")
charset-normalizer==2.0.9; python_full_version >= "3.6.0" and python_version >= "3.6" \ country-converter==0.7.4
--hash=sha256:b0b883e8e874edfdece9c28f314e3dd5badf067342e42fb162203335ae61aa2c \ ftfy==6.0.3; python_version >= "3.6"
--hash=sha256:1eecaa09422db5be9e29d7fc65664e6c33bd06f9ced7838578ba40d58bdf3721 greenlet==1.1.2; python_version >= "3" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version >= "3"
colorama==0.4.4; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.5.0") \ idna==3.3; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version >= "3.6"
--hash=sha256:9f47eda37229f68eee03b24b9748937c7dc3868f906e8ba69fbcbdd3bc5dc3e2 \ importlib-metadata==4.8.2; python_version >= "3.6" and python_full_version < "3.0.0" and python_version < "3.8" or python_full_version >= "3.6.0" and python_version < "3.8" and python_version >= "3.6"
--hash=sha256:5941b2b48a20143d2267e95b1c2a7603ce057ee39fd88e7329b0c292aa16869b itsdangerous==2.0.1; python_version >= "3.6"
country-converter==0.7.4 \ langid==1.1.6
--hash=sha256:0291cc91c4a4efe7f128a11c8c6e4cb761f7fea7cde2517f8677c7c56da334d3 numpy==1.21.1
ftfy==5.9; python_version >= "3.5" \ pandas==1.3.5; python_full_version >= "3.7.1"
--hash=sha256:8c4fb2863c0b82eae2ab3cf353d9ade268dfbde863d322f78d6a9fd5cefb31e9 pycountry==19.8.18
greenlet==1.1.2; python_version >= "3" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version >= "3" \ python-dateutil==2.8.2; python_full_version >= "3.7.1"
--hash=sha256:58df5c2a0e293bf665a51f8a100d3e9956febfbf1d9aaf8c0677cf70218910c6 \ python-stdnum==1.17
--hash=sha256:aec52725173bd3a7b56fe91bc56eccb26fbdff1386ef123abb63c84c5b43b63a \ pytz==2021.3; python_full_version >= "3.7.1"
--hash=sha256:833e1551925ed51e6b44c800e71e77dacd7e49181fdc9ac9a0bf3714d515785d \ requests-cache==0.6.4; python_version >= "3.6"
--hash=sha256:aa5b467f15e78b82257319aebc78dd2915e4c1436c3c0d1ad6f53e47ba6e2713 \ requests==2.26.0; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.6.0")
--hash=sha256:40b951f601af999a8bf2ce8c71e8aaa4e8c6f78ff8afae7b808aae2dc50d4c40 \ six==1.16.0; python_full_version >= "3.7.1" and python_version >= "3.6"
--hash=sha256:95e69877983ea39b7303570fa6760f81a3eec23d0e3ab2021b7144b94d06202d \ spdx-license-list==0.5.2
--hash=sha256:356b3576ad078c89a6107caa9c50cc14e98e3a6c4874a37c3e0273e4baf33de8 \ sqlalchemy==1.4.22; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.6.0")
--hash=sha256:8639cadfda96737427330a094476d4c7a56ac03de7265622fcf4cfe57c8ae18d \ typing-extensions==4.0.1; python_version >= "3.6" and python_full_version < "3.0.0" and python_version < "3.8" or python_full_version >= "3.6.0" and python_version < "3.8" and python_version >= "3.6"
--hash=sha256:97e5306482182170ade15c4b0d8386ded995a07d7cc2ca8f27958d34d6736497 \ url-normalize==1.4.3; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version >= "3.6"
--hash=sha256:e6a36bb9474218c7a5b27ae476035497a6990e21d04c279884eb10d9b290f1b1 \ urllib3==1.26.7; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version < "4" and python_version >= "3.6"
--hash=sha256:abb7a75ed8b968f3061327c433a0fbd17b729947b400747c334a9c29a9af6c58 \ wcwidth==0.2.5; python_version >= "3.6"
--hash=sha256:14d4f3cd4e8b524ae9b8aa567858beed70c392fdec26dbdb0a8a418392e71708 \ xlrd==1.2.0; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.4.0")
--hash=sha256:17ff94e7a83aa8671a25bf5b59326ec26da379ace2ebc4411d690d80a7fbcf23 \ zipp==3.6.0; python_version >= "3.6" and python_full_version < "3.0.0" and python_version < "3.8" or python_full_version >= "3.6.0" and python_version < "3.8" and python_version >= "3.6"
--hash=sha256:9f3cba480d3deb69f6ee2c1825060177a22c7826431458c697df88e6aeb3caee \
--hash=sha256:fa877ca7f6b48054f847b61d6fa7bed5cebb663ebc55e018fda12db09dcc664c \
--hash=sha256:7cbd7574ce8e138bda9df4efc6bf2ab8572c9aff640d8ecfece1b006b68da963 \
--hash=sha256:903bbd302a2378f984aef528f76d4c9b1748f318fe1294961c072bdc7f2ffa3e \
--hash=sha256:049fe7579230e44daef03a259faa24511d10ebfa44f69411d99e6a184fe68073 \
--hash=sha256:dd0b1e9e891f69e7675ba5c92e28b90eaa045f6ab134ffe70b52e948aa175b3c \
--hash=sha256:7418b6bfc7fe3331541b84bb2141c9baf1ec7132a7ecd9f375912eca810e714e \
--hash=sha256:f9d29ca8a77117315101425ec7ec2a47a22ccf59f5593378fc4077ac5b754fce \
--hash=sha256:21915eb821a6b3d9d8eefdaf57d6c345b970ad722f856cd71739493ce003ad08 \
--hash=sha256:eff9d20417ff9dcb0d25e2defc2574d10b491bf2e693b4e491914738b7908168 \
--hash=sha256:32ca72bbc673adbcfecb935bb3fb1b74e663d10a4b241aaa2f5a75fe1d1f90aa \
--hash=sha256:f0214eb2a23b85528310dad848ad2ac58e735612929c8072f6093f3585fd342d \
--hash=sha256:b92e29e58bef6d9cfd340c72b04d74c4b4e9f70c9fa7c78b674d1fec18896dc4 \
--hash=sha256:fdcec0b8399108577ec290f55551d926d9a1fa6cad45882093a7a07ac5ec147b \
--hash=sha256:93f81b134a165cc17123626ab8da2e30c0455441d4ab5576eed73a64c025b25c \
--hash=sha256:1e12bdc622676ce47ae9abbf455c189e442afdde8818d9da983085df6312e7a1 \
--hash=sha256:8c790abda465726cfb8bb08bd4ca9a5d0a7bd77c7ac1ca1b839ad823b948ea28 \
--hash=sha256:f276df9830dba7a333544bd41070e8175762a7ac20350786b322b714b0e654f5 \
--hash=sha256:64e6175c2e53195278d7388c454e0b30997573f3f4bd63697f88d855f7a6a1fc \
--hash=sha256:b11548073a2213d950c3f671aa88e6f83cda6e2fb97a8b6317b1b5b33d850e06 \
--hash=sha256:9633b3034d3d901f0a46b7939f8c4d64427dfba6bbc5a36b1a67364cf148a1b0 \
--hash=sha256:eb6ea6da4c787111adf40f697b4e58732ee0942b5d3bd8f435277643329ba627 \
--hash=sha256:f3acda1924472472ddd60c29e5b9db0cec629fbe3c5c5accb74d6d6d14773478 \
--hash=sha256:e859fcb4cbe93504ea18008d1df98dee4f7766db66c435e4882ab35cf70cac43 \
--hash=sha256:00e44c8afdbe5467e4f7b5851be223be68adb4272f44696ee71fe46b7036a711 \
--hash=sha256:ec8c433b3ab0419100bd45b47c9c8551248a5aee30ca5e9d399a0b57ac04651b \
--hash=sha256:288c6a76705dc54fba69fbcb59904ae4ad768b4c768839b8ca5fdadec6dd8cfd \
--hash=sha256:8d2f1fb53a421b410751887eb4ff21386d119ef9cde3797bf5e7ed49fb51a3b3 \
--hash=sha256:166eac03e48784a6a6e0e5f041cfebb1ab400b394db188c48b3a84737f505b67 \
--hash=sha256:572e1787d1460da79590bf44304abbc0a2da944ea64ec549188fa84d89bba7ab \
--hash=sha256:be5f425ff1f5f4b3c1e33ad64ab994eed12fc284a6ea71c5243fd564502ecbe5 \
--hash=sha256:b1692f7d6bc45e3200844be0dba153612103db241691088626a33ff1f24a0d88 \
--hash=sha256:7227b47e73dedaa513cdebb98469705ef0d66eb5a1250144468e9c3097d6b59b \
--hash=sha256:7ff61ff178250f9bb3cd89752df0f1dd0e27316a8bd1465351652b1b4a4cdfd3 \
--hash=sha256:f70a9e237bb792c7cc7e44c531fd48f5897961701cdaa06cf22fc14965c496cf \
--hash=sha256:013d61294b6cd8fe3242932c1c5e36e5d1db2c8afb58606c5a67efce62c1f5fd \
--hash=sha256:e30f5ea4ae2346e62cedde8794a56858a67b878dd79f7df76a0767e356b1744a
idna==3.3; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version >= "3.6" \
--hash=sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff \
--hash=sha256:9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d
importlib-metadata==4.8.2; python_version >= "3.6" and python_full_version < "3.0.0" and python_version < "3.8" or python_full_version >= "3.6.0" and python_version < "3.8" and python_version >= "3.6" \
--hash=sha256:53ccfd5c134223e497627b9815d5030edf77d2ed573922f7a0b8f8bb81a1c100 \
--hash=sha256:75bdec14c397f528724c1bfd9709d660b33a4d2e77387a3358f20b848bb5e5fb
itsdangerous==2.0.1; python_version >= "3.6" \
--hash=sha256:5174094b9637652bdb841a3029700391451bd092ba3db90600dea710ba28e97c \
--hash=sha256:9e724d68fc22902a1435351f84c3fb8623f303fffcc566a4cb952df8c572cff0
langid==1.1.6 \
--hash=sha256:044bcae1912dab85c33d8e98f2811b8f4ff1213e5e9a9e9510137b84da2cb293
numpy==1.21.1 \
--hash=sha256:38e8648f9449a549a7dfe8d8755a5979b45b3538520d1e735637ef28e8c2dc50 \
--hash=sha256:fd7d7409fa643a91d0a05c7554dd68aa9c9bb16e186f6ccfe40d6e003156e33a \
--hash=sha256:a75b4498b1e93d8b700282dc8e655b8bd559c0904b3910b144646dbbbc03e062 \
--hash=sha256:1412aa0aec3e00bc23fbb8664d76552b4efde98fb71f60737c83efbac24112f1 \
--hash=sha256:e46ceaff65609b5399163de5893d8f2a82d3c77d5e56d976c8b5fb01faa6b671 \
--hash=sha256:c6a2324085dd52f96498419ba95b5777e40b6bcbc20088fddb9e8cbb58885e8e \
--hash=sha256:73101b2a1fef16602696d133db402a7e7586654682244344b8329cdcbbb82172 \
--hash=sha256:7a708a79c9a9d26904d1cca8d383bf869edf6f8e7650d85dbc77b041e8c5a0f8 \
--hash=sha256:95b995d0c413f5d0428b3f880e8fe1660ff9396dcd1f9eedbc311f37b5652e16 \
--hash=sha256:635e6bd31c9fb3d475c8f44a089569070d10a9ef18ed13738b03049280281267 \
--hash=sha256:4a3d5fb89bfe21be2ef47c0614b9c9c707b7362386c9a3ff1feae63e0267ccb6 \
--hash=sha256:8a326af80e86d0e9ce92bcc1e65c8ff88297de4fa14ee936cb2293d414c9ec63 \
--hash=sha256:791492091744b0fe390a6ce85cc1bf5149968ac7d5f0477288f78c89b385d9af \
--hash=sha256:0318c465786c1f63ac05d7c4dbcecd4d2d7e13f0959b01b534ea1e92202235c5 \
--hash=sha256:9a513bd9c1551894ee3d31369f9b07460ef223694098cf27d399513415855b68 \
--hash=sha256:91c6f5fc58df1e0a3cc0c3a717bb3308ff850abdaa6d2d802573ee2b11f674a8 \
--hash=sha256:978010b68e17150db8765355d1ccdd450f9fc916824e8c4e35ee620590e234cd \
--hash=sha256:9749a40a5b22333467f02fe11edc98f022133ee1bfa8ab99bda5e5437b831214 \
--hash=sha256:d7a4aeac3b94af92a9373d6e77b37691b86411f9745190d2c351f410ab3a791f \
--hash=sha256:d9e7912a56108aba9b31df688a4c4f5cb0d9d3787386b87d504762b6754fbb1b \
--hash=sha256:25b40b98ebdd272bc3020935427a4530b7d60dfbe1ab9381a39147834e985eac \
--hash=sha256:8a92c5aea763d14ba9d6475803fc7904bda7decc2a0a68153f587ad82941fec1 \
--hash=sha256:05a0f648eb28bae4bcb204e6fd14603de2908de982e761a2fc78efe0f19e96e1 \
--hash=sha256:f01f28075a92eede918b965e86e8f0ba7b7797a95aa8d35e1cc8821f5fc3ad6a \
--hash=sha256:88c0b89ad1cc24a5efbb99ff9ab5db0f9a86e9cc50240177a571fbe9c2860ac2 \
--hash=sha256:01721eefe70544d548425a07c80be8377096a54118070b8a62476866d5208e33 \
--hash=sha256:2d4d1de6e6fb3d28781c73fbde702ac97f03d79e4ffd6598b880b2d95d62ead4 \
--hash=sha256:dff4af63638afcc57a3dfb9e4b26d434a7a602d225b42d746ea7fe2edf1342fd
pandas==1.3.4; python_full_version >= "3.7.1" \
--hash=sha256:9707bdc1ea9639c886b4d3be6e2a45812c1ac0c2080f94c31b71c9fa35556f9b \
--hash=sha256:c2f44425594ae85e119459bb5abb0748d76ef01d9c08583a667e3339e134218e \
--hash=sha256:372d72a3d8a5f2dbaf566a5fa5fa7f230842ac80f29a931fb4b071502cf86b9a \
--hash=sha256:d99d2350adb7b6c3f7f8f0e5dfb7d34ff8dd4bc0a53e62c445b7e43e163fce63 \
--hash=sha256:4acc28364863127bca1029fb72228e6f473bb50c32e77155e80b410e2068eeac \
--hash=sha256:c2646458e1dce44df9f71a01dc65f7e8fa4307f29e5c0f2f92c97f47a5bf22f5 \
--hash=sha256:5298a733e5bfbb761181fd4672c36d0c627320eb999c59c65156c6a90c7e1b4f \
--hash=sha256:22808afb8f96e2269dcc5b846decacb2f526dd0b47baebc63d913bf847317c8f \
--hash=sha256:b528e126c13816a4374e56b7b18bfe91f7a7f6576d1aadba5dee6a87a7f479ae \
--hash=sha256:fe48e4925455c964db914b958f6e7032d285848b7538a5e1b19aeb26ffaea3ec \
--hash=sha256:eaca36a80acaacb8183930e2e5ad7f71539a66805d6204ea88736570b2876a7b \
--hash=sha256:42493f8ae67918bf129869abea8204df899902287a7f5eaf596c8e54e0ac7ff4 \
--hash=sha256:a388960f979665b447f0847626e40f99af8cf191bce9dc571d716433130cb3a7 \
--hash=sha256:5ba0aac1397e1d7b654fccf263a4798a9e84ef749866060d19e577e927d66e1b \
--hash=sha256:f567e972dce3bbc3a8076e0b675273b4a9e8576ac629149cf8286ee13c259ae5 \
--hash=sha256:c1aa4de4919358c5ef119f6377bc5964b3a7023c23e845d9db7d9016fa0c5b1c \
--hash=sha256:dd324f8ee05925ee85de0ea3f0d66e1362e8c80799eb4eb04927d32335a3e44a \
--hash=sha256:d47750cf07dee6b55d8423471be70d627314277976ff2edd1381f02d52dbadf9 \
--hash=sha256:2d1dc09c0013d8faa7474574d61b575f9af6257ab95c93dcf33a14fd8d2c1bab \
--hash=sha256:10e10a2527db79af6e830c3d5842a4d60383b162885270f8cffc15abca4ba4a9 \
--hash=sha256:35c77609acd2e4d517da41bae0c11c70d31c87aae8dd1aabd2670906c6d2c143 \
--hash=sha256:003ba92db58b71a5f8add604a17a059f3068ef4e8c0c365b088468d0d64935fd \
--hash=sha256:a51528192755f7429c5bcc9e80832c517340317c861318fea9cea081b57c9afd \
--hash=sha256:a2aa18d3f0b7d538e21932f637fbfe8518d085238b429e4790a35e1e44a96ffc
pycountry==19.8.18 \
--hash=sha256:3c57aa40adcf293d59bebaffbe60d8c39976fba78d846a018dc0c2ec9c6cb3cb
python-dateutil==2.8.2; python_full_version >= "3.7.1" \
--hash=sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86 \
--hash=sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9
python-stdnum==1.17 \
--hash=sha256:374e2b5e13912ccdbf50b0b23fca2c3e0531174805c32d74e145f37756328340 \
--hash=sha256:a46e6cf9652807314d369b654b255c86a59f93d18be2834f3d567ed1a346c547
pytz==2021.3; python_full_version >= "3.7.1" \
--hash=sha256:3672058bc3453457b622aab7a1c3bfd5ab0bdae451512f6cf25f64ed37f5b87c \
--hash=sha256:acad2d8b20a1af07d4e4c9d2e9285c5ed9104354062f275f3fcd88dcef4f1326
requests-cache==0.6.4; python_version >= "3.6" \
--hash=sha256:dd9120a4ab7b8128cba9b6b120d8b5560d566a3cd0f828cced3d3fd60a42ec40 \
--hash=sha256:1102daa13a804abe23fad62d694e7dee58d6063a35d94bf6e8c9821e22e5a78b
requests==2.26.0; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.6.0") \
--hash=sha256:6c1246513ecd5ecd4528a0906f910e8f0f9c6b8ec72030dc9fd154dc1a6efd24 \
--hash=sha256:b8aa58f8cf793ffd8782d3d8cb19e66ef36f7aba4353eec859e74678b01b07a7
six==1.16.0; python_full_version >= "3.7.1" and python_version >= "3.6" \
--hash=sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254 \
--hash=sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926
spdx-license-list==0.5.2 \
--hash=sha256:1b338470c7b403dbecceca563a316382c7977516128ca6c1e8f7078e3ed6e7b0 \
--hash=sha256:952996f72ab807972dc2278bb9b91e5294767211e51f09aad9c0e2ff5b82a31b
sqlalchemy==1.4.22; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.6.0") \
--hash=sha256:488608953385d6c127d2dcbc4b11f8d7f2f30b89f6bd27c01b042253d985cc2f \
--hash=sha256:5d856cc50fd26fc8dd04892ed5a5a3d7eeb914fea2c2e484183e2d84c14926e0 \
--hash=sha256:a00d9c6d3a8afe1d1681cd8a5266d2f0ed684b0b44bada2ca82403b9e8b25d39 \
--hash=sha256:5908ea6c652a050d768580d01219c98c071e71910ab8e7b42c02af4010608397 \
--hash=sha256:b7fb937c720847879c7402fe300cfdb2aeff22349fa4ea3651bca4e2d6555939 \
--hash=sha256:9bfe882d5a1bbde0245dca0bd48da0976bd6634cf2041d2fdf0417c5463e40e5 \
--hash=sha256:eedd76f135461cf237534a6dc0d1e0f6bb88a1dc193678fab48a11d223462da5 \
--hash=sha256:6a16c7c4452293da5143afa3056680db2d187b380b3ef4d470d4e29885720de3 \
--hash=sha256:44d23ea797a5e0be71bc5454b9ae99158ea0edc79e2393c6e9a2354de88329c0 \
--hash=sha256:a5e14cb0c0a4ac095395f24575a0e7ab5d1be27f5f9347f1762f21505e3ba9f1 \
--hash=sha256:bc34a007e604091ca3a4a057525efc4cefd2b7fe970f44d20b9cfa109ab1bddb \
--hash=sha256:756f5d2f5b92d27450167247fb574b09c4cd192a3f8c2e493b3e518a204ee543 \
--hash=sha256:9fcbb4b4756b250ed19adc5e28c005b8ed56fdb5c21efa24c6822c0575b4964d \
--hash=sha256:09dbb4bc01a734ccddbf188deb2a69aede4b3c153a72b6d5c6900be7fb2945b1 \
--hash=sha256:f028ef6a1d828bc754852a022b2160e036202ac8658a6c7d34875aafd14a9a15 \
--hash=sha256:68393d3fd31469845b6ba11f5b4209edbea0b58506be0e077aafbf9aa2e21e11 \
--hash=sha256:891927a49b2363a4199763a9d436d97b0b42c65922a4ea09025600b81a00d17e \
--hash=sha256:fd2102a8f8a659522719ed73865dff3d3cc76eb0833039dc473e0ad3041d04be \
--hash=sha256:4014978de28163cd8027434916a92d0f5bb1a3a38dff5e8bf8bff4d9372a9117 \
--hash=sha256:f814d80844969b0d22ea63663da4de5ca1c434cfbae226188901e5d368792c17 \
--hash=sha256:d09a760b0a045b4d799102ae7965b5491ccf102123f14b2a8cc6c01d1021a2d9 \
--hash=sha256:26daa429f039e29b1e523bf763bfab17490556b974c77b5ca7acb545b9230e9a \
--hash=sha256:12bac5fa1a6ea870bdccb96fe01610641dd44ebe001ed91ef7fcd980e9702db5 \
--hash=sha256:39b5d36ab71f73c068cdcf70c38075511de73616e6c7fdd112d6268c2704d9f5 \
--hash=sha256:5102b9face693e8b2db3b2539c7e1a5d9a5b4dc0d79967670626ffd2f710d6e6 \
--hash=sha256:c9373ef67a127799027091fa53449125351a8c943ddaa97bec4e99271dbb21f4 \
--hash=sha256:36a089dc604032d41343d86290ce85d4e6886012eea73faa88001260abf5ff81 \
--hash=sha256:b48148ceedfb55f764562e04c00539bb9ea72bf07820ca15a594a9a049ff6b0e \
--hash=sha256:1fdae7d980a2fa617d119d0dc13ecb5c23cc63a8b04ffcb5298f2c59d86851e9 \
--hash=sha256:ec1be26cdccd60d180359a527d5980d959a26269a2c7b1b327a1eea0cab37ed8
typing-extensions==4.0.1; python_version >= "3.6" and python_full_version < "3.0.0" and python_version < "3.8" or python_full_version >= "3.6.0" and python_version < "3.8" and python_version >= "3.6" \
--hash=sha256:7f001e5ac290a0c0401508864c7ec868be4e701886d5b573a9528ed3973d9d3b \
--hash=sha256:4ca091dea149f945ec56afb48dae714f21e8692ef22a395223bcd328961b6a0e
url-normalize==1.4.3; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version >= "3.6" \
--hash=sha256:d23d3a070ac52a67b83a1c59a0e68f8608d1cd538783b401bc9de2c0fac999b2 \
--hash=sha256:ec3c301f04e5bb676d333a7fa162fa977ad2ca04b7e652bfc9fac4e405728eed
urllib3==1.26.7; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version < "4" and python_version >= "3.6" \
--hash=sha256:c4fdf4019605b6e5423637e01bc9fe4daef873709a7973e195ceba0a62bbc844 \
--hash=sha256:4987c65554f7a2dbf30c18fd48778ef124af6fab771a377103da0585e2336ece
wcwidth==0.2.5; python_version >= "3.5" \
--hash=sha256:beb4802a9cebb9144e99086eff703a642a13d6a0052920003a230f3294bbe784 \
--hash=sha256:c4d647b99872929fdb7bdcaa4fbe7f01413ed3d98077df798530e5b04f116c83
xlrd==1.2.0; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.4.0") \
--hash=sha256:e551fb498759fa3a5384a94ccd4c3c02eb7c00ea424426e212ac0c57be9dfbde \
--hash=sha256:546eb36cee8db40c3eaa46c351e67ffee6eeb5fa2650b71bc4c758a29a1b29b2
zipp==3.6.0; python_version >= "3.6" and python_full_version < "3.0.0" and python_version < "3.8" or python_full_version >= "3.6.0" and python_version < "3.8" and python_version >= "3.6" \
--hash=sha256:9fe5ea21568a0a70e50f273397638d39b03353731e6cbbb3fd8502a33fec40bc \
--hash=sha256:71c644c5369f4a6e07636f0aa966270449561fcea2e3d6747b8d23efaa9d7832

View File

@ -14,7 +14,7 @@ install_requires = [
setuptools.setup( setuptools.setup(
name="csv-metadata-quality", name="csv-metadata-quality",
version="0.5.0", version="0.6.0-dev",
author="Alan Orth", author="Alan Orth",
author_email="aorth@mjanja.ch", author_email="aorth@mjanja.ch",
description="A simple, but opinionated CSV quality checking and fixing pipeline for CSVs in the DSpace ecosystem.", description="A simple, but opinionated CSV quality checking and fixing pipeline for CSVs in the DSpace ecosystem.",
@ -26,6 +26,7 @@ setuptools.setup(
"Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.7",
"Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"License :: OSI Approved :: GNU General Public License v3 (GPLv3)", "License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
"Operating System :: OS Independent", "Operating System :: OS Independent",
], ],

View File

@ -179,18 +179,41 @@ def test_check_invalid_language(capsys):
def test_check_invalid_agrovoc(capsys): def test_check_invalid_agrovoc(capsys):
"""Test invalid AGROVOC subject.""" """Test invalid AGROVOC subject. Invalid values *will not* be dropped."""
value = "FOREST" valid_agrovoc = "LIVESTOCK"
invalid_agrovoc = "FOREST"
value = f"{valid_agrovoc}||{invalid_agrovoc}"
field_name = "dcterms.subject" field_name = "dcterms.subject"
drop = False
check.agrovoc(value, field_name) new_value = check.agrovoc(value, field_name, drop)
captured = capsys.readouterr() captured = capsys.readouterr()
assert ( assert (
captured.out captured.out
== f"{Fore.RED}Invalid AGROVOC ({field_name}): {Fore.RESET}{value}\n" == f"{Fore.RED}Invalid AGROVOC ({field_name}): {Fore.RESET}{invalid_agrovoc}\n"
) )
assert new_value == value
def test_check_invalid_agrovoc_dropped(capsys):
"""Test invalid AGROVOC subjects. Invalid values *will* be dropped."""
valid_agrovoc = "LIVESTOCK"
invalid_agrovoc = "FOREST"
value = f"{valid_agrovoc}||{invalid_agrovoc}"
field_name = "dcterms.subject"
drop = True
new_value = check.agrovoc(value, field_name, drop)
captured = capsys.readouterr()
assert (
captured.out
== f"{Fore.GREEN}Dropping invalid AGROVOC ({field_name}): {Fore.RESET}{invalid_agrovoc}\n"
)
assert new_value == valid_agrovoc
def test_check_valid_agrovoc(): def test_check_valid_agrovoc():
@ -198,10 +221,11 @@ def test_check_valid_agrovoc():
value = "FORESTS" value = "FORESTS"
field_name = "dcterms.subject" field_name = "dcterms.subject"
drop = False
result = check.agrovoc(value, field_name) result = check.agrovoc(value, field_name, drop)
assert result == None assert result == "FORESTS"
def test_check_uncommon_filename_extension(capsys): def test_check_uncommon_filename_extension(capsys):