mirror of
https://github.com/ilri/csv-metadata-quality.git
synced 2025-07-02 04:27:24 +02:00
Compare commits
15 Commits
202abf140c
...
v0.4.5
Author | SHA1 | Date | |
---|---|---|---|
202bda862a
|
|||
7479310ac0
|
|||
98a91bc9c2
|
|||
fc5bedcc5c
|
|||
44d12d771a
|
|||
4a7000e975
|
|||
27b2d81ca8
|
|||
91ebd0f606
|
|||
dd2cfae047
|
|||
d76e72532a
|
|||
13980d2dde
|
|||
9aaaa62461
|
|||
a7fc5a246c
|
|||
7fb8acb866
|
|||
9f5d2c2c4f
|
12
CHANGELOG.md
12
CHANGELOG.md
@ -4,10 +4,20 @@ All notable changes to this project will be documented in this file.
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||
|
||||
## Unreleased
|
||||
## [0.4.5] - 2021-03-04
|
||||
### Added
|
||||
- Check dates in dcterms.issued field as well, not just fields that have the
|
||||
word "date" in them
|
||||
|
||||
### Updated
|
||||
- Run `poetry update` to update project dependencies
|
||||
|
||||
## [0.4.4] - 2021-02-21
|
||||
### Added
|
||||
- Accept dates formatted in ISO 8601 extended with combined date and time, for
|
||||
example: 2020-08-31T11:04:56Z
|
||||
- Colorized output: red for errors, yellow for warnings and information, green
|
||||
for changes
|
||||
|
||||
### Updated
|
||||
- Run `poetry update` to update project dependencies
|
||||
|
@ -109,8 +109,13 @@ This currently uses the [Python langid](https://github.com/saffsd/langid.py) lib
|
||||
- Add an option to drop invalid AGROVOC subjects?
|
||||
- Add tests for application invocation, ie `tests/test_app.py`?
|
||||
- Validate ISSNs or journal titles against CrossRef API?
|
||||
- Better ISO 8601 date parsing (currently only supports simple dates, perhaps we need to use dateutil.parser.parseiso())
|
||||
- Fix lazy date check (assumes field name has "date" but could be dcterms.issued etc!)
|
||||
- Add configurable field validation, like specify a field name and a validation file?
|
||||
- Perhaps like --validate=field.name,filename
|
||||
- Add some row-based item sanity checks and fixes:
|
||||
- Warn if item is Open Access, but missing a filename or URL
|
||||
- Warn if item is Open Access, but missing a license
|
||||
- Warn if item has an ISSN but no journal title
|
||||
- Update journal titles from ISSN
|
||||
|
||||
## License
|
||||
This work is licensed under the [GPLv3](https://www.gnu.org/licenses/gpl-3.0.en.html).
|
||||
|
@ -4,6 +4,7 @@ import signal
|
||||
import sys
|
||||
|
||||
import pandas as pd
|
||||
from colorama import Fore
|
||||
|
||||
import csv_metadata_quality.check as check
|
||||
import csv_metadata_quality.experimental as experimental
|
||||
@ -77,7 +78,7 @@ def run(argv):
|
||||
if column == exclude and skip is False:
|
||||
skip = True
|
||||
if skip:
|
||||
print(f"Skipping {column}")
|
||||
print(f"{Fore.YELLOW}Skipping {Fore.RESET}{column}")
|
||||
|
||||
continue
|
||||
|
||||
@ -141,7 +142,7 @@ def run(argv):
|
||||
df[column] = df[column].apply(check.isbn)
|
||||
|
||||
# Check: invalid date
|
||||
match = re.match(r"^.*?date.*$", column)
|
||||
match = re.match(r"^.*?(date|dcterms\.issued).*$", column)
|
||||
if match is not None:
|
||||
df[column] = df[column].apply(check.date, field_name=column)
|
||||
|
||||
|
@ -3,6 +3,7 @@ from datetime import datetime, timedelta
|
||||
import pandas as pd
|
||||
import requests
|
||||
import requests_cache
|
||||
from colorama import Fore
|
||||
from pycountry import languages
|
||||
|
||||
|
||||
@ -26,7 +27,7 @@ def issn(field):
|
||||
for value in field.split("||"):
|
||||
|
||||
if not issn.is_valid(value):
|
||||
print(f"Invalid ISSN: {value}")
|
||||
print(f"{Fore.RED}Invalid ISSN: {Fore.RESET}{value}")
|
||||
|
||||
return field
|
||||
|
||||
@ -51,7 +52,7 @@ def isbn(field):
|
||||
for value in field.split("||"):
|
||||
|
||||
if not isbn.is_valid(value):
|
||||
print(f"Invalid ISBN: {value}")
|
||||
print(f"{Fore.RED}Invalid ISBN: {Fore.RESET}{value}")
|
||||
|
||||
return field
|
||||
|
||||
@ -76,7 +77,9 @@ def separators(field, field_name):
|
||||
for value in field.split("||"):
|
||||
# Check if the current value is blank
|
||||
if value == "":
|
||||
print(f"Unnecessary multi-value separator ({field_name}): {field}")
|
||||
print(
|
||||
f"{Fore.RED}Unnecessary multi-value separator ({field_name}): {Fore.RESET}{field}"
|
||||
)
|
||||
|
||||
continue
|
||||
|
||||
@ -85,7 +88,9 @@ def separators(field, field_name):
|
||||
|
||||
# Check if there was a match
|
||||
if match:
|
||||
print(f"Invalid multi-value separator ({field_name}): {field}")
|
||||
print(
|
||||
f"{Fore.RED}Invalid multi-value separator ({field_name}): {Fore.RESET}{field}"
|
||||
)
|
||||
|
||||
return field
|
||||
|
||||
@ -102,7 +107,7 @@ def date(field, field_name):
|
||||
"""
|
||||
|
||||
if pd.isna(field):
|
||||
print(f"Missing date ({field_name}).")
|
||||
print(f"{Fore.RED}Missing date ({field_name}).{Fore.RESET}")
|
||||
|
||||
return
|
||||
|
||||
@ -111,7 +116,9 @@ def date(field, field_name):
|
||||
|
||||
# We don't allow multi-value date fields
|
||||
if len(multiple_dates) > 1:
|
||||
print(f"Multiple dates not allowed ({field_name}): {field}")
|
||||
print(
|
||||
f"{Fore.RED}Multiple dates not allowed ({field_name}): {Fore.RESET}{field}"
|
||||
)
|
||||
|
||||
return field
|
||||
|
||||
@ -145,7 +152,7 @@ def date(field, field_name):
|
||||
|
||||
return field
|
||||
except ValueError:
|
||||
print(f"Invalid date ({field_name}): {field}")
|
||||
print(f"{Fore.RED}Invalid date ({field_name}): {Fore.RESET}{field}")
|
||||
|
||||
return field
|
||||
|
||||
@ -178,9 +185,7 @@ def suspicious_characters(field, field_name):
|
||||
# character and spanning enough of the rest to give a preview,
|
||||
# but not too much to cause the line to break in terminals with
|
||||
# a default of 80 characters width.
|
||||
suspicious_character_msg = (
|
||||
f"Suspicious character ({field_name}): {field_subset}"
|
||||
)
|
||||
suspicious_character_msg = f"{Fore.YELLOW}Suspicious character ({field_name}): {Fore.RESET}{field_subset}"
|
||||
print(f"{suspicious_character_msg:1.80}")
|
||||
|
||||
return field
|
||||
@ -205,16 +210,16 @@ def language(field):
|
||||
# can check it against ISO 639-1 or ISO 639-3 accordingly.
|
||||
if len(value) == 2:
|
||||
if not languages.get(alpha_2=value):
|
||||
print(f"Invalid ISO 639-1 language: {value}")
|
||||
print(f"{Fore.RED}Invalid ISO 639-1 language: {Fore.RESET}{value}")
|
||||
|
||||
pass
|
||||
elif len(value) == 3:
|
||||
if not languages.get(alpha_3=value):
|
||||
print(f"Invalid ISO 639-3 language: {value}")
|
||||
print(f"{Fore.RED}Invalid ISO 639-3 language: {Fore.RESET}{value}")
|
||||
|
||||
pass
|
||||
else:
|
||||
print(f"Invalid language: {value}")
|
||||
print(f"{Fore.RED}Invalid language: {Fore.RESET}{value}")
|
||||
|
||||
return field
|
||||
|
||||
@ -256,7 +261,7 @@ def agrovoc(field, field_name):
|
||||
|
||||
# check if there are any results
|
||||
if len(data["results"]) == 0:
|
||||
print(f"Invalid AGROVOC ({field_name}): {value}")
|
||||
print(f"{Fore.RED}Invalid AGROVOC ({field_name}): {Fore.RESET}{value}")
|
||||
|
||||
return field
|
||||
|
||||
@ -309,6 +314,6 @@ def filename_extension(field):
|
||||
break
|
||||
|
||||
if filename_extension_match is False:
|
||||
print(f"Filename with uncommon extension: {value}")
|
||||
print(f"{Fore.YELLOW}Filename with uncommon extension: {Fore.RESET}{value}")
|
||||
|
||||
return field
|
||||
|
@ -1,4 +1,5 @@
|
||||
import pandas as pd
|
||||
from colorama import Fore
|
||||
|
||||
|
||||
def correct_language(row):
|
||||
@ -10,10 +11,11 @@ def correct_language(row):
|
||||
language and returns the value in the language field if it does match.
|
||||
"""
|
||||
|
||||
from pycountry import languages
|
||||
import langid
|
||||
import re
|
||||
|
||||
import langid
|
||||
from pycountry import languages
|
||||
|
||||
# Initialize some variables at global scope so that we can set them in the
|
||||
# loop scope below and still be able to access them afterwards.
|
||||
language = ""
|
||||
@ -83,12 +85,12 @@ def correct_language(row):
|
||||
detected_language = languages.get(alpha_2=langid_classification[0])
|
||||
if len(language) == 2 and language != detected_language.alpha_2:
|
||||
print(
|
||||
f"Possibly incorrect language {language} (detected {detected_language.alpha_2}): {title}"
|
||||
f"{Fore.YELLOW}Possibly incorrect language {language} (detected {detected_language.alpha_2}): {Fore.RESET}{title}"
|
||||
)
|
||||
|
||||
elif len(language) == 3 and language != detected_language.alpha_3:
|
||||
print(
|
||||
f"Possibly incorrect language {language} (detected {detected_language.alpha_3}): {title}"
|
||||
f"{Fore.YELLOW}Possibly incorrect language {language} (detected {detected_language.alpha_3}): {Fore.RESET}{title}"
|
||||
)
|
||||
|
||||
else:
|
||||
|
@ -2,6 +2,7 @@ import re
|
||||
from unicodedata import normalize
|
||||
|
||||
import pandas as pd
|
||||
from colorama import Fore
|
||||
|
||||
from csv_metadata_quality.util import is_nfc
|
||||
|
||||
@ -29,7 +30,9 @@ def whitespace(field, field_name):
|
||||
match = re.findall(pattern, value)
|
||||
|
||||
if match:
|
||||
print(f"Removing excessive whitespace ({field_name}): {value}")
|
||||
print(
|
||||
f"{Fore.GREEN}Removing excessive whitespace ({field_name}): {Fore.RESET}{value}"
|
||||
)
|
||||
value = re.sub(pattern, " ", value)
|
||||
|
||||
# Save cleaned value
|
||||
@ -62,7 +65,9 @@ def separators(field, field_name):
|
||||
for value in field.split("||"):
|
||||
# Check if the value is blank and skip it
|
||||
if value == "":
|
||||
print(f"Fixing unnecessary multi-value separator ({field_name}): {field}")
|
||||
print(
|
||||
f"{Fore.GREEN}Fixing unnecessary multi-value separator ({field_name}): {Fore.RESET}{field}"
|
||||
)
|
||||
|
||||
continue
|
||||
|
||||
@ -71,7 +76,9 @@ def separators(field, field_name):
|
||||
match = re.findall(pattern, value)
|
||||
|
||||
if match:
|
||||
print(f"Fixing invalid multi-value separator ({field_name}): {value}")
|
||||
print(
|
||||
f"{Fore.RED}Fixing invalid multi-value separator ({field_name}): {Fore.RESET}{value}"
|
||||
)
|
||||
|
||||
value = re.sub(pattern, "||", value)
|
||||
|
||||
@ -107,7 +114,7 @@ def unnecessary_unicode(field):
|
||||
match = re.findall(pattern, field)
|
||||
|
||||
if match:
|
||||
print(f"Removing unnecessary Unicode (U+200B): {field}")
|
||||
print(f"{Fore.GREEN}Removing unnecessary Unicode (U+200B): {Fore.RESET}{field}")
|
||||
field = re.sub(pattern, "", field)
|
||||
|
||||
# Check for replacement characters (U+FFFD)
|
||||
@ -115,7 +122,7 @@ def unnecessary_unicode(field):
|
||||
match = re.findall(pattern, field)
|
||||
|
||||
if match:
|
||||
print(f"Removing unnecessary Unicode (U+FFFD): {field}")
|
||||
print(f"{Fore.GREEN}Removing unnecessary Unicode (U+FFFD): {Fore.RESET}{field}")
|
||||
field = re.sub(pattern, "", field)
|
||||
|
||||
# Check for no-break spaces (U+00A0)
|
||||
@ -123,7 +130,9 @@ def unnecessary_unicode(field):
|
||||
match = re.findall(pattern, field)
|
||||
|
||||
if match:
|
||||
print(f"Replacing unnecessary Unicode (U+00A0): {field}")
|
||||
print(
|
||||
f"{Fore.GREEN}Replacing unnecessary Unicode (U+00A0): {Fore.RESET}{field}"
|
||||
)
|
||||
field = re.sub(pattern, " ", field)
|
||||
|
||||
# Check for soft hyphens (U+00AD), sometimes preceeded with a normal hyphen
|
||||
@ -131,7 +140,9 @@ def unnecessary_unicode(field):
|
||||
match = re.findall(pattern, field)
|
||||
|
||||
if match:
|
||||
print(f"Replacing unnecessary Unicode (U+00AD): {field}")
|
||||
print(
|
||||
f"{Fore.GREEN}Replacing unnecessary Unicode (U+00AD): {Fore.RESET}{field}"
|
||||
)
|
||||
field = re.sub(pattern, "-", field)
|
||||
|
||||
return field
|
||||
@ -156,7 +167,9 @@ def duplicates(field, field_name):
|
||||
if value not in new_values:
|
||||
new_values.append(value)
|
||||
else:
|
||||
print(f"Removing duplicate value ({field_name}): {value}")
|
||||
print(
|
||||
f"{Fore.GREEN}Removing duplicate value ({field_name}): {Fore.RESET}{value}"
|
||||
)
|
||||
|
||||
# Create a new field consisting of all values joined with "||"
|
||||
new_field = "||".join(new_values)
|
||||
@ -189,7 +202,7 @@ def newlines(field):
|
||||
match = re.findall(r"\n", field)
|
||||
|
||||
if match:
|
||||
print(f"Removing newline: {field}")
|
||||
print(f"{Fore.GREEN}Removing newline: {Fore.RESET}{field}")
|
||||
field = field.replace("\n", "")
|
||||
|
||||
return field
|
||||
@ -213,7 +226,9 @@ def comma_space(field, field_name):
|
||||
match = re.findall(r",\w", field)
|
||||
|
||||
if match:
|
||||
print(f"Adding space after comma ({field_name}): {field}")
|
||||
print(
|
||||
f"{Fore.GREEN}Adding space after comma ({field_name}): {Fore.RESET}{field}"
|
||||
)
|
||||
field = re.sub(r",(\w)", r", \1", field)
|
||||
|
||||
return field
|
||||
@ -234,7 +249,7 @@ def normalize_unicode(field, field_name):
|
||||
|
||||
# Check if the current string is using normalized Unicode (NFC)
|
||||
if not is_nfc(field):
|
||||
print(f"Normalizing Unicode ({field_name}): {field}")
|
||||
print(f"{Fore.GREEN}Normalizing Unicode ({field_name}): {Fore.RESET}{field}")
|
||||
field = normalize("NFC", field)
|
||||
|
||||
return field
|
||||
|
@ -1 +1 @@
|
||||
VERSION = "0.4.3"
|
||||
VERSION = "0.4.5"
|
||||
|
113
poetry.lock
generated
113
poetry.lock
generated
@ -159,7 +159,7 @@ python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
|
||||
name = "colorama"
|
||||
version = "0.4.4"
|
||||
description = "Cross-platform colored terminal text."
|
||||
category = "dev"
|
||||
category = "main"
|
||||
optional = false
|
||||
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
|
||||
|
||||
@ -233,7 +233,7 @@ python-versions = "*"
|
||||
|
||||
[[package]]
|
||||
name = "ipython"
|
||||
version = "7.20.0"
|
||||
version = "7.21.0"
|
||||
description = "IPython: Productive Interactive Computing"
|
||||
category = "dev"
|
||||
optional = false
|
||||
@ -357,7 +357,7 @@ python-versions = "*"
|
||||
|
||||
[[package]]
|
||||
name = "numpy"
|
||||
version = "1.20.0"
|
||||
version = "1.20.1"
|
||||
description = "NumPy is the fundamental package for array computing with Python."
|
||||
category = "main"
|
||||
optional = false
|
||||
@ -388,7 +388,7 @@ pyparsing = ">=2.0.2"
|
||||
|
||||
[[package]]
|
||||
name = "pandas"
|
||||
version = "1.2.1"
|
||||
version = "1.2.3"
|
||||
description = "Powerful data structures for data analysis, time series, and statistics"
|
||||
category = "main"
|
||||
optional = false
|
||||
@ -462,7 +462,7 @@ dev = ["pre-commit", "tox"]
|
||||
|
||||
[[package]]
|
||||
name = "prompt-toolkit"
|
||||
version = "3.0.14"
|
||||
version = "3.0.16"
|
||||
description = "Library for building powerful interactive command lines in Python"
|
||||
category = "dev"
|
||||
optional = false
|
||||
@ -513,7 +513,7 @@ python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
|
||||
|
||||
[[package]]
|
||||
name = "pygments"
|
||||
version = "2.7.4"
|
||||
version = "2.8.0"
|
||||
description = "Pygments is a syntax highlighting package written in Python."
|
||||
category = "dev"
|
||||
optional = false
|
||||
@ -587,7 +587,7 @@ unidecode = ["Unidecode (>=1.1.1)"]
|
||||
|
||||
[[package]]
|
||||
name = "python-stdnum"
|
||||
version = "1.15"
|
||||
version = "1.16"
|
||||
description = "Python module to handle standardized numbers and codes"
|
||||
category = "main"
|
||||
optional = false
|
||||
@ -765,7 +765,7 @@ python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
|
||||
[metadata]
|
||||
lock-version = "1.1"
|
||||
python-versions = "^3.8"
|
||||
content-hash = "63f2c6ef09652c4f8407660ff7b4690c8a07e5501eb8fc8c477f485de5888fcf"
|
||||
content-hash = "8c4ba410bbdc930d2d74f7864470a18827029a5697869833959708d7425460ae"
|
||||
|
||||
[metadata.files]
|
||||
agate = [
|
||||
@ -851,8 +851,8 @@ iniconfig = [
|
||||
{file = "iniconfig-1.1.1.tar.gz", hash = "sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32"},
|
||||
]
|
||||
ipython = [
|
||||
{file = "ipython-7.20.0-py3-none-any.whl", hash = "sha256:1918dea4bfdc5d1a830fcfce9a710d1d809cbed123e85eab0539259cb0f56640"},
|
||||
{file = "ipython-7.20.0.tar.gz", hash = "sha256:1923af00820a8cf58e91d56b89efc59780a6e81363b94464a0f17c039dffff9e"},
|
||||
{file = "ipython-7.21.0-py3-none-any.whl", hash = "sha256:34207ffb2f653bced2bc8e3756c1db86e7d93e44ed049daae9814fed66d408ec"},
|
||||
{file = "ipython-7.21.0.tar.gz", hash = "sha256:04323f72d5b85b606330b6d7e2dc8d2683ad46c3905e955aa96ecc7a99388e70"},
|
||||
]
|
||||
ipython-genutils = [
|
||||
{file = "ipython_genutils-0.2.0-py2.py3-none-any.whl", hash = "sha256:72dd37233799e619666c9f639a9da83c34013a73e8bbc79a7a6348d93c61fab8"},
|
||||
@ -890,30 +890,30 @@ mypy-extensions = [
|
||||
{file = "mypy_extensions-0.4.3.tar.gz", hash = "sha256:2d82818f5bb3e369420cb3c4060a7970edba416647068eb4c5343488a6c604a8"},
|
||||
]
|
||||
numpy = [
|
||||
{file = "numpy-1.20.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:89bd70c9ad540febe6c28451ba225eb4e49d27f64728357f512c808002325dfa"},
|
||||
{file = "numpy-1.20.0-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:1264c66129f5ef63187649dd43f1ca59532e8c098723643336a85131c0dcce3f"},
|
||||
{file = "numpy-1.20.0-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:e9c5fd330d2fedf06051bafb996252de9b032fcb2ec03eefc9a543e56efa66d4"},
|
||||
{file = "numpy-1.20.0-cp37-cp37m-manylinux2010_i686.whl", hash = "sha256:db5e69d08756a2fa75a42b4e433880b6187768fe1bc73d21819def893e5128c6"},
|
||||
{file = "numpy-1.20.0-cp37-cp37m-manylinux2010_x86_64.whl", hash = "sha256:1abc02e30e3efd81a4571e00f8e62bf42e343c76698e0a3e11d9c2b3ee0d77a7"},
|
||||
{file = "numpy-1.20.0-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:5ae765dd29c71a555f8102281f6fb15a3f4dbd35f6e7daf36af9df6d9dd716a5"},
|
||||
{file = "numpy-1.20.0-cp37-cp37m-win32.whl", hash = "sha256:b51b9ef0624f4b01b846c981034c10d2e30db33f9f8be71e992f3900741f6f77"},
|
||||
{file = "numpy-1.20.0-cp37-cp37m-win_amd64.whl", hash = "sha256:afeee581b50df20ef07b736e62ca612858f1fcdba96651d26ab44e3d567a4e6e"},
|
||||
{file = "numpy-1.20.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2bf0e68c92ef077fe766e53f8937d8ac341bdbca68ec128ae049b7d5c34e3206"},
|
||||
{file = "numpy-1.20.0-cp38-cp38-manylinux1_i686.whl", hash = "sha256:2445a96fbae23a4109c61be0f0af0f3bc273905dc5687a710850c1dfde0fc994"},
|
||||
{file = "numpy-1.20.0-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:33edfc0eb229f86f539493917b34035054313a11afbed48404aaf9f86bf4b0f6"},
|
||||
{file = "numpy-1.20.0-cp38-cp38-manylinux2010_i686.whl", hash = "sha256:894aaee60043a98b03f0ad992c810f62e3a15f98a701e1c0f58a4f4a0df13429"},
|
||||
{file = "numpy-1.20.0-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:b66a6c15d793eda7cdad986e737775aa31b9306d588c14dd0277d2dda5546150"},
|
||||
{file = "numpy-1.20.0-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:eee454d3aa3955d0c0069a0f265fea47f1e1384c35a110a95efed358eb6e1562"},
|
||||
{file = "numpy-1.20.0-cp38-cp38-win32.whl", hash = "sha256:abdfa075e293d73638ece434708aa60b510dc6e70d805f57f481a0f550b25a9e"},
|
||||
{file = "numpy-1.20.0-cp38-cp38-win_amd64.whl", hash = "sha256:f1e9424e9aa3834ea27cc12f9c6ea8ace5da18ee60a720bb3a85b2f733f41782"},
|
||||
{file = "numpy-1.20.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:cb257bb0c0a3176c32782a63cfab2eace7eabfa2a3b2dfd85a13700617ccaf28"},
|
||||
{file = "numpy-1.20.0-cp39-cp39-manylinux2010_i686.whl", hash = "sha256:cf5d9dcbdbe523fa665c5309cce5f144648d94a7fddbf5a40f8e0d5c9f5b596d"},
|
||||
{file = "numpy-1.20.0-cp39-cp39-manylinux2010_x86_64.whl", hash = "sha256:93c2abea7bb69f47029b84ceac30ab46dfcfdb99b671ad850a333ff794a765e4"},
|
||||
{file = "numpy-1.20.0-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:0d28a54afcf46f1f9ebd163e49ad6b49087f22986fefd01a23ca0c1cdda25ca6"},
|
||||
{file = "numpy-1.20.0-cp39-cp39-win32.whl", hash = "sha256:d1bc331e1706fd1809a1bc8a31205329e5b30cf5ba50461c624da267e99f6ae6"},
|
||||
{file = "numpy-1.20.0-cp39-cp39-win_amd64.whl", hash = "sha256:e3db646af9f6a145f0c57202f4b55d4a33f975e395e78fb7b394644c17c1a3a6"},
|
||||
{file = "numpy-1.20.0-pp37-pypy37_pp73-manylinux2010_x86_64.whl", hash = "sha256:4d592264d2a4f368afbb4288b5ceb646d4cbaf559c0249c096fbb0a149806b90"},
|
||||
{file = "numpy-1.20.0.zip", hash = "sha256:3d8233c03f116d068d5365fed4477f2947c7229582dad81e5953088989294cec"},
|
||||
{file = "numpy-1.20.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:ae61f02b84a0211abb56462a3b6cd1e7ec39d466d3160eb4e1da8bf6717cdbeb"},
|
||||
{file = "numpy-1.20.1-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:65410c7f4398a0047eea5cca9b74009ea61178efd78d1be9847fac1d6716ec1e"},
|
||||
{file = "numpy-1.20.1-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:2d7e27442599104ee08f4faed56bb87c55f8b10a5494ac2ead5c98a4b289e61f"},
|
||||
{file = "numpy-1.20.1-cp37-cp37m-manylinux2010_i686.whl", hash = "sha256:4ed8e96dc146e12c1c5cdd6fb9fd0757f2ba66048bf94c5126b7efebd12d0090"},
|
||||
{file = "numpy-1.20.1-cp37-cp37m-manylinux2010_x86_64.whl", hash = "sha256:ecb5b74c702358cdc21268ff4c37f7466357871f53a30e6f84c686952bef16a9"},
|
||||
{file = "numpy-1.20.1-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:b9410c0b6fed4a22554f072a86c361e417f0258838957b78bd063bde2c7f841f"},
|
||||
{file = "numpy-1.20.1-cp37-cp37m-win32.whl", hash = "sha256:3d3087e24e354c18fb35c454026af3ed8997cfd4997765266897c68d724e4845"},
|
||||
{file = "numpy-1.20.1-cp37-cp37m-win_amd64.whl", hash = "sha256:89f937b13b8dd17b0099c7c2e22066883c86ca1575a975f754babc8fbf8d69a9"},
|
||||
{file = "numpy-1.20.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:a1d7995d1023335e67fb070b2fae6f5968f5be3802b15ad6d79d81ecaa014fe0"},
|
||||
{file = "numpy-1.20.1-cp38-cp38-manylinux1_i686.whl", hash = "sha256:60759ab15c94dd0e1ed88241fd4fa3312db4e91d2c8f5a2d4cf3863fad83d65b"},
|
||||
{file = "numpy-1.20.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:125a0e10ddd99a874fd357bfa1b636cd58deb78ba4a30b5ddb09f645c3512e04"},
|
||||
{file = "numpy-1.20.1-cp38-cp38-manylinux2010_i686.whl", hash = "sha256:c26287dfc888cf1e65181f39ea75e11f42ffc4f4529e5bd19add57ad458996e2"},
|
||||
{file = "numpy-1.20.1-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:7199109fa46277be503393be9250b983f325880766f847885607d9b13848f257"},
|
||||
{file = "numpy-1.20.1-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:72251e43ac426ff98ea802a931922c79b8d7596480300eb9f1b1e45e0543571e"},
|
||||
{file = "numpy-1.20.1-cp38-cp38-win32.whl", hash = "sha256:c91ec9569facd4757ade0888371eced2ecf49e7982ce5634cc2cf4e7331a4b14"},
|
||||
{file = "numpy-1.20.1-cp38-cp38-win_amd64.whl", hash = "sha256:13adf545732bb23a796914fe5f891a12bd74cf3d2986eed7b7eba2941eea1590"},
|
||||
{file = "numpy-1.20.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:104f5e90b143dbf298361a99ac1af4cf59131218a045ebf4ee5990b83cff5fab"},
|
||||
{file = "numpy-1.20.1-cp39-cp39-manylinux2010_i686.whl", hash = "sha256:89e5336f2bec0c726ac7e7cdae181b325a9c0ee24e604704ed830d241c5e47ff"},
|
||||
{file = "numpy-1.20.1-cp39-cp39-manylinux2010_x86_64.whl", hash = "sha256:032be656d89bbf786d743fee11d01ef318b0781281241997558fa7950028dd29"},
|
||||
{file = "numpy-1.20.1-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:66b467adfcf628f66ea4ac6430ded0614f5cc06ba530d09571ea404789064adc"},
|
||||
{file = "numpy-1.20.1-cp39-cp39-win32.whl", hash = "sha256:12e4ba5c6420917571f1a5becc9338abbde71dd811ce40b37ba62dec7b39af6d"},
|
||||
{file = "numpy-1.20.1-cp39-cp39-win_amd64.whl", hash = "sha256:9c94cab5054bad82a70b2e77741271790304651d584e2cdfe2041488e753863b"},
|
||||
{file = "numpy-1.20.1-pp37-pypy37_pp73-manylinux2010_x86_64.whl", hash = "sha256:9eb551d122fadca7774b97db8a112b77231dcccda8e91a5bc99e79890797175e"},
|
||||
{file = "numpy-1.20.1.zip", hash = "sha256:3bc63486a870294683980d76ec1e3efc786295ae00128f9ea38e2c6e74d5a60a"},
|
||||
]
|
||||
openpyxl = [
|
||||
{file = "openpyxl-3.0.6-py2.py3-none-any.whl", hash = "sha256:1a4b3869c2500b5c713e8e28341cdada49ecfcff1b10cd9006945f5bcefc090d"},
|
||||
@ -924,23 +924,22 @@ packaging = [
|
||||
{file = "packaging-20.9.tar.gz", hash = "sha256:5b327ac1320dc863dca72f4514ecc086f31186744b84a230374cc1fd776feae5"},
|
||||
]
|
||||
pandas = [
|
||||
{file = "pandas-1.2.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:50e6c0a17ef7f831b5565fd0394dbf9bfd5d615ee4dd4bb60a3d8c9d2e872323"},
|
||||
{file = "pandas-1.2.1-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:324e60bea729cf3b55c1bf9e88fe8b9932c26f8669d13b928e3c96b3a1453dff"},
|
||||
{file = "pandas-1.2.1-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:37443199f451f8badfe0add666e43cdb817c59fa36bceedafd9c543a42f236ca"},
|
||||
{file = "pandas-1.2.1-cp37-cp37m-win32.whl", hash = "sha256:23ac77a3a222d9304cb2a7934bb7b4805ff43d513add7a42d1a22dc7df14edd2"},
|
||||
{file = "pandas-1.2.1-cp37-cp37m-win_amd64.whl", hash = "sha256:496fcc29321e9a804d56d5aa5d7ec1320edfd1898eee2f451aa70171cf1d5a29"},
|
||||
{file = "pandas-1.2.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:30e9e8bc8c5c17c03d943e8d6f778313efff59e413b8dbdd8214c2ed9aa165f6"},
|
||||
{file = "pandas-1.2.1-cp38-cp38-manylinux1_i686.whl", hash = "sha256:055647e7f4c5e66ba92c2a7dcae6c2c57898b605a3fb007745df61cc4015937f"},
|
||||
{file = "pandas-1.2.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:9d45f58b03af1fea4b48e44aa38a819a33dccb9821ef9e1d68f529995f8a632f"},
|
||||
{file = "pandas-1.2.1-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:b26e2dabda73d347c7af3e6fed58483161c7b87a886a4e06d76ccfe55a044aa9"},
|
||||
{file = "pandas-1.2.1-cp38-cp38-win32.whl", hash = "sha256:47ec0808a8357ab3890ce0eca39a63f79dcf941e2e7f494470fe1c9ec43f6091"},
|
||||
{file = "pandas-1.2.1-cp38-cp38-win_amd64.whl", hash = "sha256:57d5c7ac62925a8d2ab43ea442b297a56cc8452015e71e24f4aa7e4ed6be3d77"},
|
||||
{file = "pandas-1.2.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:d7cca42dba13bfee369e2944ae31f6549a55831cba3117e17636955176004088"},
|
||||
{file = "pandas-1.2.1-cp39-cp39-manylinux1_i686.whl", hash = "sha256:cfd237865d878da9b65cfee883da5e0067f5e2ff839e459466fb90565a77bda3"},
|
||||
{file = "pandas-1.2.1-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:050ed2c9d825ef36738e018454e6d055c63d947c1d52010fbadd7584f09df5db"},
|
||||
{file = "pandas-1.2.1-cp39-cp39-win32.whl", hash = "sha256:fe7de6fed43e7d086e3d947651ec89e55ddf00102f9dd5758763d56d182f0564"},
|
||||
{file = "pandas-1.2.1-cp39-cp39-win_amd64.whl", hash = "sha256:2de012a36cc507debd9c3351b4d757f828d5a784a5fc4e6766eafc2b56e4b0f5"},
|
||||
{file = "pandas-1.2.1.tar.gz", hash = "sha256:5527c5475d955c0bc9689c56865aaa2a7b13c504d6c44f0aadbf57b565af5ebd"},
|
||||
{file = "pandas-1.2.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:4d821b9b911fc1b7d428978d04ace33f0af32bb7549525c8a7b08444bce46b74"},
|
||||
{file = "pandas-1.2.3-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:9f5829e64507ad10e2561b60baf285c470f3c4454b007c860e77849b88865ae7"},
|
||||
{file = "pandas-1.2.3-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:97b1954533b2a74c7e20d1342c4f01311d3203b48f2ebf651891e6a6eaf01104"},
|
||||
{file = "pandas-1.2.3-cp37-cp37m-win32.whl", hash = "sha256:5e3c8c60541396110586bcbe6eccdc335a38e7de8c217060edaf4722260b158f"},
|
||||
{file = "pandas-1.2.3-cp37-cp37m-win_amd64.whl", hash = "sha256:8a051e957c5206f722e83f295f95a2cf053e890f9a1fba0065780a8c2d045f5d"},
|
||||
{file = "pandas-1.2.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:a93e34f10f67d81de706ce00bf8bb3798403cabce4ccb2de10c61b5ae8786ab5"},
|
||||
{file = "pandas-1.2.3-cp38-cp38-manylinux1_i686.whl", hash = "sha256:46fc671c542a8392a4f4c13edc8527e3a10f6cb62912d856f82248feb747f06e"},
|
||||
{file = "pandas-1.2.3-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:43e00770552595c2250d8d712ec8b6e08ca73089ac823122344f023efa4abea3"},
|
||||
{file = "pandas-1.2.3-cp38-cp38-win32.whl", hash = "sha256:475b7772b6e18a93a43ea83517932deff33954a10d4fbae18d0c1aba4182310f"},
|
||||
{file = "pandas-1.2.3-cp38-cp38-win_amd64.whl", hash = "sha256:72ffcea00ae8ffcdbdefff800284311e155fbb5ed6758f1a6110fc1f8f8f0c1c"},
|
||||
{file = "pandas-1.2.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:621c044a1b5e535cf7dcb3ab39fca6f867095c3ef223a524f18f60c7fee028ea"},
|
||||
{file = "pandas-1.2.3-cp39-cp39-manylinux1_i686.whl", hash = "sha256:0f27fd1adfa256388dc34895ca5437eaf254832223812afd817a6f73127f969c"},
|
||||
{file = "pandas-1.2.3-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:dbb255975eb94143f2e6ec7dadda671d25147939047839cd6b8a4aff0379bb9b"},
|
||||
{file = "pandas-1.2.3-cp39-cp39-win32.whl", hash = "sha256:d59842a5aa89ca03c2099312163ffdd06f56486050e641a45d926a072f04d994"},
|
||||
{file = "pandas-1.2.3-cp39-cp39-win_amd64.whl", hash = "sha256:09761bf5f8c741d47d4b8b9073288de1be39bbfccc281d70b889ade12b2aad29"},
|
||||
{file = "pandas-1.2.3.tar.gz", hash = "sha256:df6f10b85aef7a5bb25259ad651ad1cc1d6bb09000595cab47e718cbac250b1d"},
|
||||
]
|
||||
parsedatetime = [
|
||||
{file = "parsedatetime-2.6-py3-none-any.whl", hash = "sha256:cb96edd7016872f58479e35879294258c71437195760746faffedb692aef000b"},
|
||||
@ -967,8 +966,8 @@ pluggy = [
|
||||
{file = "pluggy-0.13.1.tar.gz", hash = "sha256:15b2acde666561e1298d71b523007ed7364de07029219b604cf808bfa1c765b0"},
|
||||
]
|
||||
prompt-toolkit = [
|
||||
{file = "prompt_toolkit-3.0.14-py3-none-any.whl", hash = "sha256:c96b30925025a7635471dc083ffb6af0cc67482a00611bd81aeaeeeb7e5a5e12"},
|
||||
{file = "prompt_toolkit-3.0.14.tar.gz", hash = "sha256:7e966747c18ececaec785699626b771c1ba8344c8d31759a1915d6b12fad6525"},
|
||||
{file = "prompt_toolkit-3.0.16-py3-none-any.whl", hash = "sha256:62c811e46bd09130fb11ab759012a4ae385ce4fb2073442d1898867a824183bd"},
|
||||
{file = "prompt_toolkit-3.0.16.tar.gz", hash = "sha256:0fa02fa80363844a4ab4b8d6891f62dd0645ba672723130423ca4037b80c1974"},
|
||||
]
|
||||
ptyprocess = [
|
||||
{file = "ptyprocess-0.7.0-py2.py3-none-any.whl", hash = "sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35"},
|
||||
@ -990,8 +989,8 @@ pyflakes = [
|
||||
{file = "pyflakes-2.2.0.tar.gz", hash = "sha256:35b2d75ee967ea93b55750aa9edbbf72813e06a66ba54438df2cfac9e3c27fc8"},
|
||||
]
|
||||
pygments = [
|
||||
{file = "Pygments-2.7.4-py3-none-any.whl", hash = "sha256:bc9591213a8f0e0ca1a5e68a479b4887fdc3e75d0774e5c71c31920c427de435"},
|
||||
{file = "Pygments-2.7.4.tar.gz", hash = "sha256:df49d09b498e83c1a73128295860250b0b7edd4c723a32e9bc0d295c7c2ec337"},
|
||||
{file = "Pygments-2.8.0-py3-none-any.whl", hash = "sha256:b21b072d0ccdf29297a82a2363359d99623597b8a265b8081760e4d0f7153c88"},
|
||||
{file = "Pygments-2.8.0.tar.gz", hash = "sha256:37a13ba168a02ac54cc5891a42b1caec333e59b66addb7fa633ea8a6d73445c0"},
|
||||
]
|
||||
pyparsing = [
|
||||
{file = "pyparsing-2.4.7-py2.py3-none-any.whl", hash = "sha256:ef9d7589ef3c200abe66653d3f1ab1033c3c419ae9b9bdb1240a85b024efc88b"},
|
||||
@ -1012,8 +1011,8 @@ python-slugify = [
|
||||
{file = "python-slugify-4.0.1.tar.gz", hash = "sha256:69a517766e00c1268e5bbfc0d010a0a8508de0b18d30ad5a1ff357f8ae724270"},
|
||||
]
|
||||
python-stdnum = [
|
||||
{file = "python-stdnum-1.15.tar.gz", hash = "sha256:ff0e4d2b8731711cf2a73d22964139208088b4bd1a3e3b0c4f78d6e823dd8923"},
|
||||
{file = "python_stdnum-1.15-py2.py3-none-any.whl", hash = "sha256:46c05dff2d5ff89b12cc63ab54cca1e4788e4e3087c163a1b7434a53fa926f28"},
|
||||
{file = "python-stdnum-1.16.tar.gz", hash = "sha256:4248d898042a801fc4eff96fbfe4bf63a43324854efe3b5534718c1c195c6f43"},
|
||||
{file = "python_stdnum-1.16-py2.py3-none-any.whl", hash = "sha256:2e2c56c548ca166b95547a8d748f4d71320a5b4896960717c8e6380e08d993a5"},
|
||||
]
|
||||
pytimeparse = [
|
||||
{file = "pytimeparse-1.1.8-py2.py3-none-any.whl", hash = "sha256:04b7be6cc8bd9f5647a6325444926c3ac34ee6bc7e69da4367ba282f076036bd"},
|
||||
|
@ -1,6 +1,6 @@
|
||||
[tool.poetry]
|
||||
name = "csv-metadata-quality"
|
||||
version = "0.4.3"
|
||||
version = "0.4.5"
|
||||
description="A simple, but opinionated CSV quality checking and fixing pipeline for CSVs in the DSpace ecosystem."
|
||||
authors = ["Alan Orth <alan.orth@gmail.com>"]
|
||||
license="GPL-3.0-only"
|
||||
@ -16,6 +16,7 @@ requests = "^2.23.0"
|
||||
requests-cache = "^0.5.2"
|
||||
pycountry = "^19.8.18"
|
||||
langid = "^1.1.6"
|
||||
colorama = "^0.4.4"
|
||||
|
||||
[tool.poetry.dev-dependencies]
|
||||
pytest = "^6.1.1"
|
||||
|
@ -12,7 +12,7 @@ black==20.8b1; python_version >= "3.6"
|
||||
certifi==2020.12.5; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0"
|
||||
chardet==4.0.0; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0"
|
||||
click==7.1.2; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "3.6"
|
||||
colorama==0.4.4; python_version >= "3.7" and python_full_version < "3.0.0" and sys_platform == "win32" and python_version < "4.0" and (python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.6") or sys_platform == "win32" and python_version >= "3.7" and python_full_version >= "3.5.0" and python_version < "4.0" and (python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.6")
|
||||
colorama==0.4.4; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.5.0")
|
||||
csvkit==1.0.5
|
||||
dbfread==2.0.7
|
||||
decorator==4.4.2; python_version >= "3.7" and python_full_version < "3.0.0" and python_version < "4.0" or python_version >= "3.7" and python_version < "4.0" and python_full_version >= "3.2.0"
|
||||
@ -21,7 +21,7 @@ flake8==3.8.4; (python_version >= "2.7" and python_full_version < "3.0.0") or (p
|
||||
idna==2.10; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0"
|
||||
iniconfig==1.1.1; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.6"
|
||||
ipython-genutils==0.2.0; python_version >= "3.7" and python_version < "4.0"
|
||||
ipython==7.20.0; python_version >= "3.7" and python_version < "4.0"
|
||||
ipython==7.21.0; python_version >= "3.7" and python_version < "4.0"
|
||||
isodate==0.6.0
|
||||
isort==5.7.0; python_version >= "3.6" and python_version < "4.0"
|
||||
jdcal==1.4.1; python_version >= "3.6"
|
||||
@ -30,29 +30,29 @@ langid==1.1.6
|
||||
leather==0.3.3
|
||||
mccabe==0.6.1; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.4.0"
|
||||
mypy-extensions==0.4.3; python_version >= "3.6"
|
||||
numpy==1.20.0; python_version >= "3.7" and python_full_version >= "3.7.1"
|
||||
numpy==1.20.1; python_version >= "3.7" and python_full_version >= "3.7.1"
|
||||
openpyxl==3.0.6; python_version >= "3.6"
|
||||
packaging==20.9; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.6"
|
||||
pandas==1.2.1; python_full_version >= "3.7.1"
|
||||
pandas==1.2.3; python_full_version >= "3.7.1"
|
||||
parsedatetime==2.6
|
||||
parso==0.8.1; python_version >= "3.7" and python_version < "4.0"
|
||||
pathspec==0.8.1; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "3.6"
|
||||
pexpect==4.8.0; python_version >= "3.7" and python_version < "4.0" and sys_platform != "win32"
|
||||
pickleshare==0.7.5; python_version >= "3.7" and python_version < "4.0"
|
||||
pluggy==0.13.1; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.6"
|
||||
prompt-toolkit==3.0.14; python_version >= "3.7" and python_version < "4.0" and python_full_version >= "3.6.1"
|
||||
prompt-toolkit==3.0.16; python_version >= "3.7" and python_version < "4.0" and python_full_version >= "3.6.1"
|
||||
ptyprocess==0.7.0; python_version >= "3.7" and python_version < "4.0" and sys_platform != "win32"
|
||||
py==1.10.0; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.6"
|
||||
pycodestyle==2.6.0; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.4.0"
|
||||
pycountry==19.8.18
|
||||
pyflakes==2.2.0; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.4.0"
|
||||
pygments==2.7.4; python_version >= "3.7" and python_version < "4.0"
|
||||
pygments==2.8.0; python_version >= "3.7" and python_version < "4.0"
|
||||
pyparsing==2.4.7; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.6"
|
||||
pytest-clarity==0.3.0a0; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.4.0")
|
||||
pytest==6.2.2; python_version >= "3.6"
|
||||
python-dateutil==2.8.1; python_full_version >= "3.7.1"
|
||||
python-slugify==4.0.1; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0"
|
||||
python-stdnum==1.15
|
||||
python-stdnum==1.16
|
||||
pytimeparse==1.1.8
|
||||
pytz==2021.1; python_full_version >= "3.7.1"
|
||||
regex==2020.11.13; python_version >= "3.6"
|
||||
|
@ -1,12 +1,13 @@
|
||||
certifi==2020.12.5; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0"
|
||||
chardet==4.0.0; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0"
|
||||
colorama==0.4.4; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.5.0")
|
||||
idna==2.10; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0"
|
||||
langid==1.1.6
|
||||
numpy==1.20.0; python_version >= "3.7" and python_full_version >= "3.7.1"
|
||||
pandas==1.2.1; python_full_version >= "3.7.1"
|
||||
numpy==1.20.1; python_version >= "3.7" and python_full_version >= "3.7.1"
|
||||
pandas==1.2.3; python_full_version >= "3.7.1"
|
||||
pycountry==19.8.18
|
||||
python-dateutil==2.8.1; python_full_version >= "3.7.1"
|
||||
python-stdnum==1.15
|
||||
python-stdnum==1.16
|
||||
pytz==2021.1; python_full_version >= "3.7.1"
|
||||
requests-cache==0.5.2
|
||||
requests==2.25.1; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.5.0")
|
||||
|
2
setup.py
2
setup.py
@ -14,7 +14,7 @@ install_requires = [
|
||||
|
||||
setuptools.setup(
|
||||
name="csv-metadata-quality",
|
||||
version="0.4.3",
|
||||
version="0.4.5",
|
||||
author="Alan Orth",
|
||||
author_email="aorth@mjanja.ch",
|
||||
description="A simple, but opinionated CSV quality checking and fixing pipeline for CSVs in the DSpace ecosystem.",
|
||||
|
@ -1,4 +1,5 @@
|
||||
import pandas as pd
|
||||
from colorama import Fore
|
||||
|
||||
import csv_metadata_quality.check as check
|
||||
import csv_metadata_quality.experimental as experimental
|
||||
@ -12,7 +13,7 @@ def test_check_invalid_issn(capsys):
|
||||
check.issn(value)
|
||||
|
||||
captured = capsys.readouterr()
|
||||
assert captured.out == f"Invalid ISSN: {value}\n"
|
||||
assert captured.out == f"{Fore.RED}Invalid ISSN: {Fore.RESET}{value}\n"
|
||||
|
||||
|
||||
def test_check_valid_issn():
|
||||
@ -33,7 +34,7 @@ def test_check_invalid_isbn(capsys):
|
||||
check.isbn(value)
|
||||
|
||||
captured = capsys.readouterr()
|
||||
assert captured.out == f"Invalid ISBN: {value}\n"
|
||||
assert captured.out == f"{Fore.RED}Invalid ISBN: {Fore.RESET}{value}\n"
|
||||
|
||||
|
||||
def test_check_valid_isbn():
|
||||
@ -56,7 +57,10 @@ def test_check_invalid_separators(capsys):
|
||||
check.separators(value, field_name)
|
||||
|
||||
captured = capsys.readouterr()
|
||||
assert captured.out == f"Invalid multi-value separator ({field_name}): {value}\n"
|
||||
assert (
|
||||
captured.out
|
||||
== f"{Fore.RED}Invalid multi-value separator ({field_name}): {Fore.RESET}{value}\n"
|
||||
)
|
||||
|
||||
|
||||
def test_check_unnecessary_separators(capsys):
|
||||
@ -70,7 +74,8 @@ def test_check_unnecessary_separators(capsys):
|
||||
|
||||
captured = capsys.readouterr()
|
||||
assert (
|
||||
captured.out == f"Unnecessary multi-value separator ({field_name}): {field}\n"
|
||||
captured.out
|
||||
== f"{Fore.RED}Unnecessary multi-value separator ({field_name}): {Fore.RESET}{field}\n"
|
||||
)
|
||||
|
||||
|
||||
@ -96,7 +101,7 @@ def test_check_missing_date(capsys):
|
||||
check.date(value, field_name)
|
||||
|
||||
captured = capsys.readouterr()
|
||||
assert captured.out == f"Missing date ({field_name}).\n"
|
||||
assert captured.out == f"{Fore.RED}Missing date ({field_name}).{Fore.RESET}\n"
|
||||
|
||||
|
||||
def test_check_multiple_dates(capsys):
|
||||
@ -109,7 +114,10 @@ def test_check_multiple_dates(capsys):
|
||||
check.date(value, field_name)
|
||||
|
||||
captured = capsys.readouterr()
|
||||
assert captured.out == f"Multiple dates not allowed ({field_name}): {value}\n"
|
||||
assert (
|
||||
captured.out
|
||||
== f"{Fore.RED}Multiple dates not allowed ({field_name}): {Fore.RESET}{value}\n"
|
||||
)
|
||||
|
||||
|
||||
def test_check_invalid_date(capsys):
|
||||
@ -122,7 +130,9 @@ def test_check_invalid_date(capsys):
|
||||
check.date(value, field_name)
|
||||
|
||||
captured = capsys.readouterr()
|
||||
assert captured.out == f"Invalid date ({field_name}): {value}\n"
|
||||
assert (
|
||||
captured.out == f"{Fore.RED}Invalid date ({field_name}): {Fore.RESET}{value}\n"
|
||||
)
|
||||
|
||||
|
||||
def test_check_valid_date():
|
||||
@ -147,7 +157,10 @@ def test_check_suspicious_characters(capsys):
|
||||
check.suspicious_characters(value, field_name)
|
||||
|
||||
captured = capsys.readouterr()
|
||||
assert captured.out == f"Suspicious character ({field_name}): ˆt\n"
|
||||
assert (
|
||||
captured.out
|
||||
== f"{Fore.YELLOW}Suspicious character ({field_name}): {Fore.RESET}ˆt\n"
|
||||
)
|
||||
|
||||
|
||||
def test_check_valid_iso639_1_language():
|
||||
@ -178,7 +191,9 @@ def test_check_invalid_iso639_1_language(capsys):
|
||||
check.language(value)
|
||||
|
||||
captured = capsys.readouterr()
|
||||
assert captured.out == f"Invalid ISO 639-1 language: {value}\n"
|
||||
assert (
|
||||
captured.out == f"{Fore.RED}Invalid ISO 639-1 language: {Fore.RESET}{value}\n"
|
||||
)
|
||||
|
||||
|
||||
def test_check_invalid_iso639_3_language(capsys):
|
||||
@ -189,7 +204,9 @@ def test_check_invalid_iso639_3_language(capsys):
|
||||
check.language(value)
|
||||
|
||||
captured = capsys.readouterr()
|
||||
assert captured.out == f"Invalid ISO 639-3 language: {value}\n"
|
||||
assert (
|
||||
captured.out == f"{Fore.RED}Invalid ISO 639-3 language: {Fore.RESET}{value}\n"
|
||||
)
|
||||
|
||||
|
||||
def test_check_invalid_language(capsys):
|
||||
@ -200,7 +217,7 @@ def test_check_invalid_language(capsys):
|
||||
check.language(value)
|
||||
|
||||
captured = capsys.readouterr()
|
||||
assert captured.out == f"Invalid language: {value}\n"
|
||||
assert captured.out == f"{Fore.RED}Invalid language: {Fore.RESET}{value}\n"
|
||||
|
||||
|
||||
def test_check_invalid_agrovoc(capsys):
|
||||
@ -212,7 +229,10 @@ def test_check_invalid_agrovoc(capsys):
|
||||
check.agrovoc(value, field_name)
|
||||
|
||||
captured = capsys.readouterr()
|
||||
assert captured.out == f"Invalid AGROVOC ({field_name}): {value}\n"
|
||||
assert (
|
||||
captured.out
|
||||
== f"{Fore.RED}Invalid AGROVOC ({field_name}): {Fore.RESET}{value}\n"
|
||||
)
|
||||
|
||||
|
||||
def test_check_valid_agrovoc():
|
||||
@ -234,7 +254,10 @@ def test_check_uncommon_filename_extension(capsys):
|
||||
check.filename_extension(value)
|
||||
|
||||
captured = capsys.readouterr()
|
||||
assert captured.out == f"Filename with uncommon extension: {value}\n"
|
||||
assert (
|
||||
captured.out
|
||||
== f"{Fore.YELLOW}Filename with uncommon extension: {Fore.RESET}{value}\n"
|
||||
)
|
||||
|
||||
|
||||
def test_check_common_filename_extension():
|
||||
@ -262,7 +285,7 @@ def test_check_incorrect_iso_639_1_language(capsys):
|
||||
captured = capsys.readouterr()
|
||||
assert (
|
||||
captured.out
|
||||
== f"Possibly incorrect language {language} (detected en): {title}\n"
|
||||
== f"{Fore.YELLOW}Possibly incorrect language {language} (detected en): {Fore.RESET}{title}\n"
|
||||
)
|
||||
|
||||
|
||||
@ -281,7 +304,7 @@ def test_check_incorrect_iso_639_3_language(capsys):
|
||||
captured = capsys.readouterr()
|
||||
assert (
|
||||
captured.out
|
||||
== f"Possibly incorrect language {language} (detected eng): {title}\n"
|
||||
== f"{Fore.YELLOW}Possibly incorrect language {language} (detected eng): {Fore.RESET}{title}\n"
|
||||
)
|
||||
|
||||
|
||||
|
Reference in New Issue
Block a user