mirror of
https://github.com/ilri/csv-metadata-quality.git
synced 2025-07-23 06:21:45 +02:00
Compare commits
23 Commits
8c23382b22
...
v0.6.1
Author | SHA1 | Date | |
---|---|---|---|
fdccdf7318
|
|||
ff2c986eec
|
|||
547574866e
|
|||
8aa7b93d87
|
|||
53fdb50906
|
|||
3e0e9a7f8b
|
|||
03d824b78e
|
|||
8bc4cd419c
|
|||
bde38e9ed4
|
|||
8db1e36a6d
|
|||
fbb625be5c
|
|||
084b970798
|
|||
171b35b015
|
|||
545bb8cd0c
|
|||
d5afbad788
|
|||
d40c9ed97a
|
|||
c4a2ee8563
|
|||
3596381d03
|
|||
5abd32a41f
|
|||
0ed0fabe21
|
|||
d5cfec65bd
|
|||
66893753ba
|
|||
57be05ebb6
|
10
CHANGELOG.md
10
CHANGELOG.md
@ -4,7 +4,7 @@ All notable changes to this project will be documented in this file.
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||
|
||||
## Unreleased
|
||||
## [0.6.1] - 2023-02-23
|
||||
### Fixed
|
||||
- Missing region check should ignore subregion field, if it exists
|
||||
|
||||
@ -12,6 +12,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
- Use SPDX license data from SPDX themselves instead of spdx-license-list
|
||||
because it is deprecated and outdated
|
||||
- Require Python 3.9+
|
||||
- Don't run `fix.separators()` on title or abstract fields
|
||||
- Don't run whitespace or newline fixes on abstract fields
|
||||
- Ignore some common non-SPDX licenses
|
||||
- Ignore `__description` suffix in filenames meant for SAFBuilder when checking
|
||||
for uncommon file extensions
|
||||
|
||||
### Updated
|
||||
- Python dependencies
|
||||
|
||||
## [0.6.0] - 2022-09-02
|
||||
### Changed
|
||||
|
@ -90,12 +90,14 @@ def run(argv):
|
||||
|
||||
continue
|
||||
|
||||
# Fix: whitespace
|
||||
df[column] = df[column].apply(fix.whitespace, field_name=column)
|
||||
|
||||
# Fix: newlines
|
||||
if args.unsafe_fixes:
|
||||
df[column] = df[column].apply(fix.newlines, field_name=column)
|
||||
match = re.match(r"^.*?abstract.*$", column)
|
||||
if match is None:
|
||||
# Fix: whitespace
|
||||
df[column] = df[column].apply(fix.whitespace, field_name=column)
|
||||
|
||||
# Fix: newlines
|
||||
df[column] = df[column].apply(fix.newlines, field_name=column)
|
||||
|
||||
# Fix: missing space after comma. Only run on author and citation
|
||||
# fields for now, as this problem is mostly an issue in names.
|
||||
@ -121,10 +123,14 @@ def run(argv):
|
||||
# Fix: unnecessary Unicode
|
||||
df[column] = df[column].apply(fix.unnecessary_unicode)
|
||||
|
||||
# Fix: invalid and unnecessary multi-value separators
|
||||
df[column] = df[column].apply(fix.separators, field_name=column)
|
||||
# Run whitespace fix again after fixing invalid separators
|
||||
df[column] = df[column].apply(fix.whitespace, field_name=column)
|
||||
# Fix: invalid and unnecessary multi-value separators. Skip the title
|
||||
# and abstract fields because "|" is used to indicate something like
|
||||
# a subtitle.
|
||||
match = re.match(r"^.*?(abstract|title).*$", column)
|
||||
if match is None:
|
||||
df[column] = df[column].apply(fix.separators, field_name=column)
|
||||
# Run whitespace fix again after fixing invalid separators
|
||||
df[column] = df[column].apply(fix.whitespace, field_name=column)
|
||||
|
||||
# Fix: duplicate metadata values
|
||||
df[column] = df[column].apply(fix.duplicates, field_name=column)
|
||||
|
@ -33,7 +33,6 @@ def issn(field):
|
||||
|
||||
# Try to split multi-value field on "||" separator
|
||||
for value in field.split("||"):
|
||||
|
||||
if not stdnum_issn.is_valid(value):
|
||||
print(f"{Fore.RED}Invalid ISSN: {Fore.RESET}{value}")
|
||||
|
||||
@ -56,7 +55,6 @@ def isbn(field):
|
||||
|
||||
# Try to split multi-value field on "||" separator
|
||||
for value in field.split("||"):
|
||||
|
||||
if not stdnum_isbn.is_valid(value):
|
||||
print(f"{Fore.RED}Invalid ISBN: {Fore.RESET}{value}")
|
||||
|
||||
@ -173,7 +171,6 @@ def language(field):
|
||||
|
||||
# Try to split multi-value field on "||" separator
|
||||
for value in field.split("||"):
|
||||
|
||||
# After splitting, check if language value is 2 or 3 characters so we
|
||||
# can check it against ISO 639-1 or ISO 639-3 accordingly.
|
||||
if len(value) == 2:
|
||||
@ -286,6 +283,11 @@ def filename_extension(field):
|
||||
|
||||
# Iterate over all values
|
||||
for value in values:
|
||||
# Strip filename descriptions that are meant for SAFBuilder, for
|
||||
# example: Annual_Report_2020.pdf__description:Report
|
||||
if "__description" in value:
|
||||
value = value.split("__")[0]
|
||||
|
||||
# Assume filename extension does not match
|
||||
filename_extension_match = False
|
||||
|
||||
@ -312,8 +314,19 @@ def spdx_license_identifier(field):
|
||||
Prints the value if it is invalid.
|
||||
"""
|
||||
|
||||
# List of common non-SPDX licenses to ignore
|
||||
# See: https://ilri.github.io/cgspace-submission-guidelines/dcterms-license/dcterms-license.txt
|
||||
ignore_licenses = {
|
||||
"All rights reserved; no re-use allowed",
|
||||
"All rights reserved; self-archive copy only",
|
||||
"Copyrighted; Non-commercial educational use only",
|
||||
"Copyrighted; Non-commercial use only",
|
||||
"Copyrighted; all rights reserved",
|
||||
"Other",
|
||||
}
|
||||
|
||||
# Skip fields with missing values
|
||||
if pd.isna(field):
|
||||
if pd.isna(field) or field in ignore_licenses:
|
||||
return
|
||||
|
||||
spdx_licenses = load_spdx_licenses()
|
||||
|
@ -1,3 +1,3 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-only
|
||||
|
||||
VERSION = "0.6.0"
|
||||
VERSION = "0.6.1"
|
||||
|
17
data/abstract-check.csv
Normal file
17
data/abstract-check.csv
Normal file
@ -0,0 +1,17 @@
|
||||
id,dc.title,dcterms.abstract
|
||||
1,Normal item,This is an abstract
|
||||
2,Leading whitespace, This is an abstract
|
||||
3,Trailing whitespace,This is an abstract
|
||||
4,Consecutive whitespace,This is an abstract
|
||||
5,Newline,"This
|
||||
is an abstract"
|
||||
6,Newline with leading whitespace," This
|
||||
is an abstract"
|
||||
7,Newline with trailing whitespace,"This
|
||||
is an abstract "
|
||||
8,Newline with consecutive whitespace,"This
|
||||
is an abstract"
|
||||
9,Multiple newlines,"This
|
||||
is
|
||||
an
|
||||
abstract"
|
|
776
poetry.lock
generated
776
poetry.lock
generated
File diff suppressed because it is too large
Load Diff
@ -1,6 +1,6 @@
|
||||
[tool.poetry]
|
||||
name = "csv-metadata-quality"
|
||||
version = "0.6.0"
|
||||
version = "0.6.1"
|
||||
description="A simple, but opinionated CSV quality checking and fixing pipeline for CSVs in the DSpace ecosystem."
|
||||
authors = ["Alan Orth <alan.orth@gmail.com>"]
|
||||
license="GPL-3.0-only"
|
||||
@ -12,27 +12,31 @@ csv-metadata-quality = 'csv_metadata_quality.__main__:main'
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
python = "^3.9"
|
||||
pandas = "^1.5.1"
|
||||
python-stdnum = "^1.17"
|
||||
requests = "^2.28.1"
|
||||
requests-cache = "^0.9.7"
|
||||
pandas = "^1.5.2"
|
||||
python-stdnum = "^1.18"
|
||||
requests = "^2.28.2"
|
||||
requests-cache = "^0.9.8"
|
||||
langid = "^1.1.6"
|
||||
colorama = "^0.4.5"
|
||||
colorama = "^0.4.6"
|
||||
ftfy = "^6.1.1"
|
||||
country-converter = {git = "https://github.com/alanorth/country_converter.git", rev = "myanmar-region"}
|
||||
pycountry = {git = "https://github.com/alanorth/pycountry", rev = "iso-codes-4.12.0"}
|
||||
|
||||
[tool.poetry.dev-dependencies]
|
||||
pytest = "^7.2.0"
|
||||
flake8 = "^5.0.4"
|
||||
pytest = "^7.2.1"
|
||||
flake8 = "^6.0.0"
|
||||
pytest-clarity = "^1.0.1"
|
||||
black = "^22.10.0"
|
||||
isort = "^5.10.1"
|
||||
csvkit = "^1.0.7"
|
||||
black = "^23.1.0"
|
||||
isort = "^5.12.0"
|
||||
csvkit = "^1.1.0"
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
ipython = "^8.7.0"
|
||||
ipython = "^8.10.0"
|
||||
|
||||
[build-system]
|
||||
requires = ["poetry>=0.12"]
|
||||
build-backend = "poetry.masonry.api"
|
||||
|
||||
[tool.isort]
|
||||
profile = "black"
|
||||
line_length=88
|
||||
|
@ -1,80 +1,80 @@
|
||||
agate-dbf==0.2.2 ; python_version >= "3.9" and python_version < "4.0"
|
||||
agate-excel==0.2.5 ; python_version >= "3.9" and python_version < "4.0"
|
||||
agate-sql==0.5.8 ; python_version >= "3.9" and python_version < "4.0"
|
||||
agate==1.6.3 ; python_version >= "3.9" and python_version < "4.0"
|
||||
agate-sql==0.5.9 ; python_version >= "3.9" and python_version < "4.0"
|
||||
agate==1.7.1 ; python_version >= "3.9" and python_version < "4.0"
|
||||
appdirs==1.4.4 ; python_version >= "3.9" and python_version < "4.0"
|
||||
appnope==0.1.3 ; python_version >= "3.9" and python_version < "4.0" and sys_platform == "darwin"
|
||||
asttokens==2.2.1 ; python_version >= "3.9" and python_version < "4.0"
|
||||
attrs==22.1.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||
attrs==22.2.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||
babel==2.11.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||
backcall==0.2.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||
black==22.12.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||
black==23.1.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||
cattrs==22.2.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||
certifi==2022.12.7 ; python_version >= "3.9" and python_version < "4"
|
||||
charset-normalizer==2.1.1 ; python_version >= "3.9" and python_version < "4"
|
||||
charset-normalizer==3.0.1 ; python_version >= "3.9" and python_version < "4"
|
||||
click==8.1.3 ; python_version >= "3.9" and python_version < "4.0"
|
||||
colorama==0.4.6 ; python_version >= "3.9" and python_version < "4.0"
|
||||
commonmark==0.9.1 ; python_version >= "3.9" and python_version < "4.0"
|
||||
country-converter @ git+https://github.com/alanorth/country_converter.git@myanmar-region ; python_version >= "3.9" and python_version < "4.0"
|
||||
csvkit==1.0.7 ; python_version >= "3.9" and python_version < "4.0"
|
||||
csvkit==1.1.1 ; python_version >= "3.9" and python_version < "4.0"
|
||||
dbfread==2.0.7 ; python_version >= "3.9" and python_version < "4.0"
|
||||
decorator==5.1.1 ; python_version >= "3.9" and python_version < "4.0"
|
||||
et-xmlfile==1.1.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||
exceptiongroup==1.0.4 ; python_version >= "3.9" and python_version < "3.11"
|
||||
exceptiongroup==1.1.0 ; python_version >= "3.9" and python_version < "3.11"
|
||||
executing==1.2.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||
flake8==5.0.4 ; python_version >= "3.9" and python_version < "4.0"
|
||||
flake8==6.0.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||
ftfy==6.1.1 ; python_version >= "3.9" and python_version < "4"
|
||||
future==0.18.2 ; python_version >= "3.9" and python_version < "4.0"
|
||||
greenlet==2.0.1 ; python_version >= "3.9" and (platform_machine == "aarch64" or platform_machine == "ppc64le" or platform_machine == "x86_64" or platform_machine == "amd64" or platform_machine == "AMD64" or platform_machine == "win32" or platform_machine == "WIN32") and python_version < "4.0"
|
||||
greenlet==2.0.2 ; python_version >= "3.9" and (platform_machine == "aarch64" or platform_machine == "ppc64le" or platform_machine == "x86_64" or platform_machine == "amd64" or platform_machine == "AMD64" or platform_machine == "win32" or platform_machine == "WIN32") and python_version < "4.0"
|
||||
idna==3.4 ; python_version >= "3.9" and python_version < "4"
|
||||
iniconfig==1.1.1 ; python_version >= "3.9" and python_version < "4.0"
|
||||
ipython==8.7.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||
iniconfig==2.0.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||
ipython==8.10.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||
isodate==0.6.1 ; python_version >= "3.9" and python_version < "4.0"
|
||||
isort==5.11.1 ; python_version >= "3.9" and python_version < "4.0"
|
||||
isort==5.12.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||
jedi==0.18.2 ; python_version >= "3.9" and python_version < "4.0"
|
||||
langid==1.1.6 ; python_version >= "3.9" and python_version < "4.0"
|
||||
leather==0.3.4 ; python_version >= "3.9" and python_version < "4.0"
|
||||
markdown-it-py==2.2.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||
matplotlib-inline==0.1.6 ; python_version >= "3.9" and python_version < "4.0"
|
||||
mccabe==0.7.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||
mypy-extensions==0.4.3 ; python_version >= "3.9" and python_version < "4.0"
|
||||
numpy==1.23.5 ; python_version < "4.0" and python_version >= "3.9"
|
||||
mdurl==0.1.2 ; python_version >= "3.9" and python_version < "4.0"
|
||||
mypy-extensions==1.0.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||
numpy==1.24.2 ; python_version < "4.0" and python_version >= "3.9"
|
||||
olefile==0.46 ; python_version >= "3.9" and python_version < "4.0"
|
||||
openpyxl==3.0.10 ; python_version >= "3.9" and python_version < "4.0"
|
||||
packaging==22.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||
pandas==1.5.2 ; python_version >= "3.9" and python_version < "4.0"
|
||||
parsedatetime==2.4 ; python_version >= "3.9" and python_version < "4.0"
|
||||
openpyxl==3.1.1 ; python_version >= "3.9" and python_version < "4.0"
|
||||
packaging==23.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||
pandas==1.5.3 ; python_version >= "3.9" and python_version < "4.0"
|
||||
parsedatetime==2.6 ; python_version >= "3.9" and python_version < "4.0"
|
||||
parso==0.8.3 ; python_version >= "3.9" and python_version < "4.0"
|
||||
pathspec==0.10.3 ; python_version >= "3.9" and python_version < "4.0"
|
||||
pathspec==0.11.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||
pexpect==4.8.0 ; python_version >= "3.9" and python_version < "4.0" and sys_platform != "win32"
|
||||
pickleshare==0.7.5 ; python_version >= "3.9" and python_version < "4.0"
|
||||
platformdirs==2.6.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||
platformdirs==3.0.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||
pluggy==1.0.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||
pprintpp==0.4.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||
prompt-toolkit==3.0.36 ; python_version >= "3.9" and python_version < "4.0"
|
||||
prompt-toolkit==3.0.37 ; python_version >= "3.9" and python_version < "4.0"
|
||||
ptyprocess==0.7.0 ; python_version >= "3.9" and python_version < "4.0" and sys_platform != "win32"
|
||||
pure-eval==0.2.2 ; python_version >= "3.9" and python_version < "4.0"
|
||||
pycodestyle==2.9.1 ; python_version >= "3.9" and python_version < "4.0"
|
||||
pycodestyle==2.10.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||
pycountry @ git+https://github.com/alanorth/pycountry@iso-codes-4.12.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||
pyflakes==2.5.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||
pygments==2.13.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||
pyflakes==3.0.1 ; python_version >= "3.9" and python_version < "4.0"
|
||||
pygments==2.14.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||
pytest-clarity==1.0.1 ; python_version >= "3.9" and python_version < "4.0"
|
||||
pytest==7.2.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||
pytest==7.2.1 ; python_version >= "3.9" and python_version < "4.0"
|
||||
python-dateutil==2.8.2 ; python_version >= "3.9" and python_version < "4.0"
|
||||
python-slugify==7.0.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||
python-slugify==8.0.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||
python-stdnum==1.18 ; python_version >= "3.9" and python_version < "4.0"
|
||||
pytimeparse==1.1.8 ; python_version >= "3.9" and python_version < "4.0"
|
||||
pytz==2022.6 ; python_version >= "3.9" and python_version < "4.0"
|
||||
requests-cache==0.9.7 ; python_version >= "3.9" and python_version < "4.0"
|
||||
requests==2.28.1 ; python_version >= "3.9" and python_version < "4"
|
||||
rich==12.6.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||
pytz==2022.7.1 ; python_version >= "3.9" and python_version < "4.0"
|
||||
requests-cache==0.9.8 ; python_version >= "3.9" and python_version < "4.0"
|
||||
requests==2.28.2 ; python_version >= "3.9" and python_version < "4"
|
||||
rich==13.3.1 ; python_version >= "3.9" and python_version < "4.0"
|
||||
six==1.16.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||
sqlalchemy==1.4.45 ; python_version >= "3.9" and python_version < "4.0"
|
||||
sqlalchemy==1.4.46 ; python_version >= "3.9" and python_version < "4.0"
|
||||
stack-data==0.6.2 ; python_version >= "3.9" and python_version < "4.0"
|
||||
text-unidecode==1.3 ; python_version >= "3.9" and python_version < "4.0"
|
||||
tomli==2.0.1 ; python_version >= "3.9" and python_full_version < "3.11.0a7"
|
||||
traitlets==5.7.1 ; python_version >= "3.9" and python_version < "4.0"
|
||||
typing-extensions==4.4.0 ; python_version >= "3.9" and python_version < "3.10"
|
||||
tomli==2.0.1 ; python_version >= "3.9" and python_version < "3.11"
|
||||
traitlets==5.9.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||
typing-extensions==4.5.0 ; python_version >= "3.9" and python_version < "3.10"
|
||||
url-normalize==1.4.3 ; python_version >= "3.9" and python_version < "4.0"
|
||||
urllib3==1.26.13 ; python_version >= "3.9" and python_version < "4"
|
||||
wcwidth==0.2.5 ; python_version >= "3.9" and python_version < "4"
|
||||
urllib3==1.26.14 ; python_version >= "3.9" and python_version < "4"
|
||||
wcwidth==0.2.6 ; python_version >= "3.9" and python_version < "4"
|
||||
xlrd==2.0.1 ; python_version >= "3.9" and python_version < "4.0"
|
||||
|
@ -1,23 +1,23 @@
|
||||
appdirs==1.4.4 ; python_version >= "3.9" and python_version < "4.0"
|
||||
attrs==22.1.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||
attrs==22.2.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||
cattrs==22.2.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||
certifi==2022.12.7 ; python_version >= "3.9" and python_version < "4"
|
||||
charset-normalizer==2.1.1 ; python_version >= "3.9" and python_version < "4"
|
||||
charset-normalizer==3.0.1 ; python_version >= "3.9" and python_version < "4"
|
||||
colorama==0.4.6 ; python_version >= "3.9" and python_version < "4.0"
|
||||
country-converter @ git+https://github.com/alanorth/country_converter.git@myanmar-region ; python_version >= "3.9" and python_version < "4.0"
|
||||
exceptiongroup==1.0.4 ; python_version >= "3.9" and python_version < "3.11"
|
||||
exceptiongroup==1.1.0 ; python_version >= "3.9" and python_version < "3.11"
|
||||
ftfy==6.1.1 ; python_version >= "3.9" and python_version < "4"
|
||||
idna==3.4 ; python_version >= "3.9" and python_version < "4"
|
||||
langid==1.1.6 ; python_version >= "3.9" and python_version < "4.0"
|
||||
numpy==1.23.5 ; python_version < "4.0" and python_version >= "3.9"
|
||||
pandas==1.5.2 ; python_version >= "3.9" and python_version < "4.0"
|
||||
numpy==1.24.2 ; python_version < "4.0" and python_version >= "3.9"
|
||||
pandas==1.5.3 ; python_version >= "3.9" and python_version < "4.0"
|
||||
pycountry @ git+https://github.com/alanorth/pycountry@iso-codes-4.12.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||
python-dateutil==2.8.2 ; python_version >= "3.9" and python_version < "4.0"
|
||||
python-stdnum==1.18 ; python_version >= "3.9" and python_version < "4.0"
|
||||
pytz==2022.6 ; python_version >= "3.9" and python_version < "4.0"
|
||||
requests-cache==0.9.7 ; python_version >= "3.9" and python_version < "4.0"
|
||||
requests==2.28.1 ; python_version >= "3.9" and python_version < "4"
|
||||
pytz==2022.7.1 ; python_version >= "3.9" and python_version < "4.0"
|
||||
requests-cache==0.9.8 ; python_version >= "3.9" and python_version < "4.0"
|
||||
requests==2.28.2 ; python_version >= "3.9" and python_version < "4"
|
||||
six==1.16.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||
url-normalize==1.4.3 ; python_version >= "3.9" and python_version < "4.0"
|
||||
urllib3==1.26.13 ; python_version >= "3.9" and python_version < "4"
|
||||
wcwidth==0.2.5 ; python_version >= "3.9" and python_version < "4"
|
||||
urllib3==1.26.14 ; python_version >= "3.9" and python_version < "4"
|
||||
wcwidth==0.2.6 ; python_version >= "3.9" and python_version < "4"
|
||||
|
@ -1,6 +0,0 @@
|
||||
[isort]
|
||||
multi_line_output=3
|
||||
include_trailing_comma=True
|
||||
force_grid_wrap=0
|
||||
use_parentheses=True
|
||||
line_length=88
|
3
setup.py
3
setup.py
@ -14,7 +14,7 @@ install_requires = [
|
||||
|
||||
setuptools.setup(
|
||||
name="csv-metadata-quality",
|
||||
version="0.6.0",
|
||||
version="0.6.1",
|
||||
author="Alan Orth",
|
||||
author_email="aorth@mjanja.ch",
|
||||
description="A simple, but opinionated CSV quality checking and fixing pipeline for CSVs in the DSpace ecosystem.",
|
||||
@ -23,7 +23,6 @@ setuptools.setup(
|
||||
long_description_content_type="text/markdown",
|
||||
url="https://github.com/alanorth/csv-metadata-quality",
|
||||
classifiers=[
|
||||
"Programming Language :: Python :: 3.8",
|
||||
"Programming Language :: Python :: 3.9",
|
||||
"Programming Language :: Python :: 3.10",
|
||||
"License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
|
||||
|
@ -25,7 +25,7 @@ def test_check_valid_issn():
|
||||
|
||||
result = check.issn(value)
|
||||
|
||||
assert result == None
|
||||
assert result is None
|
||||
|
||||
|
||||
def test_check_invalid_isbn(capsys):
|
||||
@ -46,7 +46,7 @@ def test_check_valid_isbn():
|
||||
|
||||
result = check.isbn(value)
|
||||
|
||||
assert result == None
|
||||
assert result is None
|
||||
|
||||
|
||||
def test_check_missing_date(capsys):
|
||||
@ -102,7 +102,7 @@ def test_check_valid_date():
|
||||
|
||||
result = check.date(value, field_name)
|
||||
|
||||
assert result == None
|
||||
assert result is None
|
||||
|
||||
|
||||
def test_check_suspicious_characters(capsys):
|
||||
@ -128,7 +128,7 @@ def test_check_valid_iso639_1_language():
|
||||
|
||||
result = check.language(value)
|
||||
|
||||
assert result == None
|
||||
assert result is None
|
||||
|
||||
|
||||
def test_check_valid_iso639_3_language():
|
||||
@ -138,7 +138,7 @@ def test_check_valid_iso639_3_language():
|
||||
|
||||
result = check.language(value)
|
||||
|
||||
assert result == None
|
||||
assert result is None
|
||||
|
||||
|
||||
def test_check_invalid_iso639_1_language(capsys):
|
||||
@ -249,7 +249,7 @@ def test_check_common_filename_extension():
|
||||
|
||||
result = check.filename_extension(value)
|
||||
|
||||
assert result == None
|
||||
assert result is None
|
||||
|
||||
|
||||
def test_check_incorrect_iso_639_1_language(capsys):
|
||||
@ -305,7 +305,7 @@ def test_check_correct_iso_639_1_language():
|
||||
|
||||
result = experimental.correct_language(series, exclude)
|
||||
|
||||
assert result == None
|
||||
assert result is None
|
||||
|
||||
|
||||
def test_check_correct_iso_639_3_language():
|
||||
@ -321,7 +321,7 @@ def test_check_correct_iso_639_3_language():
|
||||
|
||||
result = experimental.correct_language(series, exclude)
|
||||
|
||||
assert result == None
|
||||
assert result is None
|
||||
|
||||
|
||||
def test_check_valid_spdx_license_identifier():
|
||||
@ -331,7 +331,7 @@ def test_check_valid_spdx_license_identifier():
|
||||
|
||||
result = check.spdx_license_identifier(license)
|
||||
|
||||
assert result == None
|
||||
assert result is None
|
||||
|
||||
|
||||
def test_check_invalid_spdx_license_identifier(capsys):
|
||||
@ -339,7 +339,7 @@ def test_check_invalid_spdx_license_identifier(capsys):
|
||||
|
||||
license = "CC-BY-SA"
|
||||
|
||||
result = check.spdx_license_identifier(license)
|
||||
check.spdx_license_identifier(license)
|
||||
|
||||
captured = capsys.readouterr()
|
||||
assert (
|
||||
@ -362,7 +362,7 @@ def test_check_duplicate_item(capsys):
|
||||
}
|
||||
df = pd.DataFrame(data=d)
|
||||
|
||||
result = check.duplicate_items(df)
|
||||
check.duplicate_items(df)
|
||||
|
||||
captured = capsys.readouterr()
|
||||
assert (
|
||||
@ -379,7 +379,7 @@ def test_check_no_mojibake():
|
||||
|
||||
result = check.mojibake(field, field_name)
|
||||
|
||||
assert result == None
|
||||
assert result is None
|
||||
|
||||
|
||||
def test_check_mojibake(capsys):
|
||||
@ -388,7 +388,7 @@ def test_check_mojibake(capsys):
|
||||
field = "CIAT Publicaçao"
|
||||
field_name = "dcterms.isPartOf"
|
||||
|
||||
result = check.mojibake(field, field_name)
|
||||
check.mojibake(field, field_name)
|
||||
|
||||
captured = capsys.readouterr()
|
||||
assert (
|
||||
@ -411,7 +411,7 @@ def test_check_doi_field():
|
||||
|
||||
result = check.citation_doi(series, exclude)
|
||||
|
||||
assert result == None
|
||||
assert result is None
|
||||
|
||||
|
||||
def test_check_doi_only_in_citation(capsys):
|
||||
@ -448,7 +448,7 @@ def test_title_in_citation():
|
||||
|
||||
result = check.title_in_citation(series, exclude)
|
||||
|
||||
assert result == None
|
||||
assert result is None
|
||||
|
||||
|
||||
def test_title_not_in_citation(capsys):
|
||||
@ -485,7 +485,7 @@ def test_country_matches_region():
|
||||
|
||||
result = check.countries_match_regions(series, exclude)
|
||||
|
||||
assert result == None
|
||||
assert result is None
|
||||
|
||||
|
||||
def test_country_not_matching_region(capsys):
|
||||
|
Reference in New Issue
Block a user