mirror of
https://github.com/ilri/csv-metadata-quality.git
synced 2025-07-23 06:21:45 +02:00
Compare commits
23 Commits
8c23382b22
...
v0.6.1
Author | SHA1 | Date | |
---|---|---|---|
fdccdf7318
|
|||
ff2c986eec
|
|||
547574866e
|
|||
8aa7b93d87
|
|||
53fdb50906
|
|||
3e0e9a7f8b
|
|||
03d824b78e
|
|||
8bc4cd419c
|
|||
bde38e9ed4
|
|||
8db1e36a6d
|
|||
fbb625be5c
|
|||
084b970798
|
|||
171b35b015
|
|||
545bb8cd0c
|
|||
d5afbad788
|
|||
d40c9ed97a
|
|||
c4a2ee8563
|
|||
3596381d03
|
|||
5abd32a41f
|
|||
0ed0fabe21
|
|||
d5cfec65bd
|
|||
66893753ba
|
|||
57be05ebb6
|
10
CHANGELOG.md
10
CHANGELOG.md
@ -4,7 +4,7 @@ All notable changes to this project will be documented in this file.
|
|||||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||||
|
|
||||||
## Unreleased
|
## [0.6.1] - 2023-02-23
|
||||||
### Fixed
|
### Fixed
|
||||||
- Missing region check should ignore subregion field, if it exists
|
- Missing region check should ignore subregion field, if it exists
|
||||||
|
|
||||||
@ -12,6 +12,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|||||||
- Use SPDX license data from SPDX themselves instead of spdx-license-list
|
- Use SPDX license data from SPDX themselves instead of spdx-license-list
|
||||||
because it is deprecated and outdated
|
because it is deprecated and outdated
|
||||||
- Require Python 3.9+
|
- Require Python 3.9+
|
||||||
|
- Don't run `fix.separators()` on title or abstract fields
|
||||||
|
- Don't run whitespace or newline fixes on abstract fields
|
||||||
|
- Ignore some common non-SPDX licenses
|
||||||
|
- Ignore `__description` suffix in filenames meant for SAFBuilder when checking
|
||||||
|
for uncommon file extensions
|
||||||
|
|
||||||
|
### Updated
|
||||||
|
- Python dependencies
|
||||||
|
|
||||||
## [0.6.0] - 2022-09-02
|
## [0.6.0] - 2022-09-02
|
||||||
### Changed
|
### Changed
|
||||||
|
@ -90,12 +90,14 @@ def run(argv):
|
|||||||
|
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Fix: whitespace
|
|
||||||
df[column] = df[column].apply(fix.whitespace, field_name=column)
|
|
||||||
|
|
||||||
# Fix: newlines
|
|
||||||
if args.unsafe_fixes:
|
if args.unsafe_fixes:
|
||||||
df[column] = df[column].apply(fix.newlines, field_name=column)
|
match = re.match(r"^.*?abstract.*$", column)
|
||||||
|
if match is None:
|
||||||
|
# Fix: whitespace
|
||||||
|
df[column] = df[column].apply(fix.whitespace, field_name=column)
|
||||||
|
|
||||||
|
# Fix: newlines
|
||||||
|
df[column] = df[column].apply(fix.newlines, field_name=column)
|
||||||
|
|
||||||
# Fix: missing space after comma. Only run on author and citation
|
# Fix: missing space after comma. Only run on author and citation
|
||||||
# fields for now, as this problem is mostly an issue in names.
|
# fields for now, as this problem is mostly an issue in names.
|
||||||
@ -121,10 +123,14 @@ def run(argv):
|
|||||||
# Fix: unnecessary Unicode
|
# Fix: unnecessary Unicode
|
||||||
df[column] = df[column].apply(fix.unnecessary_unicode)
|
df[column] = df[column].apply(fix.unnecessary_unicode)
|
||||||
|
|
||||||
# Fix: invalid and unnecessary multi-value separators
|
# Fix: invalid and unnecessary multi-value separators. Skip the title
|
||||||
df[column] = df[column].apply(fix.separators, field_name=column)
|
# and abstract fields because "|" is used to indicate something like
|
||||||
# Run whitespace fix again after fixing invalid separators
|
# a subtitle.
|
||||||
df[column] = df[column].apply(fix.whitespace, field_name=column)
|
match = re.match(r"^.*?(abstract|title).*$", column)
|
||||||
|
if match is None:
|
||||||
|
df[column] = df[column].apply(fix.separators, field_name=column)
|
||||||
|
# Run whitespace fix again after fixing invalid separators
|
||||||
|
df[column] = df[column].apply(fix.whitespace, field_name=column)
|
||||||
|
|
||||||
# Fix: duplicate metadata values
|
# Fix: duplicate metadata values
|
||||||
df[column] = df[column].apply(fix.duplicates, field_name=column)
|
df[column] = df[column].apply(fix.duplicates, field_name=column)
|
||||||
|
@ -33,7 +33,6 @@ def issn(field):
|
|||||||
|
|
||||||
# Try to split multi-value field on "||" separator
|
# Try to split multi-value field on "||" separator
|
||||||
for value in field.split("||"):
|
for value in field.split("||"):
|
||||||
|
|
||||||
if not stdnum_issn.is_valid(value):
|
if not stdnum_issn.is_valid(value):
|
||||||
print(f"{Fore.RED}Invalid ISSN: {Fore.RESET}{value}")
|
print(f"{Fore.RED}Invalid ISSN: {Fore.RESET}{value}")
|
||||||
|
|
||||||
@ -56,7 +55,6 @@ def isbn(field):
|
|||||||
|
|
||||||
# Try to split multi-value field on "||" separator
|
# Try to split multi-value field on "||" separator
|
||||||
for value in field.split("||"):
|
for value in field.split("||"):
|
||||||
|
|
||||||
if not stdnum_isbn.is_valid(value):
|
if not stdnum_isbn.is_valid(value):
|
||||||
print(f"{Fore.RED}Invalid ISBN: {Fore.RESET}{value}")
|
print(f"{Fore.RED}Invalid ISBN: {Fore.RESET}{value}")
|
||||||
|
|
||||||
@ -173,7 +171,6 @@ def language(field):
|
|||||||
|
|
||||||
# Try to split multi-value field on "||" separator
|
# Try to split multi-value field on "||" separator
|
||||||
for value in field.split("||"):
|
for value in field.split("||"):
|
||||||
|
|
||||||
# After splitting, check if language value is 2 or 3 characters so we
|
# After splitting, check if language value is 2 or 3 characters so we
|
||||||
# can check it against ISO 639-1 or ISO 639-3 accordingly.
|
# can check it against ISO 639-1 or ISO 639-3 accordingly.
|
||||||
if len(value) == 2:
|
if len(value) == 2:
|
||||||
@ -286,6 +283,11 @@ def filename_extension(field):
|
|||||||
|
|
||||||
# Iterate over all values
|
# Iterate over all values
|
||||||
for value in values:
|
for value in values:
|
||||||
|
# Strip filename descriptions that are meant for SAF Bundler, for
|
||||||
|
# example: Annual_Report_2020.pdf__description:Report
|
||||||
|
if "__description" in value:
|
||||||
|
value = value.split("__")[0]
|
||||||
|
|
||||||
# Assume filename extension does not match
|
# Assume filename extension does not match
|
||||||
filename_extension_match = False
|
filename_extension_match = False
|
||||||
|
|
||||||
@ -312,8 +314,19 @@ def spdx_license_identifier(field):
|
|||||||
Prints the value if it is invalid.
|
Prints the value if it is invalid.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
# List of common non-SPDX licenses to ignore
|
||||||
|
# See: https://ilri.github.io/cgspace-submission-guidelines/dcterms-license/dcterms-license.txt
|
||||||
|
ignore_licenses = {
|
||||||
|
"All rights reserved; no re-use allowed",
|
||||||
|
"All rights reserved; self-archive copy only",
|
||||||
|
"Copyrighted; Non-commercial educational use only",
|
||||||
|
"Copyrighted; Non-commercial use only",
|
||||||
|
"Copyrighted; all rights reserved",
|
||||||
|
"Other",
|
||||||
|
}
|
||||||
|
|
||||||
# Skip fields with missing values
|
# Skip fields with missing values
|
||||||
if pd.isna(field):
|
if pd.isna(field) or field in ignore_licenses:
|
||||||
return
|
return
|
||||||
|
|
||||||
spdx_licenses = load_spdx_licenses()
|
spdx_licenses = load_spdx_licenses()
|
||||||
|
@ -1,3 +1,3 @@
|
|||||||
# SPDX-License-Identifier: GPL-3.0-only
|
# SPDX-License-Identifier: GPL-3.0-only
|
||||||
|
|
||||||
VERSION = "0.6.0"
|
VERSION = "0.6.1"
|
||||||
|
17
data/abstract-check.csv
Normal file
17
data/abstract-check.csv
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
id,dc.title,dcterms.abstract
|
||||||
|
1,Normal item,This is an abstract
|
||||||
|
2,Leading whitespace, This is an abstract
|
||||||
|
3,Trailing whitespace,This is an abstract
|
||||||
|
4,Consecutive whitespace,This is an abstract
|
||||||
|
5,Newline,"This
|
||||||
|
is an abstract"
|
||||||
|
6,Newline with leading whitespace," This
|
||||||
|
is an abstract"
|
||||||
|
7,Newline with trailing whitespace,"This
|
||||||
|
is an abstract "
|
||||||
|
8,Newline with consecutive whitespace,"This
|
||||||
|
is an abstract"
|
||||||
|
9,Multiple newlines,"This
|
||||||
|
is
|
||||||
|
an
|
||||||
|
abstract"
|
|
776
poetry.lock
generated
776
poetry.lock
generated
File diff suppressed because it is too large
Load Diff
@ -1,6 +1,6 @@
|
|||||||
[tool.poetry]
|
[tool.poetry]
|
||||||
name = "csv-metadata-quality"
|
name = "csv-metadata-quality"
|
||||||
version = "0.6.0"
|
version = "0.6.1"
|
||||||
description="A simple, but opinionated CSV quality checking and fixing pipeline for CSVs in the DSpace ecosystem."
|
description="A simple, but opinionated CSV quality checking and fixing pipeline for CSVs in the DSpace ecosystem."
|
||||||
authors = ["Alan Orth <alan.orth@gmail.com>"]
|
authors = ["Alan Orth <alan.orth@gmail.com>"]
|
||||||
license="GPL-3.0-only"
|
license="GPL-3.0-only"
|
||||||
@ -12,27 +12,31 @@ csv-metadata-quality = 'csv_metadata_quality.__main__:main'
|
|||||||
|
|
||||||
[tool.poetry.dependencies]
|
[tool.poetry.dependencies]
|
||||||
python = "^3.9"
|
python = "^3.9"
|
||||||
pandas = "^1.5.1"
|
pandas = "^1.5.2"
|
||||||
python-stdnum = "^1.17"
|
python-stdnum = "^1.18"
|
||||||
requests = "^2.28.1"
|
requests = "^2.28.2"
|
||||||
requests-cache = "^0.9.7"
|
requests-cache = "^0.9.8"
|
||||||
langid = "^1.1.6"
|
langid = "^1.1.6"
|
||||||
colorama = "^0.4.5"
|
colorama = "^0.4.6"
|
||||||
ftfy = "^6.1.1"
|
ftfy = "^6.1.1"
|
||||||
country-converter = {git = "https://github.com/alanorth/country_converter.git", rev = "myanmar-region"}
|
country-converter = {git = "https://github.com/alanorth/country_converter.git", rev = "myanmar-region"}
|
||||||
pycountry = {git = "https://github.com/alanorth/pycountry", rev = "iso-codes-4.12.0"}
|
pycountry = {git = "https://github.com/alanorth/pycountry", rev = "iso-codes-4.12.0"}
|
||||||
|
|
||||||
[tool.poetry.dev-dependencies]
|
[tool.poetry.dev-dependencies]
|
||||||
pytest = "^7.2.0"
|
pytest = "^7.2.1"
|
||||||
flake8 = "^5.0.4"
|
flake8 = "^6.0.0"
|
||||||
pytest-clarity = "^1.0.1"
|
pytest-clarity = "^1.0.1"
|
||||||
black = "^22.10.0"
|
black = "^23.1.0"
|
||||||
isort = "^5.10.1"
|
isort = "^5.12.0"
|
||||||
csvkit = "^1.0.7"
|
csvkit = "^1.1.0"
|
||||||
|
|
||||||
[tool.poetry.group.dev.dependencies]
|
[tool.poetry.group.dev.dependencies]
|
||||||
ipython = "^8.7.0"
|
ipython = "^8.10.0"
|
||||||
|
|
||||||
[build-system]
|
[build-system]
|
||||||
requires = ["poetry>=0.12"]
|
requires = ["poetry>=0.12"]
|
||||||
build-backend = "poetry.masonry.api"
|
build-backend = "poetry.masonry.api"
|
||||||
|
|
||||||
|
[tool.isort]
|
||||||
|
profile = "black"
|
||||||
|
line_length=88
|
||||||
|
@ -1,80 +1,80 @@
|
|||||||
agate-dbf==0.2.2 ; python_version >= "3.9" and python_version < "4.0"
|
agate-dbf==0.2.2 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
agate-excel==0.2.5 ; python_version >= "3.9" and python_version < "4.0"
|
agate-excel==0.2.5 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
agate-sql==0.5.8 ; python_version >= "3.9" and python_version < "4.0"
|
agate-sql==0.5.9 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
agate==1.6.3 ; python_version >= "3.9" and python_version < "4.0"
|
agate==1.7.1 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
appdirs==1.4.4 ; python_version >= "3.9" and python_version < "4.0"
|
appdirs==1.4.4 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
appnope==0.1.3 ; python_version >= "3.9" and python_version < "4.0" and sys_platform == "darwin"
|
appnope==0.1.3 ; python_version >= "3.9" and python_version < "4.0" and sys_platform == "darwin"
|
||||||
asttokens==2.2.1 ; python_version >= "3.9" and python_version < "4.0"
|
asttokens==2.2.1 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
attrs==22.1.0 ; python_version >= "3.9" and python_version < "4.0"
|
attrs==22.2.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
babel==2.11.0 ; python_version >= "3.9" and python_version < "4.0"
|
babel==2.11.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
backcall==0.2.0 ; python_version >= "3.9" and python_version < "4.0"
|
backcall==0.2.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
black==22.12.0 ; python_version >= "3.9" and python_version < "4.0"
|
black==23.1.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
cattrs==22.2.0 ; python_version >= "3.9" and python_version < "4.0"
|
cattrs==22.2.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
certifi==2022.12.7 ; python_version >= "3.9" and python_version < "4"
|
certifi==2022.12.7 ; python_version >= "3.9" and python_version < "4"
|
||||||
charset-normalizer==2.1.1 ; python_version >= "3.9" and python_version < "4"
|
charset-normalizer==3.0.1 ; python_version >= "3.9" and python_version < "4"
|
||||||
click==8.1.3 ; python_version >= "3.9" and python_version < "4.0"
|
click==8.1.3 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
colorama==0.4.6 ; python_version >= "3.9" and python_version < "4.0"
|
colorama==0.4.6 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
commonmark==0.9.1 ; python_version >= "3.9" and python_version < "4.0"
|
|
||||||
country-converter @ git+https://github.com/alanorth/country_converter.git@myanmar-region ; python_version >= "3.9" and python_version < "4.0"
|
country-converter @ git+https://github.com/alanorth/country_converter.git@myanmar-region ; python_version >= "3.9" and python_version < "4.0"
|
||||||
csvkit==1.0.7 ; python_version >= "3.9" and python_version < "4.0"
|
csvkit==1.1.1 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
dbfread==2.0.7 ; python_version >= "3.9" and python_version < "4.0"
|
dbfread==2.0.7 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
decorator==5.1.1 ; python_version >= "3.9" and python_version < "4.0"
|
decorator==5.1.1 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
et-xmlfile==1.1.0 ; python_version >= "3.9" and python_version < "4.0"
|
et-xmlfile==1.1.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
exceptiongroup==1.0.4 ; python_version >= "3.9" and python_version < "3.11"
|
exceptiongroup==1.1.0 ; python_version >= "3.9" and python_version < "3.11"
|
||||||
executing==1.2.0 ; python_version >= "3.9" and python_version < "4.0"
|
executing==1.2.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
flake8==5.0.4 ; python_version >= "3.9" and python_version < "4.0"
|
flake8==6.0.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
ftfy==6.1.1 ; python_version >= "3.9" and python_version < "4"
|
ftfy==6.1.1 ; python_version >= "3.9" and python_version < "4"
|
||||||
future==0.18.2 ; python_version >= "3.9" and python_version < "4.0"
|
greenlet==2.0.2 ; python_version >= "3.9" and (platform_machine == "aarch64" or platform_machine == "ppc64le" or platform_machine == "x86_64" or platform_machine == "amd64" or platform_machine == "AMD64" or platform_machine == "win32" or platform_machine == "WIN32") and python_version < "4.0"
|
||||||
greenlet==2.0.1 ; python_version >= "3.9" and (platform_machine == "aarch64" or platform_machine == "ppc64le" or platform_machine == "x86_64" or platform_machine == "amd64" or platform_machine == "AMD64" or platform_machine == "win32" or platform_machine == "WIN32") and python_version < "4.0"
|
|
||||||
idna==3.4 ; python_version >= "3.9" and python_version < "4"
|
idna==3.4 ; python_version >= "3.9" and python_version < "4"
|
||||||
iniconfig==1.1.1 ; python_version >= "3.9" and python_version < "4.0"
|
iniconfig==2.0.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
ipython==8.7.0 ; python_version >= "3.9" and python_version < "4.0"
|
ipython==8.10.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
isodate==0.6.1 ; python_version >= "3.9" and python_version < "4.0"
|
isodate==0.6.1 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
isort==5.11.1 ; python_version >= "3.9" and python_version < "4.0"
|
isort==5.12.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
jedi==0.18.2 ; python_version >= "3.9" and python_version < "4.0"
|
jedi==0.18.2 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
langid==1.1.6 ; python_version >= "3.9" and python_version < "4.0"
|
langid==1.1.6 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
leather==0.3.4 ; python_version >= "3.9" and python_version < "4.0"
|
leather==0.3.4 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
|
markdown-it-py==2.2.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
matplotlib-inline==0.1.6 ; python_version >= "3.9" and python_version < "4.0"
|
matplotlib-inline==0.1.6 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
mccabe==0.7.0 ; python_version >= "3.9" and python_version < "4.0"
|
mccabe==0.7.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
mypy-extensions==0.4.3 ; python_version >= "3.9" and python_version < "4.0"
|
mdurl==0.1.2 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
numpy==1.23.5 ; python_version < "4.0" and python_version >= "3.9"
|
mypy-extensions==1.0.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
|
numpy==1.24.2 ; python_version < "4.0" and python_version >= "3.9"
|
||||||
olefile==0.46 ; python_version >= "3.9" and python_version < "4.0"
|
olefile==0.46 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
openpyxl==3.0.10 ; python_version >= "3.9" and python_version < "4.0"
|
openpyxl==3.1.1 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
packaging==22.0 ; python_version >= "3.9" and python_version < "4.0"
|
packaging==23.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
pandas==1.5.2 ; python_version >= "3.9" and python_version < "4.0"
|
pandas==1.5.3 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
parsedatetime==2.4 ; python_version >= "3.9" and python_version < "4.0"
|
parsedatetime==2.6 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
parso==0.8.3 ; python_version >= "3.9" and python_version < "4.0"
|
parso==0.8.3 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
pathspec==0.10.3 ; python_version >= "3.9" and python_version < "4.0"
|
pathspec==0.11.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
pexpect==4.8.0 ; python_version >= "3.9" and python_version < "4.0" and sys_platform != "win32"
|
pexpect==4.8.0 ; python_version >= "3.9" and python_version < "4.0" and sys_platform != "win32"
|
||||||
pickleshare==0.7.5 ; python_version >= "3.9" and python_version < "4.0"
|
pickleshare==0.7.5 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
platformdirs==2.6.0 ; python_version >= "3.9" and python_version < "4.0"
|
platformdirs==3.0.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
pluggy==1.0.0 ; python_version >= "3.9" and python_version < "4.0"
|
pluggy==1.0.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
pprintpp==0.4.0 ; python_version >= "3.9" and python_version < "4.0"
|
pprintpp==0.4.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
prompt-toolkit==3.0.36 ; python_version >= "3.9" and python_version < "4.0"
|
prompt-toolkit==3.0.37 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
ptyprocess==0.7.0 ; python_version >= "3.9" and python_version < "4.0" and sys_platform != "win32"
|
ptyprocess==0.7.0 ; python_version >= "3.9" and python_version < "4.0" and sys_platform != "win32"
|
||||||
pure-eval==0.2.2 ; python_version >= "3.9" and python_version < "4.0"
|
pure-eval==0.2.2 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
pycodestyle==2.9.1 ; python_version >= "3.9" and python_version < "4.0"
|
pycodestyle==2.10.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
pycountry @ git+https://github.com/alanorth/pycountry@iso-codes-4.12.0 ; python_version >= "3.9" and python_version < "4.0"
|
pycountry @ git+https://github.com/alanorth/pycountry@iso-codes-4.12.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
pyflakes==2.5.0 ; python_version >= "3.9" and python_version < "4.0"
|
pyflakes==3.0.1 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
pygments==2.13.0 ; python_version >= "3.9" and python_version < "4.0"
|
pygments==2.14.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
pytest-clarity==1.0.1 ; python_version >= "3.9" and python_version < "4.0"
|
pytest-clarity==1.0.1 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
pytest==7.2.0 ; python_version >= "3.9" and python_version < "4.0"
|
pytest==7.2.1 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
python-dateutil==2.8.2 ; python_version >= "3.9" and python_version < "4.0"
|
python-dateutil==2.8.2 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
python-slugify==7.0.0 ; python_version >= "3.9" and python_version < "4.0"
|
python-slugify==8.0.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
python-stdnum==1.18 ; python_version >= "3.9" and python_version < "4.0"
|
python-stdnum==1.18 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
pytimeparse==1.1.8 ; python_version >= "3.9" and python_version < "4.0"
|
pytimeparse==1.1.8 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
pytz==2022.6 ; python_version >= "3.9" and python_version < "4.0"
|
pytz==2022.7.1 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
requests-cache==0.9.7 ; python_version >= "3.9" and python_version < "4.0"
|
requests-cache==0.9.8 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
requests==2.28.1 ; python_version >= "3.9" and python_version < "4"
|
requests==2.28.2 ; python_version >= "3.9" and python_version < "4"
|
||||||
rich==12.6.0 ; python_version >= "3.9" and python_version < "4.0"
|
rich==13.3.1 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
six==1.16.0 ; python_version >= "3.9" and python_version < "4.0"
|
six==1.16.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
sqlalchemy==1.4.45 ; python_version >= "3.9" and python_version < "4.0"
|
sqlalchemy==1.4.46 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
stack-data==0.6.2 ; python_version >= "3.9" and python_version < "4.0"
|
stack-data==0.6.2 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
text-unidecode==1.3 ; python_version >= "3.9" and python_version < "4.0"
|
text-unidecode==1.3 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
tomli==2.0.1 ; python_version >= "3.9" and python_full_version < "3.11.0a7"
|
tomli==2.0.1 ; python_version >= "3.9" and python_version < "3.11"
|
||||||
traitlets==5.7.1 ; python_version >= "3.9" and python_version < "4.0"
|
traitlets==5.9.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
typing-extensions==4.4.0 ; python_version >= "3.9" and python_version < "3.10"
|
typing-extensions==4.5.0 ; python_version >= "3.9" and python_version < "3.10"
|
||||||
url-normalize==1.4.3 ; python_version >= "3.9" and python_version < "4.0"
|
url-normalize==1.4.3 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
urllib3==1.26.13 ; python_version >= "3.9" and python_version < "4"
|
urllib3==1.26.14 ; python_version >= "3.9" and python_version < "4"
|
||||||
wcwidth==0.2.5 ; python_version >= "3.9" and python_version < "4"
|
wcwidth==0.2.6 ; python_version >= "3.9" and python_version < "4"
|
||||||
xlrd==2.0.1 ; python_version >= "3.9" and python_version < "4.0"
|
xlrd==2.0.1 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
|
@ -1,23 +1,23 @@
|
|||||||
appdirs==1.4.4 ; python_version >= "3.9" and python_version < "4.0"
|
appdirs==1.4.4 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
attrs==22.1.0 ; python_version >= "3.9" and python_version < "4.0"
|
attrs==22.2.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
cattrs==22.2.0 ; python_version >= "3.9" and python_version < "4.0"
|
cattrs==22.2.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
certifi==2022.12.7 ; python_version >= "3.9" and python_version < "4"
|
certifi==2022.12.7 ; python_version >= "3.9" and python_version < "4"
|
||||||
charset-normalizer==2.1.1 ; python_version >= "3.9" and python_version < "4"
|
charset-normalizer==3.0.1 ; python_version >= "3.9" and python_version < "4"
|
||||||
colorama==0.4.6 ; python_version >= "3.9" and python_version < "4.0"
|
colorama==0.4.6 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
country-converter @ git+https://github.com/alanorth/country_converter.git@myanmar-region ; python_version >= "3.9" and python_version < "4.0"
|
country-converter @ git+https://github.com/alanorth/country_converter.git@myanmar-region ; python_version >= "3.9" and python_version < "4.0"
|
||||||
exceptiongroup==1.0.4 ; python_version >= "3.9" and python_version < "3.11"
|
exceptiongroup==1.1.0 ; python_version >= "3.9" and python_version < "3.11"
|
||||||
ftfy==6.1.1 ; python_version >= "3.9" and python_version < "4"
|
ftfy==6.1.1 ; python_version >= "3.9" and python_version < "4"
|
||||||
idna==3.4 ; python_version >= "3.9" and python_version < "4"
|
idna==3.4 ; python_version >= "3.9" and python_version < "4"
|
||||||
langid==1.1.6 ; python_version >= "3.9" and python_version < "4.0"
|
langid==1.1.6 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
numpy==1.23.5 ; python_version < "4.0" and python_version >= "3.9"
|
numpy==1.24.2 ; python_version < "4.0" and python_version >= "3.9"
|
||||||
pandas==1.5.2 ; python_version >= "3.9" and python_version < "4.0"
|
pandas==1.5.3 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
pycountry @ git+https://github.com/alanorth/pycountry@iso-codes-4.12.0 ; python_version >= "3.9" and python_version < "4.0"
|
pycountry @ git+https://github.com/alanorth/pycountry@iso-codes-4.12.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
python-dateutil==2.8.2 ; python_version >= "3.9" and python_version < "4.0"
|
python-dateutil==2.8.2 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
python-stdnum==1.18 ; python_version >= "3.9" and python_version < "4.0"
|
python-stdnum==1.18 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
pytz==2022.6 ; python_version >= "3.9" and python_version < "4.0"
|
pytz==2022.7.1 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
requests-cache==0.9.7 ; python_version >= "3.9" and python_version < "4.0"
|
requests-cache==0.9.8 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
requests==2.28.1 ; python_version >= "3.9" and python_version < "4"
|
requests==2.28.2 ; python_version >= "3.9" and python_version < "4"
|
||||||
six==1.16.0 ; python_version >= "3.9" and python_version < "4.0"
|
six==1.16.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
url-normalize==1.4.3 ; python_version >= "3.9" and python_version < "4.0"
|
url-normalize==1.4.3 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
urllib3==1.26.13 ; python_version >= "3.9" and python_version < "4"
|
urllib3==1.26.14 ; python_version >= "3.9" and python_version < "4"
|
||||||
wcwidth==0.2.5 ; python_version >= "3.9" and python_version < "4"
|
wcwidth==0.2.6 ; python_version >= "3.9" and python_version < "4"
|
||||||
|
@ -1,6 +0,0 @@
|
|||||||
[isort]
|
|
||||||
multi_line_output=3
|
|
||||||
include_trailing_comma=True
|
|
||||||
force_grid_wrap=0
|
|
||||||
use_parentheses=True
|
|
||||||
line_length=88
|
|
3
setup.py
3
setup.py
@ -14,7 +14,7 @@ install_requires = [
|
|||||||
|
|
||||||
setuptools.setup(
|
setuptools.setup(
|
||||||
name="csv-metadata-quality",
|
name="csv-metadata-quality",
|
||||||
version="0.6.0",
|
version="0.6.1",
|
||||||
author="Alan Orth",
|
author="Alan Orth",
|
||||||
author_email="aorth@mjanja.ch",
|
author_email="aorth@mjanja.ch",
|
||||||
description="A simple, but opinionated CSV quality checking and fixing pipeline for CSVs in the DSpace ecosystem.",
|
description="A simple, but opinionated CSV quality checking and fixing pipeline for CSVs in the DSpace ecosystem.",
|
||||||
@ -23,7 +23,6 @@ setuptools.setup(
|
|||||||
long_description_content_type="text/markdown",
|
long_description_content_type="text/markdown",
|
||||||
url="https://github.com/alanorth/csv-metadata-quality",
|
url="https://github.com/alanorth/csv-metadata-quality",
|
||||||
classifiers=[
|
classifiers=[
|
||||||
"Programming Language :: Python :: 3.8",
|
|
||||||
"Programming Language :: Python :: 3.9",
|
"Programming Language :: Python :: 3.9",
|
||||||
"Programming Language :: Python :: 3.10",
|
"Programming Language :: Python :: 3.10",
|
||||||
"License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
|
"License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
|
||||||
|
@ -25,7 +25,7 @@ def test_check_valid_issn():
|
|||||||
|
|
||||||
result = check.issn(value)
|
result = check.issn(value)
|
||||||
|
|
||||||
assert result == None
|
assert result is None
|
||||||
|
|
||||||
|
|
||||||
def test_check_invalid_isbn(capsys):
|
def test_check_invalid_isbn(capsys):
|
||||||
@ -46,7 +46,7 @@ def test_check_valid_isbn():
|
|||||||
|
|
||||||
result = check.isbn(value)
|
result = check.isbn(value)
|
||||||
|
|
||||||
assert result == None
|
assert result is None
|
||||||
|
|
||||||
|
|
||||||
def test_check_missing_date(capsys):
|
def test_check_missing_date(capsys):
|
||||||
@ -102,7 +102,7 @@ def test_check_valid_date():
|
|||||||
|
|
||||||
result = check.date(value, field_name)
|
result = check.date(value, field_name)
|
||||||
|
|
||||||
assert result == None
|
assert result is None
|
||||||
|
|
||||||
|
|
||||||
def test_check_suspicious_characters(capsys):
|
def test_check_suspicious_characters(capsys):
|
||||||
@ -128,7 +128,7 @@ def test_check_valid_iso639_1_language():
|
|||||||
|
|
||||||
result = check.language(value)
|
result = check.language(value)
|
||||||
|
|
||||||
assert result == None
|
assert result is None
|
||||||
|
|
||||||
|
|
||||||
def test_check_valid_iso639_3_language():
|
def test_check_valid_iso639_3_language():
|
||||||
@ -138,7 +138,7 @@ def test_check_valid_iso639_3_language():
|
|||||||
|
|
||||||
result = check.language(value)
|
result = check.language(value)
|
||||||
|
|
||||||
assert result == None
|
assert result is None
|
||||||
|
|
||||||
|
|
||||||
def test_check_invalid_iso639_1_language(capsys):
|
def test_check_invalid_iso639_1_language(capsys):
|
||||||
@ -249,7 +249,7 @@ def test_check_common_filename_extension():
|
|||||||
|
|
||||||
result = check.filename_extension(value)
|
result = check.filename_extension(value)
|
||||||
|
|
||||||
assert result == None
|
assert result is None
|
||||||
|
|
||||||
|
|
||||||
def test_check_incorrect_iso_639_1_language(capsys):
|
def test_check_incorrect_iso_639_1_language(capsys):
|
||||||
@ -305,7 +305,7 @@ def test_check_correct_iso_639_1_language():
|
|||||||
|
|
||||||
result = experimental.correct_language(series, exclude)
|
result = experimental.correct_language(series, exclude)
|
||||||
|
|
||||||
assert result == None
|
assert result is None
|
||||||
|
|
||||||
|
|
||||||
def test_check_correct_iso_639_3_language():
|
def test_check_correct_iso_639_3_language():
|
||||||
@ -321,7 +321,7 @@ def test_check_correct_iso_639_3_language():
|
|||||||
|
|
||||||
result = experimental.correct_language(series, exclude)
|
result = experimental.correct_language(series, exclude)
|
||||||
|
|
||||||
assert result == None
|
assert result is None
|
||||||
|
|
||||||
|
|
||||||
def test_check_valid_spdx_license_identifier():
|
def test_check_valid_spdx_license_identifier():
|
||||||
@ -331,7 +331,7 @@ def test_check_valid_spdx_license_identifier():
|
|||||||
|
|
||||||
result = check.spdx_license_identifier(license)
|
result = check.spdx_license_identifier(license)
|
||||||
|
|
||||||
assert result == None
|
assert result is None
|
||||||
|
|
||||||
|
|
||||||
def test_check_invalid_spdx_license_identifier(capsys):
|
def test_check_invalid_spdx_license_identifier(capsys):
|
||||||
@ -339,7 +339,7 @@ def test_check_invalid_spdx_license_identifier(capsys):
|
|||||||
|
|
||||||
license = "CC-BY-SA"
|
license = "CC-BY-SA"
|
||||||
|
|
||||||
result = check.spdx_license_identifier(license)
|
check.spdx_license_identifier(license)
|
||||||
|
|
||||||
captured = capsys.readouterr()
|
captured = capsys.readouterr()
|
||||||
assert (
|
assert (
|
||||||
@ -362,7 +362,7 @@ def test_check_duplicate_item(capsys):
|
|||||||
}
|
}
|
||||||
df = pd.DataFrame(data=d)
|
df = pd.DataFrame(data=d)
|
||||||
|
|
||||||
result = check.duplicate_items(df)
|
check.duplicate_items(df)
|
||||||
|
|
||||||
captured = capsys.readouterr()
|
captured = capsys.readouterr()
|
||||||
assert (
|
assert (
|
||||||
@ -379,7 +379,7 @@ def test_check_no_mojibake():
|
|||||||
|
|
||||||
result = check.mojibake(field, field_name)
|
result = check.mojibake(field, field_name)
|
||||||
|
|
||||||
assert result == None
|
assert result is None
|
||||||
|
|
||||||
|
|
||||||
def test_check_mojibake(capsys):
|
def test_check_mojibake(capsys):
|
||||||
@ -388,7 +388,7 @@ def test_check_mojibake(capsys):
|
|||||||
field = "CIAT Publicaçao"
|
field = "CIAT Publicaçao"
|
||||||
field_name = "dcterms.isPartOf"
|
field_name = "dcterms.isPartOf"
|
||||||
|
|
||||||
result = check.mojibake(field, field_name)
|
check.mojibake(field, field_name)
|
||||||
|
|
||||||
captured = capsys.readouterr()
|
captured = capsys.readouterr()
|
||||||
assert (
|
assert (
|
||||||
@ -411,7 +411,7 @@ def test_check_doi_field():
|
|||||||
|
|
||||||
result = check.citation_doi(series, exclude)
|
result = check.citation_doi(series, exclude)
|
||||||
|
|
||||||
assert result == None
|
assert result is None
|
||||||
|
|
||||||
|
|
||||||
def test_check_doi_only_in_citation(capsys):
|
def test_check_doi_only_in_citation(capsys):
|
||||||
@ -448,7 +448,7 @@ def test_title_in_citation():
|
|||||||
|
|
||||||
result = check.title_in_citation(series, exclude)
|
result = check.title_in_citation(series, exclude)
|
||||||
|
|
||||||
assert result == None
|
assert result is None
|
||||||
|
|
||||||
|
|
||||||
def test_title_not_in_citation(capsys):
|
def test_title_not_in_citation(capsys):
|
||||||
@ -485,7 +485,7 @@ def test_country_matches_region():
|
|||||||
|
|
||||||
result = check.countries_match_regions(series, exclude)
|
result = check.countries_match_regions(series, exclude)
|
||||||
|
|
||||||
assert result == None
|
assert result is None
|
||||||
|
|
||||||
|
|
||||||
def test_country_not_matching_region(capsys):
|
def test_country_not_matching_region(capsys):
|
||||||
|
Reference in New Issue
Block a user