1
0
mirror of https://github.com/ilri/csv-metadata-quality.git synced 2024-11-25 23:28:18 +01:00

Compare commits

...

10 Commits

Author SHA1 Message Date
3dbe656f9f
Update requirements
Some checks failed
continuous-integration/drone/push Build is failing
Generated with poetry export:

    $ poetry export --without-hashes -f requirements.txt > requirements.txt
    $ poetry export --without-hashes --dev -f requirements.txt > requirements-dev.txt

I am trying `--without-hashes` to work around an error on pip install
when running in CI:

    ERROR: In --require-hashes mode, all requirements must have
their versions pinned with ==.
2021-03-11 11:11:19 +02:00
7ad821dcad
CHANGELOG.md: Add note about poetry dependencies 2021-03-11 11:10:27 +02:00
cd876c4fb3
poetry.lock: Run poetry update 2021-03-11 11:10:02 +02:00
d88ea56488
csv_metadata_quality/check.py: Move all imports to top of file
PEP8 recommends keeping imports at the top of the file. Also, I had
to re-work the issn/isbn so they didn't conflict with the functions
in check.py (flake8 warned about them being redefined).

Imports sorted with isort.

See: https://www.python.org/dev/peps/pep-0008/#imports
2021-03-11 10:52:20 +02:00
e0e3ca6c58
CHANGELOG.md: Add notes about DCTERMS in data/test.csv 2021-03-11 10:50:52 +02:00
abae8ca4fb
data/test.csv: Move some DC fields to DCTERMS
The original Dublin Core elements set was superseded by DCTERMS in
2008 and we have started using them in our DSpace repository so I
think it's good to update them in our test data. Old DC fields are
still checked and fixed in this tool, though.

It's worth noting that currently supported DSpace versions (4, 5,
and 6) all have hard-coded a few fields like dc.title internally so
we can't migrate those to their DCTERMS counterparts just yet.
2021-03-11 10:49:05 +02:00
d7d4d4efca
CHANGELOG.md: Add note about SPDX license identifiers 2021-03-11 10:37:27 +02:00
5318953150
tests/test_check.py: Add tests for licenses 2021-03-11 10:36:26 +02:00
3b17914002
data/test.csv: Add invalid SPDX license
Now we are checking dcterms.license against the list of SPDX license
identifiers using https://pypi.org/project/spdx-license-list/.
2021-03-11 10:34:58 +02:00
6e4b0e5c1b
Add validation of SPDX license identifiers
Currently this only checks the dcterms.license field and the result
will only be a warning.
2021-03-11 10:33:16 +02:00
10 changed files with 136 additions and 68 deletions

View File

@ -4,6 +4,16 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## Unreleased
### Added
- Validation of dcterms.license field against SPDX license identifiers
### Changed
- Use DCTERMS fields where possible in `data/test.csv`
### Updated
- Run `poetry update` to update project dependencies
## [0.4.5] - 2021-03-04 ## [0.4.5] - 2021-03-04
### Added ### Added
- Check dates in dcterms.issued field as well, not just fields that have the - Check dates in dcterms.issued field as well, not just fields that have the

View File

@ -103,7 +103,6 @@ This currently uses the [Python langid](https://github.com/saffsd/langid.py) lib
- Better logging, for example with INFO, WARN, and ERR levels - Better logging, for example with INFO, WARN, and ERR levels
- Verbose, debug, or quiet options - Verbose, debug, or quiet options
- Warn if an author is shorter than 3 characters? - Warn if an author is shorter than 3 characters?
- Validate dc.rights field against SPDX? Perhaps with an option like `-m spdx` to enable the spdx module?
- Validate DOIs? Normalize to https://doi.org format? Or use just the DOI part: 10.1016/j.worlddev.2010.06.006 - Validate DOIs? Normalize to https://doi.org format? Or use just the DOI part: 10.1016/j.worlddev.2010.06.006
- Warn if two items use the same file in `filename` column - Warn if two items use the same file in `filename` column
- Add an option to drop invalid AGROVOC subjects? - Add an option to drop invalid AGROVOC subjects?

View File

@ -150,6 +150,11 @@ def run(argv):
if column == "filename": if column == "filename":
df[column] = df[column].apply(check.filename_extension) df[column] = df[column].apply(check.filename_extension)
# Check: SPDX license identifier
match = re.match(r"dcterms\.license.*$", column)
if match is not None:
df[column] = df[column].apply(check.spdx_license_identifier)
## ##
# Perform some checks on rows so we can consider items as a whole rather # Perform some checks on rows so we can consider items as a whole rather
# than simply on a field-by-field basis. This allows us to check whether # than simply on a field-by-field basis. This allows us to check whether

View File

@ -1,10 +1,14 @@
import re
from datetime import datetime, timedelta from datetime import datetime, timedelta
import pandas as pd import pandas as pd
import requests import requests
import requests_cache import requests_cache
import spdx_license_list
from colorama import Fore from colorama import Fore
from pycountry import languages from pycountry import languages
from stdnum import isbn as stdnum_isbn
from stdnum import issn as stdnum_issn
def issn(field): def issn(field):
@ -17,8 +21,6 @@ def issn(field):
See: https://arthurdejong.org/python-stdnum/doc/1.11/index.html#stdnum.module.is_valid See: https://arthurdejong.org/python-stdnum/doc/1.11/index.html#stdnum.module.is_valid
""" """
from stdnum import issn
# Skip fields with missing values # Skip fields with missing values
if pd.isna(field): if pd.isna(field):
return return
@ -26,7 +28,7 @@ def issn(field):
# Try to split multi-value field on "||" separator # Try to split multi-value field on "||" separator
for value in field.split("||"): for value in field.split("||"):
if not issn.is_valid(value): if not stdnum_issn.is_valid(value):
print(f"{Fore.RED}Invalid ISSN: {Fore.RESET}{value}") print(f"{Fore.RED}Invalid ISSN: {Fore.RESET}{value}")
return field return field
@ -42,8 +44,6 @@ def isbn(field):
See: https://arthurdejong.org/python-stdnum/doc/1.11/index.html#stdnum.module.is_valid See: https://arthurdejong.org/python-stdnum/doc/1.11/index.html#stdnum.module.is_valid
""" """
from stdnum import isbn
# Skip fields with missing values # Skip fields with missing values
if pd.isna(field): if pd.isna(field):
return return
@ -51,7 +51,7 @@ def isbn(field):
# Try to split multi-value field on "||" separator # Try to split multi-value field on "||" separator
for value in field.split("||"): for value in field.split("||"):
if not isbn.is_valid(value): if not stdnum_isbn.is_valid(value):
print(f"{Fore.RED}Invalid ISBN: {Fore.RESET}{value}") print(f"{Fore.RED}Invalid ISBN: {Fore.RESET}{value}")
return field return field
@ -67,8 +67,6 @@ def separators(field, field_name):
Prints the field with the invalid multi-value separator. Prints the field with the invalid multi-value separator.
""" """
import re
# Skip fields with missing values # Skip fields with missing values
if pd.isna(field): if pd.isna(field):
return return
@ -277,8 +275,6 @@ def filename_extension(field):
than .pdf, .xls(x), .doc(x), ppt(x), case insensitive). than .pdf, .xls(x), .doc(x), ppt(x), case insensitive).
""" """
import re
# Skip fields with missing values # Skip fields with missing values
if pd.isna(field): if pd.isna(field):
return return
@ -317,3 +313,23 @@ def filename_extension(field):
print(f"{Fore.YELLOW}Filename with uncommon extension: {Fore.RESET}{value}") print(f"{Fore.YELLOW}Filename with uncommon extension: {Fore.RESET}{value}")
return field return field
def spdx_license_identifier(field):
    """Check if a license is a valid SPDX identifier.

    Multi-value fields are split on the "||" separator and each value is
    looked up in spdx_license_list.LICENSES.

    Prints the value if it is invalid.

    Returns the field unchanged, or None when the field is missing.
    """

    # Skip fields with missing values
    if pd.isna(field):
        return

    # Try to split multi-value field on "||" separator
    for value in field.split("||"):
        # This check only warns; the field is never modified
        if value not in spdx_license_list.LICENSES:
            print(f"{Fore.YELLOW}Non-SPDX license identifier: {Fore.RESET}{value}")

    return field

View File

@ -1,31 +1,32 @@
dc.title,dc.date.issued,dc.identifier.issn,dc.identifier.isbn,dc.language.iso,dc.subject,cg.coverage.country,filename dc.title,dcterms.issued,dc.identifier.issn,dc.identifier.isbn,dcterms.language,dcterms.subject,cg.coverage.country,filename,dcterms.license
Leading space,2019-07-29,,,,,, Leading space,2019-07-29,,,,,,,
Trailing space ,2019-07-29,,,,,, Trailing space ,2019-07-29,,,,,,,
Excessive space,2019-07-29,,,,,, Excessive space,2019-07-29,,,,,,,
Miscellaenous ||whitespace | issues ,2019-07-29,,,,,, Miscellaenous ||whitespace | issues ,2019-07-29,,,,,,,
Duplicate||Duplicate,2019-07-29,,,,,, Duplicate||Duplicate,2019-07-29,,,,,,,
Invalid ISSN,2019-07-29,2321-2302,,,,, Invalid ISSN,2019-07-29,2321-2302,,,,,,
Invalid ISBN,2019-07-29,,978-0-306-40615-6,,,, Invalid ISBN,2019-07-29,,978-0-306-40615-6,,,,,
Multiple valid ISSNs,2019-07-29,0378-5955||0024-9319,,,,, Multiple valid ISSNs,2019-07-29,0378-5955||0024-9319,,,,,,
Multiple valid ISBNs,2019-07-29,,99921-58-10-7||978-0-306-40615-7,,,, Multiple valid ISBNs,2019-07-29,,99921-58-10-7||978-0-306-40615-7,,,,,
Invalid date,2019-07-260,,,,,, Invalid date,2019-07-260,,,,,,,
Multiple dates,2019-07-26||2019-01-10,,,,,, Multiple dates,2019-07-26||2019-01-10,,,,,,,
Invalid multi-value separator,2019-07-29,0378-5955|0024-9319,,,,, Invalid multi-value separator,2019-07-29,0378-5955|0024-9319,,,,,,
Unnecessary Unicode,2019-07-29,,,,,, Unnecessary Unicode,2019-07-29,,,,,,,
Suspicious character||foreˆt,2019-07-29,,,,,, Suspicious character||foreˆt,2019-07-29,,,,,,,
Invalid ISO 639-1 (alpha 2) language,2019-07-29,,,jp,,, Invalid ISO 639-1 (alpha 2) language,2019-07-29,,,jp,,,,
Invalid ISO 639-3 (alpha 3) language,2019-07-29,,,chi,,, Invalid ISO 639-3 (alpha 3) language,2019-07-29,,,chi,,,,
Invalid language,2019-07-29,,,Span,,, Invalid language,2019-07-29,,,Span,,,,
Invalid AGROVOC subject,2019-07-29,,,,FOREST,, Invalid AGROVOC subject,2019-07-29,,,,FOREST,,,
Newline (LF),2019-07-30,,,,"TANZA Newline (LF),2019-07-30,,,,"TANZA
NIA",, NIA",,,
Missing date,,,,,,, Missing date,,,,,,,,
Invalid country,2019-08-01,,,,,KENYAA, Invalid country,2019-08-01,,,,,KENYAA,,
Uncommon filename extension,2019-08-10,,,,,,file.pdf.lck Uncommon filename extension,2019-08-10,,,,,,file.pdf.lck,
Unneccesary unicode (U+002D + U+00AD),2019-08-10,,978-­92-­9043-­823-­6,,,, Unneccesary unicode (U+002D + U+00AD),2019-08-10,,978-­92-­9043-­823-­6,,,,,
"Missing space,after comma",2019-08-27,,,,,, "Missing space,after comma",2019-08-27,,,,,,,
Incorrect ISO 639-1 language,2019-09-26,,,es,,, Incorrect ISO 639-1 language,2019-09-26,,,es,,,,
Incorrect ISO 639-3 language,2019-09-26,,,spa,,, Incorrect ISO 639-3 language,2019-09-26,,,spa,,,,
Composéd Unicode,2020-01-14,,,,,, Composéd Unicode,2020-01-14,,,,,,,
Decomposéd Unicode,2020-01-14,,,,,, Decomposéd Unicode,2020-01-14,,,,,,,
Unnecessary multi-value separator,2021-01-03,0378-5955||,,,,, Unnecessary multi-value separator,2021-01-03,0378-5955||,,,,,,
Invalid SPDX license identifier,2021-03-11,,,,,,,CC-BY

1 dc.title dc.date.issued dcterms.issued dc.identifier.issn dc.identifier.isbn dc.language.iso dcterms.language dc.subject dcterms.subject cg.coverage.country filename dcterms.license
2 Leading space 2019-07-29
3 Trailing space 2019-07-29
4 Excessive space 2019-07-29
5 Miscellaenous ||whitespace | issues 2019-07-29
6 Duplicate||Duplicate 2019-07-29
7 Invalid ISSN 2019-07-29 2321-2302
8 Invalid ISBN 2019-07-29 978-0-306-40615-6
9 Multiple valid ISSNs 2019-07-29 0378-5955||0024-9319
10 Multiple valid ISBNs 2019-07-29 99921-58-10-7||978-0-306-40615-7
11 Invalid date 2019-07-260
12 Multiple dates 2019-07-26||2019-01-10
13 Invalid multi-value separator 2019-07-29 0378-5955|0024-9319
14 Unnecessary Unicode​ 2019-07-29
15 Suspicious character||foreˆt 2019-07-29
16 Invalid ISO 639-1 (alpha 2) language 2019-07-29 jp
17 Invalid ISO 639-3 (alpha 3) language 2019-07-29 chi
18 Invalid language 2019-07-29 Span
19 Invalid AGROVOC subject 2019-07-29 FOREST
20 Newline (LF) 2019-07-30 TANZA NIA
21 Missing date
22 Invalid country 2019-08-01 KENYAA
23 Uncommon filename extension 2019-08-10 file.pdf.lck
24 Unneccesary unicode (U+002D + U+00AD) 2019-08-10 978-­92-­9043-­823-­6
25 Missing space,after comma 2019-08-27
26 Incorrect ISO 639-1 language 2019-09-26 es
27 Incorrect ISO 639-3 language 2019-09-26 spa
28 Composéd Unicode 2020-01-14
29 Decomposéd Unicode 2020-01-14
30 Unnecessary multi-value separator 2021-01-03 0378-5955||
31 Invalid SPDX license identifier 2021-03-11 CC-BY
32

56
poetry.lock generated
View File

@ -1,6 +1,6 @@
[[package]] [[package]]
name = "agate" name = "agate"
version = "1.6.1" version = "1.6.2"
description = "A data analysis library that is optimized for humans instead of machines." description = "A data analysis library that is optimized for humans instead of machines."
category = "dev" category = "dev"
optional = false optional = false
@ -11,6 +11,7 @@ Babel = ">=2.0"
isodate = ">=0.5.4" isodate = ">=0.5.4"
leather = ">=0.3.2" leather = ">=0.3.2"
parsedatetime = ">=2.1" parsedatetime = ">=2.1"
PyICU = ">=2.4.2"
python-slugify = ">=1.2.1" python-slugify = ">=1.2.1"
pytimeparse = ">=1.1.5" pytimeparse = ">=1.1.5"
six = ">=1.9.0" six = ">=1.9.0"
@ -294,14 +295,6 @@ pipfile_deprecated_finder = ["pipreqs", "requirementslib"]
requirements_deprecated_finder = ["pipreqs", "pip-api"] requirements_deprecated_finder = ["pipreqs", "pip-api"]
colors = ["colorama (>=0.4.3,<0.5.0)"] colors = ["colorama (>=0.4.3,<0.5.0)"]
[[package]]
name = "jdcal"
version = "1.4.1"
description = "Julian dates from proleptic Gregorian and Julian calendars."
category = "dev"
optional = false
python-versions = "*"
[[package]] [[package]]
name = "jedi" name = "jedi"
version = "0.18.0" version = "0.18.0"
@ -365,7 +358,7 @@ python-versions = ">=3.7"
[[package]] [[package]]
name = "openpyxl" name = "openpyxl"
version = "3.0.6" version = "3.0.7"
description = "A Python library to read/write Excel 2010 xlsx/xlsm files" description = "A Python library to read/write Excel 2010 xlsx/xlsm files"
category = "dev" category = "dev"
optional = false optional = false
@ -373,7 +366,6 @@ python-versions = ">=3.6,"
[package.dependencies] [package.dependencies]
et-xmlfile = "*" et-xmlfile = "*"
jdcal = "*"
[[package]] [[package]]
name = "packaging" name = "packaging"
@ -513,12 +505,20 @@ python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
[[package]] [[package]]
name = "pygments" name = "pygments"
version = "2.8.0" version = "2.8.1"
description = "Pygments is a syntax highlighting package written in Python." description = "Pygments is a syntax highlighting package written in Python."
category = "dev" category = "dev"
optional = false optional = false
python-versions = ">=3.5" python-versions = ">=3.5"
[[package]]
name = "pyicu"
version = "2.6"
description = "Python extension wrapping the ICU C++ API"
category = "dev"
optional = false
python-versions = "*"
[[package]] [[package]]
name = "pyparsing" name = "pyparsing"
version = "2.4.7" version = "2.4.7"
@ -659,6 +659,14 @@ category = "main"
optional = false optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*"
[[package]]
name = "spdx-license-list"
version = "0.5.2"
description = "A simple tool/library for working with SPDX license definitions."
category = "main"
optional = false
python-versions = "*"
[[package]] [[package]]
name = "sqlalchemy" name = "sqlalchemy"
version = "1.3.23" version = "1.3.23"
@ -765,12 +773,11 @@ python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
[metadata] [metadata]
lock-version = "1.1" lock-version = "1.1"
python-versions = "^3.8" python-versions = "^3.8"
content-hash = "8c4ba410bbdc930d2d74f7864470a18827029a5697869833959708d7425460ae" content-hash = "6a9ee0f26b50f361d7e0e6a2275f0e3174dee1c89fbd460583c4ea3d873857b8"
[metadata.files] [metadata.files]
agate = [ agate = [
{file = "agate-1.6.1-py2.py3-none-any.whl", hash = "sha256:48d6f80b35611c1ba25a642cbc5b90fcbdeeb2a54711c4a8d062ee2809334d1c"}, {file = "agate-1.6.2.tar.gz", hash = "sha256:8dbd4a57a2cffecfa2d8109ef5993ec4be12a8a7c81fbc0c8c79d96d4c4399ed"},
{file = "agate-1.6.1.tar.gz", hash = "sha256:c93aaa500b439d71e4a5cf088d0006d2ce2c76f1950960c8843114e5f361dfd3"},
] ]
agate-dbf = [ agate-dbf = [
{file = "agate-dbf-0.2.2.tar.gz", hash = "sha256:589682b78c5c03f2dc8511e6e3edb659fb7336cd118e248896bb0b44c2f1917b"}, {file = "agate-dbf-0.2.2.tar.gz", hash = "sha256:589682b78c5c03f2dc8511e6e3edb659fb7336cd118e248896bb0b44c2f1917b"},
@ -866,10 +873,6 @@ isort = [
{file = "isort-5.7.0-py3-none-any.whl", hash = "sha256:fff4f0c04e1825522ce6949973e83110a6e907750cd92d128b0d14aaaadbffdc"}, {file = "isort-5.7.0-py3-none-any.whl", hash = "sha256:fff4f0c04e1825522ce6949973e83110a6e907750cd92d128b0d14aaaadbffdc"},
{file = "isort-5.7.0.tar.gz", hash = "sha256:c729845434366216d320e936b8ad6f9d681aab72dc7cbc2d51bedc3582f3ad1e"}, {file = "isort-5.7.0.tar.gz", hash = "sha256:c729845434366216d320e936b8ad6f9d681aab72dc7cbc2d51bedc3582f3ad1e"},
] ]
jdcal = [
{file = "jdcal-1.4.1-py2.py3-none-any.whl", hash = "sha256:1abf1305fce18b4e8aa248cf8fe0c56ce2032392bc64bbd61b5dff2a19ec8bba"},
{file = "jdcal-1.4.1.tar.gz", hash = "sha256:472872e096eb8df219c23f2689fc336668bdb43d194094b5cc1707e1640acfc8"},
]
jedi = [ jedi = [
{file = "jedi-0.18.0-py2.py3-none-any.whl", hash = "sha256:18456d83f65f400ab0c2d3319e48520420ef43b23a086fdc05dff34132f0fb93"}, {file = "jedi-0.18.0-py2.py3-none-any.whl", hash = "sha256:18456d83f65f400ab0c2d3319e48520420ef43b23a086fdc05dff34132f0fb93"},
{file = "jedi-0.18.0.tar.gz", hash = "sha256:92550a404bad8afed881a137ec9a461fed49eca661414be45059329614ed0707"}, {file = "jedi-0.18.0.tar.gz", hash = "sha256:92550a404bad8afed881a137ec9a461fed49eca661414be45059329614ed0707"},
@ -916,8 +919,8 @@ numpy = [
{file = "numpy-1.20.1.zip", hash = "sha256:3bc63486a870294683980d76ec1e3efc786295ae00128f9ea38e2c6e74d5a60a"}, {file = "numpy-1.20.1.zip", hash = "sha256:3bc63486a870294683980d76ec1e3efc786295ae00128f9ea38e2c6e74d5a60a"},
] ]
openpyxl = [ openpyxl = [
{file = "openpyxl-3.0.6-py2.py3-none-any.whl", hash = "sha256:1a4b3869c2500b5c713e8e28341cdada49ecfcff1b10cd9006945f5bcefc090d"}, {file = "openpyxl-3.0.7-py2.py3-none-any.whl", hash = "sha256:46af4eaf201a89b610fcca177eed957635f88770a5462fb6aae4a2a52b0ff516"},
{file = "openpyxl-3.0.6.tar.gz", hash = "sha256:b229112b46e158b910a5d1b270b212c42773d39cab24e8db527f775b82afc041"}, {file = "openpyxl-3.0.7.tar.gz", hash = "sha256:6456a3b472e1ef0facb1129f3c6ef00713cebf62e736cd7a75bcc3247432f251"},
] ]
packaging = [ packaging = [
{file = "packaging-20.9-py2.py3-none-any.whl", hash = "sha256:67714da7f7bc052e064859c05c595155bd1ee9f69f76557e21f051443c20947a"}, {file = "packaging-20.9-py2.py3-none-any.whl", hash = "sha256:67714da7f7bc052e064859c05c595155bd1ee9f69f76557e21f051443c20947a"},
@ -989,8 +992,11 @@ pyflakes = [
{file = "pyflakes-2.2.0.tar.gz", hash = "sha256:35b2d75ee967ea93b55750aa9edbbf72813e06a66ba54438df2cfac9e3c27fc8"}, {file = "pyflakes-2.2.0.tar.gz", hash = "sha256:35b2d75ee967ea93b55750aa9edbbf72813e06a66ba54438df2cfac9e3c27fc8"},
] ]
pygments = [ pygments = [
{file = "Pygments-2.8.0-py3-none-any.whl", hash = "sha256:b21b072d0ccdf29297a82a2363359d99623597b8a265b8081760e4d0f7153c88"}, {file = "Pygments-2.8.1-py3-none-any.whl", hash = "sha256:534ef71d539ae97d4c3a4cf7d6f110f214b0e687e92f9cb9d2a3b0d3101289c8"},
{file = "Pygments-2.8.0.tar.gz", hash = "sha256:37a13ba168a02ac54cc5891a42b1caec333e59b66addb7fa633ea8a6d73445c0"}, {file = "Pygments-2.8.1.tar.gz", hash = "sha256:2656e1a6edcdabf4275f9a3640db59fd5de107d88e8663c5d4e9a0fa62f77f94"},
]
pyicu = [
{file = "PyICU-2.6.tar.gz", hash = "sha256:a9a5bf6833360f8f69e9375b91c1a7dd6e0c9157a42aee5bb7d6891804d96371"},
] ]
pyparsing = [ pyparsing = [
{file = "pyparsing-2.4.7-py2.py3-none-any.whl", hash = "sha256:ef9d7589ef3c200abe66653d3f1ab1033c3c419ae9b9bdb1240a85b024efc88b"}, {file = "pyparsing-2.4.7-py2.py3-none-any.whl", hash = "sha256:ef9d7589ef3c200abe66653d3f1ab1033c3c419ae9b9bdb1240a85b024efc88b"},
@ -1077,6 +1083,10 @@ six = [
{file = "six-1.15.0-py2.py3-none-any.whl", hash = "sha256:8b74bedcbbbaca38ff6d7491d76f2b06b3592611af620f8426e82dddb04a5ced"}, {file = "six-1.15.0-py2.py3-none-any.whl", hash = "sha256:8b74bedcbbbaca38ff6d7491d76f2b06b3592611af620f8426e82dddb04a5ced"},
{file = "six-1.15.0.tar.gz", hash = "sha256:30639c035cdb23534cd4aa2dd52c3bf48f06e5f4a941509c8bafd8ce11080259"}, {file = "six-1.15.0.tar.gz", hash = "sha256:30639c035cdb23534cd4aa2dd52c3bf48f06e5f4a941509c8bafd8ce11080259"},
] ]
spdx-license-list = [
{file = "spdx_license_list-0.5.2-py3-none-any.whl", hash = "sha256:1b338470c7b403dbecceca563a316382c7977516128ca6c1e8f7078e3ed6e7b0"},
{file = "spdx_license_list-0.5.2.tar.gz", hash = "sha256:952996f72ab807972dc2278bb9b91e5294767211e51f09aad9c0e2ff5b82a31b"},
]
sqlalchemy = [ sqlalchemy = [
{file = "SQLAlchemy-1.3.23-cp27-cp27m-macosx_10_14_x86_64.whl", hash = "sha256:fd3b96f8c705af8e938eaa99cbd8fd1450f632d38cad55e7367c33b263bf98ec"}, {file = "SQLAlchemy-1.3.23-cp27-cp27m-macosx_10_14_x86_64.whl", hash = "sha256:fd3b96f8c705af8e938eaa99cbd8fd1450f632d38cad55e7367c33b263bf98ec"},
{file = "SQLAlchemy-1.3.23-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:29cccc9606750fe10c5d0e8bd847f17a97f3850b8682aef1f56f5d5e1a5a64b1"}, {file = "SQLAlchemy-1.3.23-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:29cccc9606750fe10c5d0e8bd847f17a97f3850b8682aef1f56f5d5e1a5a64b1"},

View File

@ -20,6 +20,7 @@ requests-cache = "^0.5.2"
pycountry = "^19.8.18" pycountry = "^19.8.18"
langid = "^1.1.6" langid = "^1.1.6"
colorama = "^0.4.4" colorama = "^0.4.4"
spdx-license-list = "^0.5.2"
[tool.poetry.dev-dependencies] [tool.poetry.dev-dependencies]
pytest = "^6.1.1" pytest = "^6.1.1"

View File

@ -1,7 +1,7 @@
agate-dbf==0.2.2 agate-dbf==0.2.2
agate-excel==0.2.3 agate-excel==0.2.3
agate-sql==0.5.5 agate-sql==0.5.5
agate==1.6.1 agate==1.6.2
appdirs==1.4.4; python_version >= "3.6" appdirs==1.4.4; python_version >= "3.6"
appnope==0.1.2; python_version >= "3.7" and python_version < "4.0" and sys_platform == "darwin" appnope==0.1.2; python_version >= "3.7" and python_version < "4.0" and sys_platform == "darwin"
atomicwrites==1.4.0; python_version >= "3.6" and python_full_version < "3.0.0" and sys_platform == "win32" and (python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.6") or sys_platform == "win32" and python_version >= "3.6" and python_full_version >= "3.4.0" and (python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.6") atomicwrites==1.4.0; python_version >= "3.6" and python_full_version < "3.0.0" and sys_platform == "win32" and (python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.6") or sys_platform == "win32" and python_version >= "3.6" and python_full_version >= "3.4.0" and (python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.6")
@ -24,14 +24,13 @@ ipython-genutils==0.2.0; python_version >= "3.7" and python_version < "4.0"
ipython==7.21.0; python_version >= "3.7" and python_version < "4.0" ipython==7.21.0; python_version >= "3.7" and python_version < "4.0"
isodate==0.6.0 isodate==0.6.0
isort==5.7.0; python_version >= "3.6" and python_version < "4.0" isort==5.7.0; python_version >= "3.6" and python_version < "4.0"
jdcal==1.4.1; python_version >= "3.6"
jedi==0.18.0; python_version >= "3.7" and python_version < "4.0" jedi==0.18.0; python_version >= "3.7" and python_version < "4.0"
langid==1.1.6 langid==1.1.6
leather==0.3.3 leather==0.3.3
mccabe==0.6.1; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" mccabe==0.6.1; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.4.0"
mypy-extensions==0.4.3; python_version >= "3.6" mypy-extensions==0.4.3; python_version >= "3.6"
numpy==1.20.1; python_version >= "3.7" and python_full_version >= "3.7.1" numpy==1.20.1; python_version >= "3.7" and python_full_version >= "3.7.1"
openpyxl==3.0.6; python_version >= "3.6" openpyxl==3.0.7; python_version >= "3.6"
packaging==20.9; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.6" packaging==20.9; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.6"
pandas==1.2.3; python_full_version >= "3.7.1" pandas==1.2.3; python_full_version >= "3.7.1"
parsedatetime==2.6 parsedatetime==2.6
@ -46,7 +45,8 @@ py==1.10.0; python_version >= "3.6" and python_full_version < "3.0.0" or python_
pycodestyle==2.6.0; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" pycodestyle==2.6.0; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.4.0"
pycountry==19.8.18 pycountry==19.8.18
pyflakes==2.2.0; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" pyflakes==2.2.0; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.4.0"
pygments==2.8.0; python_version >= "3.7" and python_version < "4.0" pygments==2.8.1; python_version >= "3.7" and python_version < "4.0"
pyicu==2.6
pyparsing==2.4.7; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.6" pyparsing==2.4.7; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.6"
pytest-clarity==0.3.0a0; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.4.0") pytest-clarity==0.3.0a0; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.4.0")
pytest==6.2.2; python_version >= "3.6" pytest==6.2.2; python_version >= "3.6"
@ -59,6 +59,7 @@ regex==2020.11.13; python_version >= "3.6"
requests-cache==0.5.2 requests-cache==0.5.2
requests==2.25.1; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.5.0") requests==2.25.1; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.5.0")
six==1.15.0; python_full_version >= "3.7.1" six==1.15.0; python_full_version >= "3.7.1"
spdx-license-list==0.5.2
sqlalchemy==1.3.23; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" sqlalchemy==1.3.23; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.4.0"
termcolor==1.1.0; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" termcolor==1.1.0; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.4.0"
text-unidecode==1.3; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" text-unidecode==1.3; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0"

View File

@ -12,5 +12,6 @@ pytz==2021.1; python_full_version >= "3.7.1"
requests-cache==0.5.2 requests-cache==0.5.2
requests==2.25.1; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.5.0") requests==2.25.1; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.5.0")
six==1.15.0; python_full_version >= "3.7.1" six==1.15.0; python_full_version >= "3.7.1"
spdx-license-list==0.5.2
urllib3==1.26.3; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version < "4" urllib3==1.26.3; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version < "4"
xlrd==1.2.0; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.4.0") xlrd==1.2.0; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.4.0")

View File

@ -336,3 +336,27 @@ def test_check_correct_iso_639_3_language():
result = experimental.correct_language(series) result = experimental.correct_language(series)
assert result == language assert result == language
def test_check_valid_spdx_license_identifier():
    """Test valid SPDX license identifier."""

    # Named license_id to avoid shadowing the interpreter's `license` builtin
    license_id = "CC-BY-SA-4.0"

    result = check.spdx_license_identifier(license_id)

    # A valid identifier is handed back unchanged
    assert result == license_id
def test_check_invalid_spdx_license_identifier(capsys):
    """Test invalid SPDX license identifier."""

    # Named license_id to avoid shadowing the interpreter's `license` builtin
    license_id = "CC-BY-SA"

    # Only the printed warning is checked, so the return value is not kept
    check.spdx_license_identifier(license_id)

    captured = capsys.readouterr()
    assert (
        captured.out
        == f"{Fore.YELLOW}Non-SPDX license identifier: {Fore.RESET}{license_id}\n"
    )