mirror of
https://github.com/ilri/csv-metadata-quality.git
synced 2024-11-25 23:28:18 +01:00
Compare commits
10 Commits
b16fa9121f
...
3dbe656f9f
Author | SHA1 | Date | |
---|---|---|---|
3dbe656f9f | |||
7ad821dcad | |||
cd876c4fb3 | |||
d88ea56488 | |||
e0e3ca6c58 | |||
abae8ca4fb | |||
d7d4d4efca | |||
5318953150 | |||
3b17914002 | |||
6e4b0e5c1b |
10
CHANGELOG.md
10
CHANGELOG.md
@ -4,6 +4,16 @@ All notable changes to this project will be documented in this file.
|
|||||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||||
|
|
||||||
|
## Unreleased
|
||||||
|
### Added
|
||||||
|
- Validation of dcterms.license field against SPDX license identifiers
|
||||||
|
|
||||||
|
### Changed
|
||||||
|
- Use DCTERMS fields where possible in `data/test.csv`
|
||||||
|
|
||||||
|
### Updated
|
||||||
|
- Run `poetry update` to update project dependencies
|
||||||
|
|
||||||
## [0.4.5] - 2021-03-04
|
## [0.4.5] - 2021-03-04
|
||||||
### Added
|
### Added
|
||||||
- Check dates in dcterms.issued field as well, not just fields that have the
|
- Check dates in dcterms.issued field as well, not just fields that have the
|
||||||
|
@ -103,7 +103,6 @@ This currently uses the [Python langid](https://github.com/saffsd/langid.py) lib
|
|||||||
- Better logging, for example with INFO, WARN, and ERR levels
|
- Better logging, for example with INFO, WARN, and ERR levels
|
||||||
- Verbose, debug, or quiet options
|
- Verbose, debug, or quiet options
|
||||||
- Warn if an author is shorter than 3 characters?
|
- Warn if an author is shorter than 3 characters?
|
||||||
- Validate dc.rights field against SPDX? Perhaps with an option like `-m spdx` to enable the spdx module?
|
|
||||||
- Validate DOIs? Normalize to https://doi.org format? Or use just the DOI part: 10.1016/j.worlddev.2010.06.006
|
- Validate DOIs? Normalize to https://doi.org format? Or use just the DOI part: 10.1016/j.worlddev.2010.06.006
|
||||||
- Warn if two items use the same file in `filename` column
|
- Warn if two items use the same file in `filename` column
|
||||||
- Add an option to drop invalid AGROVOC subjects?
|
- Add an option to drop invalid AGROVOC subjects?
|
||||||
|
@ -150,6 +150,11 @@ def run(argv):
|
|||||||
if column == "filename":
|
if column == "filename":
|
||||||
df[column] = df[column].apply(check.filename_extension)
|
df[column] = df[column].apply(check.filename_extension)
|
||||||
|
|
||||||
|
# Check: SPDX license identifier
|
||||||
|
match = re.match(r"dcterms\.license.*$", column)
|
||||||
|
if match is not None:
|
||||||
|
df[column] = df[column].apply(check.spdx_license_identifier)
|
||||||
|
|
||||||
##
|
##
|
||||||
# Perform some checks on rows so we can consider items as a whole rather
|
# Perform some checks on rows so we can consider items as a whole rather
|
||||||
# than simply on a field-by-field basis. This allows us to check whether
|
# than simply on a field-by-field basis. This allows us to check whether
|
||||||
|
@ -1,10 +1,14 @@
|
|||||||
|
import re
|
||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import requests
|
import requests
|
||||||
import requests_cache
|
import requests_cache
|
||||||
|
import spdx_license_list
|
||||||
from colorama import Fore
|
from colorama import Fore
|
||||||
from pycountry import languages
|
from pycountry import languages
|
||||||
|
from stdnum import isbn as stdnum_isbn
|
||||||
|
from stdnum import issn as stdnum_issn
|
||||||
|
|
||||||
|
|
||||||
def issn(field):
|
def issn(field):
|
||||||
@ -17,8 +21,6 @@ def issn(field):
|
|||||||
See: https://arthurdejong.org/python-stdnum/doc/1.11/index.html#stdnum.module.is_valid
|
See: https://arthurdejong.org/python-stdnum/doc/1.11/index.html#stdnum.module.is_valid
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from stdnum import issn
|
|
||||||
|
|
||||||
# Skip fields with missing values
|
# Skip fields with missing values
|
||||||
if pd.isna(field):
|
if pd.isna(field):
|
||||||
return
|
return
|
||||||
@ -26,7 +28,7 @@ def issn(field):
|
|||||||
# Try to split multi-value field on "||" separator
|
# Try to split multi-value field on "||" separator
|
||||||
for value in field.split("||"):
|
for value in field.split("||"):
|
||||||
|
|
||||||
if not issn.is_valid(value):
|
if not stdnum_issn.is_valid(value):
|
||||||
print(f"{Fore.RED}Invalid ISSN: {Fore.RESET}{value}")
|
print(f"{Fore.RED}Invalid ISSN: {Fore.RESET}{value}")
|
||||||
|
|
||||||
return field
|
return field
|
||||||
@ -42,8 +44,6 @@ def isbn(field):
|
|||||||
See: https://arthurdejong.org/python-stdnum/doc/1.11/index.html#stdnum.module.is_valid
|
See: https://arthurdejong.org/python-stdnum/doc/1.11/index.html#stdnum.module.is_valid
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from stdnum import isbn
|
|
||||||
|
|
||||||
# Skip fields with missing values
|
# Skip fields with missing values
|
||||||
if pd.isna(field):
|
if pd.isna(field):
|
||||||
return
|
return
|
||||||
@ -51,7 +51,7 @@ def isbn(field):
|
|||||||
# Try to split multi-value field on "||" separator
|
# Try to split multi-value field on "||" separator
|
||||||
for value in field.split("||"):
|
for value in field.split("||"):
|
||||||
|
|
||||||
if not isbn.is_valid(value):
|
if not stdnum_isbn.is_valid(value):
|
||||||
print(f"{Fore.RED}Invalid ISBN: {Fore.RESET}{value}")
|
print(f"{Fore.RED}Invalid ISBN: {Fore.RESET}{value}")
|
||||||
|
|
||||||
return field
|
return field
|
||||||
@ -67,8 +67,6 @@ def separators(field, field_name):
|
|||||||
Prints the field with the invalid multi-value separator.
|
Prints the field with the invalid multi-value separator.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
# Skip fields with missing values
|
# Skip fields with missing values
|
||||||
if pd.isna(field):
|
if pd.isna(field):
|
||||||
return
|
return
|
||||||
@ -277,8 +275,6 @@ def filename_extension(field):
|
|||||||
than .pdf, .xls(x), .doc(x), ppt(x), case insensitive).
|
than .pdf, .xls(x), .doc(x), ppt(x), case insensitive).
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
# Skip fields with missing values
|
# Skip fields with missing values
|
||||||
if pd.isna(field):
|
if pd.isna(field):
|
||||||
return
|
return
|
||||||
@ -317,3 +313,23 @@ def filename_extension(field):
|
|||||||
print(f"{Fore.YELLOW}Filename with uncommon extension: {Fore.RESET}{value}")
|
print(f"{Fore.YELLOW}Filename with uncommon extension: {Fore.RESET}{value}")
|
||||||
|
|
||||||
return field
|
return field
|
||||||
|
|
||||||
|
|
||||||
|
def spdx_license_identifier(field):
    """Check if a license is a valid SPDX identifier.

    The field may hold several values joined by the "||" multi-value
    separator; each value is looked up in ``spdx_license_list.LICENSES``
    and a warning is printed for every value that is not found there.

    This is a check, not a fix: the field is never modified.

    Returns the field unchanged, or None when the field is missing.
    """

    # Skip fields with missing values
    if pd.isna(field):
        return

    # Try to split multi-value field on "||" separator
    for value in field.split("||"):
        if value not in spdx_license_list.LICENSES:
            print(f"{Fore.YELLOW}Non-SPDX license identifier: {Fore.RESET}{value}")

    return field
|
||||||
|
@ -1,31 +1,32 @@
|
|||||||
dc.title,dc.date.issued,dc.identifier.issn,dc.identifier.isbn,dc.language.iso,dc.subject,cg.coverage.country,filename
|
dc.title,dcterms.issued,dc.identifier.issn,dc.identifier.isbn,dcterms.language,dcterms.subject,cg.coverage.country,filename,dcterms.license
|
||||||
Leading space,2019-07-29,,,,,,
|
Leading space,2019-07-29,,,,,,,
|
||||||
Trailing space ,2019-07-29,,,,,,
|
Trailing space ,2019-07-29,,,,,,,
|
||||||
Excessive space,2019-07-29,,,,,,
|
Excessive space,2019-07-29,,,,,,,
|
||||||
Miscellaenous ||whitespace | issues ,2019-07-29,,,,,,
|
Miscellaenous ||whitespace | issues ,2019-07-29,,,,,,,
|
||||||
Duplicate||Duplicate,2019-07-29,,,,,,
|
Duplicate||Duplicate,2019-07-29,,,,,,,
|
||||||
Invalid ISSN,2019-07-29,2321-2302,,,,,
|
Invalid ISSN,2019-07-29,2321-2302,,,,,,
|
||||||
Invalid ISBN,2019-07-29,,978-0-306-40615-6,,,,
|
Invalid ISBN,2019-07-29,,978-0-306-40615-6,,,,,
|
||||||
Multiple valid ISSNs,2019-07-29,0378-5955||0024-9319,,,,,
|
Multiple valid ISSNs,2019-07-29,0378-5955||0024-9319,,,,,,
|
||||||
Multiple valid ISBNs,2019-07-29,,99921-58-10-7||978-0-306-40615-7,,,,
|
Multiple valid ISBNs,2019-07-29,,99921-58-10-7||978-0-306-40615-7,,,,,
|
||||||
Invalid date,2019-07-260,,,,,,
|
Invalid date,2019-07-260,,,,,,,
|
||||||
Multiple dates,2019-07-26||2019-01-10,,,,,,
|
Multiple dates,2019-07-26||2019-01-10,,,,,,,
|
||||||
Invalid multi-value separator,2019-07-29,0378-5955|0024-9319,,,,,
|
Invalid multi-value separator,2019-07-29,0378-5955|0024-9319,,,,,,
|
||||||
Unnecessary Unicode,2019-07-29,,,,,,
|
Unnecessary Unicode,2019-07-29,,,,,,,
|
||||||
Suspicious character||foreˆt,2019-07-29,,,,,,
|
Suspicious character||foreˆt,2019-07-29,,,,,,,
|
||||||
Invalid ISO 639-1 (alpha 2) language,2019-07-29,,,jp,,,
|
Invalid ISO 639-1 (alpha 2) language,2019-07-29,,,jp,,,,
|
||||||
Invalid ISO 639-3 (alpha 3) language,2019-07-29,,,chi,,,
|
Invalid ISO 639-3 (alpha 3) language,2019-07-29,,,chi,,,,
|
||||||
Invalid language,2019-07-29,,,Span,,,
|
Invalid language,2019-07-29,,,Span,,,,
|
||||||
Invalid AGROVOC subject,2019-07-29,,,,FOREST,,
|
Invalid AGROVOC subject,2019-07-29,,,,FOREST,,,
|
||||||
Newline (LF),2019-07-30,,,,"TANZA
|
Newline (LF),2019-07-30,,,,"TANZA
|
||||||
NIA",,
|
NIA",,,
|
||||||
Missing date,,,,,,,
|
Missing date,,,,,,,,
|
||||||
Invalid country,2019-08-01,,,,,KENYAA,
|
Invalid country,2019-08-01,,,,,KENYAA,,
|
||||||
Uncommon filename extension,2019-08-10,,,,,,file.pdf.lck
|
Uncommon filename extension,2019-08-10,,,,,,file.pdf.lck,
|
||||||
Unneccesary unicode (U+002D + U+00AD),2019-08-10,,978-92-9043-823-6,,,,
|
Unneccesary unicode (U+002D + U+00AD),2019-08-10,,978-92-9043-823-6,,,,,
|
||||||
"Missing space,after comma",2019-08-27,,,,,,
|
"Missing space,after comma",2019-08-27,,,,,,,
|
||||||
Incorrect ISO 639-1 language,2019-09-26,,,es,,,
|
Incorrect ISO 639-1 language,2019-09-26,,,es,,,,
|
||||||
Incorrect ISO 639-3 language,2019-09-26,,,spa,,,
|
Incorrect ISO 639-3 language,2019-09-26,,,spa,,,,
|
||||||
Composéd Unicode,2020-01-14,,,,,,
|
Composéd Unicode,2020-01-14,,,,,,,
|
||||||
Decomposéd Unicode,2020-01-14,,,,,,
|
Decomposéd Unicode,2020-01-14,,,,,,,
|
||||||
Unnecessary multi-value separator,2021-01-03,0378-5955||,,,,,
|
Unnecessary multi-value separator,2021-01-03,0378-5955||,,,,,,
|
||||||
|
Invalid SPDX license identifier,2021-03-11,,,,,,,CC-BY
|
||||||
|
|
56
poetry.lock
generated
56
poetry.lock
generated
@ -1,6 +1,6 @@
|
|||||||
[[package]]
|
[[package]]
|
||||||
name = "agate"
|
name = "agate"
|
||||||
version = "1.6.1"
|
version = "1.6.2"
|
||||||
description = "A data analysis library that is optimized for humans instead of machines."
|
description = "A data analysis library that is optimized for humans instead of machines."
|
||||||
category = "dev"
|
category = "dev"
|
||||||
optional = false
|
optional = false
|
||||||
@ -11,6 +11,7 @@ Babel = ">=2.0"
|
|||||||
isodate = ">=0.5.4"
|
isodate = ">=0.5.4"
|
||||||
leather = ">=0.3.2"
|
leather = ">=0.3.2"
|
||||||
parsedatetime = ">=2.1"
|
parsedatetime = ">=2.1"
|
||||||
|
PyICU = ">=2.4.2"
|
||||||
python-slugify = ">=1.2.1"
|
python-slugify = ">=1.2.1"
|
||||||
pytimeparse = ">=1.1.5"
|
pytimeparse = ">=1.1.5"
|
||||||
six = ">=1.9.0"
|
six = ">=1.9.0"
|
||||||
@ -294,14 +295,6 @@ pipfile_deprecated_finder = ["pipreqs", "requirementslib"]
|
|||||||
requirements_deprecated_finder = ["pipreqs", "pip-api"]
|
requirements_deprecated_finder = ["pipreqs", "pip-api"]
|
||||||
colors = ["colorama (>=0.4.3,<0.5.0)"]
|
colors = ["colorama (>=0.4.3,<0.5.0)"]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "jdcal"
|
|
||||||
version = "1.4.1"
|
|
||||||
description = "Julian dates from proleptic Gregorian and Julian calendars."
|
|
||||||
category = "dev"
|
|
||||||
optional = false
|
|
||||||
python-versions = "*"
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "jedi"
|
name = "jedi"
|
||||||
version = "0.18.0"
|
version = "0.18.0"
|
||||||
@ -365,7 +358,7 @@ python-versions = ">=3.7"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "openpyxl"
|
name = "openpyxl"
|
||||||
version = "3.0.6"
|
version = "3.0.7"
|
||||||
description = "A Python library to read/write Excel 2010 xlsx/xlsm files"
|
description = "A Python library to read/write Excel 2010 xlsx/xlsm files"
|
||||||
category = "dev"
|
category = "dev"
|
||||||
optional = false
|
optional = false
|
||||||
@ -373,7 +366,6 @@ python-versions = ">=3.6,"
|
|||||||
|
|
||||||
[package.dependencies]
|
[package.dependencies]
|
||||||
et-xmlfile = "*"
|
et-xmlfile = "*"
|
||||||
jdcal = "*"
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "packaging"
|
name = "packaging"
|
||||||
@ -513,12 +505,20 @@ python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "pygments"
|
name = "pygments"
|
||||||
version = "2.8.0"
|
version = "2.8.1"
|
||||||
description = "Pygments is a syntax highlighting package written in Python."
|
description = "Pygments is a syntax highlighting package written in Python."
|
||||||
category = "dev"
|
category = "dev"
|
||||||
optional = false
|
optional = false
|
||||||
python-versions = ">=3.5"
|
python-versions = ">=3.5"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pyicu"
|
||||||
|
version = "2.6"
|
||||||
|
description = "Python extension wrapping the ICU C++ API"
|
||||||
|
category = "dev"
|
||||||
|
optional = false
|
||||||
|
python-versions = "*"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "pyparsing"
|
name = "pyparsing"
|
||||||
version = "2.4.7"
|
version = "2.4.7"
|
||||||
@ -659,6 +659,14 @@ category = "main"
|
|||||||
optional = false
|
optional = false
|
||||||
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*"
|
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "spdx-license-list"
|
||||||
|
version = "0.5.2"
|
||||||
|
description = "A simple tool/library for working with SPDX license definitions."
|
||||||
|
category = "main"
|
||||||
|
optional = false
|
||||||
|
python-versions = "*"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "sqlalchemy"
|
name = "sqlalchemy"
|
||||||
version = "1.3.23"
|
version = "1.3.23"
|
||||||
@ -765,12 +773,11 @@ python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
|
|||||||
[metadata]
|
[metadata]
|
||||||
lock-version = "1.1"
|
lock-version = "1.1"
|
||||||
python-versions = "^3.8"
|
python-versions = "^3.8"
|
||||||
content-hash = "8c4ba410bbdc930d2d74f7864470a18827029a5697869833959708d7425460ae"
|
content-hash = "6a9ee0f26b50f361d7e0e6a2275f0e3174dee1c89fbd460583c4ea3d873857b8"
|
||||||
|
|
||||||
[metadata.files]
|
[metadata.files]
|
||||||
agate = [
|
agate = [
|
||||||
{file = "agate-1.6.1-py2.py3-none-any.whl", hash = "sha256:48d6f80b35611c1ba25a642cbc5b90fcbdeeb2a54711c4a8d062ee2809334d1c"},
|
{file = "agate-1.6.2.tar.gz", hash = "sha256:8dbd4a57a2cffecfa2d8109ef5993ec4be12a8a7c81fbc0c8c79d96d4c4399ed"},
|
||||||
{file = "agate-1.6.1.tar.gz", hash = "sha256:c93aaa500b439d71e4a5cf088d0006d2ce2c76f1950960c8843114e5f361dfd3"},
|
|
||||||
]
|
]
|
||||||
agate-dbf = [
|
agate-dbf = [
|
||||||
{file = "agate-dbf-0.2.2.tar.gz", hash = "sha256:589682b78c5c03f2dc8511e6e3edb659fb7336cd118e248896bb0b44c2f1917b"},
|
{file = "agate-dbf-0.2.2.tar.gz", hash = "sha256:589682b78c5c03f2dc8511e6e3edb659fb7336cd118e248896bb0b44c2f1917b"},
|
||||||
@ -866,10 +873,6 @@ isort = [
|
|||||||
{file = "isort-5.7.0-py3-none-any.whl", hash = "sha256:fff4f0c04e1825522ce6949973e83110a6e907750cd92d128b0d14aaaadbffdc"},
|
{file = "isort-5.7.0-py3-none-any.whl", hash = "sha256:fff4f0c04e1825522ce6949973e83110a6e907750cd92d128b0d14aaaadbffdc"},
|
||||||
{file = "isort-5.7.0.tar.gz", hash = "sha256:c729845434366216d320e936b8ad6f9d681aab72dc7cbc2d51bedc3582f3ad1e"},
|
{file = "isort-5.7.0.tar.gz", hash = "sha256:c729845434366216d320e936b8ad6f9d681aab72dc7cbc2d51bedc3582f3ad1e"},
|
||||||
]
|
]
|
||||||
jdcal = [
|
|
||||||
{file = "jdcal-1.4.1-py2.py3-none-any.whl", hash = "sha256:1abf1305fce18b4e8aa248cf8fe0c56ce2032392bc64bbd61b5dff2a19ec8bba"},
|
|
||||||
{file = "jdcal-1.4.1.tar.gz", hash = "sha256:472872e096eb8df219c23f2689fc336668bdb43d194094b5cc1707e1640acfc8"},
|
|
||||||
]
|
|
||||||
jedi = [
|
jedi = [
|
||||||
{file = "jedi-0.18.0-py2.py3-none-any.whl", hash = "sha256:18456d83f65f400ab0c2d3319e48520420ef43b23a086fdc05dff34132f0fb93"},
|
{file = "jedi-0.18.0-py2.py3-none-any.whl", hash = "sha256:18456d83f65f400ab0c2d3319e48520420ef43b23a086fdc05dff34132f0fb93"},
|
||||||
{file = "jedi-0.18.0.tar.gz", hash = "sha256:92550a404bad8afed881a137ec9a461fed49eca661414be45059329614ed0707"},
|
{file = "jedi-0.18.0.tar.gz", hash = "sha256:92550a404bad8afed881a137ec9a461fed49eca661414be45059329614ed0707"},
|
||||||
@ -916,8 +919,8 @@ numpy = [
|
|||||||
{file = "numpy-1.20.1.zip", hash = "sha256:3bc63486a870294683980d76ec1e3efc786295ae00128f9ea38e2c6e74d5a60a"},
|
{file = "numpy-1.20.1.zip", hash = "sha256:3bc63486a870294683980d76ec1e3efc786295ae00128f9ea38e2c6e74d5a60a"},
|
||||||
]
|
]
|
||||||
openpyxl = [
|
openpyxl = [
|
||||||
{file = "openpyxl-3.0.6-py2.py3-none-any.whl", hash = "sha256:1a4b3869c2500b5c713e8e28341cdada49ecfcff1b10cd9006945f5bcefc090d"},
|
{file = "openpyxl-3.0.7-py2.py3-none-any.whl", hash = "sha256:46af4eaf201a89b610fcca177eed957635f88770a5462fb6aae4a2a52b0ff516"},
|
||||||
{file = "openpyxl-3.0.6.tar.gz", hash = "sha256:b229112b46e158b910a5d1b270b212c42773d39cab24e8db527f775b82afc041"},
|
{file = "openpyxl-3.0.7.tar.gz", hash = "sha256:6456a3b472e1ef0facb1129f3c6ef00713cebf62e736cd7a75bcc3247432f251"},
|
||||||
]
|
]
|
||||||
packaging = [
|
packaging = [
|
||||||
{file = "packaging-20.9-py2.py3-none-any.whl", hash = "sha256:67714da7f7bc052e064859c05c595155bd1ee9f69f76557e21f051443c20947a"},
|
{file = "packaging-20.9-py2.py3-none-any.whl", hash = "sha256:67714da7f7bc052e064859c05c595155bd1ee9f69f76557e21f051443c20947a"},
|
||||||
@ -989,8 +992,11 @@ pyflakes = [
|
|||||||
{file = "pyflakes-2.2.0.tar.gz", hash = "sha256:35b2d75ee967ea93b55750aa9edbbf72813e06a66ba54438df2cfac9e3c27fc8"},
|
{file = "pyflakes-2.2.0.tar.gz", hash = "sha256:35b2d75ee967ea93b55750aa9edbbf72813e06a66ba54438df2cfac9e3c27fc8"},
|
||||||
]
|
]
|
||||||
pygments = [
|
pygments = [
|
||||||
{file = "Pygments-2.8.0-py3-none-any.whl", hash = "sha256:b21b072d0ccdf29297a82a2363359d99623597b8a265b8081760e4d0f7153c88"},
|
{file = "Pygments-2.8.1-py3-none-any.whl", hash = "sha256:534ef71d539ae97d4c3a4cf7d6f110f214b0e687e92f9cb9d2a3b0d3101289c8"},
|
||||||
{file = "Pygments-2.8.0.tar.gz", hash = "sha256:37a13ba168a02ac54cc5891a42b1caec333e59b66addb7fa633ea8a6d73445c0"},
|
{file = "Pygments-2.8.1.tar.gz", hash = "sha256:2656e1a6edcdabf4275f9a3640db59fd5de107d88e8663c5d4e9a0fa62f77f94"},
|
||||||
|
]
|
||||||
|
pyicu = [
|
||||||
|
{file = "PyICU-2.6.tar.gz", hash = "sha256:a9a5bf6833360f8f69e9375b91c1a7dd6e0c9157a42aee5bb7d6891804d96371"},
|
||||||
]
|
]
|
||||||
pyparsing = [
|
pyparsing = [
|
||||||
{file = "pyparsing-2.4.7-py2.py3-none-any.whl", hash = "sha256:ef9d7589ef3c200abe66653d3f1ab1033c3c419ae9b9bdb1240a85b024efc88b"},
|
{file = "pyparsing-2.4.7-py2.py3-none-any.whl", hash = "sha256:ef9d7589ef3c200abe66653d3f1ab1033c3c419ae9b9bdb1240a85b024efc88b"},
|
||||||
@ -1077,6 +1083,10 @@ six = [
|
|||||||
{file = "six-1.15.0-py2.py3-none-any.whl", hash = "sha256:8b74bedcbbbaca38ff6d7491d76f2b06b3592611af620f8426e82dddb04a5ced"},
|
{file = "six-1.15.0-py2.py3-none-any.whl", hash = "sha256:8b74bedcbbbaca38ff6d7491d76f2b06b3592611af620f8426e82dddb04a5ced"},
|
||||||
{file = "six-1.15.0.tar.gz", hash = "sha256:30639c035cdb23534cd4aa2dd52c3bf48f06e5f4a941509c8bafd8ce11080259"},
|
{file = "six-1.15.0.tar.gz", hash = "sha256:30639c035cdb23534cd4aa2dd52c3bf48f06e5f4a941509c8bafd8ce11080259"},
|
||||||
]
|
]
|
||||||
|
spdx-license-list = [
|
||||||
|
{file = "spdx_license_list-0.5.2-py3-none-any.whl", hash = "sha256:1b338470c7b403dbecceca563a316382c7977516128ca6c1e8f7078e3ed6e7b0"},
|
||||||
|
{file = "spdx_license_list-0.5.2.tar.gz", hash = "sha256:952996f72ab807972dc2278bb9b91e5294767211e51f09aad9c0e2ff5b82a31b"},
|
||||||
|
]
|
||||||
sqlalchemy = [
|
sqlalchemy = [
|
||||||
{file = "SQLAlchemy-1.3.23-cp27-cp27m-macosx_10_14_x86_64.whl", hash = "sha256:fd3b96f8c705af8e938eaa99cbd8fd1450f632d38cad55e7367c33b263bf98ec"},
|
{file = "SQLAlchemy-1.3.23-cp27-cp27m-macosx_10_14_x86_64.whl", hash = "sha256:fd3b96f8c705af8e938eaa99cbd8fd1450f632d38cad55e7367c33b263bf98ec"},
|
||||||
{file = "SQLAlchemy-1.3.23-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:29cccc9606750fe10c5d0e8bd847f17a97f3850b8682aef1f56f5d5e1a5a64b1"},
|
{file = "SQLAlchemy-1.3.23-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:29cccc9606750fe10c5d0e8bd847f17a97f3850b8682aef1f56f5d5e1a5a64b1"},
|
||||||
|
@ -20,6 +20,7 @@ requests-cache = "^0.5.2"
|
|||||||
pycountry = "^19.8.18"
|
pycountry = "^19.8.18"
|
||||||
langid = "^1.1.6"
|
langid = "^1.1.6"
|
||||||
colorama = "^0.4.4"
|
colorama = "^0.4.4"
|
||||||
|
spdx-license-list = "^0.5.2"
|
||||||
|
|
||||||
[tool.poetry.dev-dependencies]
|
[tool.poetry.dev-dependencies]
|
||||||
pytest = "^6.1.1"
|
pytest = "^6.1.1"
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
agate-dbf==0.2.2
|
agate-dbf==0.2.2
|
||||||
agate-excel==0.2.3
|
agate-excel==0.2.3
|
||||||
agate-sql==0.5.5
|
agate-sql==0.5.5
|
||||||
agate==1.6.1
|
agate==1.6.2
|
||||||
appdirs==1.4.4; python_version >= "3.6"
|
appdirs==1.4.4; python_version >= "3.6"
|
||||||
appnope==0.1.2; python_version >= "3.7" and python_version < "4.0" and sys_platform == "darwin"
|
appnope==0.1.2; python_version >= "3.7" and python_version < "4.0" and sys_platform == "darwin"
|
||||||
atomicwrites==1.4.0; python_version >= "3.6" and python_full_version < "3.0.0" and sys_platform == "win32" and (python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.6") or sys_platform == "win32" and python_version >= "3.6" and python_full_version >= "3.4.0" and (python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.6")
|
atomicwrites==1.4.0; python_version >= "3.6" and python_full_version < "3.0.0" and sys_platform == "win32" and (python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.6") or sys_platform == "win32" and python_version >= "3.6" and python_full_version >= "3.4.0" and (python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.6")
|
||||||
@ -24,14 +24,13 @@ ipython-genutils==0.2.0; python_version >= "3.7" and python_version < "4.0"
|
|||||||
ipython==7.21.0; python_version >= "3.7" and python_version < "4.0"
|
ipython==7.21.0; python_version >= "3.7" and python_version < "4.0"
|
||||||
isodate==0.6.0
|
isodate==0.6.0
|
||||||
isort==5.7.0; python_version >= "3.6" and python_version < "4.0"
|
isort==5.7.0; python_version >= "3.6" and python_version < "4.0"
|
||||||
jdcal==1.4.1; python_version >= "3.6"
|
|
||||||
jedi==0.18.0; python_version >= "3.7" and python_version < "4.0"
|
jedi==0.18.0; python_version >= "3.7" and python_version < "4.0"
|
||||||
langid==1.1.6
|
langid==1.1.6
|
||||||
leather==0.3.3
|
leather==0.3.3
|
||||||
mccabe==0.6.1; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.4.0"
|
mccabe==0.6.1; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.4.0"
|
||||||
mypy-extensions==0.4.3; python_version >= "3.6"
|
mypy-extensions==0.4.3; python_version >= "3.6"
|
||||||
numpy==1.20.1; python_version >= "3.7" and python_full_version >= "3.7.1"
|
numpy==1.20.1; python_version >= "3.7" and python_full_version >= "3.7.1"
|
||||||
openpyxl==3.0.6; python_version >= "3.6"
|
openpyxl==3.0.7; python_version >= "3.6"
|
||||||
packaging==20.9; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.6"
|
packaging==20.9; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.6"
|
||||||
pandas==1.2.3; python_full_version >= "3.7.1"
|
pandas==1.2.3; python_full_version >= "3.7.1"
|
||||||
parsedatetime==2.6
|
parsedatetime==2.6
|
||||||
@ -46,7 +45,8 @@ py==1.10.0; python_version >= "3.6" and python_full_version < "3.0.0" or python_
|
|||||||
pycodestyle==2.6.0; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.4.0"
|
pycodestyle==2.6.0; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.4.0"
|
||||||
pycountry==19.8.18
|
pycountry==19.8.18
|
||||||
pyflakes==2.2.0; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.4.0"
|
pyflakes==2.2.0; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.4.0"
|
||||||
pygments==2.8.0; python_version >= "3.7" and python_version < "4.0"
|
pygments==2.8.1; python_version >= "3.7" and python_version < "4.0"
|
||||||
|
pyicu==2.6
|
||||||
pyparsing==2.4.7; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.6"
|
pyparsing==2.4.7; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.6"
|
||||||
pytest-clarity==0.3.0a0; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.4.0")
|
pytest-clarity==0.3.0a0; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.4.0")
|
||||||
pytest==6.2.2; python_version >= "3.6"
|
pytest==6.2.2; python_version >= "3.6"
|
||||||
@ -59,6 +59,7 @@ regex==2020.11.13; python_version >= "3.6"
|
|||||||
requests-cache==0.5.2
|
requests-cache==0.5.2
|
||||||
requests==2.25.1; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.5.0")
|
requests==2.25.1; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.5.0")
|
||||||
six==1.15.0; python_full_version >= "3.7.1"
|
six==1.15.0; python_full_version >= "3.7.1"
|
||||||
|
spdx-license-list==0.5.2
|
||||||
sqlalchemy==1.3.23; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.4.0"
|
sqlalchemy==1.3.23; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.4.0"
|
||||||
termcolor==1.1.0; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.4.0"
|
termcolor==1.1.0; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.4.0"
|
||||||
text-unidecode==1.3; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0"
|
text-unidecode==1.3; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0"
|
||||||
|
@ -12,5 +12,6 @@ pytz==2021.1; python_full_version >= "3.7.1"
|
|||||||
requests-cache==0.5.2
|
requests-cache==0.5.2
|
||||||
requests==2.25.1; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.5.0")
|
requests==2.25.1; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.5.0")
|
||||||
six==1.15.0; python_full_version >= "3.7.1"
|
six==1.15.0; python_full_version >= "3.7.1"
|
||||||
|
spdx-license-list==0.5.2
|
||||||
urllib3==1.26.3; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version < "4"
|
urllib3==1.26.3; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version < "4"
|
||||||
xlrd==1.2.0; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.4.0")
|
xlrd==1.2.0; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.4.0")
|
||||||
|
@ -336,3 +336,27 @@ def test_check_correct_iso_639_3_language():
|
|||||||
result = experimental.correct_language(series)
|
result = experimental.correct_language(series)
|
||||||
|
|
||||||
assert result == language
|
assert result == language
|
||||||
|
|
||||||
|
|
||||||
|
def test_check_valid_spdx_license_identifier():
    """Test valid SPDX license identifier."""

    # Named "identifier" rather than "license" to avoid shadowing the
    # interactive builtin of the same name.
    identifier = "CC-BY-SA-4.0"

    result = check.spdx_license_identifier(identifier)

    # The check must hand the value back unchanged.
    assert result == identifier
|
||||||
|
|
||||||
|
|
||||||
|
def test_check_invalid_spdx_license_identifier(capsys):
    """Test invalid SPDX license identifier."""

    # Named "identifier" rather than "license" to avoid shadowing the
    # interactive builtin of the same name.
    identifier = "CC-BY-SA"

    result = check.spdx_license_identifier(identifier)

    # The check is expected to print a warning for this value.
    captured = capsys.readouterr()
    assert (
        captured.out
        == f"{Fore.YELLOW}Non-SPDX license identifier: {Fore.RESET}{identifier}\n"
    )

    # The check only reports; the value itself is returned unchanged.
    assert result == identifier
|
||||||
|
Loading…
Reference in New Issue
Block a user