mirror of
https://github.com/ilri/csv-metadata-quality.git
synced 2024-11-25 07:10:17 +01:00
Ignore common non-SPDX licenses
The SPDX license check is meant to catch licenses that are supposed to be SPDX identifiers but aren't, not licenses that were never supposed to be SPDX in the first place. We have so many free-text license descriptions like "Copyrighted" and "Other" that I'm sick of seeing warnings for them!
This commit is contained in:
parent
084b970798
commit
fbb625be5c
@ -14,6 +14,7 @@ because it is deprecated and outdated
|
|||||||
- Require Python 3.9+
|
- Require Python 3.9+
|
||||||
- Don't run `fix.separators()` on title fields
|
- Don't run `fix.separators()` on title fields
|
||||||
- Don't run whitespace or newline fixes on abstract fields
|
- Don't run whitespace or newline fixes on abstract fields
|
||||||
|
- Ignore some common non-SPDX licenses
|
||||||
|
|
||||||
### Updated
|
### Updated
|
||||||
- Python dependencies
|
- Python dependencies
|
||||||
|
@ -312,8 +312,19 @@ def spdx_license_identifier(field):
|
|||||||
Prints the value if it is invalid.
|
Prints the value if it is invalid.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
# List of common non-SPDX licenses to ignore
|
||||||
|
# See: https://ilri.github.io/cgspace-submission-guidelines/dcterms-license/dcterms-license.txt
|
||||||
|
ignore_licenses = {
|
||||||
|
"All rights reserved; no re-use allowed",
|
||||||
|
"All rights reserved; self-archive copy only",
|
||||||
|
"Copyrighted; Non-commercial educational use only",
|
||||||
|
"Copyrighted; Non-commercial use only",
|
||||||
|
"Copyrighted; all rights reserved",
|
||||||
|
"Other",
|
||||||
|
}
|
||||||
|
|
||||||
# Skip fields with missing values
|
# Skip fields with missing values
|
||||||
if pd.isna(field):
|
if pd.isna(field) or field in ignore_licenses:
|
||||||
return
|
return
|
||||||
|
|
||||||
spdx_licenses = load_spdx_licenses()
|
spdx_licenses = load_spdx_licenses()
|
||||||
|
Loading…
Reference in New Issue
Block a user