mirror of
https://github.com/ilri/csv-metadata-quality.git
synced 2024-12-22 04:02:19 +01:00
Ignore common non-SPDX licenses
This is meant to catch licenses that are supposed to be SPDX but aren't, not licenses that *aren't* supposed to be SPDX. We have so many free-text license descriptions like "Copyrighted" and "Other" that I'm sick of seeing warnings for them!
This commit is contained in:
parent
084b970798
commit
fbb625be5c
@@ -14,6 +14,7 @@ because it is deprecated and outdated
|
||||
- Require Python 3.9+
|
||||
- Don't run `fix.separators()` on title fields
|
||||
- Don't run whitespace or newline fixes on abstract fields
|
||||
- Ignore some common non-SPDX licenses
|
||||
|
||||
### Updated
|
||||
- Python dependencies
|
||||
|
@@ -312,8 +312,19 @@ def spdx_license_identifier(field):
|
||||
Prints the value if it is invalid.
|
||||
"""
|
||||
|
||||
# List of common non-SPDX licenses to ignore
|
||||
# See: https://ilri.github.io/cgspace-submission-guidelines/dcterms-license/dcterms-license.txt
|
||||
ignore_licenses = {
|
||||
"All rights reserved; no re-use allowed",
|
||||
"All rights reserved; self-archive copy only",
|
||||
"Copyrighted; Non-commercial educational use only",
|
||||
"Copyrighted; Non-commercial use only",
|
||||
"Copyrighted; all rights reserved",
|
||||
"Other",
|
||||
}
|
||||
|
||||
# Skip fields with missing values
|
||||
if pd.isna(field):
|
||||
if pd.isna(field) or field in ignore_licenses:
|
||||
return
|
||||
|
||||
spdx_licenses = load_spdx_licenses()
|
||||
|
Loading…
Reference in New Issue
Block a user