diff --git a/CHANGELOG.md b/CHANGELOG.md index 2dbef54..e9b9a9b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ because it is deprecated and outdated - Require Python 3.9+ - Don't run `fix.separators()` on title fields - Don't run whitespace or newline fixes on abstract fields +- Ignore some common non-SPDX licenses ### Updated - Python dependencies diff --git a/csv_metadata_quality/check.py b/csv_metadata_quality/check.py index 465895d..e188b32 100755 --- a/csv_metadata_quality/check.py +++ b/csv_metadata_quality/check.py @@ -312,8 +312,19 @@ def spdx_license_identifier(field): Prints the value if it is invalid. """ + # List of common non-SPDX licenses to ignore + # See: https://ilri.github.io/cgspace-submission-guidelines/dcterms-license/dcterms-license.txt + ignore_licenses = { + "All rights reserved; no re-use allowed", + "All rights reserved; self-archive copy only", + "Copyrighted; Non-commercial educational use only", + "Copyrighted; Non-commercial use only", + "Copyrighted; all rights reserved", + "Other", + } + # Skip fields with missing values - if pd.isna(field): + if pd.isna(field) or field in ignore_licenses: return spdx_licenses = load_spdx_licenses()