diff --git a/CHANGELOG.md b/CHANGELOG.md index 36136a8..db8e171 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,8 @@ because it is deprecated and outdated - Don't run `fix.separators()` on title or abstract fields - Don't run whitespace or newline fixes on abstract fields - Ignore some common non-SPDX licenses +- Ignore `__description` suffix in filenames meant for SAFBuilder when checking +for uncommon file extensions ### Updated - Python dependencies diff --git a/csv_metadata_quality/check.py b/csv_metadata_quality/check.py index e188b32..dba39ef 100755 --- a/csv_metadata_quality/check.py +++ b/csv_metadata_quality/check.py @@ -286,6 +286,11 @@ def filename_extension(field): # Iterate over all values for value in values: + # Strip filename descriptions that are meant for SAFBuilder, for + # example: Annual_Report_2020.pdf__description:Report + if "__description" in value: + value = value.split("__")[0] + # Assume filename extension does not match filename_extension_match = False