From 8bc4cd419c1d6852fcde541673496b885b91225b Mon Sep 17 00:00:00 2001 From: Alan Orth Date: Mon, 13 Feb 2023 10:59:14 +0300 Subject: [PATCH] Strip filename descriptions before checking When checking for uncommon file extensions in the filename field we should strip descriptions that are meant for SAFBuilder, for example: Annual_Report_2020.pdf__description:Report. This ends up as a false positive that spams the output with warnings. --- CHANGELOG.md | 2 ++ csv_metadata_quality/check.py | 5 +++++ 2 files changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 36136a8..db8e171 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,8 @@ because it is deprecated and outdated - Don't run `fix.separators()` on title or abstract fields - Don't run whitespace or newline fixes on abstract fields - Ignore some common non-SPDX licenses +- Ignore `__description` suffix in filenames meant for SAFBuilder when checking +for uncommon file extensions ### Updated - Python dependencies diff --git a/csv_metadata_quality/check.py b/csv_metadata_quality/check.py index e188b32..dba39ef 100755 --- a/csv_metadata_quality/check.py +++ b/csv_metadata_quality/check.py @@ -286,6 +286,11 @@ def filename_extension(field): # Iterate over all values for value in values: + # Strip filename descriptions that are meant for SAFBuilder, for + # example: Annual_Report_2020.pdf__description:Report + if "__description" in value: + value = value.split("__")[0] + # Assume filename extension does not match filename_extension_match = False