mirror of
https://github.com/ilri/csv-metadata-quality.git
synced 2025-05-10 23:26:01 +02:00
Add validation of SPDX license identifiers
Currently this only checks the dcterms.license field and the result will only be a warning.
This commit is contained in:
@ -3,6 +3,7 @@ from datetime import datetime, timedelta
|
||||
import pandas as pd
|
||||
import requests
|
||||
import requests_cache
|
||||
import spdx_license_list
|
||||
from colorama import Fore
|
||||
from pycountry import languages
|
||||
|
||||
@ -317,3 +318,23 @@ def filename_extension(field):
|
||||
print(f"{Fore.YELLOW}Filename with uncommon extension: {Fore.RESET}{value}")
|
||||
|
||||
return field
|
||||
|
||||
|
||||
def spdx_license_identifier(field):
|
||||
"""Check if a license is a valid SPDX identifier.
|
||||
|
||||
Prints the value if it is invalid.
|
||||
"""
|
||||
|
||||
# Skip fields with missing values
|
||||
if pd.isna(field):
|
||||
return
|
||||
|
||||
# Try to split multi-value field on "||" separator
|
||||
for value in field.split("||"):
|
||||
if value not in spdx_license_list.LICENSES:
|
||||
print(f"{Fore.YELLOW}Non-SPDX license identifier: {Fore.RESET}{value}")
|
||||
|
||||
pass
|
||||
|
||||
return field
|
||||
|
Reference in New Issue
Block a user