1
0
mirror of https://github.com/ilri/csv-metadata-quality.git synced 2024-10-31 19:43:00 +01:00

csv_metadata_quality/check.py: update title in citation check

Initialize the titles and citations before the for loop so we can
access them later. This makes it easier to check if the item actually
has a citation.
This commit is contained in:
Alan Orth 2021-12-05 16:21:44 +02:00
parent e02678cd7c
commit 4d5696c4cb
Signed by: alanorth
GPG Key ID: 0FB860CC9C45B1B9

View File

@ -419,6 +419,11 @@ def title_in_citation(row):
Function prints a warning if the title does not appear in the citation. Function prints a warning if the title does not appear in the citation.
""" """
# Initialize some variables at global scope so that we can set them in the
# loop scope below and still be able to access them afterwards.
title = ""
citation = ""
# Iterate over the labels of the current row's values to get the names of # Iterate over the labels of the current row's values to get the names of
# the title and citation columns. Then we check if the title is present in # the title and citation columns. Then we check if the title is present in
# the citation. # the citation.
@ -430,17 +435,15 @@ def title_in_citation(row):
# Find the name of the title column # Find the name of the title column
match = re.match(r"^(dc|dcterms)\.title.*$", label) match = re.match(r"^(dc|dcterms)\.title.*$", label)
if match is not None: if match is not None:
title_column_name = label title = row[label]
# Find the name of the citation column # Find the name of the citation column
match = re.match(r"^.*?[cC]itation.*$", label) match = re.match(r"^.*?[cC]itation.*$", label)
if match is not None: if match is not None:
citation_column_name = label citation = row[label]
if row[citation_column_name] != "": if citation != "":
if row[title_column_name] not in row[citation_column_name]: if title not in citation:
print( print(f"{Fore.YELLOW}Title is not present in citation: {Fore.RESET}{title}")
f"{Fore.YELLOW}Title is not present in citation: {Fore.RESET}{row[title_column_name]}"
)
return return