diff --git a/content/posts/2022-08.md b/content/posts/2022-08.md index 91eaad989..19111d663 100644 --- a/content/posts/2022-08.md +++ b/content/posts/2022-08.md @@ -165,7 +165,7 @@ $ xsv join --left id ~/Downloads/2022-08-18-MELIAs-UTF-8-With-Files.csv id ~/Dow - I was previously splitting up the text value field (title/abstract/etc) by spaces and searching for each word in the list of terms/countries like this: ```console -with open(r"/tmp/cgspace-countries.txt",'r') as f : +with open(r"/tmp/cgspace-countries.txt",'r') as f: countries = [name.rstrip().lower() for name in f] return "||".join([x for x in value.split(' ') if x.lower() in countries]) @@ -176,7 +176,7 @@ return "||".join([x for x in value.split(' ') if x.lower() in countries]) ```console import re -with open(r"/tmp/agrovoc-subjects.txt",'r') as f : +with open(r"/tmp/agrovoc-subjects.txt",'r') as f: terms = [name.rstrip().lower() for name in f] return "||".join([term for term in terms if re.match(r".*\b" + term + r"\b.*", value.lower())])