From c5eca3cc12d7b5f62d027c683b1284742bce936c Mon Sep 17 00:00:00 2001
From: Alan Orth <alan.orth@gmail.com>
Date: Mon, 13 Dec 2021 15:03:20 +0200
Subject: [PATCH] util/generate-hugo-content.py: re-work vocabularies

Read vocabularies from the data/controlled-vocabularies directory
instead of exporting them from the schema itself. Also, name every
copied file vocabulary.txt on the site, since each one already
lives in its own field's directory.
---
 util/generate-hugo-content.py | 31 +++++++++----------------------
 1 file changed, 9 insertions(+), 22 deletions(-)

diff --git a/util/generate-hugo-content.py b/util/generate-hugo-content.py
index 989d56d3..d9ea5346 100755
--- a/util/generate-hugo-content.py
+++ b/util/generate-hugo-content.py
@@ -9,6 +9,7 @@ import os
 import re
 import sys
 from shutil import rmtree
+from shutil import copyfile
 
 import pandas as pd
 
@@ -78,9 +79,13 @@ def parseSchema(schema_df):
         if row["element controlled values or terms"]:
             controlled_vocab = True
 
-            exportVocabulary(
-                row["element controlled values or terms"], element_name_safe
-            )
+            controlled_vocabulary_src=f"data/controlled-vocabularies/{element_name_safe}.txt"
+            controlled_vocabulary_dst=f"site/content/terms/{element_name_safe}/vocabulary.txt"
+
+            copyfile(controlled_vocabulary_src, controlled_vocabulary_dst)
+
+            if args.debug:
+                print(f"Copied controlled vocabulary: {element_name_safe}")
         else:
             controlled_vocab = False
 
@@ -118,7 +123,7 @@ def parseSchema(schema_df):
             indexLines.append(f"comment: '{comment}'\n")
         indexLines.append(f"required: {required}\n")
         if controlled_vocab:
-            indexLines.append(f"vocabulary: '{element_name_safe}.txt'\n")
+            indexLines.append(f"vocabulary: 'vocabulary.txt'\n")
         if module:
             indexLines.append(f"module: '{module}'\n")
         if cluster:
@@ -132,24 +137,6 @@ def parseSchema(schema_df):
             f.writelines(indexLines)
 
 
-def exportVocabulary(vocabulary: str, element_name_safe: str):
-    # Create an empty list where we'll add all the values (we don't need to do
-    # it this way, but using a list allows us to de-duplicate the values).
-    controlledVocabularyLines = []
-    for value in vocabulary.split("||"):
-        if value not in controlledVocabularyLines:
-            controlledVocabularyLines.append(value)
-
-    with open(
-        f"site/content/terms/{element_name_safe}/{element_name_safe}.txt", "w"
-    ) as f:
-        for value in controlledVocabularyLines:
-            f.write(f"{value}\n")
-
-    if args.debug:
-        print(f"Exported controlled vocabulary: {element_name_safe}")
-
-
 parser = argparse.ArgumentParser(
     description="Parse an ISEAL schema Excel file to produce documentation about metadata requirements."
 )