util/generate-hugo-content.py: re-work vocabularies

Read vocabularies from the data/controlled-vocabularies directory instead of exporting them from the schema itself. Also, I use the name vocabulary.txt for all of them on the site since they are in each field's directory already.
2025-08-16 18:03:03 +02:00 · 2021-12-13 15:03:20 +02:00
parent 96bc2b5f79
commit c5eca3cc12
1 changed files with 9 additions and 22 deletions
--- a/util/generate-hugo-content.py
+++ b/util/generate-hugo-content.py
@@ -9,6 +9,7 @@ import os
 import re
 import sys
 from shutil import rmtree
 from shutil import copyfile
 import pandas as pd
@@ -78,9 +79,13 @@ def parseSchema(schema_df):
        if row["element controlled values or terms"]:
            controlled_vocab = True
-            exportVocabulary(
+            controlled_vocabulary_src=f"data/controlled-vocabularies/{element_name_safe}.txt"
-                row["element controlled values or terms"], element_name_safe
+            controlled_vocabulary_dst=f"site/content/terms/{element_name_safe}/vocabulary.txt"
-            )
+
            copyfile(controlled_vocabulary_src, controlled_vocabulary_dst)
            if args.debug:
                print(f"Copied controlled vocabulary: {element_name_safe}")
        else:
            controlled_vocab = False
@@ -118,7 +123,7 @@ def parseSchema(schema_df):
            indexLines.append(f"comment: '{comment}'\n")
        indexLines.append(f"required: {required}\n")
        if controlled_vocab:
-            indexLines.append(f"vocabulary: '{element_name_safe}.txt'\n")
+            indexLines.append(f"vocabulary: 'vocabulary.txt'\n")
        if module:
            indexLines.append(f"module: '{module}'\n")
        if cluster:
@@ -132,24 +137,6 @@ def parseSchema(schema_df):
            f.writelines(indexLines)
 def exportVocabulary(vocabulary: str, element_name_safe: str):
    """Write an element's controlled vocabulary to a text file, one value per line.

    Args:
        vocabulary: The vocabulary values joined by the "||" separator.
        element_name_safe: Name used for both the term directory and the
            output file under site/content/terms/.

    Raises:
        FileNotFoundError: If the element's directory under
            site/content/terms/ does not exist (it is not created here).
    """
    # dict.fromkeys() drops duplicate values while keeping first-seen order.
    unique_values = dict.fromkeys(vocabulary.split("||"))

    # Write explicitly as UTF-8 so the output does not depend on the
    # platform's default locale encoding.
    with open(
        f"site/content/terms/{element_name_safe}/{element_name_safe}.txt",
        "w",
        encoding="utf-8",
    ) as f:
        f.writelines(f"{value}\n" for value in unique_values)

    # `args` is read from module scope; it is defined elsewhere in the script.
    if args.debug:
        print(f"Exported controlled vocabulary: {element_name_safe}")
 # Command-line argument parser; the description below is shown in --help output.
 parser = argparse.ArgumentParser(
    description="Parse an ISEAL schema Excel file to produce documentation about metadata requirements."
 )