From 355fa3a56f8b862c585bc0734ff6454202166b24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marie-Ang=C3=A9lique=20Laporte?= Date: Thu, 16 Dec 2021 09:58:51 +0100 Subject: [PATCH] Update rdf_creation.py --- util/rdf_creation.py | 175 ++++++++++++++++++++++++++----------------- 1 file changed, 108 insertions(+), 67 deletions(-) diff --git a/util/rdf_creation.py b/util/rdf_creation.py index 4852dcab..67bc9a47 100644 --- a/util/rdf_creation.py +++ b/util/rdf_creation.py @@ -7,49 +7,48 @@ from rdflib import URIRef, BNode, Literal import json import os import pandas as pd -import pylode import re g = Graph() ##namespace -#NS = "http://iseal.org/terms/" +# NS = "http://iseal.org/terms/" NS = "https://alanorth.github.io/iseal-schema/#" ## create ontology iseal = URIRef(NS) -g.add((iseal, RDF.type, OWL.Ontology)) +g.add((iseal, RDF.type, OWL.Ontology)) -df = pd.read_csv('../data/schema-fields.csv') -df.dropna(how='all', axis=1) -df.fillna('', inplace=True) +df = pd.read_csv("../data/schema-fields.csv") +df.dropna(how="all", axis=1) +df.fillna("", inplace=True) -for index, row in df.iterrows() : - element_name = row['element name'] - element_description = row['element description'] - comment = row['element guidance'] - example = row['element link for more information'] - cardinality = row['element options'] - prop_type = row['element type'] - controlled_vocab = row['element controlled values or terms'] - module = row['idss element cluster'] - module_cat = row['idss schema module'] - dc = row['element link for dublin core attributes'] - dspace = row['dspace field name'] - +for index, row in df.iterrows(): + element_name = row["element name"] + element_description = row["element description"] + comment = row["element guidance"] + example = row["element link for more information"] + cardinality = row["element options"] + prop_type = row["element type"] + controlled_vocab = row["element controlled values or terms"] + module = row["idss element cluster"] + module_cat = row["idss schema module"] + dc = row["element link for dublin core attributes"] + dspace = row["dspace field name"] + ##module - moduleUri = URIRef(NS+module) + moduleUri = URIRef(NS + module) if not (None, SKOS.prefLabel, Literal(module)) in g: ##create module as skos concept - g.add((moduleUri, RDF.type, OWL.Class)) ## SKOS.Concept + g.add((moduleUri, RDF.type, OWL.Class)) ## SKOS.Concept g.add((moduleUri, SKOS.prefLabel, Literal(module))) ##element - #if '-' not in element_name: - if True: ## lazy reindenting - concept = module_cat#element_name.split(' - ')[0] - #element = element_name.strip() + # if '-' not in element_name: + if True: ## lazy reindenting + concept = module_cat # element_name.split(' - ')[0] + # element = element_name.strip() ## code from Alan # Make sure element name is URL friendly because we need to use it in # the file system and in the URL. @@ -82,48 +81,75 @@ for index, row in df.iterrows() : # files by combining the cluster and the element name. This could # change in the future. element_name_safe = cluster.replace(" ", "-").lower() + "-" + element_name - + element = element_name_safe - - - conceptUri = URIRef(NS+concept.replace(" ", "_")) + + conceptUri = URIRef(NS + concept.replace(" ", "_")) if not (None, SKOS.prefLabel, Literal(concept)) in g: ##create concept as skos concept - g.add((conceptUri, RDF.type, OWL.Class)) ## SKOS.Concept + g.add((conceptUri, RDF.type, OWL.Class)) ## SKOS.Concept g.add((conceptUri, SKOS.prefLabel, Literal(concept))) g.add((conceptUri, RDFS.subClassOf, moduleUri)) - + ## create properties - elementURI = URIRef(NS+element.replace(" ", "_")) - if prop_type == 'CONTROLLED VALUE': ## object property + elementURI = URIRef(NS + element.replace(" ", "_")) + if prop_type == "CONTROLLED VALUE": ## object property g.add((elementURI, SKOS.prefLabel, Literal(element))) g.add((elementURI, RDF.type, OWL.ObjectProperty)) g.add((elementURI, OWL.domain, conceptUri)) ## add suproperty link - if(dc): + if dc: dct = dc.split(":")[1] - if 'wgs84' in dc: - g.add((elementURI, RDFS.subPropertyOf, URIRef("http://www.w3.org/2003/01/geo/wgs84_pos#"+dct))) + if "wgs84" in dc: + g.add( + ( + elementURI, + RDFS.subPropertyOf, + URIRef("http://www.w3.org/2003/01/geo/wgs84_pos#" + dct), + ) + ) else: - g.add((elementURI, RDFS.subPropertyOf, URIRef("http://purl.org/dc/terms/"+dct))) + g.add( + ( + elementURI, + RDFS.subPropertyOf, + URIRef("http://purl.org/dc/terms/" + dct), + ) + ) ## add dspace alternative ID - g.add((elementURI, URIRef("http://purl.org/dc/terms/alternative"), Literal(dspace))) + g.add( + ( + elementURI, + URIRef("http://purl.org/dc/terms/alternative"), + Literal(dspace), + ) + ) ## create controlled vocab - cvURI = URIRef(NS+"VOCAB_"+element.replace(" ", "_")) - g.add((cvURI, RDF.type, OWL.Class)) ## SKOS.Concept ## SKOS.Collection?? - g.add((cvURI, SKOS.prefLabel, Literal("VOCAB "+element))) + cvURI = URIRef(NS + "VOCAB_" + element.replace(" ", "_")) + g.add((cvURI, RDF.type, OWL.Class)) ## SKOS.Concept ## SKOS.Collection?? + g.add((cvURI, SKOS.prefLabel, Literal("VOCAB " + element))) for term in controlled_vocab.split("||"): - termURI = URIRef(NS+term.replace(" ", "_").replace("|", "")) - g.add((termURI, RDF.type, OWL.Class)) ## SKOS.Concept + termURI = URIRef(NS + term.replace(" ", "_").replace("|", "")) + g.add((termURI, RDF.type, OWL.Class)) ## SKOS.Concept g.add((termURI, SKOS.prefLabel, Literal(term))) - g.add((termURI, RDFS.subClassOf, cvURI)) ## SKOS.member??? + g.add((termURI, RDFS.subClassOf, cvURI)) ## SKOS.member??? g.add((elementURI, OWL.range, cvURI)) - + ## add the controlled vocab information on properties directly - g.add((elementURI, URIRef("http://purl.org/dc/dcam/rangeIncludes"), Literal("https://raw.githubusercontent.com/alanorth/iseal-schema/main/data/controlled-vocabularies/"+element+".txt"))) - + g.add( + ( + elementURI, + URIRef("http://purl.org/dc/dcam/rangeIncludes"), + Literal( + "https://raw.githubusercontent.com/alanorth/iseal-schema/main/data/controlled-vocabularies/" + + element + + ".txt" + ), + ) + ) + ## cardinality - if cardinality == 'MULTI SELECT FROM CONTROL LIST': + if cardinality == "MULTI SELECT FROM CONTROL LIST": br = BNode() g.add((br, RDF.type, OWL.Restriction)) g.add((br, OWL.onProperty, elementURI)) @@ -137,36 +163,54 @@ for index, row in df.iterrows() : g.add((br, OWL.maxQualifiedCardinality, Literal(1))) g.add((br, OWL.onClass, cvURI)) g.add((conceptUri, RDFS.subClassOf, br)) - - #elif prop_type == 'URL': ## object property + + # elif prop_type == 'URL': ## object property # g.add((elementURI, RDF.type, OWL.ObjectProperty)) # g.add((elementURI, OWL.domain, conceptUri)) # g.add((elementURI, OWL.range, URIRef("") )) # g.add((elementURI, SKOS.prefLabel, Literal(element))) - else: ## datatype properties + else: ## datatype properties g.add((elementURI, SKOS.prefLabel, Literal(element))) g.add((elementURI, RDF.type, OWL.DatatypeProperty)) g.add((elementURI, OWL.domain, conceptUri)) - if(dc): + if dc: dct = dc.split(":")[1] - if 'wgs84' in dc: - g.add((elementURI, RDFS.subPropertyOf, URIRef("http://www.w3.org/2003/01/geo/wgs84_pos#"+dct))) + if "wgs84" in dc: + g.add( + ( + elementURI, + RDFS.subPropertyOf, + URIRef("http://www.w3.org/2003/01/geo/wgs84_pos#" + dct), + ) + ) else: - g.add((elementURI, RDFS.subPropertyOf, URIRef("http://purl.org/dc/terms/"+dct))) + g.add( + ( + elementURI, + RDFS.subPropertyOf, + URIRef("http://purl.org/dc/terms/" + dct), + ) + ) ## add dspace alternative ID - g.add((elementURI, URIRef("http://purl.org/dc/terms/alternative"), Literal(dspace))) + g.add( + ( + elementURI, + URIRef("http://purl.org/dc/terms/alternative"), + Literal(dspace), + ) + ) range = None - if prop_type == 'DATE': - g.add((elementURI, OWL.range, XSD.date )) + if prop_type == "DATE": + g.add((elementURI, OWL.range, XSD.date)) range = XSD.date - elif prop_type == 'NUMERIC VALUE': + elif prop_type == "NUMERIC VALUE": g.add((elementURI, OWL.range, XSD.float)) range = XSD.float else: g.add((elementURI, OWL.range, XSD.string)) range = XSD.string ##cardinality - if cardinality == 'REPEAT VALUES': + if cardinality == "REPEAT VALUES": br = BNode() g.add((br, RDF.type, OWL.Restriction)) g.add((br, OWL.onProperty, elementURI)) @@ -179,18 +223,15 @@ for index, row in df.iterrows() : g.add((br, OWL.maxQualifiedCardinality, Literal(1))) g.add((br, OWL.onDataRange, range)) g.add((conceptUri, RDFS.subClassOf, br)) - - + if comment: g.add((elementURI, SKOS.scopeNote, Literal(comment))) if example: g.add((elementURI, RDFS.comment, Literal(example))) if element_description: g.add((elementURI, SKOS.definition, Literal(element_description))) - #else: - #print(element_name) + # else: + # print(element_name) ## save graph -g.serialize(destination='idds_new3.ttl', format='turtle') - - +g.serialize(destination="idds_new3.ttl", format="turtle")