Update rdf_creation.py

This commit is contained in:
Marie-Angélique Laporte 2021-12-16 09:58:51 +01:00
parent 1b6b75f886
commit 355fa3a56f

View File

@ -7,7 +7,6 @@ from rdflib import URIRef, BNode, Literal
import json import json
import os import os
import pandas as pd import pandas as pd
import pylode
import re import re
@ -21,23 +20,23 @@ iseal = URIRef(NS)
g.add((iseal, RDF.type, OWL.Ontology)) g.add((iseal, RDF.type, OWL.Ontology))
df = pd.read_csv('../data/schema-fields.csv') df = pd.read_csv("../data/schema-fields.csv")
df.dropna(how='all', axis=1) df.dropna(how="all", axis=1)
df.fillna('', inplace=True) df.fillna("", inplace=True)
for index, row in df.iterrows(): for index, row in df.iterrows():
element_name = row['element name'] element_name = row["element name"]
element_description = row['element description'] element_description = row["element description"]
comment = row['element guidance'] comment = row["element guidance"]
example = row['element link for more information'] example = row["element link for more information"]
cardinality = row['element options'] cardinality = row["element options"]
prop_type = row['element type'] prop_type = row["element type"]
controlled_vocab = row['element controlled values or terms'] controlled_vocab = row["element controlled values or terms"]
module = row['idss element cluster'] module = row["idss element cluster"]
module_cat = row['idss schema module'] module_cat = row["idss schema module"]
dc = row['element link for dublin core attributes'] dc = row["element link for dublin core attributes"]
dspace = row['dspace field name'] dspace = row["dspace field name"]
##module ##module
moduleUri = URIRef(NS + module) moduleUri = URIRef(NS + module)
@ -85,7 +84,6 @@ for index, row in df.iterrows() :
element = element_name_safe element = element_name_safe
conceptUri = URIRef(NS + concept.replace(" ", "_")) conceptUri = URIRef(NS + concept.replace(" ", "_"))
if not (None, SKOS.prefLabel, Literal(concept)) in g: if not (None, SKOS.prefLabel, Literal(concept)) in g:
##create concept as skos concept ##create concept as skos concept
@ -95,19 +93,37 @@ for index, row in df.iterrows() :
## create properties ## create properties
elementURI = URIRef(NS + element.replace(" ", "_")) elementURI = URIRef(NS + element.replace(" ", "_"))
if prop_type == 'CONTROLLED VALUE': ## object property if prop_type == "CONTROLLED VALUE": ## object property
g.add((elementURI, SKOS.prefLabel, Literal(element))) g.add((elementURI, SKOS.prefLabel, Literal(element)))
g.add((elementURI, RDF.type, OWL.ObjectProperty)) g.add((elementURI, RDF.type, OWL.ObjectProperty))
g.add((elementURI, OWL.domain, conceptUri)) g.add((elementURI, OWL.domain, conceptUri))
## add subproperty link ## add subproperty link
if(dc): if dc:
dct = dc.split(":")[1] dct = dc.split(":")[1]
if 'wgs84' in dc: if "wgs84" in dc:
g.add((elementURI, RDFS.subPropertyOf, URIRef("http://www.w3.org/2003/01/geo/wgs84_pos#"+dct))) g.add(
(
elementURI,
RDFS.subPropertyOf,
URIRef("http://www.w3.org/2003/01/geo/wgs84_pos#" + dct),
)
)
else: else:
g.add((elementURI, RDFS.subPropertyOf, URIRef("http://purl.org/dc/terms/"+dct))) g.add(
(
elementURI,
RDFS.subPropertyOf,
URIRef("http://purl.org/dc/terms/" + dct),
)
)
## add dspace alternative ID ## add dspace alternative ID
g.add((elementURI, URIRef("http://purl.org/dc/terms/alternative"), Literal(dspace))) g.add(
(
elementURI,
URIRef("http://purl.org/dc/terms/alternative"),
Literal(dspace),
)
)
## create controlled vocab ## create controlled vocab
cvURI = URIRef(NS + "VOCAB_" + element.replace(" ", "_")) cvURI = URIRef(NS + "VOCAB_" + element.replace(" ", "_"))
g.add((cvURI, RDF.type, OWL.Class)) ## SKOS.Concept ## SKOS.Collection?? g.add((cvURI, RDF.type, OWL.Class)) ## SKOS.Concept ## SKOS.Collection??
@ -120,10 +136,20 @@ for index, row in df.iterrows() :
g.add((elementURI, OWL.range, cvURI)) g.add((elementURI, OWL.range, cvURI))
## add the controlled vocab information on properties directly ## add the controlled vocab information on properties directly
g.add((elementURI, URIRef("http://purl.org/dc/dcam/rangeIncludes"), Literal("https://raw.githubusercontent.com/alanorth/iseal-schema/main/data/controlled-vocabularies/"+element+".txt"))) g.add(
(
elementURI,
URIRef("http://purl.org/dc/dcam/rangeIncludes"),
Literal(
"https://raw.githubusercontent.com/alanorth/iseal-schema/main/data/controlled-vocabularies/"
+ element
+ ".txt"
),
)
)
## cardinality ## cardinality
if cardinality == 'MULTI SELECT FROM CONTROL LIST': if cardinality == "MULTI SELECT FROM CONTROL LIST":
br = BNode() br = BNode()
g.add((br, RDF.type, OWL.Restriction)) g.add((br, RDF.type, OWL.Restriction))
g.add((br, OWL.onProperty, elementURI)) g.add((br, OWL.onProperty, elementURI))
@ -147,26 +173,44 @@ for index, row in df.iterrows() :
g.add((elementURI, SKOS.prefLabel, Literal(element))) g.add((elementURI, SKOS.prefLabel, Literal(element)))
g.add((elementURI, RDF.type, OWL.DatatypeProperty)) g.add((elementURI, RDF.type, OWL.DatatypeProperty))
g.add((elementURI, OWL.domain, conceptUri)) g.add((elementURI, OWL.domain, conceptUri))
if(dc): if dc:
dct = dc.split(":")[1] dct = dc.split(":")[1]
if 'wgs84' in dc: if "wgs84" in dc:
g.add((elementURI, RDFS.subPropertyOf, URIRef("http://www.w3.org/2003/01/geo/wgs84_pos#"+dct))) g.add(
(
elementURI,
RDFS.subPropertyOf,
URIRef("http://www.w3.org/2003/01/geo/wgs84_pos#" + dct),
)
)
else: else:
g.add((elementURI, RDFS.subPropertyOf, URIRef("http://purl.org/dc/terms/"+dct))) g.add(
(
elementURI,
RDFS.subPropertyOf,
URIRef("http://purl.org/dc/terms/" + dct),
)
)
## add dspace alternative ID ## add dspace alternative ID
g.add((elementURI, URIRef("http://purl.org/dc/terms/alternative"), Literal(dspace))) g.add(
(
elementURI,
URIRef("http://purl.org/dc/terms/alternative"),
Literal(dspace),
)
)
range = None range = None
if prop_type == 'DATE': if prop_type == "DATE":
g.add((elementURI, OWL.range, XSD.date)) g.add((elementURI, OWL.range, XSD.date))
range = XSD.date range = XSD.date
elif prop_type == 'NUMERIC VALUE': elif prop_type == "NUMERIC VALUE":
g.add((elementURI, OWL.range, XSD.float)) g.add((elementURI, OWL.range, XSD.float))
range = XSD.float range = XSD.float
else: else:
g.add((elementURI, OWL.range, XSD.string)) g.add((elementURI, OWL.range, XSD.string))
range = XSD.string range = XSD.string
##cardinality ##cardinality
if cardinality == 'REPEAT VALUES': if cardinality == "REPEAT VALUES":
br = BNode() br = BNode()
g.add((br, RDF.type, OWL.Restriction)) g.add((br, RDF.type, OWL.Restriction))
g.add((br, OWL.onProperty, elementURI)) g.add((br, OWL.onProperty, elementURI))
@ -180,7 +224,6 @@ for index, row in df.iterrows() :
g.add((br, OWL.onDataRange, range)) g.add((br, OWL.onDataRange, range))
g.add((conceptUri, RDFS.subClassOf, br)) g.add((conceptUri, RDFS.subClassOf, br))
if comment: if comment:
g.add((elementURI, SKOS.scopeNote, Literal(comment))) g.add((elementURI, SKOS.scopeNote, Literal(comment)))
if example: if example:
@ -191,6 +234,4 @@ for index, row in df.iterrows() :
# print(element_name) # print(element_name)
## save graph ## save graph
g.serialize(destination='idds_new3.ttl', format='turtle') g.serialize(destination="idds_new3.ttl", format="turtle")