mirror of
https://github.com/ISEAL-Community/iseal-core.git
synced 2025-01-22 03:43:00 +01:00
Add util/create-schema-dspace.py
Script to read the ISEAL Core and FSC metadata CSVs and create the appropriate schemas and metadata fields in a DSpace 6.x repository.
This commit is contained in:
parent
f21038663e
commit
57cc35c839
254
util/create-schema-dspace.py
Executable file
254
util/create-schema-dspace.py
Executable file
@ -0,0 +1,254 @@
|
||||
#!/usr/bin/env python3
|
||||
#
|
||||
# create-schema-dspace.py 0.0.1
|
||||
#
|
||||
# SPDX-License-Identifier: GPL-3.0-only
|
||||
#
|
||||
# ---
|
||||
#
|
||||
# A quick and dirty script to read schema fields from a CSV file and create them
|
||||
# them in the DSpace metadata registry using the REST API. Specify an email and
|
||||
# for a DSpace user with administrator privileges when running:
|
||||
#
|
||||
# $ ./util/create-schema-dspace.py -e me@example.com -p 'fuu!'
|
||||
#
|
||||
# You can optionally specify the URL of a DSpace REST application (default is to
|
||||
# use http://localhost:8080/rest).
|
||||
#
|
||||
# This script is written for Python 3 and requires several modules that you can
|
||||
# install with pip (I recommend setting up a Python virtual environment first):
|
||||
#
|
||||
# $ pip install requests pandas
|
||||
#
|
||||
|
||||
import argparse
|
||||
import signal
|
||||
import sys
|
||||
|
||||
import pandas as pd
|
||||
import requests
|
||||
|
||||
|
||||
def signal_handler(signal, frame):
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
# Try to log in, for example:
|
||||
#
|
||||
# $ http -f POST http://localhost:8080/rest/login email=aorth@fuuu.com password='fuuuuuu'
|
||||
#
|
||||
def login(user, password):
|
||||
request_url = rest_login_endpoint
|
||||
headers = {"user-agent": rest_user_agent}
|
||||
data = {"email": args.user, "password": args.password}
|
||||
|
||||
print(f"Logging in...")
|
||||
|
||||
try:
|
||||
request = requests.post(rest_login_endpoint, headers=headers, data=data)
|
||||
except requests.ConnectionError:
|
||||
sys.stderr.write(f" Could not connect to REST API: {args.request_url}.\n")
|
||||
|
||||
exit(1)
|
||||
|
||||
if request.status_code != requests.codes.ok:
|
||||
sys.stderr.write(f" Login failed.\n")
|
||||
|
||||
exit(1)
|
||||
|
||||
return request.cookies["JSESSIONID"]
|
||||
|
||||
|
||||
# Check the authentication status of the specified JSESSIONID.
|
||||
def check_session(sessionid):
|
||||
request_url = rest_status_endpoint
|
||||
headers = {"user-agent": rest_user_agent, "Accept": "application/json"}
|
||||
cookies = {"JSESSIONID": sessionid}
|
||||
|
||||
print(f"Checking session status...")
|
||||
|
||||
try:
|
||||
request = requests.get(request_url, headers=headers, cookies=cookies)
|
||||
except requests.ConnectionError:
|
||||
sys.stderr.write(f" Could not connect to REST API: {args.request_url}.\n")
|
||||
|
||||
exit(1)
|
||||
|
||||
if request.status_code == requests.codes.ok:
|
||||
if not request.json()["authenticated"]:
|
||||
sys.stderr.write(f" Session expired: {sessionid}.\n")
|
||||
|
||||
exit(1)
|
||||
else:
|
||||
sys.stderr.write(f" Error checking session status.\n")
|
||||
|
||||
exit(1)
|
||||
|
||||
|
||||
# Create a new schema by passing a Schema Object, for example:
|
||||
#
|
||||
# $ http POST http://localhost:8080/rest/registries/schema Cookie:JSESSIONID=549756EB08169F17697A56A7D56901B3 < schema.json
|
||||
#
|
||||
# See: https://wiki.lyrasis.org/display/DSDOC6x/REST+API#RESTAPI-SchemaObject
|
||||
def create_schema(schema):
|
||||
request_url = rest_schema_registry_endpoint
|
||||
headers = {"user-agent": rest_user_agent}
|
||||
cookies = {"JSESSIONID": session}
|
||||
|
||||
print(f" Attempting to create schema: {schema['prefix']}")
|
||||
|
||||
try:
|
||||
request = requests.post(request_url, headers, json=schema, cookies=cookies)
|
||||
except requests.ConnectionError:
|
||||
sys.stderr.write(f" Could not connect to REST API: {args.request_url}.\n")
|
||||
|
||||
exit(1)
|
||||
|
||||
# Check the status
|
||||
if request.status_code == requests.codes.ok:
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
|
||||
|
||||
# Create a new metadata field by passing a MetadataField Object, for example:
|
||||
#
|
||||
# $ http POST http://localhost:8080/rest/registries/schema/fuuu/metadata-fields \
|
||||
# Cookie:JSESSIONID=549756EB08169F17697A56A7D56901B3 < field.json
|
||||
#
|
||||
# See: https://wiki.lyrasis.org/display/DSDOC6x/REST+API#RESTAPI-MetadataFieldObject
|
||||
def create_field(schema_prefix, field):
|
||||
request_url = f"{rest_schema_registry_endpoint}/{schema_prefix}/metadata-fields"
|
||||
headers = {"user-agent": rest_user_agent}
|
||||
cookies = {"JSESSIONID": session}
|
||||
|
||||
print(f" Attempting to create field: {field['name']}")
|
||||
|
||||
try:
|
||||
request = requests.post(
|
||||
request_url,
|
||||
headers=headers,
|
||||
json=field,
|
||||
cookies=cookies,
|
||||
)
|
||||
except requests.ConnectionError:
|
||||
sys.stderr.write(f" Could not connect to REST API: {args.request_url}.\n")
|
||||
|
||||
exit(1)
|
||||
|
||||
# Check the status
|
||||
if request.status_code == requests.codes.ok:
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
|
||||
|
||||
def parse_fields(schema_df):
|
||||
# Iterate over all rows (the "index, row" syntax allows us to access column
|
||||
# headings in each row, which isn't possible if we just do row).
|
||||
for index, row in schema_df.iterrows():
|
||||
dspace_field_name = row["dspace field name"]
|
||||
# Extract the prefix from the field name, ie is.link.url
|
||||
dspace_field_prefix = dspace_field_name.split(".")[0]
|
||||
element_name = row["element name"]
|
||||
|
||||
# Make sure we only try to create fields in IS and FSC schemas for now.
|
||||
# In the future we may create fields for other schema extensions, but
|
||||
# we don't want to create dcterms fields, for example.
|
||||
if "is" in dspace_field_prefix or "fsc" in dspace_field_prefix:
|
||||
# Extract the element, ie "link" in is.link.url
|
||||
dspace_field_element = dspace_field_name.split(".")[1]
|
||||
|
||||
try:
|
||||
# Extract the qualifier, ie "url" in is.link.url, if it exists
|
||||
dspace_field_qualifier = dspace_field_name.split(".")[2]
|
||||
except IndexError:
|
||||
dspace_field_qualifier = None
|
||||
|
||||
# Create a list of tuples with the metadata field components
|
||||
field_components = [
|
||||
("element", dspace_field_element),
|
||||
("qualifier", dspace_field_qualifier),
|
||||
("name", row["dspace field name"]),
|
||||
("description", element_name),
|
||||
]
|
||||
|
||||
# Create a dict from the list of tuples аbove. We can pass the dict
|
||||
# to requests directly and it will be converted to JSON.
|
||||
field = dict(field_components)
|
||||
|
||||
if create_field(dspace_field_prefix, field):
|
||||
print(f" Created field: {dspace_field_name}")
|
||||
else:
|
||||
print(f" Field probably already exists: {dspace_field_name}")
|
||||
|
||||
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Create ISEAL and FSC schemas in a DSpace 6.x repository."
|
||||
)
|
||||
parser.add_argument("-d", "--debug", help="Print debug messages.", action="store_true")
|
||||
parser.add_argument(
|
||||
"-u",
|
||||
"--rest-url",
|
||||
help="URL of the DSpace 6.x REST API.",
|
||||
default="http://localhost:8080/rest",
|
||||
)
|
||||
parser.add_argument("-e", "--user", help="Email of administrator user.")
|
||||
parser.add_argument("-p", "--password", help="Email of administrator user.")
|
||||
parser.add_argument(
|
||||
"-s", "--jsessionid", help="JESSIONID, if previously authenticated."
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
# DSpace 6.x REST API base URL and endpoints
|
||||
rest_base_url = args.rest_url
|
||||
rest_login_endpoint = f"{rest_base_url}/login"
|
||||
rest_status_endpoint = f"{rest_base_url}/status"
|
||||
rest_schema_registry_endpoint = f"{rest_base_url}/registries/schema"
|
||||
rest_user_agent = "Alan Test Python Requests Bot"
|
||||
session = args.jsessionid
|
||||
|
||||
# set the signal handler for SIGINT (^C)
|
||||
signal.signal(signal.SIGINT, signal_handler)
|
||||
|
||||
# Try to login if no session was passed
|
||||
if not args.jsessionid:
|
||||
session = login(args.user, args.password)
|
||||
else:
|
||||
check_session(args.jsessionid)
|
||||
|
||||
if args.debug:
|
||||
sys.stderr.write(f" Logged in, using JSESSIONID: {session}\n")
|
||||
|
||||
print("\nCreating schemas...")
|
||||
|
||||
iseal_schema = {
|
||||
"namespace": "https://iseal-community.github.io/iseal-core",
|
||||
"prefix": "is",
|
||||
}
|
||||
fsc_schema = {
|
||||
"namespace": "https://iseal-community.github.io/iseal-core/fsc",
|
||||
"prefix": "fsc",
|
||||
}
|
||||
|
||||
if create_schema(iseal_schema):
|
||||
print(" Created ISEAL Core schema")
|
||||
else:
|
||||
print(" ISEAL Core schema probably already exists")
|
||||
|
||||
if create_schema(fsc_schema):
|
||||
print(" Created FSC schema")
|
||||
else:
|
||||
print(" FSC schema probably already exists")
|
||||
|
||||
print("\nCreating fields...")
|
||||
|
||||
for file in ["data/iseal-core.csv", "data/fsc.csv"]:
|
||||
if args.debug:
|
||||
sys.stderr.write(f" Opening {file}\n")
|
||||
|
||||
try:
|
||||
df = pd.read_csv(file, usecols=["dspace field name", "element name"])
|
||||
parse_fields(df)
|
||||
except FileNotFoundError:
|
||||
sys.stderr.write(f" Could not open {file}\n")
|
Loading…
x
Reference in New Issue
Block a user