Mirror of https://github.com/ilri/dspace-statistics-api.git (synced 2025-07-06 14:31:38 +02:00)
Use uv build backend
uv's build backend expects our module to be in src.
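
For context, here is a minimal sketch of what the pyproject.toml build-system table could look like after this change; the exact version pin is an assumption and is not taken from this commit:

[build-system]
# Assumed version constraint; the commit's actual pyproject.toml may differ.
requires = ["uv_build>=0.7,<0.8"]
build-backend = "uv_build"

By default uv's build backend looks for the package under src/, which is why the module moves to src/dspace_statistics_api/ here.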
@@ -1,254 +0,0 @@
# SPDX-License-Identifier: GPL-3.0-only

import json
import math

import falcon
from falcon_swagger_ui import register_swaggerui_app

from .config import DSPACE_STATISTICS_API_URL, VERSION
from .database import DatabaseManager
from .stats import get_downloads, get_views
from .util import set_statistics_scope, validate_post_parameters


class RootResource:
    def on_get(self, req, resp):
        resp.status = falcon.HTTP_200
        resp.content_type = "text/html"
        docs_html = (
            "<!DOCTYPE html>"
            '<html lang="en-US">'
            "  <head>"
            '    <meta charset="UTF-8">'
            "    <title>DSpace Statistics API</title>"
            "  </head>"
            "  <body>"
            f"    <h1>DSpace Statistics API {VERSION}</h1>"
            f"    <p>This site is running the <a href=\"https://github.com/ilri/dspace-statistics-api\" title=\"DSpace Statistics API project\">DSpace Statistics API</a>. For more information see the project's README.md or the interactive <a href=\"{DSPACE_STATISTICS_API_URL + '/swagger'}\">Swagger UI</a> built into this API.</p>"
            "  </body>"
            "</html>"
        )

        resp.text = docs_html


class StatusResource:
    def on_get(self, req, resp):
        message = {"version": VERSION}

        resp.status = falcon.HTTP_200
        resp.media = message


class OpenAPIJSONResource:
    def on_get(self, req, resp):
        resp.status = falcon.HTTP_200
        resp.content_type = "application/json"
        with open("dspace_statistics_api/docs/openapi.json", "r") as f:
            # Load the openapi.json schema
            data = json.load(f)

            # Swagger assumes your API is at the root of the current host unless
            # you configure a "servers" block in the schema. The problem is that
            # I want this to work in both development and production, so we need
            # to make this configurable.
            #
            # If the DSPACE_STATISTICS_API_URL is configured then we will add a
            # server entry to the openapi.json schema before sending it.
            if DSPACE_STATISTICS_API_URL != "":
                data["servers"] = [{"url": DSPACE_STATISTICS_API_URL}]

            # Set the version in the schema so Swagger UI can display it
            data["info"]["version"] = VERSION

            resp.text = json.dumps(data)


class AllStatisticsResource:
    @falcon.before(set_statistics_scope)
    def on_get(self, req, resp):
        """Handles GET requests"""
        # Return HTTPBadRequest if the limit or page parameters are invalid
        limit = req.get_param_as_int("limit", min_value=1, max_value=100) or 100
        page = req.get_param_as_int("page", min_value=0) or 0
        offset = limit * page

        with DatabaseManager() as db:
            db.set_read_only(True)

            with db.cursor() as cursor:
                # get total number of communities/collections/items so we can estimate the pages
                cursor.execute(f"SELECT COUNT(id) FROM {req.context.statistics_scope}")
                pages = math.ceil(cursor.fetchone()["count"] / limit)

                # get statistics and use limit and offset to page through results
                cursor.execute(
                    f"SELECT id, views, downloads FROM {req.context.statistics_scope} ORDER BY id LIMIT %s OFFSET %s",
                    [limit, offset],
                )

                # create a list to hold dicts of stats
                statistics = []

                # iterate over results and build statistics object
                for result in cursor:
                    statistics.append(
                        {
                            "id": str(result["id"]),
                            "views": result["views"],
                            "downloads": result["downloads"],
                        }
                    )

        message = {
            "currentPage": page,
            "totalPages": pages,
            "limit": limit,
            "statistics": statistics,
        }

        resp.media = message

    @falcon.before(set_statistics_scope)
    @falcon.before(validate_post_parameters)
    def on_post(self, req, resp):
        """Handles POST requests.

        Uses two `before` hooks to set the statistics "scope" and validate the
        POST parameters. The "scope" is the type of statistics we want, which
        will be items, communities, or collections, depending on the request.
        """

        # Build the Solr date string, ie: [* TO *]
        if req.context.dateFrom and req.context.dateTo:
            solr_date_string = f"[{req.context.dateFrom} TO {req.context.dateTo}]"
        elif not req.context.dateFrom and req.context.dateTo:
            solr_date_string = f"[* TO {req.context.dateTo}]"
        elif req.context.dateFrom and not req.context.dateTo:
            solr_date_string = f"[{req.context.dateFrom} TO *]"
        else:
            solr_date_string = "[* TO *]"

        # Helper variables to make working with pages/items/results easier and
        # to make the code easier to understand
        number_of_elements: int = len(req.context.elements)
        pages: int = math.ceil(number_of_elements / req.context.limit)
        first_element: int = req.context.page * req.context.limit
        last_element: int = first_element + req.context.limit
        # Get a subset of the POSTed items based on our limit. Note that Python
        # list slicing and indexing are both zero based, but the first and last
        # items in a slice can be confusing. See this ASCII diagram:
        #
        #                 +---+---+---+---+---+---+
        #                 | P | y | t | h | o | n |
        #                 +---+---+---+---+---+---+
        # Slice position: 0   1   2   3   4   5   6
        # Index position:   0   1   2   3   4   5
        #
        # So if we have a list of items with 240 items:
        #
        #   1st set: items[0:100] would give items at indexes 0 to 99
        #   2nd set: items[100:200] would give items at indexes 100 to 199
        #   3rd set: items[200:300] would give items at indexes 200 to 239
        elements_subset: list = req.context.elements[first_element:last_element]

        views: dict = get_views(
            solr_date_string, elements_subset, req.context.views_facet_field
        )
        downloads: dict = get_downloads(
            solr_date_string, elements_subset, req.context.downloads_facet_field
        )

        # create a list to hold dicts of stats
        statistics = []

        # iterate over views dict to extract views and use the element id as an
        # index to the downloads dict to extract downloads.
        for k, v in views.items():
            statistics.append({"id": k, "views": v, "downloads": downloads[k]})

        message = {
            "currentPage": req.context.page,
            "totalPages": pages,
            "limit": req.context.limit,
            "statistics": statistics,
        }

        resp.status = falcon.HTTP_200
        resp.media = message


class SingleStatisticsResource:
    @falcon.before(set_statistics_scope)
    def on_get(self, req, resp, id_):
        """Handles GET requests"""

        with DatabaseManager() as db:
            db.set_read_only(True)

            with db.cursor() as cursor:
                cursor.execute(
                    f"SELECT views, downloads FROM {req.context.database} WHERE id=%s",
                    [str(id_)],
                )
                if cursor.rowcount == 0:
                    raise falcon.HTTPNotFound(
                        title=f"{req.context.statistics_scope} not found",
                        description=f'The {req.context.statistics_scope} with id "{str(id_)}" was not found.',
                    )
                else:
                    results = cursor.fetchone()

                    statistics = {
                        "id": str(id_),
                        "views": results["views"],
                        "downloads": results["downloads"],
                    }

                    resp.media = statistics


app = application = falcon.App()
app.add_route("/", RootResource())
app.add_route("/status", StatusResource())

# Item routes
app.add_route("/items", AllStatisticsResource())
app.add_route("/item/{id_:uuid}", SingleStatisticsResource())

# Community routes
app.add_route("/communities", AllStatisticsResource())
app.add_route("/community/{id_:uuid}", SingleStatisticsResource())

# Collection routes
app.add_route("/collections", AllStatisticsResource())
app.add_route("/collection/{id_:uuid}", SingleStatisticsResource())

# Route to the OpenAPI JSON schema used by the Swagger UI
app.add_route("/docs/openapi.json", OpenAPIJSONResource())

# Path to host the Swagger UI. Keep in mind that Falcon will add a route for
# this automatically when we register Swagger and the path will be relative
# to the Falcon app like all other routes, not the absolute root.
SWAGGERUI_PATH = "/swagger"

# The *absolute* path to the OpenAPI JSON schema. This must be absolute because
# it will be requested by the client and must resolve absolutely. Note: the
# name of this variable is misleading because it is actually the schema URL
# but we pass it into the register_swaggerui_app() function as the app_url
# parameter.
SWAGGERUI_API_URL = f"{DSPACE_STATISTICS_API_URL}/docs/openapi.json"

register_swaggerui_app(
    app,
    SWAGGERUI_PATH,
    SWAGGERUI_API_URL,
    config={
        "supportedSubmitMethods": ["get", "post"],
    },
    uri_prefix=DSPACE_STATISTICS_API_URL,
)

# vim: set sw=4 ts=4 expandtab:
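
As a usage sketch (not part of this commit), this is how a client might call the routes registered above, assuming the API is served locally on port 8000; the item UUID is the example value from the OpenAPI schema further down:

import requests

# GET paged statistics for all items (limit and page mirror the query parameters above)
r = requests.get("http://localhost:8000/items", params={"limit": 10, "page": 0})
data = r.json()
print(data["totalPages"], data["statistics"][:2])

# POST a list of item UUIDs with an optional date range to get live statistics from Solr
body = {
    "limit": 100,
    "page": 0,
    "dateFrom": "2020-01-01T00:00:00Z",
    "dateTo": "2020-12-31T00:00:00Z",
    "items": ["9596aeff-0b90-47d3-9fec-02d578920507"],
}
print(requests.post("http://localhost:8000/items", json=body).json())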
@@ -1,23 +0,0 @@
# SPDX-License-Identifier: GPL-3.0-only

import os

# Check if Solr connection information was provided in the environment
SOLR_SERVER = os.environ.get("SOLR_SERVER", "http://localhost:8080/solr")

DATABASE_NAME = os.environ.get("DATABASE_NAME", "dspacestatistics")
DATABASE_USER = os.environ.get("DATABASE_USER", "dspacestatistics")
DATABASE_PASS = os.environ.get("DATABASE_PASS", "dspacestatistics")
DATABASE_HOST = os.environ.get("DATABASE_HOST", "localhost")
DATABASE_PORT = os.environ.get("DATABASE_PORT", "5432")

# URL to DSpace Statistics API, which will be used as a prefix to API calls in
# the Swagger UI. An empty string will allow this to work out of the box in a
# local development environment, but for production it should be set to a value
# like "/rest/statistics", assuming that the statistics API is deployed next to
# the vanilla DSpace REST API.
DSPACE_STATISTICS_API_URL = os.environ.get("DSPACE_STATISTICS_API_URL", "")

VERSION = "1.4.6-dev"

# vim: set sw=4 ts=4 expandtab:
@@ -1,37 +0,0 @@
# SPDX-License-Identifier: GPL-3.0-only

import falcon
import psycopg

from .config import (
    DATABASE_HOST,
    DATABASE_NAME,
    DATABASE_PASS,
    DATABASE_PORT,
    DATABASE_USER,
)


class DatabaseManager:
    """Manage database connection."""

    def __init__(self):
        self._connection_uri = f"dbname={DATABASE_NAME} user={DATABASE_USER} password={DATABASE_PASS} host={DATABASE_HOST} port={DATABASE_PORT}"

    def __enter__(self):
        try:
            self._connection = psycopg.connect(
                self._connection_uri, row_factory=psycopg.rows.dict_row
            )
        except psycopg.OperationalError:
            title = "500 Internal Server Error"
            description = "Could not connect to database"
            raise falcon.HTTPInternalServerError(title, description)

        return self._connection

    def __exit__(self, exc_type, exc_value, exc_traceback):
        self._connection.close()


# vim: set sw=4 ts=4 expandtab:
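
A brief usage sketch (not from this commit) of how the API consumes this context manager; the items table and its columns come from the indexer further down:

from dspace_statistics_api.database import DatabaseManager

# Open a connection, mark it read only, and fetch a few rows. Rows come back as
# dicts because __enter__ above configures psycopg's dict_row row factory.
with DatabaseManager() as db:
    db.set_read_only(True)
    with db.cursor() as cursor:
        cursor.execute("SELECT id, views, downloads FROM items ORDER BY views DESC LIMIT 5")
        for row in cursor:
            print(row["id"], row["views"], row["downloads"])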
@@ -1,616 +0,0 @@
{
  "openapi": "3.0.3",
  "info": {
    "version": "1.4.6-dev",
    "title": "DSpace Statistics API",
    "description": "A [Falcon-based](https://falcon.readthedocs.io/) web application to make DSpace's item, community, and collection statistics available via a simple REST API. This Swagger interface is powered by [falcon-swagger-ui](https://github.com/rdidyk/falcon-swagger-ui).",
    "license": {
      "name": "GPLv3.0",
      "url": "https://www.gnu.org/licenses/gpl-3.0.en.html"
    }
  },
  "paths": {
    "/item/{item_uuid}": {
      "get": {
        "summary": "Statistics for a specific item",
        "operationId": "getItem",
        "tags": ["item"],
        "parameters": [
          {
            "name": "item_uuid",
            "in": "path",
            "required": true,
            "description": "The UUID of the item to retrieve",
            "schema": {"type": "string", "format": "uuid", "example": "9596aeff-0b90-47d3-9fec-02d578920507"}
          }
        ],
        "responses": {
          "200": {
            "description": "Expected response to a valid request",
            "content": {
              "application/json": {
                "schema": {"$ref": "#/components/schemas/SingleElementResponse"}
              }
            }
          },
          "404": {"description": "Item not found"}
        }
      }
    },
    "/items": {
      "get": {
        "summary": "Get statistics for all items",
        "operationId": "getItems",
        "tags": ["items"],
        "parameters": [
          {
            "name": "limit",
            "in": "query",
            "description": "How many items to return at once (optional)",
            "required": false,
            "schema": {"type": "integer", "format": "int32", "minimum": 1, "maximum": 100, "default": 100, "example": 100}
          },
          {
            "name": "page",
            "in": "query",
            "description": "Page of results to start on (optional)",
            "required": false,
            "schema": {"type": "integer", "format": "int32", "minimum": 0, "default": 0, "example": 0}
          }
        ],
        "responses": {
          "200": {
            "description": "A paged array of items",
            "content": {
              "application/json": {
                "schema": {"$ref": "#/components/schemas/SingleElementResponse"}
              }
            }
          },
          "400": {"description": "Bad request"}
        }
      },
      "post": {
        "summary": "Get statistics for a list of items with an optional date range",
        "operationId": "postItems",
        "tags": ["items"],
        "requestBody": {
          "content": {
            "application/json": {
              "schema": {
                "type": "object",
                "properties": {
                  "limit": {"type": "integer", "format": "int32", "minimum": 1, "maximum": 100, "default": 100},
                  "page": {"type": "integer", "format": "int32", "minimum": 0, "default": 0},
                  "dateFrom": {"type": "string", "format": "date"},
                  "dateTo": {"type": "string", "format": "date"},
                  "items": {"type": "array", "items": {"type": "string", "format": "uuid"}}
                },
                "example": {
                  "limit": 100,
                  "page": 0,
                  "dateFrom": "2020-01-01T00:00:00Z",
                  "dateTo": "2020-12-31T00:00:00Z",
                  "items": [
                    "f44cf173-2344-4eb2-8f00-ee55df32c76f",
                    "2324aa41-e9de-4a2b-bc36-16241464683e",
                    "8542f9da-9ce1-4614-abf4-f2e3fdb4b305",
                    "0fe573e7-042a-4240-a4d9-753b61233908"
                  ]
                }
              }
            }
          }
        },
        "responses": {
          "200": {
            "description": "Expected response to a valid request",
            "content": {
              "application/json": {
                "schema": {
                  "type": "object",
                  "properties": {
                    "currentPage": {"type": "integer", "format": "int32"},
                    "limit": {"type": "integer", "format": "int32"},
                    "totalPages": {"type": "integer", "format": "int32"},
                    "statistics": {"$ref": "#/components/schemas/ListOfElements"}
                  }
                }
              }
            }
          },
          "400": {"description": "Bad request"}
        }
      }
    },
    "/community/{community_uuid}": {
      "get": {
        "summary": "Statistics for a specific community",
        "operationId": "getCommunity",
        "tags": ["community"],
        "parameters": [
          {
            "name": "community_uuid",
            "in": "path",
            "required": true,
            "description": "The UUID of the community to retrieve",
            "schema": {"type": "string", "format": "uuid", "example": "bde7139c-d321-46bb-aef6-ae70799e5edb"}
          }
        ],
        "responses": {
          "200": {
            "description": "Expected response to a valid request",
            "content": {
              "application/json": {
                "schema": {"$ref": "#/components/schemas/SingleElementResponse"}
              }
            }
          },
          "404": {"description": "Community not found"}
        }
      }
    },
    "/communities": {
      "get": {
        "summary": "Get statistics for all communities",
        "operationId": "getCommunities",
        "tags": ["communities"],
        "parameters": [
          {
            "name": "limit",
            "in": "query",
            "description": "How many communities to return at once (optional)",
            "required": false,
            "schema": {"type": "integer", "format": "int32", "minimum": 1, "maximum": 100, "default": 100, "example": 100}
          },
          {
            "name": "page",
            "in": "query",
            "description": "Zero-based page of results to start on (optional)",
            "required": false,
            "schema": {"type": "integer", "format": "int32", "minimum": 0, "default": 0, "example": 0}
          }
        ],
        "responses": {
          "200": {
            "description": "A paged array of communities",
            "content": {
              "application/json": {
                "schema": {"$ref": "#/components/schemas/SingleElementResponse"}
              }
            }
          },
          "400": {"description": "Bad request"}
        }
      },
      "post": {
        "summary": "Get statistics for a list of communities with an optional date range",
        "operationId": "postCommunities",
        "tags": ["communities"],
        "requestBody": {
          "content": {
            "application/json": {
              "schema": {
                "type": "object",
                "properties": {
                  "limit": {"type": "integer", "format": "int32", "minimum": 1, "maximum": 100, "default": 100},
                  "page": {"type": "integer", "format": "int32", "minimum": 0, "default": 0},
                  "dateFrom": {"type": "string", "format": "date"},
                  "dateTo": {"type": "string", "format": "date"},
                  "communities": {"type": "array", "items": {"type": "string", "format": "uuid"}}
                },
                "example": {
                  "limit": 100,
                  "page": 0,
                  "dateFrom": "2020-01-01T00:00:00Z",
                  "dateTo": "2020-12-31T00:00:00Z",
                  "communities": [
                    "bde7139c-d321-46bb-aef6-ae70799e5edb",
                    "8a8aeed1-077e-4360-bdf8-a5f3020193b1",
                    "47d0498a-203c-407d-afb8-1d44bf29badc",
                    "d3fe99a9-e27d-4035-9339-084c93228c82"
                  ]
                }
              }
            }
          }
        },
        "responses": {
          "200": {
            "description": "Expected response to a valid request",
            "content": {
              "application/json": {
                "schema": {
                  "type": "object",
                  "properties": {
                    "currentPage": {"type": "integer", "format": "int32"},
                    "limit": {"type": "integer", "format": "int32"},
                    "totalPages": {"type": "integer", "format": "int32"},
                    "statistics": {"$ref": "#/components/schemas/ListOfElements"}
                  }
                }
              }
            }
          },
          "400": {"description": "Bad request"}
        }
      }
    },
    "/collection/{collection_uuid}": {
      "get": {
        "summary": "Statistics for a specific collection",
        "operationId": "getCollection",
        "tags": ["collection"],
        "parameters": [
          {
            "name": "collection_uuid",
            "in": "path",
            "required": true,
            "description": "The UUID of the collection to retrieve",
            "schema": {"type": "string", "format": "uuid", "example": "49dc95d8-bf2f-4e68-b30f-41ea266c37ae"}
          }
        ],
        "responses": {
          "200": {
            "description": "Expected response to a valid request",
            "content": {
              "application/json": {
                "schema": {"$ref": "#/components/schemas/SingleElementResponse"}
              }
            }
          },
          "404": {"description": "Collection not found"}
        }
      }
    },
    "/collections": {
      "get": {
        "summary": "Get statistics for all collections",
        "operationId": "getCollections",
        "tags": ["collections"],
        "parameters": [
          {
            "name": "limit",
            "in": "query",
            "description": "How many collections to return at once (optional)",
            "required": false,
            "schema": {"type": "integer", "format": "int32", "minimum": 1, "maximum": 100, "default": 100, "example": 100}
          },
          {
            "name": "page",
            "in": "query",
            "description": "Zero-based page of results to start on (optional)",
            "required": false,
            "schema": {"type": "integer", "format": "int32", "minimum": 0, "default": 0, "example": 0}
          }
        ],
        "responses": {
          "200": {
            "description": "A paged array of collections",
            "content": {
              "application/json": {
                "schema": {"$ref": "#/components/schemas/SingleElementResponse"}
              }
            }
          },
          "400": {"description": "Bad request"}
        }
      },
      "post": {
        "summary": "Get statistics for a list of collections with an optional date range",
        "operationId": "postCollections",
        "tags": ["collections"],
        "requestBody": {
          "content": {
            "application/json": {
              "schema": {
                "type": "object",
                "properties": {
                  "limit": {"type": "integer", "format": "int32", "minimum": 1, "maximum": 100, "default": 100},
                  "page": {"type": "integer", "format": "int32", "minimum": 0, "default": 0},
                  "dateFrom": {"type": "string", "format": "date"},
                  "dateTo": {"type": "string", "format": "date"},
                  "collections": {"type": "array", "items": {"type": "string", "format": "uuid"}}
                },
                "example": {
                  "limit": 100,
                  "page": 0,
                  "dateFrom": "2020-01-01T00:00:00Z",
                  "dateTo": "2020-12-31T00:00:00Z",
                  "collections": [
                    "5eeef6cf-b91b-42d0-9549-ea61bc8a758f",
                    "6aac3269-b4a9-4924-a24d-9e6ee2b410d2",
                    "551698dd-cd2b-4327-948e-54b5eb6deda5",
                    "39358713-bbaf-4149-a453-e2b18c09fd5d"
                  ]
                }
              }
            }
          }
        },
        "responses": {
          "200": {
            "description": "Expected response to a valid request",
            "content": {
              "application/json": {
                "schema": {
                  "type": "object",
                  "properties": {
                    "currentPage": {"type": "integer", "format": "int32"},
                    "limit": {"type": "integer", "format": "int32"},
                    "totalPages": {"type": "integer", "format": "int32"},
                    "statistics": {"$ref": "#/components/schemas/ListOfElements"}
                  }
                }
              }
            }
          },
          "400": {"description": "Bad request"}
        }
      }
    },
    "/status": {
      "get": {
        "summary": "Get API status",
        "operationId": "getStatus",
        "tags": ["status"],
        "responses": {
          "200": {
            "description": "OK",
            "content": {
              "application/json": {
                "schema": {
                  "type": "object",
                  "properties": {
                    "version": {"type": "string", "example": "1.4.0-dev"}
                  }
                }
              }
            }
          },
          "405": {"description": "Method Not Allowed"}
        }
      }
    }
  },
  "components": {
    "schemas": {
      "SingleElementResponse": {
        "type": "object",
        "required": ["id", "views", "downloads"],
        "properties": {
          "id": {"type": "string", "format": "uuid"},
          "views": {"type": "integer", "example": 450},
          "downloads": {"type": "integer", "example": 1337}
        }
      },
      "ListOfElements": {
        "type": "array",
        "items": {"$ref": "#/components/schemas/SingleElementResponse"}
      }
    }
  }
}
@@ -1,231 +0,0 @@
# SPDX-License-Identifier: GPL-3.0-only
#
# indexer.py
#
# Connects to a DSpace Solr statistics core and ingests views and downloads for
# communities, collections, and items into a PostgreSQL database.
#
# This script is written for Python 3.6+ and requires several modules that you
# can install with pip (I recommend using a Python virtual environment):
#
#   $ pip install psycopg
#
# See: https://wiki.duraspace.org/display/DSPACE/Solr

import math

import psycopg
import requests

from .config import SOLR_SERVER
from .database import DatabaseManager
from .util import get_statistics_shards


def index_views(indexType: str, facetField: str):
    # get total number of distinct facets for items with a minimum of 1 view,
    # otherwise Solr returns all kinds of weird ids that are actually not in
    # the database. Also, stats are expensive, but we need stats.calcdistinct
    # so we can get the countDistinct summary to calculate how many pages of
    # results we have.
    #
    # see: https://lucene.apache.org/solr/guide/6_6/the-stats-component.html
    solr_query_params = {
        "q": f"type:2 AND {facetField}:/.{{36}}/",
        "fq": "-isBot:true AND statistics_type:view",
        "fl": facetField,
        "facet": "true",
        "facet.field": facetField,
        "facet.mincount": 1,
        "facet.limit": 1,
        "facet.offset": 0,
        "stats": "true",
        "stats.field": facetField,
        "stats.calcdistinct": "true",
        "shards": shards,
        "rows": 0,
        "wt": "json",
    }

    solr_url = SOLR_SERVER + "/statistics/select"

    res = requests.get(solr_url, params=solr_query_params)

    try:
        # get total number of distinct facets (countDistinct)
        results_totalNumFacets = res.json()["stats"]["stats_fields"][facetField][
            "countDistinct"
        ]
    except TypeError:
        print(f"{indexType}: no views, exiting.")

        exit(0)

    # divide results into "pages" and round up to next integer
    results_per_page = 100
    results_num_pages = math.ceil(results_totalNumFacets / results_per_page)
    results_current_page = 0

    with DatabaseManager() as db:
        with db.cursor() as cursor:
            # create an empty list to store values for batch insertion
            data = []

            while results_current_page <= results_num_pages:
                # "pages" are zero based, but one based is more human readable
                print(
                    f"{indexType}: indexing views (page {results_current_page + 1} of {results_num_pages + 1})"
                )

                solr_query_params = {
                    "q": f"type:2 AND {facetField}:/.{{36}}/",
                    "fq": "-isBot:true AND statistics_type:view",
                    "fl": facetField,
                    "facet": "true",
                    "facet.field": facetField,
                    "facet.mincount": 1,
                    "facet.limit": results_per_page,
                    "facet.offset": results_current_page * results_per_page,
                    "shards": shards,
                    "rows": 0,
                    "wt": "json",
                    "json.nl": "map",  # return facets as a dict instead of a flat list
                }

                res = requests.get(solr_url, params=solr_query_params)

                # Solr returns facets as a dict of dicts (see json.nl parameter)
                views = res.json()["facet_counts"]["facet_fields"]
                # iterate over the facetField dict and get the ids and views
                for id_, views in views[facetField].items():
                    data.append((id_, views))

                # do a batch insert of values from the current "page" of results
                sql = f"INSERT INTO {indexType}(id, views) VALUES (%s, %s) ON CONFLICT(id) DO UPDATE SET views=excluded.views"
                cursor.executemany(sql, data)
                db.commit()

                # clear all items from the list so we can populate it with the next batch
                data.clear()

                results_current_page += 1


def index_downloads(indexType: str, facetField: str):
    # get the total number of distinct facets for items with at least 1 download
    solr_query_params = {
        "q": f"type:0 AND {facetField}:/.{{36}}/",
        "fq": "-isBot:true AND statistics_type:view AND bundleName:ORIGINAL",
        "fl": facetField,
        "facet": "true",
        "facet.field": facetField,
        "facet.mincount": 1,
        "facet.limit": 1,
        "facet.offset": 0,
        "stats": "true",
        "stats.field": facetField,
        "stats.calcdistinct": "true",
        "shards": shards,
        "rows": 0,
        "wt": "json",
    }

    solr_url = SOLR_SERVER + "/statistics/select"

    res = requests.get(solr_url, params=solr_query_params)

    try:
        # get total number of distinct facets (countDistinct)
        results_totalNumFacets = res.json()["stats"]["stats_fields"][facetField][
            "countDistinct"
        ]
    except TypeError:
        print(f"{indexType}: no downloads, exiting.")

        exit(0)

    results_per_page = 100
    results_num_pages = math.ceil(results_totalNumFacets / results_per_page)
    results_current_page = 0

    with DatabaseManager() as db:
        with db.cursor() as cursor:
            # create an empty list to store values for batch insertion
            data = []

            while results_current_page <= results_num_pages:
                # "pages" are zero based, but one based is more human readable
                print(
                    f"{indexType}: indexing downloads (page {results_current_page + 1} of {results_num_pages + 1})"
                )

                solr_query_params = {
                    "q": f"type:0 AND {facetField}:/.{{36}}/",
                    "fq": "-isBot:true AND statistics_type:view AND bundleName:ORIGINAL",
                    "fl": facetField,
                    "facet": "true",
                    "facet.field": facetField,
                    "facet.mincount": 1,
                    "facet.limit": results_per_page,
                    "facet.offset": results_current_page * results_per_page,
                    "shards": shards,
                    "rows": 0,
                    "wt": "json",
                    "json.nl": "map",  # return facets as a dict instead of a flat list
                }

                res = requests.get(solr_url, params=solr_query_params)

                # Solr returns facets as a dict of dicts (see json.nl parameter)
                downloads = res.json()["facet_counts"]["facet_fields"]
                # iterate over the facetField dict and get the item ids and downloads
                for id_, downloads in downloads[facetField].items():
                    data.append((id_, downloads))

                # do a batch insert of values from the current "page" of results
                sql = f"INSERT INTO {indexType}(id, downloads) VALUES (%s, %s) ON CONFLICT(id) DO UPDATE SET downloads=excluded.downloads"
                cursor.executemany(sql, data)
                db.commit()

                # clear all items from the list so we can populate it with the next batch
                data.clear()

                results_current_page += 1


with DatabaseManager() as db:
    with db.cursor() as cursor:
        # create table to store item views and downloads
        cursor.execute(
            """CREATE TABLE IF NOT EXISTS items
               (id UUID PRIMARY KEY, views INT DEFAULT 0, downloads INT DEFAULT 0)"""
        )
        # create table to store community views and downloads
        cursor.execute(
            """CREATE TABLE IF NOT EXISTS communities
               (id UUID PRIMARY KEY, views INT DEFAULT 0, downloads INT DEFAULT 0)"""
        )
        # create table to store collection views and downloads
        cursor.execute(
            """CREATE TABLE IF NOT EXISTS collections
               (id UUID PRIMARY KEY, views INT DEFAULT 0, downloads INT DEFAULT 0)"""
        )

    # commit the table creation before closing the database connection
    db.commit()

shards = get_statistics_shards()

# Index views and downloads for items, communities, and collections. Here the
# first parameter is the type of indexing to perform, and the second parameter
# is the field to facet by in Solr's statistics to get this information.

index_views("items", "id")
index_views("communities", "owningComm")
index_views("collections", "owningColl")

index_downloads("items", "owningItem")
index_downloads("communities", "owningComm")
index_downloads("collections", "owningColl")

# vim: set sw=4 ts=4 expandtab:
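
For reference, a sketch of the Solr facet response shape that index_views() and index_downloads() iterate over when json.nl=map is set; the UUIDs and counts below are illustrative only:

# Illustrative structure only; real responses contain many more fields.
example_solr_response = {
    "facet_counts": {
        "facet_fields": {
            "id": {
                "9596aeff-0b90-47d3-9fec-02d578920507": 12,
                "f44cf173-2344-4eb2-8f00-ee55df32c76f": 3,
            }
        }
    }
}

# This shape is why the loops above can do, for example:
# for id_, views in views[facetField].items(): ...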
@@ -1,126 +0,0 @@
# SPDX-License-Identifier: GPL-3.0-only

import requests

from .config import SOLR_SERVER
from .util import get_statistics_shards


def get_views(solr_date_string: str, elements: list, facetField: str):
    """
    Get view statistics for a list of elements from Solr. Depending on the
    request this could be items, communities, or collections.

    :parameter solr_date_string (str): Solr date string, for example "[* TO *]"
    :parameter elements (list): a list of IDs
    :parameter facetField (str): Solr field to facet by, for example "id"
    :returns: A dict of IDs and views
    """
    shards = get_statistics_shards()

    # Join the UUIDs with "OR" and escape the hyphens for Solr
    solr_elements_string: str = " OR ".join(elements).replace("-", r"\-")

    solr_query_params = {
        "q": f"{facetField}:({solr_elements_string})",
        "fq": f"type:2 AND -isBot:true AND statistics_type:view AND time:{solr_date_string}",
        "fl": facetField,
        "facet": "true",
        "facet.field": facetField,
        "facet.mincount": 1,
        "shards": shards,
        "rows": 0,
        "wt": "json",
        "json.nl": "map",  # return facets as a dict instead of a flat list
    }

    solr_url = SOLR_SERVER + "/statistics/select"
    res = requests.get(solr_url, params=solr_query_params)

    # Create an empty dict to store views
    data = {}

    # Solr returns facets as a dict of dicts (see the json.nl parameter)
    views = res.json()["facet_counts"]["facet_fields"]
    # iterate over the facetField dict and get the ids and views
    for id_, views in views[facetField].items():
        # For items we can rely on Solr returning facets for *only* the ids
        # in our query, but for communities and collections, the owningComm and
        # owningColl fields are multi-value so Solr will return facets with the
        # values in our query as well as *any others* that happen to be present
        # in the field (which looks like Solr returning unrelated results until
        # you realize that the field is multi-value and this is correct).
        #
        # To work around this I make sure that each id in the returned dict is
        # present in the elements list POSTed by the user.
        if id_ in elements:
            data[id_] = views

    # Check if any ids have missing stats so we can set them to 0
    if len(data) < len(elements):
        # List comprehension to get a list of ids (keys) in the data
        data_ids = [k for k, v in data.items()]
        for element_id in elements:
            if element_id not in data_ids:
                data[element_id] = 0
                continue

    return data


def get_downloads(solr_date_string: str, elements: list, facetField: str):
    """
    Get download statistics for a list of items from Solr. Depending on the
    request this could be items, communities, or collections.

    :parameter solr_date_string (str): Solr date string, for example "[* TO *]"
    :parameter elements (list): a list of IDs
    :parameter facetField (str): Solr field to facet by, for example "id"
    :returns: A dict of IDs and downloads
    """
    shards = get_statistics_shards()

    # Join the UUIDs with "OR" and escape the hyphens for Solr
    solr_elements_string: str = " OR ".join(elements).replace("-", r"\-")

    solr_query_params = {
        "q": f"{facetField}:({solr_elements_string})",
        "fq": f"type:0 AND -isBot:true AND statistics_type:view AND bundleName:ORIGINAL AND time:{solr_date_string}",
        "fl": facetField,
        "facet": "true",
        "facet.field": facetField,
        "facet.mincount": 1,
        "shards": shards,
        "rows": 0,
        "wt": "json",
        "json.nl": "map",  # return facets as a dict instead of a flat list
    }

    solr_url = SOLR_SERVER + "/statistics/select"
    res = requests.get(solr_url, params=solr_query_params)

    # Create an empty dict to store downloads
    data = {}

    # Solr returns facets as a dict of dicts (see the json.nl parameter)
    downloads = res.json()["facet_counts"]["facet_fields"]
    # Iterate over the facetField dict and get the ids and downloads
    for id_, downloads in downloads[facetField].items():
        # Make sure that each id in the returned dict is present in the
        # elements list POSTed by the user.
        if id_ in elements:
            data[id_] = downloads

    # Check if any elements have missing stats so we can set them to 0
    if len(data) < len(elements):
        # List comprehension to get a list of ids (keys) in the data
        data_ids = [k for k, v in data.items()]
        for element_id in elements:
            if element_id not in data_ids:
                data[element_id] = 0
                continue

    return data


# vim: set sw=4 ts=4 expandtab:
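
A hypothetical call to make the return shape of get_views() concrete (the counts are invented); ids that are missing from the Solr facets come back as 0:

views = get_views(
    "[* TO *]",
    [
        "9596aeff-0b90-47d3-9fec-02d578920507",
        "f44cf173-2344-4eb2-8f00-ee55df32c76f",
    ],
    "id",
)
# e.g. {"9596aeff-0b90-47d3-9fec-02d578920507": 12,
#       "f44cf173-2344-4eb2-8f00-ee55df32c76f": 0}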
@@ -1,193 +0,0 @@
# SPDX-License-Identifier: GPL-3.0-only

import datetime
import json
import re

import falcon
import requests

from .config import SOLR_SERVER


def get_statistics_shards():
    """Enumerate the cores in Solr to determine if statistics have been sharded into
    yearly shards by DSpace's stats-util or not (for example: statistics-2018).

    Returns:
        str: A list of Solr statistics shards separated by commas.
    """

    # Initialize an empty list for statistics core years
    statistics_core_years = []

    # URL for Solr status to check active cores
    solr_query_params = {"action": "STATUS", "wt": "json"}
    solr_url = SOLR_SERVER + "/admin/cores"
    res = requests.get(solr_url, params=solr_query_params)

    if res.status_code == requests.codes.ok:
        data = res.json()

        # Iterate over active cores from Solr's STATUS response (cores are in
        # the status array of this response).
        for core in data["status"]:
            # Pattern to match, for example: statistics-2018
            pattern = re.compile("^statistics-[0-9]{4}$")

            if not pattern.match(core):
                continue

            # Append current core to list
            statistics_core_years.append(core)

    # Initialize a string to hold our shards (may end up being empty if the Solr
    # core has not been processed by stats-util).
    shards = str()

    if len(statistics_core_years) > 0:
        # Begin building a string of shards starting with the default one
        shards = f"{SOLR_SERVER}/statistics"

        for core in statistics_core_years:
            # Create a comma-separated list of shards to pass to our Solr query
            #
            # See: https://wiki.apache.org/solr/DistributedSearch
            shards += f",{SOLR_SERVER}/{core}"

    # Return the string of shards, which may actually be empty. Solr doesn't
    # seem to mind if the shards query parameter is empty and I haven't seen
    # any negative performance impact so this should be fine.
    return shards


def is_valid_date(date):
    try:
        # Solr date format is: 2020-01-01T00:00:00Z
        # See: https://docs.python.org/3/library/datetime.html#strftime-strptime-behavior
        datetime.datetime.strptime(date, "%Y-%m-%dT%H:%M:%SZ")

        return True
    except ValueError:
        raise falcon.HTTPBadRequest(
            title="Invalid parameter",
            description=f"Invalid date format: {date}. The value must be in format: 2020-01-01T00:00:00Z.",
        )


def validate_post_parameters(req, resp, resource, params):
    """Check the POSTed request parameters for the `/items`, `/communities` and
    `/collections` endpoints.

    Meant to be used as a `before` hook.
    """

    # Only attempt to read the POSTed request if its length is not 0 (or
    # rather, in the Python sense, if length is not a False-y value).
    if req.content_length:
        doc = json.load(req.bounded_stream)
    else:
        raise falcon.HTTPBadRequest(
            title="Invalid request", description="Request body is empty."
        )

    # Parse date parameters from request body (will raise an HTTPBadRequest
    # from is_valid_date() if any parameters are invalid)
    if "dateFrom" in doc and is_valid_date(doc["dateFrom"]):
        req.context.dateFrom = doc["dateFrom"]
    else:
        req.context.dateFrom = None

    if "dateTo" in doc and is_valid_date(doc["dateTo"]):
        req.context.dateTo = doc["dateTo"]
    else:
        req.context.dateTo = None

    # Parse the limit parameter from the POST request body
    if "limit" in doc:
        if isinstance(doc["limit"], int) and 0 < doc["limit"] <= 100:
            req.context.limit = doc["limit"]
        else:
            raise falcon.HTTPBadRequest(
                title="Invalid parameter",
                description='The "limit" parameter is invalid. The value must be an integer between 1 and 100.',
            )
    else:
        req.context.limit = 100

    # Parse the page parameter from the POST request body
    if "page" in doc:
        if isinstance(doc["page"], int) and doc["page"] >= 0:
            req.context.page = doc["page"]
        else:
            raise falcon.HTTPBadRequest(
                title="Invalid parameter",
                description='The "page" parameter is invalid. The value must be at least 0.',
            )
    else:
        req.context.page = 0

    # Parse the list of elements from the POST request body
    if req.context.statistics_scope in doc:
        if (
            isinstance(doc[req.context.statistics_scope], list)
            and len(doc[req.context.statistics_scope]) > 0
        ):
            req.context.elements = doc[req.context.statistics_scope]
        else:
            raise falcon.HTTPBadRequest(
                title="Invalid parameter",
                description=f'The "{req.context.statistics_scope}" parameter is invalid. The value must be a comma-separated list of UUIDs.',
            )
    else:
        req.context.elements = []


def set_statistics_scope(req, resp, resource, params):
    """Set the statistics scope (item, collection, or community) of the request
    as well as the appropriate database (for GET requests) and Solr facet fields
    (for POST requests).

    Meant to be used as a `before` hook.
    """

    # Extract the scope from the request path. This is *guaranteed* to be one
    # of the following values because we only send requests matching these few
    # patterns to routes using this set_statistics_scope hook.
    #
    # Note: this regex is ordered so that "items" and "collections" match before
    # "item" and "collection".
    req.context.statistics_scope = re.findall(
        r"^/(communities|community|collections|collection|items|item)", req.path
    )[0]

    # Set the correct database based on the statistics_scope. The database is
    # used for all GET requests where statistics are returned directly from the
    # database. In this case we can return early.
    if req.method == "GET":
        if re.findall(r"^(item|items)$", req.context.statistics_scope):
            req.context.database = "items"
        elif re.findall(r"^(community|communities)$", req.context.statistics_scope):
            req.context.database = "communities"
        elif re.findall(r"^(collection|collections)$", req.context.statistics_scope):
            req.context.database = "collections"

        # GET requests only need the scope and the database so we can return now
        return

    # If the current request is for plural items, communities, or collections
    # that includes a list of element ids POSTed with the request body then we
    # need to set the Solr facet field so we can get the live results.
    if req.method == "POST":
        if req.context.statistics_scope == "items":
            req.context.views_facet_field = "id"
            req.context.downloads_facet_field = "owningItem"
        elif req.context.statistics_scope == "communities":
            req.context.views_facet_field = "owningComm"
            req.context.downloads_facet_field = "owningComm"
        elif req.context.statistics_scope == "collections":
            req.context.views_facet_field = "owningColl"
            req.context.downloads_facet_field = "owningColl"


# vim: set sw=4 ts=4 expandtab:
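
To illustrate get_statistics_shards(): assuming the default SOLR_SERVER and two yearly cores created by stats-util, the returned string would look roughly like this (illustrative, not output captured from this commit):

# shards = get_statistics_shards() might return:
# "http://localhost:8080/solr/statistics,http://localhost:8080/solr/statistics-2018,http://localhost:8080/solr/statistics-2019"
#
# Passing this as the "shards" query parameter makes Solr run the statistics
# queries across the current core and the yearly cores in one distributed search.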