1
0
mirror of https://github.com/ilri/dspace-statistics-api.git synced 2024-06-29 01:23:45 +02:00
dspace-statistics-api/dspace_statistics_api/app.py
Alan Orth 73c71fa8a0
dspace_statistics_api: Add support for date ranges to /items
You can now POST a JSON request to /items with a list of items and
a date range. This allows the possibility to get view and download
statistics for arbitrary items and arbitrary date ranges.

The JSON request should be in the following format:

    {
        "limit": 100,
        "page": 0,
        "dateFrom": "2020-01-01T00:00:00Z",
        "dateTo": "2020-09-09T00:00:00Z",
        "items": [
            "f44cf173-2344-4eb2-8f00-ee55df32c76f",
            "2324aa41-e9de-4a2b-bc36-16241464683e",
            "8542f9da-9ce1-4614-abf4-f2e3fdb4b305",
            "0fe573e7-042a-4240-a4d9-753b61233908"
        ]
    }

The limit, page, and date parameters are all optional. By default
it will use a limit of 100, page 0, and [* TO *] Solr date range.
2020-09-25 12:21:11 +03:00

206 lines
7.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import falcon
from .database import DatabaseManager
class RootResource:
    """Falcon resource for the "/" route; serves the HTML documentation page."""

    def on_get(self, req, resp):
        """Respond to a GET request with the contents of docs/index.html.

        The response is sent with status 200 and content type "text/html".
        """
        resp.content_type = "text/html"
        resp.status = falcon.HTTP_200

        # The path is relative, so it is resolved against the current
        # working directory of the running process.
        with open("dspace_statistics_api/docs/index.html", "r") as index_file:
            page_html = index_file.read()

        resp.body = page_html
class AllItemsResource:
    """Falcon resource for the "/items" route: paged statistics for many items."""

    def on_get(self, req, resp):
        """Return one page of view and download statistics for all items.

        Query parameters (both optional):
            limit: number of items per page, from 0 to 100. A missing value
                or 0 means 100.
            page: zero-based page number. A missing value means 0.

        The response body is a JSON object with the keys "currentPage",
        "totalPages", "limit" and "statistics", where "statistics" is a list
        of objects with the keys "id", "views" and "downloads".
        """
        # A limit of 0 is falsy, so "or 100" replaces it with the default.
        # This also guarantees the division below never divides by zero.
        limit = req.get_param_as_int("limit", min_value=0, max_value=100) or 100
        page = req.get_param_as_int("page", min_value=0) or 0
        offset = limit * page

        with DatabaseManager() as db:
            db.set_session(readonly=True)

            with db.cursor() as cursor:
                # get total number of items so we can estimate the pages
                cursor.execute("SELECT COUNT(id) FROM items")
                # NOTE(review): round() makes this an estimate only; it can
                # be lower or higher than the number of non-empty pages.
                pages = round(cursor.fetchone()[0] / limit)

                # get statistics and use limit and offset to page through results
                cursor.execute(
                    "SELECT id, views, downloads FROM items LIMIT %s OFFSET %s",
                    [limit, offset],
                )

                # build one dict of stats per row returned by the query
                statistics = [
                    {
                        "id": str(item["id"]),
                        "views": item["views"],
                        "downloads": item["downloads"],
                    }
                    for item in cursor
                ]

        resp.media = {
            "currentPage": page,
            "totalPages": pages,
            "limit": limit,
            "statistics": statistics,
        }

    def on_post(self, req, resp):
        """Return view and download statistics for the POSTed items.

        The request body must be a JSON object. Recognized keys:
            items: non-empty list of item UUIDs (required).
            limit: number of items per page, an integer from 0 to 100
                (optional). A missing value or 0 means 100, as in on_get.
            page: zero-based page number, an integer >= 0 (optional,
                default 0).
            dateFrom, dateTo: optional date bounds, each checked with
                is_valid_date(). A missing bound becomes "*" in the Solr
                date range.

        The response body has the same shape as the one from on_get.

        Raises:
            falcon.HTTPBadRequest: if the body is empty, is not valid JSON,
                is not a JSON object, or if "limit", "page" or "items" is
                invalid.
        """
        import json

        from .items import get_downloads, get_views
        from .util import is_valid_date

        # Only attempt to read the POSTed request if its length is not 0 (or
        # rather, in the Python sense, if length is not a False-y value).
        if not req.content_length:
            raise falcon.HTTPBadRequest(
                title="Invalid request", description="Request body is empty."
            )

        # json.JSONDecodeError and UnicodeDecodeError are both ValueErrors;
        # report them to the client instead of failing with a server error.
        try:
            doc = json.load(req.stream)
        except ValueError as err:
            raise falcon.HTTPBadRequest(
                title="Invalid request",
                description="Request body is not valid JSON.",
            ) from err

        # Everything below looks parameters up by key, so anything other
        # than a JSON object (list, string, number, ...) is unusable.
        if not isinstance(doc, dict):
            raise falcon.HTTPBadRequest(
                title="Invalid request",
                description="Request body must be a JSON object.",
            )

        # Parse date parameters from request body (will raise an HTTPBadRequest
        # from is_valid_date() if any parameters are invalid)
        date_from = (
            doc["dateFrom"]
            if "dateFrom" in doc and is_valid_date(doc["dateFrom"])
            else None
        )
        date_to = (
            doc["dateTo"] if "dateTo" in doc and is_valid_date(doc["dateTo"]) else None
        )

        # Build the Solr date string, ie: [* TO *]. A missing bound is
        # replaced by the "*" wildcard.
        solr_date_string = f"[{date_from or '*'} TO {date_to or '*'}]"

        # Parse the limit parameter from the POST request body. bool is a
        # subclass of int, so JSON true/false must be rejected explicitly.
        req_limit = doc.get("limit", 100)
        if (
            isinstance(req_limit, bool)
            or not isinstance(req_limit, int)
            or not 0 <= req_limit <= 100
        ):
            raise falcon.HTTPBadRequest(
                title="Invalid parameter",
                description='The "limit" parameter is invalid. The value must be an integer between 0 and 100.',
            )
        # 0 is an accepted value; treat it as the default like on_get does so
        # that the page arithmetic below never divides by zero.
        req_limit = req_limit or 100

        # Parse the page parameter from the POST request body
        req_page = doc.get("page", 0)
        if (
            isinstance(req_page, bool)
            or not isinstance(req_page, int)
            or req_page < 0
        ):
            raise falcon.HTTPBadRequest(
                title="Invalid parameter",
                description='The "page" parameter is invalid. The value must be at least 0.',
            )

        # Parse the list of items from the POST request body
        req_items = doc.get("items", [])
        if not isinstance(req_items, list) or not req_items:
            raise falcon.HTTPBadRequest(
                title="Invalid parameter",
                description='The "items" parameter is invalid. The value must be a comma-separated list of item UUIDs.',
            )

        # Helper variables to make working with pages/items/results easier and
        # to make the code easier to understand
        number_of_items: int = len(req_items)
        pages: int = number_of_items // req_limit
        first_item: int = req_page * req_limit
        last_item: int = first_item + req_limit

        # Get a subset of the POSTed items based on our limit. Slices are
        # zero based and exclude the end position, and a slice that runs past
        # the end of the list is simply shorter. So for a list req_items with
        # 240 items and a limit of 100:
        #
        #   1st set: req_items[0:100] would give items at indexes 0 to 99
        #   2nd set: req_items[100:200] would give items at indexes 100 to 199
        #   3rd set: req_items[200:300] would give items at indexes 200 to 239
        items_subset: list = req_items[first_item:last_item]

        views: dict = get_views(solr_date_string, items_subset)
        downloads: dict = get_downloads(solr_date_string, items_subset)

        # iterate over views dict to extract views and use the item id as an
        # index to the downloads dict to extract downloads.
        statistics = [
            {"id": item_id, "views": item_views, "downloads": downloads[item_id]}
            for item_id, item_views in views.items()
        ]

        resp.status = falcon.HTTP_200
        resp.media = {
            "currentPage": req_page,
            "totalPages": pages,
            "limit": req_limit,
            "statistics": statistics,
        }
class ItemResource:
    """Falcon resource for the "/item/{item_id}" route: one item's statistics."""

    def on_get(self, req, resp, item_id):
        """Return the views and downloads stored for a single item.

        Args:
            req: the Falcon request (not read by this handler).
            resp: the Falcon response; its media is set to a dict with the
                keys "id", "views" and "downloads".
            item_id: the item identifier taken from the route template.

        Raises:
            falcon.HTTPNotFound: if the items table has no row for item_id.
        """
        import psycopg2.extras

        # Adapt Python's uuid.UUID type to PostgreSQL's uuid
        # See: https://www.psycopg.org/docs/extras.html
        psycopg2.extras.register_uuid()

        with DatabaseManager() as db:
            db.set_session(readonly=True)

            # Use the cursor managed by the with statement for the query so
            # that it is closed on exit; do not open a second, unmanaged one.
            with db.cursor() as cursor:
                cursor.execute(
                    "SELECT views, downloads FROM items WHERE id=%s", [str(item_id)]
                )

                if cursor.rowcount == 0:
                    raise falcon.HTTPNotFound(
                        title="Item not found",
                        description=f'The item with id "{item_id}" was not found.',
                    )

                results = cursor.fetchone()

        resp.media = {
            "id": str(item_id),
            "views": results["views"],
            "downloads": results["downloads"],
        }
# Create the Falcon API object. It is exposed under two module-level names,
# "application" and "api", which both refer to the same object.
application = falcon.API()
api = application

# Attach a resource instance to each URI template.
application.add_route("/", RootResource())
application.add_route("/items", AllItemsResource())
application.add_route("/item/{item_id:uuid}", ItemResource())
# vim: set sw=4 ts=4 expandtab: