From 73c71fa8a056139f12eb7cda5af28ffd65f32c0c Mon Sep 17 00:00:00 2001
From: Alan Orth
Date: Fri, 25 Sep 2020 12:21:11 +0300
Subject: [PATCH] dspace_statistics_api: Add support for date ranges to /items

You can now POST a JSON request to /items with a list of items and
a date range. This allows the possibility to get view and download
statistics for arbitrary items and arbitrary date ranges.

The JSON request should be in the following format:

    {
        "limit": 100,
        "page": 0,
        "dateFrom": "2020-01-01T00:00:00Z",
        "dateTo": "2020-09-09T00:00:00Z",
        "items": [
            "f44cf173-2344-4eb2-8f00-ee55df32c76f",
            "2324aa41-e9de-4a2b-bc36-16241464683e",
            "8542f9da-9ce1-4614-abf4-f2e3fdb4b305",
            "0fe573e7-042a-4240-a4d9-753b61233908"
        ]
    }

The limit, page, and date parameters are all optional. By default
it will use a limit of 100, page 0, and [* TO *] Solr date range.
The limit must be an integer between 1 and 100. Requests with an
empty or malformed body or with invalid parameters are rejected
with an HTTP 400 response.
---
 dspace_statistics_api/app.py  | 131 ++++++++++++++++++++++++++++++++++++++++++
 dspace_statistics_api/util.py |  26 ++++++++
 2 files changed, 157 insertions(+)

diff --git a/dspace_statistics_api/app.py b/dspace_statistics_api/app.py
index abbf12a..246fa55 100644
--- a/dspace_statistics_api/app.py
+++ b/dspace_statistics_api/app.py
@@ -55,6 +55,137 @@ class AllItemsResource:
 
         resp.media = message
 
+    def on_post(self, req, resp):
+        """Handle POST requests for statistics on a list of items.
+
+        The request body must be a JSON object with a non-empty "items" list.
+        The "limit" (1 to 100, default 100), "page" (default 0), "dateFrom"
+        and "dateTo" (Solr dates, ie 2020-01-01T00:00:00Z) keys are optional.
+        Responds with one page of view and download statistics.
+
+        Raises falcon.HTTPBadRequest if the body is empty, is not a JSON
+        object, or contains an invalid parameter.
+        """
+
+        import json
+        from .items import get_views
+        from .items import get_downloads
+        from .util import is_valid_date
+
+        # Only attempt to read the POSTed request if its length is not 0 (or
+        # rather, in the Python sense, if length is not a False-y value).
+        if not req.content_length:
+            raise falcon.HTTPBadRequest(
+                title="Invalid request", description="Request body is empty."
+            )
+
+        # Malformed JSON raises a ValueError (json.JSONDecodeError), which we
+        # report as a client error rather than letting it become a server error.
+        try:
+            doc = json.load(req.stream)
+        except ValueError:
+            raise falcon.HTTPBadRequest(
+                title="Invalid request",
+                description="Request body is not valid JSON.",
+            )
+
+        # All parameters are looked up by key so the body must be a JSON object
+        if not isinstance(doc, dict):
+            raise falcon.HTTPBadRequest(
+                title="Invalid request",
+                description="Request body must be a JSON object.",
+            )
+
+        # Parse date parameters from request body (will raise an HTTPBadRequest
+        # from is_valid_date() if any parameters are invalid)
+        req_dateFrom = (
+            doc["dateFrom"]
+            if "dateFrom" in doc and is_valid_date(doc["dateFrom"])
+            else None
+        )
+        req_dateTo = (
+            doc["dateTo"] if "dateTo" in doc and is_valid_date(doc["dateTo"]) else None
+        )
+
+        # Build the Solr date string, ie: [* TO *]
+        if req_dateFrom and req_dateTo:
+            solr_date_string = f"[{req_dateFrom} TO {req_dateTo}]"
+        elif not req_dateFrom and req_dateTo:
+            solr_date_string = f"[* TO {req_dateTo}]"
+        elif req_dateFrom and not req_dateTo:
+            solr_date_string = f"[{req_dateFrom} TO *]"
+        else:
+            solr_date_string = "[* TO *]"
+
+        # Parse the limit parameter from the POST request body. A limit of 0 is
+        # rejected because it is used as a divisor when counting pages below.
+        req_limit = doc.get("limit", 100)
+        if not isinstance(req_limit, int) or req_limit < 1 or req_limit > 100:
+            raise falcon.HTTPBadRequest(
+                title="Invalid parameter",
+                description='The "limit" parameter is invalid. The value must be an integer between 1 and 100.',
+            )
+
+        # Parse the page parameter from the POST request body
+        req_page = doc.get("page", 0)
+        if not isinstance(req_page, int) or req_page < 0:
+            raise falcon.HTTPBadRequest(
+                title="Invalid parameter",
+                description='The "page" parameter is invalid. The value must be at least 0.',
+            )
+
+        # Parse the list of items from the POST request body
+        req_items = doc.get("items", [])
+        if not isinstance(req_items, list) or not req_items:
+            raise falcon.HTTPBadRequest(
+                title="Invalid parameter",
+                description='The "items" parameter is invalid. The value must be a comma-separated list of item UUIDs.',
+            )
+
+        # Helper variables to make working with pages/items/results easier and
+        # to make the code easier to understand
+        number_of_items: int = len(req_items)
+        pages: int = number_of_items // req_limit
+        first_item: int = req_page * req_limit
+        last_item: int = first_item + req_limit
+        # Get a subset of the POSTed items based on our limit. Note that Python
+        # list slicing and indexing are both zero based, but the first and last
+        # items in a slice can be confusing. See this ASCII diagram:
+        #
+        #                 +---+---+---+---+---+---+
+        #                 | P | y | t | h | o | n |
+        #                 +---+---+---+---+---+---+
+        # Slice position: 0   1   2   3   4   5   6
+        # Index position:   0   1   2   3   4   5
+        #
+        # So if we have a list req_items with 240 items:
+        #
+        #   1st set: req_items[0:100] would give items at indexes 0 to 99
+        #   2nd set: req_items[100:200] would give items at indexes 100 to 199
+        #   3rd set: req_items[200:300] would give items at indexes 200 to 239
+        items_subset: list = req_items[first_item:last_item]
+
+        views: dict = get_views(solr_date_string, items_subset)
+        downloads: dict = get_downloads(solr_date_string, items_subset)
+
+        # create a list to hold dicts of item stats
+        statistics = []
+
+        # iterate over views dict to extract views and use the item id as an
+        # index to the downloads dict to extract downloads.
+        for k, v in views.items():
+            statistics.append({"id": k, "views": v, "downloads": downloads[k]})
+
+        message = {
+            "currentPage": req_page,
+            "totalPages": pages,
+            "limit": req_limit,
+            "statistics": statistics,
+        }
+
+        resp.status = falcon.HTTP_200
+        resp.media = message
+
 
 class ItemResource:
     def on_get(self, req, resp, item_id):
diff --git a/dspace_statistics_api/util.py b/dspace_statistics_api/util.py
index ff6823d..87f104b 100644
--- a/dspace_statistics_api/util.py
+++ b/dspace_statistics_api/util.py
@@ -51,3 +51,29 @@ def get_statistics_shards():
     # seem to mind if the shards query parameter is empty and I haven't seen
     # any negative performance impact so this should be fine.
     return shards
+
+
+def is_valid_date(date):
+    """Check that a date is in the Solr date format.
+
+    Returns True if the date matches the format 2020-01-01T00:00:00Z and
+    raises falcon.HTTPBadRequest otherwise.
+    """
+
+    import datetime
+    import falcon
+
+    try:
+        # Solr date format is: 2020-01-01T00:00:00Z
+        # See: https://docs.python.org/3/library/datetime.html#strftime-strptime-behavior
+        datetime.datetime.strptime(date, "%Y-%m-%dT%H:%M:%SZ")
+
+        return True
+    except (TypeError, ValueError):
+        # strptime() raises a TypeError if the value is not a string (ie a
+        # number in the JSON request) and a ValueError if it is a string that
+        # does not match the format.
+        raise falcon.HTTPBadRequest(
+            title="Invalid parameter",
+            description=f"Invalid date format: {date}. The value must be in format: 2020-01-01T00:00:00Z.",
+        )