1
0
mirror of https://github.com/ilri/dspace-statistics-api.git synced 2024-11-16 19:37:04 +01:00
dspace-statistics-api/dspace_statistics_api/items.py
Alan Orth 4dbf734a4b
Move all imports to top of file
A few months ago I had an issue setting up mocking because I was
trying to be clever importing these libraries only when I needed
them rather than at the global scope. Someone pointed out to me
that if the imports are at the top of the file Falcon will load
them once when the WSGI server starts, whereas if they are in the
on_get() or on_post() they will load for every request! Also, it
seems that PEP8 recommends keeping imports at the top of the file
anyways, so I will just do that.

Imports sorted with isort.

See: https://www.python.org/dev/peps/pep-0008/#imports
2020-12-18 22:42:06 +02:00

107 lines
3.5 KiB
Python

import requests
from .config import SOLR_SERVER
from .util import get_statistics_shards
def get_views(solr_date_string: str, items: list) -> dict:
    """
    Get view statistics for a list of items from Solr.

    Sends a single facet query to the "statistics" core and reads the
    per-item counts from the "id" facet field.

    :parameter solr_date_string (str): Solr date string, for example "[* TO *]"
    :parameter items (list): a list of item IDs
    :returns: A dict of item IDs and views. Every requested item is present;
              items missing from the Solr facet are reported with 0 views.
              An empty list of items yields an empty dict without querying
              Solr.
    """
    # An empty list would produce the query "id:()", which is not a
    # meaningful query, so skip the Solr round trip entirely.
    if not items:
        return {}

    shards = get_statistics_shards()

    # Join the UUIDs with "OR" and escape the hyphens for Solr
    # NOTE(review): only hyphens are escaped; presumably the caller has
    # already validated the item IDs — confirm.
    solr_items_string: str = " OR ".join(items).replace("-", r"\-")

    solr_query_params = {
        "q": f"id:({solr_items_string})",
        "fq": f"type:2 AND isBot:false AND statistics_type:view AND time:{solr_date_string}",
        "fl": "id",
        "facet": "true",
        "facet.field": "id",
        "facet.mincount": 1,
        "shards": shards,
        "rows": 0,
        "wt": "json",
        "json.nl": "map",  # return facets as a dict instead of a flat list
    }

    solr_url = SOLR_SERVER + "/statistics/select"
    res = requests.get(solr_url, params=solr_query_params)

    # Solr returns facets as a dict of dicts (see the json.nl parameter)
    views = res.json()["facet_counts"]["facet_fields"]

    # Copy the 'id' facet ({item_id: views}) into a fresh dict
    data = dict(views["id"])

    # facet.mincount=1 leaves out items without any hits, so give those an
    # explicit 0. setdefault() never overwrites a count Solr returned and
    # avoids scanning a list of keys for every requested item.
    for item_id in items:
        data.setdefault(item_id, 0)

    return data
def get_downloads(solr_date_string: str, items: list) -> dict:
    """
    Get download statistics for a list of items from Solr.

    Sends a single facet query to the "statistics" core and reads the
    per-item counts from the "owningItem" facet field.

    :parameter solr_date_string (str): Solr date string, for example "[* TO *]"
    :parameter items (list): a list of item IDs
    :returns: A dict of item IDs and downloads. Every requested item is
              present; items missing from the Solr facet are reported with
              0 downloads. An empty list of items yields an empty dict
              without querying Solr.
    """
    # An empty list would produce the query "owningItem:()", which is not a
    # meaningful query, so skip the Solr round trip entirely.
    if not items:
        return {}

    shards = get_statistics_shards()

    # Join the UUIDs with "OR" and escape the hyphens for Solr
    # NOTE(review): only hyphens are escaped; presumably the caller has
    # already validated the item IDs — confirm.
    solr_items_string: str = " OR ".join(items).replace("-", r"\-")

    solr_query_params = {
        "q": f"owningItem:({solr_items_string})",
        "fq": f"type:0 AND isBot:false AND statistics_type:view AND bundleName:ORIGINAL AND time:{solr_date_string}",
        "fl": "owningItem",
        "facet": "true",
        "facet.field": "owningItem",
        "facet.mincount": 1,
        "shards": shards,
        "rows": 0,
        "wt": "json",
        "json.nl": "map",  # return facets as a dict instead of a flat list
    }

    solr_url = SOLR_SERVER + "/statistics/select"
    res = requests.get(solr_url, params=solr_query_params)

    # Solr returns facets as a dict of dicts (see the json.nl parameter)
    downloads = res.json()["facet_counts"]["facet_fields"]

    # Copy the 'owningItem' facet ({item_id: downloads}) into a fresh dict
    data = dict(downloads["owningItem"])

    # facet.mincount=1 leaves out items without any hits, so give those an
    # explicit 0. setdefault() never overwrites a count Solr returned and
    # avoids scanning a list of keys for every requested item.
    for item_id in items:
        data.setdefault(item_id, 0)

    return data
# vim: set sw=4 ts=4 expandtab: