dspace-statistics-api/dspace_statistics_api/items.py

import requests

from .config import SOLR_SERVER
from .util import get_statistics_shards


def get_views(solr_date_string: str, items: list) -> dict:
    """
    Get view statistics for a list of items from Solr.

    Every requested item ID appears in the result: IDs that are absent from
    the Solr facet response are included with a count of 0.

    :parameter solr_date_string (str): Solr date string, for example "[* TO *]"
    :parameter items (list): a list of item IDs
    :returns: A dict of item IDs and views (empty if no items were given)
    """
    # Nothing to look up: return early instead of sending Solr a query with
    # an empty "id:()" clause and then trying to parse its response.
    if not items:
        return {}

    shards = get_statistics_shards()

    # Join the UUIDs with "OR" and escape the hyphens for Solr
    solr_items_string: str = " OR ".join(items).replace("-", r"\-")

    solr_query_params = {
        "q": f"id:({solr_items_string})",
        "fq": f"type:2 AND isBot:false AND statistics_type:view AND time:{solr_date_string}",
        "fl": "id",
        "facet": "true",
        "facet.field": "id",
        "facet.mincount": 1,
        "shards": shards,
        "rows": 0,
        "wt": "json",
        "json.nl": "map",  # return facets as a dict instead of a flat list
    }

    solr_url = SOLR_SERVER + "/statistics/select"
    res = requests.get(solr_url, params=solr_query_params)

    # Solr returns facets as a dict of dicts (see the json.nl parameter), so
    # the 'id' facet is already a mapping of item ID to number of views
    views = res.json()["facet_counts"]["facet_fields"]
    data = dict(views["id"])

    # Items without any matching views are missing from the facet (we ask
    # for facet.mincount=1), so report those explicitly with 0 views
    for item_id in items:
        data.setdefault(item_id, 0)

    return data


def get_downloads(solr_date_string: str, items: list) -> dict:
    """
    Get download statistics for a list of items from Solr.

    Every requested item ID appears in the result: IDs that are absent from
    the Solr facet response are included with a count of 0.

    :parameter solr_date_string (str): Solr date string, for example "[* TO *]"
    :parameter items (list): a list of item IDs
    :returns: A dict of item IDs and downloads (empty if no items were given)
    """
    # Nothing to look up: return early instead of sending Solr a query with
    # an empty "owningItem:()" clause and then trying to parse its response.
    if not items:
        return {}

    shards = get_statistics_shards()

    # Join the UUIDs with "OR" and escape the hyphens for Solr
    solr_items_string: str = " OR ".join(items).replace("-", r"\-")

    solr_query_params = {
        "q": f"owningItem:({solr_items_string})",
        "fq": f"type:0 AND isBot:false AND statistics_type:view AND bundleName:ORIGINAL AND time:{solr_date_string}",
        "fl": "owningItem",
        "facet": "true",
        "facet.field": "owningItem",
        "facet.mincount": 1,
        "shards": shards,
        "rows": 0,
        "wt": "json",
        "json.nl": "map",  # return facets as a dict instead of a flat list
    }

    solr_url = SOLR_SERVER + "/statistics/select"
    res = requests.get(solr_url, params=solr_query_params)

    # Solr returns facets as a dict of dicts (see the json.nl parameter), so
    # the 'owningItem' facet is already a mapping of item ID to downloads
    downloads = res.json()["facet_counts"]["facet_fields"]
    data = dict(downloads["owningItem"])

    # Items without any matching downloads are missing from the facet (we
    # ask for facet.mincount=1), so report those explicitly with 0 downloads
    for item_id in items:
        data.setdefault(item_id, 0)

    return data

# vim: set sw=4 ts=4 expandtab:
Add missing dspace_statistics_api/items.py This was meant to be added with the new /items POST changes. 2020-09-25 11:29:51 +02:00			`import requests`

			`from .config import SOLR_SERVER`
Move all imports to top of file A few months ago I had an issue setting up mocking because I was trying to be clever importing these libraries only when I needed them rather than at the global scope. Someone pointed out to me that if the imports are at the top of the file Falcon will load them once when the WSGI server starts, whereas if they are in the on_get() or on_post() they will load for every request! Also, it seems that PEP8 recommends keeping imports at the top of the file anyways, so I will just do that. Imports sorted with isort. See: https://www.python.org/dev/peps/pep-0008/#imports 2020-12-18 21:42:06 +01:00			`from .util import get_statistics_shards`
Add missing dspace_statistics_api/items.py This was meant to be added with the new /items POST changes. 2020-09-25 11:29:51 +02:00

			`def get_views(solr_date_string: str, items: list):`
			`"""`
			`Get view statistics for a list of items from Solr.`

			`:parameter solr_date_string (str): Solr date string, for example "[* TO *]"`
			`:parameter items (list): a list of item IDs`
			`:returns: A dict of item IDs and views`
			`"""`
dspace_statistics_api/items.py: Move util import Move util import from global scope because it causes tests to fail. We don't need to set up the Solr connection unless we're actually trying to use the get_views and get_downloads methods, either when running the API in production or during tests where the connection has been set up. 2020-10-06 14:07:00 +02:00			`shards = get_statistics_shards()`

Add missing dspace_statistics_api/items.py This was meant to be added with the new /items POST changes. 2020-09-25 11:29:51 +02:00			`# Join the UUIDs with "OR" and escape the hyphens for Solr`
dspace_statistics_api/items.py: Fix flake8 warning According to flake8 we need to use a different syntax for strings with backslash escape sequences: > As of Python 3.6, a backslash-character pair that is not a valid > escape sequence now generates a DeprecationWarning. This will > eventually become a SyntaxError. The warning was: W605 invalid escape sequence '\-' See: https://www.flake8rules.com/rules/W605.html 2020-09-26 11:11:04 +02:00			`solr_items_string: str = " OR ".join(items).replace("-", r"\-")`
Add missing dspace_statistics_api/items.py This was meant to be added with the new /items POST changes. 2020-09-25 11:29:51 +02:00
			`solr_query_params = {`
			`"q": f"id:({solr_items_string})",`
			`"fq": f"type:2 AND isBot:false AND statistics_type:view AND time:{solr_date_string}",`
items.py: Add fl parameter to Solr queries I forgot to add the fl parameter here as well. 2020-12-18 15:12:34 +01:00			`"fl": "id",`
Add missing dspace_statistics_api/items.py This was meant to be added with the new /items POST changes. 2020-09-25 11:29:51 +02:00			`"facet": "true",`
			`"facet.field": "id",`
			`"facet.mincount": 1,`
			`"shards": shards,`
			`"rows": 0,`
			`"wt": "json",`
			`"json.nl": "map", # return facets as a dict instead of a flat list`
			`}`

			`solr_url = SOLR_SERVER + "/statistics/select"`
			`res = requests.get(solr_url, params=solr_query_params)`

			`# Create an empty dict to store views`
			`data = {}`

			`# Solr returns facets as a dict of dicts (see the json.nl parameter)`
			`views = res.json()["facet_counts"]["facet_fields"]`
			`# iterate over the 'id' dict and get the item ids and views`
			`for item_id, item_views in views["id"].items():`
			`data[item_id] = item_views`

			`# Check if any items have missing stats so we can set them to 0`
			`if len(data) < len(items):`
			`# List comprehension to get a list of item ids (keys) in the data`
			`data_ids = [k for k, v in data.items()]`
			`for item_id in items:`
			`if item_id not in data_ids:`
			`data[item_id] = 0`
			`continue`

			`return data`


			`def get_downloads(solr_date_string: str, items: list):`
			`"""`
			`Get download statistics for a list of items from Solr.`

			`:parameter solr_date_string (str): Solr date string, for example "[* TO *]"`
			`:parameter items (list): a list of item IDs`
			`:returns: A dict of item IDs and downloads`
			`"""`
dspace_statistics_api/items.py: Move util import Move util import from global scope because it causes tests to fail. We don't need to set up the Solr connection unless we're actually trying to use the get_views and get_downloads methods, either when running the API in production or during tests where the connection has been set up. 2020-10-06 14:07:00 +02:00			`shards = get_statistics_shards()`

Add missing dspace_statistics_api/items.py This was meant to be added with the new /items POST changes. 2020-09-25 11:29:51 +02:00			`# Join the UUIDs with "OR" and escape the hyphens for Solr`
dspace_statistics_api/items.py: Fix flake8 warning According to flake8 we need to use a different syntax for strings with backslash escape sequences: > As of Python 3.6, a backslash-character pair that is not a valid > escape sequence now generates a DeprecationWarning. This will > eventually become a SyntaxError. The warning was: W605 invalid escape sequence '\-' See: https://www.flake8rules.com/rules/W605.html 2020-09-26 11:11:04 +02:00			`solr_items_string: str = " OR ".join(items).replace("-", r"\-")`
Add missing dspace_statistics_api/items.py This was meant to be added with the new /items POST changes. 2020-09-25 11:29:51 +02:00
			`solr_query_params = {`
			`"q": f"owningItem:({solr_items_string})",`
			`"fq": f"type:0 AND isBot:false AND statistics_type:view AND bundleName:ORIGINAL AND time:{solr_date_string}",`
items.py: Add fl parameter to Solr queries I forgot to add the fl parameter here as well. 2020-12-18 15:12:34 +01:00			`"fl": "owningItem",`
Add missing dspace_statistics_api/items.py This was meant to be added with the new /items POST changes. 2020-09-25 11:29:51 +02:00			`"facet": "true",`
			`"facet.field": "owningItem",`
			`"facet.mincount": 1,`
			`"shards": shards,`
			`"rows": 0,`
			`"wt": "json",`
			`"json.nl": "map", # return facets as a dict instead of a flat list`
			`}`

			`solr_url = SOLR_SERVER + "/statistics/select"`
			`res = requests.get(solr_url, params=solr_query_params)`

			`# Create an empty dict to store downloads`
			`data = {}`

			`# Solr returns facets as a dict of dicts (see the json.nl parameter)`
			`downloads = res.json()["facet_counts"]["facet_fields"]`
			`# Iterate over the 'owningItem' dict and get the item ids and downloads`
			`for item_id, item_downloads in downloads["owningItem"].items():`
			`data[item_id] = item_downloads`

			`# Check if any items have missing stats so we can set them to 0`
			`if len(data) < len(items):`
			`# List comprehension to get a list of item ids (keys) in the data`
			`data_ids = [k for k, v in data.items()]`
			`for item_id in items:`
			`if item_id not in data_ids:`
			`data[item_id] = 0`
			`continue`

			`return data`

			`# vim: set sw=4 ts=4 expandtab:`