dspace-statistics-api/dspace_statistics_api/items.py

import requests

from .config import SOLR_SERVER


def get_views(solr_date_string: str, items: list) -> dict:
    """
    Get view statistics for a list of items from Solr.

    :parameter solr_date_string (str): Solr date string, for example "[* TO *]"
    :parameter items (list): a list of item IDs
    :returns: A dict of item IDs and views. Items for which Solr returns no
              facet count are included with a value of 0. An empty list of
              items yields an empty dict without querying Solr.
    """
    # Nothing to look up: joining an empty list would build the query
    # "id:()", so return early instead of sending a request.
    if not items:
        return {}

    # Imported inside the function rather than at module scope.
    from .util import get_statistics_shards

    shards = get_statistics_shards()

    # Join the UUIDs with "OR" and escape the hyphens for Solr
    solr_items_string: str = " OR ".join(items).replace("-", r"\-")

    solr_query_params = {
        "q": f"id:({solr_items_string})",
        "fq": f"type:2 AND isBot:false AND statistics_type:view AND time:{solr_date_string}",
        "facet": "true",
        "facet.field": "id",
        "facet.mincount": 1,
        # Solr returns at most 100 facet values by default; lift the cap so
        # that longer item lists are not truncated and then reported as 0.
        "facet.limit": -1,
        "shards": shards,
        "rows": 0,
        "wt": "json",
        "json.nl": "map",  # return facets as a dict instead of a flat list
    }

    solr_url = SOLR_SERVER + "/statistics/select"
    res = requests.get(solr_url, params=solr_query_params)

    # Solr returns facets as a dict of dicts (see the json.nl parameter)
    views = res.json()["facet_counts"]["facet_fields"]
    # Copy the 'id' facet (item id -> number of views) into our result
    data = dict(views["id"])

    # Items with no stats are absent from the facet (facet.mincount is 1),
    # so set them to 0. setdefault leaves the counts from Solr untouched.
    for item_id in items:
        data.setdefault(item_id, 0)

    return data


def get_downloads(solr_date_string: str, items: list) -> dict:
    """
    Get download statistics for a list of items from Solr.

    :parameter solr_date_string (str): Solr date string, for example "[* TO *]"
    :parameter items (list): a list of item IDs
    :returns: A dict of item IDs and downloads. Items for which Solr returns
              no facet count are included with a value of 0. An empty list
              of items yields an empty dict without querying Solr.
    """
    # Nothing to look up: joining an empty list would build the query
    # "owningItem:()", so return early instead of sending a request.
    if not items:
        return {}

    # Imported inside the function rather than at module scope.
    from .util import get_statistics_shards

    shards = get_statistics_shards()

    # Join the UUIDs with "OR" and escape the hyphens for Solr
    solr_items_string: str = " OR ".join(items).replace("-", r"\-")

    solr_query_params = {
        "q": f"owningItem:({solr_items_string})",
        "fq": f"type:0 AND isBot:false AND statistics_type:view AND bundleName:ORIGINAL AND time:{solr_date_string}",
        "facet": "true",
        "facet.field": "owningItem",
        "facet.mincount": 1,
        # Solr returns at most 100 facet values by default; lift the cap so
        # that longer item lists are not truncated and then reported as 0.
        "facet.limit": -1,
        "shards": shards,
        "rows": 0,
        "wt": "json",
        "json.nl": "map",  # return facets as a dict instead of a flat list
    }

    solr_url = SOLR_SERVER + "/statistics/select"
    res = requests.get(solr_url, params=solr_query_params)

    # Solr returns facets as a dict of dicts (see the json.nl parameter)
    downloads = res.json()["facet_counts"]["facet_fields"]
    # Copy the 'owningItem' facet (item id -> number of downloads) into our result
    data = dict(downloads["owningItem"])

    # Items with no stats are absent from the facet (facet.mincount is 1),
    # so set them to 0. setdefault leaves the counts from Solr untouched.
    for item_id in items:
        data.setdefault(item_id, 0)

    return data

# vim: set sw=4 ts=4 expandtab:
Add missing dspace_statistics_api/items.py This was meant to be added with the new /items POST changes. 2020-09-25 11:29:51 +02:00			`import requests`

			`from .config import SOLR_SERVER`


			`def get_views(solr_date_string: str, items: list):`
			`"""`
			`Get view statistics for a list of items from Solr.`

			`:parameter solr_date_string (str): Solr date string, for example "[* TO *]"`
			`:parameter items (list): a list of item IDs`
			`:returns: A dict of item IDs and views`
			`"""`
dspace_statistics_api/items.py: Move util import Move util import from global scope because it causes tests to fail. We don't need to set up the Solr connection unless we're actually trying to use the get_views and get_downloads methods, either when running the API in production or during tests where the connection has been set up. 2020-10-06 14:07:00 +02:00			`from .util import get_statistics_shards`
			`shards = get_statistics_shards()`

Add missing dspace_statistics_api/items.py This was meant to be added with the new /items POST changes. 2020-09-25 11:29:51 +02:00			`# Join the UUIDs with "OR" and escape the hyphens for Solr`
dspace_statistics_api/items.py: Fix flake8 warning According to flake8 we need to use a different syntax for strings with backslash escape sequences: > As of Python 3.6, a backslash-character pair that is not a valid > escape sequence now generates a DeprecationWarning. This will > eventually become a SyntaxError. The warning was: W605 invalid escape sequence '\-' See: https://www.flake8rules.com/rules/W605.html 2020-09-26 11:11:04 +02:00			`solr_items_string: str = " OR ".join(items).replace("-", r"\-")`
Add missing dspace_statistics_api/items.py This was meant to be added with the new /items POST changes. 2020-09-25 11:29:51 +02:00
			`solr_query_params = {`
			`"q": f"id:({solr_items_string})",`
			`"fq": f"type:2 AND isBot:false AND statistics_type:view AND time:{solr_date_string}",`
			`"facet": "true",`
			`"facet.field": "id",`
			`"facet.mincount": 1,`
			`"shards": shards,`
			`"rows": 0,`
			`"wt": "json",`
			`"json.nl": "map", # return facets as a dict instead of a flat list`
			`}`

			`solr_url = SOLR_SERVER + "/statistics/select"`
			`res = requests.get(solr_url, params=solr_query_params)`

			`# Create an empty dict to store views`
			`data = {}`

			`# Solr returns facets as a dict of dicts (see the json.nl parameter)`
			`views = res.json()["facet_counts"]["facet_fields"]`
			`# iterate over the 'id' dict and get the item ids and views`
			`for item_id, item_views in views["id"].items():`
			`data[item_id] = item_views`

			`# Check if any items have missing stats so we can set them to 0`
			`if len(data) < len(items):`
			`# List comprehension to get a list of item ids (keys) in the data`
			`data_ids = [k for k, v in data.items()]`
			`for item_id in items:`
			`if item_id not in data_ids:`
			`data[item_id] = 0`
			`continue`

			`return data`


			`def get_downloads(solr_date_string: str, items: list):`
			`"""`
			`Get download statistics for a list of items from Solr.`

			`:parameter solr_date_string (str): Solr date string, for example "[* TO *]"`
			`:parameter items (list): a list of item IDs`
			`:returns: A dict of item IDs and downloads`
			`"""`
dspace_statistics_api/items.py: Move util import Move util import from global scope because it causes tests to fail. We don't need to set up the Solr connection unless we're actually trying to use the get_views and get_downloads methods, either when running the API in production or during tests where the connection has been set up. 2020-10-06 14:07:00 +02:00			`from .util import get_statistics_shards`
			`shards = get_statistics_shards()`

Add missing dspace_statistics_api/items.py This was meant to be added with the new /items POST changes. 2020-09-25 11:29:51 +02:00			`# Join the UUIDs with "OR" and escape the hyphens for Solr`
dspace_statistics_api/items.py: Fix flake8 warning According to flake8 we need to use a different syntax for strings with backslash escape sequences: > As of Python 3.6, a backslash-character pair that is not a valid > escape sequence now generates a DeprecationWarning. This will > eventually become a SyntaxError. The warning was: W605 invalid escape sequence '\-' See: https://www.flake8rules.com/rules/W605.html 2020-09-26 11:11:04 +02:00			`solr_items_string: str = " OR ".join(items).replace("-", r"\-")`
Add missing dspace_statistics_api/items.py This was meant to be added with the new /items POST changes. 2020-09-25 11:29:51 +02:00
			`solr_query_params = {`
			`"q": f"owningItem:({solr_items_string})",`
			`"fq": f"type:0 AND isBot:false AND statistics_type:view AND bundleName:ORIGINAL AND time:{solr_date_string}",`
			`"facet": "true",`
			`"facet.field": "owningItem",`
			`"facet.mincount": 1,`
			`"shards": shards,`
			`"rows": 0,`
			`"wt": "json",`
			`"json.nl": "map", # return facets as a dict instead of a flat list`
			`}`

			`solr_url = SOLR_SERVER + "/statistics/select"`
			`res = requests.get(solr_url, params=solr_query_params)`

			`# Create an empty dict to store downloads`
			`data = {}`

			`# Solr returns facets as a dict of dicts (see the json.nl parameter)`
			`downloads = res.json()["facet_counts"]["facet_fields"]`
			`# Iterate over the 'owningItem' dict and get the item ids and downloads`
			`for item_id, item_downloads in downloads["owningItem"].items():`
			`data[item_id] = item_downloads`

			`# Check if any items have missing stats so we can set them to 0`
			`if len(data) < len(items):`
			`# List comprehension to get a list of item ids (keys) in the data`
			`data_ids = [k for k, v in data.items()]`
			`for item_id in items:`
			`if item_id not in data_ids:`
			`data[item_id] = 0`
			`continue`

			`return data`

			`# vim: set sw=4 ts=4 expandtab:`