dspace-statistics-api/dspace_statistics_api/app.py

import falcon
import psycopg2.extras

from .database import DatabaseManager
from .items import get_downloads, get_views
from .util import validate_items_post_parameters


class RootResource:
    """Falcon resource that answers GET with the HTML API documentation."""

    def on_get(self, req, resp):
        """Send the contents of the bundled index.html as a text/html reply.

        The file is opened on every request using a path that is relative to
        the working directory of the running process.
        """
        resp.status = falcon.HTTP_200
        resp.content_type = "text/html"

        with open("dspace_statistics_api/docs/index.html", "r") as docs_page:
            html = docs_page.read()
            resp.body = html


class AllItemsResource:
    """Falcon resource exposing paged view/download statistics for items."""

    def on_get(self, req, resp):
        """Return one page of statistics read from the ``items`` table.

        Query parameters (both optional):
            limit: page size; bounds of 1 to 100 are passed to
                ``req.get_param_as_int`` and 100 is used when it is absent.
            page: zero-based page number with a lower bound of 0; 0 is used
                when it is absent.

        On success ``resp.media`` is a dict with the keys ``currentPage``,
        ``totalPages``, ``limit`` and ``statistics`` (a list of dicts holding
        ``id``, ``views`` and ``downloads``).
        """
        page_size = req.get_param_as_int("limit", min_value=1, max_value=100) or 100
        page_number = req.get_param_as_int("page", min_value=0) or 0

        with DatabaseManager() as db:
            db.set_session(readonly=True)

            with db.cursor() as cursor:
                # The total item count is only used to estimate the number of
                # pages. NOTE(review): round() is used here while on_post
                # truncates with int() -- confirm which one is intended.
                cursor.execute("SELECT COUNT(id) FROM items")
                item_count = cursor.fetchone()[0]
                total_pages = round(item_count / page_size)

                # Ordering by id keeps LIMIT/OFFSET paging deterministic.
                cursor.execute(
                    "SELECT id, views, downloads FROM items ORDER BY id LIMIT %s OFFSET %s",
                    [page_size, page_size * page_number],
                )

                # The cursor is iterable, so rows are consumed directly. The
                # id is cast to str before it is placed in the response.
                statistics = [
                    {
                        "id": str(row["id"]),
                        "views": row["views"],
                        "downloads": row["downloads"],
                    }
                    for row in cursor
                ]

        resp.media = {
            "currentPage": page_number,
            "totalPages": total_pages,
            "limit": page_size,
            "statistics": statistics,
        }

    @falcon.before(validate_items_post_parameters)
    def on_post(self, req, resp):
        """Return statistics for one page of the POSTed items.

        Reads ``items``, ``limit``, ``page``, ``dateFrom`` and ``dateTo``
        from ``req.context``; these are expected to have been set by the
        ``validate_items_post_parameters`` hook that runs before this method.

        On success ``resp.media`` is a dict with the keys ``currentPage``,
        ``totalPages``, ``limit`` and ``statistics``.
        """
        params = req.context

        # Build the Solr date range, ie: [* TO *]. A missing (falsy) bound is
        # replaced by the "*" wildcard.
        range_start = params.dateFrom or "*"
        range_end = params.dateTo or "*"
        solr_date_string = f"[{range_start} TO {range_end}]"

        # Page count is the item count divided by the limit, truncated.
        total_pages: int = int(len(params.items) / params.limit)

        # Select the slice of POSTed items that belongs to the requested
        # page. Slice bounds are zero based and the end bound is exclusive,
        # so with 240 items and a limit of 100:
        #
        #   page 0: items[0:100]   -> indexes 0 to 99
        #   page 1: items[100:200] -> indexes 100 to 199
        #   page 2: items[200:300] -> indexes 200 to 239
        slice_start: int = params.page * params.limit
        slice_end: int = slice_start + params.limit
        requested_items: list = params.items[slice_start:slice_end]

        views: dict = get_views(solr_date_string, requested_items)
        downloads: dict = get_downloads(solr_date_string, requested_items)

        # Walk the views dict and use each item id as the key into the
        # downloads dict to pair up both counters.
        statistics = [
            {"id": item_id, "views": view_count, "downloads": downloads[item_id]}
            for item_id, view_count in views.items()
        ]

        resp.status = falcon.HTTP_200
        resp.media = {
            "currentPage": params.page,
            "totalPages": total_pages,
            "limit": params.limit,
            "statistics": statistics,
        }


class ItemResource:
    """Falcon resource exposing view/download statistics for a single item."""

    def on_get(self, req, resp, item_id):
        """Return the stored views and downloads for one item.

        Args:
            req: the Falcon request (not read by this handler).
            resp: the Falcon response; ``resp.media`` is set on success.
            item_id: identifier of the item taken from the request path; it
                is converted with ``str()`` before being used in the query
                and in the response.

        Raises:
            falcon.HTTPNotFound: if the query matches no row in ``items``.
        """

        # Adapt Python's uuid.UUID type to PostgreSQL's uuid
        # See: https://www.psycopg.org/docs/extras.html
        psycopg2.extras.register_uuid()

        with DatabaseManager() as db:
            db.set_session(readonly=True)

            # Run the query on the context-managed cursor itself. Creating a
            # second cursor inside this block would leave that one unclosed
            # when the block exits.
            with db.cursor() as cursor:
                cursor.execute(
                    "SELECT views, downloads FROM items WHERE id=%s", [str(item_id)]
                )

                # No matching row means the item is unknown to the database.
                if cursor.rowcount == 0:
                    raise falcon.HTTPNotFound(
                        title="Item not found",
                        description=f'The item with id "{str(item_id)}" was not found.',
                    )

                results = cursor.fetchone()

                resp.media = {
                    "id": str(item_id),
                    "views": results["views"],
                    "downloads": results["downloads"],
                }


# The same Falcon application object is bound to both module-level names.
# NOTE(review): presumably so a WSGI server can be pointed at either "api" or
# "application" -- confirm against the deployment configuration.
application = falcon.API()
api = application

# Map each URI template to the resource instance that handles it.
application.add_route("/", RootResource())
application.add_route("/items", AllItemsResource())
application.add_route("/item/{item_id:uuid}", ItemResource())

# vim: set sw=4 ts=4 expandtab:
-												Initial commit

Add first working version of the statistics API.

											
										
										
											2018-09-18 13:03:15 +02:00
+								import falcon
-												Move all imports to top of file

A few months ago I had an issue setting up mocking because I was
trying to be clever importing these libraries only when I needed
them rather than at the global scope. Someone pointed out to me
that if the imports are at the top of the file Falcon will load
them once when the WSGI server starts, whereas if they are in the
on_get() or on_post() they will load for every request! Also, it
seems that PEP8 recommends keeping imports at the top of the file
anyways, so I will just do that.

Imports sorted with isort.

See: https://www.python.org/dev/peps/pep-0008/#imports

											
										
										
											2020-12-18 21:42:06 +01:00
+								import psycopg2.extras
-												Initial commit

Add first working version of the statistics API.

											
										
										
											2018-09-18 13:03:15 +02:00
-												Sort imports with isort

											
										
										
											2019-11-27 11:31:04 +01:00
+								from .database import DatabaseManager
-												dspace_statistics_api: Sort imports with isort

											
										
										
											2020-10-06 14:12:13 +02:00
+								from .items import get_downloads, get_views
-												Refactor `/items` POST handler to use a before hook

This allows us to do the dirty work of parsing, validating, and
setting local variables from the POST parameters outside of the
on_post function. We then share the parameters via the req.context
object. Functionally it is the same, but readability is better
and it's a neat trick that I could use elsewhere.

See: https://falcon.readthedocs.io/en/stable/user/faq.html#how-can-i-pass-data-from-a-hook-to-a-responder-and-between-hooks

											
										
										
											2020-09-26 17:37:14 +02:00
+								from .util import validate_items_post_parameters
-												Sort imports with isort

											
										
										
											2019-11-27 11:31:04 +01:00
-												Correct issues highlighted by Flake8

Flake8 validates code style against PEP 8 in order to encourage the
writing of idiomatic Python. For reference, I am currently ignoring
errors about line length (E501) because I feel it makes code harder
to read.

This is the invocation I am using:

    $ flake8 --ignore E501 dspace_statistics_api

											
										
										
											2018-11-03 22:55:23 +01:00
-												Add basic API documentation on root (/)

I had imagined plugging in an interactive Swagger or OpenAPI instance
here, but that's actually much more involved in Falcon than I want to
deal with right now.

											
										
										
											2018-10-31 23:19:39 +01:00
+								class RootResource:
 								    def on_get(self, req, resp):
 								        resp.status = falcon.HTTP_200
-												Format code with black

											
										
										
											2019-11-27 11:30:06 +01:00
+								        resp.content_type = "text/html"
 								        with open("dspace_statistics_api/docs/index.html", "r") as f:
-												Add basic API documentation on root (/)

I had imagined plugging in an interactive Swagger or OpenAPI instance
here, but that's actually much more involved in Falcon than I want to
deal with right now.

											
										
										
											2018-10-31 23:19:39 +01:00
+								            resp.body = f.read()
-												Correct issues highlighted by Flake8

Flake8 validates code style against PEP 8 in order to encourage the
writing of idiomatic Python. For reference, I am currently ignoring
errors about line length (E501) because I feel it makes code harder
to read.

This is the invocation I am using:

    $ flake8 --ignore E501 dspace_statistics_api

											
										
										
											2018-11-03 22:55:23 +01:00
-												app.py: Add route to page through all item statistics

This route exposes all item statistics and uses the limit and offset
parameters to control paging through the result set. The logic here
is extremely easy thanks to the brilliant LIMIT and OFFSET features
of SQLite (of course the SQL query sorts the results by some unique
field to ensure the order is already the same).

											
										
										
											2018-09-24 15:07:26 +02:00
+								class AllItemsResource:
 								    def on_get(self, req, resp):
 								        """Handles GET requests"""
 								        # Return HTTPBadRequest if id parameter is not present and valid
-												Make sure limit is between 1 and 100

We were not properly checking whether the limit was greater than 0
in all cases.

											
										
										
											2020-11-02 20:59:20 +01:00
+								        limit = req.get_param_as_int("limit", min_value=1, max_value=100) or 100
-												dspace_statistics_api/app.py: Fix Falcon 2.0 syntax

See: dspace_statistics_api/app.py

											
										
										
											2019-03-17 22:23:23 +01:00
+								        page = req.get_param_as_int("page", min_value=0) or 0
-												app.py: Add route to page through all item statistics

This route exposes all item statistics and uses the limit and offset
parameters to control paging through the result set. The logic here
is extremely easy thanks to the brilliant LIMIT and OFFSET features
of SQLite (of course the SQL query sorts the results by some unique
field to ensure the order is already the same).

											
										
										
											2018-09-24 15:07:26 +02:00
+								        offset = limit * page
-												Refactor database code to use a context manager

Instead of opening one global persistent database connection when
the application starts, I am now abstracting it to a class that I can use
in combination with Python's "with" context. Both connections and
cursors are kept for the context of each "with" block and closed
automatically when exiting.

See: https://alysivji.github.io/managing-resources-with-context-managers-pythonic.html
See: http://initd.org/psycopg/docs/connection.html#connection.close

											
										
										
											2018-11-07 16:41:21 +01:00
+								        with DatabaseManager() as db:
 								            db.set_session(readonly=True)
-												app.py: Add route to page through all item statistics

This route exposes all item statistics and uses the limit and offset
parameters to control paging through the result set. The logic here
is extremely easy thanks to the brilliant LIMIT and OFFSET features
of SQLite (of course the SQL query sorts the results by some unique
field to ensure the order is already the same).

											
										
										
											2018-09-24 15:07:26 +02:00
-												Refactor database code to use a context manager

Instead of opening one global persistent database connection when
the application starts, I am now abstracting it to a class that I can use
in combination with Python's "with" context. Both connections and
cursors are kept for the context of each "with" block and closed
automatically when exiting.

See: https://alysivji.github.io/managing-resources-with-context-managers-pythonic.html
See: http://initd.org/psycopg/docs/connection.html#connection.close

											
										
										
											2018-11-07 16:41:21 +01:00
+								            with db.cursor() as cursor:
 								                # get total number of items so we can estimate the pages
-												Format code with black

											
										
										
											2019-11-27 11:30:06 +01:00
+								                cursor.execute("SELECT COUNT(id) FROM items")
-												Refactor database code to use a context manager

Instead of opening one global persistent database connection when
the application starts, I am now abstracting it to a class that I can use
in combination with Python's "with" context. Both connections and
cursors are kept for the context of each "with" block and closed
automatically when exiting.

See: https://alysivji.github.io/managing-resources-with-context-managers-pythonic.html
See: http://initd.org/psycopg/docs/connection.html#connection.close

											
										
										
											2018-11-07 16:41:21 +01:00
+								                pages = round(cursor.fetchone()[0] / limit)
-												app.py: Add route to page through all item statistics

This route exposes all item statistics and uses the limit and offset
parameters to control paging through the result set. The logic here
is extremely easy thanks to the brilliant LIMIT and OFFSET features
of SQLite (of course the SQL query sorts the results by some unique
field to ensure the order is already the same).

											
										
										
											2018-09-24 15:07:26 +02:00
-												dspace_statistics_api/app.py: Use UUID

DSpace 6+ uses a UUID for item identifiers instead of an integer so
we need to adapt our PostgreSQL queries to use those. Note that we
can no longer sort results in the "all items" endpoint by ID. Also,
we need to use parameterized psycopg2 queries instead of strings to
support queries with UUIDs properly. To use the Python UUID objects
elsewhere in the code we need to make sure that we cast them to str.

											
										
										
											2020-03-02 10:06:27 +01:00
+								                # get statistics and use limit and offset to page through results
-												Format code with black

											
										
										
											2019-11-27 11:30:06 +01:00
+								                cursor.execute(
-												dspace_statistics_api/app.py: Use ORDER BY in /items

Since we are paging through the results by limit/offset we need to
be sure that we are returning results deterministically.

											
										
										
											2020-12-17 09:10:40 +01:00
+								                    "SELECT id, views, downloads FROM items ORDER BY id LIMIT %s OFFSET %s",
-												dspace_statistics_api/app.py: Use parameterized SQL queries

This is a better way to run SQL queries because psycopg2 takes care
of the quoting for us.

											
										
										
											2020-03-02 10:16:05 +01:00
+								                    [limit, offset],
-												Format code with black

											
										
										
											2019-11-27 11:30:06 +01:00
+								                )
-												app.py: Add route to page through all item statistics

This route exposes all item statistics and uses the limit and offset
parameters to control paging through the result set. The logic here
is extremely easy thanks to the brilliant LIMIT and OFFSET features
of SQLite (of course the SQL query sorts the results by some unique
field to ensure the order is already the same).

											
										
										
											2018-09-24 15:07:26 +02:00
-												Refactor database code to use a context manager

Instead of opening one global persistent database connection when
the application starts, I am now abstracting it to a class that I can use
in combination with Python's "with" context. Both connections and
cursors are kept for the context of each "with" block and closed
automatically when exiting.

See: https://alysivji.github.io/managing-resources-with-context-managers-pythonic.html
See: http://initd.org/psycopg/docs/connection.html#connection.close

											
										
										
											2018-11-07 16:41:21 +01:00
+								                # create a list to hold dicts of item stats
 								                statistics = list()
-												app.py: Add route to page through all item statistics

This route exposes all item statistics and uses the limit and offset
parameters to control paging through the result set. The logic here
is extremely easy thanks to the brilliant LIMIT and OFFSET features
of SQLite (of course the SQL query sorts the results by some unique
field to ensure the order is already the same).

											
										
										
											2018-09-24 15:07:26 +02:00
-												Refactor database code to use a context manager

Instead of opening one global persistent database connection when
the application starts, I am now abstracting it to a class that I can use
in combination with Python's "with" context. Both connections and
cursors are kept for the context of each "with" block and closed
automatically when exiting.

See: https://alysivji.github.io/managing-resources-with-context-managers-pythonic.html
See: http://initd.org/psycopg/docs/connection.html#connection.close

											
										
										
											2018-11-07 16:41:21 +01:00
+								                # iterate over results and build statistics object
 								                for item in cursor:
-												Format code with black

											
										
										
											2019-11-27 11:30:06 +01:00
+								                    statistics.append(
 								                        {
-												dspace_statistics_api/app.py: Use UUID

DSpace 6+ uses a UUID for item identifiers instead of an integer so
we need to adapt our PostgreSQL queries to use those. Note that we
can no longer sort results in the "all items" endpoint by ID. Also,
we need to use parameterized psycopg2 queries instead of strings to
support queries with UUIDs properly. To use the Python UUID objects
elsewhere in the code we need to make sure that we cast them to str.

											
										
										
											2020-03-02 10:06:27 +01:00
+								                            "id": str(item["id"]),
-												Format code with black

											
										
										
											2019-11-27 11:30:06 +01:00
+								                            "views": item["views"],
 								                            "downloads": item["downloads"],
 								                        }
 								                    )
-												app.py: Iterate directly on cursor

We don't need to create an intermediate variable for the results of
the SQL query because psycopg2's cursor is iterable.

See: http://initd.org/psycopg/docs/cursor.html

											
										
										
											2018-09-27 10:03:44 +02:00
-												app.py: Add route to page through all item statistics

This route exposes all item statistics and uses the limit and offset
parameters to control paging through the result set. The logic here
is extremely easy thanks to the brilliant LIMIT and OFFSET features
of SQLite (of course the SQL query sorts the results by some unique
field to ensure the order is already the same).

											
										
										
											2018-09-24 15:07:26 +02:00
+								        message = {
-												Format code with black

											
										
										
											2019-11-27 11:30:06 +01:00
+								            "currentPage": page,
 								            "totalPages": pages,
 								            "limit": limit,
 								            "statistics": statistics,
-												app.py: Add route to page through all item statistics

This route exposes all item statistics and uses the limit and offset
parameters to control paging throug the result set. The logic here
is extremely easy thanks to the brilliant LIMIT and OFFSET features
of SQLite (of course the SQL query sorts the results by some unique
field to ensure the order is already the same).

											
										
										
											2018-09-24 15:07:26 +02:00
+								        }
 								        resp.media = message
-												Refactor `/items` POST handler to use a before hook

This allows us to do the dirty work of parsing, validating, and
setting local variables from the POST parameters outside of the
on_post function. We then share the parameters via the req.context
object. Functionally it is the same, but readability is better
and it's a neat trick that I could use elsewhere.

See: https://falcon.readthedocs.io/en/stable/user/faq.html#how-can-i-pass-data-from-a-hook-to-a-responder-and-between-hooks

											
										
										
											2020-09-26 17:37:14 +02:00
+								    @falcon.before(validate_items_post_parameters)
-												dspace_statistics_api: Add support for date ranges to /items

You can now POST a JSON request to /items with a list of items and
a date range. This allows the possibility to get view and download
statistics for arbitrary items and arbitrary date ranges.

The JSON request should be in the following format:

    {
        "limit": 100,
        "page": 0,
        "dateFrom": "2020-01-01T00:00:00Z",
        "dateTo": "2020-09-09T00:00:00Z",
        "items": [
            "f44cf173-2344-4eb2-8f00-ee55df32c76f",
            "2324aa41-e9de-4a2b-bc36-16241464683e",
            "8542f9da-9ce1-4614-abf4-f2e3fdb4b305",
            "0fe573e7-042a-4240-a4d9-753b61233908"
        ]
    }

The limit, page, and date parameters are all optional. By default
it will use a limit of 100, page 0, and [* TO *] Solr date range.

											
										
										
											2020-09-25 11:21:11 +02:00
+								    def on_post(self, req, resp):
 								        """Handles POST requests"""
 								        # Build the Solr date string, ie: [* TO *]
-												Refactor `/items` POST handler to use a before hook

This allows us to do the dirty work of parsing, validating, and
setting local variables from the POST parameters outside of the
on_post function. We then share the parameters via the req.context
object. Functionally it is the same, but readability is better
and it's a neat trick that I could use elsewhere.

See: https://falcon.readthedocs.io/en/stable/user/faq.html#how-can-i-pass-data-from-a-hook-to-a-responder-and-between-hooks

											
										
										
											2020-09-26 17:37:14 +02:00
+								        if req.context.dateFrom and req.context.dateTo:
 								            solr_date_string = f"[{req.context.dateFrom} TO {req.context.dateTo}]"
 								        elif not req.context.dateFrom and req.context.dateTo:
 								            solr_date_string = f"[* TO {req.context.dateTo}]"
 								        elif req.context.dateFrom and not req.context.dateTo:
 								            solr_date_string = f"[{req.context.dateFrom} TO *]"
-												dspace_statistics_api: Add support for date ranges to /items

You can now POST a JSON request to /items with a list of items and
a date range. This allows the possibility to get view and download
statistics for arbitrary items and arbitrary date ranges.

The JSON request should be in the following format:

    {
        "limit": 100,
        "page": 0,
        "dateFrom": "2020-01-01T00:00:00Z",
        "dateTo": "2020-09-09T00:00:00Z",
        "items": [
            "f44cf173-2344-4eb2-8f00-ee55df32c76f",
            "2324aa41-e9de-4a2b-bc36-16241464683e",
            "8542f9da-9ce1-4614-abf4-f2e3fdb4b305",
            "0fe573e7-042a-4240-a4d9-753b61233908"
        ]
    }

The limit, page, and date parameters are all optional. By default
it will use a limit of 100, page 0, and [* TO *] Solr date range.

											
										
										
											2020-09-25 11:21:11 +02:00
+								        else:
 								            solr_date_string = "[* TO *]"
 								        # Helper variables to make working with pages/items/results easier and
 								        # to make the code easier to understand
-												Refactor `/items` POST handler to use a before hook

This allows us to do the dirty work of parsing, validating, and
setting local variables from the POST parameters outside of the
on_post function. We then share the parameters via the req.context
object. Functionally it is the same, but readability is better
and it's a neat trick that I could use elsewhere.

See: https://falcon.readthedocs.io/en/stable/user/faq.html#how-can-i-pass-data-from-a-hook-to-a-responder-and-between-hooks

											
										
										
											2020-09-26 17:37:14 +02:00
+								        number_of_items: int = len(req.context.items)
 								        pages: int = int(number_of_items / req.context.limit)
 								        first_item: int = req.context.page * req.context.limit
 								        last_item: int = first_item + req.context.limit
-												dspace_statistics_api: Add support for date ranges to /items

You can now POST a JSON request to /items with a list of items and
a date range. This allows the possibility to get view and download
statistics for arbitrary items and arbitrary date ranges.

The JSON request should be in the following format:

    {
        "limit": 100,
        "page": 0,
        "dateFrom": "2020-01-01T00:00:00Z",
        "dateTo": "2020-09-09T00:00:00Z",
        "items": [
            "f44cf173-2344-4eb2-8f00-ee55df32c76f",
            "2324aa41-e9de-4a2b-bc36-16241464683e",
            "8542f9da-9ce1-4614-abf4-f2e3fdb4b305",
            "0fe573e7-042a-4240-a4d9-753b61233908"
        ]
    }

The limit, page, and date parameters are all optional. By default
it will use a limit of 100, page 0, and [* TO *] Solr date range.

											
										
										
											2020-09-25 11:21:11 +02:00
+								        # Get a subset of the POSTed items based on our limit. Note that Python
 								        # list slicing and indexing are both zero based, but the first and last
 								        # items in a slice can be confusing. See this ASCII diagram:
 								        #
 								        #                 +---+---+---+---+---+---+
 								        #                 | P | y | t | h | o | n |
 								        #                 +---+---+---+---+---+---+
 								        # Slice position: 0   1   2   3   4   5   6
 								        # Index position:   0   1   2   3   4   5
 								        #
-												Refactor `/items` POST handler to use a before hook

This allows us to do the dirty work of parsing, validating, and
setting local variables from the POST parameters outside of the
on_post function. We then share the parameters via the req.context
object. Functionally it is the same, but readability is better
and it's a neat trick that I could use elsewhere.

See: https://falcon.readthedocs.io/en/stable/user/faq.html#how-can-i-pass-data-from-a-hook-to-a-responder-and-between-hooks

											
										
										
											2020-09-26 17:37:14 +02:00
+								        # So if we have a list items with 240 items:
-												dspace_statistics_api: Add support for date ranges to /items

You can now POST a JSON request to /items with a list of items and
a date range. This allows the possibility to get view and download
statistics for arbitrary items and arbitrary date ranges.

The JSON request should be in the following format:

    {
        "limit": 100,
        "page": 0,
        "dateFrom": "2020-01-01T00:00:00Z",
        "dateTo": "2020-09-09T00:00:00Z",
        "items": [
            "f44cf173-2344-4eb2-8f00-ee55df32c76f",
            "2324aa41-e9de-4a2b-bc36-16241464683e",
            "8542f9da-9ce1-4614-abf4-f2e3fdb4b305",
            "0fe573e7-042a-4240-a4d9-753b61233908"
        ]
    }

The limit, page, and date parameters are all optional. By default
it will use a limit of 100, page 0, and [* TO *] Solr date range.

											
										
										
											2020-09-25 11:21:11 +02:00
+								        #
-												Refactor `/items` POST handler to use a before hook

This allows us to do the dirty work of parsing, validating, and
setting local variables from the POST parameters outside of the
on_post function. We then share the parameters via the req.context
object. Functionally it is the same, but readability is better
and it's a neat trick that I could use elsewhere.

See: https://falcon.readthedocs.io/en/stable/user/faq.html#how-can-i-pass-data-from-a-hook-to-a-responder-and-between-hooks

											
										
										
											2020-09-26 17:37:14 +02:00
+								        #   1st set: items[0:100] would give items at indexes 0 to 99
 								        #   2nd set: items[100:200] would give items at indexes 100 to 199
 								        #   3rd set: items[200:300] would give items at indexes 200 to 239
 								        items_subset: list = req.context.items[first_item:last_item]
-												dspace_statistics_api: Add support for date ranges to /items

You can now POST a JSON request to /items with a list of items and
a date range. This allows the possibility to get view and download
statistics for arbitrary items and arbitrary date ranges.

The JSON request should be in the following format:

    {
        "limit": 100,
        "page": 0,
        "dateFrom": "2020-01-01T00:00:00Z",
        "dateTo": "2020-09-09T00:00:00Z",
        "items": [
            "f44cf173-2344-4eb2-8f00-ee55df32c76f",
            "2324aa41-e9de-4a2b-bc36-16241464683e",
            "8542f9da-9ce1-4614-abf4-f2e3fdb4b305",
            "0fe573e7-042a-4240-a4d9-753b61233908"
        ]
    }

The limit, page, and date parameters are all optional. By default
it will use a limit of 100, page 0, and [* TO *] Solr date range.

											
										
										
											2020-09-25 11:21:11 +02:00
 								        views: dict = get_views(solr_date_string, items_subset)
 								        downloads: dict = get_downloads(solr_date_string, items_subset)
 								        # create a list to hold dicts of item stats
 								        statistics = list()
 								        # iterate over views dict to extract views and use the item id as an
 								        # index to the downloads dict to extract downloads.
 								        for k, v in views.items():
 								            statistics.append({"id": k, "views": v, "downloads": downloads[k]})
 								        message = {
-												Refactor `/items` POST handler to use a before hook

This allows us to do the dirty work of parsing, validating, and
setting local variables from the POST parameters outside of the
on_post function. We then share the parameters via the req.context
object. Functionally it is the same, but readability is better
and it's a neat trick that I could use elsewhere.

See: https://falcon.readthedocs.io/en/stable/user/faq.html#how-can-i-pass-data-from-a-hook-to-a-responder-and-between-hooks

											
										
										
											2020-09-26 17:37:14 +02:00
+								            "currentPage": req.context.page,
-												dspace_statistics_api: Add support for date ranges to /items

You can now POST a JSON request to /items with a list of items and
a date range. This allows the possibility to get view and download
statistics for arbitrary items and arbitrary date ranges.

The JSON request should be in the following format:

    {
        "limit": 100,
        "page": 0,
        "dateFrom": "2020-01-01T00:00:00Z",
        "dateTo": "2020-09-09T00:00:00Z",
        "items": [
            "f44cf173-2344-4eb2-8f00-ee55df32c76f",
            "2324aa41-e9de-4a2b-bc36-16241464683e",
            "8542f9da-9ce1-4614-abf4-f2e3fdb4b305",
            "0fe573e7-042a-4240-a4d9-753b61233908"
        ]
    }

The limit, page, and date parameters are all optional. By default
it will use a limit of 100, page 0, and a [* TO *] Solr date range.

											
										
										
											2020-09-25 11:21:11 +02:00
+								            "totalPages": pages,
-												Refactor `/items` POST handler to use a before hook

This allows us to do the dirty work of parsing, validating, and
setting local variables from the POST parameters outside of the
on_post function. We then share the parameters via the req.context
object. Functionally it is the same, but readability is better
and it's a neat trick that I could use elsewhere.

See: https://falcon.readthedocs.io/en/stable/user/faq.html#how-can-i-pass-data-from-a-hook-to-a-responder-and-between-hooks

											
										
										
											2020-09-26 17:37:14 +02:00
+								            "limit": req.context.limit,
-												dspace_statistics_api: Add support for date ranges to /items

You can now POST a JSON request to /items with a list of items and
a date range. This allows the possibility to get view and download
statistics for arbitrary items and arbitrary date ranges.

The JSON request should be in the following format:

    {
        "limit": 100,
        "page": 0,
        "dateFrom": "2020-01-01T00:00:00Z",
        "dateTo": "2020-09-09T00:00:00Z",
        "items": [
            "f44cf173-2344-4eb2-8f00-ee55df32c76f",
            "2324aa41-e9de-4a2b-bc36-16241464683e",
            "8542f9da-9ce1-4614-abf4-f2e3fdb4b305",
            "0fe573e7-042a-4240-a4d9-753b61233908"
        ]
    }

The limit, page, and date parameters are all optional. By default
it will use a limit of 100, page 0, and a [* TO *] Solr date range.

											
										
										
											2020-09-25 11:21:11 +02:00
+								            "statistics": statistics,
 								        }
 								        resp.status = falcon.HTTP_200
 								        resp.media = message
-												Correct issues highlighted by Flake8

Flake8 validates code style against PEP 8 in order to encourage the
writing of idiomatic Python. For reference, I am currently ignoring
errors about line length (E501) because I feel it makes code harder
to read.

This is the invocation I am using:

    $ flake8 --ignore E501 dspace_statistics_api

											
										
										
											2018-11-03 22:55:23 +01:00
-												Initial commit

Add first working version of the statistics API.

											
										
										
											2018-09-18 13:03:15 +02:00
+								class ItemResource:
-												app.py: Use parameterized URI instead of query for /item

Falcon's get_param_as_int() is really nice in that it gets a query
parameter and does validation for you, but I really wanted to have
cleaner URIs for API routes so I am now using a route URI template
with a field converter. This is cleaner, but means that parameters
not matching the template will return HTTP 404.

See: https://falcon.readthedocs.io/en/stable/api/routing.html#field-converters

											
										
										
											2018-09-23 15:23:33 +02:00
+								    def on_get(self, req, resp, item_id):
-												Initial commit

Add first working version of the statistics API.

											
										
										
											2018-09-18 13:03:15 +02:00
+								        """Handles GET requests"""
-												dspace_statistics_api/app.py: Use UUID

DSpace 6+ uses a UUID for item identifiers instead of an integer so
we need to adapt our PostgreSQL queries to use those. Note that we
can no longer sort results in the "all items" endpoint by ID. Also,
we need to use parameterized psycopg2 queries instead of strings to
support queries with UUIDs properly. To use the Python UUID objects
elsewhere in the code we need to make sure that we cast them to str.

											
										
										
											2020-03-02 10:06:27 +01:00
+								        # Adapt Python’s uuid.UUID type to PostgreSQL’s uuid
 								        # See: https://www.psycopg.org/docs/extras.html
 								        psycopg2.extras.register_uuid()
-												Refactor database code to use a context manager

Instead of opening one global persistent database connection when
the application starts, I am now abstracting it to a class that I can use
in combination with Python's "with" context. Both connections and
cursors are kept for the context of each "with" block and closed
automatically when exiting.

See: https://alysivji.github.io/managing-resources-with-context-managers-pythonic.html
See: http://initd.org/psycopg/docs/connection.html#connection.close

											
										
										
											2018-11-07 16:41:21 +01:00
+								        with DatabaseManager() as db:
 								            db.set_session(readonly=True)
 								            with db.cursor() as cursor:
 								                cursor = db.cursor()
-												Format code with black

											
										
										
											2019-11-27 11:30:06 +01:00
+								                cursor.execute(
-												dspace_statistics_api/app.py: Use UUID

DSpace 6+ uses a UUID for item identifiers instead of an integer so
we need to adapt our PostgreSQL queries to use those. Note that we
can no longer sort results in the "all items" endpoint by ID. Also,
we need to use parameterized psycopg2 queries instead of strings to
support queries with UUIDs properly. To use the Python UUID objects
elsewhere in the code we need to make sure that we cast them to str.

											
										
										
											2020-03-02 10:06:27 +01:00
+								                    "SELECT views, downloads FROM items WHERE id=%s", [str(item_id)]
-												Format code with black

											
										
										
											2019-11-27 11:30:06 +01:00
+								                )
-												Refactor database code to use a context manager

Instead of opening one global persistent database connection when
the application starts, I am now abstracting it to a class that I can use
in combination with Python's "with" context. Both connections and
cursors are kept for the context of each "with" block and closed
automatically when exiting.

See: https://alysivji.github.io/managing-resources-with-context-managers-pythonic.html
See: http://initd.org/psycopg/docs/connection.html#connection.close

											
										
										
											2018-11-07 16:41:21 +01:00
+								                if cursor.rowcount == 0:
 								                    raise falcon.HTTPNotFound(
-												Format code with black

											
										
										
											2019-11-27 11:30:06 +01:00
+								                        title="Item not found",
-												dspace_statistics_api: Use f-strings instead of format()

We had previously been avoiding the f-strings because we needed to
run on Python 3.5 and they were only available in Python 3.6+, but
now the black formatter requires Python 3.6 and all our systems are
running Python 3.6+ anyways.

											
										
										
											2020-03-02 10:24:29 +01:00
+								                        description=f'The item with id "{str(item_id)}" was not found.',
-												Refactor database code to use a context manager

Instead of opening one global persistent database connection when
the application starts, I am now abstracting it to a class that I can use
in combination with Python's "with" context. Both connections and
cursors are kept for the context of each "with" block and closed
automatically when exiting.

See: https://alysivji.github.io/managing-resources-with-context-managers-pythonic.html
See: http://initd.org/psycopg/docs/connection.html#connection.close

											
										
										
											2018-11-07 16:41:21 +01:00
+								                    )
 								                else:
 								                    results = cursor.fetchone()
 								                    statistics = {
-												dspace_statistics_api/app.py: Use UUID

DSpace 6+ uses a UUID for item identifiers instead of an integer so
we need to adapt our PostgreSQL queries to use those. Note that we
can no longer sort results in the "all items" endpoint by ID. Also,
we need to use parameterized psycopg2 queries instead of strings to
support queries with UUIDs properly. To use the Python UUID objects
elsewhere in the code we need to make sure that we cast them to str.

											
										
										
											2020-03-02 10:06:27 +01:00
+								                        "id": str(item_id),
-												Format code with black

											
										
										
											2019-11-27 11:30:06 +01:00
+								                        "views": results["views"],
 								                        "downloads": results["downloads"],
-												Refactor database code to use a context manager

Instead of opening one global persistent database connection when
the application starts, I am now abstracting it to a class that I can use
in combination with Python's "with" context. Both connections and
cursors are kept for the context of each "with" block and closed
automatically when exiting.

See: https://alysivji.github.io/managing-resources-with-context-managers-pythonic.html
See: http://initd.org/psycopg/docs/connection.html#connection.close

											
										
										
											2018-11-07 16:41:21 +01:00
+								                    }
 								                    resp.media = statistics
-												Initial commit

Add first working version of the statistics API.

											
										
										
											2018-09-18 13:03:15 +02:00
-												Correct issues highlighted by Flake8

Flake8 validates code style against PEP 8 in order to encourage the
writing of idiomatic Python. For reference, I am currently ignoring
errors about line length (E501) because I feel it makes code harder
to read.

This is the invocation I am using:

    $ flake8 --ignore E501 dspace_statistics_api

											
										
										
											2018-11-03 22:55:23 +01:00
-												Add "application" alias to API object

By default gunicorn looks for an "application" object to run, so this
saves us having to type api:app.

											
										
										
											2018-10-26 18:21:27 +02:00
+								api = application = falcon.API()
-												Format code with black

											
										
										
											2019-11-27 11:30:06 +01:00
+								api.add_route("/", RootResource())
 								api.add_route("/items", AllItemsResource())
-												dspace_statistics_api/app.py: Use UUID

DSpace 6+ uses a UUID for item identifiers instead of an integer so
we need to adapt our PostgreSQL queries to use those. Note that we
can no longer sort results in the "all items" endpoint by ID. Also,
we need to use parameterized psycopg2 queries instead of strings to
support queries with UUIDs properly. To use the Python UUID objects
elsewhere in the code we need to make sure that we cast them to str.

											
										
										
											2020-03-02 10:06:27 +01:00
+								api.add_route("/item/{item_id:uuid}", ItemResource())
-												Add vim modeline to all Python files

Uses four spaces for tab and shift widths, and turns on expansion of
tabs to spaces.

											
										
										
											2018-09-23 10:33:26 +02:00
 								# vim: set sw=4 ts=4 expandtab: