dspace-statistics-api/dspace_statistics_api/app.py

import falcon
import psycopg2.extras
from falcon_swagger_ui import register_swaggerui_app

from .database import DatabaseManager
from .stats import get_downloads, get_views
from .util import set_statistics_scope, validate_post_parameters
from .config import VERSION


class RootResource:
    """Serve the static HTML documentation page on the API root."""

    def on_get(self, req, resp):
        """Handle GET requests by returning docs/index.html as an HTML page.

        The file path is relative to the process's current working directory.
        """
        with open("dspace_statistics_api/docs/index.html", "r") as html_file:
            page = html_file.read()

        resp.content_type = "text/html"
        resp.status = falcon.HTTP_200
        resp.body = page


class StatusResource:
    """Report status information about the running API."""

    def on_get(self, req, resp):
        """Handle GET requests by returning the API version as the response media."""
        resp.media = {"version": VERSION}
        resp.status = falcon.HTTP_200


class OpenAPIJSONResource:
    """Serve the OpenAPI JSON schema document."""

    def on_get(self, req, resp):
        """Handle GET requests by returning the raw OpenAPI schema.

        The schema is read from ``dspace_statistics_api/docs/openapi.json``,
        a path relative to the process's current working directory.
        """
        resp.status = falcon.HTTP_200
        # The payload is JSON, so advertise it as JSON rather than as HTML.
        resp.content_type = falcon.MEDIA_JSON
        # JSON interchange text is UTF-8; do not depend on the locale default.
        with open(
            "dspace_statistics_api/docs/openapi.json", "r", encoding="utf-8"
        ) as f:
            resp.body = f.read()


class AllStatisticsResource:
    """Paged view/download statistics for items, communities, or collections.

    GET pages through every row stored in the database for the current scope.
    POST returns statistics for a caller-supplied list of elements, optionally
    restricted to a date range.
    """

    @falcon.before(set_statistics_scope)
    def on_get(self, req, resp):
        """Return one page of statistics read from the database.

        Query parameters:
            limit: number of results per page, 1 to 100 (100 when absent).
            page: zero-based page number (0 when absent).

        Falcon's ``get_param_as_int`` raises an HTTP 400 error when a supplied
        value is not an integer or lies outside those bounds.
        """
        per_page = req.get_param_as_int("limit", min_value=1, max_value=100) or 100
        page_number = req.get_param_as_int("page", min_value=0) or 0

        with DatabaseManager() as db:
            db.set_session(readonly=True)

            with db.cursor() as cursor:
                # Count every row in the scope's table to estimate the page total.
                # NOTE(review): round() rounds half to even and rounds a small
                # trailing partial page down, so this is only an estimate, and
                # on_post truncates instead. Confirm the intended meaning of
                # "totalPages" before changing either.
                cursor.execute(f"SELECT COUNT(id) FROM {req.context.statistics_scope}")
                total_pages = round(cursor.fetchone()[0] / per_page)

                # Ordering by id keeps LIMIT/OFFSET paging stable between calls.
                cursor.execute(
                    f"SELECT id, views, downloads FROM {req.context.statistics_scope} ORDER BY id LIMIT %s OFFSET %s",
                    [per_page, per_page * page_number],
                )

                # The cursor is iterable, so build the result list directly.
                rows = [
                    {
                        "id": str(row["id"]),
                        "views": row["views"],
                        "downloads": row["downloads"],
                    }
                    for row in cursor
                ]

        resp.media = {
            "currentPage": page_number,
            "totalPages": total_pages,
            "limit": per_page,
            "statistics": rows,
        }

    @falcon.before(set_statistics_scope)
    @falcon.before(validate_post_parameters)
    def on_post(self, req, resp):
        """Return statistics for one page of the POSTed elements.

        Two `before` hooks run first: one sets the statistics "scope" (items,
        communities, or collections, depending on the request) and the other
        validates the POST parameters. This handler reads dateFrom, dateTo,
        limit, page, elements, views_facet_field and downloads_facet_field
        from ``req.context`` (presumably populated by those hooks; verify in
        util.py).
        """
        ctx = req.context

        # Solr range syntax is [from TO to]; "*" leaves that side open, so a
        # request without dates becomes [* TO *].
        range_start = ctx.dateFrom or "*"
        range_end = ctx.dateTo or "*"
        solr_date_string = f"[{range_start} TO {range_end}]"

        # Work out which slice of the POSTed elements this page covers. Slice
        # bounds past the end of a list are clamped, so with 240 elements and
        # a limit of 100:
        #
        #   page 0: elements[0:100]   -> indexes 0 to 99
        #   page 1: elements[100:200] -> indexes 100 to 199
        #   page 2: elements[200:300] -> indexes 200 to 239
        page_size = ctx.limit
        slice_start = ctx.page * page_size
        slice_stop = slice_start + page_size
        total_pages = int(len(ctx.elements) / page_size)
        requested = ctx.elements[slice_start:slice_stop]

        view_counts = get_views(solr_date_string, requested, ctx.views_facet_field)
        download_counts = get_downloads(
            solr_date_string, requested, ctx.downloads_facet_field
        )

        # Walk the views mapping and look up each element id in the downloads
        # mapping; every id in views must therefore also be a key in downloads.
        statistics = [
            {
                "id": element_id,
                "views": view_total,
                "downloads": download_counts[element_id],
            }
            for element_id, view_total in view_counts.items()
        ]

        resp.status = falcon.HTTP_200
        resp.media = {
            "currentPage": ctx.page,
            "totalPages": total_pages,
            "limit": page_size,
            "statistics": statistics,
        }


class SingleStatisticsResource:
    """Statistics for a single item, community, or collection."""

    @falcon.before(set_statistics_scope)
    def on_get(self, req, resp, id_):
        """Return the views and downloads stored for the element ``id_``.

        Args:
            req: Falcon request. ``req.context.database`` names the table to
                query and ``req.context.statistics_scope`` is used in the
                error text (both presumably set by the ``set_statistics_scope``
                hook; verify in util.py).
            resp: Falcon response; ``resp.media`` receives the statistics dict.
            id_: identifier of the requested element; it is passed to the
                query and echoed in the response as a string.

        Raises:
            falcon.HTTPNotFound: the query matched no row for this id.
        """

        # Adapt Python's uuid.UUID type to PostgreSQL's uuid
        # See: https://www.psycopg.org/docs/extras.html
        psycopg2.extras.register_uuid()

        with DatabaseManager() as db:
            db.set_session(readonly=True)

            # Run the query on the context-managed cursor only, so the cursor
            # that does the work is the one closed when the block exits.
            with db.cursor() as cursor:
                cursor.execute(
                    f"SELECT views, downloads FROM {req.context.database} WHERE id=%s",
                    [str(id_)],
                )

                if cursor.rowcount == 0:
                    raise falcon.HTTPNotFound(
                        title=f"{req.context.statistics_scope} not found",
                        description=f'The {req.context.statistics_scope} with id "{str(id_)}" was not found.',
                    )

                results = cursor.fetchone()

                resp.media = {
                    "id": str(id_),
                    "views": results["views"],
                    "downloads": results["downloads"],
                }


# Both names are bound to the same Falcon application object.
api = application = falcon.API()

# Swagger configuration
SWAGGERUI_URL = "/swagger"  # without trailing slash
SCHEMA_URL = "/docs/openapi.json"

# Documentation, status, and schema routes
api.add_route("/", RootResource())
api.add_route("/status", StatusResource())
api.add_route(SCHEMA_URL, OpenAPIJSONResource())

# Item, community, and collection routes: each scope gets a listing route and
# a single-element route keyed by UUID, each with its own resource instance.
for _listing_route, _single_route in (
    ("/items", "/item/{id_:uuid}"),
    ("/communities", "/community/{id_:uuid}"),
    ("/collections", "/collection/{id_:uuid}"),
):
    api.add_route(_listing_route, AllStatisticsResource())
    api.add_route(_single_route, SingleStatisticsResource())

# Mount the Swagger UI and point it at the schema route registered above.
register_swaggerui_app(
    api,
    SWAGGERUI_URL,
    SCHEMA_URL,
    config={"supportedSubmitMethods": ["get", "post"]},
)

# vim: set sw=4 ts=4 expandtab:
-												Initial commit

Add first working version of the statistics API.

											
										
										
											2018-09-18 13:03:15 +02:00
+								import falcon
-												Move all imports to top of file

A few months ago I had an issue setting up mocking because I was
trying to be clever importing these libraries only when I needed
them rather than at the global scope. Someone pointed out to me
that if the imports are at the top of the file Falcon will load
them once when the WSGI server starts, whereas if they are in the
on_get() or on_post() they will load for every request! Also, it
seems that PEP8 recommends keeping imports at the top of the file
anyways, so I will just do that.

Imports sorted with isort.

See: https://www.python.org/dev/peps/pep-0008/#imports

											
										
										
											2020-12-18 21:42:06 +01:00
+								import psycopg2.extras
-												Add Swagger UI on /swagger

This includes a Swagger UI with an OpenAPI 3.0 JSON schema for easy
interactive demonstration and testing of the API. The JSON schema
was created with the standalone swagger-editor. Includes tests to
make sure that the /swagger and /docs/openapi.json paths are acce-
ssible.

											
										
										
											2020-12-22 10:18:47 +01:00
+								from falcon_swagger_ui import register_swaggerui_app
-												Initial commit

Add first working version of the statistics API.

											
										
										
											2018-09-18 13:03:15 +02:00
-												Sort imports with isort

											
										
										
											2019-11-27 11:31:04 +01:00
+								from .database import DatabaseManager
-												Rename items.py to stats.py

It is no longer used only for item-related statistics functions.

											
										
										
											2020-12-20 15:28:56 +01:00
+								from .stats import get_downloads, get_views
-												dspace_statistics_api/app.py: Run isort

											
										
										
											2020-12-20 15:29:35 +01:00
+								from .util import set_statistics_scope, validate_post_parameters
-												Add /status route

Currently this only prints the API version.

											
										
										
											2020-12-22 10:30:09 +01:00
+								from .config import VERSION
-												Sort imports with isort

											
										
										
											2019-11-27 11:31:04 +01:00
-												Correct issues highlighted by Flake8

Flake8 validates code style against PEP 8 in order to encourage the
writing of idiomatic Python. For reference, I am currently ignoring
errors about line length (E501) because I feel it makes code harder
to read.

This is the invocation I am using:

    $ flake8 --ignore E501 dspace_statistics_api

											
										
										
											2018-11-03 22:55:23 +01:00
-												Add basic API documentation on root (/)

I had imagined plugging in an interactive Swagger or OpenAPI instance
here, but that's actually much more involved in Falcon than I want to
deal with right now.

											
										
										
											2018-10-31 23:19:39 +01:00
+								class RootResource:
 								    def on_get(self, req, resp):
 								        resp.status = falcon.HTTP_200
-												Format code with black

											
										
										
											2019-11-27 11:30:06 +01:00
+								        resp.content_type = "text/html"
 								        with open("dspace_statistics_api/docs/index.html", "r") as f:
-												Add basic API documentation on root (/)

I had imagined plugging in an interactive Swagger or OpenAPI instance
here, but that's actually much more involved in Falcon than I want to
deal with right now.

											
										
										
											2018-10-31 23:19:39 +01:00
+								            resp.body = f.read()
-												Correct issues highlighted by Flake8

Flake8 validates code style against PEP 8 in order to encourage the
writing of idiomatic Python. For reference, I am currently ignoring
errors about line length (E501) because I feel it makes code harder
to read.

This is the invocation I am using:

    $ flake8 --ignore E501 dspace_statistics_api

											
										
										
											2018-11-03 22:55:23 +01:00
-												Add /status route

Currently this only prints the API version.

											
										
										
											2020-12-22 10:30:09 +01:00
+								class StatusResource:
 								    def on_get(self, req, resp):
 								        message = {"version": VERSION}
 								        resp.status = falcon.HTTP_200
 								        resp.media = message
-												Add Swagger UI on /swagger

This includes a Swagger UI with an OpenAPI 3.0 JSON schema for easy
interactive demonstration and testing of the API. The JSON schema
was created with the standalone swagger-editor. Includes tests to
make sure that the /swagger and /docs/openapi.json paths are acce-
ssible.

											
										
										
											2020-12-22 10:18:47 +01:00
+								class OpenAPIJSONResource:
 								    def on_get(self, req, resp):
 								        resp.status = falcon.HTTP_200
 								        resp.content_type = "text/html"
 								        with open("dspace_statistics_api/docs/openapi.json", "r") as f:
 								            resp.body = f.read()
-												Add communities and collections support to API

The basic logic is similar to items, where you can request single
item statistics with a UUID, all item statistics, and item statis-
tics for a list of items (optionally with a date range). Most of
the item code was re-purposed to work on "elements", which can be
items, communities, or collections depending on the request, with
the use of Falcon's `before` hooks to set the statistics scope so
we know how to behave for the current request.

Other than the minor difference in facet fields, another issue I
had with communities and collections is that the owningComm and
owningColl fields are multi-valued (unlike items' id field). This
means that, when you facet the results of your query, Solr returns
ids that seem unrelated, but are actually present in the field, so
I had to make sure I checked all returned ids to see if they were
in the user's POSTed elements list.

TODO:
  - Add tests
  - Revise docstrings
  - Refactor items.py as it is now generic

											
										
										
											2020-12-20 15:14:46 +01:00
+								class AllStatisticsResource:
 								    @falcon.before(set_statistics_scope)
-												app.py: Add route to page through all item statistics

This route exposes all item statistics and uses the limit and offset
parameters to control paging through the result set. The logic here
is extremely easy thanks to the brilliant LIMIT and OFFSET features
of SQLite (of course the SQL query sorts the results by some unique
field to ensure the order is already the same).

											
										
										
											2018-09-24 15:07:26 +02:00
+								    def on_get(self, req, resp):
 								        """Handles GET requests"""
 								        # Return HTTPBadRequest if id parameter is not present and valid
-												Make sure limit is between 1 and 100

We were not properly checking whether the limit was greater than 0
in all cases.

											
										
										
											2020-11-02 20:59:20 +01:00
+								        limit = req.get_param_as_int("limit", min_value=1, max_value=100) or 100
-												dspace_statistics_api/app.py: Fix Falcon 2.0 syntax

See: dspace_statistics_api/app.py

											
										
										
											2019-03-17 22:23:23 +01:00
+								        page = req.get_param_as_int("page", min_value=0) or 0
-												app.py: Add route to page through all item statistics

This route exposes all item statistics and uses the limit and offset
parameters to control paging through the result set. The logic here
is extremely easy thanks to the brilliant LIMIT and OFFSET features
of SQLite (of course the SQL query sorts the results by some unique
field to ensure the order is already the same).

											
										
										
											2018-09-24 15:07:26 +02:00
+								        offset = limit * page
-												Refactor database code to use a context manager

Instead of opening one global persistent database connection when
the application starts, I am now abstracting it to a class that I can use
in combination with Python's "with" context. Both connections and
cursors are kept for the context of each "with" block and closed
automatically when exiting.

See: https://alysivji.github.io/managing-resources-with-context-managers-pythonic.html
See: http://initd.org/psycopg/docs/connection.html#connection.close

											
										
										
											2018-11-07 16:41:21 +01:00
+								        with DatabaseManager() as db:
 								            db.set_session(readonly=True)
-												app.py: Add route to page through all item statistics

This route exposes all item statistics and uses the limit and offset
parameters to control paging through the result set. The logic here
is extremely easy thanks to the brilliant LIMIT and OFFSET features
of SQLite (of course the SQL query sorts the results by some unique
field to ensure the order is already the same).

											
										
										
											2018-09-24 15:07:26 +02:00
-												Refactor database code to use a context manager

Instead of opening one global persistent database connection when
the application starts, I am now abstracting it to a class that I can use
in combination with Python's "with" context. Both connections and
cursors are kept for the context of each "with" block and closed
automatically when exiting.

See: https://alysivji.github.io/managing-resources-with-context-managers-pythonic.html
See: http://initd.org/psycopg/docs/connection.html#connection.close

											
										
										
											2018-11-07 16:41:21 +01:00
+								            with db.cursor() as cursor:
-												Add communities and collections support to API

The basic logic is similar to items, where you can request single
item statistics with a UUID, all item statistics, and item statis-
tics for a list of items (optionally with a date range). Most of
the item code was re-purposed to work on "elements", which can be
items, communities, or collections depending on the request, with
the use of Falcon's `before` hooks to set the statistics scope so
we know how to behave for the current request.

Other than the minor difference in facet fields, another issue I
had with communities and collections is that the owningComm and
owningColl fields are multi-valued (unlike items' id field). This
means that, when you facet the results of your query, Solr returns
ids that seem unrelated, but are actually present in the field, so
I had to make sure I checked all returned ids to see if they were
in the user's POSTed elements list.

TODO:
  - Add tests
  - Revise docstrings
  - Refactor items.py as it is now generic

											
										
										
											2020-12-20 15:14:46 +01:00
+								                # get total number of communities/collections/items so we can estimate the pages
 								                cursor.execute(f"SELECT COUNT(id) FROM {req.context.statistics_scope}")
-												Refactor database code to use a context manager

Instead of opening one global persistent database connection when
the application starts, I am now abstracting it to a class that I can use
in combination with Python's "with" context. Both connections and
cursors are kept for the context of each "with" block and closed
automatically when exiting.

See: https://alysivji.github.io/managing-resources-with-context-managers-pythonic.html
See: http://initd.org/psycopg/docs/connection.html#connection.close

											
										
										
											2018-11-07 16:41:21 +01:00
+								                pages = round(cursor.fetchone()[0] / limit)
-												app.py: Add route to page through all item statistics

This route exposes all item statistics and uses the limit and offset
parameters to control paging through the result set. The logic here
is extremely easy thanks to the brilliant LIMIT and OFFSET features
of SQLite (of course the SQL query sorts the results by some unique
field to ensure the order is already the same).

											
										
										
											2018-09-24 15:07:26 +02:00
-												dspace_statistics_api/app.py: Use UUID

DSpace 6+ uses a UUID for item identifiers instead of an integer so
we need to adapt our PostgreSQL queries to use those. Note that we
can no longer sort results in the "all items" endpoint by ID. Also,
we need to use parameterized psycopg2 queries instead of strings to
support queries with UUIDs properly. To use the Python UUID objects
elsewhere in the code we need to make sure that we cast them to str.

											
										
										
											2020-03-02 10:06:27 +01:00
+								                # get statistics and use limit and offset to page through results
-												Format code with black

											
										
										
											2019-11-27 11:30:06 +01:00
+								                cursor.execute(
-												Add communities and collections support to API

The basic logic is similar to items, where you can request single
item statistics with a UUID, all item statistics, and item statis-
tics for a list of items (optionally with a date range). Most of
the item code was re-purposed to work on "elements", which can be
items, communities, or collections depending on the request, with
the use of Falcon's `before` hooks to set the statistics scope so
we know how to behave for the current request.

Other than the minor difference in facet fields, another issue I
had with communities and collections is that the owningComm and
owningColl fields are multi-valued (unlike items' id field). This
means that, when you facet the results of your query, Solr returns
ids that seem unrelated, but are actually present in the field, so
I had to make sure I checked all returned ids to see if they were
in the user's POSTed elements list.

TODO:
  - Add tests
  - Revise docstrings
  - Refactor items.py as it is now generic

											
										
										
											2020-12-20 15:14:46 +01:00
+								                    f"SELECT id, views, downloads FROM {req.context.statistics_scope} ORDER BY id LIMIT %s OFFSET %s",
-												dspace_statistics_api/app.py: Use parameterized SQL queries

This is a better way to run SQL queries because psycopg2 takes care
of the quoting for us.

											
										
										
											2020-03-02 10:16:05 +01:00
+								                    [limit, offset],
-												Format code with black

											
										
										
											2019-11-27 11:30:06 +01:00
+								                )
-												app.py: Add route to page through all item statistics

This route exposes all item statistics and uses the limit and offset
parameters to control paging through the result set. The logic here
is extremely easy thanks to the brilliant LIMIT and OFFSET features
of SQLite (of course the SQL query sorts the results by some unique
field to ensure the order is already the same).

											
										
										
											2018-09-24 15:07:26 +02:00
-												Add communities and collections support to API

The basic logic is similar to items, where you can request single
item statistics with a UUID, all item statistics, and item statis-
tics for a list of items (optionally with a date range). Most of
the item code was re-purposed to work on "elements", which can be
items, communities, or collections depending on the request, with
the use of Falcon's `before` hooks to set the statistics scope so
we know how to behave for the current request.

Other than the minor difference in facet fields, another issue I
had with communities and collections is that the owningComm and
owningColl fields are multi-valued (unlike items' id field). This
means that, when you facet the results of your query, Solr returns
ids that seem unrelated, but are actually present in the field, so
I had to make sure I checked all returned ids to see if they were
in the user's POSTed elements list.

TODO:
  - Add tests
  - Revise docstrings
  - Refactor items.py as it is now generic

											
										
										
											2020-12-20 15:14:46 +01:00
+								                # create a list to hold dicts of stats
-												Refactor database code to use a context manager

Instead of opening one global persistent database connection when
the application starts, I am now abstracting it to a class that I can use
in combination with Python's "with" context. Both connections and
cursors are kept for the context of each "with" block and closed
automatically when exiting.

See: https://alysivji.github.io/managing-resources-with-context-managers-pythonic.html
See: http://initd.org/psycopg/docs/connection.html#connection.close

											
										
										
											2018-11-07 16:41:21 +01:00
+								                statistics = list()
-												app.py: Add route to page through all item statistics

This route exposes all item statistics and uses the limit and offset
parameters to control paging through the result set. The logic here
is extremely easy thanks to the brilliant LIMIT and OFFSET features
of SQLite (of course the SQL query sorts the results by some unique
field to ensure the order is already the same).

											
										
										
											2018-09-24 15:07:26 +02:00
-												Refactor database code to use a context manager

Instead of opening one global persistent database connection when
the application starts, I am now abstracting it to a class that I can use
in combination with Python's "with" context. Both connections and
cursors are kept for the context of each "with" block and closed
automatically when exiting.

See: https://alysivji.github.io/managing-resources-with-context-managers-pythonic.html
See: http://initd.org/psycopg/docs/connection.html#connection.close

											
										
										
											2018-11-07 16:41:21 +01:00
+								                # iterate over results and build statistics object
-												Add communities and collections support to API

The basic logic is similar to items, where you can request single
item statistics with a UUID, all item statistics, and item statis-
tics for a list of items (optionally with a date range). Most of
the item code was re-purposed to work on "elements", which can be
items, communities, or collections depending on the request, with
the use of Falcon's `before` hooks to set the statistics scope so
we know how to behave for the current request.

Other than the minor difference in facet fields, another issue I
had with communities and collections is that the owningComm and
owningColl fields are multi-valued (unlike items' id field). This
means that, when you facet the results of your query, Solr returns
ids that seem unrelated, but are actually present in the field, so
I had to make sure I checked all returned ids to see if they were
in the user's POSTed elements list.

TODO:
  - Add tests
  - Revise docstrings
  - Refactor items.py as it is now generic

											
										
										
											2020-12-20 15:14:46 +01:00
+								                for result in cursor:
-												Format code with black

											
										
										
											2019-11-27 11:30:06 +01:00
+								                    statistics.append(
 								                        {
-												Add communities and collections support to API

The basic logic is similar to items, where you can request single
item statistics with a UUID, all item statistics, and item statis-
tics for a list of items (optionally with a date range). Most of
the item code was re-purposed to work on "elements", which can be
items, communities, or collections depending on the request, with
the use of Falcon's `before` hooks to set the statistics scope so
we know how to behave for the current request.

Other than the minor difference in facet fields, another issue I
had with communities and collections is that the owningComm and
owningColl fields are multi-valued (unlike items' id field). This
means that, when you facet the results of your query, Solr returns
ids that seem unrelated, but are actually present in the field, so
I had to make sure I checked all returned ids to see if they were
in the user's POSTed elements list.

TODO:
  - Add tests
  - Revise docstrings
  - Refactor items.py as it is now generic

											
										
										
											2020-12-20 15:14:46 +01:00
+								                            "id": str(result["id"]),
 								                            "views": result["views"],
 								                            "downloads": result["downloads"],
-												Format code with black

											
										
										
											2019-11-27 11:30:06 +01:00
+								                        }
 								                    )
-												app.py: Iterate directly on cursor

We don't need to create an intermediate variable for the results of
the SQL query because psycopg2's cursor is iterable.

See: http://initd.org/psycopg/docs/cursor.html

											
										
										
											2018-09-27 10:03:44 +02:00
-												app.py: Add route to page through all item statistics

This route exposes all item statistics and uses the limit and offset
parameters to control paging through the result set. The logic here
is extremely easy thanks to the brilliant LIMIT and OFFSET features
of SQLite (of course the SQL query sorts the results by some unique
field to ensure the order is already the same).

											
										
										
											2018-09-24 15:07:26 +02:00
+								        message = {
-												Format code with black

											
										
										
											2019-11-27 11:30:06 +01:00
+								            "currentPage": page,
 								            "totalPages": pages,
 								            "limit": limit,
 								            "statistics": statistics,
-												app.py: Add route to page through all item statistics

This route exposes all item statistics and uses the limit and offset
parameters to control paging through the result set. The logic here
is extremely easy thanks to the brilliant LIMIT and OFFSET features
of SQLite (of course the SQL query sorts the results by some unique
field to ensure the order is already the same).

											
										
										
											2018-09-24 15:07:26 +02:00
+								        }
 								        resp.media = message
-												Add communities and collections support to API

The basic logic is similar to items, where you can request single
item statistics with a UUID, all item statistics, and item statis-
tics for a list of items (optionally with a date range). Most of
the item code was re-purposed to work on "elements", which can be
items, communities, or collections depending on the request, with
the use of Falcon's `before` hooks to set the statistics scope so
we know how to behave for the current request.

Other than the minor difference in facet fields, another issue I
had with communities and collections is that the owningComm and
owningColl fields are multi-valued (unlike items' id field). This
means that, when you facet the results of your query, Solr returns
ids that seem unrelated, but are actually present in the field, so
I had to make sure I checked all returned ids to see if they were
in the user's POSTed elements list.

TODO:
  - Add tests
  - Revise docstrings
  - Refactor items.py as it is now generic

											
										
										
											2020-12-20 15:14:46 +01:00
+								    @falcon.before(set_statistics_scope)
 								    @falcon.before(validate_post_parameters)
-												dspace_statistics_api: Add support for date ranges to /items

You can now POST a JSON request to /items with a list of items and
a date range. This allows the possibility to get view and download
statistics for arbitrary items and arbitrary date ranges.

The JSON request should be in the following format:

    {
        "limit": 100,
        "page": 0,
        "dateFrom": "2020-01-01T00:00:00Z",
        "dateTo": "2020-09-09T00:00:00Z",
        "items": [
            "f44cf173-2344-4eb2-8f00-ee55df32c76f",
            "2324aa41-e9de-4a2b-bc36-16241464683e",
            "8542f9da-9ce1-4614-abf4-f2e3fdb4b305",
            "0fe573e7-042a-4240-a4d9-753b61233908"
        ]
    }

The limit, page, and date parameters are all optional. By default
it will use a limit of 100, page 0, and [* TO *] Solr date range.

											
										
										
											2020-09-25 11:21:11 +02:00
+								    def on_post(self, req, resp):
-												Add communities and collections support to API

The basic logic is similar to items, where you can request single
item statistics with a UUID, all item statistics, and item statis-
tics for a list of items (optionally with a date range). Most of
the item code was re-purposed to work on "elements", which can be
items, communities, or collections depending on the request, with
the use of Falcon's `before` hooks to set the statistics scope so
we know how to behave for the current request.

Other than the minor difference in facet fields, another issue I
had with communities and collections is that the owningComm and
owningColl fields are multi-valued (unlike items' id field). This
means that, when you facet the results of your query, Solr returns
ids that seem unrelated, but are actually present in the field, so
I had to make sure I checked all returned ids to see if they were
in the user's POSTed elements list.

TODO:
  - Add tests
  - Revise docstrings
  - Refactor items.py as it is now generic

											
										
										
											2020-12-20 15:14:46 +01:00
+								        """Handles POST requests.
 								        Uses two `before` hooks to set the statistics "scope" and validate the
 								        POST parameters. The "scope" is the type of statistics we want, which
 								        will be items, communities, or collections, depending on the request.
 								        """
-												dspace_statistics_api: Add support for date ranges to /items

You can now POST a JSON request to /items with a list of items and
a date range. This allows the possibility to get view and download
statistics for arbitrary items and arbitrary date ranges.

The JSON request should be in the following format:

    {
        "limit": 100,
        "page": 0,
        "dateFrom": "2020-01-01T00:00:00Z",
        "dateTo": "2020-09-09T00:00:00Z",
        "items": [
            "f44cf173-2344-4eb2-8f00-ee55df32c76f",
            "2324aa41-e9de-4a2b-bc36-16241464683e",
            "8542f9da-9ce1-4614-abf4-f2e3fdb4b305",
            "0fe573e7-042a-4240-a4d9-753b61233908"
        ]
    }

The limit, page, and date parameters are all optional. By default
it will use a limit of 100, page 0, and [* TO *] Solr date range.

											
										
										
											2020-09-25 11:21:11 +02:00
 								        # Build the Solr date string, ie: [* TO *]
-												Refactor `/items` POST handler to use a before hook

This allows us to do the dirty work of parsing, validating, and
setting local variables from the POST parameters outside of the
on_post function. We then share the parameters via the req.context
object. Functionally it is the same, but readability is better
and it's a neat trick that I could use elsewhere.

See: https://falcon.readthedocs.io/en/stable/user/faq.html#how-can-i-pass-data-from-a-hook-to-a-responder-and-between-hooks

											
										
										
											2020-09-26 17:37:14 +02:00
+								        if req.context.dateFrom and req.context.dateTo:
 								            solr_date_string = f"[{req.context.dateFrom} TO {req.context.dateTo}]"
 								        elif not req.context.dateFrom and req.context.dateTo:
 								            solr_date_string = f"[* TO {req.context.dateTo}]"
 								        elif req.context.dateFrom and not req.context.dateTo:
 								            solr_date_string = f"[{req.context.dateFrom} TO *]"
-												dspace_statistics_api: Add support for date ranges to /items

You can now POST a JSON request to /items with a list of items and
a date range. This allows the possibility to get view and download
statistics for arbitrary items and arbitrary date ranges.

The JSON request should be in the following format:

    {
        "limit": 100,
        "page": 0,
        "dateFrom": "2020-01-01T00:00:00Z",
        "dateTo": "2020-09-09T00:00:00Z",
        "items": [
            "f44cf173-2344-4eb2-8f00-ee55df32c76f",
            "2324aa41-e9de-4a2b-bc36-16241464683e",
            "8542f9da-9ce1-4614-abf4-f2e3fdb4b305",
            "0fe573e7-042a-4240-a4d9-753b61233908"
        ]
    }

The limit, page, and date parameters are all optional. By default
it will use a limit of 100, page 0, and [* TO *] Solr date range.

											
										
										
											2020-09-25 11:21:11 +02:00
+								        else:
 								            solr_date_string = "[* TO *]"
 								        # Helper variables to make working with pages/items/results easier and
 								        # to make the code easier to understand
-												Add communities and collections support to API

The basic logic is similar to items, where you can request single
item statistics with a UUID, all item statistics, and item statis-
tics for a list of items (optionally with a date range). Most of
the item code was re-purposed to work on "elements", which can be
items, communities, or collections depending on the request, with
the use of Falcon's `before` hooks to set the statistics scope so
we know how to behave for the current request.

Other than the minor difference in facet fields, another issue I
had with communities and collections is that the owningComm and
owningColl fields are multi-valued (unlike items' id field). This
means that, when you facet the results of your query, Solr returns
ids that seem unrelated, but are actually present in the field, so
I had to make sure I checked all returned ids to see if they were
in the user's POSTed elements list.

TODO:
  - Add tests
  - Revise docstrings
  - Refactor items.py as it is now generic

											
										
										
											2020-12-20 15:14:46 +01:00
+								        number_of_elements: int = len(req.context.elements)
 								        pages: int = int(number_of_elements / req.context.limit)
 								        first_element: int = req.context.page * req.context.limit
 								        last_element: int = first_element + req.context.limit
-												dspace_statistics_api: Add support for date ranges to /items

You can now POST a JSON request to /items with a list of items and
a date range. This allows the possibility to get view and download
statistics for arbitrary items and arbitrary date ranges.

The JSON request should be in the following format:

    {
        "limit": 100,
        "page": 0,
        "dateFrom": "2020-01-01T00:00:00Z",
        "dateTo": "2020-09-09T00:00:00Z",
        "items": [
            "f44cf173-2344-4eb2-8f00-ee55df32c76f",
            "2324aa41-e9de-4a2b-bc36-16241464683e",
            "8542f9da-9ce1-4614-abf4-f2e3fdb4b305",
            "0fe573e7-042a-4240-a4d9-753b61233908"
        ]
    }

The limit, page, and date parameters are all optional. By default
it will use a limit of 100, page 0, and [* TO *] Solr date range.

											
										
										
											2020-09-25 11:21:11 +02:00
+								        # Get a subset of the POSTed items based on our limit. Note that Python
 								        # list slicing and indexing are both zero based, but the first and last
 								        # items in a slice can be confusing. See this ASCII diagram:
 								        #
 								        #                 +---+---+---+---+---+---+
 								        #                 | P | y | t | h | o | n |
 								        #                 +---+---+---+---+---+---+
 								        # Slice position: 0   1   2   3   4   5   6
 								        # Index position:   0   1   2   3   4   5
 								        #
-												Add communities and collections support to API

The basic logic is similar to items, where you can request single
item statistics with a UUID, all item statistics, and item statis-
tics for a list of items (optionally with a date range). Most of
the item code was re-purposed to work on "elements", which can be
items, communities, or collections depending on the request, with
the use of Falcon's `before` hooks to set the statistics scope so
we know how to behave for the current request.

Other than the minor difference in facet fields, another issue I
had with communities and collections is that the owningComm and
owningColl fields are multi-valued (unlike items' id field). This
means that, when you facet the results of your query, Solr returns
ids that seem unrelated, but are actually present in the field, so
I had to make sure I checked all returned ids to see if they were
in the user's POSTed elements list.

TODO:
  - Add tests
  - Revise docstrings
  - Refactor items.py as it is now generic

											
										
										
											2020-12-20 15:14:46 +01:00
+								        # So if we have a list of items with 240 items:
-												dspace_statistics_api: Add support for date ranges to /items

You can now POST a JSON request to /items with a list of items and
a date range. This allows the possibility to get view and download
statistics for arbitrary items and arbitrary date ranges.

The JSON request should be in the following format:

    {
        "limit": 100,
        "page": 0,
        "dateFrom": "2020-01-01T00:00:00Z",
        "dateTo": "2020-09-09T00:00:00Z",
        "items": [
            "f44cf173-2344-4eb2-8f00-ee55df32c76f",
            "2324aa41-e9de-4a2b-bc36-16241464683e",
            "8542f9da-9ce1-4614-abf4-f2e3fdb4b305",
            "0fe573e7-042a-4240-a4d9-753b61233908"
        ]
    }

The limit, page, and date parameters are all optional. By default
it will use a limit of 100, page 0, and [* TO *] Solr date range.

											
										
										
											2020-09-25 11:21:11 +02:00
+								        #
-												Refactor `/items` POST handler to use a before hook

This allows us to do the dirty work of parsing, validating, and
setting local variables from the POST parameters outside of the
on_post function. We then share the parameters via the req.context
object. Functionally it is the same, but readability is better
and it's a neat trick that I could use elsewhere.

See: https://falcon.readthedocs.io/en/stable/user/faq.html#how-can-i-pass-data-from-a-hook-to-a-responder-and-between-hooks

											
										
										
											2020-09-26 17:37:14 +02:00
+								        #   1st set: items[0:100] would give items at indexes 0 to 99
 								        #   2nd set: items[100:200] would give items at indexes 100 to 199
 								        #   3rd set: items[200:300] would give items at indexes 200 to 239
-												Add communities and collections support to API

The basic logic is similar to items, where you can request single
item statistics with a UUID, all item statistics, and item statis-
tics for a list of items (optionally with a date range). Most of
the item code was re-purposed to work on "elements", which can be
items, communities, or collections depending on the request, with
the use of Falcon's `before` hooks to set the statistics scope so
we know how to behave for the current request.

Other than the minor difference in facet fields, another issue I
had with communities and collections is that the owningComm and
owningColl fields are multi-valued (unlike items' id field). This
means that, when you facet the results of your query, Solr returns
ids that seem unrelated, but are actually present in the field, so
I had to make sure I checked all returned ids to see if they were
in the user's POSTed elements list.

TODO:
  - Add tests
  - Revise docstrings
  - Refactor items.py as it is now generic

											
										
										
											2020-12-20 15:14:46 +01:00
+								        elements_subset: list = req.context.elements[first_element:last_element]
-												dspace_statistics_api: Add support for date ranges to /items

You can now POST a JSON request to /items with a list of items and
a date range. This allows the possibility to get view and download
statistics for arbitrary items and arbitrary date ranges.

The JSON request should be in the following format:

    {
        "limit": 100,
        "page": 0,
        "dateFrom": "2020-01-01T00:00:00Z",
        "dateTo": "2020-09-09T00:00:00Z",
        "items": [
            "f44cf173-2344-4eb2-8f00-ee55df32c76f",
            "2324aa41-e9de-4a2b-bc36-16241464683e",
            "8542f9da-9ce1-4614-abf4-f2e3fdb4b305",
            "0fe573e7-042a-4240-a4d9-753b61233908"
        ]
    }

The limit, page, and date parameters are all optional. By default
it will use a limit of 100, page 0, and [* TO *] Solr date range.

											
										
										
											2020-09-25 11:21:11 +02:00
-												Add communities and collections support to API

The basic logic is similar to items, where you can request single
item statistics with a UUID, all item statistics, and item statis-
tics for a list of items (optionally with a date range). Most of
the item code was re-purposed to work on "elements", which can be
items, communities, or collections depending on the request, with
the use of Falcon's `before` hooks to set the statistics scope so
we know how to behave for the current request.

Other than the minor difference in facet fields, another issue I
had with communities and collections is that the owningComm and
owningColl fields are multi-valued (unlike items' id field). This
means that, when you facet the results of your query, Solr returns
ids that seem unrelated, but are actually present in the field, so
I had to make sure I checked all returned ids to see if they were
in the user's POSTed elements list.

TODO:
  - Add tests
  - Revise docstrings
  - Refactor items.py as it is now generic

											
										
										
											2020-12-20 15:14:46 +01:00
+								        views: dict = get_views(
 								            solr_date_string, elements_subset, req.context.views_facet_field
 								        )
 								        downloads: dict = get_downloads(
 								            solr_date_string, elements_subset, req.context.downloads_facet_field
 								        )
-												dspace_statistics_api: Add support for date ranges to /items

You can now POST a JSON request to /items with a list of items and
a date range. This allows the possibility to get view and download
statistics for arbitrary items and arbitrary date ranges.

The JSON request should be in the following format:

    {
        "limit": 100,
        "page": 0,
        "dateFrom": "2020-01-01T00:00:00Z",
        "dateTo": "2020-09-09T00:00:00Z",
        "items": [
            "f44cf173-2344-4eb2-8f00-ee55df32c76f",
            "2324aa41-e9de-4a2b-bc36-16241464683e",
            "8542f9da-9ce1-4614-abf4-f2e3fdb4b305",
            "0fe573e7-042a-4240-a4d9-753b61233908"
        ]
    }

The limit, page, and date parameters are all optional. By default
it will use a limit of 100, page 0, and [* TO *] Solr date range.

											
										
										
											2020-09-25 11:21:11 +02:00
-												Add communities and collections support to API

The basic logic is similar to items, where you can request single
item statistics with a UUID, all item statistics, and item statis-
tics for a list of items (optionally with a date range). Most of
the item code was re-purposed to work on "elements", which can be
items, communities, or collections depending on the request, with
the use of Falcon's `before` hooks to set the statistics scope so
we know how to behave for the current request.

Other than the minor difference in facet fields, another issue I
had with communities and collections is that the owningComm and
owningColl fields are multi-valued (unlike items' id field). This
means that, when you facet the results of your query, Solr returns
ids that seem unrelated, but are actually present in the field, so
I had to make sure I checked all returned ids to see if they were
in the user's POSTed elements list.

TODO:
  - Add tests
  - Revise docstrings
  - Refactor items.py as it is now generic

											
										
										
											2020-12-20 15:14:46 +01:00
+								        # create a list to hold dicts of stats
-												dspace_statistics_api: Add support for date ranges to /items

You can now POST a JSON request to /items with a list of items and
a date range. This allows the possibility to get view and download
statistics for arbitrary items and arbitrary date ranges.

The JSON request should be in the following format:

    {
        "limit": 100,
        "page": 0,
        "dateFrom": "2020-01-01T00:00:00Z",
        "dateTo": "2020-09-09T00:00:00Z",
        "items": [
            "f44cf173-2344-4eb2-8f00-ee55df32c76f",
            "2324aa41-e9de-4a2b-bc36-16241464683e",
            "8542f9da-9ce1-4614-abf4-f2e3fdb4b305",
            "0fe573e7-042a-4240-a4d9-753b61233908"
        ]
    }

The limit, page, and date parameters are all optional. By default
it will use a limit of 100, page 0, and [* TO *] Solr date range.

											
										
										
											2020-09-25 11:21:11 +02:00
+								        statistics = list()
-												Add communities and collections support to API

The basic logic is similar to items, where you can request single
item statistics with a UUID, all item statistics, and item statis-
tics for a list of items (optionally with a date range). Most of
the item code was re-purposed to work on "elements", which can be
items, communities, or collections depending on the request, with
the use of Falcon's `before` hooks to set the statistics scope so
we know how to behave for the current request.

Other than the minor difference in facet fields, another issue I
had with communities and collections is that the owningComm and
owningColl fields are multi-valued (unlike items' id field). This
means that, when you facet the results of your query, Solr returns
ids that seem unrelated, but are actually present in the field, so
I had to make sure I checked all returned ids to see if they were
in the user's POSTed elements list.

TODO:
  - Add tests
  - Revise docstrings
  - Refactor items.py as it is now generic

											
										
										
											2020-12-20 15:14:46 +01:00
+								        # iterate over views dict to extract views and use the element id as an
-												dspace_statistics_api: Add support for date ranges to /items

You can now POST a JSON request to /items with a list of items and
a date range. This allows the possibility to get view and download
statistics for arbitrary items and arbitrary date ranges.

The JSON request should be in the following format:

    {
        "limit": 100,
        "page": 0,
        "dateFrom": "2020-01-01T00:00:00Z",
        "dateTo": "2020-09-09T00:00:00Z",
        "items": [
            "f44cf173-2344-4eb2-8f00-ee55df32c76f",
            "2324aa41-e9de-4a2b-bc36-16241464683e",
            "8542f9da-9ce1-4614-abf4-f2e3fdb4b305",
            "0fe573e7-042a-4240-a4d9-753b61233908"
        ]
    }

The limit, page, and date parameters are all optional. By default
it will use a limit of 100, page 0, and [* TO *] Solr date range.

											
										
										
											2020-09-25 11:21:11 +02:00
+								        # index to the downloads dict to extract downloads.
 								        for k, v in views.items():
 								            statistics.append({"id": k, "views": v, "downloads": downloads[k]})
 								        message = {
-												Refactor `/items` POST handler to use a before hook

This allows us to do the dirty work of parsing, validating, and
setting local variables from the POST parameters outside of the
on_post function. We then share the parameters via the req.context
object. Functionally it is the same, but readability is better
and it's a neat trick that I could use elsewhere.

See: https://falcon.readthedocs.io/en/stable/user/faq.html#how-can-i-pass-data-from-a-hook-to-a-responder-and-between-hooks

											
										
										
											2020-09-26 17:37:14 +02:00
+								            "currentPage": req.context.page,
-												dspace_statistics_api: Add support for date ranges to /items

You can now POST a JSON request to /items with a list of items and
a date range. This allows the possibility to get view and download
statistics for arbitrary items and arbitrary date ranges.

The JSON request should be in the following format:

    {
        "limit": 100,
        "page": 0,
        "dateFrom": "2020-01-01T00:00:00Z",
        "dateTo": "2020-09-09T00:00:00Z",
        "items": [
            "f44cf173-2344-4eb2-8f00-ee55df32c76f",
            "2324aa41-e9de-4a2b-bc36-16241464683e",
            "8542f9da-9ce1-4614-abf4-f2e3fdb4b305",
            "0fe573e7-042a-4240-a4d9-753b61233908"
        ]
    }

The limit, page, and date parameters are all optional. By default
it will use a limit of 100, page 0, and [* TO *] Solr date range.

											
										
										
											2020-09-25 11:21:11 +02:00
+								            "totalPages": pages,
-												Refactor `/items` POST handler to use a before hook

This allows us to do the dirty work of parsing, validating, and
setting local variables from the POST parameters outside of the
on_post function. We then share the parameters via the req.context
object. Functionally it is the same, but readability is better
and it's a neat trick that I could use elsewhere.

See: https://falcon.readthedocs.io/en/stable/user/faq.html#how-can-i-pass-data-from-a-hook-to-a-responder-and-between-hooks

											
										
										
											2020-09-26 17:37:14 +02:00
+								            "limit": req.context.limit,
-												dspace_statistics_api: Add support for date ranges to /items

You can now POST a JSON request to /items with a list of items and
a date range. This allows the possibility to get view and download
statistics for arbitrary items and arbitrary date ranges.

The JSON request should be in the following format:

    {
        "limit": 100,
        "page": 0,
        "dateFrom": "2020-01-01T00:00:00Z",
        "dateTo": "2020-09-09T00:00:00Z",
        "items": [
            "f44cf173-2344-4eb2-8f00-ee55df32c76f",
            "2324aa41-e9de-4a2b-bc36-16241464683e",
            "8542f9da-9ce1-4614-abf4-f2e3fdb4b305",
            "0fe573e7-042a-4240-a4d9-753b61233908"
        ]
    }

The limit, page, and date parameters are all optional. By default
it will use a limit of 100, page 0, and [* TO *] Solr date range.

											
										
										
											2020-09-25 11:21:11 +02:00
+								            "statistics": statistics,
 								        }
 								        resp.status = falcon.HTTP_200
 								        resp.media = message
-												Correct issues highlighted by Flake8

Flake8 validates code style against PEP 8 in order to encourage the
writing of idiomatic Python. For reference, I am currently ignoring
errors about line length (E501) because I feel it makes code harder
to read.

This is the invocation I am using:

    $ flake8 --ignore E501 dspace_statistics_api

											
										
										
											2018-11-03 22:55:23 +01:00
-												Add communities and collections support to API

The basic logic is similar to items, where you can request single
item statistics with a UUID, all item statistics, and item statis-
tics for a list of items (optionally with a date range). Most of
the item code was re-purposed to work on "elements", which can be
items, communities, or collections depending on the request, with
the use of Falcon's `before` hooks to set the statistics scope so
we know how to behave for the current request.

Other than the minor difference in facet fields, another issue I
had with communities and collections is that the owningComm and
owningColl fields are multi-valued (unlike items' id field). This
means that, when you facet the results of your query, Solr returns
ids that seem unrelated, but are actually present in the field, so
I had to make sure I checked all returned ids to see if they were
in the user's POSTed elements list.

TODO:
  - Add tests
  - Revise docstrings
  - Refactor items.py as it is now generic

											
										
										
											2020-12-20 15:14:46 +01:00
class SingleStatisticsResource:
    """Resource that returns the stored views and downloads of one element."""

    @falcon.before(set_statistics_scope)
    def on_get(self, req, resp, id_):
        """Handle GET requests for the statistics of a single element.

        Args:
            req: Falcon request. The `set_statistics_scope` hook runs first;
                this responder reads `req.context.database` (used as the
                table name in the query) and `req.context.statistics_scope`
                (used in the not-found message).
            resp: Falcon response. On success `resp.media` is set to a dict
                with the keys `id`, `views`, and `downloads`.
            id_: Identifier of the element; it is converted with `str()`
                before being bound as the query parameter.

        Raises:
            falcon.HTTPNotFound: If the query matches no rows.
        """
        # Adapt Python’s uuid.UUID type to PostgreSQL’s uuid
        # See: https://www.psycopg.org/docs/extras.html
        psycopg2.extras.register_uuid()

        with DatabaseManager() as db:
            db.set_session(readonly=True)

            # Run the query on the cursor managed by this `with` block.
            # Opening another cursor here would bypass the block's cleanup
            # and leave that extra cursor open.
            with db.cursor() as cursor:
                cursor.execute(
                    f"SELECT views, downloads FROM {req.context.database} WHERE id=%s",
                    [str(id_)],
                )

                if cursor.rowcount == 0:
                    raise falcon.HTTPNotFound(
                        title=f"{req.context.statistics_scope} not found",
                        description=f'The {req.context.statistics_scope} with id "{str(id_)}" was not found.',
                    )

                # Rows are read by column name, so this presumably relies on
                # DatabaseManager configuring a dict-style cursor
                # (TODO confirm in the database module).
                results = cursor.fetchone()

                resp.media = {
                    "id": str(id_),
                    "views": results["views"],
                    "downloads": results["downloads"],
                }
-												Initial commit

Add first working version of the statistics API.

											
										
										
											2018-09-18 13:03:15 +02:00
-												Correct issues highlighted by Flake8

Flake8 validates code style against PEP 8 in order to encourage the
writing of idiomatic Python. For reference, I am currently ignoring
errors about line length (E501) because I feel it makes code harder
to read.

This is the invocation I am using:

    $ flake8 --ignore E501 dspace_statistics_api

											
										
										
											2018-11-03 22:55:23 +01:00
-												Add "application" alias to API object

By default gunicorn looks for an "application" object to run, so this
saves us having to type api:app.

											
										
										
											2018-10-26 18:21:27 +02:00
+								api = application = falcon.API()  # "application" alias: the object name gunicorn looks for by default
-												Format code with black

											
										
										
											2019-11-27 11:30:06 +01:00
+								api.add_route("/", RootResource())
-												Add /status route

Currently this only prints the API version.

											
										
										
											2020-12-22 10:30:09 +01:00
+								api.add_route("/status", StatusResource())
-												Add communities and collections support to API

The basic logic is similar to items, where you can request single
item statistics with a UUID, all item statistics, and item statis-
tics for a list of items (optionally with a date range). Most of
the item code was re-purposed to work on "elements", which can be
items, communities, or collections depending on the request, with
the use of Falcon's `before` hooks to set the statistics scope so
we know how to behave for the current request.

Other than the minor difference in facet fields, another issue I
had with communities and collections is that the owningComm and
owningColl fields are multi-valued (unlike items' id field). This
means that, when you facet the results of your query, Solr returns
ids that seem unrelated, but are actually present in the field, so
I had to make sure I checked all returned ids to see if they were
in the user's POSTed elements list.

TODO:
  - Add tests
  - Revise docstrings
  - Refactor items.py as it is now generic

											
										
										
											2020-12-20 15:14:46 +01:00
 								# The same two resource classes serve items, communities and collections:
 								# one for paged listings of all elements, one for a single element by id.
 								# Per the project's design, a Falcon `before` hook sets the statistics
 								# scope for the current request so the handlers know which kind to query.
 								# The `uuid` field converter in the URI templates only matches ids that
 								# parse as UUIDs (see Falcon's routing documentation).
 								# Item routes
 								api.add_route("/items", AllStatisticsResource())
 								api.add_route("/item/{id_:uuid}", SingleStatisticsResource())
 								# Community routes
 								api.add_route("/communities", AllStatisticsResource())
 								api.add_route("/community/{id_:uuid}", SingleStatisticsResource())
 								# Collection routes
 								api.add_route("/collections", AllStatisticsResource())
 								api.add_route("/collection/{id_:uuid}", SingleStatisticsResource())
-												Add vim modeline to all Python files

Uses four spaces for tab and shift widths, and turns on expansion of
tabs to spaces.

											
										
										
											2018-09-23 10:33:26 +02:00
-												Add Swagger UI on /swagger

This includes a Swagger UI with an OpenAPI 3.0 JSON schema for easy
interactive demonstration and testing of the API. The JSON schema
was created with the standalone swagger-editor. Includes tests to
make sure that the /swagger and /docs/openapi.json paths are
accessible.

											
										
										
											2020-12-22 10:18:47 +01:00
+								# Swagger configuration
 								SWAGGERUI_URL = "/swagger"  # without trailing slash
 								# Path where the OpenAPI schema is served; the same value is handed to
 								# Swagger UI below so it knows where to fetch the schema from.
 								SCHEMA_URL = "/docs/openapi.json"
 								# Register the schema route via SCHEMA_URL rather than repeating the
 								# literal, so the served path and the path given to Swagger UI cannot
 								# drift apart.
 								api.add_route(SCHEMA_URL, OpenAPIJSONResource())
 								register_swaggerui_app(
 								    api,
 								    SWAGGERUI_URL,
 								    SCHEMA_URL,
 								    config={
 								        # Swagger UI option: HTTP methods for which "Try it out" is enabled
 								        "supportedSubmitMethods": ["get", "post"],
 								    },
 								)
-												Add vim modeline to all Python files

Uses four spaces for tab and shift widths, and turns on expansion of
tabs to spaces.

											
										
										
											2018-09-23 10:33:26 +02:00
+								# vim: set sw=4 ts=4 expandtab: