Format code with black

2025-06-10 19:02:11 +02:00 · 2019-11-27 12:30:06 +02:00
parent d2fe420a9a
commit 4ff1fd4a22
4 changed files with 137 additions and 107 deletions
--- a/dspace_statistics_api/app.py
+++ b/dspace_statistics_api/app.py
@@ -5,8 +5,8 @@ import falcon
 class RootResource:
    def on_get(self, req, resp):
        resp.status = falcon.HTTP_200
-        resp.content_type = 'text/html'
-        with open('dspace_statistics_api/docs/index.html', 'r') as f:
+        resp.content_type = "text/html"
+        with open("dspace_statistics_api/docs/index.html", "r") as f:
            resp.body = f.read()


@@ -23,24 +23,34 @@ class AllItemsResource:

            with db.cursor() as cursor:
                # get total number of items so we can estimate the pages
-                cursor.execute('SELECT COUNT(id) FROM items')
+                cursor.execute("SELECT COUNT(id) FROM items")
                pages = round(cursor.fetchone()[0] / limit)

                # get statistics, ordered by id, and use limit and offset to page through results
-                cursor.execute('SELECT id, views, downloads FROM items ORDER BY id ASC LIMIT {} OFFSET {}'.format(limit, offset))
+                cursor.execute(
+                    "SELECT id, views, downloads FROM items ORDER BY id ASC LIMIT {} OFFSET {}".format(
+                        limit, offset
+                    )
+                )

                # create a list to hold dicts of item stats
                statistics = list()

                # iterate over results and build statistics object
                for item in cursor:
-                    statistics.append({'id': item['id'], 'views': item['views'], 'downloads': item['downloads']})
+                    statistics.append(
+                        {
+                            "id": item["id"],
+                            "views": item["views"],
+                            "downloads": item["downloads"],
+                        }
+                    )

        message = {
-            'currentPage': page,
-            'totalPages': pages,
-            'limit': limit,
-            'statistics': statistics
+            "currentPage": page,
+            "totalPages": pages,
+            "limit": limit,
+            "statistics": statistics,
        }

        resp.media = message
@@ -55,27 +65,31 @@ class ItemResource:

            with db.cursor() as cursor:
                cursor = db.cursor()
-                cursor.execute('SELECT views, downloads FROM items WHERE id={}'.format(item_id))
+                cursor.execute(
+                    "SELECT views, downloads FROM items WHERE id={}".format(item_id)
+                )
                if cursor.rowcount == 0:
                    raise falcon.HTTPNotFound(
-                        title='Item not found',
-                        description='The item with id "{}" was not found.'.format(item_id)
+                        title="Item not found",
+                        description='The item with id "{}" was not found.'.format(
+                            item_id
+                        ),
                    )
                else:
                    results = cursor.fetchone()

                    statistics = {
-                        'id': item_id,
-                        'views': results['views'],
-                        'downloads': results['downloads']
+                        "id": item_id,
+                        "views": results["views"],
+                        "downloads": results["downloads"],
                    }

                    resp.media = statistics


 api = application = falcon.API()
-api.add_route('/', RootResource())
-api.add_route('/items', AllItemsResource())
-api.add_route('/item/{item_id:int}', ItemResource())
+api.add_route("/", RootResource())
+api.add_route("/items", AllItemsResource())
+api.add_route("/item/{item_id:int}", ItemResource())

 # vim: set sw=4 ts=4 expandtab:
--- a/dspace_statistics_api/config.py
+++ b/dspace_statistics_api/config.py
@@ -1,12 +1,12 @@
 import os

 # Check if Solr connection information was provided in the environment
-SOLR_SERVER = os.environ.get('SOLR_SERVER', 'http://localhost:8080/solr')
+SOLR_SERVER = os.environ.get("SOLR_SERVER", "http://localhost:8080/solr")

-DATABASE_NAME = os.environ.get('DATABASE_NAME', 'dspacestatistics')
-DATABASE_USER = os.environ.get('DATABASE_USER', 'dspacestatistics')
-DATABASE_PASS = os.environ.get('DATABASE_PASS', 'dspacestatistics')
-DATABASE_HOST = os.environ.get('DATABASE_HOST', 'localhost')
-DATABASE_PORT = os.environ.get('DATABASE_PORT', '5432')
+DATABASE_NAME = os.environ.get("DATABASE_NAME", "dspacestatistics")
+DATABASE_USER = os.environ.get("DATABASE_USER", "dspacestatistics")
+DATABASE_PASS = os.environ.get("DATABASE_PASS", "dspacestatistics")
+DATABASE_HOST = os.environ.get("DATABASE_HOST", "localhost")
+DATABASE_PORT = os.environ.get("DATABASE_PORT", "5432")

 # vim: set sw=4 ts=4 expandtab:
--- a/dspace_statistics_api/database.py
+++ b/dspace_statistics_api/database.py
@@ -8,18 +8,22 @@ import psycopg2
 import psycopg2.extras


-class DatabaseManager():
-    '''Manage database connection.'''
+class DatabaseManager:
+    """Manage database connection."""

    def __init__(self):
-        self._connection_uri = 'dbname={} user={} password={} host={} port={}'.format(DATABASE_NAME, DATABASE_USER, DATABASE_PASS, DATABASE_HOST, DATABASE_PORT)
+        self._connection_uri = "dbname={} user={} password={} host={} port={}".format(
+            DATABASE_NAME, DATABASE_USER, DATABASE_PASS, DATABASE_HOST, DATABASE_PORT
+        )

    def __enter__(self):
        try:
-            self._connection = psycopg2.connect(self._connection_uri, cursor_factory=psycopg2.extras.DictCursor)
+            self._connection = psycopg2.connect(
+                self._connection_uri, cursor_factory=psycopg2.extras.DictCursor
+            )
        except psycopg2.OperationalError:
-            title = '500 Internal Server Error'
-            description = 'Could not connect to database'
+            title = "500 Internal Server Error"
+            description = "Could not connect to database"
            raise falcon.HTTPInternalServerError(title, description)

        return self._connection
@@ -27,4 +31,5 @@ class DatabaseManager():
    def __exit__(self, exc_type, exc_value, exc_traceback):
        self._connection.close()

+
 # vim: set sw=4 ts=4 expandtab:
--- a/dspace_statistics_api/indexer.py
+++ b/dspace_statistics_api/indexer.py
@@ -43,11 +43,8 @@ def get_statistics_shards():
    statistics_core_years = []

    # URL for Solr status to check active cores
-    solr_query_params = {
-        'action': 'STATUS',
-        'wt': 'json'
-    }
-    solr_url = SOLR_SERVER + '/admin/cores'
+    solr_query_params = {"action": "STATUS", "wt": "json"}
+    solr_url = SOLR_SERVER + "/admin/cores"
    res = requests.get(solr_url, params=solr_query_params)

    if res.status_code == requests.codes.ok:
@@ -55,9 +52,9 @@ def get_statistics_shards():

        # Iterate over active cores from Solr's STATUS response (cores are in
        # the status array of this response).
-        for core in data['status']:
+        for core in data["status"]:
            # Pattern to match, for example: statistics-2018
-            pattern = re.compile('^statistics-[0-9]{4}$')
+            pattern = re.compile("^statistics-[0-9]{4}$")

            if not pattern.match(core):
                continue
@@ -71,13 +68,13 @@ def get_statistics_shards():

    if len(statistics_core_years) > 0:
        # Begin building a string of shards starting with the default one
-        shards = '{}/statistics'.format(SOLR_SERVER)
+        shards = "{}/statistics".format(SOLR_SERVER)

        for core in statistics_core_years:
            # Create a comma-separated list of shards to pass to our Solr query
            #
            # See: https://wiki.apache.org/solr/DistributedSearch
-            shards += ',{}/{}'.format(SOLR_SERVER, core)
+            shards += ",{}/{}".format(SOLR_SERVER, core)

    # Return the string of shards, which may actually be empty. Solr doesn't
    # seem to mind if the shards query parameter is empty and I haven't seen
@@ -93,30 +90,32 @@ def index_views():
    #
    # see: https://lucene.apache.org/solr/guide/6_6/the-stats-component.html
    solr_query_params = {
-        'q': 'type:2',
-        'fq': 'isBot:false AND statistics_type:view',
-        'facet': 'true',
-        'facet.field': 'id',
-        'facet.mincount': 1,
-        'facet.limit': 1,
-        'facet.offset': 0,
-        'stats': 'true',
-        'stats.field': 'id',
-        'stats.calcdistinct': 'true',
-        'shards': shards,
-        'rows': 0,
-        'wt': 'json'
+        "q": "type:2",
+        "fq": "isBot:false AND statistics_type:view",
+        "facet": "true",
+        "facet.field": "id",
+        "facet.mincount": 1,
+        "facet.limit": 1,
+        "facet.offset": 0,
+        "stats": "true",
+        "stats.field": "id",
+        "stats.calcdistinct": "true",
+        "shards": shards,
+        "rows": 0,
+        "wt": "json",
    }

-    solr_url = SOLR_SERVER + '/statistics/select'
+    solr_url = SOLR_SERVER + "/statistics/select"

    res = requests.get(solr_url, params=solr_query_params)

    try:
        # get total number of distinct facets (countDistinct)
-        results_totalNumFacets = res.json()['stats']['stats_fields']['id']['countDistinct']
+        results_totalNumFacets = res.json()["stats"]["stats_fields"]["id"][
+            "countDistinct"
+        ]
    except TypeError:
-        print('No item views to index, exiting.')
+        print("No item views to index, exiting.")

        exit(0)

@@ -132,35 +131,39 @@ def index_views():

            while results_current_page <= results_num_pages:
                # "pages" are zero based, but one based is more human readable
-                print('Indexing item views (page {} of {})'.format(results_current_page + 1, results_num_pages + 1))
+                print(
+                    "Indexing item views (page {} of {})".format(
+                        results_current_page + 1, results_num_pages + 1
+                    )
+                )

                solr_query_params = {
-                    'q': 'type:2',
-                    'fq': 'isBot:false AND statistics_type:view',
-                    'facet': 'true',
-                    'facet.field': 'id',
-                    'facet.mincount': 1,
-                    'facet.limit': results_per_page,
-                    'facet.offset': results_current_page * results_per_page,
-                    'shards': shards,
-                    'rows': 0,
-                    'wt': 'json',
-                    'json.nl': 'map'  # return facets as a dict instead of a flat list
+                    "q": "type:2",
+                    "fq": "isBot:false AND statistics_type:view",
+                    "facet": "true",
+                    "facet.field": "id",
+                    "facet.mincount": 1,
+                    "facet.limit": results_per_page,
+                    "facet.offset": results_current_page * results_per_page,
+                    "shards": shards,
+                    "rows": 0,
+                    "wt": "json",
+                    "json.nl": "map",  # return facets as a dict instead of a flat list
                }

-                solr_url = SOLR_SERVER + '/statistics/select'
+                solr_url = SOLR_SERVER + "/statistics/select"

                res = requests.get(solr_url, params=solr_query_params)

                # Solr returns facets as a dict of dicts (see json.nl parameter)
-                views = res.json()['facet_counts']['facet_fields']
+                views = res.json()["facet_counts"]["facet_fields"]
                # iterate over the 'id' dict and get the item ids and views
-                for item_id, item_views in views['id'].items():
+                for item_id, item_views in views["id"].items():
                    data.append((item_id, item_views))

                # do a batch insert of values from the current "page" of results
-                sql = 'INSERT INTO items(id, views) VALUES %s ON CONFLICT(id) DO UPDATE SET views=excluded.views'
-                psycopg2.extras.execute_values(cursor, sql, data, template='(%s, %s)')
+                sql = "INSERT INTO items(id, views) VALUES %s ON CONFLICT(id) DO UPDATE SET views=excluded.views"
+                psycopg2.extras.execute_values(cursor, sql, data, template="(%s, %s)")
                db.commit()

                # clear all items from the list so we can populate it with the next batch
@@ -172,30 +175,32 @@ def index_views():
 def index_downloads():
    # get the total number of distinct facets for items with at least 1 download
    solr_query_params = {
-        'q': 'type:0',
-        'fq': 'isBot:false AND statistics_type:view AND bundleName:ORIGINAL',
-        'facet': 'true',
-        'facet.field': 'owningItem',
-        'facet.mincount': 1,
-        'facet.limit': 1,
-        'facet.offset': 0,
-        'stats': 'true',
-        'stats.field': 'owningItem',
-        'stats.calcdistinct': 'true',
-        'shards': shards,
-        'rows': 0,
-        'wt': 'json'
+        "q": "type:0",
+        "fq": "isBot:false AND statistics_type:view AND bundleName:ORIGINAL",
+        "facet": "true",
+        "facet.field": "owningItem",
+        "facet.mincount": 1,
+        "facet.limit": 1,
+        "facet.offset": 0,
+        "stats": "true",
+        "stats.field": "owningItem",
+        "stats.calcdistinct": "true",
+        "shards": shards,
+        "rows": 0,
+        "wt": "json",
    }

-    solr_url = SOLR_SERVER + '/statistics/select'
+    solr_url = SOLR_SERVER + "/statistics/select"

    res = requests.get(solr_url, params=solr_query_params)

    try:
        # get total number of distinct facets (countDistinct)
-        results_totalNumFacets = res.json()['stats']['stats_fields']['owningItem']['countDistinct']
+        results_totalNumFacets = res.json()["stats"]["stats_fields"]["owningItem"][
+            "countDistinct"
+        ]
    except TypeError:
-        print('No item downloads to index, exiting.')
+        print("No item downloads to index, exiting.")

        exit(0)

@@ -211,35 +216,39 @@ def index_downloads():

            while results_current_page <= results_num_pages:
                # "pages" are zero based, but one based is more human readable
-                print('Indexing item downloads (page {} of {})'.format(results_current_page + 1, results_num_pages + 1))
+                print(
+                    "Indexing item downloads (page {} of {})".format(
+                        results_current_page + 1, results_num_pages + 1
+                    )
+                )

                solr_query_params = {
-                    'q': 'type:0',
-                    'fq': 'isBot:false AND statistics_type:view AND bundleName:ORIGINAL',
-                    'facet': 'true',
-                    'facet.field': 'owningItem',
-                    'facet.mincount': 1,
-                    'facet.limit': results_per_page,
-                    'facet.offset': results_current_page * results_per_page,
-                    'shards': shards,
-                    'rows': 0,
-                    'wt': 'json',
-                    'json.nl': 'map'  # return facets as a dict instead of a flat list
+                    "q": "type:0",
+                    "fq": "isBot:false AND statistics_type:view AND bundleName:ORIGINAL",
+                    "facet": "true",
+                    "facet.field": "owningItem",
+                    "facet.mincount": 1,
+                    "facet.limit": results_per_page,
+                    "facet.offset": results_current_page * results_per_page,
+                    "shards": shards,
+                    "rows": 0,
+                    "wt": "json",
+                    "json.nl": "map",  # return facets as a dict instead of a flat list
                }

-                solr_url = SOLR_SERVER + '/statistics/select'
+                solr_url = SOLR_SERVER + "/statistics/select"

                res = requests.get(solr_url, params=solr_query_params)

                # Solr returns facets as a dict of dicts (see json.nl parameter)
-                downloads = res.json()['facet_counts']['facet_fields']
+                downloads = res.json()["facet_counts"]["facet_fields"]
                # iterate over the 'owningItem' dict and get the item ids and downloads
-                for item_id, item_downloads in downloads['owningItem'].items():
+                for item_id, item_downloads in downloads["owningItem"].items():
                    data.append((item_id, item_downloads))

                # do a batch insert of values from the current "page" of results
-                sql = 'INSERT INTO items(id, downloads) VALUES %s ON CONFLICT(id) DO UPDATE SET downloads=excluded.downloads'
-                psycopg2.extras.execute_values(cursor, sql, data, template='(%s, %s)')
+                sql = "INSERT INTO items(id, downloads) VALUES %s ON CONFLICT(id) DO UPDATE SET downloads=excluded.downloads"
+                psycopg2.extras.execute_values(cursor, sql, data, template="(%s, %s)")
                db.commit()

                # clear all items from the list so we can populate it with the next batch
@@ -251,8 +260,10 @@ def index_downloads():
 with DatabaseManager() as db:
    with db.cursor() as cursor:
        # create table to store item views and downloads
-        cursor.execute('''CREATE TABLE IF NOT EXISTS items
-                  (id INT PRIMARY KEY, views INT DEFAULT 0, downloads INT DEFAULT 0)''')
+        cursor.execute(
+            """CREATE TABLE IF NOT EXISTS items
+                  (id INT PRIMARY KEY, views INT DEFAULT 0, downloads INT DEFAULT 0)"""
+        )

    # commit the table creation before closing the database connection
    db.commit()