From 4ff1fd4a221163270c6fb36b0c3279cb9151ba22 Mon Sep 17 00:00:00 2001 From: Alan Orth Date: Wed, 27 Nov 2019 12:30:06 +0200 Subject: [PATCH] Format code with black --- dspace_statistics_api/app.py | 50 +++++---- dspace_statistics_api/config.py | 12 +-- dspace_statistics_api/database.py | 17 +-- dspace_statistics_api/indexer.py | 165 ++++++++++++++++-------------- 4 files changed, 137 insertions(+), 107 deletions(-) diff --git a/dspace_statistics_api/app.py b/dspace_statistics_api/app.py index 27ba312..e3feaba 100644 --- a/dspace_statistics_api/app.py +++ b/dspace_statistics_api/app.py @@ -5,8 +5,8 @@ import falcon class RootResource: def on_get(self, req, resp): resp.status = falcon.HTTP_200 - resp.content_type = 'text/html' - with open('dspace_statistics_api/docs/index.html', 'r') as f: + resp.content_type = "text/html" + with open("dspace_statistics_api/docs/index.html", "r") as f: resp.body = f.read() @@ -23,24 +23,34 @@ class AllItemsResource: with db.cursor() as cursor: # get total number of items so we can estimate the pages - cursor.execute('SELECT COUNT(id) FROM items') + cursor.execute("SELECT COUNT(id) FROM items") pages = round(cursor.fetchone()[0] / limit) # get statistics, ordered by id, and use limit and offset to page through results - cursor.execute('SELECT id, views, downloads FROM items ORDER BY id ASC LIMIT {} OFFSET {}'.format(limit, offset)) + cursor.execute( + "SELECT id, views, downloads FROM items ORDER BY id ASC LIMIT {} OFFSET {}".format( + limit, offset + ) + ) # create a list to hold dicts of item stats statistics = list() # iterate over results and build statistics object for item in cursor: - statistics.append({'id': item['id'], 'views': item['views'], 'downloads': item['downloads']}) + statistics.append( + { + "id": item["id"], + "views": item["views"], + "downloads": item["downloads"], + } + ) message = { - 'currentPage': page, - 'totalPages': pages, - 'limit': limit, - 'statistics': statistics + "currentPage": page, + "totalPages": pages, + "limit": limit, + "statistics": statistics, } resp.media = message @@ -55,27 +65,31 @@ class ItemResource: with db.cursor() as cursor: cursor = db.cursor() - cursor.execute('SELECT views, downloads FROM items WHERE id={}'.format(item_id)) + cursor.execute( + "SELECT views, downloads FROM items WHERE id={}".format(item_id) + ) if cursor.rowcount == 0: raise falcon.HTTPNotFound( - title='Item not found', - description='The item with id "{}" was not found.'.format(item_id) + title="Item not found", + description='The item with id "{}" was not found.'.format( + item_id + ), ) else: results = cursor.fetchone() statistics = { - 'id': item_id, - 'views': results['views'], - 'downloads': results['downloads'] + "id": item_id, + "views": results["views"], + "downloads": results["downloads"], } resp.media = statistics api = application = falcon.API() -api.add_route('/', RootResource()) -api.add_route('/items', AllItemsResource()) -api.add_route('/item/{item_id:int}', ItemResource()) +api.add_route("/", RootResource()) +api.add_route("/items", AllItemsResource()) +api.add_route("/item/{item_id:int}", ItemResource()) # vim: set sw=4 ts=4 expandtab: diff --git a/dspace_statistics_api/config.py b/dspace_statistics_api/config.py index 31b1ea0..844b4ba 100644 --- a/dspace_statistics_api/config.py +++ b/dspace_statistics_api/config.py @@ -1,12 +1,12 @@ import os # Check if Solr connection information was provided in the environment -SOLR_SERVER = os.environ.get('SOLR_SERVER', 'http://localhost:8080/solr') +SOLR_SERVER = os.environ.get("SOLR_SERVER", "http://localhost:8080/solr") -DATABASE_NAME = os.environ.get('DATABASE_NAME', 'dspacestatistics') -DATABASE_USER = os.environ.get('DATABASE_USER', 'dspacestatistics') -DATABASE_PASS = os.environ.get('DATABASE_PASS', 'dspacestatistics') -DATABASE_HOST = os.environ.get('DATABASE_HOST', 'localhost') -DATABASE_PORT = os.environ.get('DATABASE_PORT', '5432') +DATABASE_NAME = os.environ.get("DATABASE_NAME", "dspacestatistics") +DATABASE_USER = os.environ.get("DATABASE_USER", "dspacestatistics") +DATABASE_PASS = os.environ.get("DATABASE_PASS", "dspacestatistics") +DATABASE_HOST = os.environ.get("DATABASE_HOST", "localhost") +DATABASE_PORT = os.environ.get("DATABASE_PORT", "5432") # vim: set sw=4 ts=4 expandtab: diff --git a/dspace_statistics_api/database.py b/dspace_statistics_api/database.py index cb8640e..6ed7f01 100644 --- a/dspace_statistics_api/database.py +++ b/dspace_statistics_api/database.py @@ -8,18 +8,22 @@ import psycopg2 import psycopg2.extras -class DatabaseManager(): - '''Manage database connection.''' +class DatabaseManager: + """Manage database connection.""" def __init__(self): - self._connection_uri = 'dbname={} user={} password={} host={} port={}'.format(DATABASE_NAME, DATABASE_USER, DATABASE_PASS, DATABASE_HOST, DATABASE_PORT) + self._connection_uri = "dbname={} user={} password={} host={} port={}".format( + DATABASE_NAME, DATABASE_USER, DATABASE_PASS, DATABASE_HOST, DATABASE_PORT + ) def __enter__(self): try: - self._connection = psycopg2.connect(self._connection_uri, cursor_factory=psycopg2.extras.DictCursor) + self._connection = psycopg2.connect( + self._connection_uri, cursor_factory=psycopg2.extras.DictCursor + ) except psycopg2.OperationalError: - title = '500 Internal Server Error' - description = 'Could not connect to database' + title = "500 Internal Server Error" + description = "Could not connect to database" raise falcon.HTTPInternalServerError(title, description) return self._connection @@ -27,4 +31,5 @@ class DatabaseManager(): def __exit__(self, exc_type, exc_value, exc_traceback): self._connection.close() + # vim: set sw=4 ts=4 expandtab: diff --git a/dspace_statistics_api/indexer.py b/dspace_statistics_api/indexer.py index 2ec0a40..4bed42d 100644 --- a/dspace_statistics_api/indexer.py +++ b/dspace_statistics_api/indexer.py @@ -43,11 +43,8 @@ def get_statistics_shards(): statistics_core_years = [] # URL for Solr status to check active cores - solr_query_params = { - 'action': 'STATUS', - 'wt': 'json' - } - solr_url = SOLR_SERVER + '/admin/cores' + solr_query_params = {"action": "STATUS", "wt": "json"} + solr_url = SOLR_SERVER + "/admin/cores" res = requests.get(solr_url, params=solr_query_params) if res.status_code == requests.codes.ok: @@ -55,9 +52,9 @@ def get_statistics_shards(): # Iterate over active cores from Solr's STATUS response (cores are in # the status array of this response). - for core in data['status']: + for core in data["status"]: # Pattern to match, for example: statistics-2018 - pattern = re.compile('^statistics-[0-9]{4}$') + pattern = re.compile("^statistics-[0-9]{4}$") if not pattern.match(core): continue @@ -71,13 +68,13 @@ def get_statistics_shards(): if len(statistics_core_years) > 0: # Begin building a string of shards starting with the default one - shards = '{}/statistics'.format(SOLR_SERVER) + shards = "{}/statistics".format(SOLR_SERVER) for core in statistics_core_years: # Create a comma-separated list of shards to pass to our Solr query # # See: https://wiki.apache.org/solr/DistributedSearch - shards += ',{}/{}'.format(SOLR_SERVER, core) + shards += ",{}/{}".format(SOLR_SERVER, core) # Return the string of shards, which may actually be empty. Solr doesn't # seem to mind if the shards query parameter is empty and I haven't seen @@ -93,30 +90,32 @@ def index_views(): # # see: https://lucene.apache.org/solr/guide/6_6/the-stats-component.html solr_query_params = { - 'q': 'type:2', - 'fq': 'isBot:false AND statistics_type:view', - 'facet': 'true', - 'facet.field': 'id', - 'facet.mincount': 1, - 'facet.limit': 1, - 'facet.offset': 0, - 'stats': 'true', - 'stats.field': 'id', - 'stats.calcdistinct': 'true', - 'shards': shards, - 'rows': 0, - 'wt': 'json' + "q": "type:2", + "fq": "isBot:false AND statistics_type:view", + "facet": "true", + "facet.field": "id", + "facet.mincount": 1, + "facet.limit": 1, + "facet.offset": 0, + "stats": "true", + "stats.field": "id", + "stats.calcdistinct": "true", + "shards": shards, + "rows": 0, + "wt": "json", } - solr_url = SOLR_SERVER + '/statistics/select' + solr_url = SOLR_SERVER + "/statistics/select" res = requests.get(solr_url, params=solr_query_params) try: # get total number of distinct facets (countDistinct) - results_totalNumFacets = res.json()['stats']['stats_fields']['id']['countDistinct'] + results_totalNumFacets = res.json()["stats"]["stats_fields"]["id"][ + "countDistinct" + ] except TypeError: - print('No item views to index, exiting.') + print("No item views to index, exiting.") exit(0) @@ -132,35 +131,39 @@ def index_views(): while results_current_page <= results_num_pages: # "pages" are zero based, but one based is more human readable - print('Indexing item views (page {} of {})'.format(results_current_page + 1, results_num_pages + 1)) + print( + "Indexing item views (page {} of {})".format( + results_current_page + 1, results_num_pages + 1 + ) + ) solr_query_params = { - 'q': 'type:2', - 'fq': 'isBot:false AND statistics_type:view', - 'facet': 'true', - 'facet.field': 'id', - 'facet.mincount': 1, - 'facet.limit': results_per_page, - 'facet.offset': results_current_page * results_per_page, - 'shards': shards, - 'rows': 0, - 'wt': 'json', - 'json.nl': 'map' # return facets as a dict instead of a flat list + "q": "type:2", + "fq": "isBot:false AND statistics_type:view", + "facet": "true", + "facet.field": "id", + "facet.mincount": 1, + "facet.limit": results_per_page, + "facet.offset": results_current_page * results_per_page, + "shards": shards, + "rows": 0, + "wt": "json", + "json.nl": "map", # return facets as a dict instead of a flat list } - solr_url = SOLR_SERVER + '/statistics/select' + solr_url = SOLR_SERVER + "/statistics/select" res = requests.get(solr_url, params=solr_query_params) # Solr returns facets as a dict of dicts (see json.nl parameter) - views = res.json()['facet_counts']['facet_fields'] + views = res.json()["facet_counts"]["facet_fields"] # iterate over the 'id' dict and get the item ids and views - for item_id, item_views in views['id'].items(): + for item_id, item_views in views["id"].items(): data.append((item_id, item_views)) # do a batch insert of values from the current "page" of results - sql = 'INSERT INTO items(id, views) VALUES %s ON CONFLICT(id) DO UPDATE SET views=excluded.views' - psycopg2.extras.execute_values(cursor, sql, data, template='(%s, %s)') + sql = "INSERT INTO items(id, views) VALUES %s ON CONFLICT(id) DO UPDATE SET views=excluded.views" + psycopg2.extras.execute_values(cursor, sql, data, template="(%s, %s)") db.commit() # clear all items from the list so we can populate it with the next batch @@ -172,30 +175,32 @@ def index_views(): def index_downloads(): # get the total number of distinct facets for items with at least 1 download solr_query_params = { - 'q': 'type:0', - 'fq': 'isBot:false AND statistics_type:view AND bundleName:ORIGINAL', - 'facet': 'true', - 'facet.field': 'owningItem', - 'facet.mincount': 1, - 'facet.limit': 1, - 'facet.offset': 0, - 'stats': 'true', - 'stats.field': 'owningItem', - 'stats.calcdistinct': 'true', - 'shards': shards, - 'rows': 0, - 'wt': 'json' + "q": "type:0", + "fq": "isBot:false AND statistics_type:view AND bundleName:ORIGINAL", + "facet": "true", + "facet.field": "owningItem", + "facet.mincount": 1, + "facet.limit": 1, + "facet.offset": 0, + "stats": "true", + "stats.field": "owningItem", + "stats.calcdistinct": "true", + "shards": shards, + "rows": 0, + "wt": "json", } - solr_url = SOLR_SERVER + '/statistics/select' + solr_url = SOLR_SERVER + "/statistics/select" res = requests.get(solr_url, params=solr_query_params) try: # get total number of distinct facets (countDistinct) - results_totalNumFacets = res.json()['stats']['stats_fields']['owningItem']['countDistinct'] + results_totalNumFacets = res.json()["stats"]["stats_fields"]["owningItem"][ + "countDistinct" + ] except TypeError: - print('No item downloads to index, exiting.') + print("No item downloads to index, exiting.") exit(0) @@ -211,35 +216,39 @@ def index_downloads(): while results_current_page <= results_num_pages: # "pages" are zero based, but one based is more human readable - print('Indexing item downloads (page {} of {})'.format(results_current_page + 1, results_num_pages + 1)) + print( + "Indexing item downloads (page {} of {})".format( + results_current_page + 1, results_num_pages + 1 + ) + ) solr_query_params = { - 'q': 'type:0', - 'fq': 'isBot:false AND statistics_type:view AND bundleName:ORIGINAL', - 'facet': 'true', - 'facet.field': 'owningItem', - 'facet.mincount': 1, - 'facet.limit': results_per_page, - 'facet.offset': results_current_page * results_per_page, - 'shards': shards, - 'rows': 0, - 'wt': 'json', - 'json.nl': 'map' # return facets as a dict instead of a flat list + "q": "type:0", + "fq": "isBot:false AND statistics_type:view AND bundleName:ORIGINAL", + "facet": "true", + "facet.field": "owningItem", + "facet.mincount": 1, + "facet.limit": results_per_page, + "facet.offset": results_current_page * results_per_page, + "shards": shards, + "rows": 0, + "wt": "json", + "json.nl": "map", # return facets as a dict instead of a flat list } - solr_url = SOLR_SERVER + '/statistics/select' + solr_url = SOLR_SERVER + "/statistics/select" res = requests.get(solr_url, params=solr_query_params) # Solr returns facets as a dict of dicts (see json.nl parameter) - downloads = res.json()['facet_counts']['facet_fields'] + downloads = res.json()["facet_counts"]["facet_fields"] # iterate over the 'owningItem' dict and get the item ids and downloads - for item_id, item_downloads in downloads['owningItem'].items(): + for item_id, item_downloads in downloads["owningItem"].items(): data.append((item_id, item_downloads)) # do a batch insert of values from the current "page" of results - sql = 'INSERT INTO items(id, downloads) VALUES %s ON CONFLICT(id) DO UPDATE SET downloads=excluded.downloads' - psycopg2.extras.execute_values(cursor, sql, data, template='(%s, %s)') + sql = "INSERT INTO items(id, downloads) VALUES %s ON CONFLICT(id) DO UPDATE SET downloads=excluded.downloads" + psycopg2.extras.execute_values(cursor, sql, data, template="(%s, %s)") db.commit() # clear all items from the list so we can populate it with the next batch @@ -251,8 +260,10 @@ def index_downloads(): with DatabaseManager() as db: with db.cursor() as cursor: # create table to store item views and downloads - cursor.execute('''CREATE TABLE IF NOT EXISTS items - (id INT PRIMARY KEY, views INT DEFAULT 0, downloads INT DEFAULT 0)''') + cursor.execute( + """CREATE TABLE IF NOT EXISTS items + (id INT PRIMARY KEY, views INT DEFAULT 0, downloads INT DEFAULT 0)""" + ) # commit the table creation before closing the database connection db.commit()