diff --git a/dspace_statistics_api/app.py b/dspace_statistics_api/app.py
index 6ca55f5..cc4b454 100644
--- a/dspace_statistics_api/app.py
+++ b/dspace_statistics_api/app.py
@@ -1,9 +1,6 @@
-from .database import database_connection
+from .database import DatabaseManager
 import falcon
 
-db = database_connection()
-db.set_session(readonly=True)
-
 
 class RootResource:
     def on_get(self, req, resp):
@@ -21,23 +18,23 @@ class AllItemsResource:
         page = req.get_param_as_int("page", min=0) or 0
         offset = limit * page
 
-        cursor = db.cursor()
+        with DatabaseManager() as db:
+            db.set_session(readonly=True)
 
-        # get total number of items so we can estimate the pages
-        cursor.execute('SELECT COUNT(id) FROM items')
-        pages = round(cursor.fetchone()[0] / limit)
+            with db.cursor() as cursor:
+                # get total number of items so we can estimate the pages
+                cursor.execute('SELECT COUNT(id) FROM items')
+                pages = round(cursor.fetchone()[0] / limit)
 
-        # get statistics, ordered by id, and use limit and offset to page through results
-        cursor.execute('SELECT id, views, downloads FROM items ORDER BY id ASC LIMIT {} OFFSET {}'.format(limit, offset))
+                # get statistics, ordered by id, and use limit and offset to page through results
+                cursor.execute('SELECT id, views, downloads FROM items ORDER BY id ASC LIMIT {} OFFSET {}'.format(limit, offset))
 
-        # create a list to hold dicts of item stats
-        statistics = list()
+                # create a list to hold dicts of item stats
+                statistics = list()
 
-        # iterate over results and build statistics object
-        for item in cursor:
-            statistics.append({'id': item['id'], 'views': item['views'], 'downloads': item['downloads']})
-
-        cursor.close()
+                # iterate over results and build statistics object
+                for item in cursor:
+                    statistics.append({'id': item['id'], 'views': item['views'], 'downloads': item['downloads']})
 
         message = {
             'currentPage': page,
@@ -53,25 +50,26 @@ class ItemResource:
     def on_get(self, req, resp, item_id):
         """Handles GET requests"""
 
-        cursor = db.cursor()
-        cursor.execute('SELECT views, downloads FROM items WHERE id={}'.format(item_id))
-        if cursor.rowcount == 0:
-            raise falcon.HTTPNotFound(
-                title='Item not found',
-                description='The item with id "{}" was not found.'.format(item_id)
-            )
-        else:
-            results = cursor.fetchone()
+        with DatabaseManager() as db:
+            db.set_session(readonly=True)
 
-            statistics = {
-                'id': item_id,
-                'views': results['views'],
-                'downloads': results['downloads']
-            }
+            with db.cursor() as cursor:
+                cursor.execute('SELECT views, downloads FROM items WHERE id={}'.format(item_id))
+                if cursor.rowcount == 0:
+                    raise falcon.HTTPNotFound(
+                        title='Item not found',
+                        description='The item with id "{}" was not found.'.format(item_id)
+                    )
+                else:
+                    results = cursor.fetchone()
 
-            resp.media = statistics
+                    statistics = {
+                        'id': item_id,
+                        'views': results['views'],
+                        'downloads': results['downloads']
+                    }
 
-        cursor.close()
+                    resp.media = statistics
 
 
 api = application = falcon.API()
diff --git a/dspace_statistics_api/database.py b/dspace_statistics_api/database.py
index 183815f..d240757 100644
--- a/dspace_statistics_api/database.py
+++ b/dspace_statistics_api/database.py
@@ -7,9 +7,17 @@ import psycopg2
 import psycopg2.extras
 
 
-def database_connection():
-    connection = psycopg2.connect("dbname={} user={} password={} host={} port={}".format(DATABASE_NAME, DATABASE_USER, DATABASE_PASS, DATABASE_HOST, DATABASE_PORT), cursor_factory=psycopg2.extras.DictCursor)
+class DatabaseManager():
+    '''Manage database connection.'''
 
-    return connection
+    def __init__(self):
+        self._connection_uri = 'dbname={} user={} password={} host={} port={}'.format(DATABASE_NAME, DATABASE_USER, DATABASE_PASS, DATABASE_HOST, DATABASE_PORT)
+
+    def __enter__(self):
+        self._connection = psycopg2.connect(self._connection_uri, cursor_factory=psycopg2.extras.DictCursor)
+        return self._connection
+
+    def __exit__(self, exc_type, exc_value, exc_traceback):
+        self._connection.close()
 
 # vim: set sw=4 ts=4 expandtab:
diff --git a/dspace_statistics_api/indexer.py b/dspace_statistics_api/indexer.py
index 1adf1f6..2067363 100644
--- a/dspace_statistics_api/indexer.py
+++ b/dspace_statistics_api/indexer.py
@@ -29,7 +29,7 @@
 # See: https://solrclient.readthedocs.io/en/latest/SolrClient.html
 # See: https://wiki.duraspace.org/display/DSPACE/Solr
 
-from .database import database_connection
+from .database import DatabaseManager
 import json
 import psycopg2.extras
 from .solr import solr_connection
@@ -63,41 +63,39 @@ def index_views():
     results_num_pages = int(results_totalNumFacets / results_per_page)
     results_current_page = 0
 
-    cursor = db.cursor()
+    with DatabaseManager() as db:
+        with db.cursor() as cursor:
+            # create an empty list to store values for batch insertion
+            data = []
 
-    # create an empty list to store values for batch insertion
-    data = []
+            while results_current_page <= results_num_pages:
+                print('Indexing item views (page {} of {})'.format(results_current_page, results_num_pages))
 
-    while results_current_page <= results_num_pages:
-        print('Indexing item views (page {} of {})'.format(results_current_page, results_num_pages))
+                res = solr.query('statistics', {
+                    'q': 'type:2',
+                    'fq': 'isBot:false AND statistics_type:view',
+                    'facet': True,
+                    'facet.field': 'id',
+                    'facet.mincount': 1,
+                    'facet.limit': results_per_page,
+                    'facet.offset': results_current_page * results_per_page
+                }, rows=0)
 
-        res = solr.query('statistics', {
-            'q': 'type:2',
-            'fq': 'isBot:false AND statistics_type:view',
-            'facet': True,
-            'facet.field': 'id',
-            'facet.mincount': 1,
-            'facet.limit': results_per_page,
-            'facet.offset': results_current_page * results_per_page
-        }, rows=0)
+                # SolrClient's get_facets() returns a dict of dicts
+                views = res.get_facets()
+                # in this case iterate over the 'id' dict and get the item ids and views
+                for item_id, item_views in views['id'].items():
+                    data.append((item_id, item_views))
 
-        # SolrClient's get_facets() returns a dict of dicts
-        views = res.get_facets()
-        # in this case iterate over the 'id' dict and get the item ids and views
-        for item_id, item_views in views['id'].items():
-            data.append((item_id, item_views))
+                # do a batch insert of values from the current "page" of results
+                sql = 'INSERT INTO items(id, views) VALUES %s ON CONFLICT(id) DO UPDATE SET views=excluded.views'
+                psycopg2.extras.execute_values(cursor, sql, data, template='(%s, %s)')
+                db.commit()
 
-        # do a batch insert of values from the current "page" of results
-        sql = 'INSERT INTO items(id, views) VALUES %s ON CONFLICT(id) DO UPDATE SET views=excluded.views'
-        psycopg2.extras.execute_values(cursor, sql, data, template='(%s, %s)')
-        db.commit()
+                # clear all items from the list so we can populate it with the next batch
+                data.clear()
 
-        # clear all items from the list so we can populate it with the next batch
-        data.clear()
-
-        results_current_page += 1
-
-    cursor.close()
+                results_current_page += 1
 
 def index_downloads():
@@ -123,53 +121,55 @@ def index_downloads():
     results_num_pages = int(results_totalNumFacets / results_per_page)
     results_current_page = 0
 
-    cursor = db.cursor()
+    with DatabaseManager() as db:
+        with db.cursor() as cursor:
+            # create an empty list to store values for batch insertion
+            data = []
 
-    # create an empty list to store values for batch insertion
-    data = []
+            while results_current_page <= results_num_pages:
+                print('Indexing item downloads (page {} of {})'.format(results_current_page, results_num_pages))
 
-    while results_current_page <= results_num_pages:
-        print('Indexing item downloads (page {} of {})'.format(results_current_page, results_num_pages))
+                res = solr.query('statistics', {
+                    'q': 'type:0',
+                    'fq': 'isBot:false AND statistics_type:view AND bundleName:ORIGINAL',
+                    'facet': True,
+                    'facet.field': 'owningItem',
+                    'facet.mincount': 1,
+                    'facet.limit': results_per_page,
+                    'facet.offset': results_current_page * results_per_page
+                }, rows=0)
 
-        res = solr.query('statistics', {
-            'q': 'type:0',
-            'fq': 'isBot:false AND statistics_type:view AND bundleName:ORIGINAL',
-            'facet': True,
-            'facet.field': 'owningItem',
-            'facet.mincount': 1,
-            'facet.limit': results_per_page,
-            'facet.offset': results_current_page * results_per_page
-        }, rows=0)
+                # SolrClient's get_facets() returns a dict of dicts
+                downloads = res.get_facets()
+                # in this case iterate over the 'owningItem' dict and get the item ids and downloads
+                for item_id, item_downloads in downloads['owningItem'].items():
+                    data.append((item_id, item_downloads))
 
-        # SolrClient's get_facets() returns a dict of dicts
-        downloads = res.get_facets()
-        # in this case iterate over the 'owningItem' dict and get the item ids and downloads
-        for item_id, item_downloads in downloads['owningItem'].items():
-            data.append((item_id, item_downloads))
+                # do a batch insert of values from the current "page" of results
+                sql = 'INSERT INTO items(id, downloads) VALUES %s ON CONFLICT(id) DO UPDATE SET downloads=excluded.downloads'
+                psycopg2.extras.execute_values(cursor, sql, data, template='(%s, %s)')
+                db.commit()
 
-        # do a batch insert of values from the current "page" of results
-        sql = 'INSERT INTO items(id, downloads) VALUES %s ON CONFLICT(id) DO UPDATE SET downloads=excluded.downloads'
-        psycopg2.extras.execute_values(cursor, sql, data, template='(%s, %s)')
-        db.commit()
+                # clear all items from the list so we can populate it with the next batch
+                data.clear()
 
-        # clear all items from the list so we can populate it with the next batch
-        data.clear()
-
-        results_current_page += 1
-
-    cursor.close()
+                results_current_page += 1
 
-db = database_connection()
 solr = solr_connection()
 
-# create table to store item views and downloads
-cursor = db.cursor()
-cursor.execute('''CREATE TABLE IF NOT EXISTS items
+print("gonna create the table")
+
+with DatabaseManager() as db:
+    with db.cursor() as cursor:
+        # create table to store item views and downloads
+        cursor.execute('''CREATE TABLE IF NOT EXISTS items
                   (id INT PRIMARY KEY, views INT DEFAULT 0, downloads INT DEFAULT 0)''')
+
+        # commit the table creation before closing the database connection
+        db.commit()
+
 index_views()
 index_downloads()
 
-db.close()
-
 # vim: set sw=4 ts=4 expandtab:
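
For reference, a minimal sketch of the connection-handling pattern this refactor introduces, based on the DatabaseManager and items table defined above: the connection is opened in __enter__ and closed in __exit__, and the psycopg2 cursor is used as a context manager of its own, so the manual cursor.close() and db.close() calls are no longer needed.

    from dspace_statistics_api.database import DatabaseManager

    # the connection is opened on entry and closed automatically on exit
    with DatabaseManager() as db:
        # the API resources only read, so the session can be marked read-only
        db.set_session(readonly=True)

        # psycopg2 cursors are context managers too; this one is closed
        # when the inner block exits
        with db.cursor() as cursor:
            cursor.execute('SELECT COUNT(id) FROM items')
            print(cursor.fetchone()[0])

Opening a short-lived connection per request or per indexing run trades a little connection overhead for not keeping a long-lived module-level connection open in the process.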