mirror of https://github.com/ilri/dspace-statistics-api.git

Refactor database code to use a context manager

Instead of opening one global persistent database connection when
the application starts, I am now abstracting it to a class that I
can use in combination with Python's "with" context. Both connections
and cursors are kept for the duration of each "with" block and closed
automatically when exiting.

See: https://alysivji.github.io/managing-resources-with-context-managers-pythonic.html
See: http://initd.org/psycopg/docs/connection.html#connection.close
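
The refactor relies on Python's context-manager protocol: "with EXPR as NAME:" calls EXPR.__enter__() to bind NAME and guarantees EXPR.__exit__() runs when the block ends, even if an exception is raised. A minimal sketch of the pattern, using a toy resource rather than the commit's actual class:

class ManagedResource():
    '''Toy context manager: acquire a resource on entry, release it on exit.'''

    def __enter__(self):
        print('acquired')
        return self

    def __exit__(self, exc_type, exc_value, exc_traceback):
        # runs on normal exit and when an exception propagates out of the block
        print('released')


with ManagedResource() as resource:
    print('using', resource)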
Alan Orth 2018-11-07 17:41:21 +02:00
parent e39f2b260c
commit 2f342be948
Signed by: alanorth
GPG Key ID: 0FB860CC9C45B1B9
3 changed files with 108 additions and 101 deletions

app.py

@@ -1,9 +1,6 @@
-from .database import database_connection
+from .database import DatabaseManager
 import falcon
 
-db = database_connection()
-db.set_session(readonly=True)
-
 
 class RootResource:
     def on_get(self, req, resp):
@@ -21,23 +18,23 @@ class AllItemsResource:
         page = req.get_param_as_int("page", min=0) or 0
         offset = limit * page
 
-        cursor = db.cursor()
-
-        # get total number of items so we can estimate the pages
-        cursor.execute('SELECT COUNT(id) FROM items')
-        pages = round(cursor.fetchone()[0] / limit)
-
-        # get statistics, ordered by id, and use limit and offset to page through results
-        cursor.execute('SELECT id, views, downloads FROM items ORDER BY id ASC LIMIT {} OFFSET {}'.format(limit, offset))
-
-        # create a list to hold dicts of item stats
-        statistics = list()
-
-        # iterate over results and build statistics object
-        for item in cursor:
-            statistics.append({'id': item['id'], 'views': item['views'], 'downloads': item['downloads']})
-
-        cursor.close()
+        with DatabaseManager() as db:
+            db.set_session(readonly=True)
+
+            with db.cursor() as cursor:
+                # get total number of items so we can estimate the pages
+                cursor.execute('SELECT COUNT(id) FROM items')
+                pages = round(cursor.fetchone()[0] / limit)
+
+                # get statistics, ordered by id, and use limit and offset to page through results
+                cursor.execute('SELECT id, views, downloads FROM items ORDER BY id ASC LIMIT {} OFFSET {}'.format(limit, offset))
+
+                # create a list to hold dicts of item stats
+                statistics = list()
+
+                # iterate over results and build statistics object
+                for item in cursor:
+                    statistics.append({'id': item['id'], 'views': item['views'], 'downloads': item['downloads']})
 
         message = {
             'currentPage': page,
@@ -53,25 +50,27 @@ class ItemResource:
     def on_get(self, req, resp, item_id):
         """Handles GET requests"""
 
-        cursor = db.cursor()
-        cursor.execute('SELECT views, downloads FROM items WHERE id={}'.format(item_id))
-        if cursor.rowcount == 0:
-            raise falcon.HTTPNotFound(
-                title='Item not found',
-                description='The item with id "{}" was not found.'.format(item_id)
-            )
-        else:
-            results = cursor.fetchone()
-
-            statistics = {
-                'id': item_id,
-                'views': results['views'],
-                'downloads': results['downloads']
-            }
-
-            resp.media = statistics
-
-        cursor.close()
+        with DatabaseManager() as db:
+            db.set_session(readonly=True)
+
+            with db.cursor() as cursor:
+                cursor = db.cursor()
+                cursor.execute('SELECT views, downloads FROM items WHERE id={}'.format(item_id))
+                if cursor.rowcount == 0:
+                    raise falcon.HTTPNotFound(
+                        title='Item not found',
+                        description='The item with id "{}" was not found.'.format(item_id)
+                    )
+                else:
+                    results = cursor.fetchone()
+
+                    statistics = {
+                        'id': item_id,
+                        'views': results['views'],
+                        'downloads': results['downloads']
+                    }
+
+                    resp.media = statistics
 
 
 api = application = falcon.API()
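
A side note on the item['views'] and results['downloads'] lookups above: they work because the connection is created with cursor_factory=psycopg2.extras.DictCursor (see database.py below), whose rows allow access by column name as well as by position. A quick illustration, with made-up connection parameters:

import psycopg2
import psycopg2.extras

# connection parameters here are illustrative
connection = psycopg2.connect('dbname=example user=example', cursor_factory=psycopg2.extras.DictCursor)

with connection.cursor() as cursor:
    cursor.execute('SELECT 1 AS views, 2 AS downloads')
    row = cursor.fetchone()
    # DictCursor rows support both positional and named access
    print(row[0], row['views'], row['downloads'])

connection.close()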

database.py

@@ -7,9 +7,17 @@ import psycopg2
 import psycopg2.extras
 
 
-def database_connection():
-    connection = psycopg2.connect("dbname={} user={} password={} host={} port={}".format(DATABASE_NAME, DATABASE_USER, DATABASE_PASS, DATABASE_HOST, DATABASE_PORT), cursor_factory=psycopg2.extras.DictCursor)
-    return connection
+class DatabaseManager():
+    '''Manage database connection.'''
+    def __init__(self):
+        self._connection_uri = 'dbname={} user={} password={} host={} port={}'.format(DATABASE_NAME, DATABASE_USER, DATABASE_PASS, DATABASE_HOST, DATABASE_PORT)
+
+    def __enter__(self):
+        self._connection = psycopg2.connect(self._connection_uri, cursor_factory=psycopg2.extras.DictCursor)
+        return self._connection
+
+    def __exit__(self, exc_type, exc_value, exc_traceback):
+        self._connection.close()
 
 
 # vim: set sw=4 ts=4 expandtab:
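
Note that psycopg2 connections already support "with", but with different semantics: leaving a "with connection" block commits or rolls back the current transaction and does not close the connection (see the connection.close link above), which is why a small wrapper class is needed to get open-on-enter, close-on-exit behavior. Usage then looks like this sketch, mirroring the diffs above:

from .database import DatabaseManager

with DatabaseManager() as db:
    db.set_session(readonly=True)

    with db.cursor() as cursor:
        cursor.execute('SELECT COUNT(id) FROM items')
        print(cursor.fetchone()[0])

# the connection has been closed here by DatabaseManager.__exit__()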

indexer.py

@@ -29,7 +29,7 @@
 # See: https://solrclient.readthedocs.io/en/latest/SolrClient.html
 # See: https://wiki.duraspace.org/display/DSPACE/Solr
-from .database import database_connection
+from .database import DatabaseManager
 import json
 import psycopg2.extras
 from .solr import solr_connection
@@ -63,41 +63,39 @@ def index_views():
     results_num_pages = int(results_totalNumFacets / results_per_page)
     results_current_page = 0
 
-    cursor = db.cursor()
-
-    # create an empty list to store values for batch insertion
-    data = []
-
-    while results_current_page <= results_num_pages:
-        print('Indexing item views (page {} of {})'.format(results_current_page, results_num_pages))
-
-        res = solr.query('statistics', {
-            'q': 'type:2',
-            'fq': 'isBot:false AND statistics_type:view',
-            'facet': True,
-            'facet.field': 'id',
-            'facet.mincount': 1,
-            'facet.limit': results_per_page,
-            'facet.offset': results_current_page * results_per_page
-        }, rows=0)
-
-        # SolrClient's get_facets() returns a dict of dicts
-        views = res.get_facets()
-        # in this case iterate over the 'id' dict and get the item ids and views
-        for item_id, item_views in views['id'].items():
-            data.append((item_id, item_views))
-
-        # do a batch insert of values from the current "page" of results
-        sql = 'INSERT INTO items(id, views) VALUES %s ON CONFLICT(id) DO UPDATE SET views=excluded.views'
-        psycopg2.extras.execute_values(cursor, sql, data, template='(%s, %s)')
-        db.commit()
-
-        # clear all items from the list so we can populate it with the next batch
-        data.clear()
-
-        results_current_page += 1
-
-    cursor.close()
+    with DatabaseManager() as db:
+        with db.cursor() as cursor:
+            # create an empty list to store values for batch insertion
+            data = []
+
+            while results_current_page <= results_num_pages:
+                print('Indexing item views (page {} of {})'.format(results_current_page, results_num_pages))
+
+                res = solr.query('statistics', {
+                    'q': 'type:2',
+                    'fq': 'isBot:false AND statistics_type:view',
+                    'facet': True,
+                    'facet.field': 'id',
+                    'facet.mincount': 1,
+                    'facet.limit': results_per_page,
+                    'facet.offset': results_current_page * results_per_page
+                }, rows=0)
+
+                # SolrClient's get_facets() returns a dict of dicts
+                views = res.get_facets()
+                # in this case iterate over the 'id' dict and get the item ids and views
+                for item_id, item_views in views['id'].items():
+                    data.append((item_id, item_views))
+
+                # do a batch insert of values from the current "page" of results
+                sql = 'INSERT INTO items(id, views) VALUES %s ON CONFLICT(id) DO UPDATE SET views=excluded.views'
+                psycopg2.extras.execute_values(cursor, sql, data, template='(%s, %s)')
+                db.commit()
+
+                # clear all items from the list so we can populate it with the next batch
+                data.clear()
+
+                results_current_page += 1
 
 
 def index_downloads():
@@ -123,53 +121,55 @@ def index_downloads():
     results_num_pages = int(results_totalNumFacets / results_per_page)
     results_current_page = 0
 
-    cursor = db.cursor()
-
-    # create an empty list to store values for batch insertion
-    data = []
-
-    while results_current_page <= results_num_pages:
-        print('Indexing item downloads (page {} of {})'.format(results_current_page, results_num_pages))
-
-        res = solr.query('statistics', {
-            'q': 'type:0',
-            'fq': 'isBot:false AND statistics_type:view AND bundleName:ORIGINAL',
-            'facet': True,
-            'facet.field': 'owningItem',
-            'facet.mincount': 1,
-            'facet.limit': results_per_page,
-            'facet.offset': results_current_page * results_per_page
-        }, rows=0)
-
-        # SolrClient's get_facets() returns a dict of dicts
-        downloads = res.get_facets()
-        # in this case iterate over the 'owningItem' dict and get the item ids and downloads
-        for item_id, item_downloads in downloads['owningItem'].items():
-            data.append((item_id, item_downloads))
-
-        # do a batch insert of values from the current "page" of results
-        sql = 'INSERT INTO items(id, downloads) VALUES %s ON CONFLICT(id) DO UPDATE SET downloads=excluded.downloads'
-        psycopg2.extras.execute_values(cursor, sql, data, template='(%s, %s)')
-        db.commit()
-
-        # clear all items from the list so we can populate it with the next batch
-        data.clear()
-
-        results_current_page += 1
-
-    cursor.close()
+    with DatabaseManager() as db:
+        with db.cursor() as cursor:
+            # create an empty list to store values for batch insertion
+            data = []
+
+            while results_current_page <= results_num_pages:
+                print('Indexing item downloads (page {} of {})'.format(results_current_page, results_num_pages))
+
+                res = solr.query('statistics', {
+                    'q': 'type:0',
+                    'fq': 'isBot:false AND statistics_type:view AND bundleName:ORIGINAL',
+                    'facet': True,
+                    'facet.field': 'owningItem',
+                    'facet.mincount': 1,
+                    'facet.limit': results_per_page,
+                    'facet.offset': results_current_page * results_per_page
+                }, rows=0)
+
+                # SolrClient's get_facets() returns a dict of dicts
+                downloads = res.get_facets()
+                # in this case iterate over the 'owningItem' dict and get the item ids and downloads
+                for item_id, item_downloads in downloads['owningItem'].items():
+                    data.append((item_id, item_downloads))
+
+                # do a batch insert of values from the current "page" of results
+                sql = 'INSERT INTO items(id, downloads) VALUES %s ON CONFLICT(id) DO UPDATE SET downloads=excluded.downloads'
+                psycopg2.extras.execute_values(cursor, sql, data, template='(%s, %s)')
+                db.commit()
+
+                # clear all items from the list so we can populate it with the next batch
+                data.clear()
+
+                results_current_page += 1
 
 
-db = database_connection()
 solr = solr_connection()
 
-# create table to store item views and downloads
-cursor = db.cursor()
-cursor.execute('''CREATE TABLE IF NOT EXISTS items
+with DatabaseManager() as db:
+    with db.cursor() as cursor:
+        # create table to store item views and downloads
+        cursor.execute('''CREATE TABLE IF NOT EXISTS items
                   (id INT PRIMARY KEY, views INT DEFAULT 0, downloads INT DEFAULT 0)''')
-db.commit()
+
+    # commit the table creation before closing the database connection
+    db.commit()
 
 index_views()
 index_downloads()
 
-db.close()
-
 # vim: set sw=4 ts=4 expandtab:
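
The batch writes in both indexers pair psycopg2.extras.execute_values(), which expands the single VALUES %s placeholder into one value group per tuple in the list, with PostgreSQL's INSERT ... ON CONFLICT upsert so that re-running the indexer updates existing rows instead of failing. A standalone sketch of the technique, with illustrative connection parameters and data:

import psycopg2
import psycopg2.extras

# connection parameters here are illustrative
connection = psycopg2.connect('dbname=example user=example')

with connection.cursor() as cursor:
    # (id, views) tuples, like the ones collected from the Solr facets
    data = [(1, 100), (2, 250)]
    sql = 'INSERT INTO items(id, views) VALUES %s ON CONFLICT(id) DO UPDATE SET views=excluded.views'
    # each tuple is rendered with the '(%s, %s)' template and the groups are joined after VALUES
    psycopg2.extras.execute_values(cursor, sql, data, template='(%s, %s)')

connection.commit()
connection.close()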