mirror of https://github.com/ilri/dspace-statistics-api.git synced 2024-06-26 16:13:47 +02:00

Merge pull request #4 from ilri/database-refactor

Database refactor
Alan Orth 2018-11-07 17:54:04 +02:00 committed by GitHub
commit a6ce44e852
5 changed files with 109 additions and 102 deletions

CHANGELOG.md

@@ -10,6 +10,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 - Hound CI configuration to validate pull requests against PEP 8 code style with Flake8
 - Configuration for [pipenv](https://pipenv.readthedocs.io/en/latest/)

+## Changed
+- Use a database management class with Python context management to automatically open/close connections and cursors
+
 ## Changed
 - Validate code against PEP 8 style guide with Flake8
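For context on the new changelog entry: "Python context management" refers to the protocol behind the with statement, which calls an object's __enter__ method on entry and its __exit__ method on exit, even when the body raises. A minimal illustrative sketch, not code from this commit:

class ManagedResource:
    '''Toy context manager: acquire on entry, release on exit.'''
    def __enter__(self):
        print('acquiring resource')
        return self  # this is the value that "with ... as name" binds

    def __exit__(self, exc_type, exc_value, exc_traceback):
        # runs even if the with-body raised an exception
        print('releasing resource')

with ManagedResource() as resource:
    print('using resource')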

README.md

@@ -70,7 +70,6 @@ The item id is the *internal* id for an item. You can get these from the standard DSpace REST API.
 ## Todo

-- Close DB connection when gunicorn shuts down gracefully
 - Better logging
 - Tests
 - Check if database exists (try/except)

app.py

@@ -1,9 +1,6 @@
-from .database import database_connection
+from .database import DatabaseManager
 import falcon

-db = database_connection()
-db.set_session(readonly=True)

 class RootResource:
     def on_get(self, req, resp):
@@ -21,23 +18,23 @@ class AllItemsResource:
         page = req.get_param_as_int("page", min=0) or 0
         offset = limit * page

-        cursor = db.cursor()
+        with DatabaseManager() as db:
+            db.set_session(readonly=True)

-        # get total number of items so we can estimate the pages
-        cursor.execute('SELECT COUNT(id) FROM items')
-        pages = round(cursor.fetchone()[0] / limit)
+            with db.cursor() as cursor:
+                # get total number of items so we can estimate the pages
+                cursor.execute('SELECT COUNT(id) FROM items')
+                pages = round(cursor.fetchone()[0] / limit)

-        # get statistics, ordered by id, and use limit and offset to page through results
-        cursor.execute('SELECT id, views, downloads FROM items ORDER BY id ASC LIMIT {} OFFSET {}'.format(limit, offset))
+                # get statistics, ordered by id, and use limit and offset to page through results
+                cursor.execute('SELECT id, views, downloads FROM items ORDER BY id ASC LIMIT {} OFFSET {}'.format(limit, offset))

-        # create a list to hold dicts of item stats
-        statistics = list()
+                # create a list to hold dicts of item stats
+                statistics = list()

-        # iterate over results and build statistics object
-        for item in cursor:
-            statistics.append({'id': item['id'], 'views': item['views'], 'downloads': item['downloads']})
-        cursor.close()
+                # iterate over results and build statistics object
+                for item in cursor:
+                    statistics.append({'id': item['id'], 'views': item['views'], 'downloads': item['downloads']})

         message = {
             'currentPage': page,
@@ -53,25 +50,27 @@ class ItemResource:
     def on_get(self, req, resp, item_id):
         """Handles GET requests"""

-        cursor = db.cursor()
-        cursor.execute('SELECT views, downloads FROM items WHERE id={}'.format(item_id))
-        if cursor.rowcount == 0:
-            raise falcon.HTTPNotFound(
-                title='Item not found',
-                description='The item with id "{}" was not found.'.format(item_id)
-            )
-        else:
-            results = cursor.fetchone()
+        with DatabaseManager() as db:
+            db.set_session(readonly=True)

-            statistics = {
-                'id': item_id,
-                'views': results['views'],
-                'downloads': results['downloads']
-            }
+            with db.cursor() as cursor:
+                cursor = db.cursor()
+                cursor.execute('SELECT views, downloads FROM items WHERE id={}'.format(item_id))
+                if cursor.rowcount == 0:
+                    raise falcon.HTTPNotFound(
+                        title='Item not found',
+                        description='The item with id "{}" was not found.'.format(item_id)
+                    )
+                else:
+                    results = cursor.fetchone()

-            resp.media = statistics
+                    statistics = {
+                        'id': item_id,
+                        'views': results['views'],
+                        'downloads': results['downloads']
+                    }

-        cursor.close()
+                    resp.media = statistics

 api = application = falcon.API()
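For reference, a quick way to exercise the refactored API once it is running. This is a hypothetical smoke test: the host, port, and the /items and /item/{id} routes are assumptions based on the project README, not part of this diff:

import requests  # third-party HTTP client, used here only for illustration

# paged list of item statistics
r = requests.get('http://localhost:8000/items', params={'limit': 10, 'page': 0})
print(r.json())  # e.g. {'currentPage': 0, ..., 'statistics': [...]}

# statistics for a single item by its internal id; 17 is a made-up example
r = requests.get('http://localhost:8000/item/17')
print(r.json())  # e.g. {'id': 17, 'views': ..., 'downloads': ...} or a 404 body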

database.py

@@ -7,9 +7,17 @@ import psycopg2
 import psycopg2.extras

-def database_connection():
-    connection = psycopg2.connect("dbname={} user={} password={} host={} port={}".format(DATABASE_NAME, DATABASE_USER, DATABASE_PASS, DATABASE_HOST, DATABASE_PORT), cursor_factory=psycopg2.extras.DictCursor)
+class DatabaseManager():
+    '''Manage database connection.'''

-    return connection
+    def __init__(self):
+        self._connection_uri = 'dbname={} user={} password={} host={} port={}'.format(DATABASE_NAME, DATABASE_USER, DATABASE_PASS, DATABASE_HOST, DATABASE_PORT)
+
+    def __enter__(self):
+        self._connection = psycopg2.connect(self._connection_uri, cursor_factory=psycopg2.extras.DictCursor)
+        return self._connection
+
+    def __exit__(self, exc_type, exc_value, exc_traceback):
+        self._connection.close()

 # vim: set sw=4 ts=4 expandtab:
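Taken together with the diff above, usage of the new class looks like the sketch below, under two assumptions: it runs inside the same package as database.py, and the configured PostgreSQL database is reachable with an "items" table:

# assumes execution inside the package that contains database.py,
# and a reachable PostgreSQL database with an "items" table
from .database import DatabaseManager

with DatabaseManager() as db:        # __enter__ opens the connection
    db.set_session(readonly=True)    # read-only session, as the API resources do

    with db.cursor() as cursor:      # psycopg2 cursors are context managers too
        cursor.execute('SELECT COUNT(id) FROM items')
        print(cursor.fetchone()[0])

# __exit__ has closed the connection here; uncommitted writes would be rolled back

Note the design choice: __exit__ only closes the connection and never commits, so code that writes (like the indexer below) must call db.commit() explicitly before the block ends.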

indexer.py

@@ -29,7 +29,7 @@
 # See: https://solrclient.readthedocs.io/en/latest/SolrClient.html
 # See: https://wiki.duraspace.org/display/DSPACE/Solr

-from .database import database_connection
+from .database import DatabaseManager
 import json
 import psycopg2.extras
 from .solr import solr_connection
@@ -63,41 +63,39 @@ def index_views():
     results_num_pages = int(results_totalNumFacets / results_per_page)
     results_current_page = 0

-    cursor = db.cursor()
+    with DatabaseManager() as db:
+        with db.cursor() as cursor:

-    # create an empty list to store values for batch insertion
-    data = []
+            # create an empty list to store values for batch insertion
+            data = []

-    while results_current_page <= results_num_pages:
-        print('Indexing item views (page {} of {})'.format(results_current_page, results_num_pages))
+            while results_current_page <= results_num_pages:
+                print('Indexing item views (page {} of {})'.format(results_current_page, results_num_pages))

-        res = solr.query('statistics', {
-            'q': 'type:2',
-            'fq': 'isBot:false AND statistics_type:view',
-            'facet': True,
-            'facet.field': 'id',
-            'facet.mincount': 1,
-            'facet.limit': results_per_page,
-            'facet.offset': results_current_page * results_per_page
-        }, rows=0)
+                res = solr.query('statistics', {
+                    'q': 'type:2',
+                    'fq': 'isBot:false AND statistics_type:view',
+                    'facet': True,
+                    'facet.field': 'id',
+                    'facet.mincount': 1,
+                    'facet.limit': results_per_page,
+                    'facet.offset': results_current_page * results_per_page
+                }, rows=0)

-        # SolrClient's get_facets() returns a dict of dicts
-        views = res.get_facets()
-        # in this case iterate over the 'id' dict and get the item ids and views
-        for item_id, item_views in views['id'].items():
-            data.append((item_id, item_views))
+                # SolrClient's get_facets() returns a dict of dicts
+                views = res.get_facets()
+                # in this case iterate over the 'id' dict and get the item ids and views
+                for item_id, item_views in views['id'].items():
+                    data.append((item_id, item_views))

-        # do a batch insert of values from the current "page" of results
-        sql = 'INSERT INTO items(id, views) VALUES %s ON CONFLICT(id) DO UPDATE SET views=excluded.views'
-        psycopg2.extras.execute_values(cursor, sql, data, template='(%s, %s)')
-        db.commit()
+                # do a batch insert of values from the current "page" of results
+                sql = 'INSERT INTO items(id, views) VALUES %s ON CONFLICT(id) DO UPDATE SET views=excluded.views'
+                psycopg2.extras.execute_values(cursor, sql, data, template='(%s, %s)')
+                db.commit()

-        # clear all items from the list so we can populate it with the next batch
-        data.clear()
+                # clear all items from the list so we can populate it with the next batch
+                data.clear()

-        results_current_page += 1
-
-    cursor.close()
+                results_current_page += 1

 def index_downloads():
@@ -123,53 +121,53 @@ def index_downloads():
     results_num_pages = int(results_totalNumFacets / results_per_page)
     results_current_page = 0

-    cursor = db.cursor()
+    with DatabaseManager() as db:
+        with db.cursor() as cursor:

-    # create an empty list to store values for batch insertion
-    data = []
+            # create an empty list to store values for batch insertion
+            data = []

-    while results_current_page <= results_num_pages:
-        print('Indexing item downloads (page {} of {})'.format(results_current_page, results_num_pages))
+            while results_current_page <= results_num_pages:
+                print('Indexing item downloads (page {} of {})'.format(results_current_page, results_num_pages))

-        res = solr.query('statistics', {
-            'q': 'type:0',
-            'fq': 'isBot:false AND statistics_type:view AND bundleName:ORIGINAL',
-            'facet': True,
-            'facet.field': 'owningItem',
-            'facet.mincount': 1,
-            'facet.limit': results_per_page,
-            'facet.offset': results_current_page * results_per_page
-        }, rows=0)
+                res = solr.query('statistics', {
+                    'q': 'type:0',
+                    'fq': 'isBot:false AND statistics_type:view AND bundleName:ORIGINAL',
+                    'facet': True,
+                    'facet.field': 'owningItem',
+                    'facet.mincount': 1,
+                    'facet.limit': results_per_page,
+                    'facet.offset': results_current_page * results_per_page
+                }, rows=0)

-        # SolrClient's get_facets() returns a dict of dicts
-        downloads = res.get_facets()
-        # in this case iterate over the 'owningItem' dict and get the item ids and downloads
-        for item_id, item_downloads in downloads['owningItem'].items():
-            data.append((item_id, item_downloads))
+                # SolrClient's get_facets() returns a dict of dicts
+                downloads = res.get_facets()
+                # in this case iterate over the 'owningItem' dict and get the item ids and downloads
+                for item_id, item_downloads in downloads['owningItem'].items():
+                    data.append((item_id, item_downloads))

-        # do a batch insert of values from the current "page" of results
-        sql = 'INSERT INTO items(id, downloads) VALUES %s ON CONFLICT(id) DO UPDATE SET downloads=excluded.downloads'
-        psycopg2.extras.execute_values(cursor, sql, data, template='(%s, %s)')
-        db.commit()
+                # do a batch insert of values from the current "page" of results
+                sql = 'INSERT INTO items(id, downloads) VALUES %s ON CONFLICT(id) DO UPDATE SET downloads=excluded.downloads'
+                psycopg2.extras.execute_values(cursor, sql, data, template='(%s, %s)')
+                db.commit()

-        # clear all items from the list so we can populate it with the next batch
-        data.clear()
+                # clear all items from the list so we can populate it with the next batch
+                data.clear()

-        results_current_page += 1
-
-    cursor.close()
+                results_current_page += 1

-db = database_connection()
 solr = solr_connection()

-# create table to store item views and downloads
-cursor = db.cursor()
-cursor.execute('''CREATE TABLE IF NOT EXISTS items
-              (id INT PRIMARY KEY, views INT DEFAULT 0, downloads INT DEFAULT 0)''')
+with DatabaseManager() as db:
+    with db.cursor() as cursor:
+        # create table to store item views and downloads
+        cursor.execute('''CREATE TABLE IF NOT EXISTS items
+                  (id INT PRIMARY KEY, views INT DEFAULT 0, downloads INT DEFAULT 0)''')

-# commit the table creation before closing the database connection
-db.commit()
+    # commit the table creation before closing the database connection
+    db.commit()

 index_views()
 index_downloads()

-db.close()

 # vim: set sw=4 ts=4 expandtab:
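The batch-upsert pattern used by both indexing functions is psycopg2's execute_values() combined with PostgreSQL's INSERT ... ON CONFLICT. A self-contained sketch of the same technique; the connection string is a placeholder, not this project's configuration:

import psycopg2
import psycopg2.extras

# placeholder credentials for illustration only
connection = psycopg2.connect('dbname=example user=example password=example host=localhost port=5432')

with connection:  # psycopg2 commits on clean exit from "with connection:"
    with connection.cursor() as cursor:
        cursor.execute('''CREATE TABLE IF NOT EXISTS items
                  (id INT PRIMARY KEY, views INT DEFAULT 0, downloads INT DEFAULT 0)''')

        # one (id, views) tuple per item, as the indexer builds from Solr facets
        data = [(1, 100), (2, 42)]

        # insert new ids, or update views for ids that already exist
        sql = 'INSERT INTO items(id, views) VALUES %s ON CONFLICT(id) DO UPDATE SET views=excluded.views'
        psycopg2.extras.execute_values(cursor, sql, data, template='(%s, %s)')

connection.close()

Unlike "with connection:" here, which commits on exit, the DatabaseManager above only closes the connection, which is why the indexer calls db.commit() inside its with-block.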