From 8f7450f67a28e6815bfd1fa85c8ec6845c472ef0 Mon Sep 17 00:00:00 2001 From: Alan Orth Date: Tue, 25 Sep 2018 00:49:47 +0300 Subject: [PATCH] Use PostgreSQL instead of SQLite I was very surprised how easy and fast and robust SQLite was, but in the end I realized that its UPSERT support only came in version 3.24 and both Ubuntu 16.04 and 18.04 have older versions than that! I did manage to install libsqlite3-0 from Ubuntu 18.04 cosmic on my xenial host, but that feels dirty. PostgreSQL has support for UPSERT since 9.5, not to mention the same nice LIMIT and OFFSET clauses. --- app.py | 5 +++-- config.py | 5 ++++- database.py | 20 +++++++------------- indexer.py | 19 ++++++++++++++----- 4 files changed, 28 insertions(+), 21 deletions(-) diff --git a/app.py b/app.py index 6eb12da..f7cde11 100644 --- a/app.py +++ b/app.py @@ -2,11 +2,12 @@ # See DSpace Solr docs for tips about parameters # https://wiki.duraspace.org/display/DSPACE/Solr -from database import database_connection_ro +from database import database_connection import falcon from solr import solr_connection -db = database_connection_ro() +db = database_connection() +db.set_session(readonly=True) solr = solr_connection() class AllItemsResource: diff --git a/config.py b/config.py index b2ed17b..289fb06 100644 --- a/config.py +++ b/config.py @@ -3,6 +3,9 @@ import os # Check if Solr connection information was provided in the environment SOLR_SERVER = os.environ.get('SOLR_SERVER', 'http://localhost:8080/solr') -SQLITE_DB = os.environ.get('SQLITE_DB', 'statistics.db') +DATABASE_NAME = os.environ.get('DATABASE_NAME', 'dspacestatistics') +DATABASE_USER = os.environ.get('DATABASE_USER', 'dspacestatistics') +DATABASE_PASS = os.environ.get('DATABASE_PASS', 'dspacestatistics') +DATABASE_HOST = os.environ.get('DATABASE_HOST', 'localhost') # vim: set sw=4 ts=4 expandtab: diff --git a/database.py b/database.py index 392bb97..f34dd3f 100644 --- a/database.py +++ b/database.py @@ -1,17 +1,11 @@ -from config import SQLITE_DB 
-import sqlite3 +from config import DATABASE_NAME +from config import DATABASE_USER +from config import DATABASE_PASS +from config import DATABASE_HOST +import psycopg2 -def database_connection_rw(): - connection = sqlite3.connect(SQLITE_DB) - # allow iterating over row results by column key - connection.row_factory = sqlite3.Row - - return connection - -def database_connection_ro(): - connection = sqlite3.connect('file:{0}?mode=ro'.format(SQLITE_DB), uri=True) - # allow iterating over row results by column key - connection.row_factory = sqlite3.Row +def database_connection(): + connection = psycopg2.connect("dbname={} user={} password={} host='{}'".format(DATABASE_NAME, DATABASE_USER, DATABASE_PASS, DATABASE_HOST)) return connection diff --git a/indexer.py b/indexer.py index a5f4cdf..6e62481 100755 --- a/indexer.py +++ b/indexer.py @@ -32,7 +32,7 @@ # # Tested with Python 3.5 and 3.6. -from database import database_connection_rw +from database import database_connection from solr import solr_connection def index_views(): @@ -52,6 +52,8 @@ def index_views(): results_num_pages = round(results_numFound / results_per_page) results_current_page = 0 + cursor = db.cursor() + while results_current_page <= results_num_pages: print('Page {0} of {1}.'.format(results_current_page, results_num_pages)) @@ -70,7 +72,7 @@ def index_views(): views = res.get_facets() # in this case iterate over the 'id' dict and get the item ids and views for item_id, item_views in views['id'].items(): - db.execute('''INSERT INTO items(id, views) VALUES(?, ?) 
+ cursor.execute('''INSERT INTO items(id, views) VALUES(%s, %s) ON CONFLICT(id) DO UPDATE SET views=excluded.views''', (item_id, item_views)) @@ -78,6 +80,8 @@ def index_views(): results_current_page += 1 + cursor.close() + def index_downloads(): print("Populating database with item downloads.") @@ -95,6 +99,8 @@ def index_downloads(): results_num_pages = round(results_numFound / results_per_page) results_current_page = 0 + cursor = db.cursor() + while results_current_page <= results_num_pages: print('Page {0} of {1}.'.format(results_current_page, results_num_pages)) @@ -113,7 +119,7 @@ def index_downloads(): downloads = res.get_facets() # in this case iterate over the 'owningItem' dict and get the item ids and downloads for item_id, item_downloads in downloads['owningItem'].items(): - db.execute('''INSERT INTO items(id, downloads) VALUES(?, ?) + cursor.execute('''INSERT INTO items(id, downloads) VALUES(%s, %s) ON CONFLICT(id) DO UPDATE SET downloads=excluded.downloads''', (item_id, item_downloads)) @@ -121,11 +127,14 @@ def index_downloads(): results_current_page += 1 -db = database_connection_rw() + cursor.close() + +db = database_connection() solr = solr_connection() # create table to store item views and downloads -db.execute('''CREATE TABLE IF NOT EXISTS items +cursor = db.cursor() +cursor.execute('''CREATE TABLE IF NOT EXISTS items (id INT PRIMARY KEY, views INT DEFAULT 0, downloads INT DEFAULT 0)''') index_views() index_downloads()