mirror of
https://github.com/ilri/dspace-statistics-api.git
synced 2024-12-22 12:42:19 +01:00
indexer.py: Use single items table with UPSERT
I was using two separate tables for item views and downloads without realizing that SQLite didn't support FULL OUTER JOIN, which would be needed to get views and downloads for a given item in a single query. Instead I can use one table with a default value of 0 for both views and downloads, and then use "UPSERT" to populate the statistics. This is a newish SQL concept that allows you to attempt an INSERT and then specify an action to perform in case of conflict. This works well in SQLite and actually simplifies my Python logic greatly! Note that the "excluded" table qualifier is a special keyword that allows you to reference the value that would have been inserted. See: https://www.sqlite.org/lang_UPSERT.html
This commit is contained in:
parent
53615dea2d
commit
2cab456f16
16
indexer.py
16
indexer.py
@ -70,7 +70,9 @@ def index_views():
|
||||
views = res.get_facets()
|
||||
# in this case iterate over the 'id' dict and get the item ids and views
|
||||
for item_id, item_views in views['id'].items():
|
||||
db.execute('''REPLACE INTO itemviews VALUES (?, ?)''', (item_id, item_views))
|
||||
db.execute('''INSERT INTO items(id, views) VALUES(?, ?)
|
||||
ON CONFLICT(id) DO UPDATE SET downloads=excluded.views''',
|
||||
(item_id, item_views))
|
||||
|
||||
db.commit()
|
||||
|
||||
@ -111,7 +113,9 @@ def index_downloads():
|
||||
downloads = res.get_facets()
|
||||
# in this case iterate over the 'owningItem' dict and get the item ids and downloads
|
||||
for item_id, item_downloads in downloads['owningItem'].items():
|
||||
db.execute('''REPLACE INTO itemdownloads VALUES (?, ?)''', (item_id, item_downloads))
|
||||
db.execute('''INSERT INTO items(id, downloads) VALUES(?, ?)
|
||||
ON CONFLICT(id) DO UPDATE SET downloads=excluded.downloads''',
|
||||
(item_id, item_downloads))
|
||||
|
||||
db.commit()
|
||||
|
||||
@ -120,11 +124,9 @@ def index_downloads():
|
||||
db = database_connection_rw()
|
||||
solr = solr_connection()
|
||||
|
||||
# use separate views and downloads tables so we can REPLACE INTO carelessly (ie, item may have views but no downloads)
|
||||
db.execute('''CREATE TABLE IF NOT EXISTS itemviews
|
||||
(id integer primary key, views integer)''')
|
||||
db.execute('''CREATE TABLE IF NOT EXISTS itemdownloads
|
||||
(id integer primary key, downloads integer)''')
|
||||
# create table to store item views and downloads
|
||||
db.execute('''CREATE TABLE IF NOT EXISTS items
|
||||
(id INT PRIMARY KEY, views INT DEFAULT 0, downloads INT DEFAULT 0)''')
|
||||
index_views()
|
||||
index_downloads()
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user