mirror of
https://github.com/ilri/dspace-statistics-api.git
synced 2025-05-11 07:36:02 +02:00
Compare commits
8 Commits
Author | SHA1 | Date | |
---|---|---|---|
7499b89d99
|
|||
2c1e4952b1
|
|||
379f202c3f
|
|||
560fa6056d
|
|||
385a34e5d0
|
|||
d0ea62d2bd
|
|||
366ae25b8e
|
|||
0f3054ae03
|
@@ -4,6 +4,10 @@ All notable changes to this project will be documented in this file.
|
|||||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||||
|
|
||||||
|
## [0.4.1] - 2018-09-26
|
||||||
|
### Changed
|
||||||
|
- Use execute_values() to batch insert records to PostgreSQL
|
||||||
|
|
||||||
## [0.4.0] - 2018-09-25
|
## [0.4.0] - 2018-09-25
|
||||||
### Fixed
|
### Fixed
|
||||||
- Invalid OnCalendar syntax in dspace-statistics-indexer.timer
|
- Invalid OnCalendar syntax in dspace-statistics-indexer.timer
|
||||||
|
@@ -2,8 +2,7 @@ from config import DATABASE_NAME
|
|||||||
from config import DATABASE_USER
|
from config import DATABASE_USER
|
||||||
from config import DATABASE_PASS
|
from config import DATABASE_PASS
|
||||||
from config import DATABASE_HOST
|
from config import DATABASE_HOST
|
||||||
import psycopg2
|
import psycopg2, psycopg2.extras
|
||||||
import psycopg2.extras
|
|
||||||
|
|
||||||
def database_connection():
|
def database_connection():
|
||||||
connection = psycopg2.connect("dbname={} user={} password={} host='{}'".format(DATABASE_NAME, DATABASE_USER, DATABASE_PASS, DATABASE_HOST), cursor_factory=psycopg2.extras.DictCursor)
|
connection = psycopg2.connect("dbname={} user={} password={} host='{}'".format(DATABASE_NAME, DATABASE_USER, DATABASE_PASS, DATABASE_HOST), cursor_factory=psycopg2.extras.DictCursor)
|
||||||
|
30
indexer.py
30
indexer.py
@@ -32,6 +32,7 @@
|
|||||||
|
|
||||||
from database import database_connection
|
from database import database_connection
|
||||||
import json
|
import json
|
||||||
|
import psycopg2.extras
|
||||||
from solr import solr_connection
|
from solr import solr_connection
|
||||||
|
|
||||||
def index_views():
|
def index_views():
|
||||||
@@ -64,6 +65,9 @@ def index_views():
|
|||||||
|
|
||||||
cursor = db.cursor()
|
cursor = db.cursor()
|
||||||
|
|
||||||
|
# create an empty list to store values for batch insertion
|
||||||
|
data = []
|
||||||
|
|
||||||
while results_current_page <= results_num_pages:
|
while results_current_page <= results_num_pages:
|
||||||
print('Indexing item views (page {} of {})'.format(results_current_page, results_num_pages))
|
print('Indexing item views (page {} of {})'.format(results_current_page, results_num_pages))
|
||||||
|
|
||||||
@@ -77,19 +81,20 @@ def index_views():
|
|||||||
'facet.offset':results_current_page * results_per_page
|
'facet.offset':results_current_page * results_per_page
|
||||||
}, rows=0)
|
}, rows=0)
|
||||||
|
|
||||||
# check number of facets returned in the last query
|
|
||||||
#results_currentNumFacets = len(res.get_facets()['id'])
|
|
||||||
|
|
||||||
# SolrClient's get_facets() returns a dict of dicts
|
# SolrClient's get_facets() returns a dict of dicts
|
||||||
views = res.get_facets()
|
views = res.get_facets()
|
||||||
# in this case iterate over the 'id' dict and get the item ids and views
|
# in this case iterate over the 'id' dict and get the item ids and views
|
||||||
for item_id, item_views in views['id'].items():
|
for item_id, item_views in views['id'].items():
|
||||||
cursor.execute('''INSERT INTO items(id, views) VALUES(%s, %s)
|
data.append((item_id, item_views))
|
||||||
ON CONFLICT(id) DO UPDATE SET downloads=excluded.views''',
|
|
||||||
(item_id, item_views))
|
|
||||||
|
|
||||||
|
# do a batch insert of values from the current "page" of results
|
||||||
|
sql = 'INSERT INTO items(id, views) VALUES %s ON CONFLICT(id) DO UPDATE SET downloads=excluded.views'
|
||||||
|
psycopg2.extras.execute_values(cursor, sql, data, template='(%s, %s)')
|
||||||
db.commit()
|
db.commit()
|
||||||
|
|
||||||
|
# clear all items from the list so we can populate it with the next batch
|
||||||
|
data.clear()
|
||||||
|
|
||||||
results_current_page += 1
|
results_current_page += 1
|
||||||
|
|
||||||
cursor.close()
|
cursor.close()
|
||||||
@@ -119,6 +124,9 @@ def index_downloads():
|
|||||||
|
|
||||||
cursor = db.cursor()
|
cursor = db.cursor()
|
||||||
|
|
||||||
|
# create an empty list to store values for batch insertion
|
||||||
|
data = []
|
||||||
|
|
||||||
while results_current_page <= results_num_pages:
|
while results_current_page <= results_num_pages:
|
||||||
print('Indexing item downloads (page {} of {})'.format(results_current_page, results_num_pages))
|
print('Indexing item downloads (page {} of {})'.format(results_current_page, results_num_pages))
|
||||||
|
|
||||||
@@ -136,12 +144,16 @@ def index_downloads():
|
|||||||
downloads = res.get_facets()
|
downloads = res.get_facets()
|
||||||
# in this case iterate over the 'owningItem' dict and get the item ids and downloads
|
# in this case iterate over the 'owningItem' dict and get the item ids and downloads
|
||||||
for item_id, item_downloads in downloads['owningItem'].items():
|
for item_id, item_downloads in downloads['owningItem'].items():
|
||||||
cursor.execute('''INSERT INTO items(id, downloads) VALUES(%s, %s)
|
data.append((item_id, item_downloads))
|
||||||
ON CONFLICT(id) DO UPDATE SET downloads=excluded.downloads''',
|
|
||||||
(item_id, item_downloads))
|
|
||||||
|
|
||||||
|
# do a batch insert of values from the current "page" of results
|
||||||
|
sql = 'INSERT INTO items(id, downloads) VALUES %s ON CONFLICT(id) DO UPDATE SET downloads=excluded.downloads'
|
||||||
|
psycopg2.extras.execute_values(cursor, sql, data, template='(%s, %s)')
|
||||||
db.commit()
|
db.commit()
|
||||||
|
|
||||||
|
# clear all items from the list so we can populate it with the next batch
|
||||||
|
data.clear()
|
||||||
|
|
||||||
results_current_page += 1
|
results_current_page += 1
|
||||||
|
|
||||||
cursor.close()
|
cursor.close()
|
||||||
|
Reference in New Issue
Block a user