mirror of
https://github.com/ilri/dspace-statistics-api.git
synced 2025-05-10 07:06:01 +02:00
Compare commits
14 Commits
Author | SHA1 | Date | |
---|---|---|---|
914ec52fbb
|
|||
5524066656
|
|||
043d897cef
|
|||
bd28353cda
|
|||
e23d66c2a2
|
|||
40e284dac0
|
|||
934fa9db9b
|
|||
1fabb72b58
|
|||
c7f95f0b60
|
|||
c95a98dd2d
|
|||
3f70f94a10
|
|||
9b8ad9defd | |||
d69ab20220
|
|||
378f56ddc2
|
@ -4,6 +4,12 @@ All notable changes to this project will be documented in this file.
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||
|
||||
## [0.9.0] - 2019-01-22
|
||||
### Updated
|
||||
- pytest version 4.0.0
|
||||
- Fix indexing of sharded statistics cores ([#10))
|
||||
- Handle case of missing views/downloads gracefully
|
||||
|
||||
## [0.8.1] - 2018-11-14
|
||||
### Changed
|
||||
- README.md to recommend using vanilla Python virtual environments and pip instead of pipenv
|
||||
|
50
Pipfile.lock
generated
50
Pipfile.lock
generated
@ -77,10 +77,10 @@
|
||||
},
|
||||
"six": {
|
||||
"hashes": [
|
||||
"sha256:70e8a77beed4562e7f14fe23a786b54f6296e34344c23bc42f07b15018ff98e9",
|
||||
"sha256:832dc0e10feb1aa2c68dcc57dbb658f1c7e65b9b61af69048abc87a2db00a0eb"
|
||||
"sha256:3350809f0555b11f552448330d0b52d5f24c91a322ea4a15ef22629740f3761c",
|
||||
"sha256:d16a0141ec1a18405cd4ce8b4613101da75da0e9a7aec5bdd4fa804d0e0eba73"
|
||||
],
|
||||
"version": "==1.11.0"
|
||||
"version": "==1.12.0"
|
||||
},
|
||||
"solrclient": {
|
||||
"git": "https://github.com/alanorth/SolrClient.git",
|
||||
@ -126,11 +126,11 @@
|
||||
},
|
||||
"ipython": {
|
||||
"hashes": [
|
||||
"sha256:a5781d6934a3341a1f9acb4ea5acdc7ea0a0855e689dbe755d070ca51e995435",
|
||||
"sha256:b10a7ddd03657c761fc503495bc36471c8158e3fc948573fb9fe82a7029d8efd"
|
||||
"sha256:6a9496209b76463f1dec126ab928919aaf1f55b38beb9219af3fe202f6bbdd12",
|
||||
"sha256:f69932b1e806b38a7818d9a1e918e5821b685715040b48e59c657b3c7961b742"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==7.1.1"
|
||||
"version": "==7.2.0"
|
||||
},
|
||||
"ipython-genutils": {
|
||||
"hashes": [
|
||||
@ -141,10 +141,10 @@
|
||||
},
|
||||
"jedi": {
|
||||
"hashes": [
|
||||
"sha256:0191c447165f798e6a730285f2eee783fff81b0d3df261945ecb80983b5c3ca7",
|
||||
"sha256:b7493f73a2febe0dc33d51c99b474547f7f6c0b2c8fb2b21f453eef204c12148"
|
||||
"sha256:571702b5bd167911fe9036e5039ba67f820d6502832285cde8c881ab2b2149fd",
|
||||
"sha256:c8481b5e59d34a5c7c42e98f6625e633f6ef59353abea6437472c7ec2093f191"
|
||||
],
|
||||
"version": "==0.13.1"
|
||||
"version": "==0.13.2"
|
||||
},
|
||||
"mccabe": {
|
||||
"hashes": [
|
||||
@ -155,11 +155,11 @@
|
||||
},
|
||||
"more-itertools": {
|
||||
"hashes": [
|
||||
"sha256:c187a73da93e7a8acc0001572aebc7e3c69daf7bf6881a2cea10650bd4420092",
|
||||
"sha256:c476b5d3a34e12d40130bc2f935028b5f636df8f372dc2c1c01dc19681b2039e",
|
||||
"sha256:fcbfeaea0be121980e15bc97b3817b5202ca73d0eae185b4550cbfce2a3ebb3d"
|
||||
"sha256:38a936c0a6d98a38bcc2d03fdaaedaba9f412879461dd2ceff8d37564d6522e4",
|
||||
"sha256:c0a5785b1109a6bd7fac76d6837fd1feca158e54e521ccd2ae8bfe393cc9d4fc",
|
||||
"sha256:fe7a7cae1ccb57d33952113ff4fa1bc5f879963600ed74918f1236e212ee50b9"
|
||||
],
|
||||
"version": "==4.3.0"
|
||||
"version": "==5.0.0"
|
||||
},
|
||||
"parso": {
|
||||
"hashes": [
|
||||
@ -185,10 +185,10 @@
|
||||
},
|
||||
"pluggy": {
|
||||
"hashes": [
|
||||
"sha256:447ba94990e8014ee25ec853339faf7b0fc8050cdc3289d4d71f7f410fb90095",
|
||||
"sha256:bde19360a8ec4dfd8a20dcb811780a30998101f078fc7ded6162f0076f50508f"
|
||||
"sha256:8ddc32f03971bfdf900a81961a48ccf2fb677cf7715108f85295c67405798616",
|
||||
"sha256:980710797ff6a041e9a73a5787804f848996ecaa6f8a1b1e08224a5894f2074a"
|
||||
],
|
||||
"version": "==0.8.0"
|
||||
"version": "==0.8.1"
|
||||
},
|
||||
"prompt-toolkit": {
|
||||
"hashes": [
|
||||
@ -228,25 +228,25 @@
|
||||
},
|
||||
"pygments": {
|
||||
"hashes": [
|
||||
"sha256:78f3f434bcc5d6ee09020f92ba487f95ba50f1e3ef83ae96b9d5ffa1bab25c5d",
|
||||
"sha256:dbae1046def0efb574852fab9e90209b23f556367b5a320c0bcb871c77c3e8cc"
|
||||
"sha256:5ffada19f6203563680669ee7f53b64dabbeb100eb51b61996085e99c03b284a",
|
||||
"sha256:e8218dd399a61674745138520d0d4cf2621d7e032439341bc3f647bff125818d"
|
||||
],
|
||||
"version": "==2.2.0"
|
||||
"version": "==2.3.1"
|
||||
},
|
||||
"pytest": {
|
||||
"hashes": [
|
||||
"sha256:3f193df1cfe1d1609d4c583838bea3d532b18d6160fd3f55c9447fdca30848ec",
|
||||
"sha256:e246cf173c01169b9617fc07264b7b1316e78d7a650055235d6d897bc80d9660"
|
||||
"sha256:41568ea7ecb4a68d7f63837cf65b92ce8d0105e43196ff2b26622995bb3dc4b2",
|
||||
"sha256:c3c573a29d7c9547fb90217ece8a8843aa0c1328a797e200290dc3d0b4b823be"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==3.10.1"
|
||||
"version": "==4.1.1"
|
||||
},
|
||||
"six": {
|
||||
"hashes": [
|
||||
"sha256:70e8a77beed4562e7f14fe23a786b54f6296e34344c23bc42f07b15018ff98e9",
|
||||
"sha256:832dc0e10feb1aa2c68dcc57dbb658f1c7e65b9b61af69048abc87a2db00a0eb"
|
||||
"sha256:3350809f0555b11f552448330d0b52d5f24c91a322ea4a15ef22629740f3761c",
|
||||
"sha256:d16a0141ec1a18405cd4ce8b4613101da75da0e9a7aec5bdd4fa804d0e0eba73"
|
||||
],
|
||||
"version": "==1.11.0"
|
||||
"version": "==1.12.0"
|
||||
},
|
||||
"traitlets": {
|
||||
"hashes": [
|
||||
|
@ -85,6 +85,7 @@ The item id is the *internal* id for an item. You can get these from the standar
|
||||
- Better logging
|
||||
- Version API
|
||||
- Use JSON in PostgreSQL
|
||||
- Make community and collection stats available
|
||||
- Switch to [Python 3.6+ f-string syntax](https://realpython.com/python-f-strings/)
|
||||
|
||||
## License
|
||||
|
@ -32,9 +32,56 @@
|
||||
from .database import DatabaseManager
|
||||
import json
|
||||
import psycopg2.extras
|
||||
import re
|
||||
import requests
|
||||
from .solr import solr_connection
|
||||
|
||||
|
||||
# Enumerate the cores in Solr to determine if statistics have been sharded into
|
||||
# yearly shards by DSpace's stats-util or not (for example: statistics-2018).
|
||||
def get_statistics_shards():
|
||||
# Initialize an empty list for statistics core years
|
||||
statistics_core_years = []
|
||||
|
||||
# URL for Solr status to check active cores
|
||||
solr_url = solr.host + '/admin/cores?action=STATUS&wt=json'
|
||||
res = requests.get(solr_url)
|
||||
|
||||
if res.status_code == requests.codes.ok:
|
||||
data = res.json()
|
||||
|
||||
# Iterate over active cores from Solr's STATUS response (cores are in
|
||||
# the status array of this response).
|
||||
for core in data['status']:
|
||||
# Pattern to match, for example: statistics-2018
|
||||
pattern = re.compile('^statistics-[0-9]{4}$')
|
||||
|
||||
if not pattern.match(core):
|
||||
continue
|
||||
|
||||
# Append current core to list
|
||||
statistics_core_years.append(core)
|
||||
|
||||
# Initialize a string to hold our shards (may end up being empty if the Solr
|
||||
# core has not been processed by stats-util).
|
||||
shards = str()
|
||||
|
||||
if len(statistics_core_years) > 0:
|
||||
# Begin building a string of shards starting with the default one
|
||||
shards = '{}/statistics'.format(solr.host)
|
||||
|
||||
for core in statistics_core_years:
|
||||
# Create a comma-separated list of shards to pass to our Solr query
|
||||
#
|
||||
# See: https://wiki.apache.org/solr/DistributedSearch
|
||||
shards += ',{}/{}'.format(solr.host, core)
|
||||
|
||||
# Return the string of shards, which may actually be empty. Solr doesn't
|
||||
# seem to mind if the shards query parameter is empty and I haven't seen
|
||||
# any negative performance impact so this should be fine.
|
||||
return shards
|
||||
|
||||
|
||||
def index_views():
|
||||
# get total number of distinct facets for items with a minimum of 1 view,
|
||||
# otherwise Solr returns all kinds of weird ids that are actually not in
|
||||
@ -52,11 +99,17 @@ def index_views():
|
||||
'facet.offset': 0,
|
||||
'stats': True,
|
||||
'stats.field': 'id',
|
||||
'stats.calcdistinct': True
|
||||
'stats.calcdistinct': True,
|
||||
'shards': shards
|
||||
}, rows=0)
|
||||
|
||||
# get total number of distinct facets (countDistinct)
|
||||
results_totalNumFacets = json.loads(res.get_json())['stats']['stats_fields']['id']['countDistinct']
|
||||
try:
|
||||
# get total number of distinct facets (countDistinct)
|
||||
results_totalNumFacets = json.loads(res.get_json())['stats']['stats_fields']['id']['countDistinct']
|
||||
except TypeError:
|
||||
print('No item views to index, exiting.')
|
||||
|
||||
exit(0)
|
||||
|
||||
# divide results into "pages" (cast to int to effectively round down)
|
||||
results_per_page = 100
|
||||
@ -78,7 +131,8 @@ def index_views():
|
||||
'facet.field': 'id',
|
||||
'facet.mincount': 1,
|
||||
'facet.limit': results_per_page,
|
||||
'facet.offset': results_current_page * results_per_page
|
||||
'facet.offset': results_current_page * results_per_page,
|
||||
'shards': shards
|
||||
}, rows=0)
|
||||
|
||||
# SolrClient's get_facets() returns a dict of dicts
|
||||
@ -110,11 +164,17 @@ def index_downloads():
|
||||
'facet.offset': 0,
|
||||
'stats': True,
|
||||
'stats.field': 'owningItem',
|
||||
'stats.calcdistinct': True
|
||||
'stats.calcdistinct': True,
|
||||
'shards': shards
|
||||
}, rows=0)
|
||||
|
||||
# get total number of distinct facets (countDistinct)
|
||||
results_totalNumFacets = json.loads(res.get_json())['stats']['stats_fields']['owningItem']['countDistinct']
|
||||
try:
|
||||
# get total number of distinct facets (countDistinct)
|
||||
results_totalNumFacets = json.loads(res.get_json())['stats']['stats_fields']['owningItem']['countDistinct']
|
||||
except TypeError:
|
||||
print('No item downloads to index, exiting.')
|
||||
|
||||
exit(0)
|
||||
|
||||
# divide results into "pages" (cast to int to effectively round down)
|
||||
results_per_page = 100
|
||||
@ -136,7 +196,8 @@ def index_downloads():
|
||||
'facet.field': 'owningItem',
|
||||
'facet.mincount': 1,
|
||||
'facet.limit': results_per_page,
|
||||
'facet.offset': results_current_page * results_per_page
|
||||
'facet.offset': results_current_page * results_per_page,
|
||||
'shards': shards
|
||||
}, rows=0)
|
||||
|
||||
# SolrClient's get_facets() returns a dict of dicts
|
||||
@ -167,6 +228,8 @@ with DatabaseManager() as db:
|
||||
# commit the table creation before closing the database connection
|
||||
db.commit()
|
||||
|
||||
shards = get_statistics_shards()
|
||||
|
||||
index_views()
|
||||
index_downloads()
|
||||
|
||||
|
@ -5,21 +5,21 @@ backcall==0.1.0
|
||||
decorator==4.3.0
|
||||
flake8==3.6.0
|
||||
ipython-genutils==0.2.0
|
||||
ipython==7.1.1
|
||||
jedi==0.13.1
|
||||
ipython==7.2.0
|
||||
jedi==0.13.2
|
||||
mccabe==0.6.1
|
||||
more-itertools==4.3.0
|
||||
more-itertools==5.0.0
|
||||
parso==0.3.1
|
||||
pexpect==4.6.0 ; sys_platform != 'win32'
|
||||
pickleshare==0.7.5
|
||||
pluggy==0.8.0
|
||||
pluggy==0.8.1
|
||||
prompt-toolkit==2.0.7
|
||||
ptyprocess==0.6.0
|
||||
py==1.7.0
|
||||
pycodestyle==2.4.0
|
||||
pyflakes==2.0.0
|
||||
pygments==2.2.0
|
||||
pytest==3.10.1
|
||||
six==1.11.0
|
||||
pygments==2.3.1
|
||||
pytest==4.1.1
|
||||
six==1.12.0
|
||||
traitlets==4.3.2
|
||||
wcwidth==0.1.7
|
||||
|
@ -4,4 +4,4 @@ git+https://github.com/alanorth/SolrClient.git@c629e3475be37c82770b2be61748be7e2
|
||||
gunicorn==19.9.0
|
||||
psycopg2-binary==2.7.6.1
|
||||
python-mimeparse==1.6.0
|
||||
six==1.11.0
|
||||
six==1.12.0
|
||||
|
Reference in New Issue
Block a user