mirror of
https://github.com/ilri/dspace-statistics-api.git
synced 2025-05-15 09:27:43 +02:00
Compare commits
27 Commits
Author | SHA1 | Date | |
---|---|---|---|
f65089f9ce
|
|||
1db5cf1c29
|
|||
e581c4b1aa
|
|||
e8d356c9ca
|
|||
34a9b8d629
|
|||
41e3d66a0e
|
|||
9b2a6137b4
|
|||
600b986f99
|
|||
49a7790794
|
|||
f2deba627c
|
|||
9323513794
|
|||
daf15610f2
|
|||
4ede966dbb
|
|||
3580473a6d
|
|||
071c24535f
|
|||
4291aecac4
|
|||
46bf537e88
|
|||
eaca5354d3
|
|||
4600288ee4
|
|||
8179563378
|
|||
b14c3eef4d
|
|||
71a789b13f
|
|||
c68ddacaa4
|
|||
9c9e79769e
|
|||
2ad5ade556
|
|||
7412a09670
|
|||
bb744a00b8
|
@ -2,8 +2,10 @@ language: python
|
|||||||
python:
|
python:
|
||||||
- "3.5"
|
- "3.5"
|
||||||
- "3.6"
|
- "3.6"
|
||||||
- "3.7"
|
- "3.7-dev"
|
||||||
install:
|
script: pip install -r requirements.txt
|
||||||
- pip install -r requirements.txt
|
branches:
|
||||||
|
only:
|
||||||
|
- master
|
||||||
|
|
||||||
# vim: ts=2 sw=2 et
|
# vim: ts=2 sw=2 et
|
||||||
|
16
CHANGELOG.md
16
CHANGELOG.md
@ -4,6 +4,22 @@ All notable changes to this project will be documented in this file.
|
|||||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||||
|
|
||||||
|
## [0.4.3] - 2018-10-17
|
||||||
|
### Changed
|
||||||
|
- Use pip install as script for Travis CI
|
||||||
|
|
||||||
|
### Improved
|
||||||
|
- Documentation for deployment and testing
|
||||||
|
|
||||||
|
## [0.4.2] - 2018-10-04
|
||||||
|
### Changed
|
||||||
|
- README.md introduction and requirements
|
||||||
|
- Use ujson instead of json
|
||||||
|
- Iterate directly on SQL cursor in `/items` route
|
||||||
|
|
||||||
|
### Fixed
|
||||||
|
- Logic error in SQL for item views
|
||||||
|
|
||||||
## [0.4.1] - 2018-09-26
|
## [0.4.1] - 2018-09-26
|
||||||
### Changed
|
### Changed
|
||||||
- Use execute_values() to batch insert records to PostgreSQL
|
- Use execute_values() to batch insert records to PostgreSQL
|
||||||
|
42
README.md
42
README.md
@ -1,16 +1,42 @@
|
|||||||
# DSpace Statistics API
|
# DSpace Statistics API [![Build Status](https://travis-ci.org/alanorth/dspace-statistics-api.svg?branch=master)](https://travis-ci.org/alanorth/dspace-statistics-api)
|
||||||
A quick and dirty REST API to expose Solr view and download statistics for items in a DSpace repository.
|
A simple REST API to expose Solr view and download statistics for items in a DSpace repository. This project contains a standalone indexing component and a WSGI application.
|
||||||
|
|
||||||
Written and tested in Python 3.5, 3.6, and 3.7. Requires PostgreSQL version 9.5 or greater for [`UPSERT` support](https://wiki.postgresql.org/wiki/UPSERT).
|
## Requirements
|
||||||
|
|
||||||
## Installation
|
- Python 3.5+
|
||||||
Create a virtual environment and run it:
|
- PostgreSQL version 9.5+ (due to [`UPSERT` support](https://wiki.postgresql.org/wiki/UPSERT))
|
||||||
|
- DSpace 4+ with [Solr usage statistics enabled](https://wiki.duraspace.org/display/DSDOC5x/SOLR+Statistics)
|
||||||
|
|
||||||
|
## Installation and Testing
|
||||||
|
Create a Python virtual environment and install the dependencies:
|
||||||
|
|
||||||
$ python -m venv venv
|
$ python -m venv venv
|
||||||
$ . venv/bin/activate
|
$ . venv/bin/activate
|
||||||
$ pip install -r requirements.txt
|
$ pip install -r requirements.txt
|
||||||
|
|
||||||
|
Set up the environment variables for Solr and PostgreSQL:
|
||||||
|
|
||||||
|
$ export SOLR_SERVER=http://localhost:8080/solr
|
||||||
|
$ export DATABASE_NAME=dspacestatistics
|
||||||
|
$ export DATABASE_USER=dspacestatistics
|
||||||
|
$ export DATABASE_PASS=dspacestatistics
|
||||||
|
$ export DATABASE_HOST=localhost
|
||||||
|
|
||||||
|
Index the Solr statistics core to populate the PostgreSQL database:
|
||||||
|
|
||||||
|
$ ./indexer.py
|
||||||
|
|
||||||
|
Run the REST API:
|
||||||
|
|
||||||
$ gunicorn app:api
|
$ gunicorn app:api
|
||||||
|
|
||||||
|
Test to see if there are any statistics:
|
||||||
|
|
||||||
|
$ curl 'http://localhost:8000/items?limit=1'
|
||||||
|
|
||||||
|
## Deployment
|
||||||
|
There are example systemd service and timer units in the `contrib` directory.
|
||||||
|
|
||||||
## Using the API
|
## Using the API
|
||||||
The API exposes the following endpoints:
|
The API exposes the following endpoints:
|
||||||
|
|
||||||
@ -22,9 +48,13 @@ The API exposes the following endpoints:
|
|||||||
## Todo
|
## Todo
|
||||||
|
|
||||||
- Add API documentation
|
- Add API documentation
|
||||||
- Close up DB connection when gunicorn shuts down gracefully
|
- Close DB connection when gunicorn shuts down gracefully
|
||||||
- Better logging
|
- Better logging
|
||||||
- Tests
|
- Tests
|
||||||
|
- Check if database exists (try/except)
|
||||||
|
- Version API
|
||||||
|
- Use JSON in PostgreSQL
|
||||||
|
- Switch to [Python 3.6+ f-string syntax](https://realpython.com/python-f-strings/)
|
||||||
|
|
||||||
## License
|
## License
|
||||||
This work is licensed under the [GPLv3](https://www.gnu.org/licenses/gpl-3.0.en.html).
|
This work is licensed under the [GPLv3](https://www.gnu.org/licenses/gpl-3.0.en.html).
|
||||||
|
6
app.py
6
app.py
@ -22,16 +22,16 @@ class AllItemsResource:
|
|||||||
|
|
||||||
# get statistics, ordered by id, and use limit and offset to page through results
|
# get statistics, ordered by id, and use limit and offset to page through results
|
||||||
cursor.execute('SELECT id, views, downloads FROM items ORDER BY id ASC LIMIT {} OFFSET {}'.format(limit, offset))
|
cursor.execute('SELECT id, views, downloads FROM items ORDER BY id ASC LIMIT {} OFFSET {}'.format(limit, offset))
|
||||||
results = cursor.fetchmany(limit)
|
|
||||||
cursor.close()
|
|
||||||
|
|
||||||
# create a list to hold dicts of item stats
|
# create a list to hold dicts of item stats
|
||||||
statistics = list()
|
statistics = list()
|
||||||
|
|
||||||
# iterate over results and build statistics object
|
# iterate over results and build statistics object
|
||||||
for item in results:
|
for item in cursor:
|
||||||
statistics.append({ 'id': item['id'], 'views': item['views'], 'downloads': item['downloads'] })
|
statistics.append({ 'id': item['id'], 'views': item['views'], 'downloads': item['downloads'] })
|
||||||
|
|
||||||
|
cursor.close()
|
||||||
|
|
||||||
message = {
|
message = {
|
||||||
'currentPage': page,
|
'currentPage': page,
|
||||||
'totalPages': pages,
|
'totalPages': pages,
|
||||||
|
@ -31,7 +31,7 @@
|
|||||||
# See: https://wiki.duraspace.org/display/DSPACE/Solr
|
# See: https://wiki.duraspace.org/display/DSPACE/Solr
|
||||||
|
|
||||||
from database import database_connection
|
from database import database_connection
|
||||||
import json
|
import ujson
|
||||||
import psycopg2.extras
|
import psycopg2.extras
|
||||||
from solr import solr_connection
|
from solr import solr_connection
|
||||||
|
|
||||||
@ -56,7 +56,7 @@ def index_views():
|
|||||||
}, rows=0)
|
}, rows=0)
|
||||||
|
|
||||||
# get total number of distinct facets (countDistinct)
|
# get total number of distinct facets (countDistinct)
|
||||||
results_totalNumFacets = json.loads(res.get_json())['stats']['stats_fields']['id']['countDistinct']
|
results_totalNumFacets = ujson.loads(res.get_json())['stats']['stats_fields']['id']['countDistinct']
|
||||||
|
|
||||||
# divide results into "pages" (cast to int to effectively round down)
|
# divide results into "pages" (cast to int to effectively round down)
|
||||||
results_per_page = 100
|
results_per_page = 100
|
||||||
@ -88,7 +88,7 @@ def index_views():
|
|||||||
data.append((item_id, item_views))
|
data.append((item_id, item_views))
|
||||||
|
|
||||||
# do a batch insert of values from the current "page" of results
|
# do a batch insert of values from the current "page" of results
|
||||||
sql = 'INSERT INTO items(id, views) VALUES %s ON CONFLICT(id) DO UPDATE SET downloads=excluded.views'
|
sql = 'INSERT INTO items(id, views) VALUES %s ON CONFLICT(id) DO UPDATE SET views=excluded.views'
|
||||||
psycopg2.extras.execute_values(cursor, sql, data, template='(%s, %s)')
|
psycopg2.extras.execute_values(cursor, sql, data, template='(%s, %s)')
|
||||||
db.commit()
|
db.commit()
|
||||||
|
|
||||||
@ -115,7 +115,7 @@ def index_downloads():
|
|||||||
}, rows=0)
|
}, rows=0)
|
||||||
|
|
||||||
# get total number of distinct facets (countDistinct)
|
# get total number of distinct facets (countDistinct)
|
||||||
results_totalNumFacets = json.loads(res.get_json())['stats']['stats_fields']['owningItem']['countDistinct']
|
results_totalNumFacets = ujson.loads(res.get_json())['stats']['stats_fields']['owningItem']['countDistinct']
|
||||||
|
|
||||||
# divide results into "pages" (cast to int to effectively round down)
|
# divide results into "pages" (cast to int to effectively round down)
|
||||||
results_per_page = 100
|
results_per_page = 100
|
||||||
|
@ -9,4 +9,5 @@ python-mimeparse==1.6.0
|
|||||||
requests==2.19.1
|
requests==2.19.1
|
||||||
six==1.11.0
|
six==1.11.0
|
||||||
SolrClient==0.2.1
|
SolrClient==0.2.1
|
||||||
|
ujson==1.35
|
||||||
urllib3==1.23
|
urllib3==1.23
|
||||||
|
Reference in New Issue
Block a user