mirror of
https://github.com/ilri/dspace-statistics-api.git
synced 2025-05-10 15:16:02 +02:00
Compare commits
38 Commits
Author | SHA1 | Date | |
---|---|---|---|
4c51d12eb4
|
|||
a6ce44e852 | |||
f6e866a589
|
|||
eb5c187d41
|
|||
b06c82bb16
|
|||
2f342be948
|
|||
e39f2b260c | |||
60ad474b88
|
|||
888f85d19e
|
|||
df7de93964
|
|||
7218631cc4
|
|||
085e525b2f
|
|||
e1580df12f
|
|||
be18779ff9
|
|||
60cfd8f23b
|
|||
87fd117d77
|
|||
f262ebdca2
|
|||
64d7f1a3b2
|
|||
a238a727d2
|
|||
cc5ce3ab98 | |||
70dfcb93c5
|
|||
69bcd1b5e4
|
|||
5f3bd61998
|
|||
e54dd8888f
|
|||
2ba09f8693
|
|||
a468a87a5a
|
|||
6a30b6550d
|
|||
18f013bfa0
|
|||
78900b5d85
|
|||
eb08832bf8
|
|||
c2ec780ad9
|
|||
df8ebc8bf1
|
|||
0d4be5f4c8
|
|||
30dc7f1939
|
|||
77194707fd
|
|||
10c1f8bdcc
|
|||
da74943da2
|
|||
fc8348ab29
|
4
.hound.yml
Normal file
4
.hound.yml
Normal file
@ -0,0 +1,4 @@
|
||||
flake8:
|
||||
enabled: true
|
||||
config_file: .flake8
|
||||
fail_on_violations: true
|
16
CHANGELOG.md
16
CHANGELOG.md
@ -4,6 +4,22 @@ All notable changes to this project will be documented in this file.
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||
|
||||
### [0.7.0] - 2018-11-07
|
||||
## Added
|
||||
- Ability to configure PostgreSQL database port with DATABASE_PORT environment variable (defaults to 5432)
|
||||
- Hound CI configuration to validate pull requests against PEP 8 code style with Flake8
|
||||
- Configuration for [pipenv](https://pipenv.readthedocs.io/en/latest/)
|
||||
|
||||
## Changed
|
||||
- Use a database management class with Python context management to automatically open/close connections and cursors
|
||||
|
||||
## Changed
|
||||
- Validate code against PEP 8 style guide with Flake8
|
||||
|
||||
### [0.6.1] - 2018-10-31
|
||||
## Added
|
||||
- API documentation at root path (/)
|
||||
|
||||
### [0.6.0] - 2018-10-31
|
||||
## Changed
|
||||
- Refactor project structure (note breaking changes to API and indexing invocation, see contrib and README.md)
|
||||
|
25
Pipfile
Normal file
25
Pipfile
Normal file
@ -0,0 +1,25 @@
|
||||
[[source]]
|
||||
url = "https://pypi.org/simple"
|
||||
verify_ssl = true
|
||||
name = "pypi"
|
||||
|
||||
[packages]
|
||||
certifi = "==2018.10.15"
|
||||
chardet = "==3.0.4"
|
||||
falcon = "==1.4.1"
|
||||
gunicorn = "==19.9.0"
|
||||
idna = "==2.7"
|
||||
kazoo = "==2.5.0"
|
||||
"psycopg2-binary" = "==2.7.5"
|
||||
python-mimeparse = "==1.6.0"
|
||||
requests = "==2.20.0"
|
||||
six = "==1.11.0"
|
||||
solrclient = {ref = "kazoo-2.5.0", git = "https://github.com/alanorth/SolrClient.git"}
|
||||
"urllib3" = "==1.24"
|
||||
|
||||
[dev-packages]
|
||||
"flake8" = "*"
|
||||
ipython = "*"
|
||||
|
||||
[requires]
|
||||
python_version = "3.6"
|
273
Pipfile.lock
generated
Normal file
273
Pipfile.lock
generated
Normal file
@ -0,0 +1,273 @@
|
||||
{
|
||||
"_meta": {
|
||||
"hash": {
|
||||
"sha256": "74430260b3271348f65792cc7f9cadc5d2036abc4a5fc958524239656ffabb4f"
|
||||
},
|
||||
"pipfile-spec": 6,
|
||||
"requires": {
|
||||
"python_version": "3.6"
|
||||
},
|
||||
"sources": [
|
||||
{
|
||||
"name": "pypi",
|
||||
"url": "https://pypi.org/simple",
|
||||
"verify_ssl": true
|
||||
}
|
||||
]
|
||||
},
|
||||
"default": {
|
||||
"certifi": {
|
||||
"hashes": [
|
||||
"sha256:339dc09518b07e2fa7eda5450740925974815557727d6bd35d319c1524a04a4c",
|
||||
"sha256:6d58c986d22b038c8c0df30d639f23a3e6d172a05c3583e766f4c0b785c0986a"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==2018.10.15"
|
||||
},
|
||||
"chardet": {
|
||||
"hashes": [
|
||||
"sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae",
|
||||
"sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==3.0.4"
|
||||
},
|
||||
"falcon": {
|
||||
"hashes": [
|
||||
"sha256:0a66b33458fab9c1e400a9be1a68056abda178eb02a8cb4b8f795e9df20b053b",
|
||||
"sha256:3981f609c0358a9fcdb25b0e7fab3d9e23019356fb429c635ce4133135ae1bc4"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==1.4.1"
|
||||
},
|
||||
"gunicorn": {
|
||||
"hashes": [
|
||||
"sha256:aa8e0b40b4157b36a5df5e599f45c9c76d6af43845ba3b3b0efe2c70473c2471",
|
||||
"sha256:fa2662097c66f920f53f70621c6c58ca4a3c4d3434205e608e121b5b3b71f4f3"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==19.9.0"
|
||||
},
|
||||
"idna": {
|
||||
"hashes": [
|
||||
"sha256:156a6814fb5ac1fc6850fb002e0852d56c0c8d2531923a51032d1b70760e186e",
|
||||
"sha256:684a38a6f903c1d71d6d5fac066b58d7768af4de2b832e426ec79c30daa94a16"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==2.7"
|
||||
},
|
||||
"kazoo": {
|
||||
"hashes": [
|
||||
"sha256:8db774f7bdece7d0dc7decb21539ff0852e42c2ffe1c28d7f1ff6f9292a1c3a4",
|
||||
"sha256:a5fa2e400c5068cfee9e86b35cf0dab8232b574152d8e3590d823b3e2426ab5e"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==2.5.0"
|
||||
},
|
||||
"psycopg2-binary": {
|
||||
"hashes": [
|
||||
"sha256:04afb59bbbd2eab3148e6816beddc74348078b8c02a1113ea7f7822f5be4afe3",
|
||||
"sha256:098b18f4d8857a8f9b206d1dc54db56c2255d5d26458917e7bcad61ebfe4338f",
|
||||
"sha256:0bf855d4a7083e20ead961fda4923887094eaeace0ab2d76eb4aa300f4bbf5bd",
|
||||
"sha256:197dda3ffd02057820be83fe4d84529ea70bf39a9a4daee1d20ffc74eb3d042e",
|
||||
"sha256:278ef63afb4b3d842b4609f2c05ffbfb76795cf6a184deeb8707cd5ed3c981a5",
|
||||
"sha256:3cbf8c4fc8f22f0817220891cf405831559f4d4c12c4f73913730a2ea6c47a47",
|
||||
"sha256:4305aed922c4d9d6163ab3a41d80b5a1cfab54917467da8168552c42cad84d32",
|
||||
"sha256:47ee296f704fb8b2a616dec691cdcfd5fa0f11943955e88faa98cbd1dc3b3e3d",
|
||||
"sha256:4a0e38cb30457e70580903367161173d4a7d1381eb2f2cfe4e69b7806623f484",
|
||||
"sha256:4d6c294c6638a71cafb82a37f182f24321f1163b08b5d5ca076e11fe838a3086",
|
||||
"sha256:4f3233c366500730f839f92833194fd8f9a5c4529c8cd8040aa162c3740de8e5",
|
||||
"sha256:5221f5a3f4ca2ddf0d58e8b8a32ca50948be9a43351fda797eb4e72d7a7aa34d",
|
||||
"sha256:5c6ca0b507540a11eaf9e77dee4f07c131c2ec80ca0cffa146671bf690bc1c02",
|
||||
"sha256:789bd89d71d704db2b3d5e67d6d518b158985d791d3b2dec5ab85457cfc9677b",
|
||||
"sha256:7b94d29239efeaa6a967f3b5971bd0518d2a24edd1511edbf4a2c8b815220d07",
|
||||
"sha256:89bc65ef3301c74cf32db25334421ea6adbe8f65601ea45dcaaf095abed910bb",
|
||||
"sha256:89d6d3a549f405c20c9ae4dc94d7ed2de2fa77427a470674490a622070732e62",
|
||||
"sha256:97521704ac7127d7d8ba22877da3c7bf4a40366587d238ec679ff38e33177498",
|
||||
"sha256:a395b62d5f44ff6f633231abe568e2203b8fabf9797cd6386aa92497df912d9a",
|
||||
"sha256:a6d32c37f714c3f34158f3fa659f3a8f2658d5f53c4297d45579b9677cc4d852",
|
||||
"sha256:a89ee5c26f72f2d0d74b991ce49e42ddeb4ac0dc2d8c06a0f2770a1ab48f4fe0",
|
||||
"sha256:b4c8b0ef3608e59317bfc501df84a61e48b5445d45f24d0391a24802de5f2d84",
|
||||
"sha256:b5fcf07140219a1f71e18486b8dc28e2e1b76a441c19374805c617aa6d9a9d55",
|
||||
"sha256:b86f527f00956ecebad6ab3bb30e3a75fedf1160a8716978dd8ce7adddedd86f",
|
||||
"sha256:be4c4aa22ba22f70de36c98b06480e2f1697972d49eb20d525f400d204a6d272",
|
||||
"sha256:c2ac7aa1a144d4e0e613ac7286dae85671e99fe7a1353954d4905629c36b811c",
|
||||
"sha256:de26ef4787b5e778e8223913a3e50368b44e7480f83c76df1f51d23bd21cea16",
|
||||
"sha256:e70ebcfc5372dc7b699c0110454fc4263967f30c55454397e5769eb72c0eb0ce",
|
||||
"sha256:eadbd32b6bc48b67b0457fccc94c86f7ccc8178ab839f684eb285bb592dc143e",
|
||||
"sha256:ecbc6dfff6db06b8b72ae8a2f25ff20fbdcb83cb543811a08f7cb555042aa729"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==2.7.5"
|
||||
},
|
||||
"python-mimeparse": {
|
||||
"hashes": [
|
||||
"sha256:76e4b03d700a641fd7761d3cd4fdbbdcd787eade1ebfac43f877016328334f78",
|
||||
"sha256:a295f03ff20341491bfe4717a39cd0a8cc9afad619ba44b77e86b0ab8a2b8282"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==1.6.0"
|
||||
},
|
||||
"requests": {
|
||||
"hashes": [
|
||||
"sha256:99dcfdaaeb17caf6e526f32b6a7b780461512ab3f1d992187801694cba42770c",
|
||||
"sha256:a84b8c9ab6239b578f22d1c21d51b696dcfe004032bb80ea832398d6909d7279"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==2.20.0"
|
||||
},
|
||||
"six": {
|
||||
"hashes": [
|
||||
"sha256:70e8a77beed4562e7f14fe23a786b54f6296e34344c23bc42f07b15018ff98e9",
|
||||
"sha256:832dc0e10feb1aa2c68dcc57dbb658f1c7e65b9b61af69048abc87a2db00a0eb"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==1.11.0"
|
||||
},
|
||||
"solrclient": {
|
||||
"git": "https://github.com/alanorth/SolrClient.git",
|
||||
"ref": "c629e3475be37c82770b2be61748be7e29882648"
|
||||
},
|
||||
"urllib3": {
|
||||
"hashes": [
|
||||
"sha256:41c3db2fc01e5b907288010dec72f9d0a74e37d6994e6eb56849f59fea2265ae",
|
||||
"sha256:8819bba37a02d143296a4d032373c4dd4aca11f6d4c9973335ca75f9c8475f59"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==1.24"
|
||||
}
|
||||
},
|
||||
"develop": {
|
||||
"backcall": {
|
||||
"hashes": [
|
||||
"sha256:38ecd85be2c1e78f77fd91700c76e14667dc21e2713b63876c0eb901196e01e4",
|
||||
"sha256:bbbf4b1e5cd2bdb08f915895b51081c041bac22394fdfcfdfbe9f14b77c08bf2"
|
||||
],
|
||||
"version": "==0.1.0"
|
||||
},
|
||||
"decorator": {
|
||||
"hashes": [
|
||||
"sha256:2c51dff8ef3c447388fe5e4453d24a2bf128d3a4c32af3fabef1f01c6851ab82",
|
||||
"sha256:c39efa13fbdeb4506c476c9b3babf6a718da943dab7811c206005a4a956c080c"
|
||||
],
|
||||
"version": "==4.3.0"
|
||||
},
|
||||
"flake8": {
|
||||
"hashes": [
|
||||
"sha256:6a35f5b8761f45c5513e3405f110a86bea57982c3b75b766ce7b65217abe1670",
|
||||
"sha256:c01f8a3963b3571a8e6bd7a4063359aff90749e160778e03817cd9b71c9e07d2"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==3.6.0"
|
||||
},
|
||||
"ipython": {
|
||||
"hashes": [
|
||||
"sha256:a5781d6934a3341a1f9acb4ea5acdc7ea0a0855e689dbe755d070ca51e995435",
|
||||
"sha256:b10a7ddd03657c761fc503495bc36471c8158e3fc948573fb9fe82a7029d8efd"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==7.1.1"
|
||||
},
|
||||
"ipython-genutils": {
|
||||
"hashes": [
|
||||
"sha256:72dd37233799e619666c9f639a9da83c34013a73e8bbc79a7a6348d93c61fab8",
|
||||
"sha256:eb2e116e75ecef9d4d228fdc66af54269afa26ab4463042e33785b887c628ba8"
|
||||
],
|
||||
"version": "==0.2.0"
|
||||
},
|
||||
"jedi": {
|
||||
"hashes": [
|
||||
"sha256:0191c447165f798e6a730285f2eee783fff81b0d3df261945ecb80983b5c3ca7",
|
||||
"sha256:b7493f73a2febe0dc33d51c99b474547f7f6c0b2c8fb2b21f453eef204c12148"
|
||||
],
|
||||
"version": "==0.13.1"
|
||||
},
|
||||
"mccabe": {
|
||||
"hashes": [
|
||||
"sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42",
|
||||
"sha256:dd8d182285a0fe56bace7f45b5e7d1a6ebcbf524e8f3bd87eb0f125271b8831f"
|
||||
],
|
||||
"version": "==0.6.1"
|
||||
},
|
||||
"parso": {
|
||||
"hashes": [
|
||||
"sha256:35704a43a3c113cce4de228ddb39aab374b8004f4f2407d070b6a2ca784ce8a2",
|
||||
"sha256:895c63e93b94ac1e1690f5fdd40b65f07c8171e3e53cbd7793b5b96c0e0a7f24"
|
||||
],
|
||||
"version": "==0.3.1"
|
||||
},
|
||||
"pexpect": {
|
||||
"hashes": [
|
||||
"sha256:2a8e88259839571d1251d278476f3eec5db26deb73a70be5ed5dc5435e418aba",
|
||||
"sha256:3fbd41d4caf27fa4a377bfd16fef87271099463e6fa73e92a52f92dfee5d425b"
|
||||
],
|
||||
"markers": "sys_platform != 'win32'",
|
||||
"version": "==4.6.0"
|
||||
},
|
||||
"pickleshare": {
|
||||
"hashes": [
|
||||
"sha256:87683d47965c1da65cdacaf31c8441d12b8044cdec9aca500cd78fc2c683afca",
|
||||
"sha256:9649af414d74d4df115d5d718f82acb59c9d418196b7b4290ed47a12ce62df56"
|
||||
],
|
||||
"version": "==0.7.5"
|
||||
},
|
||||
"prompt-toolkit": {
|
||||
"hashes": [
|
||||
"sha256:c1d6aff5252ab2ef391c2fe498ed8c088066f66bc64a8d5c095bbf795d9fec34",
|
||||
"sha256:d4c47f79b635a0e70b84fdb97ebd9a274203706b1ee5ed44c10da62755cf3ec9",
|
||||
"sha256:fd17048d8335c1e6d5ee403c3569953ba3eb8555d710bfc548faf0712666ea39"
|
||||
],
|
||||
"version": "==2.0.7"
|
||||
},
|
||||
"ptyprocess": {
|
||||
"hashes": [
|
||||
"sha256:923f299cc5ad920c68f2bc0bc98b75b9f838b93b599941a6b63ddbc2476394c0",
|
||||
"sha256:d7cc528d76e76342423ca640335bd3633420dc1366f258cb31d05e865ef5ca1f"
|
||||
],
|
||||
"version": "==0.6.0"
|
||||
},
|
||||
"pycodestyle": {
|
||||
"hashes": [
|
||||
"sha256:cbc619d09254895b0d12c2c691e237b2e91e9b2ecf5e84c26b35400f93dcfb83",
|
||||
"sha256:cbfca99bd594a10f674d0cd97a3d802a1fdef635d4361e1a2658de47ed261e3a"
|
||||
],
|
||||
"version": "==2.4.0"
|
||||
},
|
||||
"pyflakes": {
|
||||
"hashes": [
|
||||
"sha256:9a7662ec724d0120012f6e29d6248ae3727d821bba522a0e6b356eff19126a49",
|
||||
"sha256:f661252913bc1dbe7fcfcbf0af0db3f42ab65aabd1a6ca68fe5d466bace94dae"
|
||||
],
|
||||
"version": "==2.0.0"
|
||||
},
|
||||
"pygments": {
|
||||
"hashes": [
|
||||
"sha256:78f3f434bcc5d6ee09020f92ba487f95ba50f1e3ef83ae96b9d5ffa1bab25c5d",
|
||||
"sha256:dbae1046def0efb574852fab9e90209b23f556367b5a320c0bcb871c77c3e8cc"
|
||||
],
|
||||
"version": "==2.2.0"
|
||||
},
|
||||
"six": {
|
||||
"hashes": [
|
||||
"sha256:70e8a77beed4562e7f14fe23a786b54f6296e34344c23bc42f07b15018ff98e9",
|
||||
"sha256:832dc0e10feb1aa2c68dcc57dbb658f1c7e65b9b61af69048abc87a2db00a0eb"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==1.11.0"
|
||||
},
|
||||
"traitlets": {
|
||||
"hashes": [
|
||||
"sha256:9c4bd2d267b7153df9152698efb1050a5d84982d3384a37b2c1f7723ba3e7835",
|
||||
"sha256:c6cb5e6f57c5a9bdaa40fa71ce7b4af30298fbab9ece9815b5d995ab6217c7d9"
|
||||
],
|
||||
"version": "==4.3.2"
|
||||
},
|
||||
"wcwidth": {
|
||||
"hashes": [
|
||||
"sha256:3df37372226d6e63e1b1e1eda15c594bca98a22d33a23832a90998faa96bc65e",
|
||||
"sha256:f4ebe71925af7b40a864553f761ed559b43544f8f71746c2d756c7fe788ade7c"
|
||||
],
|
||||
"version": "==0.1.7"
|
||||
}
|
||||
}
|
||||
}
|
26
README.md
26
README.md
@ -1,18 +1,19 @@
|
||||
# DSpace Statistics API [](https://travis-ci.org/alanorth/dspace-statistics-api)
|
||||
A simple REST API to expose Solr view and download statistics for items in a DSpace repository. This project contains a standalone indexing component and a WSGI application.
|
||||
# DSpace Statistics API [](https://travis-ci.org/ilri/dspace-statistics-api)
|
||||
DSpace stores item view and download events in a Solr "statistics" core. This information is available for use in the various DSpace user interfaces, but is not exposed externally via any APIs. The DSpace 4+ [REST API](https://wiki.duraspace.org/display/DSDOC5x/REST+API), for example, only exposes information about communities, collections, item metadata, and bitstreams.
|
||||
|
||||
This project contains an indexer and a [Falcon-based](https://falcon.readthedocs.io/) web application to make the statistics available via simple REST API. You can read more about the Solr queries used to gather the item view and download statistics on the [DSpace wiki](https://wiki.duraspace.org/display/DSPACE/Solr).
|
||||
|
||||
## Requirements
|
||||
|
||||
- Python 3.5+
|
||||
- PostgreSQL version 9.5+ (due to [`UPSERT` support](https://wiki.postgresql.org/wiki/UPSERT))
|
||||
- DSpace 4+ with [Solr usage statistics enabled](https://wiki.duraspace.org/display/DSDOC5x/SOLR+Statistics)
|
||||
- DSpace with [Solr usage statistics enabled](https://wiki.duraspace.org/display/DSDOC5x/SOLR+Statistics) (tested with 5.x)
|
||||
|
||||
## Installation and Testing
|
||||
Create a Python virtual environment and install the dependencies:
|
||||
Create a Python virtual environment and install the dependencies using [`pipenv`](https://github.com/pypa/pipenv):
|
||||
|
||||
$ python -m venv venv
|
||||
$ . venv/bin/activate
|
||||
$ pip install -r requirements.txt
|
||||
$ pipenv install --dev
|
||||
$ pipenv shell
|
||||
|
||||
Set up the environment variables for Solr and PostgreSQL:
|
||||
|
||||
@ -59,15 +60,16 @@ This would expose the API at `/rest/statistics`.
|
||||
## Using the API
|
||||
The API exposes the following endpoints:
|
||||
|
||||
- GET `/items` — return views and downloads for all items that Solr knows about¹. Accepts `limit` and `page` query parameters for pagination of results.
|
||||
- GET `/item/id` — return views and downloads for a single item (*id* must be a positive integer). Returns HTTP 404 if an item id is not found.
|
||||
- GET `/` — return a basic API documentation page.
|
||||
- GET `/items` — return views and downloads for all items that Solr knows about¹. Accepts `limit` and `page` query parameters for pagination of results (`limit` must be an integer between 1 and 100, and `page` must be an integer greater than or equal to 0).
|
||||
- GET `/item/id` — return views and downloads for a single item (`id` must be a positive integer). Returns HTTP 404 if an item id is not found.
|
||||
|
||||
¹ We are querying the Solr statistics core, which technically only knows about items that have either views or downloads.
|
||||
The item id is the *internal* id for an item. You can get these from the standard DSpace REST API.
|
||||
|
||||
¹ We are querying the Solr statistics core, which technically only knows about items that have either views or downloads. If an item is not present here you can assume it has zero views and zero downloads, but not necessarily that it does not exist in the repository.
|
||||
|
||||
## Todo
|
||||
|
||||
- Add API documentation
|
||||
- Close DB connection when gunicorn shuts down gracefully
|
||||
- Better logging
|
||||
- Tests
|
||||
- Check if database exists (try/except)
|
||||
|
@ -1,8 +1,14 @@
|
||||
from .database import database_connection
|
||||
from .database import DatabaseManager
|
||||
import falcon
|
||||
|
||||
db = database_connection()
|
||||
db.set_session(readonly=True)
|
||||
|
||||
class RootResource:
|
||||
def on_get(self, req, resp):
|
||||
resp.status = falcon.HTTP_200
|
||||
resp.content_type = 'text/html'
|
||||
with open('dspace_statistics_api/docs/index.html', 'r') as f:
|
||||
resp.body = f.read()
|
||||
|
||||
|
||||
class AllItemsResource:
|
||||
def on_get(self, req, resp):
|
||||
@ -12,58 +18,63 @@ class AllItemsResource:
|
||||
page = req.get_param_as_int("page", min=0) or 0
|
||||
offset = limit * page
|
||||
|
||||
cursor = db.cursor()
|
||||
with DatabaseManager() as db:
|
||||
db.set_session(readonly=True)
|
||||
|
||||
# get total number of items so we can estimate the pages
|
||||
cursor.execute('SELECT COUNT(id) FROM items')
|
||||
pages = round(cursor.fetchone()[0] / limit)
|
||||
with db.cursor() as cursor:
|
||||
# get total number of items so we can estimate the pages
|
||||
cursor.execute('SELECT COUNT(id) FROM items')
|
||||
pages = round(cursor.fetchone()[0] / limit)
|
||||
|
||||
# get statistics, ordered by id, and use limit and offset to page through results
|
||||
cursor.execute('SELECT id, views, downloads FROM items ORDER BY id ASC LIMIT {} OFFSET {}'.format(limit, offset))
|
||||
# get statistics, ordered by id, and use limit and offset to page through results
|
||||
cursor.execute('SELECT id, views, downloads FROM items ORDER BY id ASC LIMIT {} OFFSET {}'.format(limit, offset))
|
||||
|
||||
# create a list to hold dicts of item stats
|
||||
statistics = list()
|
||||
# create a list to hold dicts of item stats
|
||||
statistics = list()
|
||||
|
||||
# iterate over results and build statistics object
|
||||
for item in cursor:
|
||||
statistics.append({ 'id': item['id'], 'views': item['views'], 'downloads': item['downloads'] })
|
||||
|
||||
cursor.close()
|
||||
# iterate over results and build statistics object
|
||||
for item in cursor:
|
||||
statistics.append({'id': item['id'], 'views': item['views'], 'downloads': item['downloads']})
|
||||
|
||||
message = {
|
||||
'currentPage': page,
|
||||
'totalPages': pages,
|
||||
'limit': limit,
|
||||
'statistics': statistics
|
||||
'currentPage': page,
|
||||
'totalPages': pages,
|
||||
'limit': limit,
|
||||
'statistics': statistics
|
||||
}
|
||||
|
||||
resp.media = message
|
||||
|
||||
|
||||
class ItemResource:
|
||||
def on_get(self, req, resp, item_id):
|
||||
"""Handles GET requests"""
|
||||
|
||||
cursor = db.cursor()
|
||||
cursor.execute('SELECT views, downloads FROM items WHERE id={}'.format(item_id))
|
||||
if cursor.rowcount == 0:
|
||||
raise falcon.HTTPNotFound(
|
||||
title='Item not found',
|
||||
description='The item with id "{}" was not found.'.format(item_id)
|
||||
)
|
||||
else:
|
||||
results = cursor.fetchone()
|
||||
with DatabaseManager() as db:
|
||||
db.set_session(readonly=True)
|
||||
|
||||
statistics = {
|
||||
'id': item_id,
|
||||
'views': results['views'],
|
||||
'downloads': results['downloads']
|
||||
}
|
||||
with db.cursor() as cursor:
|
||||
cursor = db.cursor()
|
||||
cursor.execute('SELECT views, downloads FROM items WHERE id={}'.format(item_id))
|
||||
if cursor.rowcount == 0:
|
||||
raise falcon.HTTPNotFound(
|
||||
title='Item not found',
|
||||
description='The item with id "{}" was not found.'.format(item_id)
|
||||
)
|
||||
else:
|
||||
results = cursor.fetchone()
|
||||
|
||||
resp.media = statistics
|
||||
statistics = {
|
||||
'id': item_id,
|
||||
'views': results['views'],
|
||||
'downloads': results['downloads']
|
||||
}
|
||||
|
||||
resp.media = statistics
|
||||
|
||||
cursor.close()
|
||||
|
||||
api = application = falcon.API()
|
||||
api.add_route('/', RootResource())
|
||||
api.add_route('/items', AllItemsResource())
|
||||
api.add_route('/item/{item_id:int}', ItemResource())
|
||||
|
||||
|
@ -7,5 +7,6 @@ DATABASE_NAME = os.environ.get('DATABASE_NAME', 'dspacestatistics')
|
||||
DATABASE_USER = os.environ.get('DATABASE_USER', 'dspacestatistics')
|
||||
DATABASE_PASS = os.environ.get('DATABASE_PASS', 'dspacestatistics')
|
||||
DATABASE_HOST = os.environ.get('DATABASE_HOST', 'localhost')
|
||||
DATABASE_PORT = os.environ.get('DATABASE_PORT', '5432')
|
||||
|
||||
# vim: set sw=4 ts=4 expandtab:
|
||||
|
@ -2,11 +2,22 @@ from .config import DATABASE_NAME
|
||||
from .config import DATABASE_USER
|
||||
from .config import DATABASE_PASS
|
||||
from .config import DATABASE_HOST
|
||||
import psycopg2, psycopg2.extras
|
||||
from .config import DATABASE_PORT
|
||||
import psycopg2
|
||||
import psycopg2.extras
|
||||
|
||||
def database_connection():
|
||||
connection = psycopg2.connect("dbname={} user={} password={} host='{}'".format(DATABASE_NAME, DATABASE_USER, DATABASE_PASS, DATABASE_HOST), cursor_factory=psycopg2.extras.DictCursor)
|
||||
|
||||
return connection
|
||||
class DatabaseManager():
|
||||
'''Manage database connection.'''
|
||||
|
||||
def __init__(self):
|
||||
self._connection_uri = 'dbname={} user={} password={} host={} port={}'.format(DATABASE_NAME, DATABASE_USER, DATABASE_PASS, DATABASE_HOST, DATABASE_PORT)
|
||||
|
||||
def __enter__(self):
|
||||
self._connection = psycopg2.connect(self._connection_uri, cursor_factory=psycopg2.extras.DictCursor)
|
||||
return self._connection
|
||||
|
||||
def __exit__(self, exc_type, exc_value, exc_traceback):
|
||||
self._connection.close()
|
||||
|
||||
# vim: set sw=4 ts=4 expandtab:
|
||||
|
20
dspace_statistics_api/docs/index.html
Normal file
20
dspace_statistics_api/docs/index.html
Normal file
@ -0,0 +1,20 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en-US">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<title>DSpace Statistics API</title>
|
||||
</head>
|
||||
<body>
|
||||
<h1>DSpace Statistics API</h1>
|
||||
<p>This site is running the <a href="https://github.com/ilri/dspace-statistics-api" title="DSpace Statistics API project">DSpace Statistics API</a>. The following endpoints are available:</p>
|
||||
<ul>
|
||||
<li>GET <code>/</code> — return a basic API documentation page.</li>
|
||||
<li>GET <code>/items</code> — return views and downloads for all items that Solr knows about¹. Accepts <code>limit</code> and <code>page</code> query parameters for pagination of results (<code>limit</code> must be an integer between 1 and 100, and <code>page</code> must be an integer greater than or equal to 0).</li>
|
||||
<li>GET <code>/item/id</code> — return views and downloads for a single item (<code>id</code> must be a positive integer). Returns HTTP 404 if an item id is not found.</li>
|
||||
</ul>
|
||||
|
||||
<p>The item id is the <em>internal</em> id for an item. You can get these from the standard DSpace REST API.</p>
|
||||
|
||||
<p>¹ We are querying the Solr statistics core, which technically only knows about items that have either views or downloads. If an item is not present here you can assume it has zero views and zero downloads, but not necessarily that it does not exist in the repository.</code>
|
||||
</body>
|
||||
</html>
|
@ -29,11 +29,12 @@
|
||||
# See: https://solrclient.readthedocs.io/en/latest/SolrClient.html
|
||||
# See: https://wiki.duraspace.org/display/DSPACE/Solr
|
||||
|
||||
from .database import database_connection
|
||||
from .database import DatabaseManager
|
||||
import json
|
||||
import psycopg2.extras
|
||||
from .solr import solr_connection
|
||||
|
||||
|
||||
def index_views():
|
||||
# get total number of distinct facets for items with a minimum of 1 view,
|
||||
# otherwise Solr returns all kinds of weird ids that are actually not in
|
||||
@ -42,16 +43,16 @@ def index_views():
|
||||
#
|
||||
# see: https://lucene.apache.org/solr/guide/6_6/the-stats-component.html
|
||||
res = solr.query('statistics', {
|
||||
'q':'type:2',
|
||||
'fq':'isBot:false AND statistics_type:view',
|
||||
'facet':True,
|
||||
'facet.field':'id',
|
||||
'facet.mincount':1,
|
||||
'facet.limit':1,
|
||||
'facet.offset':0,
|
||||
'stats':True,
|
||||
'stats.field':'id',
|
||||
'stats.calcdistinct':True
|
||||
'q': 'type:2',
|
||||
'fq': 'isBot:false AND statistics_type:view',
|
||||
'facet': True,
|
||||
'facet.field': 'id',
|
||||
'facet.mincount': 1,
|
||||
'facet.limit': 1,
|
||||
'facet.offset': 0,
|
||||
'stats': True,
|
||||
'stats.field': 'id',
|
||||
'stats.calcdistinct': True
|
||||
}, rows=0)
|
||||
|
||||
# get total number of distinct facets (countDistinct)
|
||||
@ -62,55 +63,54 @@ def index_views():
|
||||
results_num_pages = int(results_totalNumFacets / results_per_page)
|
||||
results_current_page = 0
|
||||
|
||||
cursor = db.cursor()
|
||||
with DatabaseManager() as db:
|
||||
with db.cursor() as cursor:
|
||||
# create an empty list to store values for batch insertion
|
||||
data = []
|
||||
|
||||
# create an empty list to store values for batch insertion
|
||||
data = []
|
||||
while results_current_page <= results_num_pages:
|
||||
print('Indexing item views (page {} of {})'.format(results_current_page, results_num_pages))
|
||||
|
||||
while results_current_page <= results_num_pages:
|
||||
print('Indexing item views (page {} of {})'.format(results_current_page, results_num_pages))
|
||||
res = solr.query('statistics', {
|
||||
'q': 'type:2',
|
||||
'fq': 'isBot:false AND statistics_type:view',
|
||||
'facet': True,
|
||||
'facet.field': 'id',
|
||||
'facet.mincount': 1,
|
||||
'facet.limit': results_per_page,
|
||||
'facet.offset': results_current_page * results_per_page
|
||||
}, rows=0)
|
||||
|
||||
res = solr.query('statistics', {
|
||||
'q':'type:2',
|
||||
'fq':'isBot:false AND statistics_type:view',
|
||||
'facet':True,
|
||||
'facet.field':'id',
|
||||
'facet.mincount':1,
|
||||
'facet.limit':results_per_page,
|
||||
'facet.offset':results_current_page * results_per_page
|
||||
}, rows=0)
|
||||
# SolrClient's get_facets() returns a dict of dicts
|
||||
views = res.get_facets()
|
||||
# in this case iterate over the 'id' dict and get the item ids and views
|
||||
for item_id, item_views in views['id'].items():
|
||||
data.append((item_id, item_views))
|
||||
|
||||
# SolrClient's get_facets() returns a dict of dicts
|
||||
views = res.get_facets()
|
||||
# in this case iterate over the 'id' dict and get the item ids and views
|
||||
for item_id, item_views in views['id'].items():
|
||||
data.append((item_id, item_views))
|
||||
# do a batch insert of values from the current "page" of results
|
||||
sql = 'INSERT INTO items(id, views) VALUES %s ON CONFLICT(id) DO UPDATE SET views=excluded.views'
|
||||
psycopg2.extras.execute_values(cursor, sql, data, template='(%s, %s)')
|
||||
db.commit()
|
||||
|
||||
# do a batch insert of values from the current "page" of results
|
||||
sql = 'INSERT INTO items(id, views) VALUES %s ON CONFLICT(id) DO UPDATE SET views=excluded.views'
|
||||
psycopg2.extras.execute_values(cursor, sql, data, template='(%s, %s)')
|
||||
db.commit()
|
||||
# clear all items from the list so we can populate it with the next batch
|
||||
data.clear()
|
||||
|
||||
# clear all items from the list so we can populate it with the next batch
|
||||
data.clear()
|
||||
results_current_page += 1
|
||||
|
||||
results_current_page += 1
|
||||
|
||||
cursor.close()
|
||||
|
||||
def index_downloads():
|
||||
# get the total number of distinct facets for items with at least 1 download
|
||||
res = solr.query('statistics', {
|
||||
'q':'type:0',
|
||||
'fq':'isBot:false AND statistics_type:view AND bundleName:ORIGINAL',
|
||||
'facet':True,
|
||||
'facet.field':'owningItem',
|
||||
'facet.mincount':1,
|
||||
'facet.limit':1,
|
||||
'facet.offset':0,
|
||||
'stats':True,
|
||||
'stats.field':'owningItem',
|
||||
'stats.calcdistinct':True
|
||||
'q': 'type:0',
|
||||
'fq': 'isBot:false AND statistics_type:view AND bundleName:ORIGINAL',
|
||||
'facet': True,
|
||||
'facet.field': 'owningItem',
|
||||
'facet.mincount': 1,
|
||||
'facet.limit': 1,
|
||||
'facet.offset': 0,
|
||||
'stats': True,
|
||||
'stats.field': 'owningItem',
|
||||
'stats.calcdistinct': True
|
||||
}, rows=0)
|
||||
|
||||
# get total number of distinct facets (countDistinct)
|
||||
@ -121,52 +121,53 @@ def index_downloads():
|
||||
results_num_pages = int(results_totalNumFacets / results_per_page)
|
||||
results_current_page = 0
|
||||
|
||||
cursor = db.cursor()
|
||||
with DatabaseManager() as db:
|
||||
with db.cursor() as cursor:
|
||||
# create an empty list to store values for batch insertion
|
||||
data = []
|
||||
|
||||
# create an empty list to store values for batch insertion
|
||||
data = []
|
||||
while results_current_page <= results_num_pages:
|
||||
print('Indexing item downloads (page {} of {})'.format(results_current_page, results_num_pages))
|
||||
|
||||
while results_current_page <= results_num_pages:
|
||||
print('Indexing item downloads (page {} of {})'.format(results_current_page, results_num_pages))
|
||||
res = solr.query('statistics', {
|
||||
'q': 'type:0',
|
||||
'fq': 'isBot:false AND statistics_type:view AND bundleName:ORIGINAL',
|
||||
'facet': True,
|
||||
'facet.field': 'owningItem',
|
||||
'facet.mincount': 1,
|
||||
'facet.limit': results_per_page,
|
||||
'facet.offset': results_current_page * results_per_page
|
||||
}, rows=0)
|
||||
|
||||
res = solr.query('statistics', {
|
||||
'q':'type:0',
|
||||
'fq':'isBot:false AND statistics_type:view AND bundleName:ORIGINAL',
|
||||
'facet':True,
|
||||
'facet.field':'owningItem',
|
||||
'facet.mincount':1,
|
||||
'facet.limit':results_per_page,
|
||||
'facet.offset':results_current_page * results_per_page
|
||||
}, rows=0)
|
||||
# SolrClient's get_facets() returns a dict of dicts
|
||||
downloads = res.get_facets()
|
||||
# in this case iterate over the 'owningItem' dict and get the item ids and downloads
|
||||
for item_id, item_downloads in downloads['owningItem'].items():
|
||||
data.append((item_id, item_downloads))
|
||||
|
||||
# SolrClient's get_facets() returns a dict of dicts
|
||||
downloads = res.get_facets()
|
||||
# in this case iterate over the 'owningItem' dict and get the item ids and downloads
|
||||
for item_id, item_downloads in downloads['owningItem'].items():
|
||||
data.append((item_id, item_downloads))
|
||||
# do a batch insert of values from the current "page" of results
|
||||
sql = 'INSERT INTO items(id, downloads) VALUES %s ON CONFLICT(id) DO UPDATE SET downloads=excluded.downloads'
|
||||
psycopg2.extras.execute_values(cursor, sql, data, template='(%s, %s)')
|
||||
db.commit()
|
||||
|
||||
# do a batch insert of values from the current "page" of results
|
||||
sql = 'INSERT INTO items(id, downloads) VALUES %s ON CONFLICT(id) DO UPDATE SET downloads=excluded.downloads'
|
||||
psycopg2.extras.execute_values(cursor, sql, data, template='(%s, %s)')
|
||||
db.commit()
|
||||
# clear all items from the list so we can populate it with the next batch
|
||||
data.clear()
|
||||
|
||||
# clear all items from the list so we can populate it with the next batch
|
||||
data.clear()
|
||||
results_current_page += 1
|
||||
|
||||
results_current_page += 1
|
||||
|
||||
cursor.close()
|
||||
|
||||
db = database_connection()
|
||||
solr = solr_connection()
|
||||
|
||||
# create table to store item views and downloads
|
||||
cursor = db.cursor()
|
||||
cursor.execute('''CREATE TABLE IF NOT EXISTS items
|
||||
with DatabaseManager() as db:
|
||||
with db.cursor() as cursor:
|
||||
# create table to store item views and downloads
|
||||
cursor.execute('''CREATE TABLE IF NOT EXISTS items
|
||||
(id INT PRIMARY KEY, views INT DEFAULT 0, downloads INT DEFAULT 0)''')
|
||||
|
||||
# commit the table creation before closing the database connection
|
||||
db.commit()
|
||||
|
||||
index_views()
|
||||
index_downloads()
|
||||
|
||||
db.close()
|
||||
|
||||
# vim: set sw=4 ts=4 expandtab:
|
||||
|
@ -1,6 +1,7 @@
|
||||
from .config import SOLR_SERVER
|
||||
from SolrClient import SolrClient
|
||||
|
||||
|
||||
def solr_connection():
|
||||
connection = SolrClient(SOLR_SERVER)
|
||||
|
||||
|
@ -1,6 +1,8 @@
|
||||
-i https://pypi.org/simple
|
||||
certifi==2018.10.15
|
||||
chardet==3.0.4
|
||||
falcon==1.4.1
|
||||
git+https://github.com/alanorth/SolrClient.git@c629e3475be37c82770b2be61748be7e29882648#egg=solrclient
|
||||
gunicorn==19.9.0
|
||||
idna==2.7
|
||||
kazoo==2.5.0
|
||||
@ -8,5 +10,4 @@ psycopg2-binary==2.7.5
|
||||
python-mimeparse==1.6.0
|
||||
requests==2.20.0
|
||||
six==1.11.0
|
||||
-e git://github.com/alanorth/SolrClient.git@c629e3475be37c82770b2be61748be7e29882648#egg=SolrClient
|
||||
urllib3==1.24
|
||||
|
Reference in New Issue
Block a user