mirror of
https://github.com/ilri/dspace-statistics-api.git
synced 2025-05-10 15:16:02 +02:00
Compare commits
163 Commits
Author | SHA1 | Date | |
---|---|---|---|
78f9949ecb
|
|||
af80c4b447
|
|||
edd9e90f59
|
|||
1806d50a51
|
|||
a459e66fd9
|
|||
5a3b392a1d | |||
9dcda114c6 | |||
2b8aba5835
|
|||
9eb30a98e3
|
|||
622e9a86f1
|
|||
2acd08e0ab
|
|||
f75bcf292c
|
|||
8f46ceb8d8
|
|||
18e1e1a227
|
|||
fd46041698
|
|||
4ce7231ece
|
|||
60689d9014
|
|||
7bca32189a
|
|||
94c5d91d3c
|
|||
a640f734c8
|
|||
d56a3420f7
|
|||
7add0d6164
|
|||
c86bec4d8f
|
|||
5429fe5cc8
|
|||
f8a4cfd3da | |||
be94c94433 | |||
ba49b78a25
|
|||
842f80036f
|
|||
f738b8029b | |||
d08c43f3d5 | |||
819f8e6b0d | |||
c79e50a364
|
|||
71006d8bbf
|
|||
b7d723ef7c
|
|||
914ec52fbb
|
|||
5524066656
|
|||
043d897cef
|
|||
bd28353cda
|
|||
e23d66c2a2
|
|||
40e284dac0
|
|||
934fa9db9b
|
|||
1fabb72b58
|
|||
c7f95f0b60
|
|||
c95a98dd2d
|
|||
3f70f94a10
|
|||
9b8ad9defd | |||
d69ab20220
|
|||
378f56ddc2
|
|||
5a2a7d684c
|
|||
18276e910f
|
|||
8de8c2765f | |||
11a1755e59
|
|||
a835b0fdc5
|
|||
a88600c92b
|
|||
019d9242c9 | |||
f4d7312a3f
|
|||
9c46cfc7e2
|
|||
c1c2e319ac
|
|||
0895b4f469
|
|||
dcfef06a65
|
|||
13736d6359
|
|||
4fc64edeb8 | |||
2a8901dc4f
|
|||
e25c974796
|
|||
ffc62e9ee6
|
|||
556c5ae088
|
|||
d94134f80a
|
|||
586231eb2d
|
|||
766b77a3b6
|
|||
1959e8154e
|
|||
d40b2f0b2e | |||
061d0a8f5f | |||
e57660ff88 | |||
5c8756bede | |||
bae9fb80e4 | |||
8a65d99e08
|
|||
d479b7dc6c
|
|||
40aac8bf89 | |||
53ba6f2936
|
|||
140cc4cb07
|
|||
d5d2d2149b
|
|||
4c51d12eb4
|
|||
a6ce44e852 | |||
f6e866a589
|
|||
eb5c187d41
|
|||
b06c82bb16
|
|||
2f342be948
|
|||
e39f2b260c | |||
60ad474b88
|
|||
888f85d19e
|
|||
df7de93964
|
|||
7218631cc4
|
|||
085e525b2f
|
|||
e1580df12f
|
|||
be18779ff9
|
|||
60cfd8f23b
|
|||
87fd117d77
|
|||
f262ebdca2
|
|||
64d7f1a3b2
|
|||
a238a727d2
|
|||
cc5ce3ab98 | |||
70dfcb93c5
|
|||
69bcd1b5e4
|
|||
5f3bd61998
|
|||
e54dd8888f
|
|||
2ba09f8693
|
|||
a468a87a5a
|
|||
6a30b6550d
|
|||
18f013bfa0
|
|||
78900b5d85
|
|||
eb08832bf8
|
|||
c2ec780ad9
|
|||
df8ebc8bf1
|
|||
0d4be5f4c8
|
|||
30dc7f1939
|
|||
77194707fd
|
|||
10c1f8bdcc
|
|||
da74943da2
|
|||
fc8348ab29
|
|||
15c3299b99
|
|||
d36be5ee50 | |||
2f45d27554 | |||
b8356f7a87 | |||
2136dc79ce | |||
ed60120cef | |||
c027f01b48 | |||
754663f062
|
|||
507699e58a
|
|||
a016916995
|
|||
6fd2827a7c
|
|||
62142eb79e
|
|||
fda0321942
|
|||
963aa245c8
|
|||
568ff2eebb
|
|||
deecb8a10b
|
|||
12f45d7c08
|
|||
f65089f9ce
|
|||
1db5cf1c29
|
|||
e581c4b1aa
|
|||
e8d356c9ca
|
|||
34a9b8d629
|
|||
41e3d66a0e
|
|||
9b2a6137b4
|
|||
600b986f99
|
|||
49a7790794
|
|||
f2deba627c
|
|||
9323513794
|
|||
daf15610f2
|
|||
4ede966dbb
|
|||
3580473a6d
|
|||
071c24535f
|
|||
4291aecac4
|
|||
46bf537e88
|
|||
eaca5354d3
|
|||
4600288ee4
|
|||
8179563378
|
|||
b14c3eef4d
|
|||
71a789b13f
|
|||
c68ddacaa4
|
|||
9c9e79769e
|
|||
2ad5ade556
|
|||
7412a09670
|
|||
bb744a00b8
|
24
.build.yml
Normal file
24
.build.yml
Normal file
@ -0,0 +1,24 @@
|
||||
image: archlinux
|
||||
packages:
|
||||
- python-pipenv
|
||||
- postgresql
|
||||
sources:
|
||||
- https://git.sr.ht/~alanorth/dspace-statistics-api
|
||||
tasks:
|
||||
- setup: |
|
||||
id
|
||||
psql --version
|
||||
sudo su - postgres -c "initdb --locale en_US.UTF-8 -E UTF8 -D '/var/lib/postgres/data'"
|
||||
sudo systemctl start postgresql
|
||||
createuser -U postgres dspacestatistics
|
||||
psql -U postgres -c "ALTER USER dspacestatistics WITH PASSWORD 'dspacestatistics'"
|
||||
createdb -U postgres -O dspacestatistics --encoding=UNICODE dspacestatistics
|
||||
cd dspace-statistics-api
|
||||
psql -U postgres -d dspacestatistics < tests/dspacestatistics.sql
|
||||
pipenv install --dev
|
||||
- test: |
|
||||
cd dspace-statistics-api
|
||||
pipenv run pytest
|
||||
environment:
|
||||
PIPENV_NOSPIN: 'True'
|
||||
PIPENV_HIDE_EMOJIS: 'True'
|
4
.hound.yml
Normal file
4
.hound.yml
Normal file
@ -0,0 +1,4 @@
|
||||
flake8:
|
||||
enabled: true
|
||||
config_file: .flake8
|
||||
fail_on_violations: true
|
16
.travis.yml
16
.travis.yml
@ -1,9 +1,23 @@
|
||||
dist: xenial
|
||||
language: python
|
||||
python:
|
||||
- "3.5"
|
||||
- "3.6"
|
||||
- "3.7"
|
||||
addons:
|
||||
postgresql: "9.6"
|
||||
before_script:
|
||||
- psql --version
|
||||
- createuser -U postgres dspacestatistics
|
||||
- psql -U postgres -c "ALTER USER dspacestatistics WITH PASSWORD 'dspacestatistics'"
|
||||
- createdb -U postgres -O dspacestatistics --encoding=UNICODE dspacestatistics
|
||||
- psql -U postgres -d dspacestatistics < tests/dspacestatistics.sql
|
||||
install:
|
||||
- pip install -r requirements.txt
|
||||
- "pip install pipenv --upgrade-strategy=only-if-needed"
|
||||
- "pipenv install --dev"
|
||||
script: pytest
|
||||
env:
|
||||
- PIPENV_NOSPIN=True
|
||||
- PIPENV_HIDE_EMOJIS=True
|
||||
|
||||
# vim: ts=2 sw=2 et
|
||||
|
90
CHANGELOG.md
90
CHANGELOG.md
@ -4,9 +4,97 @@ All notable changes to this project will be documented in this file.
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||
|
||||
## [1.1.0] - 2019-05-05
|
||||
## Updated
|
||||
- Falcon 2.0.0 (@alanorth)
|
||||
|
||||
## [1.0.0] - 2019-04-15
|
||||
### Added
|
||||
- Build configuration for build.sr.ht
|
||||
|
||||
### Updated
|
||||
- Run pipenv update, bringing pytest version 4.4.0, psycopg-binary 2.8.2, etc
|
||||
- sr.ht and TravisCI configuration to disable emojis and animation to keep logs clean
|
||||
|
||||
### Changed
|
||||
- Use vanilla requests library instead of SolrClient
|
||||
- Use one-based paging in indexer output (for human readability)
|
||||
|
||||
## [0.9.0] - 2019-01-22
|
||||
### Updated
|
||||
- pytest version 4.0.0
|
||||
- Fix indexing of sharded statistics cores ([#10))
|
||||
- Handle case of missing views/downloads gracefully
|
||||
|
||||
## [0.8.1] - 2018-11-14
|
||||
### Changed
|
||||
- README.md to recommend using vanilla Python virtual environments and pip instead of pipenv
|
||||
- Regenerate pipenv environment to capture only direct dependencies
|
||||
|
||||
### Added
|
||||
- `requirements-dev.txt` for installing development packages with pip
|
||||
|
||||
## [0.8.0] - 2018-11-11
|
||||
### Changed
|
||||
- Properly handle database connection errors
|
||||
|
||||
### Added
|
||||
- API tests with pytest
|
||||
|
||||
## [0.7.0] - 2018-11-07
|
||||
### Added
|
||||
- Ability to configure PostgreSQL database port with DATABASE_PORT environment variable (defaults to 5432)
|
||||
- Hound CI configuration to validate pull requests against PEP 8 code style with Flake8
|
||||
- Configuration for [pipenv](https://pipenv.readthedocs.io/en/latest/)
|
||||
|
||||
### Changed
|
||||
- Use a database management class with Python context management to automatically open/close connections and cursors
|
||||
|
||||
### Changed
|
||||
- Validate code against PEP 8 style guide with Flake8
|
||||
|
||||
## [0.6.1] - 2018-10-31
|
||||
### Added
|
||||
- API documentation at root path (/)
|
||||
|
||||
## [0.6.0] - 2018-10-31
|
||||
### Changed
|
||||
- Refactor project structure (note breaking changes to API and indexing invocation, see contrib and README.md)
|
||||
|
||||
## [0.5.2] - 2018-10-28
|
||||
### Changed
|
||||
- Update library versions in requirements.txt
|
||||
|
||||
## [0.5.1] - 2018-10-24
|
||||
### Changed
|
||||
- Use Python's native json instead of ujson
|
||||
|
||||
## [0.5.0] - 2018-10-24
|
||||
### Added
|
||||
- Example nginx configuration to README.md
|
||||
|
||||
### Changed
|
||||
- Don't initialize Solr connection in API
|
||||
|
||||
## [0.4.3] - 2018-10-17
|
||||
### Changed
|
||||
- Use pip install as script for Travis CI
|
||||
|
||||
### Improved
|
||||
- Documentation for deployment and testing
|
||||
|
||||
## [0.4.2] - 2018-10-04
|
||||
### Changed
|
||||
- README.md introduction and requirements
|
||||
- Use ujson instead of json
|
||||
- Iterate directly on SQL cursor in `/items` route
|
||||
|
||||
### Fixed
|
||||
- Logic error in SQL for item views
|
||||
|
||||
## [0.4.1] - 2018-09-26
|
||||
### Changed
|
||||
- Use execute_values() to batch insert records to PostgreSQL
|
||||
- Use `execute_values()` to batch insert records to PostgreSQL
|
||||
|
||||
## [0.4.0] - 2018-09-25
|
||||
### Fixed
|
||||
|
18
Pipfile
Normal file
18
Pipfile
Normal file
@ -0,0 +1,18 @@
|
||||
[[source]]
|
||||
url = "https://pypi.org/simple"
|
||||
verify_ssl = true
|
||||
name = "pypi"
|
||||
|
||||
[packages]
|
||||
gunicorn = "*"
|
||||
falcon = "==2.0.0"
|
||||
"psycopg2-binary" = "*"
|
||||
requests = "*"
|
||||
|
||||
[dev-packages]
|
||||
ipython = "*"
|
||||
"flake8" = "*"
|
||||
pytest = "*"
|
||||
|
||||
[requires]
|
||||
python_version = "3.7"
|
301
Pipfile.lock
generated
Normal file
301
Pipfile.lock
generated
Normal file
@ -0,0 +1,301 @@
|
||||
{
|
||||
"_meta": {
|
||||
"hash": {
|
||||
"sha256": "b0152688b12e9a7d176f42fd1940613deedc79e6a561edc1d86ed9248cbe4255"
|
||||
},
|
||||
"pipfile-spec": 6,
|
||||
"requires": {
|
||||
"python_version": "3.7"
|
||||
},
|
||||
"sources": [
|
||||
{
|
||||
"name": "pypi",
|
||||
"url": "https://pypi.org/simple",
|
||||
"verify_ssl": true
|
||||
}
|
||||
]
|
||||
},
|
||||
"default": {
|
||||
"certifi": {
|
||||
"hashes": [
|
||||
"sha256:59b7658e26ca9c7339e00f8f4636cdfe59d34fa37b9b04f6f9e9926b3cece1a5",
|
||||
"sha256:b26104d6835d1f5e49452a26eb2ff87fe7090b89dfcaee5ea2212697e1e1d7ae"
|
||||
],
|
||||
"version": "==2019.3.9"
|
||||
},
|
||||
"chardet": {
|
||||
"hashes": [
|
||||
"sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae",
|
||||
"sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691"
|
||||
],
|
||||
"version": "==3.0.4"
|
||||
},
|
||||
"falcon": {
|
||||
"hashes": [
|
||||
"sha256:18157af2a4fc3feedf2b5dcc6196f448639acf01c68bc33d4d5a04c3ef87f494",
|
||||
"sha256:24adcd2b29a8ffa9d552dc79638cd21736a3fb04eda7d102c6cebafdaadb88ad",
|
||||
"sha256:54f2cb4b687035b2a03206dbfc538055cc48b59a953187b0458aa1b574d47b53",
|
||||
"sha256:59d1e8c993b9a37ea06df9d72cf907a46cc8063b30717cdac2f34d1658b6f936",
|
||||
"sha256:733033ec80c896e30a43ab3e776856096836787197a44eb21022320a61311983",
|
||||
"sha256:74cf1d18207381c665b9e6292d65100ce146d958707793174b03869dc6e614f4",
|
||||
"sha256:95bf6ce986c1119aef12c9b348f4dee9c6dcc58391bdd0bc2b0bf353c2b15986",
|
||||
"sha256:9712975adcf8c6e12876239085ad757b8fdeba223d46d23daef82b47658f83a9",
|
||||
"sha256:a5ebb22a04c9cc65081938ee7651b4e3b4d2a28522ea8ec04c7bdd2b3e9e8cd8",
|
||||
"sha256:aa184895d1ad4573fbfaaf803563d02f019ebdf4790e41cc568a330607eae439",
|
||||
"sha256:e3782b7b92fefd46a6ad1fd8fe63fe6c6f1b7740a95ca56957f48d1aee34b357",
|
||||
"sha256:e9efa0791b5d9f9dd9689015ea6bce0a27fcd5ecbcd30e6d940bffa4f7f03389",
|
||||
"sha256:eea593cf466b9c126ce667f6d30503624ef24459f118c75594a69353b6c3d5fc",
|
||||
"sha256:f93351459f110b4c1ee28556aef9a791832df6f910bea7b3f616109d534df06b"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==2.0.0"
|
||||
},
|
||||
"gunicorn": {
|
||||
"hashes": [
|
||||
"sha256:aa8e0b40b4157b36a5df5e599f45c9c76d6af43845ba3b3b0efe2c70473c2471",
|
||||
"sha256:fa2662097c66f920f53f70621c6c58ca4a3c4d3434205e608e121b5b3b71f4f3"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==19.9.0"
|
||||
},
|
||||
"idna": {
|
||||
"hashes": [
|
||||
"sha256:c357b3f628cf53ae2c4c05627ecc484553142ca23264e593d327bcde5e9c3407",
|
||||
"sha256:ea8b7f6188e6fa117537c3df7da9fc686d485087abf6ac197f9c46432f7e4a3c"
|
||||
],
|
||||
"version": "==2.8"
|
||||
},
|
||||
"psycopg2-binary": {
|
||||
"hashes": [
|
||||
"sha256:007ca0df127b1862fc010125bc4100b7a630efc6841047bd11afceadb4754611",
|
||||
"sha256:03c49e02adf0b4d68f422fdbd98f7a7c547beb27e99a75ed02298f85cb48406a",
|
||||
"sha256:0a1232cdd314e08848825edda06600455ad2a7adaa463ebfb12ece2d09f3370e",
|
||||
"sha256:131c80d0958c89273d9720b9adf9df1d7600bb3120e16019a7389ab15b079af5",
|
||||
"sha256:2de34cc3b775724623f86617d2601308083176a495f5b2efc2bbb0da154f483a",
|
||||
"sha256:2eddc31500f73544a2a54123d4c4b249c3c711d31e64deddb0890982ea37397a",
|
||||
"sha256:484f6c62bdc166ee0e5be3aa831120423bf399786d1f3b0304526c86180fbc0b",
|
||||
"sha256:4c2d9369ed40b4a44a8ccd6bc3a7db6272b8314812d2d1091f95c4c836d92e06",
|
||||
"sha256:70f570b5fa44413b9f30dbc053d17ef3ce6a4100147a10822f8662e58d473656",
|
||||
"sha256:7a2b5b095f3bd733aab101c89c0e1a3f0dfb4ebdc26f6374805c086ffe29d5b2",
|
||||
"sha256:804914a669186e2843c1f7fbe12b55aad1b36d40a28274abe6027deffad9433d",
|
||||
"sha256:8520c03172da18345d012949a53617a963e0191ccb3c666f23276d5326af27b5",
|
||||
"sha256:90da901fc33ea393fc644607e4a3916b509387e9339ec6ebc7bfded45b7a0ae9",
|
||||
"sha256:a582416ad123291a82c300d1d872bdc4136d69ad0b41d57dc5ca3df7ef8e3088",
|
||||
"sha256:ac8c5e20309f4989c296d62cac20ee456b69c41fd1bc03829e27de23b6fa9dd0",
|
||||
"sha256:b2cf82f55a619879f8557fdaae5cec7a294fac815e0087c4f67026fdf5259844",
|
||||
"sha256:b59d6f8cfca2983d8fdbe457bf95d2192f7b7efdb2b483bf5fa4e8981b04e8b2",
|
||||
"sha256:be08168197021d669b9964bd87628fa88f910b1be31e7010901070f2540c05fd",
|
||||
"sha256:be0f952f1c365061041bad16e27e224e29615d4eb1fb5b7e7760a1d3d12b90b6",
|
||||
"sha256:c1c9a33e46d7c12b9c96cf2d4349d783e3127163fd96254dcd44663cf0a1d438",
|
||||
"sha256:d18c89957ac57dd2a2724ecfe9a759912d776f96ecabba23acb9ecbf5c731035",
|
||||
"sha256:d7e7b0ff21f39433c50397e60bf0995d078802c591ca3b8d99857ea18a7496ee",
|
||||
"sha256:da0929b2bf0d1f365345e5eb940d8713c1d516312e010135b14402e2a3d2404d",
|
||||
"sha256:de24a4962e361c512d3e528ded6c7480eab24c655b8ca1f0b761d3b3650d2f07",
|
||||
"sha256:e45f93ff3f7dae2202248cf413a87aeb330821bf76998b3cf374eda2fc893dd7",
|
||||
"sha256:f046aeae1f7a845041b8661bb7a52449202b6c5d3fb59eb4724e7ca088811904",
|
||||
"sha256:f1dc2b7b2748084b890f5d05b65a47cd03188824890e9a60818721fd492249fb",
|
||||
"sha256:fcbe7cf3a786572b73d2cd5f34ed452a5f5fac47c9c9d1e0642c457a148f9f88"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==2.8.2"
|
||||
},
|
||||
"requests": {
|
||||
"hashes": [
|
||||
"sha256:502a824f31acdacb3a35b6690b5fbf0bc41d63a24a45c4004352b0242707598e",
|
||||
"sha256:7bf2a778576d825600030a110f3c0e3e8edc51dfaafe1c146e39a2027784957b"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==2.21.0"
|
||||
},
|
||||
"urllib3": {
|
||||
"hashes": [
|
||||
"sha256:2393a695cd12afedd0dcb26fe5d50d0cf248e5a66f75dbd89a3d4eb333a61af4",
|
||||
"sha256:a637e5fae88995b256e3409dc4d52c2e2e0ba32c42a6365fee8bbd2238de3cfb"
|
||||
],
|
||||
"version": "==1.24.3"
|
||||
}
|
||||
},
|
||||
"develop": {
|
||||
"atomicwrites": {
|
||||
"hashes": [
|
||||
"sha256:03472c30eb2c5d1ba9227e4c2ca66ab8287fbfbbda3888aa93dc2e28fc6811b4",
|
||||
"sha256:75a9445bac02d8d058d5e1fe689654ba5a6556a1dfd8ce6ec55a0ed79866cfa6"
|
||||
],
|
||||
"version": "==1.3.0"
|
||||
},
|
||||
"attrs": {
|
||||
"hashes": [
|
||||
"sha256:69c0dbf2ed392de1cb5ec704444b08a5ef81680a61cb899dc08127123af36a79",
|
||||
"sha256:f0b870f674851ecbfbbbd364d6b5cbdff9dcedbc7f3f5e18a6891057f21fe399"
|
||||
],
|
||||
"version": "==19.1.0"
|
||||
},
|
||||
"backcall": {
|
||||
"hashes": [
|
||||
"sha256:38ecd85be2c1e78f77fd91700c76e14667dc21e2713b63876c0eb901196e01e4",
|
||||
"sha256:bbbf4b1e5cd2bdb08f915895b51081c041bac22394fdfcfdfbe9f14b77c08bf2"
|
||||
],
|
||||
"version": "==0.1.0"
|
||||
},
|
||||
"decorator": {
|
||||
"hashes": [
|
||||
"sha256:86156361c50488b84a3f148056ea716ca587df2f0de1d34750d35c21312725de",
|
||||
"sha256:f069f3a01830ca754ba5258fde2278454a0b5b79e0d7f5c13b3b97e57d4acff6"
|
||||
],
|
||||
"version": "==4.4.0"
|
||||
},
|
||||
"entrypoints": {
|
||||
"hashes": [
|
||||
"sha256:589f874b313739ad35be6e0cd7efde2a4e9b6fea91edcc34e58ecbb8dbe56d19",
|
||||
"sha256:c70dd71abe5a8c85e55e12c19bd91ccfeec11a6e99044204511f9ed547d48451"
|
||||
],
|
||||
"version": "==0.3"
|
||||
},
|
||||
"flake8": {
|
||||
"hashes": [
|
||||
"sha256:859996073f341f2670741b51ec1e67a01da142831aa1fdc6242dbf88dffbe661",
|
||||
"sha256:a796a115208f5c03b18f332f7c11729812c8c3ded6c46319c59b53efd3819da8"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==3.7.7"
|
||||
},
|
||||
"ipython": {
|
||||
"hashes": [
|
||||
"sha256:54c5a8aa1eadd269ac210b96923688ccf01ebb2d0f21c18c3c717909583579a8",
|
||||
"sha256:e840810029224b56cd0d9e7719dc3b39cf84d577f8ac686547c8ba7a06eeab26"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==7.5.0"
|
||||
},
|
||||
"ipython-genutils": {
|
||||
"hashes": [
|
||||
"sha256:72dd37233799e619666c9f639a9da83c34013a73e8bbc79a7a6348d93c61fab8",
|
||||
"sha256:eb2e116e75ecef9d4d228fdc66af54269afa26ab4463042e33785b887c628ba8"
|
||||
],
|
||||
"version": "==0.2.0"
|
||||
},
|
||||
"jedi": {
|
||||
"hashes": [
|
||||
"sha256:2bb0603e3506f708e792c7f4ad8fc2a7a9d9c2d292a358fbbd58da531695595b",
|
||||
"sha256:2c6bcd9545c7d6440951b12b44d373479bf18123a401a52025cf98563fbd826c"
|
||||
],
|
||||
"version": "==0.13.3"
|
||||
},
|
||||
"mccabe": {
|
||||
"hashes": [
|
||||
"sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42",
|
||||
"sha256:dd8d182285a0fe56bace7f45b5e7d1a6ebcbf524e8f3bd87eb0f125271b8831f"
|
||||
],
|
||||
"version": "==0.6.1"
|
||||
},
|
||||
"more-itertools": {
|
||||
"hashes": [
|
||||
"sha256:2112d2ca570bb7c3e53ea1a35cd5df42bb0fd10c45f0fb97178679c3c03d64c7",
|
||||
"sha256:c3e4748ba1aad8dba30a4886b0b1a2004f9a863837b8654e7059eebf727afa5a"
|
||||
],
|
||||
"markers": "python_version > '2.7'",
|
||||
"version": "==7.0.0"
|
||||
},
|
||||
"parso": {
|
||||
"hashes": [
|
||||
"sha256:17cc2d7a945eb42c3569d4564cdf49bde221bc2b552af3eca9c1aad517dcdd33",
|
||||
"sha256:2e9574cb12e7112a87253e14e2c380ce312060269d04bd018478a3c92ea9a376"
|
||||
],
|
||||
"version": "==0.4.0"
|
||||
},
|
||||
"pexpect": {
|
||||
"hashes": [
|
||||
"sha256:2094eefdfcf37a1fdbfb9aa090862c1a4878e5c7e0e7e7088bdb511c558e5cd1",
|
||||
"sha256:9e2c1fd0e6ee3a49b28f95d4b33bc389c89b20af6a1255906e90ff1262ce62eb"
|
||||
],
|
||||
"markers": "sys_platform != 'win32'",
|
||||
"version": "==4.7.0"
|
||||
},
|
||||
"pickleshare": {
|
||||
"hashes": [
|
||||
"sha256:87683d47965c1da65cdacaf31c8441d12b8044cdec9aca500cd78fc2c683afca",
|
||||
"sha256:9649af414d74d4df115d5d718f82acb59c9d418196b7b4290ed47a12ce62df56"
|
||||
],
|
||||
"version": "==0.7.5"
|
||||
},
|
||||
"pluggy": {
|
||||
"hashes": [
|
||||
"sha256:19ecf9ce9db2fce065a7a0586e07cfb4ac8614fe96edf628a264b1c70116cf8f",
|
||||
"sha256:84d306a647cc805219916e62aab89caa97a33a1dd8c342e87a37f91073cd4746"
|
||||
],
|
||||
"version": "==0.9.0"
|
||||
},
|
||||
"prompt-toolkit": {
|
||||
"hashes": [
|
||||
"sha256:11adf3389a996a6d45cc277580d0d53e8a5afd281d0c9ec71b28e6f121463780",
|
||||
"sha256:2519ad1d8038fd5fc8e770362237ad0364d16a7650fb5724af6997ed5515e3c1",
|
||||
"sha256:977c6583ae813a37dc1c2e1b715892461fcbdaa57f6fc62f33a528c4886c8f55"
|
||||
],
|
||||
"version": "==2.0.9"
|
||||
},
|
||||
"ptyprocess": {
|
||||
"hashes": [
|
||||
"sha256:923f299cc5ad920c68f2bc0bc98b75b9f838b93b599941a6b63ddbc2476394c0",
|
||||
"sha256:d7cc528d76e76342423ca640335bd3633420dc1366f258cb31d05e865ef5ca1f"
|
||||
],
|
||||
"version": "==0.6.0"
|
||||
},
|
||||
"py": {
|
||||
"hashes": [
|
||||
"sha256:64f65755aee5b381cea27766a3a147c3f15b9b6b9ac88676de66ba2ae36793fa",
|
||||
"sha256:dc639b046a6e2cff5bbe40194ad65936d6ba360b52b3c3fe1d08a82dd50b5e53"
|
||||
],
|
||||
"version": "==1.8.0"
|
||||
},
|
||||
"pycodestyle": {
|
||||
"hashes": [
|
||||
"sha256:95a2219d12372f05704562a14ec30bc76b05a5b297b21a5dfe3f6fac3491ae56",
|
||||
"sha256:e40a936c9a450ad81df37f549d676d127b1b66000a6c500caa2b085bc0ca976c"
|
||||
],
|
||||
"version": "==2.5.0"
|
||||
},
|
||||
"pyflakes": {
|
||||
"hashes": [
|
||||
"sha256:17dbeb2e3f4d772725c777fabc446d5634d1038f234e77343108ce445ea69ce0",
|
||||
"sha256:d976835886f8c5b31d47970ed689944a0262b5f3afa00a5a7b4dc81e5449f8a2"
|
||||
],
|
||||
"version": "==2.1.1"
|
||||
},
|
||||
"pygments": {
|
||||
"hashes": [
|
||||
"sha256:5ffada19f6203563680669ee7f53b64dabbeb100eb51b61996085e99c03b284a",
|
||||
"sha256:e8218dd399a61674745138520d0d4cf2621d7e032439341bc3f647bff125818d"
|
||||
],
|
||||
"version": "==2.3.1"
|
||||
},
|
||||
"pytest": {
|
||||
"hashes": [
|
||||
"sha256:3773f4c235918987d51daf1db66d51c99fac654c81d6f2f709a046ab446d5e5d",
|
||||
"sha256:b7802283b70ca24d7119b32915efa7c409982f59913c1a6c0640aacf118b95f5"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==4.4.1"
|
||||
},
|
||||
"six": {
|
||||
"hashes": [
|
||||
"sha256:3350809f0555b11f552448330d0b52d5f24c91a322ea4a15ef22629740f3761c",
|
||||
"sha256:d16a0141ec1a18405cd4ce8b4613101da75da0e9a7aec5bdd4fa804d0e0eba73"
|
||||
],
|
||||
"version": "==1.12.0"
|
||||
},
|
||||
"traitlets": {
|
||||
"hashes": [
|
||||
"sha256:9c4bd2d267b7153df9152698efb1050a5d84982d3384a37b2c1f7723ba3e7835",
|
||||
"sha256:c6cb5e6f57c5a9bdaa40fa71ce7b4af30298fbab9ece9815b5d995ab6217c7d9"
|
||||
],
|
||||
"version": "==4.3.2"
|
||||
},
|
||||
"wcwidth": {
|
||||
"hashes": [
|
||||
"sha256:3df37372226d6e63e1b1e1eda15c594bca98a22d33a23832a90998faa96bc65e",
|
||||
"sha256:f4ebe71925af7b40a864553f761ed559b43544f8f71746c2d756c7fe788ade7c"
|
||||
],
|
||||
"version": "==0.1.7"
|
||||
}
|
||||
}
|
||||
}
|
97
README.md
97
README.md
@ -1,30 +1,101 @@
|
||||
# DSpace Statistics API
|
||||
A quick and dirty REST API to expose Solr view and download statistics for items in a DSpace repository.
|
||||
# DSpace Statistics API [](https://travis-ci.org/ilri/dspace-statistics-api) [](https://builds.sr.ht/~alanorth/dspace-statistics-api?)
|
||||
DSpace stores item view and download events in a Solr "statistics" core. This information is available for use in the various DSpace user interfaces, but is not exposed externally via any APIs. The DSpace 4/5 [REST API](https://wiki.duraspace.org/display/DSDOC5x/REST+API), for example, only exposes information about communities, collections, item metadata, and bitstreams.
|
||||
|
||||
Written and tested in Python 3.5, 3.6, and 3.7. Requires PostgreSQL version 9.5 or greater for [`UPSERT` support](https://wiki.postgresql.org/wiki/UPSERT).
|
||||
This project contains an indexer and a [Falcon-based](https://falcon.readthedocs.io/) web application to make the statistics available via a simple REST API. You can read more about the Solr queries used to gather the item view and download statistics on the [DSpace wiki](https://wiki.duraspace.org/display/DSPACE/Solr).
|
||||
|
||||
If you use the DSpace Statistics API please cite:
|
||||
|
||||
*Orth, A. 2018. DSpace statistics API. Nairobi, Kenya: ILRI. https://hdl.handle.net/10568/99143.*
|
||||
|
||||
## Requirements
|
||||
|
||||
- Python 3.5+
|
||||
- PostgreSQL version 9.5+ (due to [`UPSERT` support](https://wiki.postgresql.org/wiki/UPSERT))
|
||||
- DSpace with [Solr usage statistics enabled](https://wiki.duraspace.org/display/DSDOC5x/SOLR+Statistics) (tested with 5.x)
|
||||
|
||||
## Installation
|
||||
Create a virtual environment and run it:
|
||||
Create a Python virtual environment and install the dependencies:
|
||||
|
||||
$ python -m venv venv
|
||||
$ . venv/bin/activate
|
||||
$ python3 -m venv venv
|
||||
$ source venv/bin/activate
|
||||
$ pip install -r requirements.txt
|
||||
$ gunicorn app:api
|
||||
|
||||
## Running
|
||||
|
||||
Set up the environment variables for Solr and PostgreSQL:
|
||||
|
||||
$ export SOLR_SERVER=http://localhost:8080/solr
|
||||
$ export DATABASE_NAME=dspacestatistics
|
||||
$ export DATABASE_USER=dspacestatistics
|
||||
$ export DATABASE_PASS=dspacestatistics
|
||||
$ export DATABASE_HOST=localhost
|
||||
|
||||
Index the Solr statistics core to populate the PostgreSQL database:
|
||||
|
||||
$ python -m dspace_statistics_api.indexer
|
||||
|
||||
Run the REST API:
|
||||
|
||||
$ gunicorn dspace_statistics_api.app
|
||||
|
||||
Test to see if there are any statistics:
|
||||
|
||||
$ curl 'http://localhost:8000/items?limit=1'
|
||||
|
||||
## Testing
|
||||
Install development packages using pip:
|
||||
|
||||
$ pip install -r requirements-dev.txt
|
||||
|
||||
Run tests:
|
||||
|
||||
$ pytest
|
||||
|
||||
## Deployment
|
||||
There are example systemd service and timer units in the `contrib` directory. The API service listens on localhost by default so you will need to expose it publicly using a web server like nginx.
|
||||
|
||||
An example nginx configuration is:
|
||||
|
||||
```
|
||||
server {
|
||||
#...
|
||||
|
||||
location ~ /rest/statistics/?(.*) {
|
||||
access_log /var/log/nginx/statistics.log;
|
||||
proxy_pass http://statistics_api/$1$is_args$args;
|
||||
}
|
||||
}
|
||||
|
||||
upstream statistics_api {
|
||||
server 127.0.0.1:5000;
|
||||
}
|
||||
```
|
||||
|
||||
This would expose the API at `/rest/statistics`.
|
||||
|
||||
## Using the API
|
||||
The API exposes the following endpoints:
|
||||
|
||||
- GET `/items` — return views and downloads for all items that Solr knows about¹. Accepts `limit` and `page` query parameters for pagination of results.
|
||||
- GET `/item/id` — return views and downloads for a single item (*id* must be a positive integer). Returns HTTP 404 if an item id is not found.
|
||||
- GET `/` — return a basic API documentation page.
|
||||
- GET `/items` — return views and downloads for all items that Solr knows about¹. Accepts `limit` and `page` query parameters for pagination of results (`limit` must be an integer between 1 and 100, and `page` must be an integer greater than or equal to 0).
|
||||
- GET `/item/id` — return views and downloads for a single item (`id` must be a positive integer). Returns HTTP 404 if an item id is not found.
|
||||
|
||||
¹ We are querying the Solr statistics core, which technically only knows about items that have either views or downloads.
|
||||
The item id is the *internal* id for an item. You can get these from the standard DSpace REST API.
|
||||
|
||||
¹ We are querying the Solr statistics core, which technically only knows about items that have either views or downloads. If an item is not present here you can assume it has zero views and zero downloads, but not necessarily that it does not exist in the repository.
|
||||
|
||||
## Todo
|
||||
|
||||
- Add API documentation
|
||||
- Close up DB connection when gunicorn shuts down gracefully
|
||||
- Better logging
|
||||
- Tests
|
||||
- Version API
|
||||
- Use JSON in PostgreSQL
|
||||
- Add top items endpoint, perhaps `/top/items` or `/items/top`?
|
||||
- Make community and collection stats available
|
||||
- Support [DSpace 6 UUIDs](https://jira.duraspace.org/browse/DS-1782)
|
||||
- Switch to [Python 3.6+ f-string syntax](https://realpython.com/python-f-strings/)
|
||||
- Check IDs in database to see if they are deleted...
|
||||
|
||||
## License
|
||||
This work is licensed under the [GPLv3](https://www.gnu.org/licenses/gpl-3.0.en.html).
|
||||
|
||||
The license allows you to use and modify the work for personal and commercial purposes, but if you distribute the work you must provide users with a means to access the source code for the version you are distributing. Read more about the [GPLv3 at TL;DR Legal](https://tldrlegal.com/license/gnu-general-public-license-v3-(gpl-3)).
|
||||
|
72
app.py
72
app.py
@ -1,72 +0,0 @@
|
||||
from database import database_connection
|
||||
import falcon
|
||||
from solr import solr_connection
|
||||
|
||||
db = database_connection()
|
||||
db.set_session(readonly=True)
|
||||
solr = solr_connection()
|
||||
|
||||
class AllItemsResource:
|
||||
def on_get(self, req, resp):
|
||||
"""Handles GET requests"""
|
||||
# Return HTTPBadRequest if id parameter is not present and valid
|
||||
limit = req.get_param_as_int("limit", min=0, max=100) or 100
|
||||
page = req.get_param_as_int("page", min=0) or 0
|
||||
offset = limit * page
|
||||
|
||||
cursor = db.cursor()
|
||||
|
||||
# get total number of items so we can estimate the pages
|
||||
cursor.execute('SELECT COUNT(id) FROM items')
|
||||
pages = round(cursor.fetchone()[0] / limit)
|
||||
|
||||
# get statistics, ordered by id, and use limit and offset to page through results
|
||||
cursor.execute('SELECT id, views, downloads FROM items ORDER BY id ASC LIMIT {} OFFSET {}'.format(limit, offset))
|
||||
results = cursor.fetchmany(limit)
|
||||
cursor.close()
|
||||
|
||||
# create a list to hold dicts of item stats
|
||||
statistics = list()
|
||||
|
||||
# iterate over results and build statistics object
|
||||
for item in results:
|
||||
statistics.append({ 'id': item['id'], 'views': item['views'], 'downloads': item['downloads'] })
|
||||
|
||||
message = {
|
||||
'currentPage': page,
|
||||
'totalPages': pages,
|
||||
'limit': limit,
|
||||
'statistics': statistics
|
||||
}
|
||||
|
||||
resp.media = message
|
||||
|
||||
class ItemResource:
|
||||
def on_get(self, req, resp, item_id):
|
||||
"""Handles GET requests"""
|
||||
|
||||
cursor = db.cursor()
|
||||
cursor.execute('SELECT views, downloads FROM items WHERE id={}'.format(item_id))
|
||||
if cursor.rowcount == 0:
|
||||
raise falcon.HTTPNotFound(
|
||||
title='Item not found',
|
||||
description='The item with id "{}" was not found.'.format(item_id)
|
||||
)
|
||||
else:
|
||||
results = cursor.fetchone()
|
||||
|
||||
statistics = {
|
||||
'id': item_id,
|
||||
'views': results['views'],
|
||||
'downloads': results['downloads']
|
||||
}
|
||||
|
||||
resp.media = statistics
|
||||
|
||||
cursor.close()
|
||||
|
||||
api = falcon.API()
|
||||
api.add_route('/items', AllItemsResource())
|
||||
api.add_route('/item/{item_id:int}', ItemResource())
|
||||
|
||||
# vim: set sw=4 ts=4 expandtab:
|
@ -9,10 +9,10 @@ Environment=DATABASE_PASS=dspacestatistics
|
||||
Environment=DATABASE_HOST=localhost
|
||||
User=nobody
|
||||
Group=nogroup
|
||||
WorkingDirectory=/opt/ilri/dspace-statistics-api
|
||||
ExecStart=/opt/ilri/dspace-statistics-api/venv/bin/gunicorn \
|
||||
WorkingDirectory=/var/lib/dspace-statistics-api
|
||||
ExecStart=/var/lib/dspace-statistics-api/venv/bin/gunicorn \
|
||||
--bind 127.0.0.1:5000 \
|
||||
app:api
|
||||
dspace_statistics_api.app
|
||||
ExecReload=/bin/kill -s HUP $MAINPID
|
||||
ExecStop=/bin/kill -s TERM $MAINPID
|
||||
|
||||
|
@ -10,8 +10,8 @@ Environment=DATABASE_PASS=dspacestatistics
|
||||
Environment=DATABASE_HOST=localhost
|
||||
User=nobody
|
||||
Group=nogroup
|
||||
WorkingDirectory=/opt/ilri/dspace-statistics-api
|
||||
ExecStart=/opt/ilri/dspace-statistics-api/venv/bin/python indexer.py
|
||||
WorkingDirectory=/var/lib/dspace-statistics-api
|
||||
ExecStart=/var/lib/dspace-statistics-api/venv/bin/python -m dspace_statistics_api.indexer
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
|
12
database.py
12
database.py
@ -1,12 +0,0 @@
|
||||
from config import DATABASE_NAME
|
||||
from config import DATABASE_USER
|
||||
from config import DATABASE_PASS
|
||||
from config import DATABASE_HOST
|
||||
import psycopg2, psycopg2.extras
|
||||
|
||||
def database_connection():
|
||||
connection = psycopg2.connect("dbname={} user={} password={} host='{}'".format(DATABASE_NAME, DATABASE_USER, DATABASE_PASS, DATABASE_HOST), cursor_factory=psycopg2.extras.DictCursor)
|
||||
|
||||
return connection
|
||||
|
||||
# vim: set sw=4 ts=4 expandtab:
|
0
dspace_statistics_api/__init__.py
Normal file
0
dspace_statistics_api/__init__.py
Normal file
81
dspace_statistics_api/app.py
Normal file
81
dspace_statistics_api/app.py
Normal file
@ -0,0 +1,81 @@
|
||||
from .database import DatabaseManager
|
||||
import falcon
|
||||
|
||||
|
||||
class RootResource:
|
||||
def on_get(self, req, resp):
|
||||
resp.status = falcon.HTTP_200
|
||||
resp.content_type = 'text/html'
|
||||
with open('dspace_statistics_api/docs/index.html', 'r') as f:
|
||||
resp.body = f.read()
|
||||
|
||||
|
||||
class AllItemsResource:
|
||||
def on_get(self, req, resp):
|
||||
"""Handles GET requests"""
|
||||
# Return HTTPBadRequest if id parameter is not present and valid
|
||||
limit = req.get_param_as_int("limit", min_value=0, max_value=100) or 100
|
||||
page = req.get_param_as_int("page", min_value=0) or 0
|
||||
offset = limit * page
|
||||
|
||||
with DatabaseManager() as db:
|
||||
db.set_session(readonly=True)
|
||||
|
||||
with db.cursor() as cursor:
|
||||
# get total number of items so we can estimate the pages
|
||||
cursor.execute('SELECT COUNT(id) FROM items')
|
||||
pages = round(cursor.fetchone()[0] / limit)
|
||||
|
||||
# get statistics, ordered by id, and use limit and offset to page through results
|
||||
cursor.execute('SELECT id, views, downloads FROM items ORDER BY id ASC LIMIT {} OFFSET {}'.format(limit, offset))
|
||||
|
||||
# create a list to hold dicts of item stats
|
||||
statistics = list()
|
||||
|
||||
# iterate over results and build statistics object
|
||||
for item in cursor:
|
||||
statistics.append({'id': item['id'], 'views': item['views'], 'downloads': item['downloads']})
|
||||
|
||||
message = {
|
||||
'currentPage': page,
|
||||
'totalPages': pages,
|
||||
'limit': limit,
|
||||
'statistics': statistics
|
||||
}
|
||||
|
||||
resp.media = message
|
||||
|
||||
|
||||
class ItemResource:
|
||||
def on_get(self, req, resp, item_id):
|
||||
"""Handles GET requests"""
|
||||
|
||||
with DatabaseManager() as db:
|
||||
db.set_session(readonly=True)
|
||||
|
||||
with db.cursor() as cursor:
|
||||
cursor = db.cursor()
|
||||
cursor.execute('SELECT views, downloads FROM items WHERE id={}'.format(item_id))
|
||||
if cursor.rowcount == 0:
|
||||
raise falcon.HTTPNotFound(
|
||||
title='Item not found',
|
||||
description='The item with id "{}" was not found.'.format(item_id)
|
||||
)
|
||||
else:
|
||||
results = cursor.fetchone()
|
||||
|
||||
statistics = {
|
||||
'id': item_id,
|
||||
'views': results['views'],
|
||||
'downloads': results['downloads']
|
||||
}
|
||||
|
||||
resp.media = statistics
|
||||
|
||||
|
||||
api = application = falcon.API()
|
||||
api.add_route('/', RootResource())
|
||||
api.add_route('/items', AllItemsResource())
|
||||
api.add_route('/item/{item_id:int}', ItemResource())
|
||||
|
||||
# vim: set sw=4 ts=4 expandtab:
|
@ -7,5 +7,6 @@ DATABASE_NAME = os.environ.get('DATABASE_NAME', 'dspacestatistics')
|
||||
DATABASE_USER = os.environ.get('DATABASE_USER', 'dspacestatistics')
|
||||
DATABASE_PASS = os.environ.get('DATABASE_PASS', 'dspacestatistics')
|
||||
DATABASE_HOST = os.environ.get('DATABASE_HOST', 'localhost')
|
||||
DATABASE_PORT = os.environ.get('DATABASE_PORT', '5432')
|
||||
|
||||
# vim: set sw=4 ts=4 expandtab:
|
30
dspace_statistics_api/database.py
Normal file
30
dspace_statistics_api/database.py
Normal file
@ -0,0 +1,30 @@
|
||||
from .config import DATABASE_NAME
|
||||
from .config import DATABASE_USER
|
||||
from .config import DATABASE_PASS
|
||||
from .config import DATABASE_HOST
|
||||
from .config import DATABASE_PORT
|
||||
import falcon
|
||||
import psycopg2
|
||||
import psycopg2.extras
|
||||
|
||||
|
||||
class DatabaseManager():
|
||||
'''Manage database connection.'''
|
||||
|
||||
def __init__(self):
|
||||
self._connection_uri = 'dbname={} user={} password={} host={} port={}'.format(DATABASE_NAME, DATABASE_USER, DATABASE_PASS, DATABASE_HOST, DATABASE_PORT)
|
||||
|
||||
def __enter__(self):
|
||||
try:
|
||||
self._connection = psycopg2.connect(self._connection_uri, cursor_factory=psycopg2.extras.DictCursor)
|
||||
except psycopg2.OperationalError:
|
||||
title = '500 Internal Server Error'
|
||||
description = 'Could not connect to database'
|
||||
raise falcon.HTTPInternalServerError(title, description)
|
||||
|
||||
return self._connection
|
||||
|
||||
def __exit__(self, exc_type, exc_value, exc_traceback):
|
||||
self._connection.close()
|
||||
|
||||
# vim: set sw=4 ts=4 expandtab:
|
20
dspace_statistics_api/docs/index.html
Normal file
20
dspace_statistics_api/docs/index.html
Normal file
@ -0,0 +1,20 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en-US">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<title>DSpace Statistics API</title>
|
||||
</head>
|
||||
<body>
|
||||
<h1>DSpace Statistics API</h1>
|
||||
<p>This site is running the <a href="https://github.com/ilri/dspace-statistics-api" title="DSpace Statistics API project">DSpace Statistics API</a>. The following endpoints are available:</p>
|
||||
<ul>
|
||||
<li>GET <code>/</code> — return a basic API documentation page.</li>
|
||||
<li>GET <code>/items</code> — return views and downloads for all items that Solr knows about¹. Accepts <code>limit</code> and <code>page</code> query parameters for pagination of results (<code>limit</code> must be an integer between 1 and 100, and <code>page</code> must be an integer greater than or equal to 0).</li>
|
||||
<li>GET <code>/item/id</code> — return views and downloads for a single item (<code>id</code> must be a positive integer). Returns HTTP 404 if an item id is not found.</li>
|
||||
</ul>
|
||||
|
||||
<p>The item id is the <em>internal</em> id for an item. You can get these from the standard DSpace REST API.</p>
|
||||
|
||||
<p>¹ We are querying the Solr statistics core, which technically only knows about items that have either views or downloads. If an item is not present here you can assume it has zero views and zero downloads, but not necessarily that it does not exist in the repository.</code>
|
||||
</body>
|
||||
</html>
|
266
dspace_statistics_api/indexer.py
Normal file
266
dspace_statistics_api/indexer.py
Normal file
@ -0,0 +1,266 @@
|
||||
#
|
||||
# indexer.py
|
||||
#
|
||||
# Copyright 2018 Alan Orth.
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
# ---
|
||||
#
|
||||
# Connects to a DSpace Solr statistics core and ingests item views and downloads
|
||||
# into a PostgreSQL database for use by other applications (like an API).
|
||||
#
|
||||
# This script is written for Python 3.5+ and requires several modules that you
|
||||
# can install with pip (I recommend using a Python virtual environment):
|
||||
#
|
||||
# $ pip install SolrClient psycopg2-binary
|
||||
#
|
||||
# See: https://solrclient.readthedocs.io/en/latest/SolrClient.html
|
||||
# See: https://wiki.duraspace.org/display/DSPACE/Solr
|
||||
|
||||
from .config import SOLR_SERVER
|
||||
from .database import DatabaseManager
|
||||
import json
|
||||
import psycopg2.extras
|
||||
import re
|
||||
import requests
|
||||
|
||||
|
||||
# Enumerate the cores in Solr to determine if statistics have been sharded into
|
||||
# yearly shards by DSpace's stats-util or not (for example: statistics-2018).
|
||||
def get_statistics_shards():
|
||||
# Initialize an empty list for statistics core years
|
||||
statistics_core_years = []
|
||||
|
||||
# URL for Solr status to check active cores
|
||||
solr_query_params = {
|
||||
'action': 'STATUS',
|
||||
'wt': 'json'
|
||||
}
|
||||
solr_url = SOLR_SERVER + '/admin/cores'
|
||||
res = requests.get(solr_url, params=solr_query_params)
|
||||
|
||||
if res.status_code == requests.codes.ok:
|
||||
data = res.json()
|
||||
|
||||
# Iterate over active cores from Solr's STATUS response (cores are in
|
||||
# the status array of this response).
|
||||
for core in data['status']:
|
||||
# Pattern to match, for example: statistics-2018
|
||||
pattern = re.compile('^statistics-[0-9]{4}$')
|
||||
|
||||
if not pattern.match(core):
|
||||
continue
|
||||
|
||||
# Append current core to list
|
||||
statistics_core_years.append(core)
|
||||
|
||||
# Initialize a string to hold our shards (may end up being empty if the Solr
|
||||
# core has not been processed by stats-util).
|
||||
shards = str()
|
||||
|
||||
if len(statistics_core_years) > 0:
|
||||
# Begin building a string of shards starting with the default one
|
||||
shards = '{}/statistics'.format(SOLR_SERVER)
|
||||
|
||||
for core in statistics_core_years:
|
||||
# Create a comma-separated list of shards to pass to our Solr query
|
||||
#
|
||||
# See: https://wiki.apache.org/solr/DistributedSearch
|
||||
shards += ',{}/{}'.format(SOLR_SERVER, core)
|
||||
|
||||
# Return the string of shards, which may actually be empty. Solr doesn't
|
||||
# seem to mind if the shards query parameter is empty and I haven't seen
|
||||
# any negative performance impact so this should be fine.
|
||||
return shards
|
||||
|
||||
|
||||
def index_views():
|
||||
# get total number of distinct facets for items with a minimum of 1 view,
|
||||
# otherwise Solr returns all kinds of weird ids that are actually not in
|
||||
# the database. Also, stats are expensive, but we need stats.calcdistinct
|
||||
# so we can get the countDistinct summary.
|
||||
#
|
||||
# see: https://lucene.apache.org/solr/guide/6_6/the-stats-component.html
|
||||
solr_query_params = {
|
||||
'q': 'type:2',
|
||||
'fq': 'isBot:false AND statistics_type:view',
|
||||
'facet': 'true',
|
||||
'facet.field': 'id',
|
||||
'facet.mincount': 1,
|
||||
'facet.limit': 1,
|
||||
'facet.offset': 0,
|
||||
'stats': 'true',
|
||||
'stats.field': 'id',
|
||||
'stats.calcdistinct': 'true',
|
||||
'shards': shards,
|
||||
'rows': 0,
|
||||
'wt': 'json'
|
||||
}
|
||||
|
||||
solr_url = SOLR_SERVER + '/statistics/select'
|
||||
|
||||
res = requests.get(solr_url, params=solr_query_params)
|
||||
|
||||
try:
|
||||
# get total number of distinct facets (countDistinct)
|
||||
results_totalNumFacets = res.json()['stats']['stats_fields']['id']['countDistinct']
|
||||
except TypeError:
|
||||
print('No item views to index, exiting.')
|
||||
|
||||
exit(0)
|
||||
|
||||
# divide results into "pages" (cast to int to effectively round down)
|
||||
results_per_page = 100
|
||||
results_num_pages = int(results_totalNumFacets / results_per_page)
|
||||
results_current_page = 0
|
||||
|
||||
with DatabaseManager() as db:
|
||||
with db.cursor() as cursor:
|
||||
# create an empty list to store values for batch insertion
|
||||
data = []
|
||||
|
||||
while results_current_page <= results_num_pages:
|
||||
# "pages" are zero based, but one based is more human readable
|
||||
print('Indexing item views (page {} of {})'.format(results_current_page + 1, results_num_pages + 1))
|
||||
|
||||
solr_query_params = {
|
||||
'q': 'type:2',
|
||||
'fq': 'isBot:false AND statistics_type:view',
|
||||
'facet': 'true',
|
||||
'facet.field': 'id',
|
||||
'facet.mincount': 1,
|
||||
'facet.limit': results_per_page,
|
||||
'facet.offset': results_current_page * results_per_page,
|
||||
'shards': shards,
|
||||
'rows': 0,
|
||||
'wt': 'json',
|
||||
'json.nl': 'map' # return facets as a dict instead of a flat list
|
||||
}
|
||||
|
||||
solr_url = SOLR_SERVER + '/statistics/select'
|
||||
|
||||
res = requests.get(solr_url, params=solr_query_params)
|
||||
|
||||
# Solr returns facets as a dict of dicts (see json.nl parameter)
|
||||
views = res.json()['facet_counts']['facet_fields']
|
||||
# iterate over the 'id' dict and get the item ids and views
|
||||
for item_id, item_views in views['id'].items():
|
||||
data.append((item_id, item_views))
|
||||
|
||||
# do a batch insert of values from the current "page" of results
|
||||
sql = 'INSERT INTO items(id, views) VALUES %s ON CONFLICT(id) DO UPDATE SET views=excluded.views'
|
||||
psycopg2.extras.execute_values(cursor, sql, data, template='(%s, %s)')
|
||||
db.commit()
|
||||
|
||||
# clear all items from the list so we can populate it with the next batch
|
||||
data.clear()
|
||||
|
||||
results_current_page += 1
|
||||
|
||||
|
||||
def index_downloads():
|
||||
# get the total number of distinct facets for items with at least 1 download
|
||||
solr_query_params= {
|
||||
'q': 'type:0',
|
||||
'fq': 'isBot:false AND statistics_type:view AND bundleName:ORIGINAL',
|
||||
'facet': 'true',
|
||||
'facet.field': 'owningItem',
|
||||
'facet.mincount': 1,
|
||||
'facet.limit': 1,
|
||||
'facet.offset': 0,
|
||||
'stats': 'true',
|
||||
'stats.field': 'owningItem',
|
||||
'stats.calcdistinct': 'true',
|
||||
'shards': shards,
|
||||
'rows': 0,
|
||||
'wt': 'json'
|
||||
}
|
||||
|
||||
solr_url = SOLR_SERVER + '/statistics/select'
|
||||
|
||||
res = requests.get(solr_url, params=solr_query_params)
|
||||
|
||||
try:
|
||||
# get total number of distinct facets (countDistinct)
|
||||
results_totalNumFacets = res.json()['stats']['stats_fields']['owningItem']['countDistinct']
|
||||
except TypeError:
|
||||
print('No item downloads to index, exiting.')
|
||||
|
||||
exit(0)
|
||||
|
||||
# divide results into "pages" (cast to int to effectively round down)
|
||||
results_per_page = 100
|
||||
results_num_pages = int(results_totalNumFacets / results_per_page)
|
||||
results_current_page = 0
|
||||
|
||||
with DatabaseManager() as db:
|
||||
with db.cursor() as cursor:
|
||||
# create an empty list to store values for batch insertion
|
||||
data = []
|
||||
|
||||
while results_current_page <= results_num_pages:
|
||||
# "pages" are zero based, but one based is more human readable
|
||||
print('Indexing item downloads (page {} of {})'.format(results_current_page + 1, results_num_pages + 1))
|
||||
|
||||
solr_query_params = {
|
||||
'q': 'type:0',
|
||||
'fq': 'isBot:false AND statistics_type:view AND bundleName:ORIGINAL',
|
||||
'facet': 'true',
|
||||
'facet.field': 'owningItem',
|
||||
'facet.mincount': 1,
|
||||
'facet.limit': results_per_page,
|
||||
'facet.offset': results_current_page * results_per_page,
|
||||
'shards': shards,
|
||||
'rows': 0,
|
||||
'wt': 'json',
|
||||
'json.nl': 'map' # return facets as a dict instead of a flat list
|
||||
}
|
||||
|
||||
solr_url = SOLR_SERVER + '/statistics/select'
|
||||
|
||||
res = requests.get(solr_url, params=solr_query_params)
|
||||
|
||||
# Solr returns facets as a dict of dicts (see json.nl parameter)
|
||||
downloads = res.json()['facet_counts']['facet_fields']
|
||||
# iterate over the 'owningItem' dict and get the item ids and downloads
|
||||
for item_id, item_downloads in downloads['owningItem'].items():
|
||||
data.append((item_id, item_downloads))
|
||||
|
||||
# do a batch insert of values from the current "page" of results
|
||||
sql = 'INSERT INTO items(id, downloads) VALUES %s ON CONFLICT(id) DO UPDATE SET downloads=excluded.downloads'
|
||||
psycopg2.extras.execute_values(cursor, sql, data, template='(%s, %s)')
|
||||
db.commit()
|
||||
|
||||
# clear all items from the list so we can populate it with the next batch
|
||||
data.clear()
|
||||
|
||||
results_current_page += 1
|
||||
|
||||
|
||||
with DatabaseManager() as db:
|
||||
with db.cursor() as cursor:
|
||||
# create table to store item views and downloads
|
||||
cursor.execute('''CREATE TABLE IF NOT EXISTS items
|
||||
(id INT PRIMARY KEY, views INT DEFAULT 0, downloads INT DEFAULT 0)''')
|
||||
|
||||
# commit the table creation before closing the database connection
|
||||
db.commit()
|
||||
|
||||
shards = get_statistics_shards()
|
||||
|
||||
index_views()
|
||||
index_downloads()
|
||||
|
||||
# vim: set sw=4 ts=4 expandtab:
|
173
indexer.py
173
indexer.py
@ -1,173 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
#
|
||||
# indexer.py
|
||||
#
|
||||
# Copyright 2018 Alan Orth.
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
# ---
|
||||
#
|
||||
# Connects to a DSpace Solr statistics core and ingests item views and downloads
|
||||
# into a PostgreSQL database for use by other applications (like an API).
|
||||
#
|
||||
# This script is written for Python 3.5+ and requires several modules that you
|
||||
# can install with pip (I recommend using a Python virtual environment):
|
||||
#
|
||||
# $ pip install SolrClient psycopg2-binary
|
||||
#
|
||||
# See: https://solrclient.readthedocs.io/en/latest/SolrClient.html
|
||||
# See: https://wiki.duraspace.org/display/DSPACE/Solr
|
||||
|
||||
from database import database_connection
|
||||
import json
|
||||
import psycopg2.extras
|
||||
from solr import solr_connection
|
||||
|
||||
def index_views():
|
||||
# get total number of distinct facets for items with a minimum of 1 view,
|
||||
# otherwise Solr returns all kinds of weird ids that are actually not in
|
||||
# the database. Also, stats are expensive, but we need stats.calcdistinct
|
||||
# so we can get the countDistinct summary.
|
||||
#
|
||||
# see: https://lucene.apache.org/solr/guide/6_6/the-stats-component.html
|
||||
res = solr.query('statistics', {
|
||||
'q':'type:2',
|
||||
'fq':'isBot:false AND statistics_type:view',
|
||||
'facet':True,
|
||||
'facet.field':'id',
|
||||
'facet.mincount':1,
|
||||
'facet.limit':1,
|
||||
'facet.offset':0,
|
||||
'stats':True,
|
||||
'stats.field':'id',
|
||||
'stats.calcdistinct':True
|
||||
}, rows=0)
|
||||
|
||||
# get total number of distinct facets (countDistinct)
|
||||
results_totalNumFacets = json.loads(res.get_json())['stats']['stats_fields']['id']['countDistinct']
|
||||
|
||||
# divide results into "pages" (cast to int to effectively round down)
|
||||
results_per_page = 100
|
||||
results_num_pages = int(results_totalNumFacets / results_per_page)
|
||||
results_current_page = 0
|
||||
|
||||
cursor = db.cursor()
|
||||
|
||||
# create an empty list to store values for batch insertion
|
||||
data = []
|
||||
|
||||
while results_current_page <= results_num_pages:
|
||||
print('Indexing item views (page {} of {})'.format(results_current_page, results_num_pages))
|
||||
|
||||
res = solr.query('statistics', {
|
||||
'q':'type:2',
|
||||
'fq':'isBot:false AND statistics_type:view',
|
||||
'facet':True,
|
||||
'facet.field':'id',
|
||||
'facet.mincount':1,
|
||||
'facet.limit':results_per_page,
|
||||
'facet.offset':results_current_page * results_per_page
|
||||
}, rows=0)
|
||||
|
||||
# SolrClient's get_facets() returns a dict of dicts
|
||||
views = res.get_facets()
|
||||
# in this case iterate over the 'id' dict and get the item ids and views
|
||||
for item_id, item_views in views['id'].items():
|
||||
data.append((item_id, item_views))
|
||||
|
||||
# do a batch insert of values from the current "page" of results
|
||||
sql = 'INSERT INTO items(id, views) VALUES %s ON CONFLICT(id) DO UPDATE SET downloads=excluded.views'
|
||||
psycopg2.extras.execute_values(cursor, sql, data, template='(%s, %s)')
|
||||
db.commit()
|
||||
|
||||
# clear all items from the list so we can populate it with the next batch
|
||||
data.clear()
|
||||
|
||||
results_current_page += 1
|
||||
|
||||
cursor.close()
|
||||
|
||||
def index_downloads():
|
||||
# get the total number of distinct facets for items with at least 1 download
|
||||
res = solr.query('statistics', {
|
||||
'q':'type:0',
|
||||
'fq':'isBot:false AND statistics_type:view AND bundleName:ORIGINAL',
|
||||
'facet':True,
|
||||
'facet.field':'owningItem',
|
||||
'facet.mincount':1,
|
||||
'facet.limit':1,
|
||||
'facet.offset':0,
|
||||
'stats':True,
|
||||
'stats.field':'owningItem',
|
||||
'stats.calcdistinct':True
|
||||
}, rows=0)
|
||||
|
||||
# get total number of distinct facets (countDistinct)
|
||||
results_totalNumFacets = json.loads(res.get_json())['stats']['stats_fields']['owningItem']['countDistinct']
|
||||
|
||||
# divide results into "pages" (cast to int to effectively round down)
|
||||
results_per_page = 100
|
||||
results_num_pages = int(results_totalNumFacets / results_per_page)
|
||||
results_current_page = 0
|
||||
|
||||
cursor = db.cursor()
|
||||
|
||||
# create an empty list to store values for batch insertion
|
||||
data = []
|
||||
|
||||
while results_current_page <= results_num_pages:
|
||||
print('Indexing item downloads (page {} of {})'.format(results_current_page, results_num_pages))
|
||||
|
||||
res = solr.query('statistics', {
|
||||
'q':'type:0',
|
||||
'fq':'isBot:false AND statistics_type:view AND bundleName:ORIGINAL',
|
||||
'facet':True,
|
||||
'facet.field':'owningItem',
|
||||
'facet.mincount':1,
|
||||
'facet.limit':results_per_page,
|
||||
'facet.offset':results_current_page * results_per_page
|
||||
}, rows=0)
|
||||
|
||||
# SolrClient's get_facets() returns a dict of dicts
|
||||
downloads = res.get_facets()
|
||||
# in this case iterate over the 'owningItem' dict and get the item ids and downloads
|
||||
for item_id, item_downloads in downloads['owningItem'].items():
|
||||
data.append((item_id, item_downloads))
|
||||
|
||||
# do a batch insert of values from the current "page" of results
|
||||
sql = 'INSERT INTO items(id, downloads) VALUES %s ON CONFLICT(id) DO UPDATE SET downloads=excluded.downloads'
|
||||
psycopg2.extras.execute_values(cursor, sql, data, template='(%s, %s)')
|
||||
db.commit()
|
||||
|
||||
# clear all items from the list so we can populate it with the next batch
|
||||
data.clear()
|
||||
|
||||
results_current_page += 1
|
||||
|
||||
cursor.close()
|
||||
|
||||
db = database_connection()
|
||||
solr = solr_connection()
|
||||
|
||||
# create table to store item views and downloads
|
||||
cursor = db.cursor()
|
||||
cursor.execute('''CREATE TABLE IF NOT EXISTS items
|
||||
(id INT PRIMARY KEY, views INT DEFAULT 0, downloads INT DEFAULT 0)''')
|
||||
index_views()
|
||||
index_downloads()
|
||||
|
||||
db.close()
|
||||
|
||||
# vim: set sw=4 ts=4 expandtab:
|
4
pytest.ini
Normal file
4
pytest.ini
Normal file
@ -0,0 +1,4 @@
|
||||
[pytest]
|
||||
addopts= -rsxX -s -v --strict
|
||||
filterwarnings =
|
||||
error::UserWarning
|
26
requirements-dev.txt
Normal file
26
requirements-dev.txt
Normal file
@ -0,0 +1,26 @@
|
||||
-i https://pypi.org/simple
|
||||
atomicwrites==1.3.0
|
||||
attrs==19.1.0
|
||||
backcall==0.1.0
|
||||
decorator==4.4.0
|
||||
entrypoints==0.3
|
||||
flake8==3.7.7
|
||||
ipython-genutils==0.2.0
|
||||
ipython==7.5.0
|
||||
jedi==0.13.3
|
||||
mccabe==0.6.1
|
||||
more-itertools==7.0.0 ; python_version > '2.7'
|
||||
parso==0.4.0
|
||||
pexpect==4.7.0 ; sys_platform != 'win32'
|
||||
pickleshare==0.7.5
|
||||
pluggy==0.9.0
|
||||
prompt-toolkit==2.0.9
|
||||
ptyprocess==0.6.0
|
||||
py==1.8.0
|
||||
pycodestyle==2.5.0
|
||||
pyflakes==2.1.1
|
||||
pygments==2.3.1
|
||||
pytest==4.4.1
|
||||
six==1.12.0
|
||||
traitlets==4.3.2
|
||||
wcwidth==0.1.7
|
@ -1,12 +1,9 @@
|
||||
certifi==2018.8.24
|
||||
-i https://pypi.org/simple
|
||||
certifi==2019.3.9
|
||||
chardet==3.0.4
|
||||
falcon==1.4.1
|
||||
falcon==2.0.0
|
||||
gunicorn==19.9.0
|
||||
idna==2.7
|
||||
kazoo==2.5.0
|
||||
psycopg2-binary==2.7.5
|
||||
python-mimeparse==1.6.0
|
||||
requests==2.19.1
|
||||
six==1.11.0
|
||||
SolrClient==0.2.1
|
||||
urllib3==1.23
|
||||
idna==2.8
|
||||
psycopg2-binary==2.8.2
|
||||
requests==2.21.0
|
||||
urllib3==1.24.3
|
||||
|
9
solr.py
9
solr.py
@ -1,9 +0,0 @@
|
||||
from config import SOLR_SERVER
|
||||
from SolrClient import SolrClient
|
||||
|
||||
def solr_connection():
|
||||
connection = SolrClient(SOLR_SERVER)
|
||||
|
||||
return connection
|
||||
|
||||
# vim: set sw=4 ts=4 expandtab:
|
0
tests/__init__.py
Normal file
0
tests/__init__.py
Normal file
77226
tests/dspacestatistics.sql
Normal file
77226
tests/dspacestatistics.sql
Normal file
File diff suppressed because it is too large
Load Diff
67
tests/test_api.py
Normal file
67
tests/test_api.py
Normal file
@ -0,0 +1,67 @@
|
||||
from falcon import testing
|
||||
import json
|
||||
import pytest
|
||||
|
||||
from dspace_statistics_api.app import api
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def client():
|
||||
return testing.TestClient(api)
|
||||
|
||||
|
||||
def test_get_docs(client):
|
||||
'''Test requesting the documentation at the root.'''
|
||||
|
||||
response = client.simulate_get('/')
|
||||
|
||||
assert isinstance(response.content, bytes)
|
||||
assert response.status_code == 200
|
||||
|
||||
|
||||
def test_get_item(client):
|
||||
'''Test requesting a single item.'''
|
||||
|
||||
response = client.simulate_get('/item/17')
|
||||
response_doc = json.loads(response.text)
|
||||
|
||||
assert isinstance(response_doc['downloads'], int)
|
||||
assert isinstance(response_doc['id'], int)
|
||||
assert isinstance(response_doc['views'], int)
|
||||
assert response.status_code == 200
|
||||
|
||||
|
||||
def test_get_missing_item(client):
|
||||
'''Test requesting a single non-existing item.'''
|
||||
|
||||
response = client.simulate_get('/item/1')
|
||||
|
||||
assert response.status_code == 404
|
||||
|
||||
|
||||
def test_get_items(client):
|
||||
'''Test requesting 100 items.'''
|
||||
|
||||
response = client.simulate_get('/items', query_string='limit=100')
|
||||
response_doc = json.loads(response.text)
|
||||
|
||||
assert isinstance(response_doc['currentPage'], int)
|
||||
assert isinstance(response_doc['totalPages'], int)
|
||||
assert isinstance(response_doc['statistics'], list)
|
||||
assert response.status_code == 200
|
||||
|
||||
|
||||
def test_get_items_invalid_limit(client):
|
||||
'''Test requesting 100 items with an invalid limit parameter.'''
|
||||
|
||||
response = client.simulate_get('/items', query_string='limit=101')
|
||||
|
||||
assert response.status_code == 400
|
||||
|
||||
|
||||
def test_get_items_invalid_page(client):
|
||||
'''Test requesting 100 items with an invalid page parameter.'''
|
||||
|
||||
response = client.simulate_get('/items', query_string='page=-1')
|
||||
|
||||
assert response.status_code == 400
|
Reference in New Issue
Block a user