Mirror of https://github.com/ilri/dspace-statistics-api.git (synced 2025-05-10 15:16:02 +02:00)
Compare commits
109 Commits
78f9949ecb
af80c4b447
edd9e90f59
1806d50a51
a459e66fd9
5a3b392a1d
9dcda114c6
2b8aba5835
9eb30a98e3
622e9a86f1
2acd08e0ab
f75bcf292c
8f46ceb8d8
18e1e1a227
fd46041698
4ce7231ece
60689d9014
7bca32189a
94c5d91d3c
a640f734c8
d56a3420f7
7add0d6164
c86bec4d8f
5429fe5cc8
f8a4cfd3da
be94c94433
ba49b78a25
842f80036f
f738b8029b
d08c43f3d5
819f8e6b0d
c79e50a364
71006d8bbf
b7d723ef7c
914ec52fbb
5524066656
043d897cef
bd28353cda
e23d66c2a2
40e284dac0
934fa9db9b
1fabb72b58
c7f95f0b60
c95a98dd2d
3f70f94a10
9b8ad9defd
d69ab20220
378f56ddc2
5a2a7d684c
18276e910f
8de8c2765f
11a1755e59
a835b0fdc5
a88600c92b
019d9242c9
f4d7312a3f
9c46cfc7e2
c1c2e319ac
0895b4f469
dcfef06a65
13736d6359
4fc64edeb8
2a8901dc4f
e25c974796
ffc62e9ee6
556c5ae088
d94134f80a
586231eb2d
766b77a3b6
1959e8154e
d40b2f0b2e
061d0a8f5f
e57660ff88
5c8756bede
bae9fb80e4
8a65d99e08
d479b7dc6c
40aac8bf89
53ba6f2936
140cc4cb07
d5d2d2149b
4c51d12eb4
a6ce44e852
f6e866a589
eb5c187d41
b06c82bb16
2f342be948
e39f2b260c
60ad474b88
888f85d19e
df7de93964
7218631cc4
085e525b2f
e1580df12f
be18779ff9
60cfd8f23b
87fd117d77
f262ebdca2
64d7f1a3b2
a238a727d2
cc5ce3ab98
70dfcb93c5
69bcd1b5e4
5f3bd61998
e54dd8888f
2ba09f8693
a468a87a5a
6a30b6550d
18f013bfa0
.build.yml (new file)
@@ -0,0 +1,24 @@
image: archlinux
packages:
  - python-pipenv
  - postgresql
sources:
  - https://git.sr.ht/~alanorth/dspace-statistics-api
tasks:
  - setup: |
      id
      psql --version
      sudo su - postgres -c "initdb --locale en_US.UTF-8 -E UTF8 -D '/var/lib/postgres/data'"
      sudo systemctl start postgresql
      createuser -U postgres dspacestatistics
      psql -U postgres -c "ALTER USER dspacestatistics WITH PASSWORD 'dspacestatistics'"
      createdb -U postgres -O dspacestatistics --encoding=UNICODE dspacestatistics
      cd dspace-statistics-api
      psql -U postgres -d dspacestatistics < tests/dspacestatistics.sql
      pipenv install --dev
  - test: |
      cd dspace-statistics-api
      pipenv run pytest
environment:
  PIPENV_NOSPIN: 'True'
  PIPENV_HIDE_EMOJIS: 'True'
.hound.yml (new file)
@@ -0,0 +1,4 @@
flake8:
  enabled: true
  config_file: .flake8
  fail_on_violations: true
.travis.yml
@@ -1,11 +1,23 @@
dist: xenial
language: python
python:
- "3.5"
- "3.6"
- "3.7-dev"
script: pip install -r requirements.txt
branches:
  only:
    - master
- "3.7"
addons:
  postgresql: "9.6"
before_script:
  - psql --version
  - createuser -U postgres dspacestatistics
  - psql -U postgres -c "ALTER USER dspacestatistics WITH PASSWORD 'dspacestatistics'"
  - createdb -U postgres -O dspacestatistics --encoding=UNICODE dspacestatistics
  - psql -U postgres -d dspacestatistics < tests/dspacestatistics.sql
install:
  - "pip install pipenv --upgrade-strategy=only-if-needed"
  - "pipenv install --dev"
script: pytest
env:
  - PIPENV_NOSPIN=True
  - PIPENV_HIDE_EMOJIS=True

# vim: ts=2 sw=2 et
CHANGELOG.md
@@ -4,34 +4,83 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

### [0.6.1] - 2018-10-31
## Added
## [1.1.0] - 2019-05-05
## Updated
- Falcon 2.0.0 (@alanorth)

## [1.0.0] - 2019-04-15
### Added
- Build configuration for build.sr.ht

### Updated
- Run pipenv update, bringing pytest version 4.4.0, psycopg-binary 2.8.2, etc
- sr.ht and TravisCI configuration to disable emojis and animation to keep logs clean

### Changed
- Use vanilla requests library instead of SolrClient
- Use one-based paging in indexer output (for human readability)

## [0.9.0] - 2019-01-22
### Updated
- pytest version 4.0.0
- Fix indexing of sharded statistics cores (#10)
- Handle case of missing views/downloads gracefully

## [0.8.1] - 2018-11-14
### Changed
- README.md to recommend using vanilla Python virtual environments and pip instead of pipenv
- Regenerate pipenv environment to capture only direct dependencies

### Added
- `requirements-dev.txt` for installing development packages with pip

## [0.8.0] - 2018-11-11
### Changed
- Properly handle database connection errors

### Added
- API tests with pytest

## [0.7.0] - 2018-11-07
### Added
- Ability to configure PostgreSQL database port with DATABASE_PORT environment variable (defaults to 5432)
- Hound CI configuration to validate pull requests against PEP 8 code style with Flake8
- Configuration for [pipenv](https://pipenv.readthedocs.io/en/latest/)

### Changed
- Use a database management class with Python context management to automatically open/close connections and cursors

### Changed
- Validate code against PEP 8 style guide with Flake8

## [0.6.1] - 2018-10-31
### Added
- API documentation at root path (/)

### [0.6.0] - 2018-10-31
## Changed
## [0.6.0] - 2018-10-31
### Changed
- Refactor project structure (note breaking changes to API and indexing invocation, see contrib and README.md)

### [0.5.2] - 2018-10-28
## Changed
## [0.5.2] - 2018-10-28
### Changed
- Update library versions in requirements.txt

### [0.5.1] - 2018-10-24
## Changed
## [0.5.1] - 2018-10-24
### Changed
- Use Python's native json instead of ujson

### [0.5.0] - 2018-10-24
## Added
## [0.5.0] - 2018-10-24
### Added
- Example nginx configuration to README.md

## Changed
### Changed
- Don't initialize Solr connection in API

### [0.4.3] - 2018-10-17
## Changed
## [0.4.3] - 2018-10-17
### Changed
- Use pip install as script for Travis CI

## Improved
### Improved
- Documentation for deployment and testing

## [0.4.2] - 2018-10-04
@@ -45,7 +94,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [0.4.1] - 2018-09-26
### Changed
- Use execute_values() to batch insert records to PostgreSQL
- Use `execute_values()` to batch insert records to PostgreSQL

## [0.4.0] - 2018-09-25
### Fixed
Pipfile (new file)
@@ -0,0 +1,18 @@
[[source]]
url = "https://pypi.org/simple"
verify_ssl = true
name = "pypi"

[packages]
gunicorn = "*"
falcon = "==2.0.0"
"psycopg2-binary" = "*"
requests = "*"

[dev-packages]
ipython = "*"
"flake8" = "*"
pytest = "*"

[requires]
python_version = "3.7"
Pipfile.lock (generated, new file)
@@ -0,0 +1,301 @@
{
    "_meta": {
        "hash": {
            "sha256": "b0152688b12e9a7d176f42fd1940613deedc79e6a561edc1d86ed9248cbe4255"
        },
        "pipfile-spec": 6,
        "requires": {
            "python_version": "3.7"
        },
        "sources": [
            {
                "name": "pypi",
                "url": "https://pypi.org/simple",
                "verify_ssl": true
            }
        ]
    },
    "default": {
        "certifi": {
            "hashes": [
                "sha256:59b7658e26ca9c7339e00f8f4636cdfe59d34fa37b9b04f6f9e9926b3cece1a5",
                "sha256:b26104d6835d1f5e49452a26eb2ff87fe7090b89dfcaee5ea2212697e1e1d7ae"
            ],
            "version": "==2019.3.9"
        },
        "chardet": {
            "hashes": [
                "sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae",
                "sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691"
            ],
            "version": "==3.0.4"
        },
        "falcon": {
            "hashes": [
                "sha256:18157af2a4fc3feedf2b5dcc6196f448639acf01c68bc33d4d5a04c3ef87f494",
                "sha256:24adcd2b29a8ffa9d552dc79638cd21736a3fb04eda7d102c6cebafdaadb88ad",
                "sha256:54f2cb4b687035b2a03206dbfc538055cc48b59a953187b0458aa1b574d47b53",
                "sha256:59d1e8c993b9a37ea06df9d72cf907a46cc8063b30717cdac2f34d1658b6f936",
                "sha256:733033ec80c896e30a43ab3e776856096836787197a44eb21022320a61311983",
                "sha256:74cf1d18207381c665b9e6292d65100ce146d958707793174b03869dc6e614f4",
                "sha256:95bf6ce986c1119aef12c9b348f4dee9c6dcc58391bdd0bc2b0bf353c2b15986",
                "sha256:9712975adcf8c6e12876239085ad757b8fdeba223d46d23daef82b47658f83a9",
                "sha256:a5ebb22a04c9cc65081938ee7651b4e3b4d2a28522ea8ec04c7bdd2b3e9e8cd8",
                "sha256:aa184895d1ad4573fbfaaf803563d02f019ebdf4790e41cc568a330607eae439",
                "sha256:e3782b7b92fefd46a6ad1fd8fe63fe6c6f1b7740a95ca56957f48d1aee34b357",
                "sha256:e9efa0791b5d9f9dd9689015ea6bce0a27fcd5ecbcd30e6d940bffa4f7f03389",
                "sha256:eea593cf466b9c126ce667f6d30503624ef24459f118c75594a69353b6c3d5fc",
                "sha256:f93351459f110b4c1ee28556aef9a791832df6f910bea7b3f616109d534df06b"
            ],
            "index": "pypi",
            "version": "==2.0.0"
        },
        "gunicorn": {
            "hashes": [
                "sha256:aa8e0b40b4157b36a5df5e599f45c9c76d6af43845ba3b3b0efe2c70473c2471",
                "sha256:fa2662097c66f920f53f70621c6c58ca4a3c4d3434205e608e121b5b3b71f4f3"
            ],
            "index": "pypi",
            "version": "==19.9.0"
        },
        "idna": {
            "hashes": [
                "sha256:c357b3f628cf53ae2c4c05627ecc484553142ca23264e593d327bcde5e9c3407",
                "sha256:ea8b7f6188e6fa117537c3df7da9fc686d485087abf6ac197f9c46432f7e4a3c"
            ],
            "version": "==2.8"
        },
        "psycopg2-binary": {
            "hashes": [
                "sha256:007ca0df127b1862fc010125bc4100b7a630efc6841047bd11afceadb4754611",
                "sha256:03c49e02adf0b4d68f422fdbd98f7a7c547beb27e99a75ed02298f85cb48406a",
                "sha256:0a1232cdd314e08848825edda06600455ad2a7adaa463ebfb12ece2d09f3370e",
                "sha256:131c80d0958c89273d9720b9adf9df1d7600bb3120e16019a7389ab15b079af5",
                "sha256:2de34cc3b775724623f86617d2601308083176a495f5b2efc2bbb0da154f483a",
                "sha256:2eddc31500f73544a2a54123d4c4b249c3c711d31e64deddb0890982ea37397a",
                "sha256:484f6c62bdc166ee0e5be3aa831120423bf399786d1f3b0304526c86180fbc0b",
                "sha256:4c2d9369ed40b4a44a8ccd6bc3a7db6272b8314812d2d1091f95c4c836d92e06",
                "sha256:70f570b5fa44413b9f30dbc053d17ef3ce6a4100147a10822f8662e58d473656",
                "sha256:7a2b5b095f3bd733aab101c89c0e1a3f0dfb4ebdc26f6374805c086ffe29d5b2",
                "sha256:804914a669186e2843c1f7fbe12b55aad1b36d40a28274abe6027deffad9433d",
                "sha256:8520c03172da18345d012949a53617a963e0191ccb3c666f23276d5326af27b5",
                "sha256:90da901fc33ea393fc644607e4a3916b509387e9339ec6ebc7bfded45b7a0ae9",
                "sha256:a582416ad123291a82c300d1d872bdc4136d69ad0b41d57dc5ca3df7ef8e3088",
                "sha256:ac8c5e20309f4989c296d62cac20ee456b69c41fd1bc03829e27de23b6fa9dd0",
                "sha256:b2cf82f55a619879f8557fdaae5cec7a294fac815e0087c4f67026fdf5259844",
                "sha256:b59d6f8cfca2983d8fdbe457bf95d2192f7b7efdb2b483bf5fa4e8981b04e8b2",
                "sha256:be08168197021d669b9964bd87628fa88f910b1be31e7010901070f2540c05fd",
                "sha256:be0f952f1c365061041bad16e27e224e29615d4eb1fb5b7e7760a1d3d12b90b6",
                "sha256:c1c9a33e46d7c12b9c96cf2d4349d783e3127163fd96254dcd44663cf0a1d438",
                "sha256:d18c89957ac57dd2a2724ecfe9a759912d776f96ecabba23acb9ecbf5c731035",
                "sha256:d7e7b0ff21f39433c50397e60bf0995d078802c591ca3b8d99857ea18a7496ee",
                "sha256:da0929b2bf0d1f365345e5eb940d8713c1d516312e010135b14402e2a3d2404d",
                "sha256:de24a4962e361c512d3e528ded6c7480eab24c655b8ca1f0b761d3b3650d2f07",
                "sha256:e45f93ff3f7dae2202248cf413a87aeb330821bf76998b3cf374eda2fc893dd7",
                "sha256:f046aeae1f7a845041b8661bb7a52449202b6c5d3fb59eb4724e7ca088811904",
                "sha256:f1dc2b7b2748084b890f5d05b65a47cd03188824890e9a60818721fd492249fb",
                "sha256:fcbe7cf3a786572b73d2cd5f34ed452a5f5fac47c9c9d1e0642c457a148f9f88"
            ],
            "index": "pypi",
            "version": "==2.8.2"
        },
        "requests": {
            "hashes": [
                "sha256:502a824f31acdacb3a35b6690b5fbf0bc41d63a24a45c4004352b0242707598e",
                "sha256:7bf2a778576d825600030a110f3c0e3e8edc51dfaafe1c146e39a2027784957b"
            ],
            "index": "pypi",
            "version": "==2.21.0"
        },
        "urllib3": {
            "hashes": [
                "sha256:2393a695cd12afedd0dcb26fe5d50d0cf248e5a66f75dbd89a3d4eb333a61af4",
                "sha256:a637e5fae88995b256e3409dc4d52c2e2e0ba32c42a6365fee8bbd2238de3cfb"
            ],
            "version": "==1.24.3"
        }
    },
    "develop": {
        "atomicwrites": {
            "hashes": [
                "sha256:03472c30eb2c5d1ba9227e4c2ca66ab8287fbfbbda3888aa93dc2e28fc6811b4",
                "sha256:75a9445bac02d8d058d5e1fe689654ba5a6556a1dfd8ce6ec55a0ed79866cfa6"
            ],
            "version": "==1.3.0"
        },
        "attrs": {
            "hashes": [
                "sha256:69c0dbf2ed392de1cb5ec704444b08a5ef81680a61cb899dc08127123af36a79",
                "sha256:f0b870f674851ecbfbbbd364d6b5cbdff9dcedbc7f3f5e18a6891057f21fe399"
            ],
            "version": "==19.1.0"
        },
        "backcall": {
            "hashes": [
                "sha256:38ecd85be2c1e78f77fd91700c76e14667dc21e2713b63876c0eb901196e01e4",
                "sha256:bbbf4b1e5cd2bdb08f915895b51081c041bac22394fdfcfdfbe9f14b77c08bf2"
            ],
            "version": "==0.1.0"
        },
        "decorator": {
            "hashes": [
                "sha256:86156361c50488b84a3f148056ea716ca587df2f0de1d34750d35c21312725de",
                "sha256:f069f3a01830ca754ba5258fde2278454a0b5b79e0d7f5c13b3b97e57d4acff6"
            ],
            "version": "==4.4.0"
        },
        "entrypoints": {
            "hashes": [
                "sha256:589f874b313739ad35be6e0cd7efde2a4e9b6fea91edcc34e58ecbb8dbe56d19",
                "sha256:c70dd71abe5a8c85e55e12c19bd91ccfeec11a6e99044204511f9ed547d48451"
            ],
            "version": "==0.3"
        },
        "flake8": {
            "hashes": [
                "sha256:859996073f341f2670741b51ec1e67a01da142831aa1fdc6242dbf88dffbe661",
                "sha256:a796a115208f5c03b18f332f7c11729812c8c3ded6c46319c59b53efd3819da8"
            ],
            "index": "pypi",
            "version": "==3.7.7"
        },
        "ipython": {
            "hashes": [
                "sha256:54c5a8aa1eadd269ac210b96923688ccf01ebb2d0f21c18c3c717909583579a8",
                "sha256:e840810029224b56cd0d9e7719dc3b39cf84d577f8ac686547c8ba7a06eeab26"
            ],
            "index": "pypi",
            "version": "==7.5.0"
        },
        "ipython-genutils": {
            "hashes": [
                "sha256:72dd37233799e619666c9f639a9da83c34013a73e8bbc79a7a6348d93c61fab8",
                "sha256:eb2e116e75ecef9d4d228fdc66af54269afa26ab4463042e33785b887c628ba8"
            ],
            "version": "==0.2.0"
        },
        "jedi": {
            "hashes": [
                "sha256:2bb0603e3506f708e792c7f4ad8fc2a7a9d9c2d292a358fbbd58da531695595b",
                "sha256:2c6bcd9545c7d6440951b12b44d373479bf18123a401a52025cf98563fbd826c"
            ],
            "version": "==0.13.3"
        },
        "mccabe": {
            "hashes": [
                "sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42",
                "sha256:dd8d182285a0fe56bace7f45b5e7d1a6ebcbf524e8f3bd87eb0f125271b8831f"
            ],
            "version": "==0.6.1"
        },
        "more-itertools": {
            "hashes": [
                "sha256:2112d2ca570bb7c3e53ea1a35cd5df42bb0fd10c45f0fb97178679c3c03d64c7",
                "sha256:c3e4748ba1aad8dba30a4886b0b1a2004f9a863837b8654e7059eebf727afa5a"
            ],
            "markers": "python_version > '2.7'",
            "version": "==7.0.0"
        },
        "parso": {
            "hashes": [
                "sha256:17cc2d7a945eb42c3569d4564cdf49bde221bc2b552af3eca9c1aad517dcdd33",
                "sha256:2e9574cb12e7112a87253e14e2c380ce312060269d04bd018478a3c92ea9a376"
            ],
            "version": "==0.4.0"
        },
        "pexpect": {
            "hashes": [
                "sha256:2094eefdfcf37a1fdbfb9aa090862c1a4878e5c7e0e7e7088bdb511c558e5cd1",
                "sha256:9e2c1fd0e6ee3a49b28f95d4b33bc389c89b20af6a1255906e90ff1262ce62eb"
            ],
            "markers": "sys_platform != 'win32'",
            "version": "==4.7.0"
        },
        "pickleshare": {
            "hashes": [
                "sha256:87683d47965c1da65cdacaf31c8441d12b8044cdec9aca500cd78fc2c683afca",
                "sha256:9649af414d74d4df115d5d718f82acb59c9d418196b7b4290ed47a12ce62df56"
            ],
            "version": "==0.7.5"
        },
        "pluggy": {
            "hashes": [
                "sha256:19ecf9ce9db2fce065a7a0586e07cfb4ac8614fe96edf628a264b1c70116cf8f",
                "sha256:84d306a647cc805219916e62aab89caa97a33a1dd8c342e87a37f91073cd4746"
            ],
            "version": "==0.9.0"
        },
        "prompt-toolkit": {
            "hashes": [
                "sha256:11adf3389a996a6d45cc277580d0d53e8a5afd281d0c9ec71b28e6f121463780",
                "sha256:2519ad1d8038fd5fc8e770362237ad0364d16a7650fb5724af6997ed5515e3c1",
                "sha256:977c6583ae813a37dc1c2e1b715892461fcbdaa57f6fc62f33a528c4886c8f55"
            ],
            "version": "==2.0.9"
        },
        "ptyprocess": {
            "hashes": [
                "sha256:923f299cc5ad920c68f2bc0bc98b75b9f838b93b599941a6b63ddbc2476394c0",
                "sha256:d7cc528d76e76342423ca640335bd3633420dc1366f258cb31d05e865ef5ca1f"
            ],
            "version": "==0.6.0"
        },
        "py": {
            "hashes": [
                "sha256:64f65755aee5b381cea27766a3a147c3f15b9b6b9ac88676de66ba2ae36793fa",
                "sha256:dc639b046a6e2cff5bbe40194ad65936d6ba360b52b3c3fe1d08a82dd50b5e53"
            ],
            "version": "==1.8.0"
        },
        "pycodestyle": {
            "hashes": [
                "sha256:95a2219d12372f05704562a14ec30bc76b05a5b297b21a5dfe3f6fac3491ae56",
                "sha256:e40a936c9a450ad81df37f549d676d127b1b66000a6c500caa2b085bc0ca976c"
            ],
            "version": "==2.5.0"
        },
        "pyflakes": {
            "hashes": [
                "sha256:17dbeb2e3f4d772725c777fabc446d5634d1038f234e77343108ce445ea69ce0",
                "sha256:d976835886f8c5b31d47970ed689944a0262b5f3afa00a5a7b4dc81e5449f8a2"
            ],
            "version": "==2.1.1"
        },
        "pygments": {
            "hashes": [
                "sha256:5ffada19f6203563680669ee7f53b64dabbeb100eb51b61996085e99c03b284a",
                "sha256:e8218dd399a61674745138520d0d4cf2621d7e032439341bc3f647bff125818d"
            ],
            "version": "==2.3.1"
        },
        "pytest": {
            "hashes": [
                "sha256:3773f4c235918987d51daf1db66d51c99fac654c81d6f2f709a046ab446d5e5d",
                "sha256:b7802283b70ca24d7119b32915efa7c409982f59913c1a6c0640aacf118b95f5"
            ],
            "index": "pypi",
            "version": "==4.4.1"
        },
        "six": {
            "hashes": [
                "sha256:3350809f0555b11f552448330d0b52d5f24c91a322ea4a15ef22629740f3761c",
                "sha256:d16a0141ec1a18405cd4ce8b4613101da75da0e9a7aec5bdd4fa804d0e0eba73"
            ],
            "version": "==1.12.0"
        },
        "traitlets": {
            "hashes": [
                "sha256:9c4bd2d267b7153df9152698efb1050a5d84982d3384a37b2c1f7723ba3e7835",
                "sha256:c6cb5e6f57c5a9bdaa40fa71ce7b4af30298fbab9ece9815b5d995ab6217c7d9"
            ],
            "version": "==4.3.2"
        },
        "wcwidth": {
            "hashes": [
                "sha256:3df37372226d6e63e1b1e1eda15c594bca98a22d33a23832a90998faa96bc65e",
                "sha256:f4ebe71925af7b40a864553f761ed559b43544f8f71746c2d756c7fe788ade7c"
            ],
            "version": "==0.1.7"
        }
    }
}
README.md
@@ -1,21 +1,27 @@
# DSpace Statistics API [](https://travis-ci.org/ilri/dspace-statistics-api)
DSpace versions 4.0 and up include a [REST API](https://wiki.duraspace.org/display/DSDOC5x/REST+API) that allows the repository to be queried programmatically. The API exposes information about communities, collections, items, and bitstreams, but not item views or downloads. This project contains a lightweight indexer and a web application to make the view and download statistics available via a simple REST API that can be deployed simultaneously with DSpace's own.
# DSpace Statistics API [](https://travis-ci.org/ilri/dspace-statistics-api) [](https://builds.sr.ht/~alanorth/dspace-statistics-api?)
DSpace stores item view and download events in a Solr "statistics" core. This information is available for use in the various DSpace user interfaces, but is not exposed externally via any APIs. The DSpace 4/5 [REST API](https://wiki.duraspace.org/display/DSDOC5x/REST+API), for example, only exposes information about communities, collections, item metadata, and bitstreams.

You can read more about the Solr queries used to gather the item view and download statistics on the [DSpace wiki](https://wiki.duraspace.org/display/DSPACE/Solr).
This project contains an indexer and a [Falcon-based](https://falcon.readthedocs.io/) web application to make the statistics available via a simple REST API. You can read more about the Solr queries used to gather the item view and download statistics on the [DSpace wiki](https://wiki.duraspace.org/display/DSPACE/Solr).

If you use the DSpace Statistics API please cite:

*Orth, A. 2018. DSpace statistics API. Nairobi, Kenya: ILRI. https://hdl.handle.net/10568/99143.*

## Requirements

- Python 3.5+
- PostgreSQL version 9.5+ (due to [`UPSERT` support](https://wiki.postgresql.org/wiki/UPSERT))
- DSpace 4+ with [Solr usage statistics enabled](https://wiki.duraspace.org/display/DSDOC5x/SOLR+Statistics)
- DSpace with [Solr usage statistics enabled](https://wiki.duraspace.org/display/DSDOC5x/SOLR+Statistics) (tested with 5.x)

## Installation and Testing
## Installation
Create a Python virtual environment and install the dependencies:

    $ python -m venv venv
    $ . venv/bin/activate
    $ python3 -m venv venv
    $ source venv/bin/activate
    $ pip install -r requirements.txt

## Running

Set up the environment variables for Solr and PostgreSQL:

    $ export SOLR_SERVER=http://localhost:8080/solr
@@ -36,6 +42,15 @@ Test to see if there are any statistics:

    $ curl 'http://localhost:8000/items?limit=1'

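Based on the `AllItemsResource` handler later in this diff, a successful `/items` response is a JSON object with `currentPage`, `totalPages`, `limit`, and a `statistics` list of `{id, views, downloads}` records. A minimal sketch of consuming it from Python (the localhost:8000 address follows the README examples above; the field names are taken from app.py):

    import requests

    # Ask the API for a single item statistics record (assumes the API is
    # listening on localhost:8000 as in the examples above).
    res = requests.get('http://localhost:8000/items', params={'limit': 1})
    payload = res.json()

    # The response shape mirrors the 'message' dict built in app.py.
    print(payload['currentPage'], payload['totalPages'], payload['limit'])
    for item in payload['statistics']:
        print(item['id'], item['views'], item['downloads'])
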
## Testing
Install development packages using pip:

    $ pip install -r requirements-dev.txt

Run tests:

    $ pytest

## Deployment
There are example systemd service and timer units in the `contrib` directory. The API service listens on localhost by default so you will need to expose it publicly using a web server like nginx.

@@ -71,13 +86,16 @@ The item id is the *internal* id for an item. You can get these from the standard

## Todo

- Close DB connection when gunicorn shuts down gracefully
- Better logging
- Tests
- Check if database exists (try/except)
- Version API
- Use JSON in PostgreSQL
- Add top items endpoint, perhaps `/top/items` or `/items/top`?
- Make community and collection stats available
- Support [DSpace 6 UUIDs](https://jira.duraspace.org/browse/DS-1782)
- Switch to [Python 3.6+ f-string syntax](https://realpython.com/python-f-strings/)
- Check IDs in database to see if they are deleted...

## License
This work is licensed under the [GPLv3](https://www.gnu.org/licenses/gpl-3.0.en.html).

The license allows you to use and modify the work for personal and commercial purposes, but if you distribute the work you must provide users with a means to access the source code for the version you are distributing. Read more about the [GPLv3 at TL;DR Legal](https://tldrlegal.com/license/gnu-general-public-license-v3-(gpl-3)).
dspace_statistics_api/app.py
@@ -1,8 +1,6 @@
from .database import database_connection
from .database import DatabaseManager
import falcon

db = database_connection()
db.set_session(readonly=True)

class RootResource:
    def on_get(self, req, resp):
@@ -11,64 +9,69 @@ class RootResource:
        with open('dspace_statistics_api/docs/index.html', 'r') as f:
            resp.body = f.read()


class AllItemsResource:
    def on_get(self, req, resp):
        """Handles GET requests"""
        # Return HTTPBadRequest if id parameter is not present and valid
        limit = req.get_param_as_int("limit", min=0, max=100) or 100
        page = req.get_param_as_int("page", min=0) or 0
        limit = req.get_param_as_int("limit", min_value=0, max_value=100) or 100
        page = req.get_param_as_int("page", min_value=0) or 0
        offset = limit * page

        cursor = db.cursor()
        with DatabaseManager() as db:
            db.set_session(readonly=True)

        # get total number of items so we can estimate the pages
        cursor.execute('SELECT COUNT(id) FROM items')
        pages = round(cursor.fetchone()[0] / limit)
            with db.cursor() as cursor:
                # get total number of items so we can estimate the pages
                cursor.execute('SELECT COUNT(id) FROM items')
                pages = round(cursor.fetchone()[0] / limit)

        # get statistics, ordered by id, and use limit and offset to page through results
        cursor.execute('SELECT id, views, downloads FROM items ORDER BY id ASC LIMIT {} OFFSET {}'.format(limit, offset))
                # get statistics, ordered by id, and use limit and offset to page through results
                cursor.execute('SELECT id, views, downloads FROM items ORDER BY id ASC LIMIT {} OFFSET {}'.format(limit, offset))

        # create a list to hold dicts of item stats
        statistics = list()
                # create a list to hold dicts of item stats
                statistics = list()

        # iterate over results and build statistics object
        for item in cursor:
            statistics.append({ 'id': item['id'], 'views': item['views'], 'downloads': item['downloads'] })

        cursor.close()
                # iterate over results and build statistics object
                for item in cursor:
                    statistics.append({'id': item['id'], 'views': item['views'], 'downloads': item['downloads']})

        message = {
            'currentPage': page,
            'totalPages': pages,
            'limit': limit,
            'statistics': statistics
                'currentPage': page,
                'totalPages': pages,
                'limit': limit,
                'statistics': statistics
        }

        resp.media = message


class ItemResource:
    def on_get(self, req, resp, item_id):
        """Handles GET requests"""

        cursor = db.cursor()
        cursor.execute('SELECT views, downloads FROM items WHERE id={}'.format(item_id))
        if cursor.rowcount == 0:
            raise falcon.HTTPNotFound(
                title='Item not found',
                description='The item with id "{}" was not found.'.format(item_id)
            )
        else:
            results = cursor.fetchone()
        with DatabaseManager() as db:
            db.set_session(readonly=True)

            statistics = {
                'id': item_id,
                'views': results['views'],
                'downloads': results['downloads']
            }
            with db.cursor() as cursor:
                cursor = db.cursor()
                cursor.execute('SELECT views, downloads FROM items WHERE id={}'.format(item_id))
                if cursor.rowcount == 0:
                    raise falcon.HTTPNotFound(
                        title='Item not found',
                        description='The item with id "{}" was not found.'.format(item_id)
                    )
                else:
                    results = cursor.fetchone()

        resp.media = statistics
                    statistics = {
                        'id': item_id,
                        'views': results['views'],
                        'downloads': results['downloads']
                    }

                    resp.media = statistics

        cursor.close()

api = application = falcon.API()
api.add_route('/', RootResource())
dspace_statistics_api/config.py
@@ -7,5 +7,6 @@ DATABASE_NAME = os.environ.get('DATABASE_NAME', 'dspacestatistics')
DATABASE_USER = os.environ.get('DATABASE_USER', 'dspacestatistics')
DATABASE_PASS = os.environ.get('DATABASE_PASS', 'dspacestatistics')
DATABASE_HOST = os.environ.get('DATABASE_HOST', 'localhost')
DATABASE_PORT = os.environ.get('DATABASE_PORT', '5432')

# vim: set sw=4 ts=4 expandtab:
dspace_statistics_api/database.py
@@ -2,11 +2,29 @@ from .config import DATABASE_NAME
from .config import DATABASE_USER
from .config import DATABASE_PASS
from .config import DATABASE_HOST
import psycopg2, psycopg2.extras
from .config import DATABASE_PORT
import falcon
import psycopg2
import psycopg2.extras

def database_connection():
    connection = psycopg2.connect("dbname={} user={} password={} host='{}'".format(DATABASE_NAME, DATABASE_USER, DATABASE_PASS, DATABASE_HOST), cursor_factory=psycopg2.extras.DictCursor)

    return connection
class DatabaseManager():
    '''Manage database connection.'''

    def __init__(self):
        self._connection_uri = 'dbname={} user={} password={} host={} port={}'.format(DATABASE_NAME, DATABASE_USER, DATABASE_PASS, DATABASE_HOST, DATABASE_PORT)

    def __enter__(self):
        try:
            self._connection = psycopg2.connect(self._connection_uri, cursor_factory=psycopg2.extras.DictCursor)
        except psycopg2.OperationalError:
            title = '500 Internal Server Error'
            description = 'Could not connect to database'
            raise falcon.HTTPInternalServerError(title, description)

        return self._connection

    def __exit__(self, exc_type, exc_value, exc_traceback):
        self._connection.close()

# vim: set sw=4 ts=4 expandtab:
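A minimal sketch of how this context manager is used by the API and indexer elsewhere in this diff (the table and column names come from the indexer's CREATE TABLE statement):

    from dspace_statistics_api.database import DatabaseManager

    with DatabaseManager() as db:
        # the psycopg2 connection is opened in __enter__ and closed in __exit__
        db.set_session(readonly=True)

        with db.cursor() as cursor:
            # DictCursor rows can be accessed by column name
            cursor.execute('SELECT id, views, downloads FROM items LIMIT 1')
            for row in cursor:
                print(row['id'], row['views'], row['downloads'])
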
dspace_statistics_api/indexer.py
@@ -29,10 +29,62 @@
# See: https://solrclient.readthedocs.io/en/latest/SolrClient.html
# See: https://wiki.duraspace.org/display/DSPACE/Solr

from .database import database_connection
from .config import SOLR_SERVER
from .database import DatabaseManager
import json
import psycopg2.extras
from .solr import solr_connection
import re
import requests


# Enumerate the cores in Solr to determine if statistics have been sharded into
# yearly shards by DSpace's stats-util or not (for example: statistics-2018).
def get_statistics_shards():
    # Initialize an empty list for statistics core years
    statistics_core_years = []

    # URL for Solr status to check active cores
    solr_query_params = {
        'action': 'STATUS',
        'wt': 'json'
    }
    solr_url = SOLR_SERVER + '/admin/cores'
    res = requests.get(solr_url, params=solr_query_params)

    if res.status_code == requests.codes.ok:
        data = res.json()

        # Iterate over active cores from Solr's STATUS response (cores are in
        # the status array of this response).
        for core in data['status']:
            # Pattern to match, for example: statistics-2018
            pattern = re.compile('^statistics-[0-9]{4}$')

            if not pattern.match(core):
                continue

            # Append current core to list
            statistics_core_years.append(core)

    # Initialize a string to hold our shards (may end up being empty if the Solr
    # core has not been processed by stats-util).
    shards = str()

    if len(statistics_core_years) > 0:
        # Begin building a string of shards starting with the default one
        shards = '{}/statistics'.format(SOLR_SERVER)

        for core in statistics_core_years:
            # Create a comma-separated list of shards to pass to our Solr query
            #
            # See: https://wiki.apache.org/solr/DistributedSearch
            shards += ',{}/{}'.format(SOLR_SERVER, core)

    # Return the string of shards, which may actually be empty. Solr doesn't
    # seem to mind if the shards query parameter is empty and I haven't seen
    # any negative performance impact so this should be fine.
    return shards

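# For illustration (assuming the default SOLR_SERVER of http://localhost:8080/solr from the
# README and a single yearly core named statistics-2018), the returned shards string would be:
#
#   http://localhost:8080/solr/statistics,http://localhost:8080/solr/statistics-2018
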
def index_views():
    # get total number of distinct facets for items with a minimum of 1 view,
@@ -41,132 +93,174 @@ def index_views():
    # so we can get the countDistinct summary.
    #
    # see: https://lucene.apache.org/solr/guide/6_6/the-stats-component.html
    res = solr.query('statistics', {
        'q':'type:2',
        'fq':'isBot:false AND statistics_type:view',
        'facet':True,
        'facet.field':'id',
        'facet.mincount':1,
        'facet.limit':1,
        'facet.offset':0,
        'stats':True,
        'stats.field':'id',
        'stats.calcdistinct':True
    }, rows=0)
    solr_query_params = {
        'q': 'type:2',
        'fq': 'isBot:false AND statistics_type:view',
        'facet': 'true',
        'facet.field': 'id',
        'facet.mincount': 1,
        'facet.limit': 1,
        'facet.offset': 0,
        'stats': 'true',
        'stats.field': 'id',
        'stats.calcdistinct': 'true',
        'shards': shards,
        'rows': 0,
        'wt': 'json'
    }

    # get total number of distinct facets (countDistinct)
    results_totalNumFacets = json.loads(res.get_json())['stats']['stats_fields']['id']['countDistinct']
    solr_url = SOLR_SERVER + '/statistics/select'

    res = requests.get(solr_url, params=solr_query_params)

    try:
        # get total number of distinct facets (countDistinct)
        results_totalNumFacets = res.json()['stats']['stats_fields']['id']['countDistinct']
    except TypeError:
        print('No item views to index, exiting.')

        exit(0)

    # divide results into "pages" (cast to int to effectively round down)
    results_per_page = 100
    results_num_pages = int(results_totalNumFacets / results_per_page)
    results_current_page = 0

    cursor = db.cursor()
    with DatabaseManager() as db:
        with db.cursor() as cursor:
            # create an empty list to store values for batch insertion
            data = []

    # create an empty list to store values for batch insertion
    data = []
            while results_current_page <= results_num_pages:
                # "pages" are zero based, but one based is more human readable
                print('Indexing item views (page {} of {})'.format(results_current_page + 1, results_num_pages + 1))

    while results_current_page <= results_num_pages:
        print('Indexing item views (page {} of {})'.format(results_current_page, results_num_pages))
                solr_query_params = {
                    'q': 'type:2',
                    'fq': 'isBot:false AND statistics_type:view',
                    'facet': 'true',
                    'facet.field': 'id',
                    'facet.mincount': 1,
                    'facet.limit': results_per_page,
                    'facet.offset': results_current_page * results_per_page,
                    'shards': shards,
                    'rows': 0,
                    'wt': 'json',
                    'json.nl': 'map'  # return facets as a dict instead of a flat list
                }

        res = solr.query('statistics', {
            'q':'type:2',
            'fq':'isBot:false AND statistics_type:view',
            'facet':True,
            'facet.field':'id',
            'facet.mincount':1,
            'facet.limit':results_per_page,
            'facet.offset':results_current_page * results_per_page
        }, rows=0)
                solr_url = SOLR_SERVER + '/statistics/select'

        # SolrClient's get_facets() returns a dict of dicts
        views = res.get_facets()
        # in this case iterate over the 'id' dict and get the item ids and views
        for item_id, item_views in views['id'].items():
            data.append((item_id, item_views))
                res = requests.get(solr_url, params=solr_query_params)

        # do a batch insert of values from the current "page" of results
        sql = 'INSERT INTO items(id, views) VALUES %s ON CONFLICT(id) DO UPDATE SET views=excluded.views'
        psycopg2.extras.execute_values(cursor, sql, data, template='(%s, %s)')
        db.commit()
                # Solr returns facets as a dict of dicts (see json.nl parameter)
                views = res.json()['facet_counts']['facet_fields']
                # iterate over the 'id' dict and get the item ids and views
                for item_id, item_views in views['id'].items():
                    data.append((item_id, item_views))

        # clear all items from the list so we can populate it with the next batch
        data.clear()
                # do a batch insert of values from the current "page" of results
                sql = 'INSERT INTO items(id, views) VALUES %s ON CONFLICT(id) DO UPDATE SET views=excluded.views'
                psycopg2.extras.execute_values(cursor, sql, data, template='(%s, %s)')
                db.commit()

        results_current_page += 1
                # clear all items from the list so we can populate it with the next batch
                data.clear()

                results_current_page += 1

    cursor.close()

def index_downloads():
    # get the total number of distinct facets for items with at least 1 download
    res = solr.query('statistics', {
        'q':'type:0',
        'fq':'isBot:false AND statistics_type:view AND bundleName:ORIGINAL',
        'facet':True,
        'facet.field':'owningItem',
        'facet.mincount':1,
        'facet.limit':1,
        'facet.offset':0,
        'stats':True,
        'stats.field':'owningItem',
        'stats.calcdistinct':True
    }, rows=0)
    solr_query_params = {
        'q': 'type:0',
        'fq': 'isBot:false AND statistics_type:view AND bundleName:ORIGINAL',
        'facet': 'true',
        'facet.field': 'owningItem',
        'facet.mincount': 1,
        'facet.limit': 1,
        'facet.offset': 0,
        'stats': 'true',
        'stats.field': 'owningItem',
        'stats.calcdistinct': 'true',
        'shards': shards,
        'rows': 0,
        'wt': 'json'
    }

    # get total number of distinct facets (countDistinct)
    results_totalNumFacets = json.loads(res.get_json())['stats']['stats_fields']['owningItem']['countDistinct']
    solr_url = SOLR_SERVER + '/statistics/select'

    res = requests.get(solr_url, params=solr_query_params)

    try:
        # get total number of distinct facets (countDistinct)
        results_totalNumFacets = res.json()['stats']['stats_fields']['owningItem']['countDistinct']
    except TypeError:
        print('No item downloads to index, exiting.')

        exit(0)

    # divide results into "pages" (cast to int to effectively round down)
    results_per_page = 100
    results_num_pages = int(results_totalNumFacets / results_per_page)
    results_current_page = 0

    cursor = db.cursor()
    with DatabaseManager() as db:
        with db.cursor() as cursor:
            # create an empty list to store values for batch insertion
            data = []

    # create an empty list to store values for batch insertion
    data = []
            while results_current_page <= results_num_pages:
                # "pages" are zero based, but one based is more human readable
                print('Indexing item downloads (page {} of {})'.format(results_current_page + 1, results_num_pages + 1))

    while results_current_page <= results_num_pages:
        print('Indexing item downloads (page {} of {})'.format(results_current_page, results_num_pages))
                solr_query_params = {
                    'q': 'type:0',
                    'fq': 'isBot:false AND statistics_type:view AND bundleName:ORIGINAL',
                    'facet': 'true',
                    'facet.field': 'owningItem',
                    'facet.mincount': 1,
                    'facet.limit': results_per_page,
                    'facet.offset': results_current_page * results_per_page,
                    'shards': shards,
                    'rows': 0,
                    'wt': 'json',
                    'json.nl': 'map'  # return facets as a dict instead of a flat list
                }

        res = solr.query('statistics', {
            'q':'type:0',
            'fq':'isBot:false AND statistics_type:view AND bundleName:ORIGINAL',
            'facet':True,
            'facet.field':'owningItem',
            'facet.mincount':1,
            'facet.limit':results_per_page,
            'facet.offset':results_current_page * results_per_page
        }, rows=0)
                solr_url = SOLR_SERVER + '/statistics/select'

        # SolrClient's get_facets() returns a dict of dicts
        downloads = res.get_facets()
        # in this case iterate over the 'owningItem' dict and get the item ids and downloads
        for item_id, item_downloads in downloads['owningItem'].items():
            data.append((item_id, item_downloads))
                res = requests.get(solr_url, params=solr_query_params)

        # do a batch insert of values from the current "page" of results
        sql = 'INSERT INTO items(id, downloads) VALUES %s ON CONFLICT(id) DO UPDATE SET downloads=excluded.downloads'
        psycopg2.extras.execute_values(cursor, sql, data, template='(%s, %s)')
        db.commit()
                # Solr returns facets as a dict of dicts (see json.nl parameter)
                downloads = res.json()['facet_counts']['facet_fields']
                # iterate over the 'owningItem' dict and get the item ids and downloads
                for item_id, item_downloads in downloads['owningItem'].items():
                    data.append((item_id, item_downloads))

        # clear all items from the list so we can populate it with the next batch
        data.clear()
                # do a batch insert of values from the current "page" of results
                sql = 'INSERT INTO items(id, downloads) VALUES %s ON CONFLICT(id) DO UPDATE SET downloads=excluded.downloads'
                psycopg2.extras.execute_values(cursor, sql, data, template='(%s, %s)')
                db.commit()

        results_current_page += 1
                # clear all items from the list so we can populate it with the next batch
                data.clear()

    cursor.close()
                results_current_page += 1

db = database_connection()
solr = solr_connection()

# create table to store item views and downloads
cursor = db.cursor()
cursor.execute('''CREATE TABLE IF NOT EXISTS items
with DatabaseManager() as db:
    with db.cursor() as cursor:
        # create table to store item views and downloads
        cursor.execute('''CREATE TABLE IF NOT EXISTS items
                  (id INT PRIMARY KEY, views INT DEFAULT 0, downloads INT DEFAULT 0)''')

    # commit the table creation before closing the database connection
    db.commit()

shards = get_statistics_shards()

index_views()
index_downloads()

db.close()

# vim: set sw=4 ts=4 expandtab:
dspace_statistics_api/solr.py (deleted)
@@ -1,9 +0,0 @@
from .config import SOLR_SERVER
from SolrClient import SolrClient

def solr_connection():
    connection = SolrClient(SOLR_SERVER)

    return connection

# vim: set sw=4 ts=4 expandtab:
pytest.ini (new file)
@@ -0,0 +1,4 @@
[pytest]
addopts= -rsxX -s -v --strict
filterwarnings =
    error::UserWarning
requirements-dev.txt (new file)
@@ -0,0 +1,26 @@
-i https://pypi.org/simple
atomicwrites==1.3.0
attrs==19.1.0
backcall==0.1.0
decorator==4.4.0
entrypoints==0.3
flake8==3.7.7
ipython-genutils==0.2.0
ipython==7.5.0
jedi==0.13.3
mccabe==0.6.1
more-itertools==7.0.0 ; python_version > '2.7'
parso==0.4.0
pexpect==4.7.0 ; sys_platform != 'win32'
pickleshare==0.7.5
pluggy==0.9.0
prompt-toolkit==2.0.9
ptyprocess==0.6.0
py==1.8.0
pycodestyle==2.5.0
pyflakes==2.1.1
pygments==2.3.1
pytest==4.4.1
six==1.12.0
traitlets==4.3.2
wcwidth==0.1.7
requirements.txt
@@ -1,12 +1,9 @@
certifi==2018.10.15
-i https://pypi.org/simple
certifi==2019.3.9
chardet==3.0.4
falcon==1.4.1
falcon==2.0.0
gunicorn==19.9.0
idna==2.7
kazoo==2.5.0
psycopg2-binary==2.7.5
python-mimeparse==1.6.0
requests==2.20.0
six==1.11.0
-e git://github.com/alanorth/SolrClient.git@c629e3475be37c82770b2be61748be7e29882648#egg=SolrClient
urllib3==1.24
idna==2.8
psycopg2-binary==2.8.2
requests==2.21.0
urllib3==1.24.3
tests/__init__.py (new file, empty)

tests/dspacestatistics.sql (new file, 77226 lines)
File diff suppressed because it is too large
tests/test_api.py (new file)
@@ -0,0 +1,67 @@
from falcon import testing
import json
import pytest

from dspace_statistics_api.app import api


@pytest.fixture
def client():
    return testing.TestClient(api)


def test_get_docs(client):
    '''Test requesting the documentation at the root.'''

    response = client.simulate_get('/')

    assert isinstance(response.content, bytes)
    assert response.status_code == 200


def test_get_item(client):
    '''Test requesting a single item.'''

    response = client.simulate_get('/item/17')
    response_doc = json.loads(response.text)

    assert isinstance(response_doc['downloads'], int)
    assert isinstance(response_doc['id'], int)
    assert isinstance(response_doc['views'], int)
    assert response.status_code == 200


def test_get_missing_item(client):
    '''Test requesting a single non-existing item.'''

    response = client.simulate_get('/item/1')

    assert response.status_code == 404


def test_get_items(client):
    '''Test requesting 100 items.'''

    response = client.simulate_get('/items', query_string='limit=100')
    response_doc = json.loads(response.text)

    assert isinstance(response_doc['currentPage'], int)
    assert isinstance(response_doc['totalPages'], int)
    assert isinstance(response_doc['statistics'], list)
    assert response.status_code == 200


def test_get_items_invalid_limit(client):
    '''Test requesting 100 items with an invalid limit parameter.'''

    response = client.simulate_get('/items', query_string='limit=101')

    assert response.status_code == 400


def test_get_items_invalid_page(client):
    '''Test requesting 100 items with an invalid page parameter.'''

    response = client.simulate_get('/items', query_string='page=-1')

    assert response.status_code == 400