1
0
mirror of https://github.com/ilri/dspace-statistics-api.git synced 2024-11-25 23:58:18 +01:00

Format code with black

This commit is contained in:
Alan Orth 2019-11-27 12:30:06 +02:00
parent d2fe420a9a
commit 4ff1fd4a22
Signed by: alanorth
GPG Key ID: 0FB860CC9C45B1B9
4 changed files with 137 additions and 107 deletions

View File

@@ -5,8 +5,8 @@ import falcon
class RootResource: class RootResource:
def on_get(self, req, resp): def on_get(self, req, resp):
resp.status = falcon.HTTP_200 resp.status = falcon.HTTP_200
resp.content_type = 'text/html' resp.content_type = "text/html"
with open('dspace_statistics_api/docs/index.html', 'r') as f: with open("dspace_statistics_api/docs/index.html", "r") as f:
resp.body = f.read() resp.body = f.read()
@@ -23,24 +23,34 @@ class AllItemsResource:
with db.cursor() as cursor: with db.cursor() as cursor:
# get total number of items so we can estimate the pages # get total number of items so we can estimate the pages
cursor.execute('SELECT COUNT(id) FROM items') cursor.execute("SELECT COUNT(id) FROM items")
pages = round(cursor.fetchone()[0] / limit) pages = round(cursor.fetchone()[0] / limit)
# get statistics, ordered by id, and use limit and offset to page through results # get statistics, ordered by id, and use limit and offset to page through results
cursor.execute('SELECT id, views, downloads FROM items ORDER BY id ASC LIMIT {} OFFSET {}'.format(limit, offset)) cursor.execute(
"SELECT id, views, downloads FROM items ORDER BY id ASC LIMIT {} OFFSET {}".format(
limit, offset
)
)
# create a list to hold dicts of item stats # create a list to hold dicts of item stats
statistics = list() statistics = list()
# iterate over results and build statistics object # iterate over results and build statistics object
for item in cursor: for item in cursor:
statistics.append({'id': item['id'], 'views': item['views'], 'downloads': item['downloads']}) statistics.append(
{
"id": item["id"],
"views": item["views"],
"downloads": item["downloads"],
}
)
message = { message = {
'currentPage': page, "currentPage": page,
'totalPages': pages, "totalPages": pages,
'limit': limit, "limit": limit,
'statistics': statistics "statistics": statistics,
} }
resp.media = message resp.media = message
@@ -55,27 +65,31 @@ class ItemResource:
with db.cursor() as cursor: with db.cursor() as cursor:
cursor = db.cursor() cursor = db.cursor()
cursor.execute('SELECT views, downloads FROM items WHERE id={}'.format(item_id)) cursor.execute(
"SELECT views, downloads FROM items WHERE id={}".format(item_id)
)
if cursor.rowcount == 0: if cursor.rowcount == 0:
raise falcon.HTTPNotFound( raise falcon.HTTPNotFound(
title='Item not found', title="Item not found",
description='The item with id "{}" was not found.'.format(item_id) description='The item with id "{}" was not found.'.format(
item_id
),
) )
else: else:
results = cursor.fetchone() results = cursor.fetchone()
statistics = { statistics = {
'id': item_id, "id": item_id,
'views': results['views'], "views": results["views"],
'downloads': results['downloads'] "downloads": results["downloads"],
} }
resp.media = statistics resp.media = statistics
api = application = falcon.API() api = application = falcon.API()
api.add_route('/', RootResource()) api.add_route("/", RootResource())
api.add_route('/items', AllItemsResource()) api.add_route("/items", AllItemsResource())
api.add_route('/item/{item_id:int}', ItemResource()) api.add_route("/item/{item_id:int}", ItemResource())
# vim: set sw=4 ts=4 expandtab: # vim: set sw=4 ts=4 expandtab:

View File

@@ -1,12 +1,12 @@
import os import os
# Check if Solr connection information was provided in the environment # Check if Solr connection information was provided in the environment
SOLR_SERVER = os.environ.get('SOLR_SERVER', 'http://localhost:8080/solr') SOLR_SERVER = os.environ.get("SOLR_SERVER", "http://localhost:8080/solr")
DATABASE_NAME = os.environ.get('DATABASE_NAME', 'dspacestatistics') DATABASE_NAME = os.environ.get("DATABASE_NAME", "dspacestatistics")
DATABASE_USER = os.environ.get('DATABASE_USER', 'dspacestatistics') DATABASE_USER = os.environ.get("DATABASE_USER", "dspacestatistics")
DATABASE_PASS = os.environ.get('DATABASE_PASS', 'dspacestatistics') DATABASE_PASS = os.environ.get("DATABASE_PASS", "dspacestatistics")
DATABASE_HOST = os.environ.get('DATABASE_HOST', 'localhost') DATABASE_HOST = os.environ.get("DATABASE_HOST", "localhost")
DATABASE_PORT = os.environ.get('DATABASE_PORT', '5432') DATABASE_PORT = os.environ.get("DATABASE_PORT", "5432")
# vim: set sw=4 ts=4 expandtab: # vim: set sw=4 ts=4 expandtab:

View File

@@ -8,18 +8,22 @@ import psycopg2
import psycopg2.extras import psycopg2.extras
class DatabaseManager(): class DatabaseManager:
'''Manage database connection.''' """Manage database connection."""
def __init__(self): def __init__(self):
self._connection_uri = 'dbname={} user={} password={} host={} port={}'.format(DATABASE_NAME, DATABASE_USER, DATABASE_PASS, DATABASE_HOST, DATABASE_PORT) self._connection_uri = "dbname={} user={} password={} host={} port={}".format(
DATABASE_NAME, DATABASE_USER, DATABASE_PASS, DATABASE_HOST, DATABASE_PORT
)
def __enter__(self): def __enter__(self):
try: try:
self._connection = psycopg2.connect(self._connection_uri, cursor_factory=psycopg2.extras.DictCursor) self._connection = psycopg2.connect(
self._connection_uri, cursor_factory=psycopg2.extras.DictCursor
)
except psycopg2.OperationalError: except psycopg2.OperationalError:
title = '500 Internal Server Error' title = "500 Internal Server Error"
description = 'Could not connect to database' description = "Could not connect to database"
raise falcon.HTTPInternalServerError(title, description) raise falcon.HTTPInternalServerError(title, description)
return self._connection return self._connection
@@ -27,4 +31,5 @@ class DatabaseManager():
def __exit__(self, exc_type, exc_value, exc_traceback): def __exit__(self, exc_type, exc_value, exc_traceback):
self._connection.close() self._connection.close()
# vim: set sw=4 ts=4 expandtab: # vim: set sw=4 ts=4 expandtab:

View File

@@ -43,11 +43,8 @@ def get_statistics_shards():
statistics_core_years = [] statistics_core_years = []
# URL for Solr status to check active cores # URL for Solr status to check active cores
solr_query_params = { solr_query_params = {"action": "STATUS", "wt": "json"}
'action': 'STATUS', solr_url = SOLR_SERVER + "/admin/cores"
'wt': 'json'
}
solr_url = SOLR_SERVER + '/admin/cores'
res = requests.get(solr_url, params=solr_query_params) res = requests.get(solr_url, params=solr_query_params)
if res.status_code == requests.codes.ok: if res.status_code == requests.codes.ok:
@@ -55,9 +52,9 @@ def get_statistics_shards():
# Iterate over active cores from Solr's STATUS response (cores are in # Iterate over active cores from Solr's STATUS response (cores are in
# the status array of this response). # the status array of this response).
for core in data['status']: for core in data["status"]:
# Pattern to match, for example: statistics-2018 # Pattern to match, for example: statistics-2018
pattern = re.compile('^statistics-[0-9]{4}$') pattern = re.compile("^statistics-[0-9]{4}$")
if not pattern.match(core): if not pattern.match(core):
continue continue
@@ -71,13 +68,13 @@ def get_statistics_shards():
if len(statistics_core_years) > 0: if len(statistics_core_years) > 0:
# Begin building a string of shards starting with the default one # Begin building a string of shards starting with the default one
shards = '{}/statistics'.format(SOLR_SERVER) shards = "{}/statistics".format(SOLR_SERVER)
for core in statistics_core_years: for core in statistics_core_years:
# Create a comma-separated list of shards to pass to our Solr query # Create a comma-separated list of shards to pass to our Solr query
# #
# See: https://wiki.apache.org/solr/DistributedSearch # See: https://wiki.apache.org/solr/DistributedSearch
shards += ',{}/{}'.format(SOLR_SERVER, core) shards += ",{}/{}".format(SOLR_SERVER, core)
# Return the string of shards, which may actually be empty. Solr doesn't # Return the string of shards, which may actually be empty. Solr doesn't
# seem to mind if the shards query parameter is empty and I haven't seen # seem to mind if the shards query parameter is empty and I haven't seen
@@ -93,30 +90,32 @@ def index_views():
# #
# see: https://lucene.apache.org/solr/guide/6_6/the-stats-component.html # see: https://lucene.apache.org/solr/guide/6_6/the-stats-component.html
solr_query_params = { solr_query_params = {
'q': 'type:2', "q": "type:2",
'fq': 'isBot:false AND statistics_type:view', "fq": "isBot:false AND statistics_type:view",
'facet': 'true', "facet": "true",
'facet.field': 'id', "facet.field": "id",
'facet.mincount': 1, "facet.mincount": 1,
'facet.limit': 1, "facet.limit": 1,
'facet.offset': 0, "facet.offset": 0,
'stats': 'true', "stats": "true",
'stats.field': 'id', "stats.field": "id",
'stats.calcdistinct': 'true', "stats.calcdistinct": "true",
'shards': shards, "shards": shards,
'rows': 0, "rows": 0,
'wt': 'json' "wt": "json",
} }
solr_url = SOLR_SERVER + '/statistics/select' solr_url = SOLR_SERVER + "/statistics/select"
res = requests.get(solr_url, params=solr_query_params) res = requests.get(solr_url, params=solr_query_params)
try: try:
# get total number of distinct facets (countDistinct) # get total number of distinct facets (countDistinct)
results_totalNumFacets = res.json()['stats']['stats_fields']['id']['countDistinct'] results_totalNumFacets = res.json()["stats"]["stats_fields"]["id"][
"countDistinct"
]
except TypeError: except TypeError:
print('No item views to index, exiting.') print("No item views to index, exiting.")
exit(0) exit(0)
@@ -132,35 +131,39 @@ def index_views():
while results_current_page <= results_num_pages: while results_current_page <= results_num_pages:
# "pages" are zero based, but one based is more human readable # "pages" are zero based, but one based is more human readable
print('Indexing item views (page {} of {})'.format(results_current_page + 1, results_num_pages + 1)) print(
"Indexing item views (page {} of {})".format(
results_current_page + 1, results_num_pages + 1
)
)
solr_query_params = { solr_query_params = {
'q': 'type:2', "q": "type:2",
'fq': 'isBot:false AND statistics_type:view', "fq": "isBot:false AND statistics_type:view",
'facet': 'true', "facet": "true",
'facet.field': 'id', "facet.field": "id",
'facet.mincount': 1, "facet.mincount": 1,
'facet.limit': results_per_page, "facet.limit": results_per_page,
'facet.offset': results_current_page * results_per_page, "facet.offset": results_current_page * results_per_page,
'shards': shards, "shards": shards,
'rows': 0, "rows": 0,
'wt': 'json', "wt": "json",
'json.nl': 'map' # return facets as a dict instead of a flat list "json.nl": "map", # return facets as a dict instead of a flat list
} }
solr_url = SOLR_SERVER + '/statistics/select' solr_url = SOLR_SERVER + "/statistics/select"
res = requests.get(solr_url, params=solr_query_params) res = requests.get(solr_url, params=solr_query_params)
# Solr returns facets as a dict of dicts (see json.nl parameter) # Solr returns facets as a dict of dicts (see json.nl parameter)
views = res.json()['facet_counts']['facet_fields'] views = res.json()["facet_counts"]["facet_fields"]
# iterate over the 'id' dict and get the item ids and views # iterate over the 'id' dict and get the item ids and views
for item_id, item_views in views['id'].items(): for item_id, item_views in views["id"].items():
data.append((item_id, item_views)) data.append((item_id, item_views))
# do a batch insert of values from the current "page" of results # do a batch insert of values from the current "page" of results
sql = 'INSERT INTO items(id, views) VALUES %s ON CONFLICT(id) DO UPDATE SET views=excluded.views' sql = "INSERT INTO items(id, views) VALUES %s ON CONFLICT(id) DO UPDATE SET views=excluded.views"
psycopg2.extras.execute_values(cursor, sql, data, template='(%s, %s)') psycopg2.extras.execute_values(cursor, sql, data, template="(%s, %s)")
db.commit() db.commit()
# clear all items from the list so we can populate it with the next batch # clear all items from the list so we can populate it with the next batch
@@ -172,30 +175,32 @@ def index_views():
def index_downloads(): def index_downloads():
# get the total number of distinct facets for items with at least 1 download # get the total number of distinct facets for items with at least 1 download
solr_query_params = { solr_query_params = {
'q': 'type:0', "q": "type:0",
'fq': 'isBot:false AND statistics_type:view AND bundleName:ORIGINAL', "fq": "isBot:false AND statistics_type:view AND bundleName:ORIGINAL",
'facet': 'true', "facet": "true",
'facet.field': 'owningItem', "facet.field": "owningItem",
'facet.mincount': 1, "facet.mincount": 1,
'facet.limit': 1, "facet.limit": 1,
'facet.offset': 0, "facet.offset": 0,
'stats': 'true', "stats": "true",
'stats.field': 'owningItem', "stats.field": "owningItem",
'stats.calcdistinct': 'true', "stats.calcdistinct": "true",
'shards': shards, "shards": shards,
'rows': 0, "rows": 0,
'wt': 'json' "wt": "json",
} }
solr_url = SOLR_SERVER + '/statistics/select' solr_url = SOLR_SERVER + "/statistics/select"
res = requests.get(solr_url, params=solr_query_params) res = requests.get(solr_url, params=solr_query_params)
try: try:
# get total number of distinct facets (countDistinct) # get total number of distinct facets (countDistinct)
results_totalNumFacets = res.json()['stats']['stats_fields']['owningItem']['countDistinct'] results_totalNumFacets = res.json()["stats"]["stats_fields"]["owningItem"][
"countDistinct"
]
except TypeError: except TypeError:
print('No item downloads to index, exiting.') print("No item downloads to index, exiting.")
exit(0) exit(0)
@@ -211,35 +216,39 @@ def index_downloads():
while results_current_page <= results_num_pages: while results_current_page <= results_num_pages:
# "pages" are zero based, but one based is more human readable # "pages" are zero based, but one based is more human readable
print('Indexing item downloads (page {} of {})'.format(results_current_page + 1, results_num_pages + 1)) print(
"Indexing item downloads (page {} of {})".format(
results_current_page + 1, results_num_pages + 1
)
)
solr_query_params = { solr_query_params = {
'q': 'type:0', "q": "type:0",
'fq': 'isBot:false AND statistics_type:view AND bundleName:ORIGINAL', "fq": "isBot:false AND statistics_type:view AND bundleName:ORIGINAL",
'facet': 'true', "facet": "true",
'facet.field': 'owningItem', "facet.field": "owningItem",
'facet.mincount': 1, "facet.mincount": 1,
'facet.limit': results_per_page, "facet.limit": results_per_page,
'facet.offset': results_current_page * results_per_page, "facet.offset": results_current_page * results_per_page,
'shards': shards, "shards": shards,
'rows': 0, "rows": 0,
'wt': 'json', "wt": "json",
'json.nl': 'map' # return facets as a dict instead of a flat list "json.nl": "map", # return facets as a dict instead of a flat list
} }
solr_url = SOLR_SERVER + '/statistics/select' solr_url = SOLR_SERVER + "/statistics/select"
res = requests.get(solr_url, params=solr_query_params) res = requests.get(solr_url, params=solr_query_params)
# Solr returns facets as a dict of dicts (see json.nl parameter) # Solr returns facets as a dict of dicts (see json.nl parameter)
downloads = res.json()['facet_counts']['facet_fields'] downloads = res.json()["facet_counts"]["facet_fields"]
# iterate over the 'owningItem' dict and get the item ids and downloads # iterate over the 'owningItem' dict and get the item ids and downloads
for item_id, item_downloads in downloads['owningItem'].items(): for item_id, item_downloads in downloads["owningItem"].items():
data.append((item_id, item_downloads)) data.append((item_id, item_downloads))
# do a batch insert of values from the current "page" of results # do a batch insert of values from the current "page" of results
sql = 'INSERT INTO items(id, downloads) VALUES %s ON CONFLICT(id) DO UPDATE SET downloads=excluded.downloads' sql = "INSERT INTO items(id, downloads) VALUES %s ON CONFLICT(id) DO UPDATE SET downloads=excluded.downloads"
psycopg2.extras.execute_values(cursor, sql, data, template='(%s, %s)') psycopg2.extras.execute_values(cursor, sql, data, template="(%s, %s)")
db.commit() db.commit()
# clear all items from the list so we can populate it with the next batch # clear all items from the list so we can populate it with the next batch
@@ -251,8 +260,10 @@ def index_downloads():
with DatabaseManager() as db: with DatabaseManager() as db:
with db.cursor() as cursor: with db.cursor() as cursor:
# create table to store item views and downloads # create table to store item views and downloads
cursor.execute('''CREATE TABLE IF NOT EXISTS items cursor.execute(
(id INT PRIMARY KEY, views INT DEFAULT 0, downloads INT DEFAULT 0)''') """CREATE TABLE IF NOT EXISTS items
(id INT PRIMARY KEY, views INT DEFAULT 0, downloads INT DEFAULT 0)"""
)
# commit the table creation before closing the database connection # commit the table creation before closing the database connection
db.commit() db.commit()