mirror of
https://github.com/ilri/dspace-statistics-api.git
synced 2024-12-18 02:32:20 +01:00
dspace_statistics_api/indexer.py: Limit to UUIDs
We need to make sure that the indexer only tries to index UUIDs, as opposed to legacy IDs that may have been left over from a migration from earlier DSpace versions (for example, "98110-unmigrated", "-1", etc.). For matching the UUIDs in Solr I decided that it is sufficient for our use case to simply match any thirty-six characters, since a UUID is composed of thirty-two hexadecimal characters and four dashes. We don't need to do any verification of "real" UUIDs because it would be needlessly complex in our case. See: https://github.com/ilri/dspace-statistics-api/issues/12
This commit is contained in:
parent
d1c177e146
commit
49751b53f0
@ -47,7 +47,7 @@ def index_views(indexType: str, facetField: str):
|
||||
#
|
||||
# see: https://lucene.apache.org/solr/guide/6_6/the-stats-component.html
|
||||
solr_query_params = {
|
||||
"q": "type:2",
|
||||
"q": f"type:2 AND {facetField}:/.{{36}}/",
|
||||
"fq": "-isBot:true AND statistics_type:view",
|
||||
"fl": facetField,
|
||||
"facet": "true",
|
||||
@ -94,7 +94,7 @@ def index_views(indexType: str, facetField: str):
|
||||
)
|
||||
|
||||
solr_query_params = {
|
||||
"q": "type:2",
|
||||
"q": f"type:2 AND {facetField}:/.{{36}}/",
|
||||
"fq": "-isBot:true AND statistics_type:view",
|
||||
"fl": facetField,
|
||||
"facet": "true",
|
||||
@ -130,7 +130,7 @@ def index_views(indexType: str, facetField: str):
|
||||
def index_downloads(indexType: str, facetField: str):
|
||||
# get the total number of distinct facets for items with at least 1 download
|
||||
solr_query_params = {
|
||||
"q": "type:0",
|
||||
"q": f"type:0 AND {facetField}:/.{{36}}/",
|
||||
"fq": "-isBot:true AND statistics_type:view AND bundleName:ORIGINAL",
|
||||
"fl": facetField,
|
||||
"facet": "true",
|
||||
@ -176,7 +176,7 @@ def index_downloads(indexType: str, facetField: str):
|
||||
)
|
||||
|
||||
solr_query_params = {
|
||||
"q": "type:0",
|
||||
"q": f"type:0 AND {facetField}:/.{{36}}/",
|
||||
"fq": "-isBot:true AND statistics_type:view AND bundleName:ORIGINAL",
|
||||
"fl": facetField,
|
||||
"facet": "true",
|
||||
|
Loading…
Reference in New Issue
Block a user