mirror of
https://github.com/ilri/dspace-statistics-api.git
synced 2024-11-17 03:47:09 +01:00
dspace_statistics_api/indexer.py: Limit to UUIDs
We need to make sure that the indexer only tries to index UUIDs, as opposed to legacy IDs that may have been left over from a migration from earlier DSpace versions (for example, "98110-unmigrated", "-1", etc.). For matching the UUIDs in Solr I decided that it is sufficient for our use case to simply match any thirty-six characters, since a UUID is composed of thirty-two hexadecimal characters and four dashes. We don't need to do any verification of "real" UUIDs because it would be needlessly complex in our case. See: https://github.com/ilri/dspace-statistics-api/issues/12
This commit is contained in:
parent
d1c177e146
commit
49751b53f0
@ -47,7 +47,7 @@ def index_views(indexType: str, facetField: str):
|
|||||||
#
|
#
|
||||||
# see: https://lucene.apache.org/solr/guide/6_6/the-stats-component.html
|
# see: https://lucene.apache.org/solr/guide/6_6/the-stats-component.html
|
||||||
solr_query_params = {
|
solr_query_params = {
|
||||||
"q": "type:2",
|
"q": f"type:2 AND {facetField}:/.{{36}}/",
|
||||||
"fq": "-isBot:true AND statistics_type:view",
|
"fq": "-isBot:true AND statistics_type:view",
|
||||||
"fl": facetField,
|
"fl": facetField,
|
||||||
"facet": "true",
|
"facet": "true",
|
||||||
@ -94,7 +94,7 @@ def index_views(indexType: str, facetField: str):
|
|||||||
)
|
)
|
||||||
|
|
||||||
solr_query_params = {
|
solr_query_params = {
|
||||||
"q": "type:2",
|
"q": f"type:2 AND {facetField}:/.{{36}}/",
|
||||||
"fq": "-isBot:true AND statistics_type:view",
|
"fq": "-isBot:true AND statistics_type:view",
|
||||||
"fl": facetField,
|
"fl": facetField,
|
||||||
"facet": "true",
|
"facet": "true",
|
||||||
@ -130,7 +130,7 @@ def index_views(indexType: str, facetField: str):
|
|||||||
def index_downloads(indexType: str, facetField: str):
|
def index_downloads(indexType: str, facetField: str):
|
||||||
# get the total number of distinct facets for items with at least 1 download
|
# get the total number of distinct facets for items with at least 1 download
|
||||||
solr_query_params = {
|
solr_query_params = {
|
||||||
"q": "type:0",
|
"q": f"type:0 AND {facetField}:/.{{36}}/",
|
||||||
"fq": "-isBot:true AND statistics_type:view AND bundleName:ORIGINAL",
|
"fq": "-isBot:true AND statistics_type:view AND bundleName:ORIGINAL",
|
||||||
"fl": facetField,
|
"fl": facetField,
|
||||||
"facet": "true",
|
"facet": "true",
|
||||||
@ -176,7 +176,7 @@ def index_downloads(indexType: str, facetField: str):
|
|||||||
)
|
)
|
||||||
|
|
||||||
solr_query_params = {
|
solr_query_params = {
|
||||||
"q": "type:0",
|
"q": f"type:0 AND {facetField}:/.{{36}}/",
|
||||||
"fq": "-isBot:true AND statistics_type:view AND bundleName:ORIGINAL",
|
"fq": "-isBot:true AND statistics_type:view AND bundleName:ORIGINAL",
|
||||||
"fl": facetField,
|
"fl": facetField,
|
||||||
"facet": "true",
|
"facet": "true",
|
||||||
|
Loading…
Reference in New Issue
Block a user