From 49751b53f03f6756a7f62c84cc88e4178b7f0fab Mon Sep 17 00:00:00 2001
From: Alan Orth
Date: Tue, 5 Jan 2021 12:30:27 +0200
Subject: [PATCH] dspace_statistics_api/indexer.py: Limit to UUIDs

We need to make sure that the indexer only tries to index UUIDs, as
opposed to legacy IDs that may have been left over from a migration
from earlier DSpace versions. For example, "98110-unmigrated", "-1"
etc.

For matching the UUIDs in Solr I decided that it is sufficient for
our use case to simply match thirty-six characters, where a UUID is
composed of thirty-two hexadecimal characters and four dashes. We
don't need to do any verification of "real" UUIDs because it would
be needlessly complex in our case.

See: https://github.com/ilri/dspace-statistics-api/issues/12
---
 dspace_statistics_api/indexer.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/dspace_statistics_api/indexer.py b/dspace_statistics_api/indexer.py
index b82e197..77f9352 100644
--- a/dspace_statistics_api/indexer.py
+++ b/dspace_statistics_api/indexer.py
@@ -47,7 +47,7 @@ def index_views(indexType: str, facetField: str):
     #
     # see: https://lucene.apache.org/solr/guide/6_6/the-stats-component.html
     solr_query_params = {
-        "q": "type:2",
+        "q": f"type:2 AND {facetField}:/.{{36}}/",
         "fq": "-isBot:true AND statistics_type:view",
         "fl": facetField,
         "facet": "true",
@@ -94,7 +94,7 @@ def index_views(indexType: str, facetField: str):
         )
 
         solr_query_params = {
-            "q": "type:2",
+            "q": f"type:2 AND {facetField}:/.{{36}}/",
             "fq": "-isBot:true AND statistics_type:view",
             "fl": facetField,
             "facet": "true",
@@ -130,7 +130,7 @@ def index_views(indexType: str, facetField: str):
 def index_downloads(indexType: str, facetField: str):
     # get the total number of distinct facets for items with at least 1 download
     solr_query_params = {
-        "q": "type:0",
+        "q": f"type:0 AND {facetField}:/.{{36}}/",
         "fq": "-isBot:true AND statistics_type:view AND bundleName:ORIGINAL",
         "fl": facetField,
         "facet": "true",
@@ -176,7 +176,7 @@ def index_downloads(indexType: str, facetField: str):
         )
 
         solr_query_params = {
-            "q": "type:0",
+            "q": f"type:0 AND {facetField}:/.{{36}}/",
             "fq": "-isBot:true AND statistics_type:view AND bundleName:ORIGINAL",
             "fl": facetField,
             "facet": "true",