Skip to content

Commit eae7e14

Browse files
committed
add caching to all search apis
1 parent bfc2ecb commit eae7e14

4 files changed

Lines changed: 109 additions & 46 deletions

File tree

api/views/search_collaborative.py

Lines changed: 32 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from typing import Any, Dict, List, Optional, Tuple, Union, cast
33

44
import structlog
5+
from django.core.cache import cache
56
from elasticsearch_dsl import A
67
from elasticsearch_dsl import Q as ESQ
78
from elasticsearch_dsl import Search
@@ -14,6 +15,8 @@
1415
from api.views.paginated_elastic_view import PaginatedElasticSearchAPIView
1516
from search.documents import CollaborativeDocument
1617

18+
METADATA_CACHE_TTL = 60 * 30 # 30 minutes
19+
1720
logger = structlog.get_logger(__name__)
1821

1922

@@ -147,6 +150,12 @@ def __init__(self, **kwargs: Any) -> None:
147150
attributes={"component": "search_collaborative"},
148151
)
149152
def get_searchable_and_aggregations(self) -> Tuple[List[str], Dict[str, str]]:
153+
cached: Optional[Tuple[List[str], Dict[str, str]]] = cache.get(
154+
"collaborative_search_metadata_config"
155+
)
156+
if cached:
157+
return cached
158+
150159
searchable_fields = [
151160
"title",
152161
"summary",
@@ -173,7 +182,9 @@ def get_searchable_and_aggregations(self) -> Tuple[List[str], Dict[str, str]]:
173182
for metadata in filterable_metadata:
174183
aggregations[f"metadata.{metadata.label}"] = "terms" # type: ignore
175184

176-
return searchable_fields, aggregations
185+
result = (searchable_fields, aggregations)
186+
cache.set("collaborative_search_metadata_config", result, timeout=METADATA_CACHE_TTL)
187+
return result
177188

178189
@trace_method(name="add_aggregations", attributes={"component": "search_collaborative"})
179190
def add_aggregations(self, search: Search) -> Search:
@@ -189,8 +200,17 @@ def add_aggregations(self, search: Search) -> Search:
189200
)
190201

191202
if aggregate_fields:
192-
metadata_qs = Metadata.objects.filter(filterable=True)
193-
filterable_metadata = [str(meta.label) for meta in metadata_qs] # type: ignore
203+
filterable_metadata: List[str] = (
204+
cache.get("collaborative_filterable_metadata_labels") or []
205+
)
206+
if not filterable_metadata:
207+
metadata_qs = Metadata.objects.filter(filterable=True)
208+
filterable_metadata = [str(meta.label) for meta in metadata_qs] # type: ignore
209+
cache.set(
210+
"collaborative_filterable_metadata_labels",
211+
filterable_metadata,
212+
timeout=METADATA_CACHE_TTL,
213+
)
194214

195215
metadata_bucket = search.aggs.bucket("metadata", "nested", path="metadata")
196216
composite_agg = A(
@@ -277,8 +297,15 @@ def generate_q_expression(self, query: str) -> Optional[Union[ESQuery, List[ESQu
277297

278298
@trace_method(name="add_filters", attributes={"component": "search_collaborative"})
279299
def add_filters(self, filters: Dict[str, str], search: Search) -> Search:
280-
non_filter_metadata = Metadata.objects.filter(filterable=False).all()
281-
excluded_labels: List[str] = [e.label for e in non_filter_metadata] # type: ignore
300+
excluded_labels: List[str] = cache.get("collaborative_non_filter_metadata_labels") or []
301+
if not excluded_labels:
302+
non_filter_metadata = Metadata.objects.filter(filterable=False).all()
303+
excluded_labels = [e.label for e in non_filter_metadata] # type: ignore
304+
cache.set(
305+
"collaborative_non_filter_metadata_labels",
306+
excluded_labels,
307+
timeout=METADATA_CACHE_TTL,
308+
)
282309

283310
for filter in filters:
284311
if filter in excluded_labels:

api/views/search_dataset.py

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from typing import Any, Dict, List, Optional, Tuple, TypedDict, Union, cast
33

44
import structlog
5+
from django.core.cache import cache
56
from elasticsearch_dsl import A
67
from elasticsearch_dsl import Q as ESQ
78
from elasticsearch_dsl import Search
@@ -14,6 +15,8 @@
1415
from api.views.paginated_elastic_view import PaginatedElasticSearchAPIView
1516
from search.documents import DatasetDocument
1617

18+
METADATA_CACHE_TTL = 60 * 30 # 30 minutes
19+
1720
logger = structlog.get_logger(__name__)
1821

1922

@@ -147,6 +150,12 @@ def __init__(self, **kwargs: Any) -> None:
147150
)
148151
def get_searchable_and_aggregations(self) -> Tuple[List[str], Dict[str, str]]:
149152
"""Get searchable fields and aggregations for the search."""
153+
cached: Optional[Tuple[List[str], Dict[str, str]]] = cache.get(
154+
"dataset_search_metadata_config"
155+
)
156+
if cached:
157+
return cached
158+
150159
enabled_metadata = Metadata.objects.filter(enabled=True).all()
151160
searchable_fields: List[str] = []
152161
searchable_fields.extend(
@@ -170,7 +179,9 @@ def get_searchable_and_aggregations(self) -> Tuple[List[str], Dict[str, str]]:
170179
for metadata in enabled_metadata: # type: Metadata
171180
if metadata.filterable:
172181
aggregations[f"metadata.{metadata.label}"] = "terms"
173-
return searchable_fields, aggregations
182+
result = (searchable_fields, aggregations)
183+
cache.set("dataset_search_metadata_config", result, timeout=METADATA_CACHE_TTL)
184+
return result
174185

175186
@trace_method(name="add_aggregations", attributes={"component": "search_dataset"})
176187
def add_aggregations(self, search: Search) -> Search:
@@ -248,8 +259,13 @@ def generate_q_expression(self, query: str) -> Optional[Union[ESQuery, List[ESQu
248259
@trace_method(name="add_filters", attributes={"component": "search_dataset"})
249260
def add_filters(self, filters: Dict[str, str], search: Search) -> Search:
250261
"""Add filters to the search query."""
251-
non_filter_metadata = Metadata.objects.filter(filterable=False).all()
252-
excluded_labels: List[str] = [e.label for e in non_filter_metadata] # type: ignore
262+
excluded_labels: List[str] = cache.get("dataset_non_filter_metadata_labels") or []
263+
if not excluded_labels:
264+
non_filter_metadata = Metadata.objects.filter(filterable=False).all()
265+
excluded_labels = [e.label for e in non_filter_metadata] # type: ignore
266+
cache.set(
267+
"dataset_non_filter_metadata_labels", excluded_labels, timeout=METADATA_CACHE_TTL
268+
)
253269

254270
for filter in filters:
255271
if filter in excluded_labels:

api/views/search_unified.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from typing import Any, Dict, List, Tuple
44

55
import structlog
6+
from django.core.cache import cache
67
from elasticsearch_dsl import Q as ESQ
78
from elasticsearch_dsl import Search
89
from rest_framework import serializers
@@ -13,6 +14,7 @@
1314
from api.models import Dataset, Geography, Metadata, UseCase
1415
from api.models.AIModel import AIModel
1516
from api.models.Collaborative import Collaborative
17+
from api.signals.dataset_signals import SEARCH_CACHE_VERSION_KEY
1618
from api.utils.telemetry_utils import trace_method
1719
from DataSpace import settings
1820
from search.documents import (
@@ -440,10 +442,27 @@ def perform_unified_search(
440442

441443
return results, total, aggregations
442444

445+
def _generate_unified_cache_key(self, request: Any) -> str:
    """Build a deterministic cache key for a unified search request.

    The key is derived from the query, pagination, requested types, the
    full set of GET parameters, and the current search cache version read
    from the cache under ``SEARCH_CACHE_VERSION_KEY`` (defaulting to 0).

    Args:
        request: The incoming request; only ``request.GET`` is read.

    Returns:
        A key of the form ``unified_search:<sha256 hex digest>``.
    """
    # Local imports keep this method self-contained; both are stdlib.
    import hashlib
    import json

    params: Dict[str, str] = {
        "query": request.GET.get("query", ""),
        "page": request.GET.get("page", "1"),
        "size": request.GET.get("size", "10"),
        "types": request.GET.get("types", "dataset,usecase,aimodel,collaborative,publisher"),
        # NOTE(review): ``.dict()`` keeps a single value per key; if the view
        # reads repeated parameters via ``getlist`` the key should be built
        # from ``request.GET.lists()`` instead -- confirm against the view.
        "filters": str(sorted(request.GET.dict().items())),
        "version": str(cache.get(SEARCH_CACHE_VERSION_KEY, 0)),
    }

    # The built-in hash() is salted per interpreter process for str values,
    # so it yields a different key in every worker and after every restart.
    # A cryptographic digest over a canonical serialization is stable across
    # processes, which is required for a shared cache to produce hits.
    payload = json.dumps(params, sort_keys=True, ensure_ascii=False)
    digest = hashlib.sha256(payload.encode("utf-8")).hexdigest()
    return f"unified_search:{digest}"
456+
443457
@trace_method(name="get", attributes={"component": "unified_search"})
444458
def get(self, request: Any) -> Response:
445459
"""Handle GET request and return unified search results."""
446460
try:
461+
cache_key = self._generate_unified_cache_key(request)
462+
cached_result = cache.get(cache_key)
463+
if cached_result:
464+
return Response(cached_result)
465+
447466
query: str = request.GET.get("query", "")
448467
page: int = int(request.GET.get("page", 1))
449468
size: int = int(request.GET.get("size", 10))
@@ -476,6 +495,8 @@ def get(self, request: Any) -> Response:
476495
"types_searched": types_list,
477496
}
478497

498+
cache.set(cache_key, result, timeout=3600)
499+
479500
return Response(result)
480501

481502
except Exception as e:

api/views/search_usecase.py

Lines changed: 37 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from typing import Any, Dict, List, Optional, Tuple, TypedDict, Union, cast
33

44
import structlog
5+
from django.core.cache import cache
56
from elasticsearch_dsl import A
67
from elasticsearch_dsl import Q as ESQ
78
from elasticsearch_dsl import Search
@@ -14,6 +15,8 @@
1415
from api.views.paginated_elastic_view import PaginatedElasticSearchAPIView
1516
from search.documents import UseCaseDocument
1617

18+
METADATA_CACHE_TTL = 60 * 30 # 30 minutes
19+
1720
logger = structlog.get_logger(__name__)
1821

1922

@@ -144,9 +147,7 @@ def __init__(self, **kwargs: Any) -> None:
144147
super().__init__(**kwargs)
145148
self.searchable_fields: List[str]
146149
self.aggregations: Dict[str, str]
147-
self.searchable_fields, self.aggregations = (
148-
self.get_searchable_and_aggregations()
149-
)
150+
self.searchable_fields, self.aggregations = self.get_searchable_and_aggregations()
150151
self.logger = structlog.get_logger(__name__)
151152

152153
@trace_method(
@@ -155,6 +156,12 @@ def __init__(self, **kwargs: Any) -> None:
155156
)
156157
def get_searchable_and_aggregations(self) -> Tuple[List[str], Dict[str, str]]:
157158
"""Get searchable fields and aggregations for the search."""
159+
cached: Optional[Tuple[List[str], Dict[str, str]]] = cache.get(
160+
"usecase_search_metadata_config"
161+
)
162+
if cached:
163+
return cached
164+
158165
searchable_fields = [
159166
"title",
160167
"summary",
@@ -181,7 +188,9 @@ def get_searchable_and_aggregations(self) -> Tuple[List[str], Dict[str, str]]:
181188
for metadata in filterable_metadata:
182189
aggregations[f"metadata.{metadata.label}"] = "terms" # type: ignore
183190

184-
return searchable_fields, aggregations
191+
result = (searchable_fields, aggregations)
192+
cache.set("usecase_search_metadata_config", result, timeout=METADATA_CACHE_TTL)
193+
return result
185194

186195
@trace_method(name="add_aggregations", attributes={"component": "search_usecase"})
187196
def add_aggregations(self, search: Search) -> Search:
@@ -199,18 +208,21 @@ def add_aggregations(self, search: Search) -> Search:
199208
)
200209

201210
if aggregate_fields:
202-
metadata_qs = Metadata.objects.filter(filterable=True)
203-
filterable_metadata = [str(meta.label) for meta in metadata_qs] # type: ignore
211+
filterable_metadata: List[str] = cache.get("usecase_filterable_metadata_labels") or []
212+
if not filterable_metadata:
213+
metadata_qs = Metadata.objects.filter(filterable=True)
214+
filterable_metadata = [str(meta.label) for meta in metadata_qs] # type: ignore
215+
cache.set(
216+
"usecase_filterable_metadata_labels",
217+
filterable_metadata,
218+
timeout=METADATA_CACHE_TTL,
219+
)
204220

205221
metadata_bucket = search.aggs.bucket("metadata", "nested", path="metadata")
206222
composite_agg = A(
207223
"composite",
208224
sources=[
209-
{
210-
"metadata_label": {
211-
"terms": {"field": "metadata.metadata_item.label"}
212-
}
213-
},
225+
{"metadata_label": {"terms": {"field": "metadata.metadata_item.label"}}},
214226
{"metadata_value": {"terms": {"field": "metadata.value"}}},
215227
],
216228
size=10000,
@@ -219,13 +231,7 @@ def add_aggregations(self, search: Search) -> Search:
219231
"filter",
220232
{ # type: ignore[arg-type]
221233
"bool": {
222-
"must": [
223-
{
224-
"terms": {
225-
"metadata.metadata_item.label": filterable_metadata
226-
}
227-
}
228-
]
234+
"must": [{"terms": {"metadata.metadata_item.label": filterable_metadata}}]
229235
}
230236
},
231237
)
@@ -235,12 +241,8 @@ def add_aggregations(self, search: Search) -> Search:
235241

236242
return search
237243

238-
@trace_method(
239-
name="generate_q_expression", attributes={"component": "search_usecase"}
240-
)
241-
def generate_q_expression(
242-
self, query: str
243-
) -> Optional[Union[ESQuery, List[ESQuery]]]:
244+
@trace_method(name="generate_q_expression", attributes={"component": "search_usecase"})
245+
def generate_q_expression(self, query: str) -> Optional[Union[ESQuery, List[ESQuery]]]:
244246
"""Generate Elasticsearch Query expression."""
245247
if query:
246248
queries: List[ESQuery] = []
@@ -256,9 +258,7 @@ def generate_q_expression(
256258
ESQ("wildcard", **{field: {"value": f"*{query}*"}}),
257259
ESQ(
258260
"fuzzy",
259-
**{
260-
field: {"value": query, "fuzziness": "AUTO"}
261-
},
261+
**{field: {"value": query, "fuzziness": "AUTO"}},
262262
),
263263
],
264264
),
@@ -281,18 +281,14 @@ def generate_q_expression(
281281
ESQ("wildcard", **{field: {"value": f"*{query}*"}}),
282282
ESQ(
283283
"fuzzy",
284-
**{
285-
field: {"value": query, "fuzziness": "AUTO"}
286-
},
284+
**{field: {"value": query, "fuzziness": "AUTO"}},
287285
),
288286
],
289287
),
290288
)
291289
)
292290
else:
293-
queries.append(
294-
ESQ("fuzzy", **{field: {"value": query, "fuzziness": "AUTO"}})
295-
)
291+
queries.append(ESQ("fuzzy", **{field: {"value": query, "fuzziness": "AUTO"}}))
296292
else:
297293
queries = [ESQ("match_all")]
298294

@@ -301,8 +297,13 @@ def generate_q_expression(
301297
@trace_method(name="add_filters", attributes={"component": "search_usecase"})
302298
def add_filters(self, filters: Dict[str, str], search: Search) -> Search:
303299
"""Add filters to the search query."""
304-
non_filter_metadata = Metadata.objects.filter(filterable=False).all()
305-
excluded_labels: List[str] = [e.label for e in non_filter_metadata] # type: ignore
300+
excluded_labels: List[str] = cache.get("usecase_non_filter_metadata_labels") or []
301+
if not excluded_labels:
302+
non_filter_metadata = Metadata.objects.filter(filterable=False).all()
303+
excluded_labels = [e.label for e in non_filter_metadata] # type: ignore
304+
cache.set(
305+
"usecase_non_filter_metadata_labels", excluded_labels, timeout=METADATA_CACHE_TTL
306+
)
306307

307308
for filter in filters:
308309
if filter in excluded_labels:
@@ -335,9 +336,7 @@ def add_filters(self, filters: Dict[str, str], search: Search) -> Search:
335336
search = search.filter(
336337
"nested",
337338
path="metadata",
338-
query={
339-
"bool": {"must": {"term": {f"metadata.value": filters[filter]}}}
340-
},
339+
query={"bool": {"must": {"term": {f"metadata.value": filters[filter]}}}},
341340
)
342341
return search
343342

0 commit comments

Comments
 (0)