From 473968f8696d3628dc976cf6d8e172c409c77e0f Mon Sep 17 00:00:00 2001 From: Cameron Lamb Date: Thu, 6 Mar 2025 09:27:31 +0000 Subject: [PATCH 1/4] Adjust query caching and update index behaviour --- src/core/tasks.py | 3 --- src/extended_search/apps.py | 5 ----- src/extended_search/backends/backend.py | 10 +++++++++ src/extended_search/query_builder.py | 27 ++++++++++++++++++++++--- 4 files changed, 34 insertions(+), 11 deletions(-) diff --git a/src/core/tasks.py b/src/core/tasks.py index f0c2d84c0..f311f0255 100644 --- a/src/core/tasks.py +++ b/src/core/tasks.py @@ -30,9 +30,6 @@ def ingest_uk_staff_locations(self): @celery_app.task(bind=True) @cache_lock(cache_key="update_search_index") def update_search_index(self): - # Run update_index --schema-only - call_command("update_index", schema_only=True) - # Run update_index call_command("update_index") diff --git a/src/extended_search/apps.py b/src/extended_search/apps.py index 89cecd3c1..03e1f02f0 100644 --- a/src/extended_search/apps.py +++ b/src/extended_search/apps.py @@ -8,7 +8,6 @@ class ExtendedSearchConfig(AppConfig): def ready(self): import extended_search.signals # noqa from extended_search import query_builder, settings - from extended_search.index import get_indexed_models settings.settings_singleton.initialise_field_dict() settings.settings_singleton.initialise_env_dict() @@ -16,7 +15,3 @@ def ready(self): settings.settings_singleton.initialise_db_dict() settings.extended_search_settings = settings.settings_singleton.to_dict() query_builder.extended_search_settings = settings.extended_search_settings - - for model_class in get_indexed_models(): - if hasattr(model_class, "indexed_fields") and model_class.indexed_fields: - query_builder.CustomQueryBuilder.build_search_query(model_class, True) diff --git a/src/extended_search/backends/backend.py b/src/extended_search/backends/backend.py index 4043d34c6..dbbdd0c29 100644 --- a/src/extended_search/backends/backend.py +++ b/src/extended_search/backends/backend.py @@ -4,6 +4,7 @@ Elasticsearch7Mapping, Elasticsearch7SearchBackend, Elasticsearch7SearchQueryCompiler, + ElasticsearchAtomicIndexRebuilder, Field, ) from wagtail.search.index import SearchField @@ -12,6 +13,7 @@ from extended_search import settings as search_settings from extended_search.index import RelatedFields from extended_search.query import Filtered, FunctionScore, Nested, OnlyFields +from extended_search.query_builder import build_queries_for_index class FilteredSearchMapping(Elasticsearch7Mapping): @@ -390,9 +392,17 @@ class CustomSearchQueryCompiler( mapping_class = CustomSearchMapping +class CustomAtomicIndexRebuilder(ElasticsearchAtomicIndexRebuilder): + def start(self): + index = super().start() + build_queries_for_index(index) + return index + + class CustomSearchBackend(Elasticsearch7SearchBackend): query_compiler_class = CustomSearchQueryCompiler mapping_class = CustomSearchMapping + atomic_rebuilder_class = CustomAtomicIndexRebuilder SearchBackend = CustomSearchBackend diff --git a/src/extended_search/query_builder.py b/src/extended_search/query_builder.py index 61b2e96d9..6ee6b125a 100644 --- a/src/extended_search/query_builder.py +++ b/src/extended_search/query_builder.py @@ -1,13 +1,15 @@ import inspect import logging -from typing import Optional, Type +from typing import TYPE_CHECKING, Optional, Type from django.conf import settings from django.core.cache import cache from django.db import models from wagtail.search import index +from wagtail.search.backends import get_search_backend from wagtail.search.query import Boost, Fuzzy, Phrase, PlainText, SearchQuery +from extended_search import query_builder from extended_search import settings as search_settings from extended_search.index import ( BaseField, @@ -23,6 +25,11 @@ from extended_search.types import AnalysisType, SearchQueryType +if TYPE_CHECKING: + from wagtail.search.backends.elasticsearch7 import Elasticsearch7Index + + from extended_search.backends.backend import CustomSearchBackend + logger = logging.getLogger(__name__) @@ -324,7 +331,9 @@ def get_search_query(cls, model_class, query_str: str): return cls.swap_variables(built_query, query_str) @classmethod - def build_search_query(cls, model_class, ignore_cache=False): + def build_search_query( + cls, model_class, ignore_cache=False, index: "Elasticsearch7Index | None" = None + ): """ Generates a full query for a model class, by running query builder against the given model as well as all models with the given as a @@ -332,7 +341,11 @@ def build_search_query(cls, model_class, ignore_cache=False): type, and all are joined together at the end. """ if settings.SEARCH_ENABLE_QUERY_CACHE: - cache_key = model_class.__name__ + search_backend: "CustomSearchBackend" = get_search_backend() + model_index = search_backend.get_index_for_model(model_class) + if index and index != model_index: + return None + cache_key = f"{model_index.name}__{model_class.__name__}" if not ignore_cache: built_query = cache.get(cache_key, None) if built_query: @@ -410,3 +423,11 @@ def get_extended_models_with_unique_indexed_fields( ): extended_model_classes.append(indexed_model) return extended_model_classes + + +def build_queries_for_index(index: "Elasticsearch7Index"): + for model_class in get_indexed_models(): + if hasattr(model_class, "indexed_fields") and model_class.indexed_fields: + query_builder.CustomQueryBuilder.build_search_query( + model_class, True, index=index + ) From beb62d5adec4046d62f89f10cc9bbe57b1f9000e Mon Sep 17 00:00:00 2001 From: Cameron Lamb Date: Thu, 6 Mar 2025 10:07:17 +0000 Subject: [PATCH 2/4] Only index the models for the index that is being updated --- src/extended_search/backends/backend.py | 20 +++++++++++++++++--- src/extended_search/query_builder.py | 19 +++++++------------ 2 files changed, 24 insertions(+), 15 deletions(-) diff --git a/src/extended_search/backends/backend.py b/src/extended_search/backends/backend.py index dbbdd0c29..1c4bd5497 100644 --- a/src/extended_search/backends/backend.py +++ b/src/extended_search/backends/backend.py @@ -1,5 +1,6 @@ from typing import Optional, Union +from wagtail.search.backends import get_search_backend from wagtail.search.backends.elasticsearch7 import ( Elasticsearch7Mapping, Elasticsearch7SearchBackend, @@ -8,12 +9,13 @@ Field, ) from wagtail.search.index import SearchField +from wagtail.search.management.commands.update_index import group_models_by_index from wagtail.search.query import MATCH_NONE, Fuzzy, MatchAll, Not, Phrase, PlainText from extended_search import settings as search_settings -from extended_search.index import RelatedFields +from extended_search.index import RelatedFields, get_indexed_models from extended_search.query import Filtered, FunctionScore, Nested, OnlyFields -from extended_search.query_builder import build_queries_for_index +from extended_search.query_builder import build_queries class FilteredSearchMapping(Elasticsearch7Mapping): @@ -395,7 +397,19 @@ class CustomSearchQueryCompiler( class CustomAtomicIndexRebuilder(ElasticsearchAtomicIndexRebuilder): def start(self): index = super().start() - build_queries_for_index(index) + + models_grouped_by_index = group_models_by_index( + get_search_backend(), get_indexed_models() + ) + models_for_current_index = [] + + for index_models in models_grouped_by_index.keys(): + if index.name.startswith(index_models.name): + models_for_current_index = models_grouped_by_index[index_models] + + if models_for_current_index: + build_queries(models=models_for_current_index) + return index diff --git a/src/extended_search/query_builder.py b/src/extended_search/query_builder.py index 6ee6b125a..d03055be9 100644 --- a/src/extended_search/query_builder.py +++ b/src/extended_search/query_builder.py @@ -26,8 +26,6 @@ if TYPE_CHECKING: - from wagtail.search.backends.elasticsearch7 import Elasticsearch7Index - from extended_search.backends.backend import CustomSearchBackend logger = logging.getLogger(__name__) @@ -331,9 +329,7 @@ def get_search_query(cls, model_class, query_str: str): return cls.swap_variables(built_query, query_str) @classmethod - def build_search_query( - cls, model_class, ignore_cache=False, index: "Elasticsearch7Index | None" = None - ): + def build_search_query(cls, model_class, ignore_cache=False): """ Generates a full query for a model class, by running query builder against the given model as well as all models with the given as a @@ -343,8 +339,6 @@ def build_search_query( if settings.SEARCH_ENABLE_QUERY_CACHE: search_backend: "CustomSearchBackend" = get_search_backend() model_index = search_backend.get_index_for_model(model_class) - if index and index != model_index: - return None cache_key = f"{model_index.name}__{model_class.__name__}" if not ignore_cache: built_query = cache.get(cache_key, None) @@ -425,9 +419,10 @@ def get_extended_models_with_unique_indexed_fields( return extended_model_classes -def build_queries_for_index(index: "Elasticsearch7Index"): - for model_class in get_indexed_models(): +def build_queries(models: list[models.Model] | None = None): + if not models: + models = get_indexed_models() + + for model_class in models: if hasattr(model_class, "indexed_fields") and model_class.indexed_fields: - query_builder.CustomQueryBuilder.build_search_query( - model_class, True, index=index - ) + query_builder.CustomQueryBuilder.build_search_query(model_class, True) From 5d4a7cecd95ee10986f9bac5a29c1eace9b74967 Mon Sep 17 00:00:00 2001 From: Cameron Lamb Date: Thu, 6 Mar 2025 10:30:16 +0000 Subject: [PATCH 3/4] We want to store the query against the alias name as that is the unique one --- src/extended_search/query_builder.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/extended_search/query_builder.py b/src/extended_search/query_builder.py index d03055be9..c144050c9 100644 --- a/src/extended_search/query_builder.py +++ b/src/extended_search/query_builder.py @@ -339,6 +339,10 @@ def build_search_query(cls, model_class, ignore_cache=False): if settings.SEARCH_ENABLE_QUERY_CACHE: search_backend: "CustomSearchBackend" = get_search_backend() model_index = search_backend.get_index_for_model(model_class) + if model_index.is_alias(): + alias_indexes = model_index.aliased_indices() + if len(alias_indexes) == 1: + model_index = model_index.aliased_indices()[0] cache_key = f"{model_index.name}__{model_class.__name__}" if not ignore_cache: built_query = cache.get(cache_key, None) From d9f51325a79a17dd0efeeb9b9c26b92cd94ff587 Mon Sep 17 00:00:00 2001 From: Cameron Lamb Date: Thu, 6 Mar 2025 10:30:50 +0000 Subject: [PATCH 4/4] We want to build the queries when we know the unique index name --- src/extended_search/backends/backend.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/extended_search/backends/backend.py b/src/extended_search/backends/backend.py index 1c4bd5497..b2a3a335b 100644 --- a/src/extended_search/backends/backend.py +++ b/src/extended_search/backends/backend.py @@ -395,23 +395,20 @@ class CustomSearchQueryCompiler( class CustomAtomicIndexRebuilder(ElasticsearchAtomicIndexRebuilder): - def start(self): - index = super().start() - + def finish(self): + super().finish() models_grouped_by_index = group_models_by_index( get_search_backend(), get_indexed_models() ) models_for_current_index = [] for index_models in models_grouped_by_index.keys(): - if index.name.startswith(index_models.name): + if self.index.name.startswith(index_models.name): models_for_current_index = models_grouped_by_index[index_models] if models_for_current_index: build_queries(models=models_for_current_index) - return index - class CustomSearchBackend(Elasticsearch7SearchBackend): query_compiler_class = CustomSearchQueryCompiler