onyx-dot-app · evan-danswer · Mar 5, 2025
@@ -153,8 +153,9 @@ def generate_initial_answer(
     )
     for tool_response in yield_search_responses(
         query=question,
-        reranked_sections=answer_generation_documents.streaming_documents,
-        final_context_sections=answer_generation_documents.context_documents,
+        get_retrieved_sections=lambda: answer_generation_documents.context_documents,
+        get_reranked_sections=lambda: answer_generation_documents.streaming_documents,
+        get_final_context_sections=lambda: answer_generation_documents.context_documents,
         search_query_info=query_info,
         get_section_relevance=lambda: relevance_list,
         search_tool=graph_config.tooling.search_tool,

@@ -179,8 +179,9 @@ def generate_validate_refined_answer(
     )
     for tool_response in yield_search_responses(
         query=question,
-        reranked_sections=answer_generation_documents.streaming_documents,
-        final_context_sections=answer_generation_documents.context_documents,
+        get_retrieved_sections=lambda: answer_generation_documents.context_documents,
+        get_reranked_sections=lambda: answer_generation_documents.streaming_documents,
+        get_final_context_sections=lambda: answer_generation_documents.context_documents,
         search_query_info=query_info,
         get_section_relevance=lambda: relevance_list,
         search_tool=graph_config.tooling.search_tool,

@@ -13,7 +13,6 @@
 from onyx.chat.models import StreamStopReason
 from onyx.chat.models import StreamType
 from onyx.chat.models import SubQuestionPiece
-from onyx.context.search.models import IndexFilters
 from onyx.tools.models import SearchQueryInfo
 from onyx.utils.logger import setup_logger
 
@@ -144,8 +143,6 @@ def get_query_info(results: list[QueryRetrievalResult]) -> SearchQueryInfo:
         if result.query_info is not None:
             query_info = result.query_info
             break
-    return query_info or SearchQueryInfo(
-        predicted_search=None,
-        final_filters=IndexFilters(access_control_list=None),
-        recency_bias_multiplier=1.0,
-    )
+
+    assert query_info is not None, "must have query info"
+    return query_info
@@ -56,8 +56,9 @@ def format_results(
         relevance_list = relevance_from_docs(reranked_documents)
         for tool_response in yield_search_responses(
             query=state.question,
-            reranked_sections=state.retrieved_documents,
-            final_context_sections=reranked_documents,
+            get_retrieved_sections=lambda: reranked_documents,
+            get_reranked_sections=lambda: state.retrieved_documents,
+            get_final_context_sections=lambda: reranked_documents,
             search_query_info=query_info,
             get_section_relevance=lambda: relevance_list,
             search_tool=graph_config.tooling.search_tool,

@@ -10,13 +10,24 @@
 from onyx.agents.agent_search.orchestration.states import ToolChoice
 from onyx.agents.agent_search.orchestration.states import ToolChoiceState
 from onyx.agents.agent_search.orchestration.states import ToolChoiceUpdate
+from onyx.agents.agent_search.shared_graph_utils.constants import EMBEDDING_KEY
+from onyx.agents.agent_search.shared_graph_utils.constants import IS_KEYWORD_KEY
+from onyx.agents.agent_search.shared_graph_utils.constants import KEYWORDS_KEY
 from onyx.chat.prompt_builder.answer_prompt_builder import AnswerPromptBuilder
 from onyx.chat.tool_handling.tool_response_handler import get_tool_by_name
 from onyx.chat.tool_handling.tool_response_handler import (
     get_tool_call_for_non_tool_calling_llm_impl,
 )
+from onyx.context.search.preprocessing.preprocessing import query_analysis
+from onyx.context.search.retrieval.search_runner import get_query_embedding
 from onyx.tools.tool import Tool
+from onyx.tools.tool_implementations.search.search_tool import SearchTool
 from onyx.utils.logger import setup_logger
+from onyx.utils.threadpool_concurrency import run_in_background
+from onyx.utils.threadpool_concurrency import TimeoutThread
+from onyx.utils.threadpool_concurrency import wait_on_background
+from onyx.utils.timing import log_function_time
+from shared_configs.model_server_models import Embedding
 
 logger = setup_logger()
 
@@ -25,6 +36,7 @@
 # and a function that handles extracting the necessary fields
 # from the state and config
 # TODO: fan-out to multiple tool call nodes? Make this configurable?
+@log_function_time(print_only=True)
 def choose_tool(
     state: ToolChoiceState,
     config: RunnableConfig,
@@ -37,6 +49,29 @@ def choose_tool(
     should_stream_answer = state.should_stream_answer
 
     agent_config = cast(GraphConfig, config["metadata"]["config"])
+
+    force_use_tool = agent_config.tooling.force_use_tool
+
+    embedding_thread: TimeoutThread[Embedding] | None = None
+    keyword_thread: TimeoutThread[tuple[bool, list[str]]] | None = None
+    if (
+        not agent_config.behavior.use_agentic_search
+        and agent_config.tooling.search_tool is not None
+        and (
+            not force_use_tool.force_use or force_use_tool.tool_name == SearchTool._NAME
+        )
+    ):
+        # Start both lookups in background threads; joined later if search is chosen
+        embedding_thread = run_in_background(
+            get_query_embedding,
+            agent_config.inputs.search_request.query,
+            agent_config.persistence.db_session,
+        )
+        keyword_thread = run_in_background(
+            query_analysis,
+            agent_config.inputs.search_request.query,
+        )
+
     using_tool_calling_llm = agent_config.tooling.using_tool_calling_llm
     prompt_builder = state.prompt_snapshot or agent_config.inputs.prompt_builder
 
@@ -47,7 +82,6 @@ def choose_tool(
     tools = [
         tool for tool in (agent_config.tooling.tools or []) if tool.name in state.tools
     ]
-    force_use_tool = agent_config.tooling.force_use_tool
 
     tool, tool_args = None, None
     if force_use_tool.force_use and force_use_tool.args is not None:
@@ -71,6 +105,14 @@ def choose_tool(
     # If we have a tool and tool args, we are ready to request a tool call.
     # This only happens if the tool call was forced or we are using a non-tool calling LLM.
     if tool and tool_args:
+        if embedding_thread and tool.name == SearchTool._NAME:
+            # Wait for the embedding thread to finish
+            embedding = wait_on_background(embedding_thread)
+            tool_args[EMBEDDING_KEY] = embedding
+        if keyword_thread and tool.name == SearchTool._NAME:
+            is_keyword, keywords = wait_on_background(keyword_thread)
+            tool_args[IS_KEYWORD_KEY] = is_keyword
+            tool_args[KEYWORDS_KEY] = keywords
         return ToolChoiceUpdate(
             tool_choice=ToolChoice(
                 tool=tool,
@@ -145,6 +187,15 @@ def choose_tool(
     logger.debug(f"Selected tool: {selected_tool.name}")
     logger.debug(f"Selected tool call request: {selected_tool_call_request}")
 
+    if embedding_thread and selected_tool.name == SearchTool._NAME:
+        # Wait for the embedding thread to finish
+        embedding = wait_on_background(embedding_thread)
+        selected_tool_call_request["args"][EMBEDDING_KEY] = embedding
+    if keyword_thread and selected_tool.name == SearchTool._NAME:
+        is_keyword, keywords = wait_on_background(keyword_thread)
+        selected_tool_call_request["args"][IS_KEYWORD_KEY] = is_keyword
+        selected_tool_call_request["args"][KEYWORDS_KEY] = keywords
+
     return ToolChoiceUpdate(
         tool_choice=ToolChoice(
             tool=selected_tool,

@@ -9,18 +9,23 @@
 from onyx.agents.agent_search.basic.utils import process_llm_stream
 from onyx.agents.agent_search.models import GraphConfig
 from onyx.chat.models import LlmDoc
-from onyx.chat.models import OnyxContexts
 from onyx.tools.tool_implementations.search.search_tool import (
-    SEARCH_DOC_CONTENT_ID,
+    SEARCH_RESPONSE_SUMMARY_ID,
+)
+from onyx.tools.tool_implementations.search.search_tool import SearchResponseSummary
+from onyx.tools.tool_implementations.search.search_utils import (
+    context_from_inference_section,
 )
 from onyx.tools.tool_implementations.search_like_tool_utils import (
     FINAL_CONTEXT_DOCUMENTS_ID,
 )
 from onyx.utils.logger import setup_logger
+from onyx.utils.timing import log_function_time
 
 logger = setup_logger()
 
 
+@log_function_time(print_only=True)
 def basic_use_tool_response(
     state: BasicState, config: RunnableConfig, writer: StreamWriter = lambda _: None
 ) -> BasicOutput:
@@ -50,11 +55,13 @@ def basic_use_tool_response(
     for yield_item in tool_call_responses:
         if yield_item.id == FINAL_CONTEXT_DOCUMENTS_ID:
             final_search_results = cast(list[LlmDoc], yield_item.response)
-        elif yield_item.id == SEARCH_DOC_CONTENT_ID:
-            search_contexts = cast(OnyxContexts, yield_item.response).contexts
-            for doc in search_contexts:
-                if doc.document_id not in initial_search_results:
-                    initial_search_results.append(doc)
+        elif yield_item.id == SEARCH_RESPONSE_SUMMARY_ID:
+            search_response_summary = cast(SearchResponseSummary, yield_item.response)
+            for section in search_response_summary.top_sections:
+                if section.center_chunk.document_id not in initial_search_results:
+                    initial_search_results.append(
+                        context_from_inference_section(section)
+                    )
 
     new_tool_call_chunk = AIMessageChunk(content="")
     if not agent_config.behavior.skip_gen_ai_answer_generation:

@@ -13,6 +13,11 @@
 AGENT_ANSWER_SEPARATOR = "Answer:"
 
 
+EMBEDDING_KEY = "embedding"
+IS_KEYWORD_KEY = "is_keyword"
+KEYWORDS_KEY = "keywords"
+
+
 class AgentLLMErrorType(str, Enum):
     TIMEOUT = "timeout"
     RATE_LIMIT = "rate_limit"

@@ -90,97 +90,97 @@ def process_token(
                     next(group for group in citation.groups() if group is not None)
                 )
 
-                if 1 <= numerical_value <= self.max_citation_num:
-                    context_llm_doc = self.context_docs[numerical_value - 1]
-                    final_citation_num = self.final_order_mapping[
-                        context_llm_doc.document_id
-                    ]
+                if not (1 <= numerical_value <= self.max_citation_num):
+                    continue
+
+                context_llm_doc = self.context_docs[numerical_value - 1]
+                final_citation_num = self.final_order_mapping[
+                    context_llm_doc.document_id
+                ]
 
-                    if final_citation_num not in self.citation_order:
-                        self.citation_order.append(final_citation_num)
+                if final_citation_num not in self.citation_order:
+                    self.citation_order.append(final_citation_num)
 
-                    citation_order_idx = (
-                        self.citation_order.index(final_citation_num) + 1
+                citation_order_idx = self.citation_order.index(final_citation_num) + 1
+
+                # get the value that was displayed to the user; it should always
+                # be in display_order_mapping, but check anyway
+                if context_llm_doc.document_id in self.display_order_mapping:
+                    displayed_citation_num = self.display_order_mapping[
+                        context_llm_doc.document_id
+                    ]
+                else:
+                    displayed_citation_num = final_citation_num
+                    logger.warning(
+                        f"Doc {context_llm_doc.document_id} not in display_doc_order_dict. Used LLM citation number instead."
                     )
 
-                    # get the value that was displayed to user, should always
-                    # be in the display_doc_order_dict. But check anyways
-                    if context_llm_doc.document_id in self.display_order_mapping:
-                        displayed_citation_num = self.display_order_mapping[
-                            context_llm_doc.document_id
-                        ]
+                # Skip consecutive citations of the same work
+                if final_citation_num in self.current_citations:
+                    start, end = citation.span()
+                    real_start = length_to_add + start
+                    diff = end - start
+                    self.curr_segment = (
+                        self.curr_segment[: length_to_add + start]
+                        + self.curr_segment[real_start + diff :]
+                    )
+                    length_to_add -= diff
+                    continue
+
+                # Handle edge case where LLM outputs citation itself
+                if self.curr_segment.startswith("[["):
+                    match = re.match(r"\[\[(\d+)\]\]", self.curr_segment)
+                    if match:
+                        try:
+                            doc_id = int(match.group(1))
+                            context_llm_doc = self.context_docs[doc_id - 1]
+                            yield CitationInfo(
+                                # citation_num is now the number post initial ranking, i.e. as displayed to user
+                                citation_num=displayed_citation_num,
+                                document_id=context_llm_doc.document_id,
+                            )
+                        except Exception as e:
+                            logger.warning(
+                                f"Manual LLM citation didn't properly cite documents {e}"
+                            )
                     else:
-                        displayed_citation_num = final_citation_num
                         logger.warning(
-                            f"Doc {context_llm_doc.document_id} not in display_doc_order_dict. Used LLM citation number instead."
+                            "Manual LLM citation wasn't able to close brackets"
                         )
+                    continue
 
-                    # Skip consecutive citations of the same work
-                    if final_citation_num in self.current_citations:
-                        start, end = citation.span()
-                        real_start = length_to_add + start
-                        diff = end - start
-                        self.curr_segment = (
-                            self.curr_segment[: length_to_add + start]
-                            + self.curr_segment[real_start + diff :]
-                        )
-                        length_to_add -= diff
-                        continue
-
-                    # Handle edge case where LLM outputs citation itself
-                    if self.curr_segment.startswith("[["):
-                        match = re.match(r"\[\[(\d+)\]\]", self.curr_segment)
-                        if match:
-                            try:
-                                doc_id = int(match.group(1))
-                                context_llm_doc = self.context_docs[doc_id - 1]
-                                yield CitationInfo(
-                                    # citation_num is now the number post initial ranking, i.e. as displayed to user
-                                    citation_num=displayed_citation_num,
-                                    document_id=context_llm_doc.document_id,
-                                )
-                            except Exception as e:
-                                logger.warning(
-                                    f"Manual LLM citation didn't properly cite documents {e}"
-                                )
-                        else:
-                            logger.warning(
-                                "Manual LLM citation wasn't able to close brackets"
-                            )
-                        continue
-
-                    link = context_llm_doc.link
+                link = context_llm_doc.link
 
-                    self.past_cite_count = len(self.llm_out)
-                    self.current_citations.append(final_citation_num)
+                self.past_cite_count = len(self.llm_out)
+                self.current_citations.append(final_citation_num)
 
-                    if citation_order_idx not in self.cited_inds:
-                        self.cited_inds.add(citation_order_idx)
-                        yield CitationInfo(
-                            # citation number is now the one that was displayed to user
-                            citation_num=displayed_citation_num,
-                            document_id=context_llm_doc.document_id,
-                        )
+                if citation_order_idx not in self.cited_inds:
+                    self.cited_inds.add(citation_order_idx)
+                    yield CitationInfo(
+                        # citation number is now the one that was displayed to user
+                        citation_num=displayed_citation_num,
+                        document_id=context_llm_doc.document_id,
+                    )
 
-                    start, end = citation.span()
-                    if link:
-                        prev_length = len(self.curr_segment)
-                        self.curr_segment = (
-                            self.curr_segment[: start + length_to_add]
-                            + f"[[{displayed_citation_num}]]({link})"  # use the value that was displayed to user
-                            + self.curr_segment[end + length_to_add :]
-                        )
-                        length_to_add += len(self.curr_segment) - prev_length
-                    else:
-                        prev_length = len(self.curr_segment)
-                        self.curr_segment = (
-                            self.curr_segment[: start + length_to_add]
-                            + f"[[{displayed_citation_num}]]()"  # use the value that was displayed to user
-                            + self.curr_segment[end + length_to_add :]
-                        )
-                        length_to_add += len(self.curr_segment) - prev_length
+                start, end = citation.span()
+                if link:
+                    prev_length = len(self.curr_segment)
+                    self.curr_segment = (
+                        self.curr_segment[: start + length_to_add]
+                        + f"[[{displayed_citation_num}]]({link})"  # use the value that was displayed to user
+                        + self.curr_segment[end + length_to_add :]
+                    )
+                    length_to_add += len(self.curr_segment) - prev_length
+                else:
+                    prev_length = len(self.curr_segment)
+                    self.curr_segment = (
+                        self.curr_segment[: start + length_to_add]
+                        + f"[[{displayed_citation_num}]]()"  # use the value that was displayed to user
+                        + self.curr_segment[end + length_to_add :]
+                    )
+                    length_to_add += len(self.curr_segment) - prev_length
 
-                    last_citation_end = end + length_to_add
+                last_citation_end = end + length_to_add
 
             if last_citation_end > 0:
                 result += self.curr_segment[:last_citation_end]