diff --git a/.viperlightignore b/.viperlightignore index d92077aaa..3d491d0e8 100644 --- a/.viperlightignore +++ b/.viperlightignore @@ -13,6 +13,7 @@ api_test/test_data/* api_test/gen-report-lambda.py source/portal/src/utils/const.ts source/lambda/online/lambda_main/test/main_local_test_retail.py +source/lambda/online/lambda_main/test/main_local_test_common.py source/lambda/online/functions/retail_tools/lambda_product_information_search/product_information_search.py source/lambda/job/test/prepare_data.py README.md diff --git a/source/infrastructure/lib/chat/chat-stack.ts b/source/infrastructure/lib/chat/chat-stack.ts index fd8f8d2aa..2373aa9d0 100644 --- a/source/infrastructure/lib/chat/chat-stack.ts +++ b/source/infrastructure/lib/chat/chat-stack.ts @@ -69,7 +69,7 @@ export class ChatStack extends NestedStack implements ChatStackOutputs { private lambdaOnlineAgent: Function; private lambdaOnlineLLMGenerate: Function; private chatbotTableName: string; - private lambdaOnlineFunctions: Function; + // private lambdaOnlineFunctions: Function; constructor(scope: Construct, id: string, props: ChatStackProps) { super(scope, id); @@ -282,23 +282,23 @@ export class ChatStack extends NestedStack implements ChatStackOutputs { this.lambdaOnlineLLMGenerate.addToRolePolicy(this.iamHelper.dynamodbStatement); - const lambdaOnlineFunctions = new LambdaFunction(this, "lambdaOnlineFunctions", { - runtime: Runtime.PYTHON_3_12, - handler: "lambda_tools.lambda_handler", - code: Code.fromAsset( - join(__dirname, "../../../lambda/online/functions/functions_utils"), - ), - memorySize: 4096, - vpc: vpc, - securityGroups: securityGroups, - layers: [apiLambdaOnlineSourceLayer, apiLambdaJobSourceLayer], - environment: { - CHATBOT_TABLE: props.sharedConstructOutputs.chatbotTable.tableName, - INDEX_TABLE: this.indexTableName, - MODEL_TABLE: this.modelTableName, - }, - }); - this.lambdaOnlineFunctions = lambdaOnlineFunctions.function; + // const lambdaOnlineFunctions = new LambdaFunction(this, "lambdaOnlineFunctions", { + // runtime: Runtime.PYTHON_3_12, + // handler: "lambda_tools.lambda_handler", + // code: Code.fromAsset( + // join(__dirname, "../../../lambda/online/functions/functions_utils"), + // ), + // memorySize: 4096, + // vpc: vpc, + // securityGroups: securityGroups, + // layers: [apiLambdaOnlineSourceLayer, apiLambdaJobSourceLayer], + // environment: { + // CHATBOT_TABLE: props.sharedConstructOutputs.chatbotTable.tableName, + // INDEX_TABLE: this.indexTableName, + // MODEL_TABLE: this.modelTableName, + // }, + // }); + // this.lambdaOnlineFunctions = lambdaOnlineFunctions.function; this.lambdaOnlineQueryPreprocess.grantInvoke(this.lambdaOnlineMain); @@ -310,8 +310,8 @@ export class ChatStack extends NestedStack implements ChatStackOutputs { this.lambdaOnlineLLMGenerate.grantInvoke(this.lambdaOnlineQueryPreprocess); this.lambdaOnlineLLMGenerate.grantInvoke(this.lambdaOnlineAgent); - this.lambdaOnlineFunctions.grantInvoke(this.lambdaOnlineMain); - this.lambdaOnlineFunctions.grantInvoke(this.lambdaOnlineIntentionDetection); + // this.lambdaOnlineFunctions.grantInvoke(this.lambdaOnlineMain); + // this.lambdaOnlineFunctions.grantInvoke(this.lambdaOnlineIntentionDetection); if (props.config.chat.amazonConnect.enabled) { new ConnectConstruct(this, "connect-construct", { diff --git a/source/infrastructure/lib/knowledge-base/knowledge-base-stack.ts b/source/infrastructure/lib/knowledge-base/knowledge-base-stack.ts index 52812678a..29fef8a11 100644 --- 
a/source/infrastructure/lib/knowledge-base/knowledge-base-stack.ts +++ b/source/infrastructure/lib/knowledge-base/knowledge-base-stack.ts @@ -233,7 +233,7 @@ export class KnowledgeBaseStack extends NestedStack implements KnowledgeBaseStac "--PORTAL_BUCKET": this.uiPortalBucketName, "--CHATBOT_TABLE": props.sharedConstructOutputs.chatbotTable.tableName, "--additional-python-modules": - "langchain==0.1.11,beautifulsoup4==4.12.2,requests-aws4auth==1.2.3,boto3==1.28.84,openai==0.28.1,pyOpenSSL==23.3.0,tenacity==8.2.3,markdownify==0.11.6,mammoth==1.6.0,chardet==5.2.0,python-docx==1.1.0,nltk==3.8.1,pdfminer.six==20221105,smart-open==7.0.4,lxml==5.2.2,pandas==2.1.2,openpyxl==3.1.5,xlrd==2.0.1", + "langchain==0.1.11,beautifulsoup4==4.12.2,requests-aws4auth==1.2.3,boto3==1.28.84,openai==0.28.1,pyOpenSSL==23.3.0,tenacity==8.2.3,markdownify==0.11.6,mammoth==1.6.0,chardet==5.2.0,python-docx==1.1.0,nltk==3.8.1,pdfminer.six==20221105,smart-open==7.0.4,lxml==5.2.2,pandas==2.1.2,openpyxl==3.1.5,xlrd==2.0.1,langchain_community==0.3.5", // Add multiple extra python files "--extra-py-files": extraPythonFilesList }, diff --git a/source/lambda/job/dep/llm_bot_dep/sm_utils.py b/source/lambda/job/dep/llm_bot_dep/sm_utils.py index f9a063268..7520ec9c5 100644 --- a/source/lambda/job/dep/llm_bot_dep/sm_utils.py +++ b/source/lambda/job/dep/llm_bot_dep/sm_utils.py @@ -1,11 +1,12 @@ import json import io from typing import Any, Dict, Iterator, List, Mapping, Optional -from langchain.llms.sagemaker_endpoint import LLMContentHandler, SagemakerEndpoint -from langchain.embeddings import SagemakerEndpointEmbeddings, BedrockEmbeddings -from langchain.embeddings.sagemaker_endpoint import EmbeddingsContentHandler +from langchain_community.llms import SagemakerEndpoint +from langchain_community.llms.sagemaker_endpoint import LLMContentHandler +from langchain_community.embeddings import SagemakerEndpointEmbeddings,BedrockEmbeddings +from langchain_community.embeddings.sagemaker_endpoint import EmbeddingsContentHandler from langchain.callbacks.manager import CallbackManagerForLLMRun -from langchain.llms.utils import enforce_stop_tokens +from langchain_community.llms.utils import enforce_stop_tokens from typing import Dict, List, Optional, Any,Iterator from langchain_core.outputs import GenerationChunk import boto3 @@ -234,12 +235,12 @@ def transform_output(self, output: bytes) -> str: function. See `boto3`_. docs for more info. .. 
_boto3: """ - content_type = "application/json" - accepts = "application/json" + content_type: str = "application/json" + accepts: str = "application/json" class Config: """Configuration for this pydantic object.""" - extra = Extra.forbid + extra = Extra.forbid.value @root_validator() def validate_environment(cls, values: Dict) -> Dict: diff --git a/source/lambda/job/dep/requirements.txt b/source/lambda/job/dep/requirements.txt index f6284f98b..37eb0483f 100644 --- a/source/lambda/job/dep/requirements.txt +++ b/source/lambda/job/dep/requirements.txt @@ -16,4 +16,5 @@ opensearch-py==2.6.0 lxml==5.2.2 pandas==2.1.2 openpyxl==3.1.5 -xlrd==2.0.1 \ No newline at end of file +xlrd==2.0.1 +langchain_community==0.3.5 \ No newline at end of file diff --git a/source/lambda/online/common_entry_agent_workflow.png b/source/lambda/online/common_entry_agent_workflow.png deleted file mode 100644 index 851ca8d2b..000000000 Binary files a/source/lambda/online/common_entry_agent_workflow.png and /dev/null differ diff --git a/source/lambda/online/common_entry_workflow.png b/source/lambda/online/common_entry_workflow.png index 697d76747..abd505301 100644 Binary files a/source/lambda/online/common_entry_workflow.png and b/source/lambda/online/common_entry_workflow.png differ diff --git a/source/lambda/online/common_logic/common_utils/constant.py b/source/lambda/online/common_logic/common_utils/constant.py index 45e8631d0..1ed416130 100644 --- a/source/lambda/online/common_logic/common_utils/constant.py +++ b/source/lambda/online/common_logic/common_utils/constant.py @@ -82,17 +82,19 @@ class LLMTaskType(ConstantBase): HYDE_TYPE = "hyde" CONVERSATION_SUMMARY_TYPE = "conversation_summary" RETAIL_CONVERSATION_SUMMARY_TYPE = "retail_conversation_summary" - MKT_CONVERSATION_SUMMARY_TYPE = "mkt_conversation_summary" MKT_QUERY_REWRITE_TYPE = "mkt_query_rewrite" STEPBACK_PROMPTING_TYPE = "stepback_prompting" - TOOL_CALLING = "tool_calling" + TOOL_CALLING_XML = "tool_calling_xml" + TOOL_CALLING_API = "tool_calling_api" RETAIL_TOOL_CALLING = "retail_tool_calling" RAG = "rag" + MTK_RAG = "mkt_rag" CHAT = 'chat' AUTO_EVALUATION = "auto_evaluation" + class MessageType(ConstantBase): HUMAN_MESSAGE_TYPE = 'human' AI_MESSAGE_TYPE = 'ai' @@ -126,19 +128,26 @@ class LLMModelType(ConstantBase): CLAUDE_2 = "anthropic.claude-v2" CLAUDE_21 = "anthropic.claude-v2:1" CLAUDE_3_HAIKU = "anthropic.claude-3-haiku-20240307-v1:0" + CLAUDE_3_5_HAIKU = "anthropic.claude-3-5-haiku-20241022-v1:0" CLAUDE_3_SONNET = "anthropic.claude-3-sonnet-20240229-v1:0" CLAUDE_3_5_SONNET = "anthropic.claude-3-5-sonnet-20240620-v1:0" + CLAUDE_3_5_SONNET_V2 = "anthropic.claude-3-5-sonnet-20241022-v2:0" MIXTRAL_8X7B_INSTRUCT = "mistral.mixtral-8x7b-instruct-v0:1" BAICHUAN2_13B_CHAT = "Baichuan2-13B-Chat-4bits" INTERNLM2_CHAT_7B = "internlm2-chat-7b" INTERNLM2_CHAT_20B = "internlm2-chat-20b" GLM_4_9B_CHAT = "glm-4-9b-chat" - CHATGPT_35_TURBO = "gpt-3.5-turbo-0125" + CHATGPT_35_TURBO_0125 = "gpt-3.5-turbo-0125" CHATGPT_4_TURBO = "gpt-4-turbo" CHATGPT_4O = "gpt-4o" QWEN2INSTRUCT7B = "qwen2-7B-instruct" QWEN2INSTRUCT72B = "qwen2-72B-instruct" QWEN15INSTRUCT32B = "qwen1_5-32B-instruct" + LLAMA3_1_70B_INSTRUCT = "meta.llama3-1-70b-instruct-v1:0" + LLAMA3_2_90B_INSTRUCT = "us.meta.llama3-2-90b-instruct-v1:0" + MISTRAL_LARGE_2407 = "mistral.mistral-large-2407-v1:0" + COHERE_COMMAND_R_PLUS = "cohere.command-r-plus-v1:0" + class EmbeddingModelType(ConstantBase): @@ -170,8 +179,7 @@ class IndexTag(Enum): @unique class KBType(Enum): - AOS = "aos" - + AOS = "aos" 
GUIDE_INTENTION_NOT_FOUND = "Intention not found, please add intentions first when using agent mode, refer to https://amzn-chn.feishu.cn/docx/HlxvduJYgoOz8CxITxXc43XWn8e" INDEX_DESC = "Answer question based on search result" @@ -179,4 +187,5 @@ class KBType(Enum): class Threshold(ConstantBase): QQ_IN_RAG_CONTEXT = 0.5 + INTENTION_ALL_KNOWLEDGE_RETRIEVAL = 0.4 diff --git a/source/lambda/online/common_logic/common_utils/lambda_invoke_utils.py b/source/lambda/online/common_logic/common_utils/lambda_invoke_utils.py index a03b0cf93..923f3ddde 100644 --- a/source/lambda/online/common_logic/common_utils/lambda_invoke_utils.py +++ b/source/lambda/online/common_logic/common_utils/lambda_invoke_utils.py @@ -3,18 +3,23 @@ import importlib import json import time +import os from typing import Any, Dict, Optional, Callable, Union +import threading import requests from common_logic.common_utils.constant import StreamMessageType from common_logic.common_utils.logger_utils import get_logger from common_logic.common_utils.websocket_utils import is_websocket_request, send_to_ws_client -from langchain.pydantic_v1 import BaseModel, Field, root_validator +from pydantic import BaseModel, Field, model_validator + from .exceptions import LambdaInvokeError logger = get_logger("lambda_invoke_utils") - +# thread_local = threading.local() +thread_local = threading.local() +CURRENT_STATE = None __FUNC_NAME_MAP = { "query_preprocess": "Preprocess for Multi-round Conversation", @@ -26,6 +31,38 @@ "llm_direct_results_generation": "LLM Response" } + +class StateContext: + + def __init__(self,state): + self.state=state + + @classmethod + def get_current_state(cls): + # print("thread id",threading.get_ident(),'parent id',threading.) + # state = getattr(thread_local,'state',None) + state = CURRENT_STATE + assert state is not None,"There is not a valid state in current context" + return state + + @classmethod + def set_current_state(cls, state): + global CURRENT_STATE + assert CURRENT_STATE is None, "Parallel node executions are not alowed" + CURRENT_STATE = state + + @classmethod + def clear_state(cls): + global CURRENT_STATE + CURRENT_STATE = None + + def __enter__(self): + self.set_current_state(self.state) + + def __exit__(self, exc_type, exc_val, exc_tb): + self.clear_state() + + class LAMBDA_INVOKE_MODE(enum.Enum): LAMBDA = "lambda" LOCAL = "local" @@ -55,26 +92,24 @@ class LambdaInvoker(BaseModel): region_name: str = None credentials_profile_name: Optional[str] = Field(default=None, exclude=True) - @root_validator() + @model_validator(mode="before") def validate_environment(cls, values: Dict): if values.get("client") is not None: return values try: import boto3 - try: - if values["credentials_profile_name"] is not None: + if values.get("credentials_profile_name") is not None: session = boto3.Session( profile_name=values["credentials_profile_name"] ) else: # use default credentials session = boto3.Session() - values["client"] = session.client( - "lambda", region_name=values["region_name"] + "lambda", + region_name=values.get("region_name",os.environ['AWS_REGION']) ) - except Exception as e: raise ValueError( "Could not load credentials to authenticate with AWS client. 
" @@ -97,8 +132,9 @@ def invoke_with_lambda(self, lambda_name: str, event_body: dict): ) response_body = invoke_response["Payload"] response_str = response_body.read().decode() - response_body = json.loads(response_str) + if "body" in response_body: + response_body = json.loads(response_body['body']) if "errorType" in response_body: error = ( @@ -108,7 +144,6 @@ def invoke_with_lambda(self, lambda_name: str, event_body: dict): + f"{response_body['errorType']}: {response_body['errorMessage']}" ) raise LambdaInvokeError(error) - return response_body def invoke_with_local( @@ -285,7 +320,10 @@ def wrapper(state: Dict[str, Any]) -> Dict[str, Any]: current_stream_use, ws_connection_id, enable_trace) state['trace_infos'].append( f"Enter: {func.__name__}, time: {time.time()}") - output = func(state) + + with StateContext(state): + output = func(state) + current_monitor_infos = output.get(monitor_key, None) if current_monitor_infos is not None: send_trace(f"\n\n {current_monitor_infos}", diff --git a/source/lambda/online/common_logic/common_utils/logger_utils.py b/source/lambda/online/common_logic/common_utils/logger_utils.py index 22ba70327..118459421 100644 --- a/source/lambda/online/common_logic/common_utils/logger_utils.py +++ b/source/lambda/online/common_logic/common_utils/logger_utils.py @@ -1,4 +1,3 @@ - import logging import threading import os @@ -10,15 +9,13 @@ logger_lock = threading.Lock() -def cloud_print_wrapper(fn): - @wraps(fn) - def _inner(msg, *args, **kwargs): +class CloudStreamHandler(logging.StreamHandler): + def emit(self, record): from common_logic.common_utils.lambda_invoke_utils import is_running_local if not is_running_local: # enable multiline as one message in cloudwatch - msg = msg.replace("\n", "\r") - return fn(msg, *args, **kwargs) - return _inner + record.msg = record.msg.replace("\n", "\r") + return super().emit(record) class Logger: @@ -37,16 +34,11 @@ def _get_logger( logger = logging.getLogger(name) logger.propagate = 0 # Create a handler - c_handler = logging.StreamHandler() + c_handler = CloudStreamHandler() formatter = logging.Formatter(format, datefmt=datefmt) c_handler.setFormatter(formatter) logger.addHandler(c_handler) logger.setLevel(level) - logger.info = cloud_print_wrapper(logger.info) - logger.error = cloud_print_wrapper(logger.error) - logger.warning = cloud_print_wrapper(logger.warning) - logger.critical = cloud_print_wrapper(logger.critical) - logger.debug = cloud_print_wrapper(logger.debug) cls.logger_map[name] = logger return logger @@ -72,3 +64,15 @@ def print_llm_messages(msg, logger=logger): "ENABLE_PRINT_MESSAGES", 'True').lower() in ('true', '1', 't') if enable_print_messages: logger.info(msg) + + +def llm_messages_print_decorator(fn): + @wraps(fn) + def _inner(*args, **kwargs): + if args: + print_llm_messages(args) + if kwargs: + print_llm_messages(kwargs) + return fn(*args, **kwargs) + return _inner + diff --git a/source/lambda/online/common_logic/common_utils/prompt_utils.py b/source/lambda/online/common_logic/common_utils/prompt_utils.py index 1b7be44a9..2adec2858 100644 --- a/source/lambda/online/common_logic/common_utils/prompt_utils.py +++ b/source/lambda/online/common_logic/common_utils/prompt_utils.py @@ -2,7 +2,7 @@ import os import json -from langchain.pydantic_v1 import BaseModel, Field +from pydantic import BaseModel, Field from collections import defaultdict from common_logic.common_utils.constant import LLMModelType, LLMTaskType import copy @@ -17,7 +17,11 @@ EXPORT_MODEL_IDS = [ LLMModelType.CLAUDE_3_HAIKU, 
LLMModelType.CLAUDE_3_SONNET, - # LLMModelType.CLAUDE_3_5_SONNET, + LLMModelType.CLAUDE_3_5_HAIKU, + LLMModelType.CLAUDE_3_5_SONNET_V2, + LLMModelType.LLAMA3_1_70B_INSTRUCT, + LLMModelType.MISTRAL_LARGE_2407, + LLMModelType.COHERE_COMMAND_R_PLUS ] EXPORT_SCENES = [ @@ -141,8 +145,14 @@ def prompt_template_render(self, prompt_template: dict): LLMModelType.CLAUDE_3_HAIKU, LLMModelType.CLAUDE_3_SONNET, LLMModelType.CLAUDE_3_5_SONNET, + LLMModelType.CLAUDE_3_5_HAIKU, + LLMModelType.CLAUDE_3_5_SONNET_V2, LLMModelType.CLAUDE_INSTANCE, - LLMModelType.MIXTRAL_8X7B_INSTRUCT + LLMModelType.MIXTRAL_8X7B_INSTRUCT, + LLMModelType.LLAMA3_1_70B_INSTRUCT, + LLMModelType.LLAMA3_2_90B_INSTRUCT, + LLMModelType.MISTRAL_LARGE_2407, + LLMModelType.COHERE_COMMAND_R_PLUS, ], task_type=LLMTaskType.RAG, prompt_template=CLAUDE_RAG_SYSTEM_PROMPT, @@ -171,37 +181,9 @@ def prompt_template_render(self, prompt_template: dict): ) -# CHIT_CHAT_SYSTEM_TEMPLATE = "你是一个AI助理。今天是{date},{weekday}. " - -# register_prompt_templates( -# model_ids=[ -# LLMModelType.CLAUDE_2, -# LLMModelType.CLAUDE_21, -# LLMModelType.CLAUDE_3_HAIKU, -# LLMModelType.CLAUDE_3_SONNET, -# LLMModelType.CLAUDE_3_5_SONNET, -# LLMModelType.CLAUDE_INSTANCE, -# LLMModelType.MIXTRAL_8X7B_INSTRUCT, -# LLMModelType.GLM_4_9B_CHAT, -# LLMModelType.QWEN2INSTRUCT72B, -# LLMModelType.QWEN2INSTRUCT7B -# ], -# task_type=LLMTaskType.CHAT, -# prompt_template=CHIT_CHAT_SYSTEM_TEMPLATE, -# prompt_name="system_prompt" -# ) - - -# CQR_TEMPLATE = """Given the following conversation between `USER` and `AI`, and a follow up `USER` reply, Put yourself in the shoes of `USER`, rephrase the follow up \ -# `USER` reply to be a standalone reply. - -# Chat History: -# {history} - -# The USER's follow up reply: {question}""" - - +################ # query rewrite prompt template from paper https://arxiv.org/pdf/2401.10225 +################### CQR_SYSTEM_PROMPT = """You are a helpful, pattern-following assistant.""" CQR_USER_PROMPT_TEMPLATE = """Given the following conversation between PersonU and PersonA: @@ -280,11 +262,18 @@ def prompt_template_render(self, prompt_template: dict): LLMModelType.CLAUDE_3_HAIKU, LLMModelType.CLAUDE_3_SONNET, LLMModelType.CLAUDE_3_5_SONNET, + LLMModelType.CLAUDE_3_5_HAIKU, + LLMModelType.CLAUDE_3_5_SONNET_V2, LLMModelType.CLAUDE_INSTANCE, LLMModelType.MIXTRAL_8X7B_INSTRUCT, LLMModelType.QWEN2INSTRUCT72B, LLMModelType.QWEN2INSTRUCT7B, - LLMModelType.GLM_4_9B_CHAT + LLMModelType.GLM_4_9B_CHAT, + LLMModelType.LLAMA3_1_70B_INSTRUCT, + LLMModelType.LLAMA3_2_90B_INSTRUCT, + LLMModelType.MISTRAL_LARGE_2407, + LLMModelType.COHERE_COMMAND_R_PLUS, + ], task_type=LLMTaskType.CONVERSATION_SUMMARY_TYPE, prompt_template=CQR_SYSTEM_PROMPT, @@ -298,11 +287,17 @@ def prompt_template_render(self, prompt_template: dict): LLMModelType.CLAUDE_3_HAIKU, LLMModelType.CLAUDE_3_SONNET, LLMModelType.CLAUDE_3_5_SONNET, + LLMModelType.CLAUDE_3_5_HAIKU, + LLMModelType.CLAUDE_3_5_SONNET_V2, LLMModelType.CLAUDE_INSTANCE, LLMModelType.MIXTRAL_8X7B_INSTRUCT, LLMModelType.QWEN2INSTRUCT72B, LLMModelType.QWEN2INSTRUCT7B, - LLMModelType.GLM_4_9B_CHAT + LLMModelType.GLM_4_9B_CHAT, + LLMModelType.LLAMA3_1_70B_INSTRUCT, + LLMModelType.LLAMA3_2_90B_INSTRUCT, + LLMModelType.MISTRAL_LARGE_2407, + LLMModelType.COHERE_COMMAND_R_PLUS, ], task_type=LLMTaskType.CONVERSATION_SUMMARY_TYPE, prompt_template=CQR_USER_PROMPT_TEMPLATE, @@ -317,18 +312,26 @@ def prompt_template_render(self, prompt_template: dict): LLMModelType.CLAUDE_3_HAIKU, LLMModelType.CLAUDE_3_SONNET, LLMModelType.CLAUDE_3_5_SONNET, + 
LLMModelType.CLAUDE_3_5_HAIKU, + LLMModelType.CLAUDE_3_5_SONNET_V2, LLMModelType.CLAUDE_INSTANCE, LLMModelType.MIXTRAL_8X7B_INSTRUCT, LLMModelType.QWEN2INSTRUCT72B, LLMModelType.QWEN2INSTRUCT7B, - LLMModelType.GLM_4_9B_CHAT + LLMModelType.GLM_4_9B_CHAT, + LLMModelType.LLAMA3_1_70B_INSTRUCT, + LLMModelType.LLAMA3_2_90B_INSTRUCT, + LLMModelType.MISTRAL_LARGE_2407, + LLMModelType.COHERE_COMMAND_R_PLUS, ], task_type=LLMTaskType.CONVERSATION_SUMMARY_TYPE, prompt_template=json.dumps(CQR_FEW_SHOTS, ensure_ascii=False, indent=2), prompt_name="few_shots" ) -# agent prompt + + +############## xml agent prompt ############# AGENT_USER_PROMPT = "你是一个AI助理。今天是{date},{weekday}. " register_prompt_templates( model_ids=[ LLMModelType.CLAUDE_2, LLMModelType.CLAUDE_21, LLMModelType.CLAUDE_3_HAIKU, LLMModelType.CLAUDE_3_SONNET, LLMModelType.CLAUDE_3_5_SONNET, + LLMModelType.CLAUDE_3_5_HAIKU, + LLMModelType.CLAUDE_3_5_SONNET_V2, ], - task_type=LLMTaskType.TOOL_CALLING, + task_type=LLMTaskType.TOOL_CALLING_XML, prompt_template=AGENT_USER_PROMPT, prompt_name="user_prompt" ) @@ -362,12 +367,144 @@ def prompt_template_render(self, prompt_template: dict): LLMModelType.CLAUDE_3_HAIKU, LLMModelType.CLAUDE_3_SONNET, LLMModelType.CLAUDE_3_5_SONNET, + LLMModelType.CLAUDE_3_5_HAIKU, + LLMModelType.CLAUDE_3_5_SONNET_V2, + LLMModelType.LLAMA3_1_70B_INSTRUCT, + LLMModelType.MISTRAL_LARGE_2407, + LLMModelType.COHERE_COMMAND_R_PLUS, ], - task_type=LLMTaskType.TOOL_CALLING, + task_type=LLMTaskType.TOOL_CALLING_XML, prompt_template=AGENT_GUIDELINES_PROMPT, prompt_name="guidelines_prompt" ) +################# api agent prompt ##################### +AGENT_SYSTEM_PROMPT = """\ +You are a helpful and honest AI assistant. Today is {date},{weekday}. +Here are some guidelines for you: + +- Here are steps for you to decide which tool to use: + 1. Determine whether the current context is sufficient to answer the user's question. + 2. If the current context is sufficient to answer the user's question, call the `give_final_response` tool. + 3. If the current context is not sufficient to answer the user's question, you can consider calling one of the provided tools. + 4. If any of the required parameters of the tool you want to call do not appear in context, call the `give_rhetorical_question` tool to ask the user for more information. +- Always output with the same language as the content from user. If the content is English, use English to output. If the content is Chinese, use Chinese to output. +- Always call one tool at a time. + +Here's some context for reference: + +{context} +""" + +# AGENT_SYSTEM_PROMPT = """\ +# You are a helpful and honest AI assistant. Today is {date},{weekday}. +# Here are some guidelines for you: +# +# - Output your step by step thinking in one pair of and tags, here are steps for you to think about when deciding which tool to use: +# 1. If the context contains the result of last tool call, it needs to be analyzed. +# 2. Determine whether the current context is sufficient to answer the user's question. +# 3. If the current context is sufficient to answer the user's question, call the `give_final_response` tool. +# 4. If the current context is not sufficient to answer the user's question, you can consider calling one of the provided tools. +# 5. If any of the required parameters of the tool you want to call do not appear in context, call the `give_rhetorical_question` tool to ask the user for more information. +# - Always output with the same language as the content from user. 
If the content is English, use English to output. If the content is Chinese, use Chinese to output. +# - Always invoke one tool. +# - Before invoking any tool, be sure to first output your thought process in one pair of and tag. +# """ + + +# AGENT_SYSTEM_PROMPT = """\ +# You are a helpful and honest AI assistant. Today is {date},{weekday}. +# Here are some guidelines for you: +# +# - Output your step by step thinking in one pair of and tags, here are steps for you to think about when deciding which tool to use: +# 1. If the context contains the result of last tool call, it needs to be analyzed. +# 2. Determine whether the current context is sufficient to answer the user's question. +# 3. If the current context is sufficient to answer the user's question, call the `give_final_response` tool. +# 4. If the current context is not sufficient to answer the user's question, you can consider calling one of the provided tools. +# 5. If any of the required parameters of the tool you want to call do not appear in context, call the `give_rhetorical_question` tool to ask the user for more information. +# - Always output with the same language as the content from user. If the content is English, use English to output. If the content is Chinese, use Chinese to output. +# - Always invoke one tool. +# + +# # Output example +# +# write your thinking according to guidelines. +# +# [invoke some tools]""" + + +# - Output your thinking before calling one tool. register_prompt_templates( model_ids=[ + LLMModelType.CLAUDE_3_HAIKU, + LLMModelType.CLAUDE_3_SONNET, + LLMModelType.CLAUDE_3_5_SONNET, + LLMModelType.CLAUDE_3_5_SONNET_V2, + LLMModelType.CLAUDE_3_5_HAIKU, + # LLMModelType.LLAMA3_1_70B_INSTRUCT, + # LLMModelType.LLAMA3_2_90B_INSTRUCT, + # LLMModelType.MISTRAL_LARGE_2407, + # LLMModelType.COHERE_COMMAND_R_PLUS, + ], + task_type=LLMTaskType.TOOL_CALLING_API, + prompt_template=AGENT_SYSTEM_PROMPT, + prompt_name="agent_system_prompt" +) + + +AGENT_SYSTEM_PROMPT_COHERE = """\ +## Task & Context +You are a helpful and honest AI assistant. Today is {date},{weekday}. +Here's some context for reference: +{context} + +## Guidelines +Here are some guidelines for you: +- Here are strategies for you to decide which tool to use: + 1. Determine whether the current context is sufficient to answer the user's question. + 2. If the current context is sufficient to answer the user's question, call the `give_final_response` tool. + 3. If the current context is not sufficient to answer the user's question, you can consider calling one of the provided tools. + 4. If any of the required parameters of the tool you want to call do not appear in context, call the `give_rhetorical_question` tool to ask the user for more information. +- Always output with the same language as the content from user. If the content is English, use English to output. If the content is Chinese, use Chinese to output. 
+- Always call one tool at a time.""" + +register_prompt_templates( + model_ids=[ + LLMModelType.COHERE_COMMAND_R_PLUS, + LLMModelType.LLAMA3_1_70B_INSTRUCT, + LLMModelType.LLAMA3_2_90B_INSTRUCT, + LLMModelType.MISTRAL_LARGE_2407 + ], + task_type=LLMTaskType.TOOL_CALLING_API, + prompt_template=AGENT_SYSTEM_PROMPT_COHERE, + prompt_name="agent_system_prompt" +) + + +TOOL_FEWSHOT_PROMPT = """\ +Input: {query} +Args: {args}""" + +register_prompt_templates( + model_ids=[ + LLMModelType.CLAUDE_2, + LLMModelType.CLAUDE_21, + LLMModelType.CLAUDE_3_HAIKU, + LLMModelType.CLAUDE_3_SONNET, + LLMModelType.CLAUDE_3_5_SONNET_V2, + LLMModelType.CLAUDE_3_5_HAIKU, + LLMModelType.CLAUDE_3_5_SONNET, + LLMModelType.LLAMA3_1_70B_INSTRUCT, + LLMModelType.LLAMA3_2_90B_INSTRUCT, + LLMModelType.MISTRAL_LARGE_2407, + LLMModelType.COHERE_COMMAND_R_PLUS, + ], + task_type=LLMTaskType.TOOL_CALLING_API, + prompt_template=TOOL_FEWSHOT_PROMPT, + prompt_name="tool_fewshot_prompt" +) + + if __name__ == "__main__": print(get_all_templates()) diff --git a/source/lambda/online/common_logic/common_utils/pydantic_models.py b/source/lambda/online/common_logic/common_utils/pydantic_models.py index 07e6a6014..2cfc90f96 100644 --- a/source/lambda/online/common_logic/common_utils/pydantic_models.py +++ b/source/lambda/online/common_logic/common_utils/pydantic_models.py @@ -27,6 +27,7 @@ class ForbidBaseModel(BaseModel): class AllowBaseModel(BaseModel): class Config: extra = "allow" + use_enum_values = True class LLMConfig(AllowBaseModel): @@ -87,7 +88,7 @@ class RagToolConfig(AllowBaseModel): class AgentConfig(ForbidBaseModel): llm_config: LLMConfig = Field(default_factory=LLMConfig) - tools: list[str] = Field(default_factory=list) + tools: list[Union[str,dict]] = Field(default_factory=list) only_use_rag_tool: bool = False @@ -113,7 +114,7 @@ class ChatbotConfig(AllowBaseModel): private_knowledge_config: PrivateKnowledgeConfig = Field( default_factory=PrivateKnowledgeConfig ) - tools_config: dict[str, Any] = Field(default_factory=dict) + # tools_config: dict[str, Any] = Field(default_factory=dict) def update_llm_config(self, new_llm_config: dict): """unified update llm config diff --git a/source/lambda/online/common_logic/common_utils/response_utils.py b/source/lambda/online/common_logic/common_utils/response_utils.py index 5b94e8c15..fe54fe083 100644 --- a/source/lambda/online/common_logic/common_utils/response_utils.py +++ b/source/lambda/online/common_logic/common_utils/response_utils.py @@ -107,7 +107,6 @@ def stream_response(event_body:dict, response:dict): }, ws_connection_id=ws_connection_id ) - answer_str += chunk if log_first_token_time: diff --git a/source/lambda/online/common_logic/langchain_integration/__init__.py b/source/lambda/online/common_logic/langchain_integration/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/source/lambda/online/common_logic/langchain_integration/chains/__init__.py b/source/lambda/online/common_logic/langchain_integration/chains/__init__.py new file mode 100644 index 000000000..fe79ec231 --- /dev/null +++ b/source/lambda/online/common_logic/langchain_integration/chains/__init__.py @@ -0,0 +1,206 @@ +from typing import Any +from common_logic.common_utils.constant import LLMTaskType + + +class LLMChainMeta(type): + def __new__(cls, name, bases, attrs): + new_cls = type.__new__(cls, name, bases, attrs) + if name == "LLMChain": + return new_cls + new_cls.model_map[new_cls.get_chain_id()] = new_cls + return new_cls + +class LLMChain(metaclass=LLMChainMeta): + model_map = {} + 
+ @classmethod + def get_chain_id(cls): + return cls._get_chain_id(cls.model_id, cls.intent_type) + + @staticmethod + def _get_chain_id(model_id, intent_type): + return f"{model_id}__{intent_type}" + + @classmethod + def get_chain(cls, model_id, intent_type, model_kwargs=None, **kwargs): + # dynamic import + _load_module(intent_type) + return cls.model_map[cls._get_chain_id(model_id, intent_type)].create_chain( + model_kwargs=model_kwargs, **kwargs + ) + +def _import_chat_chain(): + from .chat_chain import ( + Claude2ChatChain, + Claude21ChatChain, + ClaudeInstanceChatChain, + Internlm2Chat7BChatChain, + Internlm2Chat20BChatChain, + Baichuan2Chat13B4BitsChatChain, + Claude3HaikuChatChain, + Claude3SonnetChatChain, +) + +def _import_conversation_summary_chain(): + from .conversation_summary_chain import ( + Internlm2Chat7BConversationSummaryChain, + ClaudeInstanceConversationSummaryChain, + Claude21ConversationSummaryChain, + Claude3HaikuConversationSummaryChain, + Claude3SonnetConversationSummaryChain, + Internlm2Chat20BConversationSummaryChain +) + +def _import_intention_chain(): + from .intention_chain import ( + Claude21IntentRecognitionChain, + Claude2IntentRecognitionChain, + ClaudeInstanceIntentRecognitionChain, + Claude3HaikuIntentRecognitionChain, + Claude3SonnetIntentRecognitionChain, + Internlm2Chat7BIntentRecognitionChain, + Internlm2Chat20BIntentRecognitionChain, + +) + + +def _import_rag_chain(): + from .rag_chain import ( + Claude21RagLLMChain, + Claude2RagLLMChain, + ClaudeInstanceRAGLLMChain, + Claude3HaikuRAGLLMChain, + Claude3SonnetRAGLLMChain, + Baichuan2Chat13B4BitsKnowledgeQaChain +) + + +def _import_translate_chain(): + from .translate_chain import ( + Internlm2Chat7BTranslateChain, + Internlm2Chat20BTranslateChain + ) + +def _import_mkt_conversation_summary_chains(): + from .marketing_chains.mkt_conversation_summary import ( + Claude21MKTConversationSummaryChain, + ClaudeInstanceMKTConversationSummaryChain, + Claude2MKTConversationSummaryChain, + Claude3HaikuMKTConversationSummaryChain, + Claude3SonnetMKTConversationSummaryChain, + Internlm2Chat7BMKTConversationSummaryChain, + Internlm2Chat20BMKTConversationSummaryChain +) + +def _import_mkt_rag_chain(): + from .marketing_chains.mkt_rag_chain import ( + Internlm2Chat7BKnowledgeQaChain, + Internlm2Chat20BKnowledgeQaChain +) + +def _import_stepback_chain(): + from .stepback_chain import ( + Claude21StepBackChain, + ClaudeInstanceStepBackChain, + Claude2StepBackChain, + Claude3HaikuStepBackChain, + Claude3SonnetStepBackChain, + Internlm2Chat7BStepBackChain, + Internlm2Chat20BStepBackChain +) + +def _import_hyde_chain(): + from .hyde_chain import ( + Claude21HydeChain, + Claude2HydeChain, + Claude3HaikuHydeChain, + Claude3SonnetHydeChain, + ClaudeInstanceHydeChain, + Internlm2Chat20BHydeChain, + Internlm2Chat7BHydeChain +) + +def _import_query_rewrite_chain(): + from .query_rewrite_chain import ( + Claude21QueryRewriteChain, + Claude2QueryRewriteChain, + ClaudeInstanceQueryRewriteChain, + Claude3HaikuQueryRewriteChain, + Claude3SonnetQueryRewriteChain, + Internlm2Chat20BQueryRewriteChain, + Internlm2Chat7BQueryRewriteChain +) + + +def _import_tool_calling_chain_claude_xml(): + from .tool_calling_chain_claude_xml import ( + Claude21ToolCallingChain, + Claude3HaikuToolCallingChain, + Claude2ToolCallingChain, + Claude3SonnetToolCallingChain, + ClaudeInstanceToolCallingChain +) + +def _import_retail_conversation_summary_chain(): + from .retail_chains.retail_conversation_summary_chain import ( + 
Claude2RetailConversationSummaryChain, + Claude21RetailConversationSummaryChain, + Claude3HaikuRetailConversationSummaryChain, + Claude3SonnetRetailConversationSummaryChain, + ClaudeInstanceRetailConversationSummaryChain +) + + +def _import_retail_tool_calling_chain_claude_xml(): + from .retail_chains.retail_tool_calling_chain_claude_xml import ( + Claude2RetailToolCallingChain, + Claude21RetailToolCallingChain, + ClaudeInstanceRetailToolCallingChain, + Claude3SonnetRetailToolCallingChain, + Claude3HaikuRetailToolCallingChain +) + +def _import_tool_calling_chain_api(): + from .tool_calling_chain_api import ( + Claude21ToolCallingChain, + Claude2ToolCallingChain, + Claude35SonnetToolCallingChain, + Claude3HaikuToolCallingChain, + Claude3SonnetToolCallingChain, + Llama31Instruct70BToolCallingChain, + CohereCommandRPlusToolCallingChain, + MistraLlarge2407ToolCallingChain + ) + + +def _import_auto_evaluation_chain(): + from .retail_chains.auto_evaluation_chain import ( + Claude3HaikuAutoEvaluationChain, + Claude21AutoEvaluationChain, + Claude2AutoEvaluationChain + +) + + +def _load_module(intent_type): + assert intent_type in CHAIN_MODULE_LOAD_FN_MAP,(intent_type,CHAIN_MODULE_LOAD_FN_MAP) + CHAIN_MODULE_LOAD_FN_MAP[intent_type]() + + +CHAIN_MODULE_LOAD_FN_MAP = { + LLMTaskType.CHAT:_import_chat_chain, + LLMTaskType.CONVERSATION_SUMMARY_TYPE:_import_conversation_summary_chain, + LLMTaskType.INTENT_RECOGNITION_TYPE: _import_intention_chain, + LLMTaskType.RAG: _import_rag_chain, + LLMTaskType.QUERY_TRANSLATE_TYPE: _import_translate_chain, + LLMTaskType.MKT_CONVERSATION_SUMMARY_TYPE: _import_mkt_conversation_summary_chains, + LLMTaskType.MKT_RAG: _import_mkt_rag_chain, + LLMTaskType.STEPBACK_PROMPTING_TYPE: _import_stepback_chain, + LLMTaskType.HYDE_TYPE: _import_hyde_chain, + LLMTaskType.QUERY_REWRITE_TYPE: _import_query_rewrite_chain, + LLMTaskType.TOOL_CALLING_XML: _import_tool_calling_chain_claude_xml, + LLMTaskType.TOOL_CALLING_API:_import_tool_calling_chain_api, + LLMTaskType.RETAIL_CONVERSATION_SUMMARY_TYPE: _import_retail_conversation_summary_chain, + LLMTaskType.RETAIL_TOOL_CALLING: _import_retail_tool_calling_chain_claude_xml, + LLMTaskType.AUTO_EVALUATION: _import_auto_evaluation_chain +} diff --git a/source/lambda/online/lambda_llm_generate/llm_generate_utils/llm_chains/llm_chain_base.py b/source/lambda/online/common_logic/langchain_integration/chains/__llm_chain_base.py similarity index 100% rename from source/lambda/online/lambda_llm_generate/llm_generate_utils/llm_chains/llm_chain_base.py rename to source/lambda/online/common_logic/langchain_integration/chains/__llm_chain_base.py diff --git a/source/lambda/online/lambda_llm_generate/llm_generate_utils/llm_chains/chat_chain.py b/source/lambda/online/common_logic/langchain_integration/chains/chat_chain.py similarity index 89% rename from source/lambda/online/lambda_llm_generate/llm_generate_utils/llm_chains/chat_chain.py rename to source/lambda/online/common_logic/langchain_integration/chains/chat_chain.py index 730a84904..325c55fdb 100644 --- a/source/lambda/online/lambda_llm_generate/llm_generate_utils/llm_chains/chat_chain.py +++ b/source/lambda/online/common_logic/langchain_integration/chains/chat_chain.py @@ -4,10 +4,10 @@ from langchain_core.messages import AIMessage,SystemMessage from langchain.prompts import ChatPromptTemplate,HumanMessagePromptTemplate from langchain_core.messages import convert_to_messages +from langchain_core.output_parsers import StrOutputParser - -from ..llm_models import Model -from .llm_chain_base 
import LLMChain +from ..chat_models import Model +from . import LLMChain from common_logic.common_utils.constant import ( MessageType, @@ -62,15 +62,14 @@ def create_chain(cls, model_kwargs=None, **kwargs): messages_template = ChatPromptTemplate.from_messages(messages) llm = Model.get_model(cls.model_id, model_kwargs=model_kwargs, **kwargs) chain = messages_template | RunnableLambda(lambda x: x.messages) + chain = chain | llm | StrOutputParser() + if stream: - chain = ( - chain | RunnableLambda(lambda messages: llm.stream(messages)) - | RunnableLambda(lambda x: (i.content for i in x)) - ) + final_chain = RunnableLambda(lambda x: chain.stream(x)) else: - chain = chain | llm | RunnableLambda(lambda x: x.content) + final_chain = RunnableLambda(lambda x: chain.invoke(x)) - return chain + return final_chain class Claude21ChatChain(Claude2ChatChain): @@ -90,7 +89,15 @@ class Claude3HaikuChatChain(Claude2ChatChain): class Claude35SonnetChatChain(Claude2ChatChain): - model_id = "anthropic.claude-3-5-sonnet-20240620-v1:0" + model_id = LLMModelType.CLAUDE_3_5_SONNET + + +class Claude35SonnetV2ChatChain(Claude2ChatChain): + model_id = LLMModelType.CLAUDE_3_5_SONNET_V2 + + +class Claude35HaikuChatChain(Claude2ChatChain): + model_id = LLMModelType.CLAUDE_3_5_HAIKU class Mixtral8x7bChatChain(Claude2ChatChain): @@ -98,6 +105,22 @@ class Mixtral8x7bChatChain(Claude2ChatChain): default_model_kwargs = {"max_tokens": 4096, "temperature": 0.01} +class Llama31Instruct70BChatChain(Claude2ChatChain): + model_id = LLMModelType.LLAMA3_1_70B_INSTRUCT + +class Llama32Instruct90BChatChain(Claude2ChatChain): + model_id = LLMModelType.LLAMA3_2_90B_INSTRUCT + + +class MistraLlargeChat2407ChatChain(Claude2ChatChain): + model_id = LLMModelType.MISTRAL_LARGE_2407 + + +class CohereCommandRPlusChatChain(Claude2ChatChain): + model_id = LLMModelType.COHERE_COMMAND_R_PLUS + + + class Baichuan2Chat13B4BitsChatChain(LLMChain): model_id = LLMModelType.BAICHUAN2_13B_CHAT intent_type = LLMTaskType.CHAT @@ -121,7 +144,7 @@ def create_chain(cls, model_kwargs=None, **kwargs): return llm_chain -class Iternlm2Chat7BChatChain(LLMChain): +class Internlm2Chat7BChatChain(LLMChain): model_id = LLMModelType.INTERNLM2_CHAT_7B intent_type = LLMTaskType.CHAT @@ -194,7 +217,7 @@ def create_chain(cls, model_kwargs=None, **kwargs): return llm_chain -class Iternlm2Chat20BChatChain(Iternlm2Chat7BChatChain): +class Internlm2Chat20BChatChain(Internlm2Chat7BChatChain): model_id = LLMModelType.INTERNLM2_CHAT_20B @@ -300,7 +323,7 @@ class Qwen2Instruct72BChatChain(Qwen2Instruct7BChatChain): class ChatGPT35ChatChain(LLMChain): - model_id = LLMModelType.CHATGPT_35_TURBO + model_id = LLMModelType.CHATGPT_35_TURBO_0125 intent_type = LLMTaskType.CHAT @classmethod @@ -321,15 +344,14 @@ def create_chain(cls, model_kwargs=None, **kwargs): messages_template = ChatPromptTemplate.from_messages(messages) llm = Model.get_model(cls.model_id, model_kwargs=model_kwargs, **kwargs) chain = messages_template | RunnableLambda(lambda x: x.messages) + chain = chain | llm | StrOutputParser() + if stream: - chain = ( - chain | RunnableLambda(lambda messages: llm.stream(messages)) - | RunnableLambda(lambda x: (i.content for i in x)) - ) + final_chain = RunnableLambda(lambda x: chain.stream(x)) else: - chain = chain | llm | RunnableLambda(lambda x: x.content) + final_chain = RunnableLambda(lambda x: chain.invoke(x)) - return chain + return final_chain class ChatGPT4ChatChain(ChatGPT35ChatChain): model_id = LLMModelType.CHATGPT_4_TURBO diff --git 
a/source/lambda/online/lambda_llm_generate/llm_generate_utils/llm_chains/conversation_summary_chain.py b/source/lambda/online/common_logic/langchain_integration/chains/conversation_summary_chain.py similarity index 79% rename from source/lambda/online/lambda_llm_generate/llm_generate_utils/llm_chains/conversation_summary_chain.py rename to source/lambda/online/common_logic/langchain_integration/chains/conversation_summary_chain.py index c3f1aa1db..8b7dc1009 100644 --- a/source/lambda/online/lambda_llm_generate/llm_generate_utils/llm_chains/conversation_summary_chain.py +++ b/source/lambda/online/common_logic/langchain_integration/chains/conversation_summary_chain.py @@ -2,14 +2,13 @@ from typing import List import json from langchain.schema.runnable import ( - RunnableLambda, - RunnablePassthrough, + RunnableLambda ) -from ..llm_models import Model -from .chat_chain import Iternlm2Chat7BChatChain -from .llm_chain_base import LLMChain +from ..chat_models import Model +from .chat_chain import Internlm2Chat7BChatChain +from . import LLMChain from common_logic.common_utils.constant import ( MessageType, LLMTaskType, @@ -39,7 +38,7 @@ SYSTEM_MESSAGE_TYPE = MessageType.SYSTEM_MESSAGE_TYPE -class Iternlm2Chat20BConversationSummaryChain(Iternlm2Chat7BChatChain): +class Internlm2Chat20BConversationSummaryChain(Internlm2Chat7BChatChain): model_id = LLMModelType.INTERNLM2_CHAT_20B default_model_kwargs = { "max_new_tokens": 300, @@ -74,7 +73,7 @@ def create_prompt(cls, x,system_prompt=None): prompt = prompt + "Standalone Question: " return prompt -class Iternlm2Chat7BConversationSummaryChain(Iternlm2Chat20BConversationSummaryChain): +class Internlm2Chat7BConversationSummaryChain(Internlm2Chat20BConversationSummaryChain): model_id = LLMModelType.INTERNLM2_CHAT_7B @@ -138,6 +137,7 @@ def create_messages_inputs(cls,x:dict,user_prompt,few_shots:list[dict]): @classmethod def create_messages_chain(cls,**kwargs): + enable_prefill = kwargs['enable_prefill'] system_prompt = get_prompt_template( model_id=cls.model_id, task_type=cls.intent_type, @@ -158,13 +158,17 @@ def create_messages_chain(cls,**kwargs): system_prompt = kwargs.get("system_prompt", system_prompt) user_prompt = kwargs.get('user_prompt', user_prompt) + - cqr_template = ChatPromptTemplate.from_messages([ + messages = [ SystemMessage(content=system_prompt), ('placeholder','{few_shots}'), - HumanMessagePromptTemplate.from_template(user_prompt), - AIMessage(content=cls.prefill) - ]) + HumanMessagePromptTemplate.from_template(user_prompt) + ] + if enable_prefill: + messages.append(AIMessage(content=cls.prefill)) + + cqr_template = ChatPromptTemplate.from_messages(messages) return RunnableLambda(lambda x: cls.create_messages_inputs(x,user_prompt=user_prompt,few_shots=json.loads(few_shots))) | cqr_template @classmethod @@ -175,9 +179,9 @@ def create_chain(cls, model_kwargs=None, **kwargs): model_id=cls.model_id, model_kwargs=model_kwargs, ) - messages_chain = cls.create_messages_chain(**kwargs) + messages_chain = cls.create_messages_chain(**kwargs,enable_prefill=llm.enable_prefill) chain = messages_chain | RunnableLambda(lambda x: print_llm_messages(f"conversation summary messages: {x.messages}") or x.messages) \ - | llm | RunnableLambda(lambda x: x.content) + | llm | RunnableLambda(lambda x: x.content.replace(cls.prefill,"").strip()) return chain @@ -197,6 +201,39 @@ class Claude3HaikuConversationSummaryChain(Claude2ConversationSummaryChain): model_id = LLMModelType.CLAUDE_3_HAIKU +class 
Claude35HaikuConversationSummaryChain(Claude2ConversationSummaryChain): + model_id = LLMModelType.CLAUDE_3_5_HAIKU + + +class Claude35SonnetConversationSummaryChain(Claude2ConversationSummaryChain): + model_id = LLMModelType.CLAUDE_3_5_SONNET + +class Claude35SonnetV2ConversationSummaryChain(Claude2ConversationSummaryChain): + model_id = LLMModelType.CLAUDE_3_5_SONNET_V2 + + +class Mixtral8x7bConversationSummaryChain(Claude2ConversationSummaryChain): + model_id = LLMModelType.MIXTRAL_8X7B_INSTRUCT + default_model_kwargs = {"max_tokens": 4096, "temperature": 0.01} + + +class Llama31Instruct70BConversationSummaryChain(Claude2ConversationSummaryChain): + model_id = LLMModelType.LLAMA3_1_70B_INSTRUCT + + +class Llama32Instruct90BConversationSummaryChain(Claude2ConversationSummaryChain): + model_id = LLMModelType.LLAMA3_2_90B_INSTRUCT + + +class MistraLlargeChat2407ConversationSummaryChain(Claude2ConversationSummaryChain): + model_id = LLMModelType.MISTRAL_LARGE_2407 + + +class CohereCommandRPlusConversationSummaryChain(Claude2ConversationSummaryChain): + model_id = LLMModelType.COHERE_COMMAND_R_PLUS + + + class Qwen2Instruct72BConversationSummaryChain(Claude2ConversationSummaryChain): model_id = LLMModelType.QWEN2INSTRUCT72B diff --git a/source/lambda/online/lambda_llm_generate/llm_generate_utils/llm_chains/hyde_chain.py b/source/lambda/online/common_logic/langchain_integration/chains/hyde_chain.py similarity index 90% rename from source/lambda/online/lambda_llm_generate/llm_generate_utils/llm_chains/hyde_chain.py rename to source/lambda/online/common_logic/langchain_integration/chains/hyde_chain.py index 45609a825..2d34d5dc2 100644 --- a/source/lambda/online/lambda_llm_generate/llm_generate_utils/llm_chains/hyde_chain.py +++ b/source/lambda/online/common_logic/langchain_integration/chains/hyde_chain.py @@ -14,10 +14,10 @@ LLMModelType ) -from ..llm_chains import LLMChain -from ..llm_models import Model as LLM_Model -from .chat_chain import Iternlm2Chat7BChatChain -from .llm_chain_base import LLMChain +from ..chains import LLMChain +from ..chat_models import Model as LLM_Model +from .chat_chain import Internlm2Chat7BChatChain +from . import LLMChain HYDE_TYPE = LLMTaskType.HYDE_TYPE @@ -79,7 +79,7 @@ class Claude35SonnetHydeChain(Claude2HydeChain): internlm2_meta_instruction = "You are a helpful AI Assistant." 
-class Iternlm2Chat7BHydeChain(Iternlm2Chat7BChatChain): +class Internlm2Chat7BHydeChain(Internlm2Chat7BChatChain): model_id = LLMModelType.INTERNLM2_CHAT_7B intent_type = HYDE_TYPE @@ -98,6 +98,6 @@ def create_prompt(cls, x): return prompt -class Iternlm2Chat20BHydeChain(Iternlm2Chat7BHydeChain): +class Internlm2Chat20BHydeChain(Internlm2Chat7BHydeChain): model_id = LLMModelType.INTERNLM2_CHAT_20B intent_type = HYDE_TYPE diff --git a/source/lambda/online/lambda_llm_generate/llm_generate_utils/llm_chains/intention_chain.py b/source/lambda/online/common_logic/langchain_integration/chains/intention_chain.py similarity index 96% rename from source/lambda/online/lambda_llm_generate/llm_generate_utils/llm_chains/intention_chain.py rename to source/lambda/online/common_logic/langchain_integration/chains/intention_chain.py index 292023fda..bc2602beb 100644 --- a/source/lambda/online/lambda_llm_generate/llm_generate_utils/llm_chains/intention_chain.py +++ b/source/lambda/online/common_logic/langchain_integration/chains/intention_chain.py @@ -13,9 +13,9 @@ ) from common_logic.common_utils.constant import LLMTaskType,LLMModelType -from ..llm_models import Model -from .chat_chain import Iternlm2Chat7BChatChain -from .llm_chain_base import LLMChain +from ..chat_models import Model +from .chat_chain import Internlm2Chat7BChatChain +from . import LLMChain abs_dir = os.path.dirname(__file__) @@ -48,7 +48,7 @@ def load_intention_file(intent_save_path=intent_save_path, seed=42): } -class Iternlm2Chat7BIntentRecognitionChain(Iternlm2Chat7BChatChain): +class Internlm2Chat7BIntentRecognitionChain(Internlm2Chat7BChatChain): model_id = LLMModelType.INTERNLM2_CHAT_7B intent_type =LLMTaskType.INTENT_RECOGNITION_TYPE @@ -102,7 +102,7 @@ def create_chain(cls, model_kwargs=None, **kwargs): return chain -class Iternlm2Chat20BIntentRecognitionChain(Iternlm2Chat7BIntentRecognitionChain): +class Internlm2Chat20BIntentRecognitionChain(Internlm2Chat7BIntentRecognitionChain): model_id = LLMModelType.INTERNLM2_CHAT_20B diff --git a/source/lambda/online/lambda_llm_generate/llm_generate_utils/llm_chains/marketing_chains/__init__.py b/source/lambda/online/common_logic/langchain_integration/chains/marketing_chains/__init__.py similarity index 63% rename from source/lambda/online/lambda_llm_generate/llm_generate_utils/llm_chains/marketing_chains/__init__.py rename to source/lambda/online/common_logic/langchain_integration/chains/marketing_chains/__init__.py index 1307aab1c..a78a05a3a 100644 --- a/source/lambda/online/lambda_llm_generate/llm_generate_utils/llm_chains/marketing_chains/__init__.py +++ b/source/lambda/online/common_logic/langchain_integration/chains/marketing_chains/__init__.py @@ -4,12 +4,12 @@ Claude2MKTConversationSummaryChain, Claude3HaikuMKTConversationSummaryChain, Claude3SonnetMKTConversationSummaryChain, - Iternlm2Chat7BMKTConversationSummaryChain, - Iternlm2Chat20BMKTConversationSummaryChain + Internlm2Chat7BMKTConversationSummaryChain, + Internlm2Chat20BMKTConversationSummaryChain ) from .mkt_rag_chain import ( - Iternlm2Chat7BKnowledgeQaChain, - Iternlm2Chat20BKnowledgeQaChain + Internlm2Chat7BKnowledgeQaChain, + Internlm2Chat20BKnowledgeQaChain ) diff --git a/source/lambda/online/lambda_llm_generate/llm_generate_utils/llm_chains/marketing_chains/mkt_conversation_summary.py b/source/lambda/online/common_logic/langchain_integration/chains/marketing_chains/mkt_conversation_summary.py similarity index 94% rename from 
source/lambda/online/lambda_llm_generate/llm_generate_utils/llm_chains/marketing_chains/mkt_conversation_summary.py rename to source/lambda/online/common_logic/langchain_integration/chains/marketing_chains/mkt_conversation_summary.py index 4b04e90bf..87cc8b584 100644 --- a/source/lambda/online/lambda_llm_generate/llm_generate_utils/llm_chains/marketing_chains/mkt_conversation_summary.py +++ b/source/lambda/online/common_logic/langchain_integration/chains/marketing_chains/mkt_conversation_summary.py @@ -4,7 +4,7 @@ RunnablePassthrough, ) -from ..chat_chain import Claude2ChatChain, Iternlm2Chat7BChatChain +from ..chat_chain import Claude2ChatChain, Internlm2Chat7BChatChain from common_logic.common_utils.constant import ( MessageType, @@ -20,7 +20,7 @@ CHIT_CHAT_SYSTEM_TEMPLATE = """You are a helpful AI Assistant""" -class Iternlm2Chat7BMKTConversationSummaryChain(Iternlm2Chat7BChatChain): +class Internlm2Chat7BMKTConversationSummaryChain(Internlm2Chat7BChatChain): model_id = LLMModelType.INTERNLM2_CHAT_7B intent_type = MKT_CONVERSATION_SUMMARY_TYPE @@ -84,8 +84,8 @@ def create_chain(cls, model_kwargs=None, **kwargs): return chain -class Iternlm2Chat20BMKTConversationSummaryChain( - Iternlm2Chat7BMKTConversationSummaryChain +class Internlm2Chat20BMKTConversationSummaryChain( + Internlm2Chat7BMKTConversationSummaryChain ): model_id = LLMModelType.INTERNLM2_CHAT_20B diff --git a/source/lambda/online/lambda_llm_generate/llm_generate_utils/llm_chains/marketing_chains/mkt_rag_chain.py b/source/lambda/online/common_logic/langchain_integration/chains/marketing_chains/mkt_rag_chain.py similarity index 92% rename from source/lambda/online/lambda_llm_generate/llm_generate_utils/llm_chains/marketing_chains/mkt_rag_chain.py rename to source/lambda/online/common_logic/langchain_integration/chains/marketing_chains/mkt_rag_chain.py index 43754b02e..3ce5f2631 100644 --- a/source/lambda/online/lambda_llm_generate/llm_generate_utils/llm_chains/marketing_chains/mkt_rag_chain.py +++ b/source/lambda/online/common_logic/langchain_integration/chains/marketing_chains/mkt_rag_chain.py @@ -2,21 +2,21 @@ LLMTaskType, LLMModelType ) -from ..chat_chain import Iternlm2Chat7BChatChain +from ..chat_chain import Internlm2Chat7BChatChain from common_logic.common_utils.prompt_utils import register_prompt_templates,get_prompt_template INTERLM2_RAG_PROMPT_TEMPLATE = "你是一个Amazon AWS的客服助理小Q,帮助的用户回答使用AWS过程中的各种问题。\n面对用户的问题,你需要给出中文回答,注意不要在回答中重复输出内容。\n下面给出相关问题的背景知识, 需要注意的是如果你认为当前的问题不能在背景知识中找到答案, 你需要拒答。\n背景知识:\n{context}\n\n" register_prompt_templates( model_ids=[LLMModelType.INTERNLM2_CHAT_7B,LLMModelType.INTERNLM2_CHAT_20B], - task_type=LLMTaskType.RAG, + task_type=LLMTaskType.MKT_RAG, prompt_template=INTERLM2_RAG_PROMPT_TEMPLATE, prompt_name="system_prompt" ) -class Iternlm2Chat7BKnowledgeQaChain(Iternlm2Chat7BChatChain): +class Internlm2Chat7BKnowledgeQaChain(Internlm2Chat7BChatChain): model_id = LLMModelType.INTERNLM2_CHAT_7B - intent_type = LLMTaskType.RAG + intent_type = LLMTaskType.MKT_RAG default_model_kwargs = {"temperature": 0.05, "max_new_tokens": 1000} @classmethod @@ -51,5 +51,5 @@ def create_prompt(cls, x): return prompt -class Iternlm2Chat20BKnowledgeQaChain(Iternlm2Chat7BKnowledgeQaChain): +class Internlm2Chat20BKnowledgeQaChain(Internlm2Chat7BKnowledgeQaChain): model_id = LLMModelType.INTERNLM2_CHAT_20B \ No newline at end of file diff --git a/source/lambda/online/lambda_llm_generate/llm_generate_utils/llm_chains/query_rewrite_chain.py 
b/source/lambda/online/common_logic/langchain_integration/chains/query_rewrite_chain.py similarity index 89% rename from source/lambda/online/lambda_llm_generate/llm_generate_utils/llm_chains/query_rewrite_chain.py rename to source/lambda/online/common_logic/langchain_integration/chains/query_rewrite_chain.py index 6eab55f9f..480902b83 100644 --- a/source/lambda/online/lambda_llm_generate/llm_generate_utils/llm_chains/query_rewrite_chain.py +++ b/source/lambda/online/common_logic/langchain_integration/chains/query_rewrite_chain.py @@ -11,10 +11,10 @@ LLMTaskType, LLMModelType ) -from ..llm_chains import LLMChain -from ..llm_models import Model as LLM_Model -from .chat_chain import Iternlm2Chat7BChatChain -from .llm_chain_base import LLMChain +from ..chains import LLMChain +from ..chat_models import Model as LLM_Model +from .chat_chain import Internlm2Chat7BChatChain +from . import LLMChain QUERY_REWRITE_TYPE = LLMTaskType.QUERY_REWRITE_TYPE query_expansion_template_claude = PromptTemplate.from_template("""You are an AI language model assistant. Your task is to generate 1 - 5 different sub questions OR alternate versions of the given user question to retrieve relevant documents from a vector database. @@ -86,7 +86,15 @@ class Claude3SonnetQueryRewriteChain(Claude2QueryRewriteChain): class Claude35SonnetQueryRewriteChain(Claude2QueryRewriteChain): - mdoel_id = "anthropic.claude-3-5-sonnet-20240620-v1:0" + model_id = LLMModelType.CLAUDE_3_5_SONNET + + +class Claude35SonnetV2QueryRewriteChain(Claude2QueryRewriteChain): + model_id = LLMModelType.CLAUDE_3_5_SONNET_V2 + + +class Claude35HaikuQueryRewriteChain(Claude2QueryRewriteChain): + model_id = LLMModelType.CLAUDE_3_5_HAIKU internlm2_meta_instruction = """You are an AI language model assistant. Your task is to generate 1 - 5 different sub questions OR alternate versions of the given user question to retrieve relevant documents from a vector database. 
@@ -108,7 +116,7 @@ class Claude35SonnetQueryRewriteChain(Claude2QueryRewriteChain): """ -class Iternlm2Chat7BQueryRewriteChain(Iternlm2Chat7BChatChain): +class Internlm2Chat7BQueryRewriteChain(Internlm2Chat7BChatChain): model_id = LLMModelType.INTERNLM2_CHAT_7B intent_type = QUERY_REWRITE_TYPE @@ -138,6 +146,6 @@ def create_chain(cls, model_kwargs=None, **kwargs): return chain -class Iternlm2Chat20BQueryRewriteChain(Iternlm2Chat7BQueryRewriteChain): +class Internlm2Chat20BQueryRewriteChain(Internlm2Chat7BQueryRewriteChain): model_id = LLMModelType.INTERNLM2_CHAT_20B intent_type = QUERY_REWRITE_TYPE diff --git a/source/lambda/online/lambda_llm_generate/llm_generate_utils/llm_chains/rag_chain.py b/source/lambda/online/common_logic/langchain_integration/chains/rag_chain.py similarity index 83% rename from source/lambda/online/lambda_llm_generate/llm_generate_utils/llm_chains/rag_chain.py rename to source/lambda/online/common_logic/langchain_integration/chains/rag_chain.py index f04750f64..bfacac8f5 100644 --- a/source/lambda/online/lambda_llm_generate/llm_generate_utils/llm_chains/rag_chain.py +++ b/source/lambda/online/common_logic/langchain_integration/chains/rag_chain.py @@ -4,6 +4,7 @@ HumanMessagePromptTemplate, SystemMessagePromptTemplate ) +from langchain_core.output_parsers import StrOutputParser from langchain.schema.runnable import RunnableLambda, RunnablePassthrough from common_logic.common_utils.constant import ( @@ -14,8 +15,8 @@ from common_logic.common_utils.logger_utils import print_llm_messages # from ...prompt_template import convert_chat_history_from_fstring_format -from ..llm_models import Model -from .llm_chain_base import LLMChain +from ..chat_models import Model +from . import LLMChain def get_claude_rag_context(contexts: list): @@ -55,15 +56,15 @@ def create_chain(cls, model_kwargs=None, **kwargs): ) llm = Model.get_model(cls.model_id, model_kwargs=model_kwargs, **kwargs) chain = context_chain | ChatPromptTemplate.from_messages(chat_messages) | RunnableLambda(lambda x: print_llm_messages(f"rag messages: {x.messages}") or x) + + chain = chain | llm | StrOutputParser() + if stream: - chain = ( - chain - | RunnableLambda(lambda x: llm.stream(x.messages)) - | RunnableLambda(lambda x: (i.content for i in x)) - ) + final_chain = RunnableLambda(lambda x: chain.stream(x)) else: - chain = chain | llm | RunnableLambda(lambda x: x.content) - return chain + final_chain = RunnableLambda(lambda x: chain.invoke(x)) + + return final_chain class Claude21RagLLMChain(Claude2RagLLMChain): @@ -81,6 +82,32 @@ class Claude3SonnetRAGLLMChain(Claude2RagLLMChain): class Claude3HaikuRAGLLMChain(Claude2RagLLMChain): model_id = LLMModelType.CLAUDE_3_HAIKU +class Claude35SonnetRAGLLMChain(Claude2RagLLMChain): + model_id = LLMModelType.CLAUDE_3_5_SONNET + + +class Claude35SonnetV2RAGLLMChain(Claude2RagLLMChain): + model_id = LLMModelType.CLAUDE_3_5_SONNET_V2 + +class Claude35HaikuRAGLLMChain(Claude2RagLLMChain): + model_id = LLMModelType.CLAUDE_3_5_HAIKU + + +class Llama31Instruct70B(Claude2RagLLMChain): + model_id = LLMModelType.LLAMA3_1_70B_INSTRUCT + +class Llama32Instruct90B(Claude2RagLLMChain): + model_id = LLMModelType.LLAMA3_2_90B_INSTRUCT + + +class MistraLlarge2407(Claude2RagLLMChain): + model_id = LLMModelType.MISTRAL_LARGE_2407 + + +class CohereCommandRPlus(Claude2RagLLMChain): + model_id = LLMModelType.COHERE_COMMAND_R_PLUS + + class Mixtral8x7bChatChain(Claude2RagLLMChain): model_id = LLMModelType.MIXTRAL_8X7B_INSTRUCT diff --git 
a/source/lambda/online/lambda_llm_generate/llm_generate_utils/llm_chains/retail_chains/__init__.py b/source/lambda/online/common_logic/langchain_integration/chains/retail_chains/__init__.py similarity index 100% rename from source/lambda/online/lambda_llm_generate/llm_generate_utils/llm_chains/retail_chains/__init__.py rename to source/lambda/online/common_logic/langchain_integration/chains/retail_chains/__init__.py diff --git a/source/lambda/online/lambda_llm_generate/llm_generate_utils/llm_chains/retail_chains/auto_evaluation_chain.py b/source/lambda/online/common_logic/langchain_integration/chains/retail_chains/auto_evaluation_chain.py similarity index 98% rename from source/lambda/online/lambda_llm_generate/llm_generate_utils/llm_chains/retail_chains/auto_evaluation_chain.py rename to source/lambda/online/common_logic/langchain_integration/chains/retail_chains/auto_evaluation_chain.py index bcdd7011d..28d4b22c0 100644 --- a/source/lambda/online/lambda_llm_generate/llm_generate_utils/llm_chains/retail_chains/auto_evaluation_chain.py +++ b/source/lambda/online/common_logic/langchain_integration/chains/retail_chains/auto_evaluation_chain.py @@ -12,7 +12,7 @@ LLMModelType, ) from ...llm_models import Model -from ..llm_chain_base import LLMChain +from ..__llm_chain_base import LLMChain from ..chat_chain import Claude2ChatChain diff --git a/source/lambda/online/lambda_llm_generate/llm_generate_utils/llm_chains/retail_chains/retail_conversation_summary_chain.py b/source/lambda/online/common_logic/langchain_integration/chains/retail_chains/retail_conversation_summary_chain.py similarity index 99% rename from source/lambda/online/lambda_llm_generate/llm_generate_utils/llm_chains/retail_chains/retail_conversation_summary_chain.py rename to source/lambda/online/common_logic/langchain_integration/chains/retail_chains/retail_conversation_summary_chain.py index d5be022ef..eae0716d6 100644 --- a/source/lambda/online/lambda_llm_generate/llm_generate_utils/llm_chains/retail_chains/retail_conversation_summary_chain.py +++ b/source/lambda/online/common_logic/langchain_integration/chains/retail_chains/retail_conversation_summary_chain.py @@ -8,7 +8,7 @@ from ...llm_models import Model -from ..llm_chain_base import LLMChain +from ..__llm_chain_base import LLMChain from common_logic.common_utils.constant import ( MessageType, LLMTaskType, diff --git a/source/lambda/online/lambda_llm_generate/llm_generate_utils/llm_chains/retail_chains/retail_tool_calling_chain_claude_xml.py b/source/lambda/online/common_logic/langchain_integration/chains/retail_chains/retail_tool_calling_chain_claude_xml.py similarity index 99% rename from source/lambda/online/lambda_llm_generate/llm_generate_utils/llm_chains/retail_chains/retail_tool_calling_chain_claude_xml.py rename to source/lambda/online/common_logic/langchain_integration/chains/retail_chains/retail_tool_calling_chain_claude_xml.py index 803e4ef23..71a953c5a 100644 --- a/source/lambda/online/lambda_llm_generate/llm_generate_utils/llm_chains/retail_chains/retail_tool_calling_chain_claude_xml.py +++ b/source/lambda/online/common_logic/langchain_integration/chains/retail_chains/retail_tool_calling_chain_claude_xml.py @@ -22,7 +22,7 @@ SceneType ) from functions import get_tool_by_name -from ..llm_chain_base import LLMChain +from ..__llm_chain_base import LLMChain from ...llm_models import Model tool_call_guidelines = """ diff --git a/source/lambda/online/lambda_llm_generate/llm_generate_utils/llm_chains/retail_chains/retail_tool_calling_chain_json.py 
b/source/lambda/online/common_logic/langchain_integration/chains/retail_chains/retail_tool_calling_chain_json.py
similarity index 99%
rename from source/lambda/online/lambda_llm_generate/llm_generate_utils/llm_chains/retail_chains/retail_tool_calling_chain_json.py
rename to source/lambda/online/common_logic/langchain_integration/chains/retail_chains/retail_tool_calling_chain_json.py
index d20bb6c03..f1bc5d8b0 100644
--- a/source/lambda/online/lambda_llm_generate/llm_generate_utils/llm_chains/retail_chains/retail_tool_calling_chain_json.py
+++ b/source/lambda/online/common_logic/langchain_integration/chains/retail_chains/retail_tool_calling_chain_json.py
@@ -25,7 +25,7 @@
 )
 from functions import get_tool_by_name
-from ..llm_chain_base import LLMChain
+from ..__llm_chain_base import LLMChain
 from ...llm_models import Model
 from ..chat_chain import GLM4Chat9BChatChain
 from common_logic.common_utils.logger_utils import get_logger
diff --git a/source/lambda/online/lambda_llm_generate/llm_generate_utils/llm_chains/stepback_chain.py b/source/lambda/online/common_logic/langchain_integration/chains/stepback_chain.py
similarity index 94%
rename from source/lambda/online/lambda_llm_generate/llm_generate_utils/llm_chains/stepback_chain.py
rename to source/lambda/online/common_logic/langchain_integration/chains/stepback_chain.py
index f17b4e4b8..4fb49a410 100644
--- a/source/lambda/online/lambda_llm_generate/llm_generate_utils/llm_chains/stepback_chain.py
+++ b/source/lambda/online/common_logic/langchain_integration/chains/stepback_chain.py
@@ -8,13 +8,13 @@
     LLMTaskType,
     LLMModelType
 )
-from ..llm_chains.chat_chain import Iternlm2Chat7BChatChain
-from ..llm_chains.llm_chain_base import LLMChain
-from ..llm_models import Model
+from .chat_chain import Internlm2Chat7BChatChain
+from . import LLMChain
+from ..chat_models import Model

 STEPBACK_PROMPTING_TYPE = LLMTaskType.STEPBACK_PROMPTING_TYPE

-class Iternlm2Chat7BStepBackChain(Iternlm2Chat7BChatChain):
+class Internlm2Chat7BStepBackChain(Internlm2Chat7BChatChain):
     model_id = LLMModelType.INTERNLM2_CHAT_7B
     intent_type = STEPBACK_PROMPTING_TYPE
@@ -61,7 +61,7 @@ def create_prompt(cls, x):
         return prompt

-class Iternlm2Chat20BStepBackChain(Iternlm2Chat7BStepBackChain):
+class Internlm2Chat20BStepBackChain(Internlm2Chat7BStepBackChain):
     model_id = LLMModelType.INTERNLM2_CHAT_20B
     intent_type = STEPBACK_PROMPTING_TYPE
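The next new file introduces a native tool-calling chain built on the Bedrock Converse API. A usage sketch up front may help review; it assumes the "agent_system_prompt" and "tool_fewshot_prompt" templates are provisioned for this model and that Bedrock credentials are available, neither of which is confirmed by this diff alone:

from langchain_core.tools import tool
from common_logic.langchain_integration.chains.tool_calling_chain_api import (
    Claude35SonnetV2ToolCallingChain,
)

@tool
def get_weather(city_name: str) -> str:
    """Get the current weather for `city_name`."""
    return f"Sunny in {city_name}"  # stand-in implementation

chain = Claude35SonnetV2ToolCallingChain.create_chain(tools=[get_weather])
ai_message = chain.invoke({
    "query": "What's the weather in Shanghai?",
    "chat_history": [],
    "agent_tool_history": [],
})
print(ai_message.tool_calls)  # structured tool calls parsed by langchain-aws

diff --git a/source/lambda/online/common_logic/langchain_integration/chains/tool_calling_chain_api.py b/source/lambda/online/common_logic/langchain_integration/chains/tool_calling_chain_api.py
new file mode 100644
index 000000000..62d982a33
--- /dev/null
+++ b/source/lambda/online/common_logic/langchain_integration/chains/tool_calling_chain_api.py
@@ -0,0 +1,177 @@
+# tool calling chain
+import json
+from typing import List,Dict,Any
+from collections import defaultdict
+
+from common_logic.common_utils.prompt_utils import get_prompt_template
+from langchain_core.messages import (
+    AIMessage,
+    SystemMessage
+)
+from langchain.prompts import ChatPromptTemplate
+from langchain.tools.base import BaseTool
+from langchain_core.language_models import BaseChatModel
+
+from common_logic.common_utils.constant import (
+    LLMTaskType,
+    LLMModelType,
+    MessageType
+)
+from common_logic.common_utils.time_utils import get_china_now
+
+from . import LLMChain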
+from ..chat_models import Model
+
+
+class Claude2ToolCallingChain(LLMChain):
+    model_id = LLMModelType.CLAUDE_2
+    intent_type = LLMTaskType.TOOL_CALLING_API
+    default_model_kwargs = {
+        "max_tokens": 2000,
+        "temperature": 0.1,
+        "top_p": 0.9
+    }
+
+    @classmethod
+    def create_chat_history(cls,x):
+        chat_history = x['chat_history'] + \
+            [{"role": MessageType.HUMAN_MESSAGE_TYPE,"content": x['query']}] + \
+            x['agent_tool_history']
+        return chat_history
+
+    @classmethod
+    def get_common_system_prompt(cls,system_prompt_template:str,all_knowledge_retrieved_list=None):
+        all_knowledge_retrieved_list = all_knowledge_retrieved_list or []
+        all_knowledge_retrieved = "\n\n".join(all_knowledge_retrieved_list)
+        now = get_china_now()
+        date_str = now.strftime("%Y年%m月%d日")
+        weekdays = ['星期一', '星期二', '星期三', '星期四', '星期五', '星期六', '星期日']
+        weekday = weekdays[now.weekday()]
+        system_prompt = system_prompt_template.format(date=date_str,weekday=weekday,context=all_knowledge_retrieved)
+        return system_prompt
+
+    @classmethod
+    def bind_tools(cls,llm:BaseChatModel,tools:List[BaseTool], fewshot_examples=None, fewshot_template=None,tool_choice='any'):
+        tools = [tool.model_copy() for tool in tools]
+        if not fewshot_examples:
+            if getattr(llm,"enable_auto_tool_choice",True):
+                return llm.bind_tools(tools,tool_choice=tool_choice)
+            return llm.bind_tools(tools)
+
+        # add fewshot examples to the tool descriptions
+        tools_map = {tool.name:tool for tool in tools}
+
+        # group fewshot examples by tool name
+        fewshot_examples_grouped = defaultdict(list)
+        for example in fewshot_examples:
+            fewshot_examples_grouped[example['name']].append(example)
+
+        for tool_name,examples in fewshot_examples_grouped.items():
+            tool = tools_map[tool_name]
+            tool.description += "\n\nHere are some examples where this tool is called:\n"
+            examples_strs = []
+            for example in examples:
+                params_str = json.dumps(example['kwargs'],ensure_ascii=False)
+                examples_strs.append(
+                    fewshot_template.format(
+                        query=example['query'],
+                        args=params_str
+                    )
+                )
+
+            tool.description += "\n\n".join(examples_strs)
+
+        if getattr(llm,"enable_auto_tool_choice",True):
+            return llm.bind_tools(tools,tool_choice=tool_choice)
+        return llm.bind_tools(tools)
+
+    @classmethod
+    def create_chain(cls, model_kwargs=None, **kwargs):
+        model_kwargs = model_kwargs or {}
+        tools:list = kwargs['tools']
+        assert all(isinstance(tool,BaseTool) for tool in tools),tools
+        fewshot_examples = kwargs.get('fewshot_examples',[])
+        agent_system_prompt = get_prompt_template(
+            model_id=cls.model_id,
+            task_type=cls.intent_type,
+            prompt_name="agent_system_prompt"
+        ).prompt_template
+
+        agent_system_prompt = kwargs.get("agent_system_prompt",None) or agent_system_prompt
+
+        all_knowledge_retrieved_list = kwargs.get('all_knowledge_retrieved_list',[])
+        agent_system_prompt = cls.get_common_system_prompt(
+            agent_system_prompt,all_knowledge_retrieved_list
+        )
+
+        # tool fewshot prompt
+        tool_fewshot_prompt = get_prompt_template(
+            model_id=cls.model_id,
+            task_type=cls.intent_type,
+            prompt_name="tool_fewshot_prompt"
+        ).prompt_template
+        tool_fewshot_prompt = kwargs.get('tool_fewshot_prompt',None) or tool_fewshot_prompt
+
+        model_kwargs = {**cls.default_model_kwargs, **model_kwargs}
+
+        llm = Model.get_model(
+            model_id=cls.model_id,
+            model_kwargs=model_kwargs,
+        )
+
+        llm = cls.bind_tools(llm,tools,fewshot_examples,fewshot_template=tool_fewshot_prompt)
+
+        tool_calling_template = ChatPromptTemplate.from_messages(
+            [
+                SystemMessage(content=agent_system_prompt),
+                ("placeholder", "{chat_history}"),
+                ("human", "{query}"),
+                ("placeholder", "{agent_tool_history}"),
+            ]
+        )
+        chain = tool_calling_template | llm
+        return chain
+
+
+class Claude21ToolCallingChain(Claude2ToolCallingChain):
+    model_id = LLMModelType.CLAUDE_21
+
+
+class Claude3SonnetToolCallingChain(Claude2ToolCallingChain):
+    model_id = LLMModelType.CLAUDE_3_SONNET
+
+
+class Claude3HaikuToolCallingChain(Claude2ToolCallingChain):
+    model_id = LLMModelType.CLAUDE_3_HAIKU
+
+
+class Claude35SonnetToolCallingChain(Claude2ToolCallingChain):
+    model_id = LLMModelType.CLAUDE_3_5_SONNET
+
+
+class Claude35SonnetV2ToolCallingChain(Claude2ToolCallingChain):
+    model_id = LLMModelType.CLAUDE_3_5_SONNET_V2
+
+
+class Claude35HaikuToolCallingChain(Claude2ToolCallingChain):
+    model_id = LLMModelType.CLAUDE_3_5_HAIKU
+
+
+class Llama31Instruct70BToolCallingChain(Claude2ToolCallingChain):
+    model_id = LLMModelType.LLAMA3_1_70B_INSTRUCT
+
+
+class Llama32Instruct90BToolCallingChain(Claude2ToolCallingChain):
+    model_id = LLMModelType.LLAMA3_2_90B_INSTRUCT
+
+
+class MistralLarge2407ToolCallingChain(Claude2ToolCallingChain):
+    model_id = LLMModelType.MISTRAL_LARGE_2407
+
+
+class CohereCommandRPlusToolCallingChain(Claude2ToolCallingChain):
+    model_id = LLMModelType.COHERE_COMMAND_R_PLUS
diff --git a/source/lambda/online/lambda_llm_generate/llm_generate_utils/llm_chains/tool_calling_chain_claude_xml.py b/source/lambda/online/common_logic/langchain_integration/chains/tool_calling_chain_claude_xml.py
similarity index 98%
rename from source/lambda/online/lambda_llm_generate/llm_generate_utils/llm_chains/tool_calling_chain_claude_xml.py
rename to source/lambda/online/common_logic/langchain_integration/chains/tool_calling_chain_claude_xml.py
index b31ab0d69..114139f84 100644
--- a/source/lambda/online/lambda_llm_generate/llm_generate_utils/llm_chains/tool_calling_chain_claude_xml.py
+++ b/source/lambda/online/common_logic/langchain_integration/chains/tool_calling_chain_claude_xml.py
@@ -24,8 +24,8 @@
 )
 from common_logic.common_utils.time_utils import get_china_now
-from .llm_chain_base import LLMChain
-from ..llm_models import Model
+from . import LLMChain
+from ..chat_models import Model

 incorrect_tool_call_example = """Here is an example of an incorrectly formatted tool call, which you should avoid.
@@ -168,13 +168,13 @@ def convert_openai_tool_to_anthropic(tools:list[dict])->str:
 class Claude2ToolCallingChain(LLMChain):
     model_id = LLMModelType.CLAUDE_2
-    intent_type = LLMTaskType.TOOL_CALLING
+    intent_type = LLMTaskType.TOOL_CALLING_XML
     default_model_kwargs = {
         "max_tokens": 2000,
         "temperature": 0.1,
         "top_p": 0.9,
         "stop_sequences": ["\n\nHuman:", "\n\nAssistant",""],
-        }
+    }

     @staticmethod
     def format_fewshot_examples(fewshot_examples:list[dict]):
diff --git a/source/lambda/online/lambda_llm_generate/llm_generate_utils/llm_chains/translate_chain.py b/source/lambda/online/common_logic/langchain_integration/chains/translate_chain.py
similarity index 88%
rename from source/lambda/online/lambda_llm_generate/llm_generate_utils/llm_chains/translate_chain.py
rename to source/lambda/online/common_logic/langchain_integration/chains/translate_chain.py
index 07b92b3bb..638128b75 100644
--- a/source/lambda/online/lambda_llm_generate/llm_generate_utils/llm_chains/translate_chain.py
+++ b/source/lambda/online/common_logic/langchain_integration/chains/translate_chain.py
@@ -5,12 +5,12 @@
     LLMTaskType,
     LLMModelType
 )
-from .chat_chain import Iternlm2Chat7BChatChain
+from .chat_chain import Internlm2Chat7BChatChain

 QUERY_TRANSLATE_TYPE = LLMTaskType.QUERY_TRANSLATE_TYPE

-class Iternlm2Chat7BTranslateChain(Iternlm2Chat7BChatChain):
+class Internlm2Chat7BTranslateChain(Internlm2Chat7BChatChain):
     intent_type = QUERY_TRANSLATE_TYPE
     default_model_kwargs = {"temperature": 0.1, "max_new_tokens": 200}
@@ -36,5 +36,5 @@ def create_chain(cls, model_kwargs=None, **kwargs):
         return llm_chain

-class Iternlm2Chat20BTranslateChain(Iternlm2Chat7BTranslateChain):
+class Internlm2Chat20BTranslateChain(Internlm2Chat7BTranslateChain):
     model_id = LLMModelType.INTERNLM2_CHAT_20B
diff --git a/source/lambda/online/common_logic/langchain_integration/chat_models/__init__.py b/source/lambda/online/common_logic/langchain_integration/chat_models/__init__.py
new file mode 100644
index 000000000..8f0bd1bee
--- /dev/null
+++ b/source/lambda/online/common_logic/langchain_integration/chat_models/__init__.py
@@ -0,0 +1,104 @@
+"""
+Chat models built with the command pattern.
+"""
+from common_logic.common_utils.constant import LLMModelType
+
+
+class ModelMixins:
+    @staticmethod
+    def convert_messages_role(messages:list[dict],role_map:dict):
+        """
+        Args:
+            messages (list[dict]): messages to convert
+            role_map (dict): {"current_role": "target_role"}
+
+        Returns:
+            list[dict]: the same messages with their roles mapped
+        """
+        valid_roles = list(role_map.keys())
+        new_messages = []
+        for message in messages:
+            message = {**message}
+            role = message['role']
+            assert role in valid_roles,(role,valid_roles,messages)
+            message['role'] = role_map[role]
+            new_messages.append(message)
+        return new_messages
+
+
+class ModelMeta(type):
+    def __new__(cls, name, bases, attrs):
+        new_cls = type.__new__(cls, name, bases, attrs)
+        if name == "Model" or new_cls.model_id is None:
+            return new_cls
+        new_cls.model_map[new_cls.model_id] = new_cls
+        return new_cls
+
+
+class Model(ModelMixins,metaclass=ModelMeta):
+    model_id: str = None
+    enable_auto_tool_choice: bool = True
+    enable_prefill: bool = True
+    model_map = {}
+
+    @classmethod
+    def create_model(cls, model_kwargs=None, **kwargs):
+        raise NotImplementedError
+
+    @classmethod
+    def get_model(cls, model_id, model_kwargs=None, **kwargs):
+        # dynamically import the module that defines this model,
+        # which registers it in model_map via ModelMeta
+        _load_module(model_id)
+        return cls.model_map[model_id].create_model(model_kwargs=model_kwargs, **kwargs)

+def _import_bedrock_models():
+    # importing the module executes it, registering every model class below
+    from .bedrock_models import (
+        Claude2,
+        ClaudeInstance,
+        Claude21,
+        Claude3Sonnet,
+        Claude3Haiku,
+        Claude35Sonnet,
+        Claude35Haiku,
+        Claude35SonnetV2,
+        MistralLarge2407,
+        Llama3d1Instruct70B,
+        Llama3d2Instruct90B,
+        CohereCommandRPlus
+    )

+def _import_openai_models():
+    from .openai_models import (
+        ChatGPT35,
+        ChatGPT4Turbo,
+        ChatGPT4o
+    )
+
+
+def _load_module(model_id):
+    assert model_id in MODEL_MODULE_LOAD_FN_MAP,(model_id,MODEL_MODULE_LOAD_FN_MAP)
+    MODEL_MODULE_LOAD_FN_MAP[model_id]()
+
+
+MODEL_MODULE_LOAD_FN_MAP = {
+    LLMModelType.CHATGPT_35_TURBO_0125:_import_openai_models,
+    LLMModelType.CHATGPT_4_TURBO:_import_openai_models,
+    LLMModelType.CHATGPT_4O:_import_openai_models,
+    LLMModelType.CLAUDE_2:_import_bedrock_models,
+    LLMModelType.CLAUDE_INSTANCE:_import_bedrock_models,
+    LLMModelType.CLAUDE_21:_import_bedrock_models,
+    LLMModelType.CLAUDE_3_SONNET:_import_bedrock_models,
+    LLMModelType.CLAUDE_3_HAIKU:_import_bedrock_models,
+    LLMModelType.CLAUDE_3_5_SONNET:_import_bedrock_models,
+    LLMModelType.LLAMA3_1_70B_INSTRUCT:_import_bedrock_models,
+    LLMModelType.LLAMA3_2_90B_INSTRUCT:_import_bedrock_models,
+    LLMModelType.MISTRAL_LARGE_2407:_import_bedrock_models,
+    LLMModelType.COHERE_COMMAND_R_PLUS:_import_bedrock_models,
+    LLMModelType.CLAUDE_3_5_SONNET_V2:_import_bedrock_models,
+    LLMModelType.CLAUDE_3_5_HAIKU:_import_bedrock_models,
+}
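The registry half of the command pattern lives in ModelMeta: any subclass that sets a model_id lands in Model.model_map, and get_model lazy-imports the defining module first. A minimal sketch of the registration mechanics, with a hypothetical model id (not a real Bedrock model):

from common_logic.langchain_integration.chat_models import Model

class EchoModel(Model):
    model_id = "example.echo-v1"      # hypothetical id for illustration only

    @classmethod
    def create_model(cls, model_kwargs=None, **kwargs):
        return lambda prompt: prompt  # stand-in for a chat model client

# ModelMeta has already registered the subclass by its model_id
assert Model.model_map["example.echo-v1"] is EchoModel
# Note: Model.get_model("example.echo-v1") would additionally require an entry
# in MODEL_MODULE_LOAD_FN_MAP, since it calls _load_module() before the lookup.

diff --git a/source/lambda/online/common_logic/langchain_integration/chat_models/bedrock_models.py b/source/lambda/online/common_logic/langchain_integration/chat_models/bedrock_models.py
new file mode 100644
index 000000000..8282392f0
--- /dev/null
+++ b/source/lambda/online/common_logic/langchain_integration/chat_models/bedrock_models.py
@@ -0,0 +1,102 @@
+import os
+from langchain_aws.chat_models import ChatBedrockConverse as _ChatBedrockConverse
+from common_logic.common_utils.constant import (
+    MessageType,
+    LLMModelType
+)
+from common_logic.common_utils.logger_utils import get_logger,llm_messages_print_decorator
+from . import Model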
+
+logger = get_logger("bedrock_model")
+
+
+class ChatBedrockConverse(_ChatBedrockConverse):
+    enable_auto_tool_choice: bool = False
+    enable_prefill: bool = True
+
+
+# Bedrock model type
+class Claude2(Model):
+    model_id = LLMModelType.CLAUDE_2
+    default_model_kwargs = {"max_tokens": 2000, "temperature": 0.7, "top_p": 0.9}
+    enable_auto_tool_choice = False
+
+    @classmethod
+    def create_model(cls, model_kwargs=None, **kwargs):
+        model_kwargs = model_kwargs or {}
+        model_kwargs = {**cls.default_model_kwargs, **model_kwargs}
+
+        credentials_profile_name = (
+            kwargs.get("credentials_profile_name", None)
+            or os.environ.get("AWS_PROFILE", None)
+            or None
+        )
+        region_name = (
+            kwargs.get("region_name", None)
+            or os.environ.get("BEDROCK_REGION", None)
+            or None
+        )
+        llm = ChatBedrockConverse(
+            credentials_profile_name=credentials_profile_name,
+            region_name=region_name,
+            model=cls.model_id,
+            enable_auto_tool_choice=cls.enable_auto_tool_choice,
+            enable_prefill=cls.enable_prefill,
+            **model_kwargs,
+        )
+        # log every Converse request/response for debugging
+        llm.client.converse_stream = llm_messages_print_decorator(llm.client.converse_stream)
+        llm.client.converse = llm_messages_print_decorator(llm.client.converse)
+        return llm
+
+
+class ClaudeInstance(Claude2):
+    model_id = LLMModelType.CLAUDE_INSTANCE
+
+
+class Claude21(Claude2):
+    model_id = LLMModelType.CLAUDE_21
+
+
+class Claude3Sonnet(Claude2):
+    model_id = LLMModelType.CLAUDE_3_SONNET
+
+
+class Claude3Haiku(Claude2):
+    model_id = LLMModelType.CLAUDE_3_HAIKU
+
+
+class Claude35Sonnet(Claude2):
+    model_id = LLMModelType.CLAUDE_3_5_SONNET
+
+
+class Claude35SonnetV2(Claude2):
+    model_id = LLMModelType.CLAUDE_3_5_SONNET_V2
+
+
+class Claude35Haiku(Claude2):
+    model_id = LLMModelType.CLAUDE_3_5_HAIKU
+
+
+class MistralLarge2407(Claude2):
+    model_id = LLMModelType.MISTRAL_LARGE_2407
+    enable_prefill = False
+
+
+class Llama3d1Instruct70B(Claude2):
+    model_id = LLMModelType.LLAMA3_1_70B_INSTRUCT
+    enable_auto_tool_choice = False
+    enable_prefill = False
+
+class Llama3d2Instruct90B(Claude2):
+    model_id = LLMModelType.LLAMA3_2_90B_INSTRUCT
+    enable_auto_tool_choice = False
+    enable_prefill = False
+
+
+class CohereCommandRPlus(Claude2):
+    model_id = LLMModelType.COHERE_COMMAND_R_PLUS
+    enable_auto_tool_choice = False
+    enable_prefill = False
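bedrock_models.py closes the pattern: every provider class inherits Claude2's create_model, so per-model behavior reduces to class attributes. A minimal usage sketch, assuming BEDROCK_REGION (or an AWS profile) grants Bedrock access in your environment:

from common_logic.common_utils.constant import LLMModelType
from common_logic.langchain_integration.chat_models import Model

llm = Model.get_model(
    LLMModelType.CLAUDE_3_5_HAIKU,
    model_kwargs={"temperature": 0.0},  # overrides the class default of 0.7
)
print(llm.invoke("ping").content)

diff --git a/source/lambda/online/common_logic/langchain_integration/chat_models/openai_models.py b/source/lambda/online/common_logic/langchain_integration/chat_models/openai_models.py
new file mode 100644
index 000000000..fdddeb454
--- /dev/null
+++ b/source/lambda/online/common_logic/langchain_integration/chat_models/openai_models.py
@@ -0,0 +1,28 @@
+from langchain_openai import ChatOpenAI
+from common_logic.common_utils.constant import LLMModelType
+from common_logic.common_utils.logger_utils import get_logger
+from . 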
import Model + +logger = get_logger("openai_model") + +class ChatGPT35(Model): + model_id = LLMModelType.CHATGPT_35_TURBO_0125 + default_model_kwargs = {"max_tokens": 2000, "temperature": 0.7, "top_p": 0.9} + + @classmethod + def create_model(cls, model_kwargs=None, **kwargs): + model_kwargs = model_kwargs or {} + model_kwargs = {**cls.default_model_kwargs, **model_kwargs} + llm = ChatOpenAI( + model=cls.model_id, + **model_kwargs, + ) + return llm + + +class ChatGPT4Turbo(ChatGPT35): + model_id = LLMModelType.CHATGPT_4_TURBO + + +class ChatGPT4o(ChatGPT35): + model_id = LLMModelType.CHATGPT_4O \ No newline at end of file diff --git a/source/lambda/online/functions/functions_utils/retriever/retriever.py b/source/lambda/online/common_logic/langchain_integration/retrievers/retriever.py similarity index 93% rename from source/lambda/online/functions/functions_utils/retriever/retriever.py rename to source/lambda/online/common_logic/langchain_integration/retrievers/retriever.py index 086006e08..d1c9884c8 100644 --- a/source/lambda/online/functions/functions_utils/retriever/retriever.py +++ b/source/lambda/online/common_logic/langchain_integration/retrievers/retriever.py @@ -1,32 +1,30 @@ import json import os - os.environ["PYTHONUNBUFFERED"] = "1" import logging import sys import boto3 from common_logic.common_utils.chatbot_utils import ChatbotManager -from common_logic.common_utils.lambda_invoke_utils import chatbot_lambda_call_wrapper -from functions.functions_utils.retriever.utils.aos_retrievers import ( +from common_logic.langchain_integration.retrievers.utils.aos_retrievers import ( QueryDocumentBM25Retriever, QueryDocumentKNNRetriever, QueryQuestionRetriever, ) -from functions.functions_utils.retriever.utils.context_utils import ( +from common_logic.langchain_integration.retrievers.utils.context_utils import ( retriever_results_format, ) -from functions.functions_utils.retriever.utils.reranker import ( +from common_logic.langchain_integration.retrievers.utils.reranker import ( BGEReranker, MergeReranker, ) -from functions.functions_utils.retriever.utils.websearch_retrievers import ( +from common_logic.langchain_integration.retrievers.utils.websearch_retrievers import ( GoogleRetriever, ) from langchain.retrievers import ( - AmazonKnowledgeBasesRetriever, ContextualCompressionRetriever, ) +from langchain_community.retrievers import AmazonKnowledgeBasesRetriever from langchain.retrievers.merger_retriever import MergerRetriever from langchain.schema.runnable import RunnableLambda, RunnablePassthrough from langchain_community.retrievers import AmazonKnowledgeBasesRetriever diff --git a/source/lambda/online/functions/functions_utils/retriever/utils/aos_retrievers.py b/source/lambda/online/common_logic/langchain_integration/retrievers/utils/aos_retrievers.py similarity index 98% rename from source/lambda/online/functions/functions_utils/retriever/utils/aos_retrievers.py rename to source/lambda/online/common_logic/langchain_integration/retrievers/utils/aos_retrievers.py index 6a7b5caeb..5fb9ff4d5 100644 --- a/source/lambda/online/functions/functions_utils/retriever/utils/aos_retrievers.py +++ b/source/lambda/online/common_logic/langchain_integration/retrievers/utils/aos_retrievers.py @@ -710,6 +710,20 @@ def organize_results( for aos_hit in aos_hits: result = {"data": {}} source = aos_hit["_source"]["metadata"][source_field] + source = ( + source.replace( + "s3://aws-chatbot-knowledge-base/aws-acts-knowledge/qd/zh_CN/", + "https://www.amazonaws.cn/", + ) + .replace( + 
"s3://aws-chatbot-knowledge-base/aws-acts-knowledge/qd/en_US/", + "https://www.amazonaws.cn/en/", + ) + .replace( + "s3://aws-chatbot-knowledge-base/aws-global-site-cn-knowledge/", + "https://aws.amazon.com/", + ) + ) result["source"] = source result["score"] = aos_hit["_score"] result["detail"] = aos_hit["_source"] diff --git a/source/lambda/online/functions/functions_utils/retriever/utils/aos_utils.py b/source/lambda/online/common_logic/langchain_integration/retrievers/utils/aos_utils.py similarity index 100% rename from source/lambda/online/functions/functions_utils/retriever/utils/aos_utils.py rename to source/lambda/online/common_logic/langchain_integration/retrievers/utils/aos_utils.py diff --git a/source/lambda/online/functions/functions_utils/retriever/utils/context_utils.py b/source/lambda/online/common_logic/langchain_integration/retrievers/utils/context_utils.py similarity index 92% rename from source/lambda/online/functions/functions_utils/retriever/utils/context_utils.py rename to source/lambda/online/common_logic/langchain_integration/retrievers/utils/context_utils.py index 0b228a475..cada844c0 100644 --- a/source/lambda/online/functions/functions_utils/retriever/utils/context_utils.py +++ b/source/lambda/online/common_logic/langchain_integration/retrievers/utils/context_utils.py @@ -24,8 +24,7 @@ def contexts_trunc(docs: list[dict], context_num=2): context_strs.append(content) s.add(content) context_docs.append( - {"doc": content, - "source": doc["source"], "score": doc["score"]} + {"doc": content, "source": doc["source"], "score": doc["score"]} ) context_sources.append(doc["source"]) return { @@ -54,10 +53,8 @@ def retriever_results_format( "answer": doc.metadata.get("answer", ""), "question": doc.metadata.get("question", ""), "figure": doc.metadata.get("figure", []), - "retrieval_content": doc.metadata.get("retrieval_content", ""), } ) - if print_source: source_strs = [] for doc_dict in doc_dicts: diff --git a/source/lambda/online/functions/functions_utils/retriever/utils/reranker.py b/source/lambda/online/common_logic/langchain_integration/retrievers/utils/reranker.py similarity index 100% rename from source/lambda/online/functions/functions_utils/retriever/utils/reranker.py rename to source/lambda/online/common_logic/langchain_integration/retrievers/utils/reranker.py diff --git a/source/lambda/online/functions/functions_utils/retriever/utils/test.py b/source/lambda/online/common_logic/langchain_integration/retrievers/utils/test.py similarity index 100% rename from source/lambda/online/functions/functions_utils/retriever/utils/test.py rename to source/lambda/online/common_logic/langchain_integration/retrievers/utils/test.py diff --git a/source/lambda/online/functions/functions_utils/retriever/utils/websearch_retrievers.py b/source/lambda/online/common_logic/langchain_integration/retrievers/utils/websearch_retrievers.py similarity index 98% rename from source/lambda/online/functions/functions_utils/retriever/utils/websearch_retrievers.py rename to source/lambda/online/common_logic/langchain_integration/retrievers/utils/websearch_retrievers.py index f9b67d609..babdeb9b3 100644 --- a/source/lambda/online/functions/functions_utils/retriever/utils/websearch_retrievers.py +++ b/source/lambda/online/common_logic/langchain_integration/retrievers/utils/websearch_retrievers.py @@ -9,7 +9,7 @@ logger = logging.getLogger() logger.setLevel(logging.INFO) -from langchain.utilities import GoogleSearchAPIWrapper +from langchain_community.utilities import GoogleSearchAPIWrapper from 
langchain.callbacks.manager import CallbackManagerForRetrieverRun
 from langchain.docstore.document import Document
 from langchain.schema.retriever import BaseRetriever
diff --git a/source/lambda/online/common_logic/langchain_integration/tools/__init__.py b/source/lambda/online/common_logic/langchain_integration/tools/__init__.py
new file mode 100644
index 000000000..95a38fb01
--- /dev/null
+++ b/source/lambda/online/common_logic/langchain_integration/tools/__init__.py
@@ -0,0 +1,237 @@
+from typing import Optional,Union
+from pydantic import BaseModel
+import platform
+import json
+import inspect
+from functools import wraps
+import types
+
+from datamodel_code_generator import DataModelType, PythonVersion
+from datamodel_code_generator.format import DatetimeClassType
+from datamodel_code_generator.model import get_data_model_types
+from datamodel_code_generator.parser.jsonschema import JsonSchemaParser
+from langchain.tools.base import StructuredTool as _StructuredTool, BaseTool
+from common_logic.common_utils.constant import SceneType
+from common_logic.common_utils.lambda_invoke_utils import invoke_with_lambda
+from functools import partial
+
+
+class StructuredTool(_StructuredTool):
+    pass
+
+
+class ToolIdentifier(BaseModel):
+    scene: SceneType
+    name: str
+
+    @property
+    def tool_id(self):
+        return f"{self.scene}__{self.name}"
+
+
+class ToolManager:
+    tool_map = {}
+
+    @staticmethod
+    def convert_tool_def_to_pydantic(tool_id,tool_def:Union[dict,BaseModel]):
+        if not isinstance(tool_def,dict):
+            return tool_def
+        # convert a JSON-schema tool definition into a pydantic model class
+        current_python_version = ".".join(platform.python_version().split(".")[:-1])
+        data_model_types = get_data_model_types(
+            DataModelType.PydanticBaseModel,
+            target_python_version=PythonVersion(current_python_version),
+            target_datetime_class=DatetimeClassType.Datetime
+        )
+        parser = JsonSchemaParser(
+            json.dumps(tool_def,ensure_ascii=False,indent=2),
+            data_model_type=data_model_types.data_model,
+            data_model_root_type=data_model_types.root_model,
+            data_model_field_type=data_model_types.field_model,
+            data_type_manager_type=data_model_types.data_type_manager,
+            dump_resolve_reference_action=data_model_types.dump_resolve_reference_action,
+            use_schema_description=True
+        )
+        result = parser.parse()
+        result = result.replace("from __future__ import annotations","")
+        new_tool_module = types.ModuleType(tool_id)
+        exec(result, new_tool_module.__dict__)
+        model_cls = new_tool_module.Model
+        return model_cls
+
+    @staticmethod
+    def get_tool_identifier(scene=None,name=None,tool_identifier=None):
+        if tool_identifier is None:
+            tool_identifier = ToolIdentifier(scene=scene,name=name)
+        return tool_identifier
+
+    @classmethod
+    def register_lc_tool(
+        cls,
+        tool:BaseTool,
+        scene=None,
+        name=None,
+        tool_identifier=None,
+    ):
+        tool_identifier = cls.get_tool_identifier(
+            scene=scene,
+            name=name,
+            tool_identifier=tool_identifier
+        )
+        assert isinstance(tool,BaseTool),(tool,type(tool))
+        cls.tool_map[tool_identifier.tool_id] = tool
+        return tool
+
+    @classmethod
+    def register_func_as_tool(
+        cls,
+        func:callable,
+        tool_def:dict,
+        return_direct:bool=False,
+        scene=None,
+        name=None,
+        tool_identifier=None,
+    ):
+        tool_identifier = cls.get_tool_identifier(
+            scene=scene,
+            name=name,
+            tool_identifier=tool_identifier
+        )
+        tool = StructuredTool.from_function(
+            func=func,
+            name=tool_identifier.name,
+            args_schema=ToolManager.convert_tool_def_to_pydantic(
+                tool_id=tool_identifier.tool_id,
+                tool_def=tool_def
+            ),
+            return_direct=return_direct
+        )
+        # register the wrapped tool under its scene-qualified id
+        return ToolManager.register_lc_tool(
+            tool_identifier=tool_identifier,
+            tool=tool
+        )
+
+    @classmethod
+    def register_aws_lambda_as_tool(
+        cls,
+        lambda_name:str,
+        tool_def:dict,
+        scene=None,
+        name=None,
+        tool_identifier=None,
+        return_direct=False
+    ):
+        def _func(**kargs):
+            return invoke_with_lambda(lambda_name,kargs)
+
+        tool_identifier = cls.get_tool_identifier(
+            scene=scene,
+            name=name,
+            tool_identifier=tool_identifier
+        )
+        tool = StructuredTool.from_function(
+            func=_func,
+            name=tool_identifier.name,
+            args_schema=ToolManager.convert_tool_def_to_pydantic(
+                tool_id=tool_identifier.tool_id,
+                tool_def=tool_def
+            ),
+            return_direct=return_direct
+        )
+        return ToolManager.register_lc_tool(
+            tool_identifier=tool_identifier,
+            tool=tool
+        )
+
+    @classmethod
+    def register_common_rag_tool(
+        cls,
+        retriever_config:dict,
+        description:str,
+        scene=None,
+        name=None,
+        tool_identifier=None,
+        return_direct=False
+    ):
+        assert scene == SceneType.COMMON, scene
+        from .common_tools.rag import rag_tool
+
+        tool_identifier = cls.get_tool_identifier(
+            scene=scene,
+            name=name,
+            tool_identifier=tool_identifier
+        )
+
+        class RagModel(BaseModel):
+            class Config:
+                schema_extra = {"description": description}
+
+        tool = StructuredTool.from_function(
+            func=partial(rag_tool,
+                retriever_config=retriever_config
+            ),
+            name=tool_identifier.name,
+            args_schema=ToolManager.convert_tool_def_to_pydantic(
+                tool_id=tool_identifier.tool_id,
+                tool_def=RagModel
+            ),
+            description=description,
+            return_direct=return_direct,
+            response_format="content_and_artifact"
+        )
+
+        return ToolManager.register_lc_tool(
+            tool_identifier=tool_identifier,
+            tool=tool
+        )
+
+    @classmethod
+    def get_tool(cls, scene, name,**kwargs):
+        # dynamically import and register the tool on first use
+        tool_identifier = ToolIdentifier(scene=scene, name=name)
+        tool_id = tool_identifier.tool_id
+        if tool_id not in cls.tool_map:
+            TOOL_MODULE_LOAD_FN_MAP[tool_id](**kwargs)
+        return cls.tool_map[tool_id]
+
+
+TOOL_MODULE_LOAD_FN_MAP = {}
+
+
+def lazy_tool_load_decorator(scene:SceneType,name):
+    def decorator(func):
+        tool_identifier = ToolIdentifier(scene=scene, name=name)
+        @wraps(func)
+        def wrapper(*args, **kwargs):
+            if "tool_identifier" in inspect.signature(func).parameters:
+                kwargs = {**kwargs,"tool_identifier":tool_identifier}
+            return func(*args, **kwargs)
+        TOOL_MODULE_LOAD_FN_MAP[tool_identifier.tool_id] = wrapper
+        return wrapper
+    return decorator
+
+
+from . import common_tools
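For reviewers, a minimal sketch of registering and fetching a tool through this manager, using a hypothetical `echo` tool (the tool_def shape mirrors the registrations in common_tools below):

from common_logic.common_utils.constant import SceneType
from common_logic.langchain_integration.tools import ToolManager

def echo(text: str) -> str:
    return text

ToolManager.register_func_as_tool(
    func=echo,
    tool_def={
        "description": "Echo `text` back to the caller",
        "properties": {"text": {"description": "text to echo", "type": "string"}},
        "required": ["text"],
    },
    scene=SceneType.COMMON,
    name="echo",
    return_direct=True,  # the agent loop returns this tool's output directly
)
tool = ToolManager.get_tool(scene=SceneType.COMMON, name="echo")
print(tool.invoke({"text": "hi"}))

diff --git a/source/lambda/online/common_logic/langchain_integration/tools/common_tools/__init__.py b/source/lambda/online/common_logic/langchain_integration/tools/common_tools/__init__.py
new file mode 100644
index 000000000..0586a00bd
--- /dev/null
+++ b/source/lambda/online/common_logic/langchain_integration/tools/common_tools/__init__.py
@@ -0,0 +1,209 @@
+from typing import Optional,Dict,Any
+import sys
+from io import StringIO
+
+from .. import lazy_tool_load_decorator,ToolIdentifier,ToolManager
+from common_logic.common_utils.constant import SceneType
+
+
+@lazy_tool_load_decorator(SceneType.COMMON,"get_weather")
+def _load_weather_tool(tool_identifier:ToolIdentifier):
+    from . import get_weather
+    tool_def = {
+        "description": "Get the current weather for `city_name`",
+        "properties": {
+            "city_name": {
+                "description": "The name of the city. If the city name does not appear visibly in the user's response, please call `give_rhetorical_question` to ask for the city name.",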
+                "type": "string"
+            },
+        },
+        "required": ["city_name"]
+    }
+    ToolManager.register_func_as_tool(
+        func=get_weather.get_weather,
+        tool_def=tool_def,
+        scene=tool_identifier.scene,
+        name=tool_identifier.name,
+        return_direct=False
+    )
+
+
+@lazy_tool_load_decorator(SceneType.COMMON,"give_rhetorical_question")
+def _load_rhetorical_tool(tool_identifier:ToolIdentifier):
+    from . import give_rhetorical_question
+    tool_def = {
+        "description": "This tool is designed to handle the scenario when required parameters are missing from other tools. It prompts the user to provide the necessary information, ensuring that all essential parameters are collected before proceeding. This tool enhances user interaction by clarifying what is needed and improving the overall usability of the application.",
+        "properties": {
+            "question": {
+                "description": "The rhetorical question to user. Example:\nInput: 今天天气怎么样?\nOutput: 请问您想了解哪个城市的天气?",
+                "type": "string"
+            },
+        },
+        "required": ["question"]
+    }
+    ToolManager.register_func_as_tool(
+        scene=tool_identifier.scene,
+        name=tool_identifier.name,
+        func=give_rhetorical_question.give_rhetorical_question,
+        tool_def=tool_def,
+        return_direct=True
+    )
+
+
+@lazy_tool_load_decorator(SceneType.COMMON,"give_final_response")
+def _load_final_response_tool(tool_identifier:ToolIdentifier):
+    from . import give_final_response
+
+    tool_def = {
+        "description": "If none of the other tools need to be called, call the current tool to complete the direct response to the user.",
+        "properties": {
+            "response": {
+                "description": "Response to user",
+                "type": "string"
+            }
+        },
+        "required": ["response"]
+    }
+    ToolManager.register_func_as_tool(
+        scene=tool_identifier.scene,
+        name=tool_identifier.name,
+        func=give_final_response.give_final_response,
+        tool_def=tool_def,
+        return_direct=True
+    )
+
+
+@lazy_tool_load_decorator(SceneType.COMMON,"chat")
+def _load_chat_tool(tool_identifier:ToolIdentifier):
+    from . import chat
+    tool_def = {
+        "description": "casual talk with AI",
+        "properties": {
+            "response": {
+                "description": "response to users",
+                "type": "string"
+            }
+        },
+        "required": ["response"]
+    }
+
+    ToolManager.register_func_as_tool(
+        scene=tool_identifier.scene,
+        name=tool_identifier.name,
+        func=chat.chat,
+        tool_def=tool_def,
+        return_direct=True
+    )
+
+
+@lazy_tool_load_decorator(SceneType.COMMON,"rag_tool")
+def _load_rag_tool(tool_identifier:ToolIdentifier):
+    from . import rag
+    tool_def = {
+        "description": "private knowledge",
+        "properties": {
+            "query": {
+                "description": "query for retrieve",
+                "type": "string"
+            }
+        }
+    }
+    ToolManager.register_func_as_tool(
+        scene=tool_identifier.scene,
+        name=tool_identifier.name,
+        func=rag.rag_tool,
+        tool_def=tool_def,
+        return_direct=True
+    )
+
+
+################### langchain tools #######################
+
+@lazy_tool_load_decorator(SceneType.COMMON,"python_repl")
+def _load_python_repl_tool(tool_identifier:ToolIdentifier):
+    from langchain_core.tools import Tool
+    from langchain_experimental.utilities import PythonREPL as _PythonREPL
+    from langchain_experimental.utilities.python import warn_once
+    import multiprocessing
+
+    # Modify LangChain's PythonREPL to fit AWS Lambda, whose execution
+    # environment has no /dev/shm; a Pipe is used instead of a Queue.
+    class PythonREPL(_PythonREPL):
+        @classmethod
+        def worker(
+            cls,
+            command: str,
+            globals: Optional[Dict],
+            locals: Optional[Dict],
+            conn: Any,
+        ) -> None:
+            old_stdout = sys.stdout
+            sys.stdout = mystdout = StringIO()
+            try:
+                cleaned_command = cls.sanitize_input(command)
+                exec(cleaned_command, globals, locals)
+                sys.stdout = old_stdout
+                conn.send(mystdout.getvalue())
+            except Exception as e:
+                sys.stdout = old_stdout
+                conn.send(repr(e))
+            conn.close()
+
+        def run(self, command: str, timeout: Optional[int] = None) -> str:
+            """Run command with own globals/locals and returns anything printed.
+            Timeout after the specified number of seconds."""
+
+            # Warn against dangers of PythonREPL
+            warn_once()
+
+            parent_conn, child_conn = multiprocessing.Pipe()
+
+            # Only use multiprocessing if we are enforcing a timeout
+            if timeout is not None:
+                # create a Process
+                p = multiprocessing.Process(
+                    target=self.worker, args=(command, self.globals, self.locals, child_conn)
+                )
+
+                # start it
+                p.start()
+
+                # wait for the process to finish or kill it after timeout seconds
+                p.join(timeout)
+
+                if p.is_alive():
+                    p.terminate()
+                    return "Execution timed out"
+            else:
+                self.worker(command, self.globals, self.locals, child_conn)
+            # get the result from the worker function
+            return parent_conn.recv()
+
+    python_repl = PythonREPL()
+
+    def _run(command: str, timeout = None) -> str:
+        res = python_repl.run(command=command,timeout=timeout)
+        if not res:
+            raise ValueError("The current tool does not produce a result, modify your code and continue to call the `python_repl` tool, making sure to use the `print` function to output the final result.")
+        return res
+
+    description = """\
+This tool handles scientific computing problems by executing python code. Typical scenarios include the following:
+1. Mathematical arithmetic/numerical comparisons.
+2. Code execution scenarios, such as data analysis, visualization, etc.
+
+Input should be valid python code. If you want to see the output of a value, you must print it out with a `print(...)` statement.
+""" + repl_tool = Tool( + name="python_repl", + description=description, + func=_run + ) + ToolManager.register_lc_tool( + scene=tool_identifier.scene, + name=tool_identifier.name, + tool=repl_tool + ) + diff --git a/source/lambda/online/common_logic/langchain_integration/tools/common_tools/chat.py b/source/lambda/online/common_logic/langchain_integration/tools/common_tools/chat.py new file mode 100644 index 000000000..c007c3534 --- /dev/null +++ b/source/lambda/online/common_logic/langchain_integration/tools/common_tools/chat.py @@ -0,0 +1,5 @@ +# give chat response + +def chat(response:str): + return response + \ No newline at end of file diff --git a/source/lambda/online/functions/lambda_common_tools/comparison_rag.py b/source/lambda/online/common_logic/langchain_integration/tools/common_tools/comparison_rag.py similarity index 100% rename from source/lambda/online/functions/lambda_common_tools/comparison_rag.py rename to source/lambda/online/common_logic/langchain_integration/tools/common_tools/comparison_rag.py diff --git a/source/lambda/online/functions/lambda_common_tools/get_weather.py b/source/lambda/online/common_logic/langchain_integration/tools/common_tools/get_weather.py similarity index 97% rename from source/lambda/online/functions/lambda_common_tools/get_weather.py rename to source/lambda/online/common_logic/langchain_integration/tools/common_tools/get_weather.py index 9f19fada8..ccecb204c 100644 --- a/source/lambda/online/functions/lambda_common_tools/get_weather.py +++ b/source/lambda/online/common_logic/langchain_integration/tools/common_tools/get_weather.py @@ -1,4 +1,4 @@ -# test tool +# get weather tool import requests def get_weather(city_name:str): diff --git a/source/lambda/online/common_logic/langchain_integration/tools/common_tools/give_final_response.py b/source/lambda/online/common_logic/langchain_integration/tools/common_tools/give_final_response.py new file mode 100644 index 000000000..82146d9b0 --- /dev/null +++ b/source/lambda/online/common_logic/langchain_integration/tools/common_tools/give_final_response.py @@ -0,0 +1,4 @@ +# give final response tool + +def give_final_response(response:str): + return response \ No newline at end of file diff --git a/source/lambda/online/common_logic/langchain_integration/tools/common_tools/give_rhetorical_question.py b/source/lambda/online/common_logic/langchain_integration/tools/common_tools/give_rhetorical_question.py new file mode 100644 index 000000000..ac78268af --- /dev/null +++ b/source/lambda/online/common_logic/langchain_integration/tools/common_tools/give_rhetorical_question.py @@ -0,0 +1,4 @@ +# give rhetorical question + +def give_rhetorical_question(question:str): + return question \ No newline at end of file diff --git a/source/lambda/online/functions/lambda_common_tools/rag.py b/source/lambda/online/common_logic/langchain_integration/tools/common_tools/rag.py similarity index 50% rename from source/lambda/online/functions/lambda_common_tools/rag.py rename to source/lambda/online/common_logic/langchain_integration/tools/common_tools/rag.py index 3ce4622ef..1d35e7103 100644 --- a/source/lambda/online/functions/lambda_common_tools/rag.py +++ b/source/lambda/online/common_logic/langchain_integration/tools/common_tools/rag.py @@ -1,41 +1,35 @@ -from common_logic.common_utils.lambda_invoke_utils import invoke_lambda +from common_logic.common_utils.lambda_invoke_utils import invoke_lambda,StateContext from common_logic.common_utils.prompt_utils import get_prompt_templates_from_ddb from 
common_logic.common_utils.constant import ( LLMTaskType ) from common_logic.common_utils.lambda_invoke_utils import send_trace -from common_logic.common_utils.monitor_utils import format_rag_data +from common_logic.langchain_integration.retrievers.retriever import lambda_handler as retrieve_fn +from common_logic.langchain_integration.chains import LLMChain +import threading - -def lambda_handler(event_body, context=None): - state = event_body["state"] +def rag_tool(retriever_config:dict,query=None): + state = StateContext.get_current_state() + # state = event_body['state'] context_list = [] - # Add qq match results - context_list.extend(state["qq_match_results"]) + # add qq match results + context_list.extend(state['qq_match_results']) figure_list = [] - retriever_params = state["chatbot_config"]["private_knowledge_config"] - retriever_params["query"] = state[retriever_params.get( - "retriever_config", {}).get("query_key", "query")] - output: str = invoke_lambda( - event_body=retriever_params, - lambda_name="Online_Functions", - lambda_module_path="functions.functions_utils.retriever.retriever", - handler_name="lambda_handler", - ) + retriever_params = retriever_config + retriever_params["query"] = query or state[retriever_config.get("query_key","query")] + output = retrieve_fn(retriever_params) for doc in output["result"]["docs"]: context_list.append(doc["page_content"]) - figure_list = figure_list + doc.get("figure", []) - + figure_list = figure_list + doc.get("figure",[]) + # Remove duplicate figures unique_set = {tuple(d.items()) for d in figure_list} unique_figure_list = [dict(t) for t in unique_set] state['extra_response']['figures'] = unique_figure_list - - context_md = format_rag_data(output["result"]["docs"], state["qq_match_contexts"]) - send_trace( - f"\n\n{context_md}\n\n", enable_trace=state["enable_trace"]) - + + send_trace(f"\n\n**rag-contexts:**\n\n {context_list}", enable_trace=state["enable_trace"]) + group_name = state['chatbot_config']['group_name'] llm_config = state["chatbot_config"]["private_knowledge_config"]['llm_config'] chatbot_id = state["chatbot_config"]["chatbot_id"] @@ -47,23 +41,22 @@ def lambda_handler(event_body, context=None): chatbot_id=chatbot_id ) - output: str = invoke_lambda( - lambda_name="Online_LLM_Generate", - lambda_module_path="lambda_llm_generate.llm_generate", - handler_name="lambda_handler", - event_body={ - "llm_config": { + llm_config = { **prompt_templates_from_ddb, **llm_config, "stream": state["stream"], "intent_type": task_type, - }, - "llm_input": { + } + + llm_input = { "contexts": context_list, "query": state["query"], - "chat_history": state["chat_history"], - }, - }, + "chat_history": state["chat_history"] + } + + chain = LLMChain.get_chain( + **llm_config ) + output = chain.invoke(llm_input) + return output,output - return {"code": 0, "result": output} diff --git a/source/lambda/online/functions/lambda_common_tools/step_back_rag.py b/source/lambda/online/common_logic/langchain_integration/tools/common_tools/step_back_rag.py similarity index 100% rename from source/lambda/online/functions/lambda_common_tools/step_back_rag.py rename to source/lambda/online/common_logic/langchain_integration/tools/common_tools/step_back_rag.py diff --git a/source/lambda/online/functions/__init__.py b/source/lambda/online/functions/__init__.py deleted file mode 100644 index 2497bbe94..000000000 --- a/source/lambda/online/functions/__init__.py +++ /dev/null @@ -1,11 +0,0 @@ -# tool -from ._tool_base import get_tool_by_name,Tool,tool_manager - -def 
init_common_tools(): - from . import lambda_common_tools - -def init_aws_qa_tools(): - from . import lambda_aws_qa_tools - -def init_retail_tools(): - from . import lambda_retail_tools \ No newline at end of file diff --git a/source/lambda/online/functions/_tool_base.py b/source/lambda/online/functions/_tool_base.py deleted file mode 100644 index 63fca04dc..000000000 --- a/source/lambda/online/functions/_tool_base.py +++ /dev/null @@ -1,54 +0,0 @@ -from typing import Union,Callable -from langchain.pydantic_v1 import BaseModel,Field -from enum import Enum -from common_logic.common_utils.constant import SceneType,ToolRuningMode - -class ToolDefType(Enum): - openai = "openai" - - -class Tool(BaseModel): - name: str = Field(description="tool name") - lambda_name: str = Field(description="lambda name") - lambda_module_path: Union[str, Callable] = Field(description="local module path") - handler_name:str = Field(description="local handler name", default="lambda_handler") - tool_def: dict = Field(description="tool definition") - tool_init_kwargs:dict = Field(description="tool initial kwargs",default=None) - running_mode: str = Field(description="tool running mode, can be loop or output", default=ToolRuningMode.LOOP) - tool_def_type: ToolDefType = Field(description="tool definition type",default=ToolDefType.openai.value) - scene: str = Field(description="tool use scene",default=SceneType.COMMON) - # should_ask_parameter: bool = Field(description="tool use scene") - -class ToolManager: - def __init__(self) -> None: - self.tools = {} - - def get_tool_id(self,tool_name:str,scene:str): - return f"{tool_name}__{scene}" - - def register_tool(self,tool_info:dict): - tool_def = tool_info['tool_def'] - default_paramters = { - "type": "object", - "properties": {}, - "required": [] - } - if "parameters" not in tool_def: - tool_def['parameters'] = default_paramters - else: - tool_def['parameters'] = {**default_paramters, **tool_def['parameters']} - - tool = Tool(**tool_info) - assert tool.tool_def_type == ToolDefType.openai.value, f"tool_def_type: {tool.tool_def_type} not support" - self.tools[self.get_tool_id(tool.name,tool.scene)] = tool - - def get_tool_by_name(self,name,scene=SceneType.COMMON): - return self.tools[self.get_tool_id(name,scene)] - -tool_manager = ToolManager() -get_tool_by_name = tool_manager.get_tool_by_name - - - - - diff --git a/source/lambda/online/functions/lambda_aws_qa_tools/__init__.py b/source/lambda/online/functions/lambda_aws_qa_tools/__init__.py deleted file mode 100644 index 635eee674..000000000 --- a/source/lambda/online/functions/lambda_aws_qa_tools/__init__.py +++ /dev/null @@ -1,177 +0,0 @@ -from common_logic.common_utils.constant import SceneType,ToolRuningMode -from .._tool_base import tool_manager -from . 
import ( - check_service_availability, - explain_abbr, - service_org, - aws_ec2_price, - transfer -) - -SCENE = SceneType.AWS_QA -LAMBDA_NAME = "lambda_aws_qa_tools" - -tool_manager.register_tool({ - "name": "service_availability", - "scene": SCENE, - "lambda_name": LAMBDA_NAME, - "lambda_module_path": check_service_availability.lambda_handler, - "tool_def":{ - "name": "service_availability", - "description":"query the availability of service in specified region", - "parameters":{ - "type":"object", - "properties":{ - "service":{ - "type":"string", - "description":"the AWS service name" - }, - "region":{ - "type":"string", - "description":"the AWS region name where the service is located in, for example us-east-1(N.Virginal), us-west-2(Oregon), eu-west-2(London), ap-southeast-1(Singapore)" - } - }, - "required":[ - "service", - "region" - ] - }, - "running_mode": ToolRuningMode.LOOP - } -}) - -tool_manager.register_tool({ - "name": "explain_abbr", - "scene": SCENE, - "lambda_name": LAMBDA_NAME, - "lambda_module_path": explain_abbr.lambda_handler, - "tool_def":{ - "name": "explain_abbr", - "description": "explain abbreviation for user", - "parameters": { - "type": "object", - "properties": { - "abbr": { - "type": "string", - "description": "the abbreviation of terms in AWS" - } - }, - "required": ["abbr"] - }, - "running_mode": ToolRuningMode.ONCE - } -}) - - -tool_manager.register_tool({ - "name": "get_contact", - "scene": SCENE, - "lambda_name": LAMBDA_NAME, - "lambda_module_path": service_org.lambda_handler, - "tool_def":{ - "name":"get_contact", - "description":"query the contact person in the 'SSO' organization", - "parameters":{ - "type":"object", - "properties":{ - "employee":{ - "type":"string", - "description":"employee name in the 'SSO' organization" - }, - "role":{ - "type":"string", - "description":"employee's role, usually it's Sales, Product Manager, Tech, Program Manager, Leader" - }, - "domain":{ - "type":"string", - "description":"Techical domain for the employee,For Example AIML, Analytics, Compute" - }, - "scope":{ - "type":"string", - "description":"employee's scope of responsibility. For Sales role, it could be territory like north/east/south/west, For tech role, it could be specific service" - } - }, - "required":[ - "employee" - ] - }, - "running_mode": ToolRuningMode.LOOP - } -}) - -tool_manager.register_tool({ - "name": "ec2_price", - "scene": SCENE, - "lambda_name": LAMBDA_NAME, - "lambda_module_path": aws_ec2_price.lambda_handler, - "tool_def": { - "name": "ec2_price", - "description": "query the price of AWS ec2 instance", - "parameters": { - "type": "object", - "properties": { - "instance_type": { - "type": "string", - "description": "the AWS ec2 instance type, for example, c5.xlarge, m5.large, t3.mirco, g4dn.2xlarge, if it is a partial of the instance type, you should try to auto complete it. 
for example, if it is r6g.2x, you can complete it as r6g.2xlarge" - }, - "region": { - "type": "string", - "description": "the AWS region name where the ec2 is located in, for example us-east-1, us-west-1, if it is common words such as 'us east 1','美东1','美西2',you should try to normalize it to standard AWS region name, for example, 'us east 1' is normalized to 'us-east-1', '美东2' is normalized to 'us-east-2','美西2' is normalized to 'us-west-2','北京' is normalized to 'cn-north-1', '宁夏' is normalized to 'cn-northwest-1', '中国区' is normalized to 'cn-north-1'" - }, - "os": { - "type": "string", - "description": "the operating system of ec2 instance, the valid value should be 'Linux' or 'Windows'" - }, - "term": { - "type": "string", - "description": "the payment term, the valid value should be 'OnDemand' or 'Reserved' " - }, - "purchase_option": { - "type": "string", - "description": "the purchase option of Reserved instance, the valid value should be 'No Upfront', 'Partial Upfront' or 'All Upfront' " - } - }, - "required": ["instance_type"] - }, - "running_mode": ToolRuningMode.LOOP - } -}) - - -tool_manager.register_tool({ - "name":"transfer", - "scene": SCENE, - "lambda_name": LAMBDA_NAME, - "lambda_module_path": transfer.lambda_handler, - "tool_def": { - "name": "转人工", - "description": "转人工" - }, - "running_mode": ToolRuningMode.ONCE -}) - -# tool_manager.register_tool({ -# "name":"assist", -# "lambda_name": "", -# "lambda_module_path": "", -# "tool_def": { -# "name": "assist", -# "description": "assist user to do some office work", -# "parameters": { -# "type": "object", -# "properties": { -# "response": { -# "description": "Response to user", -# "type": "string" -# } -# }, -# "required": ["response"] -# }, -# }, -# "running_mode":ToolRuningMode.ONCE -# }) - - - - - diff --git a/source/lambda/online/functions/lambda_aws_qa_tools/aws_ec2_price.py b/source/lambda/online/functions/lambda_aws_qa_tools/aws_ec2_price.py deleted file mode 100644 index b307879b9..000000000 --- a/source/lambda/online/functions/lambda_aws_qa_tools/aws_ec2_price.py +++ /dev/null @@ -1,191 +0,0 @@ - -import json -from typing import Union,Optional, Union -import os -import boto3 -import requests -from pydantic import BaseModel,ValidationInfo, field_validator, Field -import re - -class EC2PriceRequest(BaseModel): - region: Optional[str] = Field (description='region name', default='us-east-1') - term: Optional[str] = Field (description='purchase term', default='OnDemand') - instance_type: str - purchase_option: Optional[str] = Field (description='purchase option', default='') - os:Optional[str] = Field(description='Operation system', default='Linux') - - @classmethod - def validate_ec2_instance_type(cls,instance_type): - # support other instance ml.m5.xlarge - # pattern = r'^(?:[a-z0-9][a-z0-9.-]*[a-z0-9])?(?:[a-z](?:[a-z0-9-]*[a-z0-9])?)?(\.[a-z0-9](?:[a-z0-9-]*[a-z0-9])?)*\.[a-z0-9]{2,63}$' - ## only ec2, for m5.xlarge - pattern = r"^([a-z0-9]+\.[a-z0-9]+)$" - - return re.match(pattern, instance_type) is not None and not instance_type.endswith(".") - - @classmethod - def validate_region_name(cls,region_name): - pattern = r"^[a-z]{2}(-gov)?-(central|east|north|south|west|northeast|northwest|southeast|southwest)-\d$" - return re.match(pattern, region_name) is not None - - @field_validator('region') - def validate_region(cls, value:str,info: ValidationInfo): - if not cls.validate_region_name(value): - raise ValueError(f"{value} is not a valid AWS region name.") - return value - - @field_validator('term') - def 
validate_term(cls, value:str,info: ValidationInfo): - allowed_values = ['OnDemand','Reserved'] - if value not in allowed_values: - raise ValueError(f'value must be one of {allowed_values}') - return value - - @field_validator('purchase_option') - def validate_option(cls, value:str,info: ValidationInfo): - allowed_values = ['No Upfront','All Upfront','Partial Upfront',''] - if value not in allowed_values: - raise ValueError(f'value must be one of {allowed_values}') - return value - - @field_validator('os') - def validate_os(cls, value:str,info: ValidationInfo): - allowed_values = ['Linux','Windows'] - if value not in allowed_values: - raise ValueError(f'value must be one of {allowed_values}') - return value - - @field_validator('instance_type') - def validate_instance_type(cls, value:str,info: ValidationInfo): - if not cls.validate_ec2_instance_type(value): - raise ValueError(f'{value} is not a valid EC2 instance type name.') - return value - -def purchase_option_filter(term_attri:dict, value:str) -> dict: - if not value: - return True - if term_attri: - purchaseOption = term_attri.get('PurchaseOption') - if purchaseOption == value: - return True - return None - - -def remote_proxy_call(**args): - api = os.environ.get('api_endpoint') - key = os.environ.get('api_key') - payload = json.dumps(args) - if not api or not key: - return None - try: - resp = requests.post(api,headers={"Content-Type":"application/json","Authorization":f"Bearer {key}"},data=payload) - data = resp.json() - return data.get('message') - except Exception as e: - print(e) - return None - - -def query_ec2_price(args) -> Union[str,None]: - request = EC2PriceRequest(**args) - region = request.region - term = request.term - instance_type = request.instance_type - os = request.os - purchase_option = request.purchase_option - if region.startswith('cn-'): - return remote_proxy_call(**args) - else: - pricing_client = boto3.client('pricing',region_name='us-east-1') - def parse_price(products,term): - ret = [] - for product in products: - product = json.loads(product) - on_demand_terms = product['terms'].get(term) - if on_demand_terms and term == 'Reserved': - for _, term_details in on_demand_terms.items(): - price_dimensions = term_details['priceDimensions'] - term_attri = term_details.get('termAttributes') - is_valid = purchase_option_filter(term_attri,purchase_option) - option = term_attri.get('PurchaseOption') - if is_valid: - for _, price_dimension in price_dimensions.items(): - price = price_dimension['pricePerUnit']['CNY'] if region.startswith('cn-') else price_dimension['pricePerUnit']['USD'] - dollar = 'CNY' if region.startswith('cn-') else 'USD' - desc = price_dimension['description'] - unit = price_dimension['unit'] - if not desc.startswith("$0.00 per") and not desc.startswith("USD 0.0 per") \ - and not desc.startswith("0.00 CNY per") and not desc.startswith("CNY 0.0 per"): - ret.append(f"Region: {region}, Purchase option: {option}, Lease contract length: {term_attri.get('LeaseContractLength')}, Offering Class: {term_attri.get('OfferingClass')}, Price per {unit}: {dollar} {price} , description: {desc}") - elif on_demand_terms: - for _, term_details in on_demand_terms.items(): - price_dimensions = term_details['priceDimensions'] - if price_dimensions: - for _, price_dimension in price_dimensions.items(): - price = price_dimension['pricePerUnit']['CNY'] if region.startswith('cn-') else price_dimension['pricePerUnit']['USD'] - desc = price_dimension['description'] - unit = price_dimension['unit'] - if not 
desc.startswith("$0.00 per") and not desc.startswith("USD 0.0 per") and not desc.startswith("0.00 CNY per"): - ret.append(f"Region: {region}, Price per {unit}: {price}, description: {desc}") - return ret - filters = [ - { - 'Type': 'TERM_MATCH', - 'Field': 'instanceType', - 'Value': instance_type - }, - { - 'Type': 'TERM_MATCH', - 'Field': 'ServiceCode', - 'Value': 'AmazonEC2' - }, - { - 'Type': 'TERM_MATCH', - 'Field': 'regionCode', - 'Value': region - }, - { - 'Type': 'TERM_MATCH', - 'Field': 'tenancy', - 'Value': 'Shared' - }, - { - 'Type': 'TERM_MATCH', - 'Field': 'operatingSystem', - 'Value': os - } - ] - - if purchase_option: - filters = filters + [{ - 'Type': 'TERM_MATCH', - 'Field': 'PurchaseOption', - 'Value': purchase_option - }] - - response = pricing_client.get_products( - ServiceCode='AmazonEC2', - Filters=filters - ) - products = response['PriceList'] - prices = parse_price(products,term) - - return '\n'.join(prices) if prices else None - -def lambda_handler(event, context=None): - ''' - event: { - "body": "{ - \"instance_type\":\"m5.xlarge\", - \"region\":\"us-east-1\", - \"term\":\"eserved\", - \"purchase_option\":\"All Upfront\" - }" - } - ''' - result = query_ec2_price(event["kwargs"]) - return {"code":0, "result": result} - -if __name__ == "__main__": - args = {'instance_type':'m5.xlarge','region':'us-east-1','term':'Reserved','purchase_option':'All Upfront'} - print(query_ec2_price(args)) \ No newline at end of file diff --git a/source/lambda/online/functions/lambda_aws_qa_tools/check_service_availability.py b/source/lambda/online/functions/lambda_aws_qa_tools/check_service_availability.py deleted file mode 100644 index a3bd18c08..000000000 --- a/source/lambda/online/functions/lambda_aws_qa_tools/check_service_availability.py +++ /dev/null @@ -1,80 +0,0 @@ - -import json -from typing import Union,Optional, Union -import os -import boto3 -import requests -from pydantic import BaseModel,ValidationInfo, field_validator, Field -import re - -def get_all_regions(): - ec2 = boto3.client('ec2') - regions = ec2.describe_regions() - return [region['RegionName'] for region in regions['Regions']] + ['cn-north-1', 'cn-northwest-1'] - -def get_all_services(): - session = boto3.Session() - services = session.get_available_services() - return services - -class ServiceAvailabilityRequest(BaseModel): - region: str = Field (description='region name') - service: str = Field (description='service name') - - # def __init__(self, **data): - # super().__init__(**data) - # self.region = self.region.lower() - # self.service = self.service.lower() - - @field_validator('region') - @classmethod - def validate_region(cls, region): - if region not in region_list: - raise ValueError("region must be in aws region list.") - return region - - @field_validator('service') - @classmethod - def validate_service(cls, service): - if service not in service_list: - raise ValueError("service must be in aws service list.") - return service - -region_list = get_all_regions() -service_list = get_all_services() - -def check_service_availability(args): - try: - request = ServiceAvailabilityRequest(**args) - except Exception as e: - return str(e) - service = request.service - region = request.region - try: - # Attempt to create a client for the specified service in the specified region - boto3.client(service, region_name=region) - return "available" - except Exception as e: - # Handle exceptions, which may indicate that the service is not available in the region - print(f"Service {service} is not available in {region}: 
{e}") - return "unavailable" - -def lambda_handler(event, context=None): - ''' - event: { - "service": "bedrock", - "region": "cn-north-1" - } - ''' - result = check_service_availability(event) - return {"code":0, "result": result} - -if __name__ == "__main__": - # Example usage - args = {'service':'bedrock','region':'cn-north-1'} - is_available = check_service_availability(args) - print(f'Service {args["service"]} is available in {args["region"]}: {is_available}') \ No newline at end of file diff --git a/source/lambda/online/functions/lambda_aws_qa_tools/comfort.py b/source/lambda/online/functions/lambda_aws_qa_tools/comfort.py deleted file mode 100644 index 04bd4d97d..000000000 --- a/source/lambda/online/functions/lambda_aws_qa_tools/comfort.py +++ /dev/null @@ -1,3 +0,0 @@ -# give comfort response -def lambda_handler(event_body,context=None): - return {"code": 0, "result": "不好意思没能帮到您,是否帮你转人工客服?"} \ No newline at end of file diff --git a/source/lambda/online/functions/lambda_aws_qa_tools/explain_abbr.py b/source/lambda/online/functions/lambda_aws_qa_tools/explain_abbr.py deleted file mode 100644 index 76743bac6..000000000 --- a/source/lambda/online/functions/lambda_aws_qa_tools/explain_abbr.py +++ /dev/null @@ -1,4 +0,0 @@ -# explain abbr tool -def lambda_handler(event_body,context=None): - return {"code": 0, "result":event_body['kwargs']['abbr']} - \ No newline at end of file diff --git a/source/lambda/online/functions/lambda_aws_qa_tools/service_org.py b/source/lambda/online/functions/lambda_aws_qa_tools/service_org.py deleted file mode 100644 index 235f324a1..000000000 --- a/source/lambda/online/functions/lambda_aws_qa_tools/service_org.py +++ /dev/null @@ -1,80 +0,0 @@ -from langchain.prompts import PromptTemplate -from langchain.chains import LLMChain -from langchain.llms.bedrock import Bedrock -from common_logic.common_utils.constant import LLMModelType -import boto3 -import os - -BEDROCK_REGION = os.environ.get('region','us-west-2') -def service_org(**args): - context = """placeholder""" - - prompt_tmp = """ - 你是云服务AWS的智能客服机器人AWSBot - - 给你 SSO (Service Specialist Organization) 的组织信息 - {context} - - Job role (角色, 岗位类型) description: - - GTMS: Go To Market Specialist - - SS: Specialist Sales - - SSA: Specialist Solution Architecture - - TPM: - - PM: Project Manager - - Scope means job scope - service_name equals the business unit - - If the context does not contain the knowledge for the question, truthfully say you do not know. - Don't put two people's names together. For example, zheng zhang is not equal to zheng hao, and xueqing is not equal to Xueqing Lai - - Find out the most relevant context, and give the answer according to the context - Skip the preamble; go straight to the point. - Only give the final answer. - Do not repeat similar answers. 
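Worth noting while this file is deleted: it still imports Bedrock from the legacy langchain.llms.bedrock path. Under the langchain_community split this PR adopts in sm_utils.py, a hypothetical equivalent of the chain construction would look like the sketch below; the prompt and model parameters are abbreviated stand-ins, not the values from the file above.

```python
# Hypothetical port of the service_org chain to langchain_community import paths.
import boto3
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain_community.llms import Bedrock  # was: from langchain.llms.bedrock import Bedrock

bedrock_client = boto3.client("bedrock-runtime", region_name="us-west-2")
llm = Bedrock(
    model_id="anthropic.claude-v2",
    client=bedrock_client,
    model_kwargs={"temperature": 0.01, "top_p": 0.85, "max_tokens_to_sample": 1024},
)
prompt = PromptTemplate(
    template="{context}\n\nQuestion: {question}",
    input_variables=["context", "question"],
)
chain = LLMChain(llm=llm, prompt=prompt, verbose=False)
answer = chain.run({"question": "Bruce负责哪一块?", "context": "..."}).strip()
```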
- 使用中文回复,人名不需要按照中文习惯回复 - - {question} - """ - - def create_prompt_templete(prompt_template): - PROMPT = PromptTemplate( - template=prompt_template, - input_variables=["context",'question','chat_history'] - ) - return PROMPT - - - boto3_bedrock = boto3.client( - service_name="bedrock-runtime", - region_name=BEDROCK_REGION - ) - - parameters = { - "max_tokens_to_sample": 8096, - "stop_sequences": ["\nObservation"], - "temperature":0.01, - "top_p":0.85 - } - - model_id = LLMModelType.CLAUDE_2 - llm = Bedrock(model_id=model_id, client=boto3_bedrock, model_kwargs=parameters) - - prompt = create_prompt_templete(prompt_tmp) - llmchain = LLMChain(llm=llm,verbose=False,prompt = prompt) - answer = llmchain.run({'question':args.get('query'), "context": context}) - answer = answer.strip() - return answer - -def lambda_handler(event, context=None): - ''' - event: { - \"query\":\"Bruce负责哪一块?\" - } - ''' - result = service_org(**event['kwargs']) - return {"code":0, "result": result} - -if __name__ == "__main__": - args = {'query': 'Bruce负责哪一块?'} - print(service_org(**args)) \ No newline at end of file diff --git a/source/lambda/online/functions/lambda_aws_qa_tools/transfer.py b/source/lambda/online/functions/lambda_aws_qa_tools/transfer.py deleted file mode 100644 index 7fa33fcf6..000000000 --- a/source/lambda/online/functions/lambda_aws_qa_tools/transfer.py +++ /dev/null @@ -1,3 +0,0 @@ -# give transfer response -def lambda_handler(event_body,context=None): - return {"code": 0, "result": "立即为您转人工客服,请稍后"} \ No newline at end of file diff --git a/source/lambda/online/functions/lambda_common_tools/__init__.py b/source/lambda/online/functions/lambda_common_tools/__init__.py deleted file mode 100644 index c57069898..000000000 --- a/source/lambda/online/functions/lambda_common_tools/__init__.py +++ /dev/null @@ -1,121 +0,0 @@ -from common_logic.common_utils.constant import SceneType, ToolRuningMode -from .._tool_base import tool_manager -from . 
import ( - get_weather, - give_rhetorical_question, - give_final_response, - chat, - rag -) - - -SCENE = SceneType.COMMON -LAMBDA_NAME = "lambda_common_tools" - -tool_manager.register_tool({ - "name": "get_weather", - "scene": SCENE, - "lambda_name": LAMBDA_NAME, - "lambda_module_path": get_weather.lambda_handler, - "tool_def": { - "name": "get_weather", - "description": "Get the current weather for `city_name`", - "parameters": { - "type": "object", - "properties": { - "city_name": { - "description": "The name of the city to be queried", - "type": "string" - }, - }, - "required": ["city_name"] - } - }, - "running_mode": ToolRuningMode.LOOP -}) - - -tool_manager.register_tool( - { - "name": "give_rhetorical_question", - "scene": SCENE, - "lambda_name": LAMBDA_NAME, - "lambda_module_path": give_rhetorical_question.lambda_handler, - "tool_def": { - "name": "give_rhetorical_question", - "description": "If the user's question is not clear and specific, resulting in the inability to call other tools, please call this tool to ask the user a rhetorical question", - "parameters": { - "type": "object", - "properties": { - "question": { - "description": "The rhetorical question to user", - "type": "string" - }, - }, - "required": ["question"], - }, - }, - "running_mode": ToolRuningMode.ONCE - } -) - - -tool_manager.register_tool( - { - "name": "give_final_response", - "scene": SCENE, - "lambda_name": LAMBDA_NAME, - "lambda_module_path": give_final_response.lambda_handler, - "tool_def": { - "name": "give_final_response", - "description": "If none of the other tools need to be called, call the current tool to complete the direct response to the user.", - "parameters": { - "type": "object", - "properties": { - "response": { - "description": "Response to user", - "type": "string" - } - }, - "required": ["response"] - }, - }, - "running_mode": ToolRuningMode.ONCE - } -) - - -tool_manager.register_tool({ - "name": "chat", - "scene": SCENE, - "lambda_name": LAMBDA_NAME, - "lambda_module_path": chat.lambda_handler, - "tool_def": { - "name": "chat", - "description": "casual talk with AI", - "parameters": { - "type": "object", - "properties": { - "response": { - "description": "response to users", - "type": "string" - }}, - "required": ["response"] - }, - }, - "running_mode": ToolRuningMode.ONCE -}) - - -tool_manager.register_tool({ - "name": "rag_tool", - "scene": SCENE, - "lambda_name": LAMBDA_NAME, - "lambda_module_path": rag.lambda_handler, - "tool_def": { - "name": "rag_tool", - "description": "private knowledge", - "parameters": {} - }, - "running_mode": ToolRuningMode.ONCE -}) diff --git a/source/lambda/online/functions/lambda_common_tools/chat.py b/source/lambda/online/functions/lambda_common_tools/chat.py deleted file mode 100644 index 7f4cb60d0..000000000 --- a/source/lambda/online/functions/lambda_common_tools/chat.py +++ /dev/null @@ -1,7 +0,0 @@ -# give chat response -def lambda_handler(event_body,context=None): - try: - result = event_body['kwargs']['response'] - return {"code": 0, "result":result} - except KeyError: - return {"code": 1, "result": "The parameter “response” not found."} \ No newline at end of file diff --git a/source/lambda/online/functions/lambda_common_tools/give_final_response.py b/source/lambda/online/functions/lambda_common_tools/give_final_response.py deleted file mode 100644 index 654ad136c..000000000 --- a/source/lambda/online/functions/lambda_common_tools/give_final_response.py +++ /dev/null @@ -1,7 +0,0 @@ -# give final response -def 
lambda_handler(event_body,context=None): - try: - result = event_body['kwargs']['response'] - return {"code": 0, "result":result} - except KeyError: - return {"code": 1, "result": "The parameter 'response' was not found."} \ No newline at end of file diff --git a/source/lambda/online/functions/lambda_common_tools/give_rhetorical_question.py b/source/lambda/online/functions/lambda_common_tools/give_rhetorical_question.py deleted file mode 100644 index eeb17e4c6..000000000 --- a/source/lambda/online/functions/lambda_common_tools/give_rhetorical_question.py +++ /dev/null @@ -1,7 +0,0 @@ -# give rhetorical question -def lambda_handler(event_body,context=None): - try: - result = event_body['kwargs']['question'] - return {"code": 0, "result":result} - except KeyError: - return {"code": 1, "result": "The parameter 'question' was not found."} \ No newline at end of file diff --git a/source/lambda/online/functions/lambda_retail_tools/__init__.py b/source/lambda/online/functions/lambda_retail_tools/__init__.py deleted file mode 100644 index 42e88a3c6..000000000 --- a/source/lambda/online/functions/lambda_retail_tools/__init__.py +++ /dev/null @@ -1,337 +0,0 @@ -from common_logic.common_utils.constant import SceneType,ToolRuningMode -from .._tool_base import tool_manager -from . import daily_reception -from . import goods_exchange -from . import customer_complain -from . import size_guide -from . import product_information_search -from . import order_info -from . import product_aftersales -from ..lambda_common_tools import give_rhetorical_question -from ..lambda_common_tools import give_final_response -from ..lambda_common_tools import comparison_rag, step_back_rag -from . import rule_response -from . import transfer -from . import promotion - - -SCENE = SceneType.RETAIL -LAMBDA_NAME = "lambda_retail_tools" - - -tool_manager.register_tool({ - "name":"daily_reception", - "scene": SCENE, - "lambda_name": LAMBDA_NAME, - "lambda_module_path": daily_reception.lambda_handler, - "tool_def": { - "name": "daily_reception", - "description": "daily reception", - }, - "running_mode": ToolRuningMode.ONCE -}) - - -tool_manager.register_tool({ - "name":"goods_exchange", - "scene": SCENE, - "lambda_name": LAMBDA_NAME, - "lambda_module_path": goods_exchange.lambda_handler, - "tool_def": { - "name": "goods_exchange", - "description": "This tool handles user requests for product returns or exchanges.", - }, - "running_mode": ToolRuningMode.ONCE -}) - - -tool_manager.register_tool({ - "name": "customer_complain", - "scene": SCENE, - "lambda_name": LAMBDA_NAME, - "lambda_module_path": customer_complain.lambda_handler, - "tool_def": { - "name": "customer_complain", - "description": "有关于客户抱怨的工具,比如商品质量,错发商品,漏发商品等", - }, - "running_mode": ToolRuningMode.ONCE -}) - - -tool_manager.register_tool({ - "name":"promotion", - "scene": SCENE, - "lambda_name": LAMBDA_NAME, - "lambda_module_path": promotion.lambda_handler, - "tool_def": { - "name": "promotion", - "description": "有关于商品促销的信息,比如返点,奖品和奖励等", - }, - "running_mode": ToolRuningMode.ONCE -}) - - -tool_manager.register_tool({ - "name":"size_guide", - "scene": SCENE, - "lambda_name": LAMBDA_NAME, - "lambda_module_path": size_guide.lambda_handler, - "tool_def": { - "name": "size_guide", - "description": """size guide for customer - Step1: Determine what type of goods the customer wants to buy according to the goods information in <商品信息> xml tag, - such as shoes or apparel. - Step2: If the customer wants to buy shoes, you should provide the customer's shoes_size or foot_length. 
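The size_guide definition above is registered with ToolRuningMode.LOOP, and its handler (deleted further below) signals missing parameters by returning code 1. Schematically, the agent-side loop that convention implies looks like the following; this is an assumed sketch inferred from the handlers' return shape, not the real loop in the online agent lambdas.

```python
# Assumed control flow for a LOOP-mode tool, inferred from the
# {"code": 0|1, "result": ...} convention these handlers return.
# ask_user is a stand-in for another round-trip through the agent.
def run_tool_loop(tool_handler, state, ask_user):
    kwargs = {}
    while True:
        out = tool_handler({"state": state, "kwargs": kwargs})
        if out["code"] == 0:
            return out["result"]   # success: hand the result back to the agent
        # code 1: the tool needs more input (e.g. height/weight for apparel,
        # shoes_size/foot_length for shoes), so ask and merge the reply
        kwargs.update(ask_user(out["result"]))
```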
- Step3: If the customer wants to buy apparel, you should provide the customer's height and weight. - Notice: if the customer's weight unit is 斤, you should convert it to kg, 1斤=0.5kg""", - "parameters": { - "type": "object", - "properties": { - "height": { - "description": "height of the customer", - "type": "int" - }, - "weight": { - "description": "weight of the customer", - "type": "int" - }, - "shoes_size": { - "description": "size of the customer's shoes", - "type": "float" - }, - "foot_length": { - "description": "length of the customer's foot", - "type": "float" - } - }, - "required": [] - }, - }, - "running_mode": ToolRuningMode.LOOP -}) - - -tool_manager.register_tool({ - "name":"goods_recommendation", - "scene": SCENE, - "lambda_name": LAMBDA_NAME, - "lambda_module_path": product_information_search.lambda_handler, - "tool_def": { - "name": "goods_recommendation", - "description": "recommend the product to the customer", - }, - "running_mode": ToolRuningMode.ONCE -}) - - -tool_manager.register_tool({ - "name":"order_pipeline", - "scene": SCENE, - "lambda_name": LAMBDA_NAME, - "lambda_module_path": order_info.lambda_handler, - "tool_def": { - "name": "order_pipeline", - "description": "query the order information", - }, - "running_mode": ToolRuningMode.ONCE -}) - - -tool_manager.register_tool({ - "name":"product_logistics", - "scene": SCENE, - "lambda_name": LAMBDA_NAME, - "lambda_module_path": order_info.lambda_handler, - "tool_def": { - "name": "product_logistics", - "description": "查询商品物流信息,运费规则和物流规则,其中运费规则包括退货,换货,错发商品,漏发商品等。物流规则包括发货时间等", - }, - "running_mode": ToolRuningMode.ONCE, -}) - - -tool_manager.register_tool({ - "name":"goods_storage", - "scene": SCENE, - "lambda_name": LAMBDA_NAME, - "lambda_module_path": order_info.lambda_handler, - "tool_def": { - "name": "goods_storage", - "description": "商品的库存信息,比如应对没货的情况等", - }, - "running_mode": ToolRuningMode.ONCE, -}) - - -tool_manager.register_tool({ - "name": "rule_response", - "scene": SCENE, - "lambda_name": LAMBDA_NAME, - "lambda_module_path": rule_response.lambda_handler, - "tool_def": { - "name": "rule_response", - "description": "If a user's reply contains just a link or a long number, use this tool to reply.", - }, - "running_mode": ToolRuningMode.ONCE, -}) - - -tool_manager.register_tool({ - "name":"transfer", - "scene": SCENE, - "lambda_name": LAMBDA_NAME, - "lambda_module_path": transfer.lambda_handler, - "tool_def": { - "name": "转人工", - "description": "转人工" - }, - "running_mode": ToolRuningMode.ONCE -}) - - -tool_manager.register_tool( - { - "name":"product_quality", - "scene": SCENE, - "lambda_name": LAMBDA_NAME, - "lambda_module_path": product_aftersales.lambda_handler, - "tool_def": { - "name": "product_quality", - "description": "商品的售后处理,主要包括客户关于商品质量的抱怨,比如开胶等问题的", - "parameters": { - "type": "object", - "properties": { - "shop": { - "description": """The shop which the customer bought the product. - If the customer do not provide the shop name, the shop name is 'tianmao' by default. 
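All of the rag_*-style handlers deleted below (daily_reception, goods_exchange, customer_complain, promotion, product_aftersales) repeat one retrieve-then-generate shape. Condensed into a single simplified sketch; the real handlers add send_trace calls and per-tool prompt rules, and system_prompt_template here is an illustrative stand-in:

```python
# Schematic of the shared retrieve-then-generate pattern in the rag_* handlers.
from common_logic.common_utils.lambda_invoke_utils import invoke_lambda
from common_logic.common_utils.constant import LLMTaskType

def rag_answer(state, tool_config_key, system_prompt_template):
    # 1. Retrieve: run the tool's configured retriever against the user query.
    retriever_params = state["chatbot_config"][tool_config_key]["retriever_config"]
    retriever_params["query"] = state["query"]
    output = invoke_lambda(
        event_body=retriever_params,
        lambda_name="Online_Functions",
        lambda_module_path="functions.functions_utils.retriever.retriever",
        handler_name="lambda_handler",
    )
    context = "\n\n".join(doc["page_content"] for doc in output["result"]["docs"])
    # 2. Generate: stuff the retrieved context into the system prompt and chat.
    return invoke_lambda(
        lambda_name="Online_LLM_Generate",
        lambda_module_path="lambda_llm_generate.llm_generate",
        handler_name="lambda_handler",
        event_body={
            "llm_config": {
                **state["chatbot_config"][tool_config_key]["llm_config"],
                "system_prompt": system_prompt_template.format(context=context),
                "intent_type": LLMTaskType.CHAT,
            },
            "llm_input": {"query": state["query"], "chat_history": state["chat_history"]},
        },
    )
```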
- The shop name must be in the list of ['tianmao', 'taobao','jingdong','dewu','other']""", - "type": "str" - } - }, - "required": [] - } - }, - "running_mode": ToolRuningMode.ONCE - } -) - -tool_manager.register_tool( - { - "name":"step_back_rag", - "scene": SCENE, - "lambda_name": LAMBDA_NAME, - "lambda_module_path": step_back_rag.lambda_handler, - "tool_def": { - "name": "step_back_rag", - "description": "如果用户的问题过于具体,请把改写为更加通用的问题", - "parameters": { - "type": "object", - "properties": { - "query": { - "description": """基于历史消息改写的问题""", - "type": "str" - }, - "step_back_query": { - "description": """改写后的问题""", - "type": "str" - } - }, - "required": ["query", "step_back_query"] - } - }, - "running_mode": ToolRuningMode.ONCE - } -) - - -tool_manager.register_tool( - { - "name":"comparison_rag", - "scene": SCENE, - "lambda_name": LAMBDA_NAME, - "lambda_module_path": comparison_rag.lambda_handler, - "tool_def": { - "name": "comparison_rag", - "description": "在处理比较类型的问题,比如比较两个产品的区别时,使用这个工具", - "parameters": { - "type": "object", - "properties": { - "query": { - "description": """基于历史消息改写的问题""", - "type": "str" - }, - "query_a": { - "description": """比较对象A的查询语句""", - "type": "str" - }, - "query_b": { - "description": """比较对象B的查询语句""", - "type": "str" - } - }, - "required": ["query", "query_a", "query_b"] - } - }, - "running_mode": ToolRuningMode.ONCE - } -) - -tool_manager.register_tool( - { - "name":"give_rhetorical_question", - "scene": SCENE, - "lambda_name": LAMBDA_NAME, - "lambda_module_path": give_rhetorical_question.lambda_handler, - "tool_def":{ - "name": "give_rhetorical_question", - "description": "If the user's question is not clear and specific, resulting in the inability to call other tools, please call this tool to ask the user a rhetorical question", - "parameters": { - "type": "object", - "properties": { - "question": { - "description": "The rhetorical question to user", - "type": "string" - }, - }, - "required": ["question"], - }, - }, - "running_mode": ToolRuningMode.ONCE - } -) - - -tool_manager.register_tool( - { - "name": "give_final_response", - "scene": SCENE, - "lambda_name": LAMBDA_NAME, - "lambda_module_path": give_final_response.lambda_handler, - "tool_def":{ - "name": "give_final_response", - "description": "If none of the other tools need to be called, call the current tool to complete the direct response to the user.", - "parameters": { - "type": "object", - "properties": { - "response": { - "description": "Response to user", - "type": "string" - } - }, - "required": ["response"] - }, - }, - "running_mode": ToolRuningMode.ONCE - } -) - - - - - - - - - - - diff --git a/source/lambda/online/functions/lambda_retail_tools/customer_complain.py b/source/lambda/online/functions/lambda_retail_tools/customer_complain.py deleted file mode 100644 index f20b18ac1..000000000 --- a/source/lambda/online/functions/lambda_retail_tools/customer_complain.py +++ /dev/null @@ -1,51 +0,0 @@ -# customer complain -from common_logic.common_utils.lambda_invoke_utils import invoke_lambda -from common_logic.common_utils.lambda_invoke_utils import send_trace -from common_logic.common_utils.constant import ( - LLMTaskType -) - - -def lambda_handler(event_body,context=None): - state = event_body['state'] - # call retriever - retriever_params = state["chatbot_config"]["rag_customer_complain_config"]["retriever_config"] - retriever_params["query"] = state["query"] - output:str = invoke_lambda( - event_body=retriever_params, - lambda_name="Online_Functions", - 
lambda_module_path="functions.functions_utils.retriever.retriever", - handler_name="lambda_handler" - ) - contexts = [doc['page_content'] for doc in output['result']['docs']] - - context = "\n\n".join(contexts) - send_trace(f'**rag_customer_complain_retriever** {context}', state["stream"], state["ws_connection_id"]) - - # llm generate - # prompt = dedent(f"""你是安踏的客服助理,正在处理有关于客户抱怨的问题,这些问题有关于商品质量等方面,需要你按照下面的guidelines进行回复: - system_prompt = ("你是安踏的客服助理,正在处理有关于消费者抱怨的问题。context列举了一些可能和客户问题有关的具体场景及回复,你可以进行参考:\n" - "\n" - f"{context}\n" - "\n" - "需要你按照下面的guidelines进行回复:\n" - "\n" - " - 回答内容为一句话,言简意赅。\n" - " - 尽量安抚客户情绪。\n" - " - 直接回答,不要说\"亲爱的顾客,您好\"\n" - "\n" - ) - output:str = invoke_lambda( - lambda_name='Online_LLM_Generate', - lambda_module_path="lambda_llm_generate.llm_generate", - handler_name='lambda_handler', - event_body={ - "llm_config": { - **state['chatbot_config']['rag_customer_complain_config']['llm_config'], - "system_prompt":system_prompt, - "intent_type": LLMTaskType.CHAT}, - "llm_input": { "query": state['query'], "chat_history": state['chat_history']} - } - ) - - return {"code":0, "result":output} \ No newline at end of file diff --git a/source/lambda/online/functions/lambda_retail_tools/daily_reception.py b/source/lambda/online/functions/lambda_retail_tools/daily_reception.py deleted file mode 100644 index cbd048919..000000000 --- a/source/lambda/online/functions/lambda_retail_tools/daily_reception.py +++ /dev/null @@ -1,48 +0,0 @@ -# daily reception tool -from common_logic.common_utils.lambda_invoke_utils import invoke_lambda -from common_logic.common_utils.lambda_invoke_utils import send_trace -from common_logic.common_utils.constant import ( - LLMTaskType -) - -def lambda_handler(event_body,context=None): - state = event_body['state'] - # retriver - retriever_params = state["chatbot_config"]["rag_daily_reception_config"]['retriever_config'] - retriever_params["query"] = state["query"] - output:str = invoke_lambda( - event_body=retriever_params, - lambda_name="Online_Functions", - lambda_module_path="functions.functions_utils.retriever.retriever", - handler_name="lambda_handler" - ) - contexts = [doc['page_content'] for doc in output['result']['docs']] - context = "\n\n".join(contexts) - send_trace(f'**rag_daily_reception_retriever** {context}') - - # llm generate - system_prompt = (f"你是安踏的客服助理,正在帮用户解答问题,客户提出的问题大多是属于日常接待类别,你需要按照下面的guidelines进行回复:\n" - "\n" - " - 回复内容需要展现出礼貌。回答内容为一句话,言简意赅。\n" - " - 使用中文回答。\n" - "\n" - "下面列举了一些具体的场景下的回复,你可以结合用户的问题进行参考:\n" - "\n" - f"{context}\n" - "" - ) - - output:str = invoke_lambda( - lambda_name='Online_LLM_Generate', - lambda_module_path="lambda_llm_generate.llm_generate", - handler_name='lambda_handler', - event_body={ - "llm_config": { - **state['chatbot_config']['rag_daily_reception_config']['llm_config'], - "system_prompt": system_prompt, - "intent_type": LLMTaskType.CHAT}, - "llm_input": {"query": state['query'], "chat_history": state['chat_history']} - } - ) - - return {"code":0, "result":output} \ No newline at end of file diff --git a/source/lambda/online/functions/lambda_retail_tools/goods_exchange.py b/source/lambda/online/functions/lambda_retail_tools/goods_exchange.py deleted file mode 100644 index bd492ab26..000000000 --- a/source/lambda/online/functions/lambda_retail_tools/goods_exchange.py +++ /dev/null @@ -1,49 +0,0 @@ -# goods exchange -from common_logic.common_utils.lambda_invoke_utils import invoke_lambda -from common_logic.common_utils.lambda_invoke_utils import send_trace -from common_logic.common_utils.constant 
import ( - LLMTaskType -) - -def lambda_handler(event_body,context=None): - state = event_body['state'] - - # retriver - retriever_params = state["chatbot_config"]["rag_goods_exchange_config"]['retriever_config'] - retriever_params["query"] = state["query"] - output:str = invoke_lambda( - event_body=retriever_params, - lambda_name="Online_Functions", - lambda_module_path="functions.functions_utils.retriever.retriever", - handler_name="lambda_handler" - ) - contexts = [doc['page_content'] for doc in output['result']['docs']] - - context = "\n\n".join(contexts) - send_trace(f'**rag_goods_exchange_retriever** {context}', state["stream"], state["ws_connection_id"]) - - # llm generate - system_prompt = (f"你是安踏的客服助理,正在帮用户解答问题,客户提出的问题大多是属于商品退换货范畴,你需要按照下面的guidelines进行回复:\n" - "\n" - " - 回复内容需要展现出礼貌。回答内容为一句话,言简意赅。\n" - " - 使用中文回答。\n" - "\n" - "下面列举了一些具体的场景下的回复,你可以结合用户的问题进行参考回答:\n" - "\n" - f"{context}\n" - "\n" - ) - - output:str = invoke_lambda( - lambda_name='Online_LLM_Generate', - lambda_module_path="lambda_llm_generate.llm_generate", - handler_name='lambda_handler', - event_body={ - "llm_config": { - **state['chatbot_config']['rag_goods_exchange_config']['llm_config'], - "system_prompt":system_prompt, - "intent_type": LLMTaskType.CHAT}, - "llm_input": { "query": state['query'], "chat_history": state['chat_history']} - } - ) - return {"code":0, "result":output} \ No newline at end of file diff --git a/source/lambda/online/functions/lambda_retail_tools/order_info.py b/source/lambda/online/functions/lambda_retail_tools/order_info.py deleted file mode 100644 index 0dd7f57f0..000000000 --- a/source/lambda/online/functions/lambda_retail_tools/order_info.py +++ /dev/null @@ -1,49 +0,0 @@ -import os -import re -import boto3 -import json - -from common_logic.common_utils.s3_utils import download_file_from_s3,check_local_folder -from common_logic.common_utils.lambda_invoke_utils import invoke_lambda - -data_bucket_name = os.environ.get("RES_BUCKET", "aws-chatbot-knowledge-base-test") -order_info_path = "/tmp/functions/retail_tools/lambda_order_info/order_info.json" -check_local_folder(order_info_path) -download_file_from_s3(data_bucket_name, "retail_json/order_info.json", order_info_path) -order_dict = json.load(open(order_info_path)) - -def lambda_handler(event_body, context=None): - state = event_body["state"] - if "goods_id" not in state["chatbot_config"]: - return {"code":1, "result":"goods_id is required"} - goods_id = state["chatbot_config"]["goods_id"] -# ans = """ -# 如果遇到是关于是关于发货的问题,请回答‘根据您的问题,FILA实施全国多仓发货,下单后正常会48小时内帮您安排发货的哦。如果超过48小时还未发货,您可以随时联系我们客服,我们会尽快为您解决。’ -# """ - result = order_dict.get(goods_id, {}).get("status","") - - if result == "": - search_result = 0 - retriever_params = state["chatbot_config"]["rag_delivery_track_config"]["retriever_config"] - retriever_params["query"] = state["query"] - output:str = invoke_lambda( - event_body=retriever_params, - lambda_name="Online_Functions", - lambda_module_path="functions.functions_utils.retriever.retriever", - handler_name="lambda_handler" - ) - contexts = [doc['page_content'] for doc in output['result']['docs']] - - result = "\n\n".join(contexts) - - # search_result = f"当前没有查询到相关内容,请参考下面的内容,用一句话进行简洁回复:\n<内容>\n{result}\n" - # result = search_result - # result = contexts - else: - search_result = 1 - - return {"code":0, "result":result, "name": "product_information_search", "search_result": search_result} - -if __name__ == "__main__": - event_body = {} - lambda_handler(event_body) \ No newline at end of file diff --git 
a/source/lambda/online/functions/lambda_retail_tools/product_aftersales.py b/source/lambda/online/functions/lambda_retail_tools/product_aftersales.py deleted file mode 100644 index 0eaac2fbd..000000000 --- a/source/lambda/online/functions/lambda_retail_tools/product_aftersales.py +++ /dev/null @@ -1,86 +0,0 @@ -# goods after sales -from common_logic.common_utils.lambda_invoke_utils import invoke_lambda -from common_logic.common_utils.lambda_invoke_utils import send_trace -from common_logic.common_utils.constant import ( - LLMTaskType -) -from datetime import datetime - - -def lambda_handler(event_body,context=None): - state = event_body['state'] - recent_tool_calling:list[dict] = state['function_calling_parsed_tool_calls'][0] - if "shop" in recent_tool_calling['kwargs'] and recent_tool_calling['kwargs']['shop'] != "tianmao": - contexts = ["顾客不是在天猫购买的商品,请他咨询其他商家"] - # return {"contexts": contexts} - else: - retriever_params = state["chatbot_config"]["rag_product_aftersales_config"]["retriever_config"] - retriever_params["query"] = state["query"] - output:str = invoke_lambda( - event_body=retriever_params, - lambda_name="Online_Functions", - lambda_module_path="functions.functions_utils.retriever.retriever", - handler_name="lambda_handler" - ) - contexts = [doc['page_content'] for doc in output['result']['docs']] - - context = "\n\n".join(contexts) - send_trace(f'**rag_product_aftersales_retriever** {context}', state["stream"], state["ws_connection_id"]) - - - # llm generate - create_time = state.get('create_time', None) - goods_id = state.get('chatbot_config').get('goods_id', 757492962957) - try: - create_datetime_object = datetime.strptime(create_time, '%Y-%m-%d %H:%M:%S.%f') - except Exception as e: - create_datetime_object = datetime.now() - print(f"create_time: {create_time} is not valid, use current time instead.") - create_time_str = create_datetime_object.strftime('%Y-%m-%d') - # TODO: fix received time format - - from lambda_main.main_utils.online_entries.retail_entry import order_dict - - received_time = order_dict.get(str(goods_id), {}).get("received_time", "2023/9/129:03:13") - order_time = " ".join([received_time[:9], received_time[9:]]) - try: - order_date_str = datetime.strptime(order_time, '%Y/%m/%d %H:%M:%S').strftime('%Y-%m-%d') - receive_elapsed_days = (create_datetime_object - datetime.strptime(order_date_str, '%Y-%m-%d')).days - receive_elapsed_months = receive_elapsed_days // 30 - except Exception as e: - order_date_str = "2023-9-12" - receive_elapsed_months = 6 - - system_prompt = (f"你是安踏的客服助理,正在帮消费者解答问题,消费者提出的问题大多是属于商品的质量和物流规则。context列举了一些可能有关的具体场景及回复,你可以进行参考:\n" - f"客户咨询的问题所对应的订单日期为{order_date_str}。\n" - f"当前时间{create_time_str}\n" - f"客户收到商品已经超过{receive_elapsed_months}个月\n" - "\n" - f"{context}\n" - "\n" - "你需要按照下面的guidelines对消费者的问题进行回答:\n" - "\n" - " - 回答内容为一句话,言简意赅。\n" - " - 如果问题与context内容不相关,就不要采用。\n" - " - 消费者的问题里面可能包含口语化的表达,比如鞋子开胶的意思是用胶黏合的鞋体裂开。这和胶丝遗留没有关系\n" - " - 洗涤后出现问题也属于质量问题\n" - " - 消费者的回复不够清晰的时候,直接回复: 不知道刚才给您的建议是否有帮助?。不要有额外补充\n" - " - 如果客户问到质量相关问题,请根据前面的订单信息和三包规则,确定是否超出三包期限,如果超出三包期限请告知消费者无法处理,如果在三包期限内请按照三包要求处理,并安抚。\n" - "\n" - ) - # print('llm config',state['chatbot_config']['rag_product_aftersales_config']['llm_config']) - output:str = invoke_lambda( - lambda_name='Online_LLM_Generate', - lambda_module_path="lambda_llm_generate.llm_generate", - handler_name='lambda_handler', - event_body={ - "llm_config": { - **state['chatbot_config']['rag_product_aftersales_config']['llm_config'], - "system_prompt":system_prompt, - "intent_type": LLMTaskType.CHAT - }, - 
"llm_input": { "query": state['query'], "chat_history": state['chat_history']} - } - ) - - return {"code":0, "result":output} \ No newline at end of file diff --git a/source/lambda/online/functions/lambda_retail_tools/product_information_search.py b/source/lambda/online/functions/lambda_retail_tools/product_information_search.py deleted file mode 100644 index 147d32396..000000000 --- a/source/lambda/online/functions/lambda_retail_tools/product_information_search.py +++ /dev/null @@ -1,40 +0,0 @@ -import json -import os -from common_logic.common_utils.s3_utils import download_file_from_s3 -from common_logic.common_utils.lambda_invoke_utils import invoke_lambda,node_monitor_wrapper -from common_logic.common_utils.lambda_invoke_utils import send_trace,is_running_local - -goods_info_path = "/tmp/functions/retail_tools/lambda_order_info/goods_info.json" -parent_path = '/'.join((goods_info_path).split('/')[:-1]) -os.system(f"mkdir -p {parent_path}") - -data_bucket_name = os.environ.get("RES_BUCKET", "aws-chatbot-knowledge-base-test") -download_file_from_s3(data_bucket_name, "retail_json/goods_info.json", goods_info_path) -goods_dict = json.load(open(goods_info_path)) - -def lambda_handler(event_body, context=None): - state = event_body["state"] - if "goods_id" not in state["chatbot_config"]: - return {"code":1, "result":"goods_id is required"} - goods_id = str(state["chatbot_config"]["goods_id"]) - context_goods_info = goods_dict[goods_id] - - retriever_params = state["chatbot_config"]["rag_goods_info_config"]["retriever_config"] - retriever_params["query"] = state["query"] - output:str = invoke_lambda( - event_body=retriever_params, - lambda_name="Online_Functions", - lambda_module_path="functions.functions_utils.retriever.retriever", - handler_name="lambda_handler" - ) - goods_info_list = [doc['page_content'] for doc in output['result']['docs'] if doc['score'] > 0.6] - - query_goods_info = "\n\n".join(goods_info_list) - send_trace(f'**rag_goods_info_retriever** {context}', state["stream"], state["ws_connection_id"]) - result = f"**用户当前咨询的商品是** {context_goods_info}\n\n**用户可能想找的商品是** {query_goods_info}" - - return {"code":0, "result":result, "name": "product_information_search"} - -if __name__ == "__main__": - event_body = {} - lambda_handler(event_body) \ No newline at end of file diff --git a/source/lambda/online/functions/lambda_retail_tools/promotion.py b/source/lambda/online/functions/lambda_retail_tools/promotion.py deleted file mode 100644 index 0bf5aca9e..000000000 --- a/source/lambda/online/functions/lambda_retail_tools/promotion.py +++ /dev/null @@ -1,53 +0,0 @@ -# promotion tool - -from common_logic.common_utils.lambda_invoke_utils import invoke_lambda -from common_logic.common_utils.lambda_invoke_utils import send_trace -from common_logic.common_utils.constant import ( - LLMTaskType -) - - -def lambda_handler(event_body,context=None): - state = event_body['state'] - - # retrieve - retriever_params = state["chatbot_config"]["rag_promotion_config"]["retriever_config"] - retriever_params["query"] = state["query"] - output:str = invoke_lambda( - event_body=retriever_params, - lambda_name="Online_Functions", - lambda_module_path="functions.functions_utils.retriever.retriever", - handler_name="lambda_handler" - ) - contexts = [doc['page_content'] for doc in output['result']['docs']] - - context = "\n\n".join(contexts) - send_trace(f'**rag_promotion_retriever** {context}', state["stream"], state["ws_connection_id"]) - - # llm generate - system_prompt = 
("你是安踏的客服助理,正在帮消费者解答有关于商品促销的问题,这些问题是有关于积分、奖品、奖励等方面。\n" - "context列举了一些可能有关的具体场景及回复,你可以进行参考:\n" - f"\n{context}\n\n" - "你需要按照下面的guidelines对消费者的问题进行回答:\n" - "\n" - " - 回答内容为一句话,言简意赅。\n" - " - 如果问题与context内容不相关,就不要采用。\n" - " - 使用中文进行回答。\n" - "" - ) - - output:str = invoke_lambda( - lambda_name='Online_LLM_Generate', - lambda_module_path="lambda_llm_generate.llm_generate", - handler_name='lambda_handler', - event_body={ - "llm_config": { - **state['chatbot_config']['rag_promotion_config']['llm_config'], - "system_prompt":system_prompt, - "intent_type": LLMTaskType.CHAT}, - "llm_input": { "query": state['query'], "chat_history": state['chat_history']} - } - ) - - return {"code":0, "result": output} - diff --git a/source/lambda/online/functions/lambda_retail_tools/rule_response.py b/source/lambda/online/functions/lambda_retail_tools/rule_response.py deleted file mode 100644 index 3b5a8f029..000000000 --- a/source/lambda/online/functions/lambda_retail_tools/rule_response.py +++ /dev/null @@ -1,49 +0,0 @@ -# rule_url_reply -import random -import re -from functions.lambda_retail_tools.product_information_search import goods_dict -from common_logic.common_utils.constant import LLMTaskType -from common_logic.common_utils.lambda_invoke_utils import invoke_lambda - - -def lambda_handler(event_body, context=None): - state = event_body["state"] - state["extra_response"]["current_agent_intent_type"] = "rule reply" - goods_info_tag = state['goods_info_tag'] - if state['query'].endswith(('.jpg','.png')): - answer = random.choice([ - "收到,亲。请问我们可以怎么为您效劳呢?", - "您好,请问有什么需要帮助的吗?" - ]) - return {"code":0, "result": answer} - # product information - r = re.findall(r"item.htm\?id=(.*)",state['query']) - if r: - goods_id = r[0] - else: - goods_id = 0 - if goods_id in goods_dict: - # call llm to make summary of goods info - human_goods_info = state['human_goods_info'] - output = f"您好,该商品的特点是:\n{human_goods_info}" - if human_goods_info: - system_prompt = (f"你是安踏的客服助理,当前用户对下面的商品感兴趣:\n" - f"<{goods_info_tag}>\n{human_goods_info}\n\n" - "请你结合商品的基础信息,特别是卖点信息返回一句推荐语。" - ) - output:str = invoke_lambda( - lambda_name='Online_LLM_Generate', - lambda_module_path="lambda_llm_generate.llm_generate", - handler_name='lambda_handler', - event_body={ - "llm_config": { - **state['chatbot_config']['rag_daily_reception_config']['llm_config'], - "system_prompt": system_prompt, - "intent_type": LLMTaskType.CHAT}, - "llm_input": {"query": state['query'], "chat_history": state['chat_history']} - } - ) - - return {"code":0, "result":output} - - return {"code":0, "result":"您好"} \ No newline at end of file diff --git a/source/lambda/online/functions/lambda_retail_tools/size_guide.py b/source/lambda/online/functions/lambda_retail_tools/size_guide.py deleted file mode 100644 index cc148432f..000000000 --- a/source/lambda/online/functions/lambda_retail_tools/size_guide.py +++ /dev/null @@ -1,83 +0,0 @@ -import os -import re -import json - -import numpy as np - -from common_logic.common_utils.s3_utils import download_file_from_s3, check_local_folder - -data_bucket_name = os.environ.get("RES_BUCKET", "aws-chatbot-knowledge-base-test") -good2type_dict_path = "/tmp/functions/retail_tools/lambda_size_guide/good2type_dict.json" -size_dict_path = "/tmp/functions/retail_tools/lambda_size_guide/size_dict.json" -check_local_folder(good2type_dict_path) -check_local_folder(size_dict_path) -download_file_from_s3(data_bucket_name, "retail_json/good2type_dict.json", good2type_dict_path) -download_file_from_s3(data_bucket_name, "retail_json/size_dict.json", 
size_dict_path) -good2type_dict = json.load(open(good2type_dict_path)) -size_dict = json.load(open(size_dict_path)) - -def find_nearest(array, value): - float_array = np.asarray([float(x) for x in array]) - array = np.asarray(array) - idx = (np.abs(float_array - value)).argmin() - return array[idx] - -def lambda_handler(event_body, context=None): - state = event_body["state"] - if "goods_id" not in state["chatbot_config"]: - return {"code":1, "result":"goods_id is required"} - goods_id = str(state["chatbot_config"]["goods_id"]) - kwargs = event_body["kwargs"] - if goods_id not in good2type_dict: - return {"code":1, "result":"该商品的尺码信息缺失,请不要使用尺码工具"} - goods_type_1, goods_type_2 = good2type_dict[goods_id] - if goods_type_1 == "shoes": - if "shoes_size" in kwargs: - try: - shoe_size = float(kwargs["shoes_size"]) - except: - return {"code":1, "result":"shoes_size should be a number"} - if goods_type_1 == "shoes" and goods_type_2 == "童鞋": - return {"code":1, "result":"童鞋不存在鞋码,请输入脚长查询"} - std_shoe_size = find_nearest(list(size_dict.get(goods_type_1).get(goods_type_2).get("shoes_size").keys()), shoe_size) - result = size_dict.get(goods_type_1).get(goods_type_2).get("shoes_size").get(std_shoe_size, "42") - # No suitable size for the input shoes size or foot length - if result == "此款暂无适合亲的尺码": - result += ",您当前输入的鞋码为{},请确认一下参数是否正确,如果有修改可以再次调用尺码工具".format(shoe_size) - elif "foot_length" in kwargs: - try: - foot_length = float(kwargs["foot_length"]) - except: - return {"code":1, "result":"foot_length should be a number"} - std_foot_length = find_nearest(list(size_dict.get(goods_type_1).get(goods_type_2).get("foot_length").keys()), foot_length) - result = size_dict.get(goods_type_1).get(goods_type_2).get("foot_length").get(std_foot_length, "28") - # No suitable size for the input foot length - if result == "此款暂无适合亲的尺码": - result += ",您当前输入的脚长为{}cm,请确认一下参数是否正确,如果有修改可以再次调用尺码工具".format(foot_length) - else: - return {"code":1, "result":"请继续询问用户的脚长或鞋码"} - elif goods_type_1 == "apparel": - if "height" not in kwargs: - return {"code":1, "result":"请继续询问用户的身高"} - if "weight" not in kwargs: - return {"code":1, "result":"请继续询问用户的体重"} - try: - height = float(kwargs["height"]) - weight = float(kwargs["weight"]) - except: - return {"code":1, "result":"身高和体重必须是数值,请确认输入是否正确"} - std_height = find_nearest(list(size_dict.get(goods_type_1).get(goods_type_2).\ - get("height_weight").keys()), height) - std_weight = find_nearest(list(size_dict.get(goods_type_1).get(goods_type_2).\ - get("height_weight").get(std_height).keys()), weight) - result = size_dict.get(goods_type_1).get(goods_type_2).get("height_weight").\ - get(std_height).get(std_weight) - # No suitable size for the input height and weight - if result == "亲亲,很抱歉,这款暂时没有适合您的尺码": - result += ",您当前输入的身高为{}cm,体重为{}kg,请确认一下参数是否正确,如果有修改可以再次调用尺码工具".\ - format(height, weight) - return {"code":0, "result":result, "name": "尺码查询"} - -if __name__ == "__main__": - event_body = {} - lambda_handler(event_body) \ No newline at end of file diff --git a/source/lambda/online/functions/lambda_retail_tools/transfer.py b/source/lambda/online/functions/lambda_retail_tools/transfer.py deleted file mode 100644 index 24a6a01de..000000000 --- a/source/lambda/online/functions/lambda_retail_tools/transfer.py +++ /dev/null @@ -1,3 +0,0 @@ -# retail transfer -def lambda_handler(event_body,context=None): - return {"code":0, "result":"您好,我是安踏官方客服,很高兴为您服务。请问您有什么需要帮助的吗?"} \ No newline at end of file diff --git a/source/lambda/online/functions/lambda_tool.py 
b/source/lambda/online/functions/lambda_tool.py deleted file mode 100644 index dec440eff..000000000 --- a/source/lambda/online/functions/lambda_tool.py +++ /dev/null @@ -1,24 +0,0 @@ -# unified lambda tool calling -from functions import get_tool_by_name,Tool -from common_logic.common_utils.lambda_invoke_utils import invoke_lambda -from common_logic.common_utils.lambda_invoke_utils import chatbot_lambda_call_wrapper - -@chatbot_lambda_call_wrapper -def lambda_handler(event_body,context=None): - tool_name = event_body['tool_name'] - state = event_body['state'] - tool:Tool = get_tool_by_name(tool_name,scene=state['chatbot_config']['scene']) - - output:dict = invoke_lambda( - event_body=event_body, - lambda_name=tool.lambda_name, - lambda_module_path=tool.lambda_module_path, - handler_name=tool.handler_name - ) - return output - - - - - - \ No newline at end of file diff --git a/source/lambda/online/functions/tool_calling_parse.py b/source/lambda/online/functions/tool_calling_parse.py deleted file mode 100644 index ddc756d26..000000000 --- a/source/lambda/online/functions/tool_calling_parse.py +++ /dev/null @@ -1,364 +0,0 @@ -""" -tool calling parse, convert content by llm to dict -""" -from typing import List -import re -import json -from langchain_core.messages import( - ToolCall -) -from common_logic.common_utils.exceptions import ( - ToolNotExistError, - ToolParameterNotExistError, - MultipleToolNameError, - ToolNotFound -) -from functions.tool_execute_result_format import format_tool_call_results -from common_logic.common_utils.constant import ( - LLMModelType, - MessageType -) - - - -class ToolCallingParseMeta(type): - def __new__(cls, name, bases, attrs): - new_cls = type.__new__(cls, name, bases, attrs) - - if name == "ToolCallingParse": - return new_cls - new_cls.model_map[new_cls.model_id] = new_cls - return new_cls - - -class ToolCallingParse(metaclass=ToolCallingParseMeta): - model_map = {} - - @classmethod - def parse_tool(cls,agent_output): - target_cls = cls.model_map[agent_output['current_agent_model_id']] - return target_cls.parse_tool(agent_output) - - -class Claude3SonnetFToolCallingParse(ToolCallingParse): - model_id = LLMModelType.CLAUDE_3_SONNET - tool_format = ("\n" - "\n" - "$TOOL_NAME\n" - "\n" - "<$PARAMETER_NAME>$PARAMETER_VALUE\n" - "...\n" - "\n" - "\n" - "\n" - ) - - @classmethod - def convert_anthropic_xml_to_dict(cls,model_id,function_calls:List[str], tools:list[dict]) -> List[dict]: - # formatted_tools = [convert_to_openai_function(tool) for tool in tools] - tool_calls:list[ToolCall] = [] - tools_mapping = {tool['name']:tool for tool in tools} - for function_call in function_calls: - tool_names = re.findall(r'(.*?)', function_call, re.S) - if len(tool_names) > 1: - raise MultipleToolNameError(function_call_content=function_call) - - tool_name = tool_names[0].strip() - - if tool_name not in tools_mapping: - raise ToolNotExistError( - tool_name=tool_name, - function_call_content=function_call - ) - cur_tool:dict = tools_mapping[tool_name] - arguments = {} - for parameter_key in cur_tool['parameters']['required']: - value = re.findall(f'<{parameter_key}>(.*?)', function_call, re.DOTALL) - if not value: - # expand search region - # search_region = re.findall(f'\n{parameter_key}(.*?)', function_call, re.DOTALL) - # # print(search_region) - # value = re.findall(f'(.*?)', search_region, re.DOTALL) - # if not value: - raise ToolParameterNotExistError( - tool_name=tool_name, - parameter_key=parameter_key, - function_call_content=function_call, - 
tool_format=f"\n注意正确的工具调用格式应该是下面的:\n{cls.tool_format}\n" - ) - # TODO, add too many parameters error - assert len(value) == 1,(parameter_key,function_call) - arguments[parameter_key] = value[0].strip() - for parameter_key in cur_tool['parameters']['properties'].keys(): - value = re.findall(f'<{parameter_key}>(.*?)', function_call, re.DOTALL) - if value: - arguments[parameter_key] = value[0].strip() - tool_calls.append(dict(name=tool_name,kwargs=arguments,model_id=model_id)) - return tool_calls - - @classmethod - def tool_not_found(cls,agent_message): - tool_format = cls.tool_format - e = ToolNotFound() - e.agent_message = agent_message - e.error_message = { - "role": MessageType.HUMAN_MESSAGE_TYPE, - "content": f"当前没有解析到tool,请检查tool调用的格式是否正确,并重新输出某个tool的调用。注意正确的tool调用格式应该为: {tool_format}。\n如果你认为当前不需要调用其他工具,请直接调用“give_final_response”工具进行返回。" - } - return e - - @classmethod - def parse_tool( - cls, - agent_output - ) -> list: - function_calls = agent_output['agent_output']['function_calls'] - tools = agent_output['current_agent_tools_def'] - agent_message = { - "role": MessageType.AI_MESSAGE_TYPE, - "content": agent_output['agent_output']['content'], - "additional_kwargs": {} - } - - if not function_calls: - raise cls.tool_not_found(agent_message=agent_message) - try: - tool_calls = cls.convert_anthropic_xml_to_dict( - cls.model_id, - function_calls=function_calls, - tools=tools - ) - if not tool_calls: - raise cls.tool_not_found(agent_message=agent_message) - - agent_message['additional_kwargs']['tool_calls'] = tool_calls - return {"agent_message": agent_message,"tool_calls":tool_calls} - except (ToolNotExistError,ToolParameterNotExistError,MultipleToolNameError) as e: - e.error_message = format_tool_call_results( - model_id = agent_output['current_agent_model_id'], - tool_output=[{ - "output":{ - "code": 1, - "result": e.to_agent(), - "tool_name": e.tool_name} - }] - )['tool_message'] - e.agent_message = agent_message - raise e - - -class Claude3HaikuToolCallingParse(Claude3SonnetFToolCallingParse): - model_id = LLMModelType.CLAUDE_3_HAIKU - - -class Claude35SonnetFToolCallingParse(Claude3SonnetFToolCallingParse): - model_id = "anthropic.claude-3-5-sonnet-20240620-v1:0" - - -class Claude2ToolCallingParse(Claude3SonnetFToolCallingParse): - model_id = LLMModelType.CLAUDE_2 - - -class Claude21ToolCallingParse(Claude3SonnetFToolCallingParse): - model_id = LLMModelType.CLAUDE_21 - - -class ClaudeInstanceToolCallingParse(Claude3SonnetFToolCallingParse): - model_id = LLMModelType.CLAUDE_INSTANCE - - -class Mixtral8x7bToolCallingParse(Claude3SonnetFToolCallingParse): - model_id = LLMModelType.MIXTRAL_8X7B_INSTRUCT - - -class GLM4Chat9BToolCallingParse(ToolCallingParse): - model_id = LLMModelType.GLM_4_9B_CHAT - - @classmethod - def parse_tool_kwargs(cls,content:str,tools_def:list[dict]): - tool_name = content.split("\n")[0] - cur_tool_def = None - for tool_def in tools_def: - if tool_def['name'] == tool_name: - cur_tool_def = tool_def - break - if cur_tool_def is None: - raise ToolNotExistError(tool_name=tool_name,function_call_content=content) - tool_params = "\n".join(content.split("\n")[1:]).replace("<|observation|>","").strip() - tool_params = json.loads(tool_params) - - - cur_params_names = set(list(tool_params.keys())) - tool_def_requires = set(cur_tool_def['parameters'].get("required",[])) - remain_requires = list(tool_def_requires.difference(cur_params_names)) - if remain_requires: - raise ToolParameterNotExistError( - tool_name=tool_name, - parameter_key=remain_requires[0], - 
function_call_content=content - ) - - return {"name":tool_name,"kwargs":tool_params,"model_id":cls.model_id} - - - @classmethod - def parse_tool(cls,agent_output): - try: - content = agent_output['agent_output'].strip() - # check use tool or direct reply - tools = agent_output['current_agent_tools_def'] - agent_message = { - "role": MessageType.AI_MESSAGE_TYPE, - "content": content, - "additional_kwargs": {} - } - - assert content.endswith(("<|user|>","<|observation|>")), content - - if content.endswith("<|observation|>"): - # use one tool - tool_call = cls.parse_tool_kwargs(content,tools_def=tools) - agent_message['content'] = agent_message['content'].replace(tool_call['name'],"").strip() - agent_message['additional_kwargs']['tool_calls'] = [tool_call] - else: - # default tool is give_final_response - # response = content.replace("<|user|>","") - tool_call = {"name":"give_final_response","kwargs":{"response":content},"model_id":cls.model_id} - return { - "agent_message": agent_message, - "tool_calls": [tool_call] - } - except (ToolNotExistError, ToolParameterNotExistError, MultipleToolNameError) as e: - e.agent_message = agent_message - e.error_message = format_tool_call_results( - model_id = agent_output['current_agent_model_id'], - tool_output=[{ - "output":{ - "code": 1, - "result": e.to_agent(), - "tool_name": e.tool_name} - }] - )['tool_message'] - raise e - - - -class Qwen2Instruct7BToolCallingParse(ToolCallingParse): - model_id = LLMModelType.QWEN2INSTRUCT7B - FN_NAME = '✿FUNCTION✿' - FN_ARGS = '✿ARGS✿' - FN_RESULT = '✿RESULT✿' - FN_EXIT = '✿RETURN✿' - - tool_format = (f"{FN_NAME}: 工具名称\n" - f"{FN_ARGS}: 工具输入\n" - f"{FN_RESULT}" - ) - - thinking_tag = "思考" - fix_reply_tag = "固定回复" - - @classmethod - def parse_tool_kwargs(cls,content:str,tools_def:list[dict],agent_message): - try: - r = re.match(f"{cls.FN_NAME}(.*?){cls.FN_ARGS}(.*?){cls.FN_RESULT}",content,re.S) - tool_name = r.group(1).strip().lstrip(":").strip() - tool_params = json.loads(r.group(2).strip().lstrip(":").strip()) - except Exception as e: - e = cls.tool_not_found(agent_message,error=str(e)) - raise e - - cur_tool_def = None - for tool_def in tools_def: - if tool_def['name'] == tool_name: - cur_tool_def = tool_def - break - if cur_tool_def is None: - raise ToolNotExistError(tool_name=tool_name,function_call_content=content) - - cur_params_names = set(list(tool_params.keys())) - tool_def_requires = set(cur_tool_def['parameters'].get("required",[])) - remain_requires = list(tool_def_requires.difference(cur_params_names)) - - if remain_requires: - raise ToolParameterNotExistError( - tool_name=tool_name, - parameter_key=remain_requires[0], - function_call_content=content - ) - return {"name":tool_name,"kwargs":tool_params,"model_id":cls.model_id} - - - @classmethod - def tool_not_found(cls,agent_message,error=""): - tool_format = cls.tool_format - e = ToolNotFound() - e.agent_message = agent_message - e.error_message = { - "role": MessageType.TOOL_MESSAGE_TYPE, - "content": f"\n{cls.FN_RESULT}: 当前没有解析到tool,{error}\n请检查tool调用的格式是否正确,并重新输出某个tool的调用。注意正确的tool调用格式应该为: {tool_format}。" - } - return e - - @classmethod - def parse_tool(cls,agent_output): - thinking_tag = cls.thinking_tag - try: - output:dict = agent_output['agent_output'] - tools = agent_output['current_agent_tools_def'] - agent_message = { - "role": MessageType.AI_MESSAGE_TYPE, - "content": output['content'], - "additional_kwargs": {} - } - - function_calls = output['function_calls'] - if function_calls: - tool_call = 
cls.parse_tool_kwargs(function_calls[0],tools_def=tools,agent_message=agent_message) - agent_message['additional_kwargs']['tool_calls'] = [tool_call] - else: - if any(s in output['content'] for s in [cls.FN_ARGS,cls.FN_EXIT,cls.FN_NAME,cls.FN_RESULT]): - e = cls.tool_not_found(agent_message=agent_message,error="如果你想调用某个工具,请确保格式正确。") - raise e - response = re.sub(f"<{thinking_tag}>.*?","",output['content'],flags=re.DOTALL).strip() - response = response.replace(f'<{cls.fix_reply_tag}>',"").replace(f'',"").replace(f"","").strip() - tool_call = {"name":"give_final_response","kwargs":{"response":response},"model_id":cls.model_id} - - return { - "agent_message": agent_message, - "tool_calls": [tool_call] - } - except (ToolNotExistError, ToolParameterNotExistError, MultipleToolNameError) as e: - e.agent_message = agent_message - e.error_message = format_tool_call_results( - model_id = agent_output['current_agent_model_id'], - tool_output=[{ - "output":{ - "code": 1, - "result": e.to_agent(), - "tool_name": e.tool_name} - }] - )['tool_message'] - raise e - - -class Qwen2Instruct72BToolCallingParse(Qwen2Instruct7BToolCallingParse): - model_id = LLMModelType.QWEN2INSTRUCT72B - - - -class QWEN15INSTRUCT32BToolCallingParse(Qwen2Instruct7BToolCallingParse): - model_id = LLMModelType.QWEN15INSTRUCT32B - - -parse_tool_calling = ToolCallingParse.parse_tool - - - - - - - - - - diff --git a/source/lambda/online/functions/tool_execute_result_format.py b/source/lambda/online/functions/tool_execute_result_format.py deleted file mode 100644 index c8ab9c0f0..000000000 --- a/source/lambda/online/functions/tool_execute_result_format.py +++ /dev/null @@ -1,193 +0,0 @@ -""" -tool execute format -""" - -from common_logic.common_utils.constant import ( - LLMModelType, - MessageType -) - -class FormatMeta(type): - def __new__(cls, name, bases, attrs): - new_cls = type.__new__(cls, name, bases, attrs) - - if name == "FormatToolResult": - return new_cls - new_cls.model_map[new_cls.model_id] = new_cls - return new_cls - - -class FormatToolResult(metaclass=FormatMeta): - model_map = {} - - @classmethod - def format(cls,model_id,tool_output:dict): - target_cls = cls.model_map[model_id] - return target_cls.format(tool_output) - - -CLAUDE_TOOL_EXECUTE_SUCCESS_TEMPLATE = """ - - -{tool_name} - -{result} - - - -""" - -CLAUDE_TOOL_EXECUTE_FAIL_TEMPLATE = """ - - -{error} - - -""" - -MIXTRAL8X7B_TOOL_EXECUTE_SUCCESS_TEMPLATE = """工具: {tool_name} 的执行结果如下: -{result}""" - -MIXTRAL8X7B_TOOL_EXECUTE_FAIL_TEMPLATE = """工具: {tool_name} 执行错误,错误如下: -{error}""" - -class Claude3SonnetFormatToolResult(FormatToolResult): - model_id = LLMModelType.CLAUDE_3_SONNET - execute_success_template = CLAUDE_TOOL_EXECUTE_SUCCESS_TEMPLATE - execute_fail_template = CLAUDE_TOOL_EXECUTE_FAIL_TEMPLATE - - @classmethod - def format_one_tool_output(cls,tool_output:dict): - exe_code = tool_output['code'] - if exe_code == 1: - # failed - return cls.execute_fail_template.format( - error=tool_output['result'], - tool_name = tool_output['tool_name'] - ) - elif exe_code == 0: - # succeed - return cls.execute_success_template.format( - tool_name=tool_output['tool_name'], - result=tool_output['result'] - ) - else: - raise ValueError(f"Invalid tool execute: {tool_output}") - - @classmethod - def format(cls,tool_call_outputs:list[dict]): - tool_call_result_strs = [] - for tool_call_result in tool_call_outputs: - tool_exe_output = tool_call_result['output'] - if 'name' in tool_call_result.keys(): - tool_exe_output['tool_name'] = tool_call_result['name'] - ret:str = 
cls.format_one_tool_output( - tool_exe_output - ) - tool_call_result_strs.append(ret) - - ret = "\n".join(tool_call_result_strs) - return { - "tool_message": { - "role": MessageType.HUMAN_MESSAGE_TYPE, - "content": ret, - "additional_kwargs": { - "original": [out['output'] for out in tool_call_outputs], - "raw_tool_call_results": tool_call_outputs, - }, - } - } - -class Claude3HaikuFormatToolResult(Claude3SonnetFormatToolResult): - model_id = LLMModelType.CLAUDE_3_HAIKU - - -class Claude35SonnetFormatToolResult(Claude3SonnetFormatToolResult): - model_id = "anthropic.claude-3-5-sonnet-20240620-v1:0" - - -class Claude2FormatToolResult(Claude3SonnetFormatToolResult): - model_id = LLMModelType.CLAUDE_2 - - -class Claude21FormatToolResult(Claude3SonnetFormatToolResult): - model_id = LLMModelType.CLAUDE_21 - - -class ClaudeInstanceFormatToolResult(Claude3SonnetFormatToolResult): - model_id = LLMModelType.CLAUDE_INSTANCE - - -class Mixtral8x7bFormatToolResult(Claude3SonnetFormatToolResult): - model_id = LLMModelType.MIXTRAL_8X7B_INSTRUCT - execute_success_template = MIXTRAL8X7B_TOOL_EXECUTE_SUCCESS_TEMPLATE - execute_fail_template = MIXTRAL8X7B_TOOL_EXECUTE_FAIL_TEMPLATE - - -class GLM4Chat9BFormatToolResult(FormatToolResult): - model_id = LLMModelType.GLM_4_9B_CHAT - - @classmethod - def format(cls,tool_call_outputs:list[dict]): - tool_call_result_strs = [] - for tool_call_result in tool_call_outputs: - tool_exe_output = tool_call_result['output'] - tool_call_result_strs.append(str(tool_exe_output['result'])) - # print(tool_exe_output['result']) - ret = "\n".join(tool_call_result_strs) - return { - "tool_message": { - "role": MessageType.TOOL_MESSAGE_TYPE, - "content": ret, - "additional_kwargs": { - "original": [out['output'] for out in tool_call_outputs], - "raw_tool_call_results":tool_call_outputs, - }, - } - } - -class Qwen2Instruct7BFormatToolResult(FormatToolResult): - model_id = LLMModelType.QWEN2INSTRUCT7B - FN_RESULT = '✿RESULT✿' - FN_EXIT = '✿RETURN✿' - - @classmethod - def format(cls,tool_call_outputs:list[dict]): - tool_call_result_strs = [] - for tool_call_result in tool_call_outputs: - tool_exe_output = tool_call_result['output'] - result = tool_exe_output["result"] - tool_call_result_strs.append(f'\n{cls.FN_RESULT}: {result}\n{cls.FN_EXIT}:') - - ret = "\n".join(tool_call_result_strs) - return { - "tool_message": { - "role": MessageType.TOOL_MESSAGE_TYPE, - "content": ret, - "additional_kwargs": { - "original": [out['output'] for out in tool_call_outputs], - "raw_tool_call_results":tool_call_outputs, - }, - } - } - - -class Qwen2Instruct72BFormatToolResult(Qwen2Instruct7BFormatToolResult): - model_id = LLMModelType.QWEN2INSTRUCT72B - - -class QWEN15INSTRUCT32BFormatToolResult(Qwen2Instruct7BFormatToolResult): - model_id = LLMModelType.QWEN15INSTRUCT32B - - -format_tool_call_results = FormatToolResult.format - - - - - - - - - - diff --git a/source/lambda/online/lambda_agent/agent.py b/source/lambda/online/lambda_agent/agent.py index da898a4b6..495e2587c 100644 --- a/source/lambda/online/lambda_agent/agent.py +++ b/source/lambda/online/lambda_agent/agent.py @@ -26,7 +26,7 @@ def tool_calling(state:dict): "fewshot_examples": state['intent_fewshot_examples'], } - agent_llm_type = state.get("agent_llm_type",None) or LLMTaskType.TOOL_CALLING + agent_llm_type = state.get("agent_llm_type",None) or LLMTaskType.TOOL_CALLING_XML group_name = state['chatbot_config']['group_name'] chatbot_id = state['chatbot_config']['chatbot_id'] diff --git 
a/source/lambda/online/lambda_intention_detection/intention.py b/source/lambda/online/lambda_intention_detection/intention.py index fbc72fbc6..3499ea293 100644 --- a/source/lambda/online/lambda_intention_detection/intention.py +++ b/source/lambda/online/lambda_intention_detection/intention.py @@ -1,9 +1,11 @@ -from common_logic.common_utils.logger_utils import get_logger -from common_logic.common_utils.lambda_invoke_utils import chatbot_lambda_call_wrapper,invoke_lambda import json import pathlib import os +from common_logic.common_utils.logger_utils import get_logger +from common_logic.common_utils.lambda_invoke_utils import chatbot_lambda_call_wrapper,invoke_lambda +from common_logic.langchain_integration.retrievers.retriever import lambda_handler as retrieve_fn + logger = get_logger("intention") kb_enabled = os.environ["KNOWLEDGE_BASE_ENABLED"].lower() == "true" kb_type = json.loads(os.environ["KNOWLEDGE_BASE_TYPE"]) @@ -27,12 +29,13 @@ def get_intention_results(query: str, intention_config: dict): } # call retriver - res:list[dict] = invoke_lambda( - lambda_name="Online_Functions", - lambda_module_path="functions.functions_utils.retriever.retriever", - handler_name="lambda_handler", - event_body=event_body - ) + # res:list[dict] = invoke_lambda( + # lambda_name="Online_Functions", + # lambda_module_path="functions.functions_utils.retriever.retriever", + # handler_name="lambda_handler", + # event_body=event_body + # ) + res = retrieve_fn(event_body) if not res["result"]["docs"]: # Return to guide the user to add intentions @@ -95,6 +98,5 @@ def lambda_handler(state:dict, context=None): **intention_config, } ) - return output diff --git a/source/lambda/online/lambda_llm_generate/llm_generate.py b/source/lambda/online/lambda_llm_generate/llm_generate.py index 38408ce5e..0ce1506cb 100644 --- a/source/lambda/online/lambda_llm_generate/llm_generate.py +++ b/source/lambda/online/lambda_llm_generate/llm_generate.py @@ -1,5 +1,5 @@ from common_logic.common_utils.logger_utils import get_logger -from lambda_llm_generate.llm_generate_utils import LLMChain +from common_logic.langchain_integration.chains import LLMChain from common_logic.common_utils.lambda_invoke_utils import chatbot_lambda_call_wrapper logger = get_logger("llm_generate") diff --git a/source/lambda/online/lambda_llm_generate/llm_generate_utils/__init__.py b/source/lambda/online/lambda_llm_generate/llm_generate_utils/__init__.py deleted file mode 100644 index f2c1de103..000000000 --- a/source/lambda/online/lambda_llm_generate/llm_generate_utils/__init__.py +++ /dev/null @@ -1,12 +0,0 @@ -from .llm_chains import LLMChain -from .llm_models import Model - - -def get_llm_chain(model_id, intent_type, model_kwargs=None, **kwargs): - return LLMChain.get_chain( - model_id, intent_type, model_kwargs=model_kwargs, **kwargs - ) - - -def get_llm_model(model_id, model_kwargs=None): - return Model.get_model(model_id, model_kwargs=model_kwargs) diff --git a/source/lambda/online/lambda_llm_generate/llm_generate_utils/llm_chains/__init__.py b/source/lambda/online/lambda_llm_generate/llm_generate_utils/llm_chains/__init__.py deleted file mode 100644 index 1577b1eb5..000000000 --- a/source/lambda/online/lambda_llm_generate/llm_generate_utils/llm_chains/__init__.py +++ /dev/null @@ -1,94 +0,0 @@ -from .llm_chain_base import LLMChain - -from .chat_chain import ( - Claude2ChatChain, - Claude21ChatChain, - ClaudeInstanceChatChain, - Iternlm2Chat7BChatChain, - Iternlm2Chat20BChatChain, - Baichuan2Chat13B4BitsChatChain, - Claude3HaikuChatChain, - 
Claude3SonnetChatChain, - # ChatGPT35ChatChain, - # ChatGPT4ChatChain, - # ChatGPT4oChatChain, -) - -from .conversation_summary_chain import ( - Iternlm2Chat7BConversationSummaryChain, - ClaudeInstanceConversationSummaryChain, - Claude21ConversationSummaryChain, - Claude3HaikuConversationSummaryChain, - Claude3SonnetConversationSummaryChain, - Iternlm2Chat20BConversationSummaryChain -) - -from .intention_chain import ( - Claude21IntentRecognitionChain, - Claude2IntentRecognitionChain, - ClaudeInstanceIntentRecognitionChain, - Claude3HaikuIntentRecognitionChain, - Claude3SonnetIntentRecognitionChain, - Iternlm2Chat7BIntentRecognitionChain, - Iternlm2Chat20BIntentRecognitionChain, - -) - -from .rag_chain import ( - Claude21RagLLMChain, - Claude2RagLLMChain, - ClaudeInstanceRAGLLMChain, - Claude3HaikuRAGLLMChain, - Claude3SonnetRAGLLMChain, - Baichuan2Chat13B4BitsKnowledgeQaChain -) - - -from .translate_chain import ( - Iternlm2Chat7BTranslateChain, - Iternlm2Chat20BTranslateChain -) - - -from .marketing_chains import * - -from .stepback_chain import ( - Claude21StepBackChain, - ClaudeInstanceStepBackChain, - Claude2StepBackChain, - Claude3HaikuStepBackChain, - Claude3SonnetStepBackChain, - Iternlm2Chat7BStepBackChain, - Iternlm2Chat20BStepBackChain -) - - -from .hyde_chain import ( - Claude21HydeChain, - Claude2HydeChain, - Claude3HaikuHydeChain, - Claude3SonnetHydeChain, - ClaudeInstanceHydeChain, - Iternlm2Chat20BHydeChain, - Iternlm2Chat7BHydeChain -) - -from .query_rewrite_chain import ( - Claude21QueryRewriteChain, - Claude2QueryRewriteChain, - ClaudeInstanceQueryRewriteChain, - Claude3HaikuQueryRewriteChain, - Claude3SonnetQueryRewriteChain, - Iternlm2Chat20BQueryRewriteChain, - Iternlm2Chat7BQueryRewriteChain -) - -from .tool_calling_chain_claude_xml import ( - Claude21ToolCallingChain, - Claude3HaikuToolCallingChain, - Claude2ToolCallingChain, - Claude3SonnetToolCallingChain, - ClaudeInstanceToolCallingChain -) - -from .retail_chains import * \ No newline at end of file diff --git a/source/lambda/online/lambda_llm_generate/llm_generate_utils/llm_models.py b/source/lambda/online/lambda_llm_generate/llm_generate_utils/llm_models.py deleted file mode 100644 index 1146cbbdf..000000000 --- a/source/lambda/online/lambda_llm_generate/llm_generate_utils/llm_models.py +++ /dev/null @@ -1,381 +0,0 @@ -import json -import logging -import os -from datetime import datetime - - -import boto3 -from langchain_openai import ChatOpenAI -from langchain_community.chat_models import BedrockChat -from langchain_community.llms.sagemaker_endpoint import LineIterator - -from common_logic.common_utils.constant import ( - MessageType, - LLMModelType -) -from common_logic.common_utils.logger_utils import get_logger - -AI_MESSAGE_TYPE = MessageType.AI_MESSAGE_TYPE -HUMAN_MESSAGE_TYPE = MessageType.HUMAN_MESSAGE_TYPE -SYSTEM_MESSAGE_TYPE = MessageType.SYSTEM_MESSAGE_TYPE - -logger = get_logger("llm_model") - -class ModeMixins: - @staticmethod - def convert_messages_role(messages:list[dict],role_map:dict): - """ - Args: - messages (list[dict]): - role_map (dict): {"current_role":"targe_role"} - - Returns: - _type_: as messages - """ - valid_roles = list(role_map.keys()) - new_messages = [] - for message in messages: - message = {**message} - role = message['role'] - assert role in valid_roles,(role,valid_roles,messages) - message['role'] = role_map[role] - new_messages.append(message) - return new_messages - - -class ModelMeta(type): - def __new__(cls, name, bases, attrs): - new_cls = type.__new__(cls, name, 
bases, attrs) - if name == "Model" or new_cls.model_id is None: - return new_cls - new_cls.model_map[new_cls.model_id] = new_cls - return new_cls - - -class Model(ModeMixins,metaclass=ModelMeta): - model_id = None - model_map = {} - - @classmethod - def get_model(cls, model_id, model_kwargs=None, **kwargs): - return cls.model_map[model_id].create_model(model_kwargs=model_kwargs, **kwargs) - -# Bedrock model type -class Claude2(Model): - model_id = LLMModelType.CLAUDE_2 - default_model_kwargs = {"max_tokens": 2000, "temperature": 0.7, "top_p": 0.9} - - @classmethod - def create_model(cls, model_kwargs=None, **kwargs): - model_kwargs = model_kwargs or {} - model_kwargs = {**cls.default_model_kwargs, **model_kwargs} - - credentials_profile_name = ( - kwargs.get("credentials_profile_name", None) - or os.environ.get("AWS_PROFILE", None) - or None - ) - region_name = ( - kwargs.get("region_name", None) - or os.environ.get("BEDROCK_REGION", None) - or None - ) - llm = BedrockChat( - credentials_profile_name=credentials_profile_name, - region_name=region_name, - model_id=cls.model_id, - model_kwargs=model_kwargs, - ) - - return llm - - -class ClaudeInstance(Claude2): - model_id = LLMModelType.CLAUDE_INSTANCE - - -class Claude21(Claude2): - model_id = LLMModelType.CLAUDE_21 - - -class Claude3Sonnet(Claude2): - model_id = LLMModelType.CLAUDE_3_SONNET - - -class Claude3Haiku(Claude2): - model_id = LLMModelType.CLAUDE_3_HAIKU - - -class Claude35Sonnet(Claude2): - model_id = "anthropic.claude-3-5-sonnet-20240620-v1:0" - - -class Mixtral8x7b(Claude2): - model_id = LLMModelType.MIXTRAL_8X7B_INSTRUCT - default_model_kwargs = {"max_tokens": 4096, "temperature": 0.01} - -# Sagemker Inference type -class SagemakerModelBase(Model): - default_model_kwargs = None - content_type = "application/json" - accepts = "application/json" - - @classmethod - def create_client(cls, region_name): - client = boto3.client("sagemaker-runtime", region_name=region_name) - return client - - def __init__(self, model_kwargs=None, **kwargs) -> None: - self.model_kwargs = model_kwargs or {} - if self.default_model_kwargs is not None: - self.model_kwargs = {**self.default_model_kwargs, **self.model_kwargs} - - self.region_name = ( - kwargs.get("region_name", None) - or os.environ.get("AWS_REGION", None) - or None - ) - self.kwargs = kwargs - self.endpoint_name = kwargs["endpoint_name"] - self.client = self.create_client(self.region_name) - - @classmethod - def create_model(cls, model_kwargs=None, **kwargs): - return cls(model_kwargs=model_kwargs, **kwargs) - - def transform_input(self, x): - raise NotImplementedError - - def transform_output(self, output): - response = json.loads(output.read().decode("utf-8")) - return response - - def _stream(self, x): - body = self.transform_input(x) - resp = self.client.invoke_endpoint_with_response_stream( - EndpointName=self.endpoint_name, - Body=body, - ContentType=self.content_type, - ) - iterator = LineIterator(resp["Body"]) - for line in iterator: - resp = json.loads(line) - error_msg = resp.get("error_msg", None) - if error_msg: - raise RuntimeError(error_msg) - resp_output = resp.get("outputs") - yield resp_output - - def _invoke(self, x): - body = self.transform_input(x) - try: - response = self.client.invoke_endpoint( - EndpointName=self.endpoint_name, - Body=body, - ContentType=self.content_type, - Accept=self.accepts, - ) - except Exception as e: - raise ValueError(f"Error raised by inference endpoint: {e}") - response = self.transform_output(response["Body"]) - return response - - def 
invoke(self, x, stream=False): - x["stream"] = stream - if stream: - return self._stream(x) - else: - return self._invoke(x) - - -class Baichuan2Chat13B4Bits(SagemakerModelBase): - model_id = LLMModelType.BAICHUAN2_13B_CHAT - # content_handler=Baichuan2ContentHandlerChat() - default_model_kwargs = { - "max_new_tokens": 2048, - "temperature": 0.3, - "top_k": 5, - "top_p": 0.85, - # "repetition_penalty": 1.05, - "do_sample": True, - "timeout": 60, - } - - def transform_input(self, x): - query = x["query"] - _chat_history = x["chat_history"] - _chat_history = [ - {"role": message.type, "content": message.content} - for message in _chat_history - ] - - chat_history = [] - for message in _chat_history: - content = message["content"] - role = message["role"] - assert role in [ - MessageType.HUMAN_MESSAGE_TYPE, - MessageType.AI_MESSAGE_TYPE, - MessageType.SYSTEM_MESSAGE_TYPE, - ], f"invalid role: {role}" - if role == MessageType.AI_MESSAGE_TYPE: - role = "assistant" - elif role == MessageType.HUMAN_MESSAGE_TYPE: - role = "user" - - chat_history.append({"role": role, "content": content}) - _messages = chat_history + [{"role": "user", "content": query}] - messages = [] - system_messages = [] - for message in _messages: - if message["role"] == MessageType.SYSTEM_MESSAGE_TYPE: - system_messages.append(message) - else: - messages.append(message) - - if system_messages: - system_prompt = "\n".join([s["content"] for s in system_messages]) - first_content = messages[0]["content"] - messages[0]["content"] = f"{system_prompt}\n{first_content}" - - input_str = json.dumps( - { - "messages": messages, - "parameters": {"stream": x["stream"], **self.model_kwargs}, - } - ) - return input_str - - -class Internlm2Chat7B(SagemakerModelBase): - model_id = LLMModelType.INTERNLM2_CHAT_7B - default_model_kwargs = { - "max_new_tokens": 1024, - "timeout": 60, - # 'repetition_penalty':1.05, - # "do_sample":True, - "temperature": 0.1, - "top_p": 0.8, - } - - # meta_instruction = "You are a helpful AI Assistant" - - def transform_input(self, x): - logger.info(f'prompt char num: {len(x["prompt"])}') - body = { - "query": x["prompt"], - # "meta_instruction": x.get('meta_instruction',self.meta_instruction), - "stream": x["stream"], - # "history": history - } - body.update(self.model_kwargs) - # print('body',body) - input_str = json.dumps(body) - return input_str - - -class Internlm2Chat20B(Internlm2Chat7B): - model_id = LLMModelType.INTERNLM2_CHAT_20B - - -class GLM4Chat9B(SagemakerModelBase): - model_id = LLMModelType.GLM_4_9B_CHAT - default_model_kwargs = { - "max_new_tokens": 1024, - "timeout": 60, - "temperature": 0.1, - } - role_map={ - MessageType.SYSTEM_MESSAGE_TYPE: 'system', - MessageType.HUMAN_MESSAGE_TYPE: 'user', - MessageType.AI_MESSAGE_TYPE: "assistant", - MessageType.TOOL_MESSAGE_TYPE: "observation" - } - - def transform_input(self, x:dict): - _chat_history = self.convert_messages_role( - x['chat_history'], - role_map=self.role_map - ) - chat_history = [] - for message in _chat_history: - if message['role'] == "assistant": - content = message['content'] - if not content.endswith("<|observation|>"): - if not content.endswith("<|user|>"): - message['content'] = message['content'] + "<|user|>" - chat_history.append(message) - - logger.info(f"glm chat_history: {chat_history}") - body = { - "chat_history": chat_history, - "stream": x["stream"], - **self.model_kwargs - } - input_str = json.dumps(body) - return input_str - -class Qwen2Instruct7B(SagemakerModelBase): - model_id = LLMModelType.QWEN2INSTRUCT7B - 
default_model_kwargs = { - "max_tokens": 1024, - "stop":["<|endoftext|>","<|im_end|>"], - "temperature": 0.1, - } - role_map={ - MessageType.SYSTEM_MESSAGE_TYPE: 'system', - MessageType.HUMAN_MESSAGE_TYPE: 'user', - MessageType.AI_MESSAGE_TYPE: "assistant" - } - - def transform_input(self, x:dict): - chat_history = self.convert_messages_role( - x['chat_history'], - role_map=self.role_map - ) - - body = { - "chat_history": chat_history, - "stream": x["stream"], - **self.model_kwargs - } - logger.info(f"qwen body: {body}") - input_str = json.dumps(body) - return input_str - - -class Qwen2Instruct72B(Qwen2Instruct7B): - model_id = LLMModelType.QWEN2INSTRUCT72B - - -class Qwen2Instruct72B(Qwen2Instruct7B): - model_id = LLMModelType.QWEN15INSTRUCT32B - - -# ChatGPT model type -class ChatGPT35(Model): - model_id = "gpt-3.5-turbo-0125" - default_model_kwargs = {"max_tokens": 2000, "temperature": 0.7, "top_p": 0.9} - - @classmethod - def create_model(cls, model_kwargs=None, **kwargs): - model_kwargs = model_kwargs or {} - model_kwargs = {**cls.default_model_kwargs, **model_kwargs} - - credentials_profile_name = ( - kwargs.get("credentials_profile_name", None) - or os.environ.get("AWS_PROFILE", None) - or None - ) - region_name = ( - kwargs.get("region_name", None) - or os.environ.get("AWS_REGION", None) - or None - ) - - llm = ChatOpenAI( - model=cls.model_id, - model_kwargs=model_kwargs, - ) - - return llm diff --git a/source/lambda/online/lambda_main/main.py b/source/lambda/online/lambda_main/main.py index fbeaa1ce5..c04963dd2 100644 --- a/source/lambda/online/lambda_main/main.py +++ b/source/lambda/online/lambda_main/main.py @@ -17,6 +17,7 @@ from lambda_main.main_utils.online_entries import get_entry from common_logic.common_utils.response_utils import process_response + logger = get_logger("main") sessions_table_name = os.environ.get("SESSIONS_TABLE_NAME", "") @@ -372,5 +373,5 @@ def lambda_handler(event_body: dict, context: dict): error_trace = f"\n### Error trace\n\n{traceback.format_exc()}\n\n" send_trace(error_trace, enable_trace=enable_trace) process_response(event_body, error_response) - logger.error(f"An error occurred: {str(e)}\n{error_trace}") + logger.error(f"{traceback.format_exc()}\nAn error occurred: {str(e)}") return {"error": str(e)} diff --git a/source/lambda/online/lambda_main/main_utils/online_entries/__init__.py b/source/lambda/online/lambda_main/main_utils/online_entries/__init__.py index 5f4c315ba..bca58edd7 100644 --- a/source/lambda/online/lambda_main/main_utils/online_entries/__init__.py +++ b/source/lambda/online/lambda_main/main_utils/online_entries/__init__.py @@ -1,14 +1,14 @@ from common_logic.common_utils.constant import EntryType -from functions import get_tool_by_name,init_common_tools,init_retail_tools +# from functions import get_tool_by_name,init_common_tools,init_retail_tools def get_common_entry(): from .common_entry import main_chain_entry - init_common_tools() + # init_common_tools() return main_chain_entry def get_retail_entry(): from .retail_entry import main_chain_entry - init_retail_tools() + # init_retail_tools() return main_chain_entry entry_map = { diff --git a/source/lambda/online/lambda_main/main_utils/online_entries/common_entry.py b/source/lambda/online/lambda_main/main_utils/online_entries/common_entry.py index e956a533a..394bcbdc7 100644 --- a/source/lambda/online/lambda_main/main_utils/online_entries/common_entry.py +++ b/source/lambda/online/lambda_main/main_utils/online_entries/common_entry.py @@ -1,40 +1,53 @@ -from typing import 
Annotated, Any, TypedDict +import traceback +import json +import uuid +import re +from typing import Annotated, Any, TypedDict, List,Union from common_logic.common_utils.chatbot_utils import ChatbotManager from common_logic.common_utils.constant import ( IndexType, LLMTaskType, SceneType, - ToolRuningMode, GUIDE_INTENTION_NOT_FOUND, Threshold, ) from common_logic.common_utils.lambda_invoke_utils import ( - invoke_lambda, is_running_local, node_monitor_wrapper, send_trace, ) +from langchain_core.messages import ToolMessage,AIMessage from common_logic.common_utils.logger_utils import get_logger from common_logic.common_utils.prompt_utils import get_prompt_templates_from_ddb from common_logic.common_utils.python_utils import add_messages, update_nest_dict from common_logic.common_utils.response_utils import process_response +from common_logic.langchain_integration.tools import ToolManager +from langchain_core.tools import BaseTool +from langchain_core.messages.tool import ToolCall +from langgraph.prebuilt.tool_node import ToolNode,TOOL_CALL_ERROR_TEMPLATE +from common_logic.langchain_integration.chains import LLMChain from common_logic.common_utils.serialization_utils import JSONEncoder from common_logic.common_utils.monitor_utils import format_intention_output, format_preprocess_output, format_qq_data from common_logic.common_utils.ddb_utils import custom_index_desc -from functions import get_tool_by_name -from functions._tool_base import tool_manager -from functions.lambda_common_tools import rag -from lambda_main.main_utils.online_entries.agent_base import ( - build_agent_graph, - tool_execution, -) from lambda_main.main_utils.parse_config import CommonConfigParser from langgraph.graph import END, StateGraph +from common_logic.langchain_integration.retrievers.retriever import lambda_handler as retrieve_fn +from common_logic.common_utils.monitor_utils import ( + format_preprocess_output, + format_qq_data, + format_intention_output +) +from lambda_intention_detection.intention import get_intention_results +from lambda_query_preprocess.query_preprocess import conversation_query_rewrite +from common_logic.langchain_integration.chains import LLMChain +from common_logic.common_utils.serialization_utils import JSONEncoder + logger = get_logger("common_entry") + class ChatbotState(TypedDict): ########### input/output states ########### # inputs @@ -77,6 +90,7 @@ class ChatbotState(TypedDict): intent_fewshot_examples: list # tools of retrieved intention samples in search engine, e.g. OpenSearch intent_fewshot_tools: list + all_knowledge_retrieved_list: list ########### retriever states ########### # contexts information retrieved in search engine, e.g. 
OpenSearch @@ -87,23 +101,27 @@ class ChatbotState(TypedDict): ########### agent states ########### # current output of agent - agent_current_output: dict - # record messages during agent tool choose and calling, including agent message, tool ouput and error messages - agent_tool_history: Annotated[list[dict], add_messages] - # the maximum number that agent node can be called - agent_repeated_call_limit: int - # the current call time of agent - agent_current_call_number: int # - # whehter the current call time is less than maximum number of agent call - agent_repeated_call_validation: bool - # function calling - # whether the output of agent can be parsed as the valid tool calling - function_calling_parse_ok: bool - # whether the current parsed tool calling is run once - function_calling_is_run_once: bool - # current tool calls - function_calling_parsed_tool_calls: list - current_agent_tools_def: list + # agent_current_output: dict + # # record messages during agent tool choose and calling, including agent message, tool ouput and error messages + agent_tool_history: Annotated[List[Union[AIMessage,ToolMessage]], add_messages] + # # the maximum number that agent node can be called + # agent_repeated_call_limit: int + # # the current call time of agent + # agent_current_call_number: int # + # # whehter the current call time is less than maximum number of agent call + # agent_repeated_call_validation: bool + # # function calling + # # whether the output of agent can be parsed as the valid tool calling + # function_calling_parse_ok: bool + # # whether the current parsed tool calling is run once + exit_tool_calling: bool + # # current tool calls + # function_calling_parsed_tool_calls: list + # current_agent_tools_def: list + last_tool_messages: List[ToolMessage] + tools: List[BaseTool] + # the global rag tool use all knowledge + all_knowledge_rag_tool: BaseTool #################### @@ -113,11 +131,23 @@ class ChatbotState(TypedDict): @node_monitor_wrapper def query_preprocess(state: ChatbotState): - output: str = invoke_lambda( - event_body=state, - lambda_name="Online_Query_Preprocess", - lambda_module_path="lambda_query_preprocess.query_preprocess", - handler_name="lambda_handler", + + # output: str = invoke_lambda( + # event_body=state, + # lambda_name="Online_Query_Preprocess", + # lambda_module_path="lambda_query_preprocess.query_preprocess", + # handler_name="lambda_handler", + # ) + + + query_rewrite_llm_type = state.get("query_rewrite_llm_type",None) or LLMTaskType.CONVERSATION_SUMMARY_TYPE + output = conversation_query_rewrite( + query=state['query'], + chat_history=state['chat_history'], + message_id=state['message_id'], + trace_infos=state['trace_infos'], + chatbot_config=state['chatbot_config'], + query_rewrite_llm_type=query_rewrite_llm_type ) preprocess_md = format_preprocess_output(state["query"], output) @@ -131,12 +161,8 @@ def intention_detection(state: ChatbotState): retriever_params["query"] = state[ retriever_params.get("retriever_config", {}).get("query_key", "query") ] - output: str = invoke_lambda( - event_body=retriever_params, - lambda_name="Online_Functions", - lambda_module_path="functions.functions_utils.retriever.retriever", - handler_name="lambda_handler", - ) + + output = retrieve_fn(retriever_params) context_list = [] qq_match_contexts = [] qq_match_threshold = retriever_params["threshold"] @@ -165,87 +191,105 @@ def intention_detection(state: ChatbotState): if state["chatbot_config"]["agent_config"]["only_use_rag_tool"]: return {"qq_match_results": context_list, 
"intent_type": "intention detected"} - intent_fewshot_examples, intention_ready = invoke_lambda( - lambda_module_path="lambda_intention_detection.intention", - lambda_name="Online_Intention_Detection", - handler_name="lambda_handler", - event_body=state, + # get intention results from aos + intention_config = state["chatbot_config"].get("intention_config",{}) + query_key = intention_config.get("retriever_config",{}).get("query_key","query") + query = state[query_key] + intent_fewshot_examples, intention_ready = get_intention_results( + query, + { + **intention_config, + } ) + intent_fewshot_tools: list[str] = list( + set([e["intent"] for e in intent_fewshot_examples]) + ) + all_knowledge_retrieved_list = [] + markdown_table = format_intention_output(intent_fewshot_examples) + group_name = state["chatbot_config"]["group_name"] chatbot_id = state["chatbot_config"]["chatbot_id"] custom_qd_index = custom_index_desc(group_name, chatbot_id) + + + # TODO need to modify with new intent logic + # if not intention_ready and not custom_qd_index: + if not intention_ready: + # retrieve all knowledge + retriever_params = state["chatbot_config"]["private_knowledge_config"] + retriever_params["query"] = state[ + retriever_params.get("retriever_config", {}).get("query_key", "query") + ] + threshold = Threshold.INTENTION_ALL_KNOWLEDGE_RETRIEVAL + output = retrieve_fn(retriever_params) + + info_to_log = [] + all_knowledge_retrieved_list = [] + for doc in output["result"]["docs"]: + if doc['score'] >= threshold: + all_knowledge_retrieved_list.append(doc["page_content"]) + info_to_log.append(f"score: {doc['score']}, page_content: {doc['page_content'][:200]}") - if not intention_ready and not custom_qd_index: - return { - "answer": GUIDE_INTENTION_NOT_FOUND, - "intent_type": "intention not ready", - } - elif not intention_ready and custom_qd_index: - intent_fewshot_examples = [] - intent_fewshot_tools: list[str] = [] - else: - intent_fewshot_tools: list[str] = list( - set([e["intent"] for e in intent_fewshot_examples]) - ) - - markdown_table = format_intention_output(intent_fewshot_examples) send_trace( - f"{markdown_table}", + f"all knowledge retrieved:\n {'\n'.join(info_to_log)}", state["stream"], state["ws_connection_id"], state["enable_trace"], ) + # elif not intention_ready and custom_qd_index: + # intent_fewshot_examples = [] + # intent_fewshot_tools: list[str] = [] + # else: + send_trace( + f"{markdown_table}", + state["stream"], + state["ws_connection_id"], + state["enable_trace"], + ) + + # rename tool name + intent_fewshot_tools = [tool_rename(i) for i in intent_fewshot_tools] + intent_fewshot_examples = [ + {**e, "intent": tool_rename(e["intent"])} for e in intent_fewshot_examples + ] return { "intent_fewshot_examples": intent_fewshot_examples, "intent_fewshot_tools": intent_fewshot_tools, + "all_knowledge_retrieved_list": all_knowledge_retrieved_list, "qq_match_results": context_list, "qq_match_contexts": qq_match_contexts, - "intent_type": "intention detected", + "intent_type": "intention detected" } - @node_monitor_wrapper def agent(state: ChatbotState): # two cases to invoke rag function # 1. when valid intention fewshot found # 2. 
for the first time, agent decides to give final results - # deal with once tool calling - if ( - state["agent_repeated_call_validation"] - and state["function_calling_parse_ok"] - and state["agent_tool_history"] - ): - tool_execute_res = state["agent_tool_history"][-1]["additional_kwargs"][ - "raw_tool_call_results" - ][0] - tool_name = tool_execute_res["name"] - output = tool_execute_res["output"] - tool = get_tool_by_name(tool_name, scene=SceneType.COMMON) - if tool.running_mode == ToolRuningMode.ONCE: + last_tool_messages = state["last_tool_messages"] + if last_tool_messages and len(last_tool_messages) == 1: + last_tool_message = last_tool_messages[0] + tool:BaseTool = ToolManager.get_tool( + scene=SceneType.COMMON, + name=last_tool_message.name + ) + if tool.return_direct: send_trace("once tool", enable_trace=state["enable_trace"]) - return {"answer": output["result"], "function_calling_is_run_once": True} + if tool.response_format == "content_and_artifact": + content = last_tool_message.artifact + else: + content = last_tool_message.content + return {"answer": content, "exit_tool_calling": True} no_intention_condition = not state["intent_fewshot_examples"] - first_tool_final_response = False - if ( - (state["agent_current_call_number"] == 1) - and state["function_calling_parse_ok"] - and state["agent_tool_history"] - ): - tool_execute_res = state["agent_tool_history"][-1]["additional_kwargs"][ - "raw_tool_call_results" - ][0] - tool_name = tool_execute_res["name"] - if tool_name == "give_final_response": - first_tool_final_response = True if ( - no_intention_condition - or first_tool_final_response - or state["chatbot_config"]["agent_config"]["only_use_rag_tool"] + # no_intention_condition, + # or first_tool_final_response + state["chatbot_config"]["agent_config"]["only_use_rag_tool"] ): if state["chatbot_config"]["agent_config"]["only_use_rag_tool"]: send_trace("agent only use rag tool", @@ -255,28 +299,72 @@ def agent(state: ChatbotState): "no_intention_condition, switch to rag tool", enable_trace=state["enable_trace"], ) - elif first_tool_final_response: - send_trace( - "first tool is final response, switch to rag tool", - enable_trace=state["enable_trace"], + + all_knowledge_rag_tool = state['all_knowledge_rag_tool'] + agent_message = AIMessage(content="",tool_calls=[ + ToolCall( + id=uuid.uuid4().hex, + name=all_knowledge_rag_tool.name, + args={"query":state["query"]} ) + ]) + tools = [ + ToolManager.get_tool( + scene=SceneType.COMMON, + name=all_knowledge_rag_tool.name + ) + ] + return {"agent_tool_history":[agent_message],"tools":tools} + + # normal call + agent_config = state["chatbot_config"]['agent_config'] + + tools_name = list(set(state['intent_fewshot_tools'] + agent_config['tools'])) + # get tools from tool names + tools = [ + ToolManager.get_tool( + scene=SceneType.COMMON, + name=name + ) + for name in tools_name + ] + llm_config = { + **agent_config['llm_config'], + "tools": tools, + "fewshot_examples": state['intent_fewshot_examples'], + "all_knowledge_retrieved_list":state['all_knowledge_retrieved_list'] + } + group_name = state['chatbot_config']['group_name'] + chatbot_id = state['chatbot_config']['chatbot_id'] + prompt_templates_from_ddb = get_prompt_templates_from_ddb( + group_name, + model_id = llm_config['model_id'], + task_type=LLMTaskType.TOOL_CALLING_API, + chatbot_id=chatbot_id + ) + llm_config.update(**prompt_templates_from_ddb) - return { - "function_calling_parse_ok": True, - "agent_repeated_call_validation": True, - "function_calling_parsed_tool_calls": 
[ - { - "name": "rag_tool", - "kwargs": {}, - "model_id": state["chatbot_config"]["agent_config"]["llm_config"][ - "model_id" - ], - } - ], - } - response = app_agent.invoke(state) + tool_calling_chain = LLMChain.get_chain( + intent_type=LLMTaskType.TOOL_CALLING_API, + scene=SceneType.COMMON, + **llm_config + ) + + agent_message:AIMessage = tool_calling_chain.invoke({ + "query":state['query'], + "chat_history":state['chat_history'], + "agent_tool_history":state['agent_tool_history'] + }) + + send_trace( + f"\n\n**agent_current_output:** \n{agent_message}\n\n", + state["stream"], + state["ws_connection_id"] + ) + if not agent_message.tool_calls: + return {"answer": agent_message.content, "exit_tool_calling": True} - return response + return {"agent_tool_history":[agent_message],"tools":tools} @node_monitor_wrapper @@ -290,27 +378,66 @@ def llm_direct_results_generation(state: ChatbotState): ) logger.info(prompt_templates_from_ddb) - answer: dict = invoke_lambda( - event_body={ - "llm_config": { + llm_config = { **llm_config, "stream": state["stream"], "intent_type": task_type, **prompt_templates_from_ddb, - }, - "llm_input": { + } + + llm_input = { "query": state["query"], "chat_history": state["chat_history"], - }, - }, - lambda_name="Online_LLM_Generate", - lambda_module_path="lambda_llm_generate.llm_generate", - handler_name="lambda_handler", + } + + chain = LLMChain.get_chain( + **llm_config ) + answer = chain.invoke(llm_input) + return {"answer": answer} +@node_monitor_wrapper +def tool_execution(state): + """executor lambda + Args: + state (NestUpdateState): _description_ + + Returns: + _type_: _description_ + """ + tools:List[BaseTool] = state['tools'] + + def handle_tool_errors(e): + content = TOOL_CALL_ERROR_TEMPLATE.format(error=repr(e)) + logger.error(f"Tool execution error:\n{traceback.format_exc()}") + return content + + tool_node = ToolNode( + tools, + handle_tool_errors=handle_tool_errors + ) + last_agent_message:AIMessage = state["agent_tool_history"][-1] + + tool_calls = last_agent_message.tool_calls + + tool_messages:List[ToolMessage] = tool_node.invoke( + [AIMessage(content="",tool_calls=tool_calls)] + ) + + send_trace(f'**tool_execute_res:** \n{tool_messages}', enable_trace=state["enable_trace"]) + return { + "agent_tool_history": tool_messages, + "last_tool_messages": tool_messages + } + + def final_results_preparation(state: ChatbotState): + answer = state['answer'] + if isinstance(answer,str): + answer = re.sub(".*?","",answer,flags=re.S).strip() + state['answer'] = answer app_response = process_response(state["event_body"], state) return {"app_response": app_response} @@ -337,18 +464,17 @@ def intent_route(state: dict): def agent_route(state: dict): - if state.get("function_calling_is_run_once", False): + if state.get("exit_tool_calling", False): return "no need tool calling" + # state["agent_repeated_call_validation"] = ( + # state["agent_current_call_number"] < state["agent_repeated_call_limit"] + # ) + # if state["agent_repeated_call_validation"]: - state["agent_repeated_call_validation"] = ( - state["agent_current_call_number"] < state["agent_repeated_call_limit"] - ) - - if state["agent_repeated_call_validation"]: - return "valid tool calling" - else: - # TODO give final strategy - raise RuntimeError + return "valid tool calling" + # else: + # # TODO give final strategy + # raise RuntimeError ############################# @@ -358,6 +484,7 @@ def agent_route(state: dict): def build_graph(chatbot_state_cls): workflow = StateGraph(chatbot_state_cls) + # add node 
for all chat/rag/agent mode workflow.add_node("query_preprocess", query_preprocess) # chat mode @@ -435,29 +562,14 @@ def build_graph(chatbot_state_cls): ##################################### # define online sub-graph for agent # ##################################### -app_agent = None app = None +def tool_rename(name:str) -> str: + """ + rename the tool name + """ + return name.replace("-","_") -def register_rag_tool( - name: str, - description: str, - scene=SceneType.COMMON, - lambda_name: str = "lambda_common_tools", -): - tool_manager.register_tool( - { - "name": name, - "scene": scene, - "lambda_name": lambda_name, - "lambda_module_path": rag.lambda_handler, - "tool_def": { - "name": name, - "description": description, - }, - "running_mode": ToolRuningMode.ONCE, - } - ) def register_rag_tool_from_config(event_body: dict): @@ -465,14 +577,70 @@ def register_rag_tool_from_config(event_body: dict): chatbot_id = event_body.get("chatbot_config").get("chatbot_id", "admin") chatbot_manager = ChatbotManager.from_environ() chatbot = chatbot_manager.get_chatbot(group_name, chatbot_id) - logger.info(chatbot) + logger.info(f"chatbot info: {chatbot}") + registered_tool_names = [] for index_type, item_dict in chatbot.index_ids.items(): if index_type != IndexType.INTENTION: for index_content in item_dict["value"].values(): + if "indexId" in index_content and "description" in index_content: - register_rag_tool( - index_content["indexId"], index_content["description"] + # Find retriever contain index_id + retrievers = event_body["chatbot_config"]["private_knowledge_config"]['retrievers'] + retriever = None + for retriever in retrievers: + if retriever["index_name"] == index_content["indexId"]: + break + assert retriever is not None,retrievers + rerankers = event_body["chatbot_config"]["private_knowledge_config"]['rerankers'] + if rerankers: + rerankers = [rerankers[0]] + # index_name = index_content["indexId"] + index_name = tool_rename(index_content["indexId"]) + description = index_content["description"] + # TODO give specific retriever config + ToolManager.register_common_rag_tool( + retriever_config={ + "retrievers":[retriever], + "rerankers":rerankers, + "llm_config": event_body["chatbot_config"]["private_knowledge_config"]['llm_config'] + }, + name=index_name, + scene=SceneType.COMMON, + description=description, + return_direct=True ) + registered_tool_names.append(index_name) + logger.info(f"registered rag tool: {index_name}, description: {description}") + return registered_tool_names + + +def register_custom_lambda_tools_from_config(event_body): + agent_config_tools = event_body['chatbot_config']['agent_config']['tools'] + new_agent_config_tools = [] + for tool in agent_config_tools: + if isinstance(tool,str): + new_agent_config_tools.append(tool) + elif isinstance(tool, dict): + tool_name = tool['name'] + assert tool_name not in new_agent_config_tools, f"repeat tool: {tool_name}\n{agent_config_tools}" + if "lambda_name" in tool: + ToolManager.register_aws_lambda_as_tool( + lambda_name=tool["lambda_name"], + tool_def={ + "description":tool["description"], + "properties":tool['properties'], + "required":tool.get('required',[]) + }, + name=tool_name, + scene=SceneType.COMMON, + return_direct=tool.get("return_direct",False) + ) + new_agent_config_tools.append(tool_name) + else: + raise ValueError(f"tool type {type(tool)}: {tool} is not supported") + + event_body['chatbot_config']['agent_config']['tools'] = new_agent_config_tools + return new_agent_config_tools def common_entry(event_body): @@ 
-481,21 +649,15 @@ def common_entry(event_body): :param event_body: The event body for lambda function. return: answer(str) """ - global app, app_agent + global app if app is None: app = build_graph(ChatbotState) - if app_agent is None: - app_agent = build_agent_graph(ChatbotState) - # debuging if is_running_local(): with open("common_entry_workflow.png", "wb") as f: f.write(app.get_graph().draw_mermaid_png()) - with open("common_entry_agent_workflow.png", "wb") as f: - f.write(app_agent.get_graph().draw_mermaid_png()) - ################################################################################ # prepare inputs and invoke graph event_body["chatbot_config"] = CommonConfigParser.from_chatbot_config( @@ -510,7 +672,29 @@ def common_entry(event_body): message_id = event_body["custom_message_id"] ws_connection_id = event_body["ws_connection_id"] enable_trace = chatbot_config["enable_trace"] - register_rag_tool_from_config(event_body) + agent_config = event_body["chatbot_config"]["agent_config"] + + # register as rag tool for each aos index + # print('private_knowledge_config',event_body["chatbot_config"]["private_knowledge_config"]) + registered_tool_names = register_rag_tool_from_config(event_body) + # update private knowledge tool to agent config + for registered_tool_name in registered_tool_names: + if registered_tool_name not in agent_config['tools']: + agent_config['tools'].append(registered_tool_name) + + # register lambda tools + register_custom_lambda_tools_from_config(event_body) + # + logger.info(f'event body to graph:\n{json.dumps(event_body,ensure_ascii=False,cls=JSONEncoder)}') + + # define all knowledge rag tool + all_knowledge_rag_tool = ToolManager.register_common_rag_tool( + retriever_config=event_body["chatbot_config"]["private_knowledge_config"], + name="all_knowledge_rag_tool", + scene=SceneType.COMMON, + description="all knowledge rag tool", + return_direct=True + ) # invoke graph and get results response = app.invoke( @@ -528,11 +712,14 @@ def common_entry(event_body): "debug_infos": {}, "extra_response": {}, "qq_match_results": [], - "agent_repeated_call_limit": chatbot_config["agent_repeated_call_limit"], - "agent_current_call_number": 0, - "ddb_additional_kwargs": {}, - } + "last_tool_messages":None, + "all_knowledge_rag_tool":all_knowledge_rag_tool, + "tools":None, + "ddb_additional_kwargs": {} + }, + config={"recursion_limit": 20} ) + # print('extra_response',response['extra_response']) return response["app_response"] diff --git a/source/lambda/online/lambda_main/test/local_test_base.py b/source/lambda/online/lambda_main/test/local_test_base.py index 25e351cc8..8feef1ebe 100644 --- a/source/lambda/online/lambda_main/test/local_test_base.py +++ b/source/lambda/online/lambda_main/test/local_test_base.py @@ -13,7 +13,7 @@ from typing import Any import common_logic.common_utils.websocket_utils as websocket_utils from common_logic.common_utils.constant import LLMTaskType -from langchain_core.pydantic_v1 import BaseModel, Field, validator +from pydantic import BaseModel, Field class DummyWebSocket: def post_to_connection(self,ConnectionId,Data): diff --git a/source/lambda/online/lambda_main/test/main_local_test_common.py b/source/lambda/online/lambda_main/test/main_local_test_common.py index f058da3fe..052ef7e47 100644 --- a/source/lambda/online/lambda_main/test/main_local_test_common.py +++ b/source/lambda/online/lambda_main/test/main_local_test_common.py @@ -139,16 +139,111 @@ def test_multi_turns_agent_pr(): }, ] - # default_index_names = { - # 
"intention":["pr_test-intention-default"], - # "qq_match": [], - # "private_knowledge": ['pr_test-qd-sso_poc'] - # } default_index_names = { "intention":[], "qq_match": [], "private_knowledge": [] } + # user_queries = [{ + # "query": "今天天气怎么样", + # "use_history": True, + # "enable_trace": False + # }] + # user_queries = [{ + # # "query": "199乘以98等于多少", + # "query": "1234乘以89878等于多少?", + # "use_history": True, + # "enable_trace": True + # }] + # user_queries = [{ + # "query": "199乘以98等于多少", + # # "query": "介绍一下MemGPT", + # "use_history": True, + # "enable_trace": True + # }] + user_queries = [ + { + # "query": "”我爱北京天安门“包含多少个字符?", + # "query": "What does 245346356356 times 346357457 equal?", # 1089836033535 + # "query": "9.11和9.9哪个更大?", # 1089836033535 + # "query": "what happened in the history of Morgan Stanley in Emerging market in 1989 ?", + "query": "Tell me the team members of Morgan Stanley in China", + # "query": "今天天气如何?", + # "query": "介绍一下MemGPT", + "use_history": True, + "enable_trace": True + }, + # { + # # "query": "”我爱北京天安门“包含多少个字符?", + # # "query": "11133乘以97892395等于多少", # 1089836033535 + # "query": "我在上海", + # # "query": "介绍一下MemGPT", + # "use_history": True, + # "enable_trace": True + # }, + ] + + # default_index_names = { + # "intention":[], + # "qq_match": [], + # "private_knowledge": [] + # } + default_llm_config = { + # "model_id":'anthropic.claude-3-sonnet-20240229-v1:0', + # 'model_id': "anthropic.claude-3-5-sonnet-20240620-v1:0", + # 'model_id': "anthropic.claude-3-5-haiku-20241022-v1:0", + # 'model_id': "us.meta.llama3-2-90b-instruct-v1:0", + 'model_id': "anthropic.claude-3-5-sonnet-20241022-v2:0", + # 'model_id': "meta.llama3-1-70b-instruct-v1:0", + # 'model_id':"mistral.mistral-large-2407-v1:0", + # 'model_id':"cohere.command-r-plus-v1:0", + 'model_kwargs': { + 'temperature': 0.01, + 'max_tokens': 4096 + } + } + # agent_config={"tools":["python_repl"]} + agent_config = {} + agent_config={ + "tools":[ + { + "lambda_name":"intelli-agent-lambda-tool-example1", + "name": "count_char", + "description": "Count the number of chars contained in a sentence.", + "properties": { + "phrase": { + "type": "string", + "description": "The phrase needs to count chars" + } + }, + "required": ["phrase"], + "return_direct":False + }, + "python_repl" + ] + } + + +# { +# "agent_config":{ +# "tools":[ +# { +# "lambda_name":"intelli-agent-lambda-tool-example1", +# "name": "count_char", +# "description": "Count the number of chars contained in a sentence.", +# "properties": { +# "phrase": { +# "type": "string", +# "description": "The phrase needs to count chars" +# } +# }, +# "required": ["phrase"], +# "return_direct":False +# }, +# "python_repl" +# ] +# } +# } for query in user_queries: print("==" * 50) @@ -158,12 +253,14 @@ def test_multi_turns_agent_pr(): session_id=session_id, query=query['query'], use_history=query['use_history'], - chatbot_id="pr_test", - group_name='pr_test', + chatbot_id="admin", + group_name='Admin', only_use_rag_tool=False, default_index_names=default_index_names, - enable_trace = query.get('enable_trace',True) - ) + enable_trace = query.get('enable_trace',True), + agent_config=agent_config, + default_llm_config=default_llm_config + ) print() @@ -200,17 +297,13 @@ def test_qq_case_from_hanxu(): - - def complete_test_pr(): print("start test in agent mode") test_multi_turns_agent_pr() print("finish test in agent mode") - print("start test in rag mode") test_multi_turns_rag_pr() print("finish test in rag mode") - print("start test in chat mode") 
test_multi_turns_chat_pr() # print(srg) @@ -409,10 +502,10 @@ def anta_test(): if __name__ == "__main__": # complete_test_pr() # test_multi_turns_rag_pr() - # test_multi_turns_agent_pr() + test_multi_turns_agent_pr() # test_qq_case_from_hanxu() # test_multi_turns_chat_pr() # bigo_test() # sso_batch_test() # anta_test() - bigo_test() + # bigo_test() diff --git a/source/lambda/online/lambda_query_preprocess/query_preprocess.py b/source/lambda/online/lambda_query_preprocess/query_preprocess.py index 56fba45dc..de9dab401 100644 --- a/source/lambda/online/lambda_query_preprocess/query_preprocess.py +++ b/source/lambda/online/lambda_query_preprocess/query_preprocess.py @@ -39,7 +39,7 @@ def conversation_query_rewrite(query:str, chat_history:list, message_id:str, tra chatbot_id=chatbot_id ) logger.info(f'conversation summary prompt templates: {prompt_templates_from_ddb}') - + cqr_llm_chain = RunnableLambda(lambda x: invoke_lambda( lambda_name='Online_LLM_Generate', lambda_module_path="lambda_llm_generate.llm_generate", diff --git a/source/lambda/online/requirements.txt b/source/lambda/online/requirements.txt index 030f6808b..2795b8c1c 100644 --- a/source/lambda/online/requirements.txt +++ b/source/lambda/online/requirements.txt @@ -1,8 +1,8 @@ -langchain==0.2.4 +langchain-aws==0.2.6 httpx==0.26.0 -langgraph==0.0.68 -langchain_openai==0.1.8 -langchain_community==0.2.4 +langgraph==0.2.43 +langchain_openai==0.2.6 +langchain-community==0.3.5 langchainhub==0.1.14 opensearch-py==2.2.0 requests_aws4auth==1.2.2 @@ -12,4 +12,6 @@ beautifulsoup4==4.12.2 validators==0.28.3 openpyxl==3.1.3 xlrd==2.0.1 -pydantic==1.10.17 +pydantic==2.9.2 +datamodel-code-generator==0.26.2 +langchain_experimental==0.3.3 \ No newline at end of file diff --git a/source/portal/src/utils/const.ts b/source/portal/src/utils/const.ts index cb71514b5..24f80fd28 100644 --- a/source/portal/src/utils/const.ts +++ b/source/portal/src/utils/const.ts @@ -22,8 +22,13 @@ export const LLM_BOT_MODEL_LIST = [ ]; export const LLM_BOT_COMMON_MODEL_LIST = [ + 'anthropic.claude-3-5-sonnet-20240620-v1:0', + 'anthropic.claude-3-5-haiku-20241022-v1:0', + 'meta.llama3-1-70b-instruct-v1:0', + 'mistral.mistral-large-2407-v1:0', + 'cohere.command-r-plus-v1:0', 'anthropic.claude-3-sonnet-20240229-v1:0', - 'anthropic.claude-3-haiku-20240307-v1:0', + 'anthropic.claude-3-haiku-20240307-v1:0' // 'anthropic.claude-3-5-sonnet-20240620-v1:0', ];