From 9ebe5370d4e386f7bf30e57743bb36ab2f28b75e Mon Sep 17 00:00:00 2001 From: Bill Shi Date: Wed, 3 Jul 2024 16:00:36 -0700 Subject: [PATCH 1/4] feat(config): add global service status --- app/config.py | 57 ++++++++++++++++++++++++++++++--------------------- 1 file changed, 34 insertions(+), 23 deletions(-) diff --git a/app/config.py b/app/config.py index 80ee4be3..cb22bbf3 100644 --- a/app/config.py +++ b/app/config.py @@ -27,6 +27,7 @@ security = HTTPBasic() session_handler = SessionHandler() status_manager = StatusManager() +service_status = {} # Configs LLM_SERVICE = os.getenv("LLM_CONFIG", "configs/llm_config.json") @@ -128,35 +129,45 @@ def get_llm_service(llm_config): ) LogWriter.info("Setting up Milvus embedding store for InquiryAI") -embedding_store = MilvusEmbeddingStore( - embedding_service, - host=milvus_config["host"], - port=milvus_config["port"], - collection_name="tg_inquiry_documents", - support_ai_instance=False, - username=milvus_config.get("username", ""), - password=milvus_config.get("password", ""), - alias=milvus_config.get("alias", "default"), -) +try: + embedding_store = MilvusEmbeddingStore( + embedding_service, + host=milvus_config["host"], + port=milvus_config["port"], + collection_name="tg_inquiry_documents", + support_ai_instance=False, + username=milvus_config.get("username", ""), + password=milvus_config.get("password", ""), + alias=milvus_config.get("alias", "default"), + ) + service_status["embedding_store"] = {"status": "ok", "error": None} +except Exception as e: + embedding_store = None + service_status["embedding_store"] = {"status": "error", "error": str(e)} support_collection_name = milvus_config.get("collection_name", "tg_support_documents") LogWriter.info( f"Setting up Milvus embedding store for SupportAI with collection_name: {support_collection_name}" ) vertex_field = milvus_config.get("vertex_field", "vertex_id") -support_ai_embedding_store = MilvusEmbeddingStore( - embedding_service, - host=milvus_config["host"], - port=milvus_config["port"], - support_ai_instance=True, - collection_name=support_collection_name, - username=milvus_config.get("username", ""), - password=milvus_config.get("password", ""), - vector_field=milvus_config.get("vector_field", "document_vector"), - text_field=milvus_config.get("text_field", "document_content"), - vertex_field=vertex_field, - alias=milvus_config.get("alias", "default"), -) +try: + support_ai_embedding_store = MilvusEmbeddingStore( + embedding_service, + host=milvus_config["host"], + port=milvus_config["port"], + support_ai_instance=True, + collection_name=support_collection_name, + username=milvus_config.get("username", ""), + password=milvus_config.get("password", ""), + vector_field=milvus_config.get("vector_field", "document_vector"), + text_field=milvus_config.get("text_field", "document_content"), + vertex_field=vertex_field, + alias=milvus_config.get("alias", "default"), + ) + service_status["support_ai_embedding_store"] = {"status": "ok", "error": None} +except Exception as e: + support_ai_embedding_store = None + service_status["support_ai_embedding_store"] = {"status": "error", "error": str(e)} if DOC_PROCESSING_CONFIG is None or ( From 30e29fe461d760994ab7caa5817b1d5027fb58e1 Mon Sep 17 00:00:00 2001 From: Bill Shi Date: Wed, 3 Jul 2024 16:11:58 -0700 Subject: [PATCH 2/4] feat(root): read global status service in health check --- app/routers/root.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/app/routers/root.py b/app/routers/root.py index 630269e6..64322119 100644 --- a/app/routers/root.py +++ b/app/routers/root.py @@ -3,7 +3,7 @@ from fastapi import APIRouter, Response from fastapi.responses import FileResponse -from app.config import llm_config +from app.config import llm_config, service_status from pymilvus import connections, utility @@ -18,26 +18,26 @@ def read_root(): @router.get("/health") async def health(): - # Check if Milvus is up and running and if the required collections exist - connections.connect(host="milvus-standalone", port="19530") - try: + # Check if Milvus is up and running and if the required collections exist + connections.connect(host="milvus-standalone", port="19530") # Check if the required collections exist inquiry_collection_exists = utility.has_collection("tg_inquiry_documents") support_collection_exists = utility.has_collection("tg_support_documents") if inquiry_collection_exists or support_collection_exists: - return { - "status": "healthy", - "llm_completion_model": llm_config["completion_service"]["llm_model"], - "embedding_service": llm_config["embedding_service"][ - "embedding_model_service" - ], - } + service_status["milvus"] = {"status": "ok", "error": None} else: - return {"status": "Milvus is up and running, but no collection exist yet"} + service_status["milvus"] = {"status": "error", "error": "Milvus is up and running, but no collection exists"} except Exception as e: - return {"status": "Error checking Milvus health", "error": str(e)} + service_status["milvus"] = {"status": "error", "error": str(e)} + + status = { + "status": "unhealthy" if any(v["error"] is not None for v in service_status.values()) else "healthy", + "details": service_status + } + + return status @router.get("/metrics") From 8095de0d7d5b4ec1ae6932acb55f613f83a0d1be Mon Sep 17 00:00:00 2001 From: Bill Shi Date: Wed, 3 Jul 2024 16:13:06 -0700 Subject: [PATCH 3/4] fix(inquiryai): check embedding store status for every request --- app/routers/inquiryai.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/app/routers/inquiryai.py b/app/routers/inquiryai.py index b373d193..e0b82769 100644 --- a/app/routers/inquiryai.py +++ b/app/routers/inquiryai.py @@ -9,7 +9,7 @@ from fastapi.security.http import HTTPBase from app.agent import TigerGraphAgent -from app.config import embedding_service, embedding_store, llm_config, session_handler +from app.config import embedding_service, embedding_store, llm_config, session_handler, service_status from app.llm_services import ( AWS_SageMaker_Endpoint, AWSBedrock, @@ -45,10 +45,14 @@ def retrieve_answer( conn: Request, credentials: Annotated[HTTPBase, Depends(security)] ) -> CoPilotResponse: - conn = conn.state.conn logger.debug_pii( f"/{graphname}/query request_id={req_id_cv.get()} question={query.query}" ) + if service_status["embedding_store"]["error"]: + return CoPilotResponse( + natural_language_response="Something wrong with CoPilot's embedding store. It could be caused by an invalid API key to the embedding service. Please contact support to check the system health for details.", answered_question=False, response_type="inquiryai" + ) + conn = conn.state.conn logger.debug( f"/{graphname}/query request_id={req_id_cv.get()} database connection created" ) From 673f0908698af1fe3cdb6f29bffc6572ee25b1d4 Mon Sep 17 00:00:00 2001 From: Bill Shi Date: Wed, 3 Jul 2024 17:31:48 -0700 Subject: [PATCH 4/4] fix: check milvus connection at the right place --- app/config.py | 11 +++++++++-- app/routers/root.py | 14 -------------- 2 files changed, 9 insertions(+), 16 deletions(-) diff --git a/app/config.py b/app/config.py index cb22bbf3..09f51787 100644 --- a/app/config.py +++ b/app/config.py @@ -2,6 +2,7 @@ import os from fastapi.security import HTTPBasic +from pymilvus.exceptions import MilvusException from app.embeddings.embedding_services import ( AWS_Bedrock_Embedding, @@ -141,9 +142,12 @@ def get_llm_service(llm_config): alias=milvus_config.get("alias", "default"), ) service_status["embedding_store"] = {"status": "ok", "error": None} +except MilvusException as e: + embedding_store = None + service_status["embedding_store"] = {"status": "milvus error", "error": str(e)} except Exception as e: embedding_store = None - service_status["embedding_store"] = {"status": "error", "error": str(e)} + service_status["embedding_store"] = {"status": "embedding error", "error": str(e)} support_collection_name = milvus_config.get("collection_name", "tg_support_documents") LogWriter.info( @@ -165,9 +169,12 @@ def get_llm_service(llm_config): alias=milvus_config.get("alias", "default"), ) service_status["support_ai_embedding_store"] = {"status": "ok", "error": None} +except MilvusException as e: + support_ai_embedding_store = None + service_status["support_ai_embedding_store"] = {"status": "milvus error", "error": str(e)} except Exception as e: support_ai_embedding_store = None - service_status["support_ai_embedding_store"] = {"status": "error", "error": str(e)} + service_status["support_ai_embedding_store"] = {"status": "embedding error", "error": str(e)} if DOC_PROCESSING_CONFIG is None or ( diff --git a/app/routers/root.py b/app/routers/root.py index 64322119..d14430b4 100644 --- a/app/routers/root.py +++ b/app/routers/root.py @@ -18,20 +18,6 @@ def read_root(): @router.get("/health") async def health(): - try: - # Check if Milvus is up and running and if the required collections exist - connections.connect(host="milvus-standalone", port="19530") - # Check if the required collections exist - inquiry_collection_exists = utility.has_collection("tg_inquiry_documents") - support_collection_exists = utility.has_collection("tg_support_documents") - - if inquiry_collection_exists or support_collection_exists: - service_status["milvus"] = {"status": "ok", "error": None} - else: - service_status["milvus"] = {"status": "error", "error": "Milvus is up and running, but no collection exists"} - except Exception as e: - service_status["milvus"] = {"status": "error", "error": str(e)} - status = { "status": "unhealthy" if any(v["error"] is not None for v in service_status.values()) else "healthy", "details": service_status