Merge pull request #254 from tigergraph/GML-1837-one-dockerfile

GML-1837: consolidate docker compose files
tigergraph · Jul 31, 2024 · fdd9560 · fdd9560
2 parents a4e384c + 47cdd50
commit fdd9560
Show file tree

Hide file tree

Showing 22 changed files with 306 additions and 261 deletions.
diff --git a/common/config.py b/common/config.py
@@ -132,57 +132,57 @@ def get_llm_service(llm_config):
     f"Milvus enabled for host {milvus_config['host']} at port {milvus_config['port']}"
 )
 
-LogWriter.info("Setting up Milvus embedding store for InquiryAI")
-try:
-    embedding_store = MilvusEmbeddingStore(
-        embedding_service,
-        host=milvus_config["host"],
-        port=milvus_config["port"],
-        collection_name="tg_inquiry_documents",
-        support_ai_instance=False,
-        username=milvus_config.get("username", ""),
-        password=milvus_config.get("password", ""),
-        alias=milvus_config.get("alias", "default"),
-    )
-    service_status["embedding_store"] = {"status": "ok", "error": None}
-except MilvusException as e:
-    embedding_store = None
-    service_status["embedding_store"] = {"status": "milvus error", "error": str(e)}
-    raise
-except Exception as e:
-    embedding_store = None
-    service_status["embedding_store"] = {"status": "embedding error", "error": str(e)}
-    raise
-
-support_collection_name = milvus_config.get("collection_name", "tg_support_documents")
-LogWriter.info(
-    f"Setting up Milvus embedding store for SupportAI with collection_name: {support_collection_name}"
-)
-vertex_field = milvus_config.get("vertex_field", "vertex_id")
-try:
-    support_ai_embedding_store = MilvusEmbeddingStore(
-        embedding_service,
-        host=milvus_config["host"],
-        port=milvus_config["port"],
-        support_ai_instance=True,
-        collection_name=support_collection_name,
-        username=milvus_config.get("username", ""),
-        password=milvus_config.get("password", ""),
-        vector_field=milvus_config.get("vector_field", "document_vector"),
-        text_field=milvus_config.get("text_field", "document_content"),
-        vertex_field=vertex_field,
-        alias=milvus_config.get("alias", "default"),
-    )
-    service_status["support_ai_embedding_store"] = {"status": "ok", "error": None}
-except MilvusException as e:
-    support_ai_embedding_store = None
-    service_status["support_ai_embedding_store"] = {"status": "milvus error", "error": str(e)}
-    raise
-except Exception as e:
-    support_ai_embedding_store = None
-    service_status["support_ai_embedding_store"] = {"status": "embedding error", "error": str(e)}
-    raise
+if os.getenv("INIT_EMBED_STORE", "true")=="true":
+    LogWriter.info("Setting up Milvus embedding store for InquiryAI")
+    try:
+        embedding_store = MilvusEmbeddingStore(
+            embedding_service,
+            host=milvus_config["host"],
+            port=milvus_config["port"],
+            collection_name="tg_inquiry_documents",
+            support_ai_instance=False,
+            username=milvus_config.get("username", ""),
+            password=milvus_config.get("password", ""),
+            alias=milvus_config.get("alias", "default"),
+        )
+        service_status["embedding_store"] = {"status": "ok", "error": None}
+    except MilvusException as e:
+        embedding_store = None
+        service_status["embedding_store"] = {"status": "milvus error", "error": str(e)}
+        raise
+    except Exception as e:
+        embedding_store = None
+        service_status["embedding_store"] = {"status": "embedding error", "error": str(e)}
+        raise
 
+    support_collection_name = milvus_config.get("collection_name", "tg_support_documents")
+    LogWriter.info(
+        f"Setting up Milvus embedding store for SupportAI with collection_name: {support_collection_name}"
+    )
+    vertex_field = milvus_config.get("vertex_field", "vertex_id")
+    try:
+        support_ai_embedding_store = MilvusEmbeddingStore(
+            embedding_service,
+            host=milvus_config["host"],
+            port=milvus_config["port"],
+            support_ai_instance=True,
+            collection_name=support_collection_name,
+            username=milvus_config.get("username", ""),
+            password=milvus_config.get("password", ""),
+            vector_field=milvus_config.get("vector_field", "document_vector"),
+            text_field=milvus_config.get("text_field", "document_content"),
+            vertex_field=vertex_field,
+            alias=milvus_config.get("alias", "default"),
+        )
+        service_status["support_ai_embedding_store"] = {"status": "ok", "error": None}
+    except MilvusException as e:
+        support_ai_embedding_store = None
+        service_status["support_ai_embedding_store"] = {"status": "milvus error", "error": str(e)}
+        raise
+    except Exception as e:
+        support_ai_embedding_store = None
+        service_status["support_ai_embedding_store"] = {"status": "embedding error", "error": str(e)}
+        raise
 
 if DOC_PROCESSING_CONFIG is None or (
     DOC_PROCESSING_CONFIG.endswith(".json")

diff --git a/common/embeddings/embedding_services.py b/common/embeddings/embedding_services.py
@@ -105,7 +105,7 @@ def __init__(self, config):
         super().__init__(
             config, model_name=config.get("model_name", "OpenAI gpt-4-0613")
         )
-        from langchain.embeddings import OpenAIEmbeddings
+        from langchain_openai import OpenAIEmbeddings
 
         self.embeddings = OpenAIEmbeddings()
 

diff --git a/common/embeddings/milvus_embedding_store.py b/common/embeddings/milvus_embedding_store.py
@@ -2,7 +2,7 @@
 from time import sleep, time
 from typing import Iterable, List, Optional, Tuple
 
-from langchain_community.vectorstores import Milvus
+from langchain_milvus.vectorstores import Milvus
 from langchain_core.documents.base import Document
 from pymilvus import connections, utility
 from pymilvus.exceptions import MilvusException
@@ -82,6 +82,7 @@ def connect_to_milvus(self):
                     f"""Initializing Milvus with host={self.milvus_connection.get("host", self.milvus_connection.get("uri", "unknown host"))},
                     port={self.milvus_connection.get('port', 'unknown')}, username={self.milvus_connection.get('user', 'unknown')}, collection={self.collection_name}"""
                 )
+                LogWriter.info(f"Milvus version {utility.get_server_version()}")
                 self.milvus = Milvus(
                     embedding_function=self.embedding_service,
                     collection_name=self.collection_name,
@@ -108,7 +109,7 @@ def check_collection_exists(self):
 
     def load_documents(self):
         if not self.check_collection_exists():
-            from langchain.document_loaders import DirectoryLoader, JSONLoader
+            from langchain_community.document_loaders import DirectoryLoader, JSONLoader
 
             def metadata_func(record: dict, metadata: dict) -> dict:
                 metadata["function_header"] = record.get("function_header")
@@ -120,7 +121,7 @@ def metadata_func(record: dict, metadata: dict) -> dict:
 
             LogWriter.info("Milvus add initial load documents init()")
             loader = DirectoryLoader(
-                "./tg_documents/",
+                "./common/tg_documents/",
                 glob="*.json",
                 loader_cls=JSONLoader,
                 loader_kwargs={
@@ -131,6 +132,8 @@ def metadata_func(record: dict, metadata: dict) -> dict:
             )
             docs = loader.load()
 
+            # logger.info(f"docs: {docs}")
+
             operation_type = "load_upsert"
             metrics.milvus_query_total.labels(
                 self.collection_name, operation_type

diff --git a/common/llm_services/aws_sagemaker_endpoint.py b/common/llm_services/aws_sagemaker_endpoint.py
@@ -1,5 +1,5 @@
 import boto3
-from langchain.llms.sagemaker_endpoint import LLMContentHandler
+from langchain_community.llms.sagemaker_endpoint import LLMContentHandler
 import logging
 import json
 from typing import Dict

diff --git a/common/llm_services/openai_service.py b/common/llm_services/openai_service.py
@@ -16,8 +16,8 @@ def __init__(self, config):
                 auth_detail
             ]
 
-        from langchain.chat_models import ChatOpenAI
-
+        from langchain_community.chat_models import ChatOpenAI
+        
         model_name = config["llm_model"]
         self.llm = ChatOpenAI(
             temperature=config["model_kwargs"]["temperature"], model_name=model_name

diff --git a/...tual-consistency-service/requirements.txt → common/requirements.txt b/...tual-consistency-service/requirements.txt → common/requirements.txt
@@ -6,6 +6,7 @@ appdirs==1.4.4
 argon2-cffi==23.1.0
 argon2-cffi-bindings==21.2.0
 async-timeout==4.0.3
+asyncer==0.0.7
 attrs==23.1.0
 azure-core==1.30.1
 azure-storage-blob==12.19.1
@@ -23,17 +24,20 @@ cryptography==42.0.5
 dataclasses-json==0.5.14
 distro==1.8.0
 docker-pycreds==0.4.0
+docstring_parser==0.16
 emoji==2.8.0
 environs==9.5.0
 exceptiongroup==1.1.3
 fastapi==0.103.1
+filelock==3.15.4
 filetype==1.2.0
 frozenlist==1.4.0
+fsspec==2024.6.0
 gitdb==4.0.11
 GitPython==3.1.40
 google-api-core==2.14.0
 google-auth==2.23.4
-google-cloud-aiplatform==1.36.1
+google-cloud-aiplatform==1.52.0
 google-cloud-bigquery==3.13.0
 google-cloud-core==2.3.3
 google-cloud-resource-manager==1.10.4
@@ -50,36 +54,48 @@ h11==0.14.0
 httpcore==0.18.0
 httptools==0.6.0
 httpx==0.25.0
-huggingface_hub==0.23.0
+huggingface-hub==0.23.0
+ibm-cos-sdk==2.13.6
+ibm-cos-sdk-core==2.13.6
+ibm-cos-sdk-s3transfer==2.13.6
+ibm_watsonx_ai==1.0.11
 idna==3.4
+importlib_metadata==8.0.0
+iniconfig==2.0.0
 isodate==0.6.1
 jmespath==1.0.1
 joblib==1.3.2
 jq==1.6.0
 jsonpatch==1.33
 jsonpointer==2.4
-langchain==0.1.12
-langchain-community==0.0.28
-langchain-core==0.1.49
-langchain-experimental==0.0.54
-langchain-groq==0.1.3
-langchain-text-splitters==0.0.1
-langchainhub==0.1.14
+langchain==0.2.11
+langchain-community==0.2.10
+langchain-core==0.2.25
+langchain-experimental==0.0.63
+langchain-groq==0.1.8
+langchain-ibm==0.1.11
+langchain-text-splitters==0.2.2
+langchain_milvus==0.1.3
+langchain_openai==0.1.19
+langchainhub==0.1.20
 langdetect==1.0.9
-langgraph==0.0.40
-langsmith==0.1.24
+langgraph==0.1.16
+langsmith==0.1.94
+lomond==0.3.3
 lxml==4.9.3
 marshmallow==3.20.1
+matplotlib==3.9.1
 minio==7.2.5
 multidict==6.0.4
 mypy-extensions==1.0.0
 nltk==3.8.1
 numpy==1.26.4
-openai==1.3.7
+openai==1.37.1
 orjson==3.9.15
 packaging==23.2
 pandas==2.1.1
 pathtools==0.1.2
+pluggy==1.5.0
 prometheus_client==0.20.0
 proto-plus==1.22.3
 protobuf==4.24.4
@@ -92,20 +108,22 @@ pycryptodome==3.20.0
 pydantic==2.3.0
 pydantic_core==2.6.3
 pygit2==1.13.2
-pymilvus==2.3.6
-python-dateutil==2.8.2
+pymilvus==2.4.4
+pytest==8.2.0
+python-dateutil==2.9.0.post0
 python-dotenv==1.0.0
 python-iso639==2023.6.15
 python-magic==0.4.27
 pyTigerDriver==1.0.15
-pyTigerGraph==1.6.1
+pyTigerGraph==1.6.2
 pytz==2023.3.post1
 PyYAML==6.0.1
 rapidfuzz==3.4.0
 regex==2023.10.3
-requests==2.31.0
+requests==2.32.2
 rsa==4.9
 s3transfer==0.7.0
+scikit-learn==1.5.1
 sentry-sdk==1.32.0
 setproctitle==1.3.3
 shapely==2.0.2
@@ -117,12 +135,12 @@ SQLAlchemy==2.0.20
 starlette==0.27.0
 tabulate==0.9.0
 tenacity==8.2.3
-tiktoken==0.5.1
+tiktoken==0.7.0
 tqdm==4.66.1
 types-requests==2.31.0.6
 types-urllib3==1.26.25.14
 typing-inspect==0.9.0
-typing_extensions==4.7.1
+typing_extensions==4.8.0
 tzdata==2023.3
 ujson==5.9.0
 unstructured==0.10.23
@@ -134,3 +152,4 @@ wandb==0.15.12
 watchfiles==0.20.0
 websockets==11.0.3
 yarl==1.9.2
+zipp==3.19.2
diff --git a/common/tg_documents/get_edge_count.json b/common/tg_documents/get_edge_count.json
@@ -0,0 +1,11 @@
+{
+    "function_header": "getEdgeCount",
+    "description": "Get the number of edges at a graph level, optionally filtered by type.",
+    "docstring": "`getEdgeCount(edgeType: str = '*', sourceVertexType: str = '', targetVertexType: str = '')` → dict\nReturns the number of edges of an edge type.\nThis is a simplified version of getEdgeCountFrom(), to be used when the total number of edges of a given type is needed, regardless which vertex instance they are originated from. See documentation of getEdgeCountFrom above for more details.\nParameters:\nedgeType: The name of the edge type.\nsourceVertexType: The name of the source vertex type.\ntargetVertexType: The name of the target vertex type.\nReturns:\nA dictionary of edge_type: edge_count pairs.",
+    "param_types": {
+        "edgeType": "str",
+        "sourceVertexType": "str",
+        "targetVertexType": "str"
+    },
+    "custom_query": false
+}
diff --git a/common/tg_documents/get_edge_count_from.json b/common/tg_documents/get_edge_count_from.json
@@ -0,0 +1,13 @@
+{
+    "function_header": "getEdgeCountFrom",
+    "description": "Get the number of edges from a source vertex, and optionally to a target vertex.",
+    "docstring": "`getEdgeCountFrom(sourceVertexType: str = '', sourceVertexId: Union[str, int] = None, edgeType: str = '', targetVertexType: str = '', targetVertexId: Union[str, int] = None, where: str = '')` → dict\nReturns the number of edges from a specific vertex.\nParameters:\nsourceVertexType: The name of the source vertex type.\nsourceVertexId: The primary ID value of the source vertex instance.\nedgeType: The name of the edge type.\ntargetVertexType: The name of the target vertex type.\ntargetVertexId: The primary ID value of the target vertex instance.\nwhere: A comma separated list of conditions that are all applied on each edge’s attributes. The conditions are in logical conjunction (i.e. they are 'AND’ed' together).\nReturns:\nA dictionary of edge_type: edge_count pairs.\nUses:\nIf edgeType = '*': edge count of all edge types (no other arguments can be specified in this case).\nIf edgeType is specified only: edge count of the given edge type.\nIf sourceVertexType, edgeType, targetVertexType are specified: edge count of the given edge type between source and target vertex types.\nIf sourceVertexType, sourceVertexId are specified: edge count of all edge types from the given vertex instance.\nIf sourceVertexType, sourceVertexId, edgeType are specified: edge count of the given edge type from the given vertex instance.\nIf sourceVertexType, sourceVertexId, edgeType, where are specified: the edge count of the given edge type after being filtered by the where condition.\nIf targetVertexId is specified, then targetVertexType must also be specified.\nIf targetVertexType is specified, then edgeType must also be specified.",
+    "param_types": {
+        "sourceVertexType": "str",
+        "sourceVertexId": "Union[str, int]",
+        "edgeType": "str",
+        "targetVertexType": "str",
+        "targetVertexId": "Union[str, int]"
+    },
+    "custom_query": false
+}
diff --git a/common/tg_documents/get_edge_stats.json b/common/tg_documents/get_edge_stats.json
@@ -0,0 +1,10 @@
+{
+    "function_header": "getEdgeStats",
+    "description": "Retrieves edge attribute statistics",
+    "docstring": "`getEdgeStats(edgeTypes: Union[str, list], skipNA: bool = False)` → dict\nReturns edge attribute statistics.\nParameters:\nedgeTypes: A single edge type name, a list of edge type names, or '*' for all edge types.\nskipNA: Skip those edges that do not have attributes or none of their attributes have statistics gathered.\nReturns:\nAttribute statistics of edges; a dictionary of dictionaries.",
+    "param_types": {
+        "edgeTypes": "Union[str, list]",
+        "skipNA": "bool"
+    },
+    "custom_query": false
+}
diff --git a/common/tg_documents/get_edges.json b/common/tg_documents/get_edges.json
@@ -0,0 +1,16 @@
+{
+    "function_header": "getEdges",
+    "description": "Return edges from the database that comply with certain conditions",
+    "docstring": "`getEdges(sourceVertexType: str, sourceVertexId: Union[str, int], edgeType: str = '', targetVertexType: str = '', targetVertexId: Union[str, int] = '', where: str = '', limit: Union[int, str] = None, sort: str = '')` → Union[dict, str, pd.DataFrame]\nRetrieves edges of the given edge type originating from a specific source vertex.\nOnly sourceVertexType and sourceVertexId are required. If targetVertexId is specified, then targetVertexType must also be specified. If targetVertexType is specified, then edgeType must also be specified.\nParameters:\nsourceVertexType: The name of the source vertex type.\nsourceVertexId: The primary ID value of the source vertex instance.\nedgeType: The name of the edge type.\ntargetVertexType: The name of the target vertex type.\ntargetVertexId: The primary ID value of the target vertex instance.\nwhere: Comma separated list of conditions that are all applied on each edge’s attributes. The conditions are in logical conjunction (i.e. they are 'AND’ed' together).\nsort: Comma separated list of attributes the results should be sorted by.\nlimit: Maximum number of edge instances to be returned (after sorting).\nReturns:\nThe (selected) details of the (matching) edge instances (sorted, limited) as JSON.",
+    "param_types": {
+        "sourceVertexType": "str",
+        "sourceVertexId": "Union[str, int]",
+        "edgeType": "str",
+        "targetVertexType": "str",
+        "targetVertexId": "Union[str, int]",
+        "where": "str",
+        "limit": "Union[int, str]",
+        "sort": "str"
+    },
+    "custom_query": false
+}
diff --git a/common/tg_documents/get_vertex_count.json b/common/tg_documents/get_vertex_count.json
@@ -0,0 +1,10 @@
+{
+    "function_header": "getVertexCount",
+    "description": "Get the count of a vertex type, optionally with a where filter",
+    "docstring": "`getVertexCount(vertexType: Union[str, list] = '*', where: str = '')` → Union[int, dict]\nReturns the number of vertices of the specified type.\nParameters:\nvertexType (Union[str, list], optional): The name of the vertex type. If vertexType == '*', then count the instances of all vertex types (where cannot be specified in this case). Defaults to '*'.\nwhere (str, optional): A comma separated list of conditions that are all applied on each vertex’s attributes. The conditions are in logical conjunction (i.e. they are 'AND’ed' together). Defaults to ''.\nReturns:\nA dictionary of <vertex_type>: <vertex_count> pairs if vertexType is a list or '*'.\nAn integer of vertex count if vertexType is a single vertex type.\nUses:\nIf vertexType is specified only: count of the instances of the given vertex type(s).\nIf vertexType and where are specified: count of the instances of the given vertex type after being filtered by where condition(s).",
+    "param_types": {
+        "vertexType": "Union[str, List[str]]",
+        "where": "str"
+    },
+    "custom_query": false
+}
diff --git a/common/tg_documents/get_vertex_stats.json b/common/tg_documents/get_vertex_stats.json
@@ -0,0 +1,10 @@
+{
+    "function_header": "getVertexStats",
+    "description": "Get the statistics of vertex attributes.",
+    "docstring": "`getVertexStats(vertexTypes: Union[str, list], skipNA: bool = False)` → dict\nReturns vertex attribute statistics.\nParameters:\nvertexTypes: A single vertex type name, a list of vertex type names, or '*' for all vertex types.\nskipNA: Skip those non-applicable vertices that do not have attributes or none of their attributes have statistics gathered.\nReturns:\nA dictionary of various vertex stats for each vertex type specified.",
+    "param_types": {
+        "vertexTypes": "Union[str, list]",
+        "skipNA": "bool"
+    },
+    "custom_query": false
+}