diff --git a/common/config.py b/common/config.py index aa6bd7c1..718d2c43 100644 --- a/common/config.py +++ b/common/config.py @@ -132,57 +132,57 @@ def get_llm_service(llm_config): f"Milvus enabled for host {milvus_config['host']} at port {milvus_config['port']}" ) -LogWriter.info("Setting up Milvus embedding store for InquiryAI") -try: - embedding_store = MilvusEmbeddingStore( - embedding_service, - host=milvus_config["host"], - port=milvus_config["port"], - collection_name="tg_inquiry_documents", - support_ai_instance=False, - username=milvus_config.get("username", ""), - password=milvus_config.get("password", ""), - alias=milvus_config.get("alias", "default"), - ) - service_status["embedding_store"] = {"status": "ok", "error": None} -except MilvusException as e: - embedding_store = None - service_status["embedding_store"] = {"status": "milvus error", "error": str(e)} - raise -except Exception as e: - embedding_store = None - service_status["embedding_store"] = {"status": "embedding error", "error": str(e)} - raise - -support_collection_name = milvus_config.get("collection_name", "tg_support_documents") -LogWriter.info( - f"Setting up Milvus embedding store for SupportAI with collection_name: {support_collection_name}" -) -vertex_field = milvus_config.get("vertex_field", "vertex_id") -try: - support_ai_embedding_store = MilvusEmbeddingStore( - embedding_service, - host=milvus_config["host"], - port=milvus_config["port"], - support_ai_instance=True, - collection_name=support_collection_name, - username=milvus_config.get("username", ""), - password=milvus_config.get("password", ""), - vector_field=milvus_config.get("vector_field", "document_vector"), - text_field=milvus_config.get("text_field", "document_content"), - vertex_field=vertex_field, - alias=milvus_config.get("alias", "default"), - ) - service_status["support_ai_embedding_store"] = {"status": "ok", "error": None} -except MilvusException as e: - support_ai_embedding_store = None - 
service_status["support_ai_embedding_store"] = {"status": "milvus error", "error": str(e)} - raise -except Exception as e: - support_ai_embedding_store = None - service_status["support_ai_embedding_store"] = {"status": "embedding error", "error": str(e)} - raise +if os.getenv("INIT_EMBED_STORE", "true")=="true": + LogWriter.info("Setting up Milvus embedding store for InquiryAI") + try: + embedding_store = MilvusEmbeddingStore( + embedding_service, + host=milvus_config["host"], + port=milvus_config["port"], + collection_name="tg_inquiry_documents", + support_ai_instance=False, + username=milvus_config.get("username", ""), + password=milvus_config.get("password", ""), + alias=milvus_config.get("alias", "default"), + ) + service_status["embedding_store"] = {"status": "ok", "error": None} + except MilvusException as e: + embedding_store = None + service_status["embedding_store"] = {"status": "milvus error", "error": str(e)} + raise + except Exception as e: + embedding_store = None + service_status["embedding_store"] = {"status": "embedding error", "error": str(e)} + raise + support_collection_name = milvus_config.get("collection_name", "tg_support_documents") + LogWriter.info( + f"Setting up Milvus embedding store for SupportAI with collection_name: {support_collection_name}" + ) + vertex_field = milvus_config.get("vertex_field", "vertex_id") + try: + support_ai_embedding_store = MilvusEmbeddingStore( + embedding_service, + host=milvus_config["host"], + port=milvus_config["port"], + support_ai_instance=True, + collection_name=support_collection_name, + username=milvus_config.get("username", ""), + password=milvus_config.get("password", ""), + vector_field=milvus_config.get("vector_field", "document_vector"), + text_field=milvus_config.get("text_field", "document_content"), + vertex_field=vertex_field, + alias=milvus_config.get("alias", "default"), + ) + service_status["support_ai_embedding_store"] = {"status": "ok", "error": None} + except MilvusException as e: + 
support_ai_embedding_store = None + service_status["support_ai_embedding_store"] = {"status": "milvus error", "error": str(e)} + raise + except Exception as e: + support_ai_embedding_store = None + service_status["support_ai_embedding_store"] = {"status": "embedding error", "error": str(e)} + raise if DOC_PROCESSING_CONFIG is None or ( DOC_PROCESSING_CONFIG.endswith(".json") diff --git a/common/embeddings/embedding_services.py b/common/embeddings/embedding_services.py index c76bf46d..7195edf4 100644 --- a/common/embeddings/embedding_services.py +++ b/common/embeddings/embedding_services.py @@ -105,7 +105,7 @@ def __init__(self, config): super().__init__( config, model_name=config.get("model_name", "OpenAI gpt-4-0613") ) - from langchain.embeddings import OpenAIEmbeddings + from langchain_openai import OpenAIEmbeddings self.embeddings = OpenAIEmbeddings() diff --git a/common/embeddings/milvus_embedding_store.py b/common/embeddings/milvus_embedding_store.py index 8a52d05f..e9bbdfe1 100644 --- a/common/embeddings/milvus_embedding_store.py +++ b/common/embeddings/milvus_embedding_store.py @@ -2,7 +2,7 @@ from time import sleep, time from typing import Iterable, List, Optional, Tuple -from langchain_community.vectorstores import Milvus +from langchain_milvus.vectorstores import Milvus from langchain_core.documents.base import Document from pymilvus import connections, utility from pymilvus.exceptions import MilvusException @@ -82,6 +82,7 @@ def connect_to_milvus(self): f"""Initializing Milvus with host={self.milvus_connection.get("host", self.milvus_connection.get("uri", "unknown host"))}, port={self.milvus_connection.get('port', 'unknown')}, username={self.milvus_connection.get('user', 'unknown')}, collection={self.collection_name}""" ) + LogWriter.info(f"Milvus version {utility.get_server_version()}") self.milvus = Milvus( embedding_function=self.embedding_service, collection_name=self.collection_name, @@ -108,7 +109,7 @@ def check_collection_exists(self): def 
load_documents(self): if not self.check_collection_exists(): - from langchain.document_loaders import DirectoryLoader, JSONLoader + from langchain_community.document_loaders import DirectoryLoader, JSONLoader def metadata_func(record: dict, metadata: dict) -> dict: metadata["function_header"] = record.get("function_header") @@ -120,7 +121,7 @@ def metadata_func(record: dict, metadata: dict) -> dict: LogWriter.info("Milvus add initial load documents init()") loader = DirectoryLoader( - "./tg_documents/", + "./common/tg_documents/", glob="*.json", loader_cls=JSONLoader, loader_kwargs={ @@ -131,6 +132,8 @@ def metadata_func(record: dict, metadata: dict) -> dict: ) docs = loader.load() + # logger.info(f"docs: {docs}") + operation_type = "load_upsert" metrics.milvus_query_total.labels( self.collection_name, operation_type diff --git a/common/llm_services/aws_sagemaker_endpoint.py b/common/llm_services/aws_sagemaker_endpoint.py index 287f9636..fcc1cf36 100644 --- a/common/llm_services/aws_sagemaker_endpoint.py +++ b/common/llm_services/aws_sagemaker_endpoint.py @@ -1,5 +1,5 @@ import boto3 -from langchain.llms.sagemaker_endpoint import LLMContentHandler +from langchain_community.llms.sagemaker_endpoint import LLMContentHandler import logging import json from typing import Dict diff --git a/common/llm_services/openai_service.py b/common/llm_services/openai_service.py index 914f6364..81d3281e 100644 --- a/common/llm_services/openai_service.py +++ b/common/llm_services/openai_service.py @@ -16,8 +16,8 @@ def __init__(self, config): auth_detail ] - from langchain.chat_models import ChatOpenAI - + from langchain_community.chat_models import ChatOpenAI + model_name = config["llm_model"] self.llm = ChatOpenAI( temperature=config["model_kwargs"]["temperature"], model_name=model_name diff --git a/eventual-consistency-service/requirements.txt b/common/requirements.txt similarity index 73% rename from eventual-consistency-service/requirements.txt rename to common/requirements.txt 
index 90cc7f2c..d45f2a60 100644 --- a/eventual-consistency-service/requirements.txt +++ b/common/requirements.txt @@ -6,6 +6,7 @@ appdirs==1.4.4 argon2-cffi==23.1.0 argon2-cffi-bindings==21.2.0 async-timeout==4.0.3 +asyncer==0.0.7 attrs==23.1.0 azure-core==1.30.1 azure-storage-blob==12.19.1 @@ -23,17 +24,20 @@ cryptography==42.0.5 dataclasses-json==0.5.14 distro==1.8.0 docker-pycreds==0.4.0 +docstring_parser==0.16 emoji==2.8.0 environs==9.5.0 exceptiongroup==1.1.3 fastapi==0.103.1 +filelock==3.15.4 filetype==1.2.0 frozenlist==1.4.0 +fsspec==2024.6.0 gitdb==4.0.11 GitPython==3.1.40 google-api-core==2.14.0 google-auth==2.23.4 -google-cloud-aiplatform==1.36.1 +google-cloud-aiplatform==1.52.0 google-cloud-bigquery==3.13.0 google-cloud-core==2.3.3 google-cloud-resource-manager==1.10.4 @@ -50,36 +54,48 @@ h11==0.14.0 httpcore==0.18.0 httptools==0.6.0 httpx==0.25.0 -huggingface_hub==0.23.0 +huggingface-hub==0.23.0 +ibm-cos-sdk==2.13.6 +ibm-cos-sdk-core==2.13.6 +ibm-cos-sdk-s3transfer==2.13.6 +ibm_watsonx_ai==1.0.11 idna==3.4 +importlib_metadata==8.0.0 +iniconfig==2.0.0 isodate==0.6.1 jmespath==1.0.1 joblib==1.3.2 jq==1.6.0 jsonpatch==1.33 jsonpointer==2.4 -langchain==0.1.12 -langchain-community==0.0.28 -langchain-core==0.1.49 -langchain-experimental==0.0.54 -langchain-groq==0.1.3 -langchain-text-splitters==0.0.1 -langchainhub==0.1.14 +langchain==0.2.11 +langchain-community==0.2.10 +langchain-core==0.2.25 +langchain-experimental==0.0.63 +langchain-groq==0.1.8 +langchain-ibm==0.1.11 +langchain-text-splitters==0.2.2 +langchain_milvus==0.1.3 +langchain_openai==0.1.19 +langchainhub==0.1.20 langdetect==1.0.9 -langgraph==0.0.40 -langsmith==0.1.24 +langgraph==0.1.16 +langsmith==0.1.94 +lomond==0.3.3 lxml==4.9.3 marshmallow==3.20.1 +matplotlib==3.9.1 minio==7.2.5 multidict==6.0.4 mypy-extensions==1.0.0 nltk==3.8.1 numpy==1.26.4 -openai==1.3.7 +openai==1.37.1 orjson==3.9.15 packaging==23.2 pandas==2.1.1 pathtools==0.1.2 +pluggy==1.5.0 prometheus_client==0.20.0 proto-plus==1.22.3 
protobuf==4.24.4 @@ -92,20 +108,22 @@ pycryptodome==3.20.0 pydantic==2.3.0 pydantic_core==2.6.3 pygit2==1.13.2 -pymilvus==2.3.6 -python-dateutil==2.8.2 +pymilvus==2.4.4 +pytest==8.2.0 +python-dateutil==2.9.0.post0 python-dotenv==1.0.0 python-iso639==2023.6.15 python-magic==0.4.27 pyTigerDriver==1.0.15 -pyTigerGraph==1.6.1 +pyTigerGraph==1.6.2 pytz==2023.3.post1 PyYAML==6.0.1 rapidfuzz==3.4.0 regex==2023.10.3 -requests==2.31.0 +requests==2.32.2 rsa==4.9 s3transfer==0.7.0 +scikit-learn==1.5.1 sentry-sdk==1.32.0 setproctitle==1.3.3 shapely==2.0.2 @@ -117,12 +135,12 @@ SQLAlchemy==2.0.20 starlette==0.27.0 tabulate==0.9.0 tenacity==8.2.3 -tiktoken==0.5.1 +tiktoken==0.7.0 tqdm==4.66.1 types-requests==2.31.0.6 types-urllib3==1.26.25.14 typing-inspect==0.9.0 -typing_extensions==4.7.1 +typing_extensions==4.8.0 tzdata==2023.3 ujson==5.9.0 unstructured==0.10.23 @@ -134,3 +152,4 @@ wandb==0.15.12 watchfiles==0.20.0 websockets==11.0.3 yarl==1.9.2 +zipp==3.19.2 diff --git a/common/tg_documents/get_edge_count.json b/common/tg_documents/get_edge_count.json new file mode 100644 index 00000000..c3039dd0 --- /dev/null +++ b/common/tg_documents/get_edge_count.json @@ -0,0 +1,11 @@ +{ + "function_header": "getEdgeCount", + "description": "Get the number of edges at a graph level, optionally filtered by type.", + "docstring": "`getEdgeCount(edgeType: str = '*', sourceVertexType: str = '', targetVertexType: str = '')` → dict\nReturns the number of edges of an edge type.\nThis is a simplified version of getEdgeCountFrom(), to be used when the total number of edges of a given type is needed, regardless which vertex instance they are originated from. 
See documentation of getEdgeCountFrom above for more details.\nParameters:\nedgeType: The name of the edge type.\nsourceVertexType: The name of the source vertex type.\ntargetVertexType: The name of the target vertex type.\nReturns:\nA dictionary of edge_type: edge_count pairs.", + "param_types": { + "edgeType": "str", + "sourceVertexType": "str", + "targetVertexType": "str" + }, + "custom_query": false +} \ No newline at end of file diff --git a/common/tg_documents/get_edge_count_from.json b/common/tg_documents/get_edge_count_from.json new file mode 100644 index 00000000..78416acb --- /dev/null +++ b/common/tg_documents/get_edge_count_from.json @@ -0,0 +1,13 @@ +{ + "function_header": "getEdgeCountFrom", + "description": "Get the number of edges from a source vertex, and optionally to a target vertex.", + "docstring": "`getEdgeCountFrom(sourceVertexType: str = '', sourceVertexId: Union[str, int] = None, edgeType: str = '', targetVertexType: str = '', targetVertexId: Union[str, int] = None, where: str = '')` → dict\nReturns the number of edges from a specific vertex.\nParameters:\nsourceVertexType: The name of the source vertex type.\nsourceVertexId: The primary ID value of the source vertex instance.\nedgeType: The name of the edge type.\ntargetVertexType: The name of the target vertex type.\ntargetVertexId: The primary ID value of the target vertex instance.\nwhere: A comma separated list of conditions that are all applied on each edge’s attributes. The conditions are in logical conjunction (i.e. 
they are 'AND’ed' together).\nReturns:\nA dictionary of edge_type: edge_count pairs.\nUses:\nIf edgeType = '*': edge count of all edge types (no other arguments can be specified in this case).\nIf edgeType is specified only: edge count of the given edge type.\nIf sourceVertexType, edgeType, targetVertexType are specified: edge count of the given edge type between source and target vertex types.\nIf sourceVertexType, sourceVertexId are specified: edge count of all edge types from the given vertex instance.\nIf sourceVertexType, sourceVertexId, edgeType are specified: edge count of the given edge type from the given vertex instance.\nIf sourceVertexType, sourceVertexId, edgeType, where are specified: the edge count of the given edge type after being filtered by where condition.\nIf targetVertexId is specified, then targetVertexType must also be specified.\nIf targetVertexType is specified, then edgeType must also be specified.", + "param_types": { + "sourceVertexType": "str", + "sourceVertexId": "Union[str, int]", + "edgeType": "str", + "targetVertexType": "str", + "targetVertexId": "Union[str, int]" + }, + "custom_query": false +} \ No newline at end of file diff --git a/common/tg_documents/get_edge_stats.json b/common/tg_documents/get_edge_stats.json new file mode 100644 index 00000000..69a26ee1 --- /dev/null +++ b/common/tg_documents/get_edge_stats.json @@ -0,0 +1,10 @@ +{ + "function_header": "getEdgeStats", + "description": "Retrieves edge attribute statistics", + "docstring": "`getEdgeStats(edgeTypes: Union[str, list], skipNA: bool = False)` → dict\nReturns edge attribute statistics.\nParameters:\nedgeTypes: A single edge type name or a list of edge type names or '*' for all edge types.\nskipNA: Skip those edges that do not have attributes or none of their attributes have statistics gathered.\nReturns:\nAttribute statistics of edges; a dictionary of dictionaries.", + "param_types": { + "edgeTypes": "Union[str, list]", + "skipNA": "bool" + }, + "custom_query": false +} 
\ No newline at end of file diff --git a/common/tg_documents/get_edges.json b/common/tg_documents/get_edges.json new file mode 100644 index 00000000..293d9de5 --- /dev/null +++ b/common/tg_documents/get_edges.json @@ -0,0 +1,16 @@ +{ + "function_header": "getEdges", + "description": "Return edges from the database that comply with certain conditions", + "docstring": "`getEdges(sourceVertexType: str, sourceVertexId: Union[str, int], edgeType: str = '', targetVertexType: str = '', targetVertexId: Union[str, int] = '', where: str = '', limit: Union[int, str] = None, sort: str = '')` → Union[dict, str, pd.DataFrame]\nRetrieves edges of the given edge type originating from a specific source vertex.\nOnly sourceVertexType and sourceVertexId are required. If targetVertexId is specified, then targetVertexType must also be specified. If targetVertexType is specified, then edgeType must also be specified.\nParameters:\nsourceVertexType: The name of the source vertex type.\nsourceVertexId: The primary ID value of the source vertex instance.\nedgeType: The name of the edge type.\ntargetVertexType: The name of the target vertex type.\ntargetVertexId: The primary ID value of the target vertex instance.\nwhere: Comma separated list of conditions that are all applied on each edge’s attributes. The conditions are in logical conjunction (i.e. 
they are 'AND’ed' together).\nsort: Comma separated list of attributes the results should be sorted by.\nlimit: Maximum number of edge instances to be returned (after sorting).\nReturns:\nThe (selected) details of the (matching) edge instances (sorted, limited) as JSON.", + "param_types": { + "sourceVertexType": "str", + "sourceVertexId": "Union[str, int]", + "edgeType": "str", + "targetVertexType": "str", + "targetVertexId": "Union[str, int]", + "where": "str", + "limit": "Union[int, str]", + "sort": "str" + }, + "custom_query": false +} diff --git a/common/tg_documents/get_vertex_count.json b/common/tg_documents/get_vertex_count.json new file mode 100644 index 00000000..e3c67710 --- /dev/null +++ b/common/tg_documents/get_vertex_count.json @@ -0,0 +1,10 @@ +{ + "function_header": "getVertexCount", + "description": "Get the count of a vertex type, optionally with a where filter", + "docstring": "`getVertexCount(vertexType: Union[str, list] = '*', where: str = '')` → Union[int, dict]\nReturns the number of vertices of the specified type.\nParameters:\nvertexType (Union[str, list], optional): The name of the vertex type. If vertexType == '*', then count the instances of all vertex types (where cannot be specified in this case). Defaults to '*'.\nwhere (str, optional): A comma separated list of conditions that are all applied on each vertex’s attributes. The conditions are in logical conjunction (i.e. they are 'AND’ed' together). 
Defaults to ''.\nReturns:\nA dictionary of vertex_type: vertex_count pairs if vertexType is a list or '*'.\nAn integer of vertex count if vertexType is a single vertex type.\nUses:\nIf vertexType is specified only: count of the instances of the given vertex type(s).\nIf vertexType and where are specified: count of the instances of the given vertex type after being filtered by where condition(s).", + "param_types": { + "vertexType": "Union[str, List[str]]", + "where": "str" + }, + "custom_query": false +} diff --git a/common/tg_documents/get_vertex_stats.json b/common/tg_documents/get_vertex_stats.json new file mode 100644 index 00000000..06e4922a --- /dev/null +++ b/common/tg_documents/get_vertex_stats.json @@ -0,0 +1,10 @@ +{ + "function_header": "getVertexStats", + "description": "Get the statistics of vertex attributes.", + "docstring": "`getVertexStats(vertexTypes: Union[str, list], skipNA: bool = False) → dict`\nReturns vertex attribute statistics.\nParameters:\nvertexTypes: A single vertex type name or a list of vertex type names or '*' for all vertex types.\nskipNA: Skip those non-applicable vertices that do not have attributes or none of their attributes have statistics gathered.\nReturns:\nA dictionary of various vertex stats for each vertex type specified.", + "param_types": { + "vertexTypes": "Union[str, list]", + "skipNA": "bool" + }, + "custom_query": false +} \ No newline at end of file diff --git a/common/tg_documents/get_vertices.json b/common/tg_documents/get_vertices.json new file mode 100644 index 00000000..270029da --- /dev/null +++ b/common/tg_documents/get_vertices.json @@ -0,0 +1,12 @@ +{ + "function_header": "getVertices", + "description": "Get a sample of vertices", + "docstring": "`getVertices(vertexType: str, where: str = '', limit: Union[int, str] = None, sort: str = '')` → dict\nRetrieves vertices of the given vertex type.\nNote: The primary ID of a vertex instance is NOT an attribute, thus cannot be used in select, where or sort parameters (unless the WITH 
primary_id_as_attribute clause was used when the vertex type was created).\nUse getVerticesById() if you need to retrieve vertices by their primary ID.\nParameters:\nvertexType: The name of the vertex type.\nwhere: Comma separated list of conditions that are all applied on each vertex’s attributes. The conditions are in logical conjunction (i.e. they are 'AND’ed' together).\nsort: Comma separated list of attributes the results should be sorted by. Add '-' in front of the attribute to sort in descending order. Must be used with limit.\nlimit: Maximum number of vertex instances to be returned (after sorting). Must be used with sort.\nReturns:\nThe (selected) details of the (matching) vertex instances (sorted, limited) as dictionary, JSON or pandas DataFrame.", + "param_types": { + "vertexType": "str", + "where": "str", + "limit": "Union[int, str]", + "sort": "str" + }, + "custom_query": false +} \ No newline at end of file diff --git a/common/tg_documents/get_vertices_by_id.json b/common/tg_documents/get_vertices_by_id.json new file mode 100644 index 00000000..e40b6637 --- /dev/null +++ b/common/tg_documents/get_vertices_by_id.json @@ -0,0 +1,10 @@ +{ + "function_header": "getVerticesById", + "description": "Get vertex information by vertex ID", + "docstring": "`getVerticesById(vertexType: str, vertexIds: Union[int, str, list])` → Union[list, str, pd.DataFrame]\nRetrieves vertices of the given vertex type, identified by their ID.\nParameters:\nvertexType: The name of the vertex type.\nvertexIds: A single vertex ID or a list of vertex IDs.\nReturns:\nThe (selected) details of the (matching) vertex instances as JSON.", + "param_types": { + "vertexType": "str", + "vertexIds": "Union[int, str, List[Union[int, str]]]" + }, + "custom_query": false +} \ No newline at end of file diff --git a/common/tg_documents/tg_bfs.json b/common/tg_documents/tg_bfs.json new file mode 100644 index 00000000..bf03d27e --- /dev/null +++ b/common/tg_documents/tg_bfs.json @@ -0,0 +1,12 @@ +{ + 
"function_header": "tg_bfs", + "description": "The BFS, or breadth-first search, algorithm finds the nodes that are reachable or connected within a specified number of edges from a source vertex. It will find unweighted shortest path distances from a source node up to a limited distance away.", + "docstring": "This query finds the unweighted shortest path distances from a source vertex called v_start to all nodes with types v_type_set that are up to max_hops hops away, using edge types that are e_type_set. Run the query with `gds.featurizer().runAlgorithm('tg_bfs', params={'v_type_set': ['INSERT_v_types_HERE'], 'e_type_set': ['INSERT_e_types_HERE'], 'max_hops': 'INSERT_max_hops_HERE', 'v_start': { 'id': 'INSERT_ID_HERE', 'type':'INSERT_v_type_here' } })`, where the parameters are: \n 'v_type_set': The set of vertex types to run the algorithm on. \n 'e_type_set': The set of edge types to run the algorithm on. \n 'max_hops': The maximum number of hops or distance away to run the algorithm on. \n 'v_start': The source node to run the algorithm from. ", + "param_types": { + "v_type_set" : "LIST", + "e_type_set" : "LIST", + "max_hops" : "INT", + "v_start" : "VERTEX" + }, + "custom_query": false +} \ No newline at end of file diff --git a/common/tg_documents/tg_pagerank.json b/common/tg_documents/tg_pagerank.json new file mode 100644 index 00000000..d65c2dff --- /dev/null +++ b/common/tg_documents/tg_pagerank.json @@ -0,0 +1,11 @@ +{ + "function_header": "tg_pagerank", + "description": "The PageRank algorithm finds the most influential or most important nodes in a network. The query output ranks the vertices with a score of importance.", + "docstring": "This query finds the influence of each node in the graph. 
The scores are defined recursively and depend on the influence of incoming neighboring vertices.\n Run the query with `gds.featurizer().runAlgorithm('tg_pagerank', params={'v_type': 'INSERT_v_type_HERE', 'e_type': 'INSERT_e_type_HERE', 'top_k' : 'INSERT_top_k_HERE' })`, where the parameters are: \n 'v_type': The vertex type to run the algorithm on. \n 'e_type': The edge type to run the algorithm on.\n 'top_k': The number of top scoring vertices to return to the user.", + "param_types": { + "v_type" : "STRING", + "e_type" : "STRING", + "top_k" : "INT" + }, + "custom_query": false +} \ No newline at end of file diff --git a/common/tg_documents/tg_shortest_ss_no_wt.json b/common/tg_documents/tg_shortest_ss_no_wt.json new file mode 100644 index 00000000..eabd6598 --- /dev/null +++ b/common/tg_documents/tg_shortest_ss_no_wt.json @@ -0,0 +1,11 @@ +{ + "function_header": "tg_shortest_ss_no_wt", + "description": "The single-source shortest paths algorithm with no weights calculates the shortest path distances from a source node to all other nodes in a graph with unweighted edges.", + "docstring": "This query finds the shortest path distances from a source vertex to all other vertices with types v_type_set in the graph using unweighted edges that have edge type e_type_set. Run the query with `gds.featurizer().runAlgorithm('tg_shortest_ss_no_wt', params={'source': { 'id': 'INSERT_ID_HERE', 'type':'INSERT_v_type_here' }, 'v_type_set': 'INSERT_v_type_set_HERE', 'e_type_set': 'INSERT_e_type_set_HERE' })`, where the parameters are: \n 'source': The source node to run the algorithm from. \n 'v_type_set': The set of vertex types to run the algorithm on. 
\n 'e_type_set': The edge types to run the algorithm on.", + "param_types": { + "source" : "VERTEX", + "v_type_set" : "LIST", + "e_type_set" : "LIST" + }, + "custom_query": false +} \ No newline at end of file diff --git a/copilot/Dockerfile b/copilot/Dockerfile index 2f58923d..17ad6672 100644 --- a/copilot/Dockerfile +++ b/copilot/Dockerfile @@ -1,7 +1,7 @@ FROM python:3.11.8 WORKDIR /code -COPY copilot/requirements.txt requirements.txt +COPY common/requirements.txt requirements.txt RUN apt-get update && apt-get upgrade -y RUN pip install -r requirements.txt diff --git a/copilot/requirements.txt b/copilot/requirements.txt index 5709f69b..d45f2a60 100644 --- a/copilot/requirements.txt +++ b/copilot/requirements.txt @@ -68,17 +68,19 @@ joblib==1.3.2 jq==1.6.0 jsonpatch==1.33 jsonpointer==2.4 -langchain==0.2.8 -langchain-community==0.2.7 -langchain-core==0.2.19 -langchain-experimental==0.0.62 -langchain-groq==0.1.6 -langchain-ibm==0.1.10 +langchain==0.2.11 +langchain-community==0.2.10 +langchain-core==0.2.25 +langchain-experimental==0.0.63 +langchain-groq==0.1.8 +langchain-ibm==0.1.11 langchain-text-splitters==0.2.2 -langchainhub==0.1.14 +langchain_milvus==0.1.3 +langchain_openai==0.1.19 +langchainhub==0.1.20 langdetect==1.0.9 -langgraph==0.1.8 -langsmith==0.1.86 +langgraph==0.1.16 +langsmith==0.1.94 lomond==0.3.3 lxml==4.9.3 marshmallow==3.20.1 @@ -88,7 +90,7 @@ multidict==6.0.4 mypy-extensions==1.0.0 nltk==3.8.1 numpy==1.26.4 -openai==1.3.7 +openai==1.37.1 orjson==3.9.15 packaging==23.2 pandas==2.1.1 @@ -106,7 +108,7 @@ pycryptodome==3.20.0 pydantic==2.3.0 pydantic_core==2.6.3 pygit2==1.13.2 -pymilvus==2.3.6 +pymilvus==2.4.4 pytest==8.2.0 python-dateutil==2.9.0.post0 python-dotenv==1.0.0 @@ -133,7 +135,7 @@ SQLAlchemy==2.0.20 starlette==0.27.0 tabulate==0.9.0 tenacity==8.2.3 -tiktoken==0.5.1 +tiktoken==0.7.0 tqdm==4.66.1 types-requests==2.31.0.6 types-urllib3==1.26.25.14 diff --git a/docker-compose-with-apps.yml b/docker-compose-with-apps.yml deleted file mode 
100644 index 0432f55c..00000000 --- a/docker-compose-with-apps.yml +++ /dev/null @@ -1,156 +0,0 @@ -services: - copilot: - image: tigergraphml/copilot:latest - container_name: copilot - build: - context: . - dockerfile: copilot/Dockerfile - ports: - - 8000:8000 - depends_on: - - eventual-consistency-service - - chat-history - environment: - LLM_CONFIG: "/code/configs/llm_config.json" - DB_CONFIG: "/code/configs/db_config.json" - MILVUS_CONFIG: "/code/configs/milvus_config.json" - LOGLEVEL: "INFO" - USE_CYPHER: "false" - volumes: - - ./configs/:/code/configs - - ./common:/code/common - eventual-consistency-service: - image: tigergraphml/ecc:latest - container_name: eventual-consistency-service - build: - context: . - dockerfile: eventual-consistency-service/Dockerfile - ports: - - 8001:8001 - environment: - LLM_CONFIG: "/code/configs/llm_config.json" - DB_CONFIG: "/code/configs/db_config.json" - MILVUS_CONFIG: "/code/configs/milvus_config.json" - LOGLEVEL: "INFO" - volumes: - - ./configs/:/code/configs - - ./common:/code/common - chat-history: - image: tigergraphml/chat-history:latest - container_name: chat-history - build: - context: chat-history/ - dockerfile: Dockerfile - ports: - - 8002:8002 - environment: - CONFIG: "/configs/config.json" - LOGLEVEL: "INFO" - volumes: - - ./chat-history/:/configs - - # report-service: - # image: tigergraphml/report-service:latest - # container_name: report-service - # build: - # context: . 
- # dockerfile: report-service/Dockerfile - # ports: - # - 8002:8002 - # environment: - # LLM_CONFIG: "/code/configs/llm_config.json" - # DB_CONFIG: "/code/configs/db_config.json" - # MILVUS_CONFIG: "/code/configs/milvus_config.json" - # LOGLEVEL: "INFO" - # volumes: - # - ./configs/:/code/configs - # - ./common:/code/common - - ui: - image: tigergraphml/copilot-ui:latest - container_name: ui - build: - context: copilot-ui - dockerfile: Dockerfile - ports: - - 3000:3000 - depends_on: - - copilot - etcd: - container_name: milvus-etcd - image: quay.io/coreos/etcd:v3.5.5 - environment: - - ETCD_AUTO_COMPACTION_MODE=revision - - ETCD_AUTO_COMPACTION_RETENTION=1000 - - ETCD_QUOTA_BACKEND_BYTES=4294967296 - - ETCD_SNAPSHOT_COUNT=50000 - volumes: - - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/etcd:/etcd - command: etcd -advertise-client-urls=http://127.0.0.1:2379 -listen-client-urls http://0.0.0.0:2379 --data-dir /etcd - healthcheck: - test: ["CMD", "etcdctl", "endpoint", "health"] - interval: 30s - timeout: 20s - retries: 3 - - minio: - container_name: milvus-minio - image: minio/minio:RELEASE.2023-03-20T20-16-18Z - environment: - MINIO_ACCESS_KEY: minioadmin - MINIO_SECRET_KEY: minioadmin - ports: - - "9001:9001" - - "9002:9000" - volumes: - - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/minio:/minio_data - command: minio server /minio_data --console-address ":9001" - healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:9002/minio/health/live"] - interval: 30s - timeout: 20s - retries: 3 - - milvus-standalone: - container_name: milvus-standalone - image: milvusdb/milvus:v2.3.10 - command: ["milvus", "run", "standalone"] - security_opt: - - seccomp:unconfined - environment: - ETCD_ENDPOINTS: etcd:2379 - MINIO_ADDRESS: minio:9000 - volumes: - - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/milvus:/var/lib/milvus - healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:9091/healthz"] - interval: 30s - start_period: 90s - timeout: 20s - retries: 3 - ports: - - "19530:19530" - - 
"9091:9091" - depends_on: - - "etcd" - - "minio" - - # tigergraph: - # image: tigergraph/tigergraph:latest - # ports: - # - "14022:22" - # - "9000:9000" - # - "14240:14240" - # environment: - # - ~/data:/home/tigergraph/mydata - # - tg-data:/home/tigergraph - # ulimits: - # nofile: - # soft: 1000000 - # hard: 1000000 - -networks: - default: - name: milvus -volumes: - tg-data: {} diff --git a/docker-compose.yml b/docker-compose.yml index ac77e058..6aa8c01f 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -18,9 +18,6 @@ services: USE_CYPHER: "true" volumes: - ./configs/:/code/configs - - ./common:/code/common - networks: - - copilot_local eventual-consistency-service: image: tigergraphml/ecc:latest @@ -35,11 +32,9 @@ services: DB_CONFIG: "/code/configs/db_config.json" MILVUS_CONFIG: "/code/configs/milvus_config.json" LOGLEVEL: "INFO" + INIT_EMBED_STORE: "false" volumes: - ./configs/:/code/configs - - ./common:/code/common - networks: - - copilot_local chat-history: image: tigergraphml/chat-history:latest @@ -50,12 +45,11 @@ services: ports: - 8002:8002 environment: - CONFIG: "/configs/config.json" + CONFIG: "/configs/chat_config.json" LOGLEVEL: "INFO" volumes: - ./configs/:/configs - networks: - - copilot_local + # report-service: # image: tigergraphml/report-service:latest # container_name: report-service @@ -72,7 +66,7 @@ services: # volumes: # - ./configs/:/code/configs # - ./common:/code/common - # + ui: image: tigergraphml/copilot-ui:latest container_name: ui @@ -83,8 +77,65 @@ services: - 3000:3000 depends_on: - copilot - networks: - - copilot_local + + etcd: + container_name: milvus-etcd + image: quay.io/coreos/etcd:v3.5.5 + environment: + - ETCD_AUTO_COMPACTION_MODE=revision + - ETCD_AUTO_COMPACTION_RETENTION=1000 + - ETCD_QUOTA_BACKEND_BYTES=4294967296 + - ETCD_SNAPSHOT_COUNT=50000 + volumes: + - milvus_etcd:/etcd + command: etcd -advertise-client-urls=http://127.0.0.1:2379 -listen-client-urls http://0.0.0.0:2379 --data-dir /etcd + healthcheck: + 
test: ["CMD", "etcdctl", "endpoint", "health"] + interval: 30s + timeout: 20s + retries: 3 + + minio: + container_name: milvus-minio + image: minio/minio:RELEASE.2023-03-20T20-16-18Z + environment: + MINIO_ACCESS_KEY: minioadmin + MINIO_SECRET_KEY: minioadmin + ports: + - "9001:9001" + - "9000:9000" + volumes: + - milvus_minio:/minio_data + command: minio server /minio_data --console-address ":9001" + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"] + interval: 30s + timeout: 20s + retries: 3 + + milvus-standalone: + container_name: milvus-standalone + image: milvusdb/milvus:v2.4.6 + command: ["milvus", "run", "standalone"] + security_opt: + - seccomp:unconfined + environment: + ETCD_ENDPOINTS: etcd:2379 + MINIO_ADDRESS: minio:9000 + volumes: + - milvus:/var/lib/milvus + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:9091/healthz"] + interval: 30s + start_period: 90s + timeout: 20s + retries: 3 + ports: + - "19530:19530" + - "9091:9091" + depends_on: + - "etcd" + - "minio" nginx: container_name: nginx @@ -96,8 +147,8 @@ services: depends_on: - ui - copilot - networks: - - copilot_local -networks: - copilot_local: +volumes: + milvus_etcd: + milvus_minio: + milvus: diff --git a/eventual-consistency-service/Dockerfile b/eventual-consistency-service/Dockerfile index 53853b29..bd31c0fe 100644 --- a/eventual-consistency-service/Dockerfile +++ b/eventual-consistency-service/Dockerfile @@ -1,7 +1,7 @@ FROM python:3.11.8 WORKDIR /code -COPY eventual-consistency-service/requirements.txt requirements.txt +COPY common/requirements.txt requirements.txt RUN apt-get update && apt-get upgrade -y RUN pip install -r requirements.txt