Skip to content

Commit

Permalink
Merge pull request #254 from tigergraph/GML-1837-one-dockerfile
Browse files Browse the repository at this point in the history
GML-1837: consolidate docker compose files
  • Loading branch information
billshitg authored Jul 31, 2024
2 parents a4e384c + 47cdd50 commit fdd9560
Show file tree
Hide file tree
Showing 22 changed files with 306 additions and 261 deletions.
100 changes: 50 additions & 50 deletions common/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,57 +132,57 @@ def get_llm_service(llm_config):
f"Milvus enabled for host {milvus_config['host']} at port {milvus_config['port']}"
)

LogWriter.info("Setting up Milvus embedding store for InquiryAI")
try:
embedding_store = MilvusEmbeddingStore(
embedding_service,
host=milvus_config["host"],
port=milvus_config["port"],
collection_name="tg_inquiry_documents",
support_ai_instance=False,
username=milvus_config.get("username", ""),
password=milvus_config.get("password", ""),
alias=milvus_config.get("alias", "default"),
)
service_status["embedding_store"] = {"status": "ok", "error": None}
except MilvusException as e:
embedding_store = None
service_status["embedding_store"] = {"status": "milvus error", "error": str(e)}
raise
except Exception as e:
embedding_store = None
service_status["embedding_store"] = {"status": "embedding error", "error": str(e)}
raise

support_collection_name = milvus_config.get("collection_name", "tg_support_documents")
LogWriter.info(
f"Setting up Milvus embedding store for SupportAI with collection_name: {support_collection_name}"
)
vertex_field = milvus_config.get("vertex_field", "vertex_id")
try:
support_ai_embedding_store = MilvusEmbeddingStore(
embedding_service,
host=milvus_config["host"],
port=milvus_config["port"],
support_ai_instance=True,
collection_name=support_collection_name,
username=milvus_config.get("username", ""),
password=milvus_config.get("password", ""),
vector_field=milvus_config.get("vector_field", "document_vector"),
text_field=milvus_config.get("text_field", "document_content"),
vertex_field=vertex_field,
alias=milvus_config.get("alias", "default"),
)
service_status["support_ai_embedding_store"] = {"status": "ok", "error": None}
except MilvusException as e:
support_ai_embedding_store = None
service_status["support_ai_embedding_store"] = {"status": "milvus error", "error": str(e)}
raise
except Exception as e:
support_ai_embedding_store = None
service_status["support_ai_embedding_store"] = {"status": "embedding error", "error": str(e)}
raise
if os.getenv("INIT_EMBED_STORE", "true")=="true":
LogWriter.info("Setting up Milvus embedding store for InquiryAI")
try:
embedding_store = MilvusEmbeddingStore(
embedding_service,
host=milvus_config["host"],
port=milvus_config["port"],
collection_name="tg_inquiry_documents",
support_ai_instance=False,
username=milvus_config.get("username", ""),
password=milvus_config.get("password", ""),
alias=milvus_config.get("alias", "default"),
)
service_status["embedding_store"] = {"status": "ok", "error": None}
except MilvusException as e:
embedding_store = None
service_status["embedding_store"] = {"status": "milvus error", "error": str(e)}
raise
except Exception as e:
embedding_store = None
service_status["embedding_store"] = {"status": "embedding error", "error": str(e)}
raise

support_collection_name = milvus_config.get("collection_name", "tg_support_documents")
LogWriter.info(
f"Setting up Milvus embedding store for SupportAI with collection_name: {support_collection_name}"
)
vertex_field = milvus_config.get("vertex_field", "vertex_id")
try:
support_ai_embedding_store = MilvusEmbeddingStore(
embedding_service,
host=milvus_config["host"],
port=milvus_config["port"],
support_ai_instance=True,
collection_name=support_collection_name,
username=milvus_config.get("username", ""),
password=milvus_config.get("password", ""),
vector_field=milvus_config.get("vector_field", "document_vector"),
text_field=milvus_config.get("text_field", "document_content"),
vertex_field=vertex_field,
alias=milvus_config.get("alias", "default"),
)
service_status["support_ai_embedding_store"] = {"status": "ok", "error": None}
except MilvusException as e:
support_ai_embedding_store = None
service_status["support_ai_embedding_store"] = {"status": "milvus error", "error": str(e)}
raise
except Exception as e:
support_ai_embedding_store = None
service_status["support_ai_embedding_store"] = {"status": "embedding error", "error": str(e)}
raise

if DOC_PROCESSING_CONFIG is None or (
DOC_PROCESSING_CONFIG.endswith(".json")
Expand Down
2 changes: 1 addition & 1 deletion common/embeddings/embedding_services.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ def __init__(self, config):
super().__init__(
config, model_name=config.get("model_name", "OpenAI gpt-4-0613")
)
from langchain.embeddings import OpenAIEmbeddings
from langchain_openai import OpenAIEmbeddings

self.embeddings = OpenAIEmbeddings()

Expand Down
9 changes: 6 additions & 3 deletions common/embeddings/milvus_embedding_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from time import sleep, time
from typing import Iterable, List, Optional, Tuple

from langchain_community.vectorstores import Milvus
from langchain_milvus.vectorstores import Milvus
from langchain_core.documents.base import Document
from pymilvus import connections, utility
from pymilvus.exceptions import MilvusException
Expand Down Expand Up @@ -82,6 +82,7 @@ def connect_to_milvus(self):
f"""Initializing Milvus with host={self.milvus_connection.get("host", self.milvus_connection.get("uri", "unknown host"))},
port={self.milvus_connection.get('port', 'unknown')}, username={self.milvus_connection.get('user', 'unknown')}, collection={self.collection_name}"""
)
LogWriter.info(f"Milvus version {utility.get_server_version()}")
self.milvus = Milvus(
embedding_function=self.embedding_service,
collection_name=self.collection_name,
Expand All @@ -108,7 +109,7 @@ def check_collection_exists(self):

def load_documents(self):
if not self.check_collection_exists():
from langchain.document_loaders import DirectoryLoader, JSONLoader
from langchain_community.document_loaders import DirectoryLoader, JSONLoader

def metadata_func(record: dict, metadata: dict) -> dict:
metadata["function_header"] = record.get("function_header")
Expand All @@ -120,7 +121,7 @@ def metadata_func(record: dict, metadata: dict) -> dict:

LogWriter.info("Milvus add initial load documents init()")
loader = DirectoryLoader(
"./tg_documents/",
"./common/tg_documents/",
glob="*.json",
loader_cls=JSONLoader,
loader_kwargs={
Expand All @@ -131,6 +132,8 @@ def metadata_func(record: dict, metadata: dict) -> dict:
)
docs = loader.load()

# logger.info(f"docs: {docs}")

operation_type = "load_upsert"
metrics.milvus_query_total.labels(
self.collection_name, operation_type
Expand Down
2 changes: 1 addition & 1 deletion common/llm_services/aws_sagemaker_endpoint.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import boto3
from langchain.llms.sagemaker_endpoint import LLMContentHandler
from langchain_community.llms.sagemaker_endpoint import LLMContentHandler
import logging
import json
from typing import Dict
Expand Down
4 changes: 2 additions & 2 deletions common/llm_services/openai_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ def __init__(self, config):
auth_detail
]

from langchain.chat_models import ChatOpenAI

from langchain_community.chat_models import ChatOpenAI
model_name = config["llm_model"]
self.llm = ChatOpenAI(
temperature=config["model_kwargs"]["temperature"], model_name=model_name
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ appdirs==1.4.4
argon2-cffi==23.1.0
argon2-cffi-bindings==21.2.0
async-timeout==4.0.3
asyncer==0.0.7
attrs==23.1.0
azure-core==1.30.1
azure-storage-blob==12.19.1
Expand All @@ -23,17 +24,20 @@ cryptography==42.0.5
dataclasses-json==0.5.14
distro==1.8.0
docker-pycreds==0.4.0
docstring_parser==0.16
emoji==2.8.0
environs==9.5.0
exceptiongroup==1.1.3
fastapi==0.103.1
filelock==3.15.4
filetype==1.2.0
frozenlist==1.4.0
fsspec==2024.6.0
gitdb==4.0.11
GitPython==3.1.40
google-api-core==2.14.0
google-auth==2.23.4
google-cloud-aiplatform==1.36.1
google-cloud-aiplatform==1.52.0
google-cloud-bigquery==3.13.0
google-cloud-core==2.3.3
google-cloud-resource-manager==1.10.4
Expand All @@ -50,36 +54,48 @@ h11==0.14.0
httpcore==0.18.0
httptools==0.6.0
httpx==0.25.0
huggingface_hub==0.23.0
huggingface-hub==0.23.0
ibm-cos-sdk==2.13.6
ibm-cos-sdk-core==2.13.6
ibm-cos-sdk-s3transfer==2.13.6
ibm_watsonx_ai==1.0.11
idna==3.4
importlib_metadata==8.0.0
iniconfig==2.0.0
isodate==0.6.1
jmespath==1.0.1
joblib==1.3.2
jq==1.6.0
jsonpatch==1.33
jsonpointer==2.4
langchain==0.1.12
langchain-community==0.0.28
langchain-core==0.1.49
langchain-experimental==0.0.54
langchain-groq==0.1.3
langchain-text-splitters==0.0.1
langchainhub==0.1.14
langchain==0.2.11
langchain-community==0.2.10
langchain-core==0.2.25
langchain-experimental==0.0.63
langchain-groq==0.1.8
langchain-ibm==0.1.11
langchain-text-splitters==0.2.2
langchain_milvus==0.1.3
langchain_openai==0.1.19
langchainhub==0.1.20
langdetect==1.0.9
langgraph==0.0.40
langsmith==0.1.24
langgraph==0.1.16
langsmith==0.1.94
lomond==0.3.3
lxml==4.9.3
marshmallow==3.20.1
matplotlib==3.9.1
minio==7.2.5
multidict==6.0.4
mypy-extensions==1.0.0
nltk==3.8.1
numpy==1.26.4
openai==1.3.7
openai==1.37.1
orjson==3.9.15
packaging==23.2
pandas==2.1.1
pathtools==0.1.2
pluggy==1.5.0
prometheus_client==0.20.0
proto-plus==1.22.3
protobuf==4.24.4
Expand All @@ -92,20 +108,22 @@ pycryptodome==3.20.0
pydantic==2.3.0
pydantic_core==2.6.3
pygit2==1.13.2
pymilvus==2.3.6
python-dateutil==2.8.2
pymilvus==2.4.4
pytest==8.2.0
python-dateutil==2.9.0.post0
python-dotenv==1.0.0
python-iso639==2023.6.15
python-magic==0.4.27
pyTigerDriver==1.0.15
pyTigerGraph==1.6.1
pyTigerGraph==1.6.2
pytz==2023.3.post1
PyYAML==6.0.1
rapidfuzz==3.4.0
regex==2023.10.3
requests==2.31.0
requests==2.32.2
rsa==4.9
s3transfer==0.7.0
scikit-learn==1.5.1
sentry-sdk==1.32.0
setproctitle==1.3.3
shapely==2.0.2
Expand All @@ -117,12 +135,12 @@ SQLAlchemy==2.0.20
starlette==0.27.0
tabulate==0.9.0
tenacity==8.2.3
tiktoken==0.5.1
tiktoken==0.7.0
tqdm==4.66.1
types-requests==2.31.0.6
types-urllib3==1.26.25.14
typing-inspect==0.9.0
typing_extensions==4.7.1
typing_extensions==4.8.0
tzdata==2023.3
ujson==5.9.0
unstructured==0.10.23
Expand All @@ -134,3 +152,4 @@ wandb==0.15.12
watchfiles==0.20.0
websockets==11.0.3
yarl==1.9.2
zipp==3.19.2
11 changes: 11 additions & 0 deletions common/tg_documents/get_edge_count.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{
"function_header": "getEdgeCount",
"description": "Get the number of edges at a graph level, optionally filtered by type.",
"docstring": "`getEdgeCount(edgeType: str = '*', sourceVertexType: str = '', targetVertexType: str = '')` → dict\nReturns the number of edges of an edge type.\nThis is a simplified version of getEdgeCountFrom(), to be used when the total number of edges of a given type is needed, regardless of which vertex instance they originate from. See the documentation of getEdgeCountFrom for more details.\nParameters:\nedgeType: The name of the edge type.\nsourceVertexType: The name of the source vertex type.\ntargetVertexType: The name of the target vertex type.\nReturns:\nA dictionary of edge_type: edge_count pairs.",
"param_types": {
"edgeType": "str",
"sourceVertexType": "str",
"targetVertexType": "str"
},
"custom_query": false
}
13 changes: 13 additions & 0 deletions common/tg_documents/get_edge_count_from.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{
"function_header": "getEdgeCountFrom",
"description": "Get the number of edges from a source vertex, and optionally to a target vertex.",
"docstring": "`getEdgeCountFrom(sourceVertexType: str = '', sourceVertexId: Union[str, int] = None, edgeType: str = '', targetVertexType: str = '', targetVertexId: Union[str, int] = None, where: str = '')` → dict\nReturns the number of edges from a specific vertex.\nParameters:\nsourceVertexType: The name of the source vertex type.\nsourceVertexId: The primary ID value of the source vertex instance.\nedgeType: The name of the edge type.\ntargetVertexType: The name of the target vertex type.\ntargetVertexId: The primary ID value of the target vertex instance.\nwhere: A comma separated list of conditions that are all applied on each edge’s attributes. The conditions are in logical conjunction (i.e. they are 'AND’ed' together).\nReturns:\nA dictionary of edge_type: edge_count pairs.\nUses:\nIf edgeType = '*': edge count of all edge types (no other arguments can be specified in this case).\nIf edgeType is specified only: edge count of the given edge type.\nIf sourceVertexType, edgeType, targetVertexType are specified: edge count of the given edge type between source and target vertex types.\nIf sourceVertexType, sourceVertexId are specified: edge count of all edge types from the given vertex instance.\nIf sourceVertexType, sourceVertexId, edgeType are specified: edge count of the given edge type from the given vertex instance.\nIf sourceVertexType, sourceVertexId, edgeType, where are specified: the edge count of the given edge type after being filtered by the where condition.\nIf targetVertexId is specified, then targetVertexType must also be specified.\nIf targetVertexType is specified, then edgeType must also be specified.",
"param_types": {
"sourceVertexType": "str",
"sourceVertexId": "Union[str, int]",
"edgeType": "str",
"targetVertexType": "str",
"targetVertexId": "Union[str, int]"
},
"custom_query": false
}
10 changes: 10 additions & 0 deletions common/tg_documents/get_edge_stats.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{
"function_header": "getEdgeStats",
"description": "Retrieves edge attribute statistics",
"docstring": "`getEdgeStats(edgeTypes: Union[str, list], skipNA: bool = False)` → dict\nReturns edge attribute statistics.\nParameters:\nedgeTypes: A single edge type name, a list of edge type names, or '*' for all edge types.\nskipNA: Skip those edges that do not have attributes or none of their attributes have statistics gathered.\nReturns:\nAttribute statistics of edges; a dictionary of dictionaries.",
"param_types": {
"edgeTypes": "Union[str, list]",
"skipNA": "bool"
},
"custom_query": false
}
16 changes: 16 additions & 0 deletions common/tg_documents/get_edges.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
{
"function_header": "getEdges",
"description": "Return edges from the database that comply with certain conditions",
"docstring": "`getEdges(sourceVertexType: str, sourceVertexId: Union[str, int], edgeType: str = '', targetVertexType: str = '', targetVertexId: Union[str, int] = '', where: str = '', limit: Union[int, str] = None, sort: str = '')` → Union[dict, str, pd.DataFrame]\nRetrieves edges of the given edge type originating from a specific source vertex.\nOnly sourceVertexType and sourceVertexId are required. If targetVertexId is specified, then targetVertexType must also be specified. If targetVertexType is specified, then edgeType must also be specified.\nParameters:\nsourceVertexType: The name of the source vertex type.\nsourceVertexId: The primary ID value of the source vertex instance.\nedgeType: The name of the edge type.\ntargetVertexType: The name of the target vertex type.\ntargetVertexId: The primary ID value of the target vertex instance.\nwhere: Comma separated list of conditions that are all applied on each edge’s attributes. The conditions are in logical conjunction (i.e. they are 'AND’ed' together).\nsort: Comma separated list of attributes the results should be sorted by.\nlimit: Maximum number of edge instances to be returned (after sorting).\nReturns:\nThe (selected) details of the (matching) edge instances (sorted, limited) as JSON.",
"param_types": {
"sourceVertexType": "str",
"sourceVertexId": "Union[str, int]",
"edgeType": "str",
"targetVertexType": "str",
"targetVertexId": "Union[str, int]",
"where": "str",
"limit": "Union[int, str]",
"sort": "str"
},
"custom_query": false
}
10 changes: 10 additions & 0 deletions common/tg_documents/get_vertex_count.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{
"function_header": "getVertexCount",
"description": "Get the count of a vertex type, optionally with a where filter",
"docstring": "`getVertexCount(vertexType: Union[str, list] = '*', where: str = '')` → Union[int, dict]\nReturns the number of vertices of the specified type.\nParameters:\nvertexType (Union[str, list], optional): The name of the vertex type. If vertexType == '*', then count the instances of all vertex types (where cannot be specified in this case). Defaults to '*'.\nwhere (str, optional): A comma separated list of conditions that are all applied on each vertex’s attributes. The conditions are in logical conjunction (i.e. they are 'AND’ed' together). Defaults to ''.\nReturns:\nA dictionary of <vertex_type>: <vertex_count> pairs if vertexType is a list or '*'.\nAn integer of vertex count if vertexType is a single vertex type.\nUses:\nIf vertexType is specified only: count of the instances of the given vertex type(s).\nIf vertexType and where are specified: count of the instances of the given vertex type after being filtered by where condition(s).",
"param_types": {
"vertexType": "Union[str, List[str]]",
"where": "str"
},
"custom_query": false
}
10 changes: 10 additions & 0 deletions common/tg_documents/get_vertex_stats.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{
"function_header": "getVertexStats",
"description": "Get the statistics of vertex attributes.",
"docstring": "`getVertexStats(vertexTypes: Union[str, list], skipNA: bool = False)` → dict\nReturns vertex attribute statistics.\nParameters:\nvertexTypes: A single vertex type name, a list of vertex type names, or '*' for all vertex types.\nskipNA: Skip those non-applicable vertices that do not have attributes or none of their attributes have statistics gathered.\nReturns:\nA dictionary of various vertex stats for each vertex type specified.",
"param_types": {
"vertexTypes": "Union[str, list]",
"skipNA": "bool"
},
"custom_query": false
}
Loading

0 comments on commit fdd9560

Please sign in to comment.