From 58c0cc7ad0c605c8be90c83363bfed0fe01e5855 Mon Sep 17 00:00:00 2001 From: ahmed-tg <160754390+ahmed-tg@users.noreply.github.com> Date: Thu, 6 Jun 2024 13:34:13 -0400 Subject: [PATCH] Gml 1666 ecc as a microservice (#167) * GML-1666 - Refactored into microservices. Common, Copilot and Eventual Consistency Checker * Moved storage classes and gsql scripts to common as well * A bit more refactoring - of the pyschemas, and made it possible to test the services * GML-1666 - Eventual Consistency Checker More cleanup, this time of the common/gsql queries. Improved eventual consistency check. Refactored connection logic in the ECC. Added a status method * Fixed the docker compose files for the separate services * Fixed everything, and with multiprocessing * Replaced processing with threading * Revert the docker compose to include milvus again * Removed logs from being tracked in ECC * Remove the test logs as well * Fixed some imports and a dangling conflict * Merged from dev * dockerized * fix typo * move tests out one at a time to resolve import issues * update reqs path * incremental changes * incremental changes * incremental changes * incremental changes * incremental changes * incremental changes * incremental changes * incremental changes * incremental changes * incremental changes * removed unused stuff * Testing the onprem nightly with a pull request github action * More testing * More testing * Fixed the dockerfile configs again * More testing * More testing * Running from the top level again * Changed context back to top level * Removed config symlinks too. 
minimized the build script * Readded the configs * Readded the symlinks and removals from the build script * Using build instead of nightly for latest * rm testing run script * fixing llm tests * fixing llm tests * fixing llm tests * fixing llm tests * Updated the other workflows --------- Co-authored-by: Rob Rossmiller Co-authored-by: RobRossmiller-TG <165701656+RobRossmiller-TG@users.noreply.github.com> --- .github/workflows/build-test-ci.yaml | 10 +- .github/workflows/cloud-build-deploy-ci.yaml | 16 +- .github/workflows/cloud-build-nightly.yaml | 16 +- .github/workflows/onprem-build-nightly.yaml | 14 +- .github/workflows/onprem-build.yaml | 16 +- .github/workflows/pull-test-merge.yaml | 9 +- .gitignore | 9 +- Dockerfile | 21 -- README.md | 12 +- app/supportai/extractors/__init__.py | 3 - app/util.py | 190 ------------------ common/__init__.py | 0 .../supportai => common}/chunkers/__init__.py | 0 .../chunkers/base_chunker.py | 0 .../chunkers/character_chunker.py | 2 +- .../chunkers/regex_chunker.py | 2 +- .../chunkers/semantic_chunker.py | 4 +- common/common | 1 + {app => common}/config.py | 12 +- common/db/connections.py | 109 ++++++++++ .../embeddings/base_embedding_store.py | 0 .../embeddings/embedding_services.py | 6 +- .../embeddings/faiss_embedding_store.py | 10 +- .../embeddings/milvus_embedding_store.py | 15 +- .../extractors/BaseExtractor.py | 0 .../LLMEntityRelationshipExtractor.py | 6 +- common/extractors/__init__.py | 3 + .../gsql}/HNSW/HNSW_BuildIndex.gsql | 0 .../gsql}/HNSW/HNSW_CreateEntrypoint.gsql | 0 .../gsql}/HNSW/HNSW_DeleteIndex.gsql | 0 .../gsql}/HNSW/HNSW_Search.gsql | 0 .../Build_Community_Concepts.gsql | 0 .../concept_creation/Build_Concept_Tree.gsql | 0 .../Build_Entity_Concepts.gsql | 0 .../Build_Relationship_Concepts.gsql | 0 ...EntityRelationshipConceptCooccurrence.gsql | 0 .../Entity_Relationship_Retrieval.gsql | 0 .../retrievers/HNSW_Chunk_Sibling_Search.gsql | 0 .../gsql}/retrievers/HNSW_Overlap_Search.gsql | 0 
.../gsql}/retrievers/HNSW_Search_Content.gsql | 0 .../gsql}/retrievers/HNSW_Search_Sub.gsql | 0 .../gsql/supportai/ECC_Status.gsql | 0 .../gsql/supportai/Scan_For_Updates.gsql | 0 .../SupportAI_DataSourceCreation.gsql | 0 .../supportai/SupportAI_IndexCreation.gsql | 0 .../supportai/SupportAI_InitialLoadCSV.gsql | 0 .../supportai/SupportAI_InitialLoadJSON.gsql | 0 .../gsql/supportai/SupportAI_Schema.gsql | 0 .../Update_Vertices_Processing_Status.gsql | 0 {app => common}/llm_services/__init__.py | 0 .../llm_services/aws_bedrock_service.py | 8 +- .../llm_services/aws_sagemaker_endpoint.py | 10 +- .../llm_services/azure_openai_service.py | 6 +- {app => common}/llm_services/base_llm.py | 0 .../llm_services/google_vertexai_service.py | 6 +- .../llm_services/groq_llm_service.py | 6 +- .../llm_services/huggingface_endpoint.py | 6 +- {app => common}/llm_services/ollama.py | 7 +- .../llm_services/openai_service.py | 6 +- {app => common/logs}/log.py | 0 {app/tools => common/logs}/logwriter.py | 2 +- {app => common}/metrics/prometheus_metrics.py | 0 {app => common}/metrics/tg_proxy.py | 8 +- .../entity_relationship_extraction.txt | 0 .../generate_function.txt | 0 .../map_question_to_schema.txt | 0 .../aws_bedrock_titan/generate_function.txt | 0 .../map_question_to_schema.txt | 0 .../entity_relationship_extraction.txt | 0 .../generate_function.txt | 0 .../map_question_to_schema.txt | 0 .../entity_relationship_extraction.txt | 0 .../gcp_vertexai_palm/generate_function.txt | 0 .../map_question_to_schema.txt | 0 .../prompts/llama_70b/generate_function.txt | 0 .../llama_70b/map_question_to_schema.txt | 0 .../entity_relationship_extraction.txt | 0 .../prompts/openai_gpt4/generate_function.txt | 0 .../openai_gpt4/map_question_to_schema.txt | 0 {app => common}/py_schemas/__init__.py | 0 {app => common}/py_schemas/schemas.py | 0 {app => common}/py_schemas/tool_io_schemas.py | 0 {app => common}/session.py | 0 {app => common}/status.py | 0 {app => common}/storage/__init__.py | 0 {app => 
common}/storage/azure_blob_store.py | 2 +- {app => common}/storage/base_blob_store.py | 0 {app => common}/storage/google_blob_store.py | 2 +- {app => common}/storage/s3_blob_store.py | 2 +- copilot/.dockerignore | 5 + copilot/Dockerfile | 18 ++ copilot/Dockerfile.tests | 21 ++ {app => copilot/app}/__init__.py | 0 copilot/app/agent.py | 111 ++++++++++ {app => copilot/app}/agent/__init__.py | 0 {app => copilot/app}/agent/agent.py | 18 +- .../app}/agent/agent_generation.py | 7 +- {app => copilot/app}/agent/agent_graph.py | 20 +- .../app}/agent/agent_hallucination_check.py | 6 +- {app => copilot/app}/agent/agent_rewrite.py | 8 +- {app => copilot/app}/agent/agent_router.py | 4 +- .../app}/agent/agent_usefulness_check.py | 5 +- copilot/app/common | 1 + copilot/app/configs | 1 + {app => copilot/app}/main.py | 12 +- {app => copilot/app}/routers/__init__.py | 0 {app => copilot/app}/routers/inquiryai.py | 18 +- {app => copilot/app}/routers/queryai.py | 20 +- {app => copilot/app}/routers/root.py | 4 +- {app => copilot/app}/routers/supportai.py | 116 +++++------ {app => copilot/app}/static/chat.html | 0 {app => copilot/app}/static/favicon.ico | Bin {app => copilot/app}/supportai/README.md | 0 .../concept_management/create_concepts.py | 2 +- .../supportai/retrievers/BaseRetriever.py | 10 +- .../retrievers/EntityRelationshipRetriever.py | 6 +- .../retrievers/HNSWOverlapRetriever.py | 4 +- .../supportai/retrievers/HNSWRetriever.py | 4 +- .../retrievers/HNSWSiblingRetriever.py | 4 +- .../app}/supportai/retrievers/__init__.py | 0 .../app}/supportai/supportai_ingest.py | 20 +- .../app}/tg_documents/get_edge_count.json | 0 .../tg_documents/get_edge_count_from.json | 0 .../app}/tg_documents/get_edge_stats.json | 0 .../app}/tg_documents/get_edges.json | 0 .../app}/tg_documents/get_vertex_count.json | 0 .../app}/tg_documents/get_vertex_stats.json | 0 .../app}/tg_documents/get_vertices.json | 0 .../app}/tg_documents/get_vertices_by_id.json | 0 {app => 
copilot/app}/tg_documents/tg_bfs.json | 0 .../app}/tg_documents/tg_pagerank.json | 0 .../tg_documents/tg_shortest_ss_no_wt.json | 0 {app => copilot/app}/tools/__init__.py | 0 {app => copilot/app}/tools/generate_cypher.py | 5 +- .../app}/tools/generate_function.py | 12 +- .../app}/tools/map_question_to_schema.py | 8 +- .../app}/tools/validation_utils.py | 4 +- {docs => copilot/docs}/Contributing.md | 0 {docs => copilot/docs}/DeveloperGuide.md | 4 +- .../docs}/img/CoPilot-UX-Demo.png | Bin .../docs}/img/InquiryAI-Architecture.png | Bin .../docs}/img/SupportAI-Architecture.png | Bin .../docs}/img/SupportAISchema.png | Bin {docs => copilot/docs}/img/SwaggerDocUX.png | Bin .../docs}/img/TG-CoPilot-Architecture.png | Bin .../docs}/notebooks/DigitalInfraDemo.ipynb | 0 .../docs}/notebooks/SupportAIDemo.ipynb | 0 .../TransactionFraudInvestigation.ipynb | 0 .../docs}/notebooks/VisualizeAgent.ipynb | 6 +- requirements.txt => copilot/requirements.txt | 0 {tests => copilot/tests}/app | 0 copilot/tests/common | 1 + copilot/tests/configs | 1 + {tests => copilot/tests}/conftest.py | 2 +- .../tests}/create_wandb_report.py | 0 {tests => copilot/tests}/parse_test_config.py | 0 {tests => copilot/tests}/perf/.gitignore | 0 {tests => copilot/tests}/perf/README.md | 0 .../tests}/perf/customMetrics.js | 0 {tests => copilot/tests}/perf/run.sh | 0 {tests => copilot/tests}/perf/script.js | 0 copilot/tests/run_tests.sh | 123 ++++++++++++ .../tests}/test_azure_gpt35_turbo_instruct.py | 2 +- {tests => copilot/tests}/test_bedrock.py | 2 +- .../tests}/test_character_chunker.py | 2 +- .../tests}/test_credit_card_redaction.py | 2 +- .../tests}/test_crud_endpoint.py | 0 .../tests}/test_gcp_text-bison.py | 2 +- .../tests}/test_groq_mixtral8x7b.py | 2 +- .../tests}/test_huggingface_llama70b.py | 2 +- .../tests}/test_huggingface_phi3.py | 2 +- {tests => copilot/tests}/test_ingest.py | 2 +- {tests => copilot/tests}/test_inquiryai.py | 0 .../tests}/test_inquiryai_milvus.py | 2 +- {tests => 
copilot/tests}/test_log_writer.py | 30 +-- .../tests}/test_milvus_embedding_store.py | 8 +- .../tests}/test_openai_gpt35-turbo.py | 2 +- {tests => copilot/tests}/test_openai_gpt4.py | 2 +- {tests => copilot/tests}/test_openai_gpt4o.py | 2 +- .../DigitalInfra/DigitalInfraQuestions.tsv | 0 .../test_questions/DigitalInfra/README.md | 0 .../DigitalInfra/gsql/create_data_source.gsql | 0 .../DigitalInfra/gsql/create_graph.gsql | 0 .../DigitalInfra/gsql/create_load_job.gsql | 0 .../DigitalInfra/gsql/create_schema.gsql | 0 .../ms_dependency_chain.gsql | 0 .../ms_dependency_chain_prompt.json | 0 .../DigitalInfra/run_load_jobs.json | 0 .../DigitalInfra/setup_dataset.py | 0 .../OGB_MAG/OGB_MAGQuestions.tsv | 0 .../author_fields_of_study.gsql | 0 .../author_fields_of_study_prompt.json | 0 .../test_questions/OGB_MAG/setup_dataset.py | 0 .../OGB_MAG/tg_pagerank/tg_pagerank.gsql | 0 .../tg_pagerank/tg_pagerank_prompt.json | 0 .../OGB_MAG_SHORT/OGB_MAG_SHORTQuestions.tsv | 8 + .../tests}/test_questions/Synthea/README.md | 0 .../Synthea/SyntheaQuestions.tsv | 0 .../Synthea/gsql/create_data_source.gsql | 0 .../Synthea/gsql/create_graph.gsql | 0 .../Synthea/gsql/create_load_job.gsql | 0 .../Synthea/gsql/create_schema.gsql | 0 .../test_questions/Synthea/run_load_jobs.json | 0 .../test_questions/Synthea/setup_dataset.py | 0 .../tests}/test_regex_chunker.py | 2 +- .../tests}/test_sagemaker_llama7b.py | 0 .../tests}/test_semantic_chunker.py | 4 +- {tests => copilot/tests}/test_service.py | 0 {tests => copilot/tests}/test_supportai.py | 0 .../test_supportai_load_ingest_creation.py | 0 .../tests}/test_validate_function_call.py | 2 +- .../udfs}/milvus/rest/ExprFunctions.hpp | 0 .../udfs}/milvus/rest/ExprUtil.hpp | 0 .../udfs}/milvus/rest/install.gsql | 0 {udfs => copilot/udfs}/milvus/rest/test.gsql | 0 docker-compose-with-apps.yml | 116 +++++++++++ docker-compose.yml | 93 ++------- eventual-consistency-service/.dockerignore | 5 + eventual-consistency-service/Dockerfile | 18 ++ 
.../Dockerfile.tests | 2 +- eventual-consistency-service/app/__init__.py | 0 eventual-consistency-service/app/common | 1 + eventual-consistency-service/app/configs | 1 + .../app}/eventual_consistency_checker.py | 40 ++-- eventual-consistency-service/app/main.py | 137 +++++++++++++ eventual-consistency-service/requirements.txt | 136 +++++++++++++ eventual-consistency-service/tests/app | 1 + eventual-consistency-service/tests/common | 1 + .../test_eventual_consistency_checker.py | 15 +- scripts/build.sh | 3 + scripts/build_copilot.sh | 14 ++ scripts/build_ecc.sh | 15 ++ tests/run_tests.sh | 122 ----------- tests/test_questions/.DS_Store | Bin 6148 -> 0 bytes 234 files changed, 1235 insertions(+), 733 deletions(-) delete mode 100644 Dockerfile delete mode 100644 app/supportai/extractors/__init__.py delete mode 100644 app/util.py create mode 100644 common/__init__.py rename {app/supportai => common}/chunkers/__init__.py (100%) rename {app/supportai => common}/chunkers/base_chunker.py (100%) rename {app/supportai => common}/chunkers/character_chunker.py (92%) rename {app/supportai => common}/chunkers/regex_chunker.py (88%) rename {app/supportai => common}/chunkers/semantic_chunker.py (88%) create mode 120000 common/common rename {app => common}/config.py (95%) create mode 100644 common/db/connections.py rename {app => common}/embeddings/base_embedding_store.py (100%) rename {app => common}/embeddings/embedding_services.py (97%) rename {app => common}/embeddings/faiss_embedding_store.py (94%) rename {app => common}/embeddings/milvus_embedding_store.py (98%) rename {app/supportai => common}/extractors/BaseExtractor.py (100%) rename {app/supportai => common}/extractors/LLMEntityRelationshipExtractor.py (97%) create mode 100644 common/extractors/__init__.py rename {app/gsql/supportai => common/gsql}/HNSW/HNSW_BuildIndex.gsql (100%) rename {app/gsql/supportai => common/gsql}/HNSW/HNSW_CreateEntrypoint.gsql (100%) rename {app/gsql/supportai => 
common/gsql}/HNSW/HNSW_DeleteIndex.gsql (100%) rename {app/gsql/supportai => common/gsql}/HNSW/HNSW_Search.gsql (100%) rename {app/gsql/supportai => common/gsql}/concept_curation/concept_creation/Build_Community_Concepts.gsql (100%) rename {app/gsql/supportai => common/gsql}/concept_curation/concept_creation/Build_Concept_Tree.gsql (100%) rename {app/gsql/supportai => common/gsql}/concept_curation/concept_creation/Build_Entity_Concepts.gsql (100%) rename {app/gsql/supportai => common/gsql}/concept_curation/concept_creation/Build_Relationship_Concepts.gsql (100%) rename {app/gsql/supportai => common/gsql}/concept_curation/concept_creation/getEntityRelationshipConceptCooccurrence.gsql (100%) rename {app/gsql/supportai => common/gsql}/retrievers/Entity_Relationship_Retrieval.gsql (100%) rename {app/gsql/supportai => common/gsql}/retrievers/HNSW_Chunk_Sibling_Search.gsql (100%) rename {app/gsql/supportai => common/gsql}/retrievers/HNSW_Overlap_Search.gsql (100%) rename {app/gsql/supportai => common/gsql}/retrievers/HNSW_Search_Content.gsql (100%) rename {app/gsql/supportai => common/gsql}/retrievers/HNSW_Search_Sub.gsql (100%) rename {app => common}/gsql/supportai/ECC_Status.gsql (100%) rename {app => common}/gsql/supportai/Scan_For_Updates.gsql (100%) rename {app => common}/gsql/supportai/SupportAI_DataSourceCreation.gsql (100%) rename {app => common}/gsql/supportai/SupportAI_IndexCreation.gsql (100%) rename {app => common}/gsql/supportai/SupportAI_InitialLoadCSV.gsql (100%) rename {app => common}/gsql/supportai/SupportAI_InitialLoadJSON.gsql (100%) rename {app => common}/gsql/supportai/SupportAI_Schema.gsql (100%) rename {app => common}/gsql/supportai/Update_Vertices_Processing_Status.gsql (100%) rename {app => common}/llm_services/__init__.py (100%) rename {app => common}/llm_services/aws_bedrock_service.py (92%) rename {app => common}/llm_services/aws_sagemaker_endpoint.py (94%) rename {app => common}/llm_services/azure_openai_service.py (92%) rename {app => 
common}/llm_services/base_llm.py (100%) rename {app => common}/llm_services/google_vertexai_service.py (89%) rename {app => common}/llm_services/groq_llm_service.py (90%) rename {app => common}/llm_services/huggingface_endpoint.py (92%) rename {app => common}/llm_services/ollama.py (89%) rename {app => common}/llm_services/openai_service.py (91%) rename {app => common/logs}/log.py (100%) rename {app/tools => common/logs}/logwriter.py (98%) rename {app => common}/metrics/prometheus_metrics.py (100%) rename {app => common}/metrics/tg_proxy.py (95%) rename {app => common}/prompts/aws_bedrock_claude3haiku/entity_relationship_extraction.txt (100%) rename {app => common}/prompts/aws_bedrock_claude3haiku/generate_function.txt (100%) rename {app => common}/prompts/aws_bedrock_claude3haiku/map_question_to_schema.txt (100%) rename {app => common}/prompts/aws_bedrock_titan/generate_function.txt (100%) rename {app => common}/prompts/aws_bedrock_titan/map_question_to_schema.txt (100%) rename {app => common}/prompts/azure_open_ai_gpt35_turbo_instruct/entity_relationship_extraction.txt (100%) rename {app => common}/prompts/azure_open_ai_gpt35_turbo_instruct/generate_function.txt (100%) rename {app => common}/prompts/azure_open_ai_gpt35_turbo_instruct/map_question_to_schema.txt (100%) rename {app => common}/prompts/gcp_vertexai_palm/entity_relationship_extraction.txt (100%) rename {app => common}/prompts/gcp_vertexai_palm/generate_function.txt (100%) rename {app => common}/prompts/gcp_vertexai_palm/map_question_to_schema.txt (100%) rename {app => common}/prompts/llama_70b/generate_function.txt (100%) rename {app => common}/prompts/llama_70b/map_question_to_schema.txt (100%) rename {app => common}/prompts/openai_gpt4/entity_relationship_extraction.txt (100%) rename {app => common}/prompts/openai_gpt4/generate_function.txt (100%) rename {app => common}/prompts/openai_gpt4/map_question_to_schema.txt (100%) rename {app => common}/py_schemas/__init__.py (100%) rename {app => 
common}/py_schemas/schemas.py (100%) rename {app => common}/py_schemas/tool_io_schemas.py (100%) rename {app => common}/session.py (100%) rename {app => common}/status.py (100%) rename {app => common}/storage/__init__.py (100%) rename {app => common}/storage/azure_blob_store.py (92%) rename {app => common}/storage/base_blob_store.py (100%) rename {app => common}/storage/google_blob_store.py (91%) rename {app => common}/storage/s3_blob_store.py (92%) create mode 100644 copilot/.dockerignore create mode 100644 copilot/Dockerfile create mode 100644 copilot/Dockerfile.tests rename {app => copilot/app}/__init__.py (100%) create mode 100644 copilot/app/agent.py rename {app => copilot/app}/agent/__init__.py (100%) rename {app => copilot/app}/agent/agent.py (88%) rename {app => copilot/app}/agent/agent_generation.py (95%) rename {app => copilot/app}/agent/agent_graph.py (96%) rename {app => copilot/app}/agent/agent_hallucination_check.py (96%) rename {app => copilot/app}/agent/agent_rewrite.py (95%) rename {app => copilot/app}/agent/agent_router.py (96%) rename {app => copilot/app}/agent/agent_usefulness_check.py (96%) create mode 120000 copilot/app/common create mode 120000 copilot/app/configs rename {app => copilot/app}/main.py (94%) rename {app => copilot/app}/routers/__init__.py (100%) rename {app => copilot/app}/routers/inquiryai.py (97%) rename {app => copilot/app}/routers/queryai.py (75%) rename {app => copilot/app}/routers/root.py (94%) rename {app => copilot/app}/routers/supportai.py (83%) rename {app => copilot/app}/static/chat.html (100%) rename {app => copilot/app}/static/favicon.ico (100%) rename {app => copilot/app}/supportai/README.md (100%) rename {app => copilot/app}/supportai/concept_management/create_concepts.py (97%) rename {app => copilot/app}/supportai/retrievers/BaseRetriever.py (88%) rename {app => copilot/app}/supportai/retrievers/EntityRelationshipRetriever.py (85%) rename {app => copilot/app}/supportai/retrievers/HNSWOverlapRetriever.py (92%) 
rename {app => copilot/app}/supportai/retrievers/HNSWRetriever.py (91%) rename {app => copilot/app}/supportai/retrievers/HNSWSiblingRetriever.py (93%) rename {app => copilot/app}/supportai/retrievers/__init__.py (100%) rename {app => copilot/app}/supportai/supportai_ingest.py (96%) rename {app => copilot/app}/tg_documents/get_edge_count.json (100%) rename {app => copilot/app}/tg_documents/get_edge_count_from.json (100%) rename {app => copilot/app}/tg_documents/get_edge_stats.json (100%) rename {app => copilot/app}/tg_documents/get_edges.json (100%) rename {app => copilot/app}/tg_documents/get_vertex_count.json (100%) rename {app => copilot/app}/tg_documents/get_vertex_stats.json (100%) rename {app => copilot/app}/tg_documents/get_vertices.json (100%) rename {app => copilot/app}/tg_documents/get_vertices_by_id.json (100%) rename {app => copilot/app}/tg_documents/tg_bfs.json (100%) rename {app => copilot/app}/tg_documents/tg_pagerank.json (100%) rename {app => copilot/app}/tg_documents/tg_shortest_ss_no_wt.json (100%) rename {app => copilot/app}/tools/__init__.py (100%) rename {app => copilot/app}/tools/generate_cypher.py (97%) rename {app => copilot/app}/tools/generate_function.py (96%) rename {app => copilot/app}/tools/map_question_to_schema.py (96%) rename {app => copilot/app}/tools/validation_utils.py (98%) rename {docs => copilot/docs}/Contributing.md (100%) rename {docs => copilot/docs}/DeveloperGuide.md (98%) rename {docs => copilot/docs}/img/CoPilot-UX-Demo.png (100%) rename {docs => copilot/docs}/img/InquiryAI-Architecture.png (100%) rename {docs => copilot/docs}/img/SupportAI-Architecture.png (100%) rename {docs => copilot/docs}/img/SupportAISchema.png (100%) rename {docs => copilot/docs}/img/SwaggerDocUX.png (100%) rename {docs => copilot/docs}/img/TG-CoPilot-Architecture.png (100%) rename {docs => copilot/docs}/notebooks/DigitalInfraDemo.ipynb (100%) rename {docs => copilot/docs}/notebooks/SupportAIDemo.ipynb (100%) rename {docs => 
copilot/docs}/notebooks/TransactionFraudInvestigation.ipynb (100%) rename {docs => copilot/docs}/notebooks/VisualizeAgent.ipynb (99%) rename requirements.txt => copilot/requirements.txt (100%) rename {tests => copilot/tests}/app (100%) create mode 120000 copilot/tests/common create mode 120000 copilot/tests/configs rename {tests => copilot/tests}/conftest.py (96%) rename {tests => copilot/tests}/create_wandb_report.py (100%) rename {tests => copilot/tests}/parse_test_config.py (100%) rename {tests => copilot/tests}/perf/.gitignore (100%) rename {tests => copilot/tests}/perf/README.md (100%) rename {tests => copilot/tests}/perf/customMetrics.js (100%) rename {tests => copilot/tests}/perf/run.sh (100%) rename {tests => copilot/tests}/perf/script.js (100%) create mode 100755 copilot/tests/run_tests.sh rename {tests => copilot/tests}/test_azure_gpt35_turbo_instruct.py (97%) rename {tests => copilot/tests}/test_bedrock.py (97%) rename {tests => copilot/tests}/test_character_chunker.py (97%) rename {tests => copilot/tests}/test_credit_card_redaction.py (96%) rename {tests => copilot/tests}/test_crud_endpoint.py (100%) rename {tests => copilot/tests}/test_gcp_text-bison.py (97%) rename {tests => copilot/tests}/test_groq_mixtral8x7b.py (97%) rename {tests => copilot/tests}/test_huggingface_llama70b.py (97%) rename {tests => copilot/tests}/test_huggingface_phi3.py (97%) rename {tests => copilot/tests}/test_ingest.py (99%) rename {tests => copilot/tests}/test_inquiryai.py (100%) rename {tests => copilot/tests}/test_inquiryai_milvus.py (99%) rename {tests => copilot/tests}/test_log_writer.py (74%) rename {tests => copilot/tests}/test_milvus_embedding_store.py (87%) rename {tests => copilot/tests}/test_openai_gpt35-turbo.py (97%) rename {tests => copilot/tests}/test_openai_gpt4.py (97%) rename {tests => copilot/tests}/test_openai_gpt4o.py (97%) rename {tests => copilot/tests}/test_questions/DigitalInfra/DigitalInfraQuestions.tsv (100%) rename {tests => 
copilot/tests}/test_questions/DigitalInfra/README.md (100%) rename {tests => copilot/tests}/test_questions/DigitalInfra/gsql/create_data_source.gsql (100%) rename {tests => copilot/tests}/test_questions/DigitalInfra/gsql/create_graph.gsql (100%) rename {tests => copilot/tests}/test_questions/DigitalInfra/gsql/create_load_job.gsql (100%) rename {tests => copilot/tests}/test_questions/DigitalInfra/gsql/create_schema.gsql (100%) rename {tests => copilot/tests}/test_questions/DigitalInfra/ms_dependency_chain/ms_dependency_chain.gsql (100%) rename {tests => copilot/tests}/test_questions/DigitalInfra/ms_dependency_chain/ms_dependency_chain_prompt.json (100%) rename {tests => copilot/tests}/test_questions/DigitalInfra/run_load_jobs.json (100%) rename {tests => copilot/tests}/test_questions/DigitalInfra/setup_dataset.py (100%) rename {tests => copilot/tests}/test_questions/OGB_MAG/OGB_MAGQuestions.tsv (100%) rename {tests => copilot/tests}/test_questions/OGB_MAG/author_fields_of_study/author_fields_of_study.gsql (100%) rename {tests => copilot/tests}/test_questions/OGB_MAG/author_fields_of_study/author_fields_of_study_prompt.json (100%) rename {tests => copilot/tests}/test_questions/OGB_MAG/setup_dataset.py (100%) rename {tests => copilot/tests}/test_questions/OGB_MAG/tg_pagerank/tg_pagerank.gsql (100%) rename {tests => copilot/tests}/test_questions/OGB_MAG/tg_pagerank/tg_pagerank_prompt.json (100%) create mode 100644 copilot/tests/test_questions/OGB_MAG_SHORT/OGB_MAG_SHORTQuestions.tsv rename {tests => copilot/tests}/test_questions/Synthea/README.md (100%) rename {tests => copilot/tests}/test_questions/Synthea/SyntheaQuestions.tsv (100%) rename {tests => copilot/tests}/test_questions/Synthea/gsql/create_data_source.gsql (100%) rename {tests => copilot/tests}/test_questions/Synthea/gsql/create_graph.gsql (100%) rename {tests => copilot/tests}/test_questions/Synthea/gsql/create_load_job.gsql (100%) rename {tests => 
copilot/tests}/test_questions/Synthea/gsql/create_schema.gsql (100%) rename {tests => copilot/tests}/test_questions/Synthea/run_load_jobs.json (100%) rename {tests => copilot/tests}/test_questions/Synthea/setup_dataset.py (100%) rename {tests => copilot/tests}/test_regex_chunker.py (95%) rename {tests => copilot/tests}/test_sagemaker_llama7b.py (100%) rename {tests => copilot/tests}/test_semantic_chunker.py (86%) rename {tests => copilot/tests}/test_service.py (100%) rename {tests => copilot/tests}/test_supportai.py (100%) rename {tests => copilot/tests}/test_supportai_load_ingest_creation.py (100%) rename {tests => copilot/tests}/test_validate_function_call.py (99%) rename {udfs => copilot/udfs}/milvus/rest/ExprFunctions.hpp (100%) rename {udfs => copilot/udfs}/milvus/rest/ExprUtil.hpp (100%) rename {udfs => copilot/udfs}/milvus/rest/install.gsql (100%) rename {udfs => copilot/udfs}/milvus/rest/test.gsql (100%) create mode 100644 docker-compose-with-apps.yml create mode 100644 eventual-consistency-service/.dockerignore create mode 100644 eventual-consistency-service/Dockerfile rename Dockerfile.tests => eventual-consistency-service/Dockerfile.tests (93%) create mode 100644 eventual-consistency-service/app/__init__.py create mode 120000 eventual-consistency-service/app/common create mode 120000 eventual-consistency-service/app/configs rename {app/sync => eventual-consistency-service/app}/eventual_consistency_checker.py (90%) create mode 100644 eventual-consistency-service/app/main.py create mode 100644 eventual-consistency-service/requirements.txt create mode 120000 eventual-consistency-service/tests/app create mode 120000 eventual-consistency-service/tests/common rename {tests => eventual-consistency-service/tests}/test_eventual_consistency_checker.py (83%) create mode 100755 scripts/build.sh create mode 100755 scripts/build_copilot.sh create mode 100755 scripts/build_ecc.sh delete mode 100755 tests/run_tests.sh delete mode 100644 tests/test_questions/.DS_Store 
diff --git a/.github/workflows/build-test-ci.yaml b/.github/workflows/build-test-ci.yaml index 1379ce90..4792e5ce 100644 --- a/.github/workflows/build-test-ci.yaml +++ b/.github/workflows/build-test-ci.yaml @@ -21,11 +21,11 @@ jobs: - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 - - name: Build and push Docker image + - name: Build Docker.tests image uses: docker/build-push-action@v5 with: - context: . - file: ./Dockerfile.tests + context: . + file: copilot/Dockerfile.tests push: false load: true tags: nlqs/tests:0.1 @@ -66,13 +66,13 @@ jobs: if: ${{ github.event_name }} == 'pull_request' run: | docker rm -f nlqs-tests || true - docker run -it -v $(pwd)/configs/:/code/configs -e GOOGLE_APPLICATION_CREDENTIALS=/code/configs/GCP_CREDS.json -e WANDB_API_KEY=${{ secrets.WANDB_API_KEY }} -e PR_NUMBER=${{ github.event.number }} --name nlqs-tests -d nlqs/tests:0.1 + docker run -it -v $(pwd)/configs/:/code/app/configs -e GOOGLE_APPLICATION_CREDENTIALS=/code/app/configs/GCP_CREDS.json -e WANDB_API_KEY=${{ secrets.WANDB_API_KEY }} -e PR_NUMBER=${{ github.event.number }} --name nlqs-tests -d nlqs/tests:0.1 - name: Run Docker Container for Regress if: ${{ github.event_name == 'schedule' }} run: | docker rm -f nlqs-tests || true - docker run -it -v $(pwd)/configs/:/code/configs -e GOOGLE_APPLICATION_CREDENTIALS=/code/configs/GCP_CREDS.json -e WANDB_API_KEY=${{ secrets.WANDB_API_KEY }} -e PR_NUMBER="DailyRegression" --name nlqs-tests -d nlqs/tests:0.1 + docker run -it -v $(pwd)/configs/:/code/app/configs -e GOOGLE_APPLICATION_CREDENTIALS=/code/app/configs/GCP_CREDS.json -e WANDB_API_KEY=${{ secrets.WANDB_API_KEY }} -e PR_NUMBER="DailyRegression" --name nlqs-tests -d nlqs/tests:0.1 - name: Execute PR Tests if: github.event_name == 'pull_request' diff --git a/.github/workflows/cloud-build-deploy-ci.yaml b/.github/workflows/cloud-build-deploy-ci.yaml index 0ff50b49..885bf333 100644 --- a/.github/workflows/cloud-build-deploy-ci.yaml +++ 
b/.github/workflows/cloud-build-deploy-ci.yaml @@ -33,12 +33,22 @@ jobs: echo "IMAGE=$IMAGE" >> $GITHUB_ENV echo "IMAGE=$IMAGE" >> $GITHUB_OUTPUT${{ needs.setup.outputs.image }} - - name: Build and push Docker image + - name: Build and push Docker image CoPilot uses: docker/build-push-action@v5 with: context: . - file: ./Dockerfile + file: ./copilot/Dockerfile push: true tags: | ${{ env.IMAGE }} - tginternal/copilot:cloud-latest \ No newline at end of file + tginternal/copilot:cloud-latest + + - name: Build and push Docker image ECC + uses: docker/build-push-action@v5 + with: + context: . + file: ./eventual-consistency-service/Dockerfile + push: true + tags: | + ${{ env.IMAGE }} + tginternal/ecc:cloud-latest \ No newline at end of file diff --git a/.github/workflows/cloud-build-nightly.yaml b/.github/workflows/cloud-build-nightly.yaml index 3d575674..eebca1c8 100644 --- a/.github/workflows/cloud-build-nightly.yaml +++ b/.github/workflows/cloud-build-nightly.yaml @@ -33,12 +33,22 @@ jobs: echo "IMAGE=$IMAGE" >> $GITHUB_ENV echo "IMAGE=$IMAGE" >> $GITHUB_OUTPUT${{ needs.setup.outputs.image }} - - name: Build and push Docker image + - name: Build and push Docker image CoPilot uses: docker/build-push-action@v5 with: context: . - file: ./Dockerfile + file: ./copilot/Dockerfile push: true tags: | ${{ env.IMAGE }} - tginternal/copilot:cloud-dev \ No newline at end of file + tginternal/copilot:cloud-dev + + - name: Build and push Docker image ECC + uses: docker/build-push-action@v5 + with: + context: . 
+ file: ./eventual-consistency-service/Dockerfile + push: true + tags: | + ${{ env.IMAGE }} + tginternal/ecc:cloud-dev \ No newline at end of file diff --git a/.github/workflows/onprem-build-nightly.yaml b/.github/workflows/onprem-build-nightly.yaml index e5b900e1..6814ed74 100644 --- a/.github/workflows/onprem-build-nightly.yaml +++ b/.github/workflows/onprem-build-nightly.yaml @@ -33,12 +33,22 @@ jobs: echo "IMAGE=$IMAGE" >> $GITHUB_ENV echo "IMAGE=$IMAGE" >> $GITHUB_OUTPUT${{ needs.setup.outputs.image }} - - name: Build and push Docker image + - name: Build and push Docker image CoPilot uses: docker/build-push-action@v5 with: context: . - file: ./Dockerfile + file: ./copilot/Dockerfile push: true tags: | ${{ env.IMAGE }} tigergraphml/copilot:dev + + - name: Build and push Docker image ECC + uses: docker/build-push-action@v5 + with: + context: . + file: ./eventual-consistency-service/Dockerfile + push: true + tags: | + ${{ env.IMAGE }} + tigergraphml/ecc:dev diff --git a/.github/workflows/onprem-build.yaml b/.github/workflows/onprem-build.yaml index fae992ea..ff2b46d1 100644 --- a/.github/workflows/onprem-build.yaml +++ b/.github/workflows/onprem-build.yaml @@ -33,12 +33,22 @@ jobs: echo "IMAGE=$IMAGE" >> $GITHUB_ENV echo "IMAGE=$IMAGE" >> $GITHUB_OUTPUT${{ needs.setup.outputs.image }} - - name: Build and push Docker image + - name: Build and push Docker image CoPilot uses: docker/build-push-action@v5 with: context: . - file: ./Dockerfile + file: ./copilot/Dockerfile push: true tags: | ${{ env.IMAGE }} - tigergraphml/copilot:latest \ No newline at end of file + tigergraphml/copilot:latest + + - name: Build and push Docker image ECC + uses: docker/build-push-action@v5 + with: + context: . 
+ file: ./eventual-consistency-service/Dockerfile + push: true + tags: | + ${{ env.IMAGE }} + tigergraphml/ecc:latest \ No newline at end of file diff --git a/.github/workflows/pull-test-merge.yaml b/.github/workflows/pull-test-merge.yaml index 533dba97..654703d8 100644 --- a/.github/workflows/pull-test-merge.yaml +++ b/.github/workflows/pull-test-merge.yaml @@ -33,12 +33,11 @@ jobs: python -m venv venv source venv/bin/activate python -m pip install --upgrade pip - pip install -r requirements.txt + pip install -r copilot/requirements.txt pip install pytest - name: Create db config run: | - source venv/bin/activate mkdir configs echo "$DB_CONFIG" > configs/db_config.json echo "$LLM_CONFIG_OPENAI_GPT4" > configs/llm_config.json @@ -65,7 +64,9 @@ jobs: - name: Run pytest run: | source venv/bin/activate - ./venv/bin/python -m pytest --disable-warnings + cp -r copilot/tests/*test* copilot/tests/create_wandb_report.py copilot/app/ + cd copilot/app + python -m pytest --disable-warnings env: DB_CONFIG: ${{ secrets.DB_CONFIG }} LLM_CONFIG: ${{ secrets.LLM_CONFIG_OPENAI_GPT4 }} @@ -78,4 +79,4 @@ jobs: LLM_TEST_EVALUATOR: ${{ secrets.LLM_TEST_EVALUATOR }} MILVUS_CONFIG: ${{ secrets.MILVUS_CONFIG }} PYTHONPATH: /opt/actions-runner/_work/CoPilot/CoPilot:/opt/actions-runner/_work/CoPilot/CoPilot/tests:/opt/actions-runner/_work/CoPilot/CoPilot/tests/app:/opt/actions-runner/_work/_tool/Python/3.11.8/x64/lib/python3.11/site-packages - \ No newline at end of file + diff --git a/.gitignore b/.gitignore index 408d9766..70d53200 100644 --- a/.gitignore +++ b/.gitignore @@ -18,4 +18,11 @@ log.WARNING logs/* tmp .idea -volumes \ No newline at end of file +eventual-consistency-service/app/logs/* +eventual-consistency-service/logs/* +eventual-consistency-service/tests/logs/* +copilot/app/logs/* +copilot/logs/* +copilot/tests/logs/* +volumes +build/ diff --git a/Dockerfile b/Dockerfile deleted file mode 100644 index 105e6633..00000000 --- a/Dockerfile +++ /dev/null @@ -1,21 +0,0 @@ -FROM 
python:3.11.8 -WORKDIR /code - - -COPY ./requirements.txt /code/requirements.txt - - -RUN apt-get update && apt-get upgrade -y -RUN pip install -r /code/requirements.txt - - -COPY ./app /code/app - -ENV LLM_CONFIG="/llm_config.json" -ENV DB_CONFIG="/db_config.json" -ENV MILVUS_CONFIG="/milvus_config.json" - -# INFO, DEBUG, DEBUG_PII -ENV LOGLEVEL="INFO" - -CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"] diff --git a/README.md b/README.md index 538532a7..9c058ca2 100644 --- a/README.md +++ b/README.md @@ -350,11 +350,21 @@ TigerGraph CoPilot is designed to be easily extensible. The service can be confi ## Testing A family of tests are included under the `tests` directory. If you would like to add more tests please refer to the [guide here](./docs/DeveloperGuide.md#adding-a-new-test-suite). A shell script `run_tests.sh` is also included in the folder which is the driver for running the tests. The easiest way to use this script is to execute it in the Docker Container for testing. +### Testing with Pytest +You can run testing for each service by going to the top level of the service's directory and running `python -m pytest` + +e.g. (from the top level) +```sh +cd copilot +python -m pytest +cd .. +``` + ### Test in Docker Container First, make sure that all your LLM service provider configuration files are working properly. The configs will be mounted for the container to access. Also make sure that all the dependencies such as database and Milvus are ready. If not, you can run the included docker compose file to create those services. ```sh -docker compose docker-compose.yml up -d --build +docker compose up -d --build ``` If you want to use Weights And Biases for logging the test results, your WandB API key needs to be set in an environment variable on the host machine. 
diff --git a/app/supportai/extractors/__init__.py b/app/supportai/extractors/__init__.py deleted file mode 100644 index 259cc35c..00000000 --- a/app/supportai/extractors/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from app.supportai.extractors.LLMEntityRelationshipExtractor import ( - LLMEntityRelationshipExtractor, -) diff --git a/app/util.py b/app/util.py deleted file mode 100644 index 16a5e7af..00000000 --- a/app/util.py +++ /dev/null @@ -1,190 +0,0 @@ -import logging -from typing import Annotated - -from fastapi import Depends, HTTPException, status -from fastapi.security import HTTPAuthorizationCredentials, HTTPBasicCredentials -from pyTigerGraph import TigerGraphConnection -from pyTigerGraph.pyTigerGraphException import TigerGraphException -from requests import HTTPError - -from app.config import (db_config, doc_processing_config, embedding_service, - get_llm_service, llm_config, milvus_config, security) -from app.embeddings.milvus_embedding_store import MilvusEmbeddingStore -from app.metrics.tg_proxy import TigerGraphConnectionProxy -from app.sync.eventual_consistency_checker import EventualConsistencyChecker -from app.tools.logwriter import LogWriter - -logger = logging.getLogger(__name__) -consistency_checkers = {} - - -def get_db_connection_id_token( - graphname: str, - credentials: Annotated[HTTPAuthorizationCredentials, Depends(security)], -) -> TigerGraphConnectionProxy: - conn = TigerGraphConnection( - host=db_config["hostname"], - restppPort=db_config.get("restppPort", "9000"), - gsPort=db_config.get("gsPort", "14240"), - graphname=graphname, - apiToken=credentials, - tgCloud=True, - sslPort=db_config.get("gsPort", "14240"), - ) - conn.customizeHeader( - timeout=db_config["default_timeout"] * 1000, responseSize=5000000 - ) - conn = TigerGraphConnectionProxy(conn, auth_mode="id_token") - - try: - conn.gsql("USE GRAPH " + graphname) - except HTTPError: - LogWriter.error("Failed to connect to TigerGraph. 
Incorrect ID Token.") - raise HTTPException( - status_code=status.HTTP_401_UNAUTHORIZED, - detail="Incorrect token", - headers={"WWW-Authenticate": "Bearer"}, - ) - except TigerGraphException as e: - LogWriter.error(f"Failed to get token: {e}") - raise HTTPException( - status_code=status.HTTP_503_SERVICE_UNAVAILABLE, - detail="Failed to get token - is the database running?" - ) - LogWriter.info("Connected to TigerGraph with ID Token") - return conn - - -def get_db_connection_pwd( - graphname, credentials: Annotated[HTTPBasicCredentials, Depends(security)] -) -> TigerGraphConnectionProxy: - conn = TigerGraphConnection( - host=db_config["hostname"], - restppPort=db_config.get("restppPort", "9000"), - gsPort=db_config.get("gsPort", "14240"), - username=credentials.username, - password=credentials.password, - graphname=graphname, - ) - - if db_config["getToken"]: - try: - apiToken = conn._post( - conn.restppUrl + "/requesttoken", - authMode="pwd", - data=str({"graph": conn.graphname}), - resKey="results", - )["token"] - except HTTPError: - LogWriter.error("Failed to get token") - raise HTTPException( - status_code=status.HTTP_401_UNAUTHORIZED, - detail="Incorrect username or password", - headers={"WWW-Authenticate": "Basic"}, - ) - except TigerGraphException as e: - LogWriter.error(f"Failed to get token: {e}") - raise HTTPException( - status_code=status.HTTP_503_SERVICE_UNAVAILABLE, - detail="Failed to get token - is the database running?" 
- ) - - conn = TigerGraphConnection( - host=db_config["hostname"], - restppPort=db_config.get("restppPort", "9000"), - gsPort=db_config.get("gsPort", "14240"), - username=credentials.username, - password=credentials.password, - graphname=graphname, - apiToken=apiToken, - ) - - conn.customizeHeader( - timeout=db_config["default_timeout"] * 1000, responseSize=5000000 - ) - conn = TigerGraphConnectionProxy(conn) - LogWriter.info("Connected to TigerGraph with password") - return conn - - -def get_eventual_consistency_checker(graphname: str, conn: TigerGraphConnectionProxy): - if not db_config.get("enable_consistency_checker", True): - logger.debug("Eventual consistency checker disabled") - return - - check_interval_seconds = milvus_config.get("sync_interval_seconds", 30 * 60) - - if graphname not in consistency_checkers: - vector_indices = {} - if milvus_config.get("enabled") == "true": - vertex_field = milvus_config.get("vertex_field", "vertex_id") - index_names = milvus_config.get( - "indexes", - ["Document", "DocumentChunk", "Entity", "Relationship", "Concept"], - ) - for index_name in index_names: - vector_indices[graphname + "_" + index_name] = MilvusEmbeddingStore( - embedding_service, - host=milvus_config["host"], - port=milvus_config["port"], - support_ai_instance=True, - collection_name=graphname + "_" + index_name, - username=milvus_config.get("username", ""), - password=milvus_config.get("password", ""), - vector_field=milvus_config.get("vector_field", "document_vector"), - text_field=milvus_config.get("text_field", "document_content"), - vertex_field=vertex_field, - ) - - if doc_processing_config.get("chunker") == "semantic": - from app.supportai.chunkers.semantic_chunker import SemanticChunker - - chunker = SemanticChunker( - embedding_service, - doc_processing_config["chunker_config"].get("method", "percentile"), - doc_processing_config["chunker_config"].get("threshold", 0.95), - ) - elif doc_processing_config.get("chunker") == "regex": - from 
app.supportai.chunkers.regex_chunker import RegexChunker - - chunker = RegexChunker( - pattern=doc_processing_config["chunker_config"].get( - "pattern", "\\r?\\n" - ) - ) - elif doc_processing_config.get("chunker") == "character": - from app.supportai.chunkers.character_chunker import \ - CharacterChunker - - chunker = CharacterChunker( - chunk_size=doc_processing_config["chunker_config"].get( - "chunk_size", 1024 - ), - overlap_size=doc_processing_config["chunker_config"].get( - "overlap_size", 0 - ), - ) - else: - raise ValueError("Invalid chunker type") - - if doc_processing_config.get("extractor") == "llm": - from app.supportai.extractors import LLMEntityRelationshipExtractor - - extractor = LLMEntityRelationshipExtractor(get_llm_service(llm_config)) - else: - raise ValueError("Invalid extractor type") - - checker = EventualConsistencyChecker( - check_interval_seconds, - graphname, - vertex_field, # FIXME: if milvus is not enabled, this is not defined and will crash here (vertex_field used before assignment) - embedding_service, - index_names, - vector_indices, - conn, - chunker, - extractor, - ) - checker.initialize() - consistency_checkers[graphname] = checker - return consistency_checkers[graphname] diff --git a/common/__init__.py b/common/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/app/supportai/chunkers/__init__.py b/common/chunkers/__init__.py similarity index 100% rename from app/supportai/chunkers/__init__.py rename to common/chunkers/__init__.py diff --git a/app/supportai/chunkers/base_chunker.py b/common/chunkers/base_chunker.py similarity index 100% rename from app/supportai/chunkers/base_chunker.py rename to common/chunkers/base_chunker.py diff --git a/app/supportai/chunkers/character_chunker.py b/common/chunkers/character_chunker.py similarity index 92% rename from app/supportai/chunkers/character_chunker.py rename to common/chunkers/character_chunker.py index bf4f73d1..536c41fb 100644 --- 
a/app/supportai/chunkers/character_chunker.py +++ b/common/chunkers/character_chunker.py @@ -1,4 +1,4 @@ -from app.supportai.chunkers.base_chunker import BaseChunker +from common.chunkers.base_chunker import BaseChunker class CharacterChunker(BaseChunker): diff --git a/app/supportai/chunkers/regex_chunker.py b/common/chunkers/regex_chunker.py similarity index 88% rename from app/supportai/chunkers/regex_chunker.py rename to common/chunkers/regex_chunker.py index 702708a1..19345c5e 100644 --- a/app/supportai/chunkers/regex_chunker.py +++ b/common/chunkers/regex_chunker.py @@ -1,4 +1,4 @@ -from app.supportai.chunkers.base_chunker import BaseChunker +from common.chunkers.base_chunker import BaseChunker import re from typing import List diff --git a/app/supportai/chunkers/semantic_chunker.py b/common/chunkers/semantic_chunker.py similarity index 88% rename from app/supportai/chunkers/semantic_chunker.py rename to common/chunkers/semantic_chunker.py index 9480a4e7..0ee1e533 100644 --- a/app/supportai/chunkers/semantic_chunker.py +++ b/common/chunkers/semantic_chunker.py @@ -1,5 +1,5 @@ -from app.supportai.chunkers.base_chunker import BaseChunker -from app.embeddings.embedding_services import EmbeddingModel +from common.chunkers.base_chunker import BaseChunker +from common.embeddings.embedding_services import EmbeddingModel from langchain_experimental.text_splitter import ( SemanticChunker as LangChainSemanticChunker, ) diff --git a/common/common b/common/common new file mode 120000 index 00000000..dc879abe --- /dev/null +++ b/common/common @@ -0,0 +1 @@ +../../common \ No newline at end of file diff --git a/app/config.py b/common/config.py similarity index 95% rename from app/config.py rename to common/config.py index 80ee4be3..8eb9432a 100644 --- a/app/config.py +++ b/common/config.py @@ -3,14 +3,14 @@ from fastapi.security import HTTPBasic -from app.embeddings.embedding_services import ( +from common.embeddings.embedding_services import ( AWS_Bedrock_Embedding, 
AzureOpenAI_Ada002, OpenAI_Embedding, VertexAI_PaLM_Embedding, ) -from app.embeddings.milvus_embedding_store import MilvusEmbeddingStore -from app.llm_services import ( +from common.embeddings.milvus_embedding_store import MilvusEmbeddingStore +from common.llm_services import ( AWS_SageMaker_Endpoint, AWSBedrock, AzureOpenAI, @@ -20,9 +20,9 @@ Ollama, HuggingFaceEndpoint ) -from app.session import SessionHandler -from app.status import StatusManager -from app.tools.logwriter import LogWriter +from common.session import SessionHandler +from common.status import StatusManager +from common.logs.logwriter import LogWriter security = HTTPBasic() session_handler = SessionHandler() diff --git a/common/db/connections.py b/common/db/connections.py new file mode 100644 index 00000000..ef4854a6 --- /dev/null +++ b/common/db/connections.py @@ -0,0 +1,109 @@ +import logging +from typing import Annotated + +from fastapi import Depends, HTTPException, status +from fastapi.security import HTTPBasicCredentials, HTTPAuthorizationCredentials +from pyTigerGraph import TigerGraphConnection +from pyTigerGraph.pyTigerGraphException import TigerGraphException +from requests import HTTPError + +from common.config import ( + db_config, + security, +) +from common.metrics.tg_proxy import TigerGraphConnectionProxy +from common.logs.logwriter import LogWriter + +logger = logging.getLogger(__name__) +consistency_checkers = {} + + +def get_db_connection_id_token( + graphname: str, + credentials: Annotated[HTTPAuthorizationCredentials, Depends(security)], +) -> TigerGraphConnectionProxy: + conn = TigerGraphConnection( + host=db_config["hostname"], + graphname=graphname, + apiToken=credentials, + tgCloud=True, + sslPort=14240, + ) + conn.customizeHeader( + timeout=db_config["default_timeout"] * 1000, responseSize=5000000 + ) + conn = TigerGraphConnectionProxy(conn, auth_mode="id_token") + + try: + conn.gsql("USE GRAPH " + graphname) + except HTTPError: + LogWriter.error("Failed to connect to 
TigerGraph. Incorrect ID Token.") + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail="Incorrect token", + headers={"WWW-Authenticate": "Bearer"}, + ) + except TigerGraphException as e: + LogWriter.error(f"Failed to get token: {e}") + raise HTTPException( + status_code=status.HTTP_503_SERVICE_UNAVAILABLE, + detail="Failed to get token - is the database running?" + ) + + LogWriter.info("Connected to TigerGraph with ID Token") + return conn + + +def get_db_connection_pwd( + graphname, credentials: Annotated[HTTPBasicCredentials, Depends(security)] +) -> TigerGraphConnectionProxy: + conn = elevate_db_connection_to_token(db_config["hostname"], credentials.username, credentials.password, graphname) + + conn.customizeHeader( + timeout=db_config["default_timeout"] * 1000, responseSize=5000000 + ) + conn = TigerGraphConnectionProxy(conn) + LogWriter.info("Connected to TigerGraph with password") + return conn + + +def elevate_db_connection_to_token(host, username, password, graphname) -> TigerGraphConnectionProxy: + conn = TigerGraphConnection( + host=host, + username=username, + password=password, + graphname=graphname + ) + + if db_config["getToken"]: + try: + apiToken = conn._post( + conn.restppUrl + "/requesttoken", + authMode="pwd", + data=str({"graph": conn.graphname}), + resKey="results", + )["token"] + except HTTPError: + LogWriter.error("Failed to get token") + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail="Incorrect username or password", + headers={"WWW-Authenticate": "Basic"}, + ) + except TigerGraphException as e: + LogWriter.error(f"Failed to get token: {e}") + raise HTTPException( + status_code=status.HTTP_503_SERVICE_UNAVAILABLE, + detail="Failed to get token - is the database running?" 
+ ) + + + conn = TigerGraphConnection( + host=db_config["hostname"], + username=username, + password=password, + graphname=graphname, + apiToken=apiToken + ) + + return conn \ No newline at end of file diff --git a/app/embeddings/base_embedding_store.py b/common/embeddings/base_embedding_store.py similarity index 100% rename from app/embeddings/base_embedding_store.py rename to common/embeddings/base_embedding_store.py diff --git a/app/embeddings/embedding_services.py b/common/embeddings/embedding_services.py similarity index 97% rename from app/embeddings/embedding_services.py rename to common/embeddings/embedding_services.py index e462009a..c76bf46d 100644 --- a/app/embeddings/embedding_services.py +++ b/common/embeddings/embedding_services.py @@ -3,9 +3,9 @@ from langchain.schema.embeddings import Embeddings import logging import time -from app.log import req_id_cv -from app.metrics.prometheus_metrics import metrics -from app.tools.logwriter import LogWriter +from common.logs.log import req_id_cv +from common.metrics.prometheus_metrics import metrics +from common.logs.logwriter import LogWriter logger = logging.getLogger(__name__) diff --git a/app/embeddings/faiss_embedding_store.py b/common/embeddings/faiss_embedding_store.py similarity index 94% rename from app/embeddings/faiss_embedding_store.py rename to common/embeddings/faiss_embedding_store.py index f9737ccd..ca5383aa 100644 --- a/app/embeddings/faiss_embedding_store.py +++ b/common/embeddings/faiss_embedding_store.py @@ -1,7 +1,7 @@ -from app.embeddings.base_embedding_store import EmbeddingStore -from app.embeddings.embedding_services import EmbeddingModel -from app.log import req_id_cv -from app.tools.logwriter import LogWriter +from common.embeddings.base_embedding_store import EmbeddingStore +from common.embeddings.embedding_services import EmbeddingModel +from common.logs.log import req_id_cv +from common.logs.logwriter import LogWriter import logging from typing import Iterable, Tuple, List @@ -36,7 
+36,7 @@ def metadata_func(record: dict, metadata: dict) -> dict: return metadata loader = DirectoryLoader( - "./app/pytg_documents/", + "./pytg_documents/", glob="*.json", loader_cls=JSONLoader, loader_kwargs={ diff --git a/app/embeddings/milvus_embedding_store.py b/common/embeddings/milvus_embedding_store.py similarity index 98% rename from app/embeddings/milvus_embedding_store.py rename to common/embeddings/milvus_embedding_store.py index 6bb188ad..6a18fd70 100644 --- a/app/embeddings/milvus_embedding_store.py +++ b/common/embeddings/milvus_embedding_store.py @@ -7,11 +7,11 @@ from pymilvus import connections, utility from pymilvus.exceptions import MilvusException -from app.embeddings.base_embedding_store import EmbeddingStore -from app.embeddings.embedding_services import EmbeddingModel -from app.log import req_id_cv -from app.metrics.prometheus_metrics import metrics -from app.tools.logwriter import LogWriter +from common.embeddings.base_embedding_store import EmbeddingStore +from common.embeddings.embedding_services import EmbeddingModel +from common.logs.log import req_id_cv +from common.metrics.prometheus_metrics import metrics +from common.logs.logwriter import LogWriter logger = logging.getLogger(__name__) @@ -119,7 +119,7 @@ def metadata_func(record: dict, metadata: dict) -> dict: LogWriter.info("Milvus add initial load documents init()") loader = DirectoryLoader( - "./app/tg_documents/", + "./tg_documents/", glob="*.json", loader_cls=JSONLoader, loader_kwargs={ @@ -209,9 +209,8 @@ def add_embeddings( raise Exception(error_message) except Exception as e: - error_message = f"An error occurred while registerin document: {str(e)}" + error_message = f"An error occurred while registering document: {str(e)}" LogWriter.error(error_message) - raise e def get_pks( self, diff --git a/app/supportai/extractors/BaseExtractor.py b/common/extractors/BaseExtractor.py similarity index 100% rename from app/supportai/extractors/BaseExtractor.py rename to 
common/extractors/BaseExtractor.py diff --git a/app/supportai/extractors/LLMEntityRelationshipExtractor.py b/common/extractors/LLMEntityRelationshipExtractor.py similarity index 97% rename from app/supportai/extractors/LLMEntityRelationshipExtractor.py rename to common/extractors/LLMEntityRelationshipExtractor.py index 732f1d7e..d5a0a970 100644 --- a/app/supportai/extractors/LLMEntityRelationshipExtractor.py +++ b/common/extractors/LLMEntityRelationshipExtractor.py @@ -1,6 +1,6 @@ -from app.llm_services import LLM_Model -from app.supportai.extractors.BaseExtractor import BaseExtractor -from app.py_schemas import KnowledgeGraph +from common.llm_services import LLM_Model +from common.extractors.BaseExtractor import BaseExtractor +from common.py_schemas import KnowledgeGraph from typing import List import json diff --git a/common/extractors/__init__.py b/common/extractors/__init__.py new file mode 100644 index 00000000..ced539e4 --- /dev/null +++ b/common/extractors/__init__.py @@ -0,0 +1,3 @@ +from common.extractors.LLMEntityRelationshipExtractor import ( + LLMEntityRelationshipExtractor, +) diff --git a/app/gsql/supportai/HNSW/HNSW_BuildIndex.gsql b/common/gsql/HNSW/HNSW_BuildIndex.gsql similarity index 100% rename from app/gsql/supportai/HNSW/HNSW_BuildIndex.gsql rename to common/gsql/HNSW/HNSW_BuildIndex.gsql diff --git a/app/gsql/supportai/HNSW/HNSW_CreateEntrypoint.gsql b/common/gsql/HNSW/HNSW_CreateEntrypoint.gsql similarity index 100% rename from app/gsql/supportai/HNSW/HNSW_CreateEntrypoint.gsql rename to common/gsql/HNSW/HNSW_CreateEntrypoint.gsql diff --git a/app/gsql/supportai/HNSW/HNSW_DeleteIndex.gsql b/common/gsql/HNSW/HNSW_DeleteIndex.gsql similarity index 100% rename from app/gsql/supportai/HNSW/HNSW_DeleteIndex.gsql rename to common/gsql/HNSW/HNSW_DeleteIndex.gsql diff --git a/app/gsql/supportai/HNSW/HNSW_Search.gsql b/common/gsql/HNSW/HNSW_Search.gsql similarity index 100% rename from app/gsql/supportai/HNSW/HNSW_Search.gsql rename to 
common/gsql/HNSW/HNSW_Search.gsql diff --git a/app/gsql/supportai/concept_curation/concept_creation/Build_Community_Concepts.gsql b/common/gsql/concept_curation/concept_creation/Build_Community_Concepts.gsql similarity index 100% rename from app/gsql/supportai/concept_curation/concept_creation/Build_Community_Concepts.gsql rename to common/gsql/concept_curation/concept_creation/Build_Community_Concepts.gsql diff --git a/app/gsql/supportai/concept_curation/concept_creation/Build_Concept_Tree.gsql b/common/gsql/concept_curation/concept_creation/Build_Concept_Tree.gsql similarity index 100% rename from app/gsql/supportai/concept_curation/concept_creation/Build_Concept_Tree.gsql rename to common/gsql/concept_curation/concept_creation/Build_Concept_Tree.gsql diff --git a/app/gsql/supportai/concept_curation/concept_creation/Build_Entity_Concepts.gsql b/common/gsql/concept_curation/concept_creation/Build_Entity_Concepts.gsql similarity index 100% rename from app/gsql/supportai/concept_curation/concept_creation/Build_Entity_Concepts.gsql rename to common/gsql/concept_curation/concept_creation/Build_Entity_Concepts.gsql diff --git a/app/gsql/supportai/concept_curation/concept_creation/Build_Relationship_Concepts.gsql b/common/gsql/concept_curation/concept_creation/Build_Relationship_Concepts.gsql similarity index 100% rename from app/gsql/supportai/concept_curation/concept_creation/Build_Relationship_Concepts.gsql rename to common/gsql/concept_curation/concept_creation/Build_Relationship_Concepts.gsql diff --git a/app/gsql/supportai/concept_curation/concept_creation/getEntityRelationshipConceptCooccurrence.gsql b/common/gsql/concept_curation/concept_creation/getEntityRelationshipConceptCooccurrence.gsql similarity index 100% rename from app/gsql/supportai/concept_curation/concept_creation/getEntityRelationshipConceptCooccurrence.gsql rename to common/gsql/concept_curation/concept_creation/getEntityRelationshipConceptCooccurrence.gsql diff --git 
a/app/gsql/supportai/retrievers/Entity_Relationship_Retrieval.gsql b/common/gsql/retrievers/Entity_Relationship_Retrieval.gsql similarity index 100% rename from app/gsql/supportai/retrievers/Entity_Relationship_Retrieval.gsql rename to common/gsql/retrievers/Entity_Relationship_Retrieval.gsql diff --git a/app/gsql/supportai/retrievers/HNSW_Chunk_Sibling_Search.gsql b/common/gsql/retrievers/HNSW_Chunk_Sibling_Search.gsql similarity index 100% rename from app/gsql/supportai/retrievers/HNSW_Chunk_Sibling_Search.gsql rename to common/gsql/retrievers/HNSW_Chunk_Sibling_Search.gsql diff --git a/app/gsql/supportai/retrievers/HNSW_Overlap_Search.gsql b/common/gsql/retrievers/HNSW_Overlap_Search.gsql similarity index 100% rename from app/gsql/supportai/retrievers/HNSW_Overlap_Search.gsql rename to common/gsql/retrievers/HNSW_Overlap_Search.gsql diff --git a/app/gsql/supportai/retrievers/HNSW_Search_Content.gsql b/common/gsql/retrievers/HNSW_Search_Content.gsql similarity index 100% rename from app/gsql/supportai/retrievers/HNSW_Search_Content.gsql rename to common/gsql/retrievers/HNSW_Search_Content.gsql diff --git a/app/gsql/supportai/retrievers/HNSW_Search_Sub.gsql b/common/gsql/retrievers/HNSW_Search_Sub.gsql similarity index 100% rename from app/gsql/supportai/retrievers/HNSW_Search_Sub.gsql rename to common/gsql/retrievers/HNSW_Search_Sub.gsql diff --git a/app/gsql/supportai/ECC_Status.gsql b/common/gsql/supportai/ECC_Status.gsql similarity index 100% rename from app/gsql/supportai/ECC_Status.gsql rename to common/gsql/supportai/ECC_Status.gsql diff --git a/app/gsql/supportai/Scan_For_Updates.gsql b/common/gsql/supportai/Scan_For_Updates.gsql similarity index 100% rename from app/gsql/supportai/Scan_For_Updates.gsql rename to common/gsql/supportai/Scan_For_Updates.gsql diff --git a/app/gsql/supportai/SupportAI_DataSourceCreation.gsql b/common/gsql/supportai/SupportAI_DataSourceCreation.gsql similarity index 100% rename from 
app/gsql/supportai/SupportAI_DataSourceCreation.gsql rename to common/gsql/supportai/SupportAI_DataSourceCreation.gsql diff --git a/app/gsql/supportai/SupportAI_IndexCreation.gsql b/common/gsql/supportai/SupportAI_IndexCreation.gsql similarity index 100% rename from app/gsql/supportai/SupportAI_IndexCreation.gsql rename to common/gsql/supportai/SupportAI_IndexCreation.gsql diff --git a/app/gsql/supportai/SupportAI_InitialLoadCSV.gsql b/common/gsql/supportai/SupportAI_InitialLoadCSV.gsql similarity index 100% rename from app/gsql/supportai/SupportAI_InitialLoadCSV.gsql rename to common/gsql/supportai/SupportAI_InitialLoadCSV.gsql diff --git a/app/gsql/supportai/SupportAI_InitialLoadJSON.gsql b/common/gsql/supportai/SupportAI_InitialLoadJSON.gsql similarity index 100% rename from app/gsql/supportai/SupportAI_InitialLoadJSON.gsql rename to common/gsql/supportai/SupportAI_InitialLoadJSON.gsql diff --git a/app/gsql/supportai/SupportAI_Schema.gsql b/common/gsql/supportai/SupportAI_Schema.gsql similarity index 100% rename from app/gsql/supportai/SupportAI_Schema.gsql rename to common/gsql/supportai/SupportAI_Schema.gsql diff --git a/app/gsql/supportai/Update_Vertices_Processing_Status.gsql b/common/gsql/supportai/Update_Vertices_Processing_Status.gsql similarity index 100% rename from app/gsql/supportai/Update_Vertices_Processing_Status.gsql rename to common/gsql/supportai/Update_Vertices_Processing_Status.gsql diff --git a/app/llm_services/__init__.py b/common/llm_services/__init__.py similarity index 100% rename from app/llm_services/__init__.py rename to common/llm_services/__init__.py diff --git a/app/llm_services/aws_bedrock_service.py b/common/llm_services/aws_bedrock_service.py similarity index 92% rename from app/llm_services/aws_bedrock_service.py rename to common/llm_services/aws_bedrock_service.py index 90d530d5..d0dcaf1d 100644 --- a/app/llm_services/aws_bedrock_service.py +++ b/common/llm_services/aws_bedrock_service.py @@ -1,9 +1,9 @@ -from app.llm_services 
import LLM_Model +import boto3 from langchain_community.chat_models import BedrockChat import logging -from app.log import req_id_cv -import boto3 -from app.tools.logwriter import LogWriter +from common.llm_services import LLM_Model +from common.logs.log import req_id_cv +from common.logs.logwriter import LogWriter logger = logging.getLogger(__name__) diff --git a/app/llm_services/aws_sagemaker_endpoint.py b/common/llm_services/aws_sagemaker_endpoint.py similarity index 94% rename from app/llm_services/aws_sagemaker_endpoint.py rename to common/llm_services/aws_sagemaker_endpoint.py index e4fe061d..287f9636 100644 --- a/app/llm_services/aws_sagemaker_endpoint.py +++ b/common/llm_services/aws_sagemaker_endpoint.py @@ -1,11 +1,11 @@ -from app.llm_services import LLM_Model +import boto3 from langchain.llms.sagemaker_endpoint import LLMContentHandler +import logging import json from typing import Dict -import boto3 -import logging -from app.log import req_id_cv -from app.tools.logwriter import LogWriter +from common.llm_services import LLM_Model +from common.logs.log import req_id_cv +from common.logs.logwriter import LogWriter logger = logging.getLogger(__name__) diff --git a/app/llm_services/azure_openai_service.py b/common/llm_services/azure_openai_service.py similarity index 92% rename from app/llm_services/azure_openai_service.py rename to common/llm_services/azure_openai_service.py index a7e7759e..2094125c 100644 --- a/app/llm_services/azure_openai_service.py +++ b/common/llm_services/azure_openai_service.py @@ -1,8 +1,8 @@ -from app.llm_services import LLM_Model import os import logging -from app.log import req_id_cv -from app.tools.logwriter import LogWriter +from common.llm_services import LLM_Model +from common.logs.log import req_id_cv +from common.logs.logwriter import LogWriter logger = logging.getLogger(__name__) diff --git a/app/llm_services/base_llm.py b/common/llm_services/base_llm.py similarity index 100% rename from app/llm_services/base_llm.py 
rename to common/llm_services/base_llm.py diff --git a/app/llm_services/google_vertexai_service.py b/common/llm_services/google_vertexai_service.py similarity index 89% rename from app/llm_services/google_vertexai_service.py rename to common/llm_services/google_vertexai_service.py index 118e1e60..22679f54 100644 --- a/app/llm_services/google_vertexai_service.py +++ b/common/llm_services/google_vertexai_service.py @@ -1,7 +1,7 @@ -from app.llm_services import LLM_Model import logging -from app.log import req_id_cv -from app.tools.logwriter import LogWriter +from common.llm_services import LLM_Model +from common.logs.log import req_id_cv +from common.logs.logwriter import LogWriter logger = logging.getLogger(__name__) diff --git a/app/llm_services/groq_llm_service.py b/common/llm_services/groq_llm_service.py similarity index 90% rename from app/llm_services/groq_llm_service.py rename to common/llm_services/groq_llm_service.py index 2f2d9ee9..afa6f896 100644 --- a/app/llm_services/groq_llm_service.py +++ b/common/llm_services/groq_llm_service.py @@ -1,8 +1,8 @@ -from app.llm_services import LLM_Model import os import logging -from app.log import req_id_cv -from app.tools.logwriter import LogWriter +from common.llm_services import LLM_Model +from common.logs.log import req_id_cv +from common.logs.logwriter import LogWriter logger = logging.getLogger(__name__) diff --git a/app/llm_services/huggingface_endpoint.py b/common/llm_services/huggingface_endpoint.py similarity index 92% rename from app/llm_services/huggingface_endpoint.py rename to common/llm_services/huggingface_endpoint.py index eacd2f29..2151966a 100644 --- a/app/llm_services/huggingface_endpoint.py +++ b/common/llm_services/huggingface_endpoint.py @@ -1,8 +1,8 @@ -from app.llm_services import LLM_Model import os import logging -from app.log import req_id_cv -from app.tools.logwriter import LogWriter +from common.llm_services import LLM_Model +from common.logs.log import req_id_cv +from common.logs.logwriter 
import LogWriter logger = logging.getLogger(__name__) diff --git a/app/llm_services/ollama.py b/common/llm_services/ollama.py similarity index 89% rename from app/llm_services/ollama.py rename to common/llm_services/ollama.py index 472e82a1..bdb0b44f 100644 --- a/app/llm_services/ollama.py +++ b/common/llm_services/ollama.py @@ -1,8 +1,7 @@ -from app.llm_services import LLM_Model -import os import logging -from app.log import req_id_cv -from app.tools.logwriter import LogWriter +from common.llm_services import LLM_Model +from common.logs.log import req_id_cv +from common.logs.logwriter import LogWriter logger = logging.getLogger(__name__) diff --git a/app/llm_services/openai_service.py b/common/llm_services/openai_service.py similarity index 91% rename from app/llm_services/openai_service.py rename to common/llm_services/openai_service.py index 8a4ff733..fe98b2bb 100644 --- a/app/llm_services/openai_service.py +++ b/common/llm_services/openai_service.py @@ -1,8 +1,8 @@ -from app.llm_services import LLM_Model import os import logging -from app.log import req_id_cv -from app.tools.logwriter import LogWriter +from common.llm_services import LLM_Model +from common.logs.log import req_id_cv +from common.logs.logwriter import LogWriter logger = logging.getLogger(__name__) diff --git a/app/log.py b/common/logs/log.py similarity index 100% rename from app/log.py rename to common/logs/log.py diff --git a/app/tools/logwriter.py b/common/logs/logwriter.py similarity index 98% rename from app/tools/logwriter.py rename to common/logs/logwriter.py index d4bc0317..ff13feed 100644 --- a/app/tools/logwriter.py +++ b/common/logs/logwriter.py @@ -4,7 +4,7 @@ import re from logging.handlers import RotatingFileHandler -from app.log import audit_formatter, formatter, get_log_config, log_file_paths +from common.logs.log import audit_formatter, formatter, get_log_config, log_file_paths log_config = get_log_config() diff --git a/app/metrics/prometheus_metrics.py 
b/common/metrics/prometheus_metrics.py similarity index 100% rename from app/metrics/prometheus_metrics.py rename to common/metrics/prometheus_metrics.py diff --git a/app/metrics/tg_proxy.py b/common/metrics/tg_proxy.py similarity index 95% rename from app/metrics/tg_proxy.py rename to common/metrics/tg_proxy.py index 47889572..5bfcb494 100644 --- a/app/metrics/tg_proxy.py +++ b/common/metrics/tg_proxy.py @@ -1,10 +1,10 @@ import time import re from pyTigerGraph import TigerGraphConnection -from app.metrics.prometheus_metrics import metrics -from app.tools.logwriter import LogWriter +from common.metrics.prometheus_metrics import metrics +from common.logs.logwriter import LogWriter import logging -from app.log import req_id_cv +from common.logs.log import req_id_cv logger = logging.getLogger(__name__) @@ -105,7 +105,7 @@ def _runInstalledQuery(self, query_name, params, usePost=False): return result def __del__(self): - if self.auth_mode == "pwd": + if self.auth_mode == "pwd" and self._tg_connection.apiToken != '': resp = self._tg_connection._delete( self._tg_connection.restppUrl + "/requesttoken", authMode="pwd", diff --git a/app/prompts/aws_bedrock_claude3haiku/entity_relationship_extraction.txt b/common/prompts/aws_bedrock_claude3haiku/entity_relationship_extraction.txt similarity index 100% rename from app/prompts/aws_bedrock_claude3haiku/entity_relationship_extraction.txt rename to common/prompts/aws_bedrock_claude3haiku/entity_relationship_extraction.txt diff --git a/app/prompts/aws_bedrock_claude3haiku/generate_function.txt b/common/prompts/aws_bedrock_claude3haiku/generate_function.txt similarity index 100% rename from app/prompts/aws_bedrock_claude3haiku/generate_function.txt rename to common/prompts/aws_bedrock_claude3haiku/generate_function.txt diff --git a/app/prompts/aws_bedrock_claude3haiku/map_question_to_schema.txt b/common/prompts/aws_bedrock_claude3haiku/map_question_to_schema.txt similarity index 100% rename from 
app/prompts/aws_bedrock_claude3haiku/map_question_to_schema.txt rename to common/prompts/aws_bedrock_claude3haiku/map_question_to_schema.txt diff --git a/app/prompts/aws_bedrock_titan/generate_function.txt b/common/prompts/aws_bedrock_titan/generate_function.txt similarity index 100% rename from app/prompts/aws_bedrock_titan/generate_function.txt rename to common/prompts/aws_bedrock_titan/generate_function.txt diff --git a/app/prompts/aws_bedrock_titan/map_question_to_schema.txt b/common/prompts/aws_bedrock_titan/map_question_to_schema.txt similarity index 100% rename from app/prompts/aws_bedrock_titan/map_question_to_schema.txt rename to common/prompts/aws_bedrock_titan/map_question_to_schema.txt diff --git a/app/prompts/azure_open_ai_gpt35_turbo_instruct/entity_relationship_extraction.txt b/common/prompts/azure_open_ai_gpt35_turbo_instruct/entity_relationship_extraction.txt similarity index 100% rename from app/prompts/azure_open_ai_gpt35_turbo_instruct/entity_relationship_extraction.txt rename to common/prompts/azure_open_ai_gpt35_turbo_instruct/entity_relationship_extraction.txt diff --git a/app/prompts/azure_open_ai_gpt35_turbo_instruct/generate_function.txt b/common/prompts/azure_open_ai_gpt35_turbo_instruct/generate_function.txt similarity index 100% rename from app/prompts/azure_open_ai_gpt35_turbo_instruct/generate_function.txt rename to common/prompts/azure_open_ai_gpt35_turbo_instruct/generate_function.txt diff --git a/app/prompts/azure_open_ai_gpt35_turbo_instruct/map_question_to_schema.txt b/common/prompts/azure_open_ai_gpt35_turbo_instruct/map_question_to_schema.txt similarity index 100% rename from app/prompts/azure_open_ai_gpt35_turbo_instruct/map_question_to_schema.txt rename to common/prompts/azure_open_ai_gpt35_turbo_instruct/map_question_to_schema.txt diff --git a/app/prompts/gcp_vertexai_palm/entity_relationship_extraction.txt b/common/prompts/gcp_vertexai_palm/entity_relationship_extraction.txt similarity index 100% rename from 
app/prompts/gcp_vertexai_palm/entity_relationship_extraction.txt rename to common/prompts/gcp_vertexai_palm/entity_relationship_extraction.txt diff --git a/app/prompts/gcp_vertexai_palm/generate_function.txt b/common/prompts/gcp_vertexai_palm/generate_function.txt similarity index 100% rename from app/prompts/gcp_vertexai_palm/generate_function.txt rename to common/prompts/gcp_vertexai_palm/generate_function.txt diff --git a/app/prompts/gcp_vertexai_palm/map_question_to_schema.txt b/common/prompts/gcp_vertexai_palm/map_question_to_schema.txt similarity index 100% rename from app/prompts/gcp_vertexai_palm/map_question_to_schema.txt rename to common/prompts/gcp_vertexai_palm/map_question_to_schema.txt diff --git a/app/prompts/llama_70b/generate_function.txt b/common/prompts/llama_70b/generate_function.txt similarity index 100% rename from app/prompts/llama_70b/generate_function.txt rename to common/prompts/llama_70b/generate_function.txt diff --git a/app/prompts/llama_70b/map_question_to_schema.txt b/common/prompts/llama_70b/map_question_to_schema.txt similarity index 100% rename from app/prompts/llama_70b/map_question_to_schema.txt rename to common/prompts/llama_70b/map_question_to_schema.txt diff --git a/app/prompts/openai_gpt4/entity_relationship_extraction.txt b/common/prompts/openai_gpt4/entity_relationship_extraction.txt similarity index 100% rename from app/prompts/openai_gpt4/entity_relationship_extraction.txt rename to common/prompts/openai_gpt4/entity_relationship_extraction.txt diff --git a/app/prompts/openai_gpt4/generate_function.txt b/common/prompts/openai_gpt4/generate_function.txt similarity index 100% rename from app/prompts/openai_gpt4/generate_function.txt rename to common/prompts/openai_gpt4/generate_function.txt diff --git a/app/prompts/openai_gpt4/map_question_to_schema.txt b/common/prompts/openai_gpt4/map_question_to_schema.txt similarity index 100% rename from app/prompts/openai_gpt4/map_question_to_schema.txt rename to 
common/prompts/openai_gpt4/map_question_to_schema.txt diff --git a/app/py_schemas/__init__.py b/common/py_schemas/__init__.py similarity index 100% rename from app/py_schemas/__init__.py rename to common/py_schemas/__init__.py diff --git a/app/py_schemas/schemas.py b/common/py_schemas/schemas.py similarity index 100% rename from app/py_schemas/schemas.py rename to common/py_schemas/schemas.py diff --git a/app/py_schemas/tool_io_schemas.py b/common/py_schemas/tool_io_schemas.py similarity index 100% rename from app/py_schemas/tool_io_schemas.py rename to common/py_schemas/tool_io_schemas.py diff --git a/app/session.py b/common/session.py similarity index 100% rename from app/session.py rename to common/session.py diff --git a/app/status.py b/common/status.py similarity index 100% rename from app/status.py rename to common/status.py diff --git a/app/storage/__init__.py b/common/storage/__init__.py similarity index 100% rename from app/storage/__init__.py rename to common/storage/__init__.py diff --git a/app/storage/azure_blob_store.py b/common/storage/azure_blob_store.py similarity index 92% rename from app/storage/azure_blob_store.py rename to common/storage/azure_blob_store.py index f9f577e4..c285ee0f 100644 --- a/app/storage/azure_blob_store.py +++ b/common/storage/azure_blob_store.py @@ -1,4 +1,4 @@ -from app.storage.base_blob_store import BlobStorage +from common.storage.base_blob_store import BlobStorage from azure.storage.blob import BlobServiceClient diff --git a/app/storage/base_blob_store.py b/common/storage/base_blob_store.py similarity index 100% rename from app/storage/base_blob_store.py rename to common/storage/base_blob_store.py diff --git a/app/storage/google_blob_store.py b/common/storage/google_blob_store.py similarity index 91% rename from app/storage/google_blob_store.py rename to common/storage/google_blob_store.py index ce871859..007e1ba4 100644 --- a/app/storage/google_blob_store.py +++ b/common/storage/google_blob_store.py @@ -1,4 +1,4 @@ 
-from app.storage.base_blob_store import BlobStorage +from common.storage.base_blob_store import BlobStorage from google.cloud import storage diff --git a/app/storage/s3_blob_store.py b/common/storage/s3_blob_store.py similarity index 92% rename from app/storage/s3_blob_store.py rename to common/storage/s3_blob_store.py index ae3fcf1a..3bc17591 100644 --- a/app/storage/s3_blob_store.py +++ b/common/storage/s3_blob_store.py @@ -1,4 +1,4 @@ -from app.storage.base_blob_store import BlobStorage +from common.storage.base_blob_store import BlobStorage import boto3 diff --git a/copilot/.dockerignore b/copilot/.dockerignore new file mode 100644 index 00000000..5b04df42 --- /dev/null +++ b/copilot/.dockerignore @@ -0,0 +1,5 @@ +Dockerfile +Dockerfile.tests +docs +tests +udfs diff --git a/copilot/Dockerfile b/copilot/Dockerfile new file mode 100644 index 00000000..19423e9c --- /dev/null +++ b/copilot/Dockerfile @@ -0,0 +1,18 @@ +FROM python:3.11.8 +WORKDIR /code + +COPY copilot/requirements.txt requirements.txt + +RUN apt-get update && apt-get upgrade -y +RUN pip install -r requirements.txt + +COPY copilot/app /code +COPY common /code/common + +ENV LLM_CONFIG="/code/configs/llm_config.json" +ENV DB_CONFIG="/code/configs/db_config.json" +ENV MILVUS_CONFIG="/code/configs/milvus_config.json" +ENV LOGLEVEL="INFO" + +EXPOSE 8000 +CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"] diff --git a/copilot/Dockerfile.tests b/copilot/Dockerfile.tests new file mode 100644 index 00000000..09e86197 --- /dev/null +++ b/copilot/Dockerfile.tests @@ -0,0 +1,21 @@ +FROM python:3.11-bullseye +WORKDIR /code + + +COPY ./copilot/requirements.txt /code/requirements.txt + + +RUN apt-get update && apt-get upgrade -y +RUN pip install -r /code/requirements.txt + + +COPY ./copilot/app /code/app +COPY ./common /code/app/common +COPY ./copilot/tests /code/tests + +COPY ./.git /code/.git + +# INFO, DEBUG, DEBUG_PII +ENV LOGLEVEL="DEBUG" + +WORKDIR /code/tests diff --git a/app/__init__.py 
b/copilot/app/__init__.py similarity index 100% rename from app/__init__.py rename to copilot/app/__init__.py diff --git a/copilot/app/agent.py b/copilot/app/agent.py new file mode 100644 index 00000000..5fd0f5c6 --- /dev/null +++ b/copilot/app/agent.py @@ -0,0 +1,111 @@ +import time +from langchain.agents import AgentType, initialize_agent +from typing import List, Union +import logging + +from pyTigerGraph import TigerGraphConnection + +from agent.agent_graph import TigerGraphAgentGraph +from tools import GenerateFunction, MapQuestionToSchema + +from common.embeddings.embedding_services import EmbeddingModel +from common.embeddings.base_embedding_store import EmbeddingStore +from common.metrics.prometheus_metrics import metrics +from common.metrics.tg_proxy import TigerGraphConnectionProxy +from common.llm_services.base_llm import LLM_Model + +from common.logs.log import req_id_cv +from common.logs.logwriter import LogWriter + +from typing_extensions import TypedDict + +logger = logging.getLogger(__name__) + + + +class TigerGraphAgent: + """TigerGraph Agent Class + + The TigerGraph Agent Class combines the various dependencies needed for an AI Agent to reason with data in a TigerGraph database. + + Args: + llm_provider (LLM_Model): + an LLM_Model class that connects to an external LLM API service. + db_connection (TigerGraphConnection): + a PyTigerGraph TigerGraphConnection object instantiated to interact with the desired database/graph and authenticated with correct roles. + embedding_model (EmbeddingModel): + an EmbeddingModel class that connects to an external embedding API service. + embedding_store (EmbeddingStore): + an EmbeddingStore class that connects to an embedding store to retrieve pyTigerGraph and custom query documentation from.
+ """ + + def __init__( + self, + llm_provider: LLM_Model, + db_connection: TigerGraphConnectionProxy, + embedding_model: EmbeddingModel, + embedding_store: EmbeddingStore, + ): + self.conn = db_connection + + self.llm = llm_provider + self.model_name = embedding_model.model_name + self.embedding_model = embedding_model + self.embedding_store = embedding_store + + self.mq2s = MapQuestionToSchema( + self.conn, self.llm.model, self.llm.map_question_schema_prompt + ) + self.gen_func = GenerateFunction( + self.conn, + self.llm.model, + self.llm.generate_function_prompt, + embedding_model, + embedding_store, + ) + + self.agent = TigerGraphAgentGraph( + self.llm, self.conn, self.embedding_model, self.embedding_store, self.mq2s, self.gen_func + ).create_graph() + + + logger.debug(f"request_id={req_id_cv.get()} agent initialized") + + def question_for_agent(self, question: str): + """Question for Agent. + + Ask the agent a question to be answered by the database. Returns the agent response or raises an exception.
+ + Args: + question (str): + The question to ask the agent + """ + start_time = time.time() + metrics.llm_inprogress_requests.labels(self.model_name).inc() + + try: + LogWriter.info(f"request_id={req_id_cv.get()} ENTRY question_for_agent") + logger.debug_pii( + f"request_id={req_id_cv.get()} question_for_agent question={question}" + ) + + for output in self.agent.stream({"question": question}): + for key, value in output.items(): + LogWriter.info(f"request_id={req_id_cv.get()} executed node {key}") + + LogWriter.info(f"request_id={req_id_cv.get()} EXIT question_for_agent") + return value["answer"] + except Exception as e: + metrics.llm_query_error_total.labels(self.model_name).inc() + LogWriter.error(f"request_id={req_id_cv.get()} FAILURE question_for_agent") + import traceback + + traceback.print_exc() + raise e + finally: + metrics.llm_request_total.labels(self.model_name).inc() + metrics.llm_inprogress_requests.labels(self.model_name).dec() + duration = time.time() - start_time + metrics.llm_request_duration_seconds.labels(self.model_name).observe( + duration + ) diff --git a/app/agent/__init__.py b/copilot/app/agent/__init__.py similarity index 100% rename from app/agent/__init__.py rename to copilot/app/agent/__init__.py diff --git a/app/agent/agent.py b/copilot/app/agent/agent.py similarity index 88% rename from app/agent/agent.py rename to copilot/app/agent/agent.py index 9cc1cbc7..e6162834 100644 --- a/app/agent/agent.py +++ b/copilot/app/agent/agent.py @@ -5,17 +5,17 @@ from pyTigerGraph import TigerGraphConnection -from app.tools import GenerateFunction, MapQuestionToSchema, GenerateCypher -from app.embeddings.embedding_services import EmbeddingModel -from app.embeddings.base_embedding_store import EmbeddingStore +from agent.agent_graph import TigerGraphAgentGraph +from tools import GenerateCypher, GenerateFunction, MapQuestionToSchema -from app.metrics.prometheus_metrics import metrics -from app.metrics.tg_proxy import TigerGraphConnectionProxy -from 
app.llm_services.base_llm import LLM_Model -from app.agent.agent_graph import TigerGraphAgentGraph +from common.embeddings.embedding_services import EmbeddingModel +from common.embeddings.base_embedding_store import EmbeddingStore +from common.llm_services.base_llm import LLM_Model +from common.metrics.prometheus_metrics import metrics +from common.metrics.tg_proxy import TigerGraphConnectionProxy -from app.log import req_id_cv -from app.tools.logwriter import LogWriter +from common.logs.log import req_id_cv +from common.logs.logwriter import LogWriter from typing_extensions import TypedDict diff --git a/app/agent/agent_generation.py b/copilot/app/agent/agent_generation.py similarity index 95% rename from app/agent/agent_generation.py rename to copilot/app/agent/agent_generation.py index 2d367cbc..055c22af 100644 --- a/app/agent/agent_generation.py +++ b/copilot/app/agent/agent_generation.py @@ -1,8 +1,9 @@ + +import logging from langchain.prompts import PromptTemplate from langchain_core.output_parsers import PydanticOutputParser -from app.tools.logwriter import LogWriter -import logging -from app.log import req_id_cv +from common.logs.logwriter import LogWriter +from common.logs.log import req_id_cv from langchain.pydantic_v1 import BaseModel, Field logger = logging.getLogger(__name__) diff --git a/app/agent/agent_graph.py b/copilot/app/agent/agent_graph.py similarity index 96% rename from app/agent/agent_graph.py rename to copilot/app/agent/agent_graph.py index c964ca89..d74ca064 100644 --- a/app/agent/agent_graph.py +++ b/copilot/app/agent/agent_graph.py @@ -3,22 +3,22 @@ from typing import Optional from langgraph.graph import END, StateGraph -from app.agent.agent_generation import TigerGraphAgentGenerator -from app.agent.agent_router import TigerGraphAgentRouter -from app.agent.agent_hallucination_check import TigerGraphAgentHallucinationCheck -from app.agent.agent_usefulness_check import TigerGraphAgentUsefulnessCheck -from app.agent.agent_rewrite import 
TigerGraphAgentRewriter +from agent.agent_generation import TigerGraphAgentGenerator +from agent.agent_router import TigerGraphAgentRouter +from agent.agent_hallucination_check import TigerGraphAgentHallucinationCheck +from agent.agent_usefulness_check import TigerGraphAgentUsefulnessCheck +from agent.agent_rewrite import TigerGraphAgentRewriter -from app.tools import MapQuestionToSchemaException -from app.supportai.retrievers import HNSWOverlapRetriever +from tools import MapQuestionToSchemaException +from supportai.retrievers import HNSWOverlapRetriever -from app.py_schemas import (MapQuestionToSchemaResponse, +from common.py_schemas import (MapQuestionToSchemaResponse, CoPilotResponse) from pyTigerGraph.pyTigerGraphException import TigerGraphException import logging -from app.log import req_id_cv +from common.logs.log import req_id_cv logger = logging.getLogger(__name__) @@ -361,4 +361,4 @@ def create_graph(self): app = self.workflow.compile() return app - \ No newline at end of file + diff --git a/app/agent/agent_hallucination_check.py b/copilot/app/agent/agent_hallucination_check.py similarity index 96% rename from app/agent/agent_hallucination_check.py rename to copilot/app/agent/agent_hallucination_check.py index 77c813b8..d4251c6a 100644 --- a/app/agent/agent_hallucination_check.py +++ b/copilot/app/agent/agent_hallucination_check.py @@ -1,8 +1,8 @@ +import logging from langchain.prompts import PromptTemplate from langchain_core.output_parsers import PydanticOutputParser -from app.tools.logwriter import LogWriter -import logging -from app.log import req_id_cv +from common.logs.logwriter import LogWriter +from common.logs.log import req_id_cv from langchain.pydantic_v1 import BaseModel, Field logger = logging.getLogger(__name__) diff --git a/app/agent/agent_rewrite.py b/copilot/app/agent/agent_rewrite.py similarity index 95% rename from app/agent/agent_rewrite.py rename to copilot/app/agent/agent_rewrite.py index f923ebbe..b1c13bf6 100644 --- 
a/app/agent/agent_rewrite.py +++ b/copilot/app/agent/agent_rewrite.py @@ -1,10 +1,12 @@ + +import logging from langchain.prompts import PromptTemplate from langchain_core.output_parsers import PydanticOutputParser -from app.tools.logwriter import LogWriter -import logging -from app.log import req_id_cv +from common.logs.log import req_id_cv +from common.logs.logwriter import LogWriter from langchain.pydantic_v1 import BaseModel, Field + logger = logging.getLogger(__name__) class QuestionRewriteResponse(BaseModel): diff --git a/app/agent/agent_router.py b/copilot/app/agent/agent_router.py similarity index 96% rename from app/agent/agent_router.py rename to copilot/app/agent/agent_router.py index 4a6b3d19..3099580f 100644 --- a/app/agent/agent_router.py +++ b/copilot/app/agent/agent_router.py @@ -1,9 +1,9 @@ from langchain.prompts import PromptTemplate from langchain_core.output_parsers import PydanticOutputParser -from app.tools.logwriter import LogWriter +from common.logs.logwriter import LogWriter from pyTigerGraph.pyTigerGraph import TigerGraphConnection import logging -from app.log import req_id_cv +from common.logs.log import req_id_cv from langchain.pydantic_v1 import BaseModel, Field diff --git a/app/agent/agent_usefulness_check.py b/copilot/app/agent/agent_usefulness_check.py similarity index 96% rename from app/agent/agent_usefulness_check.py rename to copilot/app/agent/agent_usefulness_check.py index a620ab5a..83c3b99b 100644 --- a/app/agent/agent_usefulness_check.py +++ b/copilot/app/agent/agent_usefulness_check.py @@ -1,9 +1,8 @@ from langchain.prompts import PromptTemplate from langchain_core.output_parsers import PydanticOutputParser -from app.tools.logwriter import LogWriter -import json +from common.logs.logwriter import LogWriter import logging -from app.log import req_id_cv +from common.logs.log import req_id_cv from langchain.pydantic_v1 import BaseModel, Field logger = logging.getLogger(__name__) diff --git a/copilot/app/common 
b/copilot/app/common new file mode 120000 index 00000000..248927d2 --- /dev/null +++ b/copilot/app/common @@ -0,0 +1 @@ +../../common/ \ No newline at end of file diff --git a/copilot/app/configs b/copilot/app/configs new file mode 120000 index 00000000..5992d109 --- /dev/null +++ b/copilot/app/configs @@ -0,0 +1 @@ +../../configs \ No newline at end of file diff --git a/app/main.py b/copilot/app/main.py similarity index 94% rename from app/main.py rename to copilot/app/main.py index ca3613de..34b51265 100644 --- a/app/main.py +++ b/copilot/app/main.py @@ -10,12 +10,12 @@ from starlette.middleware.cors import CORSMiddleware from fastapi.responses import JSONResponse -from app import routers -from app.config import PATH_PREFIX, PRODUCTION -from app.log import req_id_cv -from app.metrics.prometheus_metrics import metrics as pmetrics -from app.tools.logwriter import LogWriter -from app.util import get_db_connection_pwd, get_db_connection_id_token +import routers +from common.config import PATH_PREFIX, PRODUCTION +from common.logs.log import req_id_cv +from common.metrics.prometheus_metrics import metrics as pmetrics +from common.logs.logwriter import LogWriter +from common.db.connections import get_db_connection_pwd, get_db_connection_id_token if PRODUCTION: app = FastAPI( diff --git a/app/routers/__init__.py b/copilot/app/routers/__init__.py similarity index 100% rename from app/routers/__init__.py rename to copilot/app/routers/__init__.py diff --git a/app/routers/inquiryai.py b/copilot/app/routers/inquiryai.py similarity index 97% rename from app/routers/inquiryai.py rename to copilot/app/routers/inquiryai.py index 22d10687..1888fd23 100644 --- a/app/routers/inquiryai.py +++ b/copilot/app/routers/inquiryai.py @@ -8,9 +8,9 @@ from fastapi.responses import HTMLResponse from fastapi.security.http import HTTPBase -from app.agent import TigerGraphAgent -from app.config import embedding_service, embedding_store, llm_config, session_handler -from app.llm_services import ( 
+from agent import TigerGraphAgent +from common.config import embedding_service, embedding_store, llm_config, session_handler +from common.llm_services import ( AWS_SageMaker_Endpoint, AWSBedrock, AzureOpenAI, @@ -20,9 +20,9 @@ Ollama, HuggingFaceEndpoint ) -from app.log import req_id_cv -from app.metrics.prometheus_metrics import metrics as pmetrics -from app.py_schemas.schemas import ( +from common.logs.log import req_id_cv +from common.metrics.prometheus_metrics import metrics as pmetrics +from common.py_schemas.schemas import ( CoPilotResponse, GSQLQueryInfo, GSQLQueryList, @@ -30,8 +30,8 @@ QueryDeleteRequest, QueryUpsertRequest, ) -from app.tools.logwriter import LogWriter -from app.tools.validation_utils import MapQuestionToSchemaException +from common.logs.logwriter import LogWriter +from tools.validation_utils import MapQuestionToSchemaException logger = logging.getLogger(__name__) router = APIRouter(tags=["InquiryAI"]) @@ -492,7 +492,7 @@ def logout(graphname, session_id: str, credentials: Annotated[HTTPBase, Depends( @router.get("/{graphname}/chat") def chat(request: Request): - return HTMLResponse(open("app/static/chat.html").read()) + return HTMLResponse(open("static/chat.html").read()) @router.websocket("/{graphname}/ws") diff --git a/app/routers/queryai.py b/copilot/app/routers/queryai.py similarity index 75% rename from app/routers/queryai.py rename to copilot/app/routers/queryai.py index d143b9a7..de9a74d0 100644 --- a/app/routers/queryai.py +++ b/copilot/app/routers/queryai.py @@ -1,24 +1,18 @@ -import json import logging import traceback -from typing import List, Union, Annotated +from typing import Annotated -from fastapi import APIRouter, HTTPException, Request, Depends -from fastapi.responses import HTMLResponse +from fastapi import APIRouter, Request, Depends from fastapi.security.http import HTTPBase -from app.agent import TigerGraphAgent -from app.config import embedding_service, embedding_store, llm_config, session_handler -from app.tools 
import GenerateCypher -from app.log import req_id_cv -from app.metrics.prometheus_metrics import metrics as pmetrics -from app.py_schemas.schemas import ( +from common.config import get_llm_service, llm_config +from common.logs.log import req_id_cv +from common.py_schemas.schemas import ( CoPilotResponse, NaturalLanguageQuery ) - -from app.config import get_llm_service -from app.tools.logwriter import LogWriter +from tools import GenerateCypher +from common.logs.logwriter import LogWriter logger = logging.getLogger(__name__) router = APIRouter(tags=["QueryAI"]) diff --git a/app/routers/root.py b/copilot/app/routers/root.py similarity index 94% rename from app/routers/root.py rename to copilot/app/routers/root.py index 630269e6..e983c98d 100644 --- a/app/routers/root.py +++ b/copilot/app/routers/root.py @@ -3,7 +3,7 @@ from fastapi import APIRouter, Response from fastapi.responses import FileResponse -from app.config import llm_config +from common.config import llm_config from pymilvus import connections, utility @@ -49,4 +49,4 @@ async def metrics(): @router.get("/favicon.ico", include_in_schema=False) async def favicon(): - return FileResponse("app/static/favicon.ico") + return FileResponse("static/favicon.ico") diff --git a/app/routers/supportai.py b/copilot/app/routers/supportai.py similarity index 83% rename from app/routers/supportai.py rename to copilot/app/routers/supportai.py index 987da931..d3a258b4 100644 --- a/app/routers/supportai.py +++ b/copilot/app/routers/supportai.py @@ -5,40 +5,32 @@ from fastapi import APIRouter, BackgroundTasks, Depends, Request from fastapi.security.http import HTTPBase - -from app.config import embedding_service, embedding_store, get_llm_service, llm_config - -from app.py_schemas.schemas import ( - CoPilotResponse, - CreateIngestConfig, - LoadingInfo, - SupportAIQuestion, -) -from app.supportai.concept_management.create_concepts import ( - CommunityConceptCreator, - EntityConceptCreator, - HigherLevelConceptCreator, - 
RelationshipConceptCreator, -) -from app.supportai.retrievers import ( - EntityRelationshipRetriever, - HNSWOverlapRetriever, - HNSWRetriever, - HNSWSiblingRetriever, -) - -from app.util import get_eventual_consistency_checker +from supportai.concept_management.create_concepts import ( + CommunityConceptCreator, EntityConceptCreator, HigherLevelConceptCreator, + RelationshipConceptCreator) +from supportai.retrievers import (EntityRelationshipRetriever, + HNSWOverlapRetriever, HNSWRetriever, + HNSWSiblingRetriever) + +from common.config import (db_config, embedding_service, embedding_store, + get_llm_service, llm_config) +from common.logs.logwriter import LogWriter +from common.py_schemas.schemas import (CoPilotResponse, CreateIngestConfig, + LoadingInfo, SupportAIQuestion) logger = logging.getLogger(__name__) router = APIRouter(tags=["SupportAI"]) security = HTTPBase(scheme="basic", auto_error=False) + @router.post("/{graphname}/supportai/initialize") -def initialize(graphname, conn: Request, credentials: Annotated[HTTPBase, Depends(security)]): +def initialize( + graphname, conn: Request, credentials: Annotated[HTTPBase, Depends(security)] +): conn = conn.state.conn # need to open the file using the absolute path - file_path = "app/gsql/supportai/SupportAI_Schema.gsql" + file_path = "common/gsql/supportai/SupportAI_Schema.gsql" with open(file_path, "r") as f: schema = f.read() schema_res = conn.gsql( @@ -47,7 +39,7 @@ def initialize(graphname, conn: Request, credentials: Annotated[HTTPBase, Depend ) ) - file_path = "app/gsql/supportai/SupportAI_IndexCreation.gsql" + file_path = "common/gsql/supportai/SupportAI_IndexCreation.gsql" with open(file_path) as f: index = f.read() index_res = conn.gsql( @@ -56,7 +48,7 @@ def initialize(graphname, conn: Request, credentials: Annotated[HTTPBase, Depend ) ) - file_path = "app/gsql/supportai/Scan_For_Updates.gsql" + file_path = "common/gsql/supportai/Scan_For_Updates.gsql" with open(file_path) as f: scan_for_updates = 
f.read() res = conn.gsql( @@ -67,7 +59,7 @@ def initialize(graphname, conn: Request, credentials: Annotated[HTTPBase, Depend + "\n INSTALL QUERY Scan_For_Updates" ) - file_path = "app/gsql/supportai/Update_Vertices_Processing_Status.gsql" + file_path = "common/gsql/supportai/Update_Vertices_Processing_Status.gsql" with open(file_path) as f: update_vertices = f.read() res = conn.gsql( @@ -86,11 +78,16 @@ def initialize(graphname, conn: Request, credentials: Annotated[HTTPBase, Depend @router.post("/{graphname}/supportai/create_ingest") -def create_ingest(graphname, ingest_config: CreateIngestConfig, conn: Request, credentials: Annotated[HTTPBase, Depends(security)]): +def create_ingest( + graphname, + ingest_config: CreateIngestConfig, + conn: Request, + credentials: Annotated[HTTPBase, Depends(security)], +): conn = conn.state.conn if ingest_config.file_format.lower() == "json": - file_path = "app/gsql/supportai/SupportAI_InitialLoadJSON.gsql" + file_path = "common/gsql/supportai/SupportAI_InitialLoadJSON.gsql" with open(file_path) as f: ingest_template = f.read() @@ -101,7 +98,7 @@ def create_ingest(graphname, ingest_config: CreateIngestConfig, conn: Request, c ingest_template = ingest_template.replace('"content"', '"{}"'.format(doc_text)) if ingest_config.file_format.lower() == "csv": - file_path = "app/gsql/supportai/SupportAI_InitialLoadCSV.gsql" + file_path = "common/gsql/supportai/SupportAI_InitialLoadCSV.gsql" with open(file_path) as f: ingest_template = f.read() @@ -115,7 +112,7 @@ def create_ingest(graphname, ingest_config: CreateIngestConfig, conn: Request, c ingest_template = ingest_template.replace('"\\n"', '"{}"'.format(eol)) ingest_template = ingest_template.replace('"double"', '"{}"'.format(quote)) - file_path = "app/gsql/supportai/SupportAI_DataSourceCreation.gsql" + file_path = "common/gsql/supportai/SupportAI_DataSourceCreation.gsql" with open(file_path) as f: data_stream_conn = f.read() @@ -215,12 +212,10 @@ def create_ingest(graphname, 
ingest_config: CreateIngestConfig, conn: Request, c def ingest( graphname, loader_info: LoadingInfo, - background_tasks: BackgroundTasks, conn: Request, - credentials: Annotated[HTTPBase, Depends(security)] + credentials: Annotated[HTTPBase, Depends(security)], ): conn = conn.state.conn - background_tasks.add_task(get_eventual_consistency_checker, graphname, conn) if loader_info.file_path is None: raise Exception("File path not provided") if loader_info.load_job_id is None: @@ -261,7 +256,12 @@ def ingest( @router.post("/{graphname}/supportai/search") -def search(graphname, query: SupportAIQuestion, conn: Request, credentials: Annotated[HTTPBase, Depends(security)]): +def search( + graphname, + query: SupportAIQuestion, + conn: Request, + credentials: Annotated[HTTPBase, Depends(security)], +): conn = conn.state.conn if query.method.lower() == "hnswoverlap": retriever = HNSWOverlapRetriever( @@ -310,7 +310,12 @@ def search(graphname, query: SupportAIQuestion, conn: Request, credentials: Anno @router.post("/{graphname}/supportai/answerquestion") -def answer_question(graphname, query: SupportAIQuestion, conn: Request, credentials: Annotated[HTTPBase, Depends(security)]): +def answer_question( + graphname, + query: SupportAIQuestion, + conn: Request, + credentials: Annotated[HTTPBase, Depends(security)], +): conn = conn.state.conn resp = CoPilotResponse resp.response_type = "supportai" @@ -367,13 +372,9 @@ def answer_question(graphname, query: SupportAIQuestion, conn: Request, credenti @router.get("/{graphname}/supportai/buildconcepts") def build_concepts( - graphname, - background_tasks: BackgroundTasks, - conn: Request, - credentials: Annotated[HTTPBase, Depends(security)] + graphname, conn: Request, credentials: Annotated[HTTPBase, Depends(security)] ): conn = conn.state.conn - background_tasks.add_task(get_eventual_consistency_checker, graphname) rels_concepts = RelationshipConceptCreator(conn, llm_config, embedding_service) rels_concepts.create_concepts() 
ents_concepts = EntityConceptCreator(conn, llm_config, embedding_service) @@ -386,20 +387,21 @@ def build_concepts( return {"status": "success"} -@router.get("/{graphname}/supportai/forceupdate") -async def force_update(graphname: str, background_tasks: BackgroundTasks, conn: Request, credentials: Annotated[HTTPBase, Depends(security)]): - conn = conn.state.conn - background_tasks.add_task(get_eventual_consistency_checker, graphname, conn) - return {"status": "success"} - - @router.get("/{graphname}/supportai/consistency_status") -def consistency_status(graphname: str, conn: Request, credentials: Annotated[HTTPBase, Depends(security)]): - conn = conn.state.conn - ecc = get_eventual_consistency_checker(graphname, conn) - return ecc.get_status() - +def ecc( + graphname, + conn: Request, + credentials: Annotated[HTTPBase, Depends(security)], + bg_tasks: BackgroundTasks, +): + from httpx import get as http_get -@router.get("/{graphname}/supportai/auth_check") -def auth_check(graphname: str, credentials: Annotated[HTTPBase, Depends(security)]): - return {"status": "success"} \ No newline at end of file + ecc = ( + db_config.get("ecc", "http://localhost:8001") + + f"/{graphname}/consistency_status" + ) + LogWriter.info(f"Sending ECC request to: {ecc}") + bg_tasks.add_task( + http_get, ecc, headers={"Authorization": conn.headers["authorization"]} + ) + return {"status": "submitted"} diff --git a/app/static/chat.html b/copilot/app/static/chat.html similarity index 100% rename from app/static/chat.html rename to copilot/app/static/chat.html diff --git a/app/static/favicon.ico b/copilot/app/static/favicon.ico similarity index 100% rename from app/static/favicon.ico rename to copilot/app/static/favicon.ico diff --git a/app/supportai/README.md b/copilot/app/supportai/README.md similarity index 100% rename from app/supportai/README.md rename to copilot/app/supportai/README.md diff --git a/app/supportai/concept_management/create_concepts.py 
b/copilot/app/supportai/concept_management/create_concepts.py similarity index 97% rename from app/supportai/concept_management/create_concepts.py rename to copilot/app/supportai/concept_management/create_concepts.py index c6ca44b8..42cc3866 100644 --- a/app/supportai/concept_management/create_concepts.py +++ b/copilot/app/supportai/concept_management/create_concepts.py @@ -6,7 +6,7 @@ def __init__(self, conn, llm, embedding_service): def _install_query(self, query_name): with open( - f"app/gsql/supportai/concept_curation/concept_creation/{query_name}.gsql", + f"common/gsql/supportai/concept_curation/concept_creation/{query_name}.gsql", "r", ) as f: query = f.read() diff --git a/app/supportai/retrievers/BaseRetriever.py b/copilot/app/supportai/retrievers/BaseRetriever.py similarity index 88% rename from app/supportai/retrievers/BaseRetriever.py rename to copilot/app/supportai/retrievers/BaseRetriever.py index 6b11235f..8528e204 100644 --- a/app/supportai/retrievers/BaseRetriever.py +++ b/copilot/app/supportai/retrievers/BaseRetriever.py @@ -1,7 +1,7 @@ -from app.embeddings.embedding_services import EmbeddingModel -from app.embeddings.base_embedding_store import EmbeddingStore -from app.metrics.tg_proxy import TigerGraphConnectionProxy -from app.llm_services.base_llm import LLM_Model +from common.embeddings.embedding_services import EmbeddingModel +from common.embeddings.base_embedding_store import EmbeddingStore +from common.metrics.tg_proxy import TigerGraphConnectionProxy +from common.llm_services.base_llm import LLM_Model from langchain_core.output_parsers import StrOutputParser from langchain_core.prompts import ChatPromptTemplate @@ -20,7 +20,7 @@ def __init__( self.embedding_store = embedding_store def _install_query(self, query_name): - with open(f"app/gsql/supportai/retrievers/{query_name}.gsql", "r") as f: + with open(f"common/gsql/supportai/retrievers/{query_name}.gsql", "r") as f: query = f.read() res = self.conn.gsql( "USE GRAPH " diff --git 
a/app/supportai/retrievers/EntityRelationshipRetriever.py b/copilot/app/supportai/retrievers/EntityRelationshipRetriever.py similarity index 85% rename from app/supportai/retrievers/EntityRelationshipRetriever.py rename to copilot/app/supportai/retrievers/EntityRelationshipRetriever.py index de2308ae..a69fabc2 100644 --- a/app/supportai/retrievers/EntityRelationshipRetriever.py +++ b/copilot/app/supportai/retrievers/EntityRelationshipRetriever.py @@ -1,6 +1,6 @@ -from app.supportai.retrievers import BaseRetriever -from app.supportai.extractors import LLMEntityRelationshipExtractor -from app.metrics.tg_proxy import TigerGraphConnectionProxy +from supportai.retrievers import BaseRetriever +from common.extractors import LLMEntityRelationshipExtractor +from common.metrics.tg_proxy import TigerGraphConnectionProxy class EntityRelationshipRetriever(BaseRetriever): diff --git a/app/supportai/retrievers/HNSWOverlapRetriever.py b/copilot/app/supportai/retrievers/HNSWOverlapRetriever.py similarity index 92% rename from app/supportai/retrievers/HNSWOverlapRetriever.py rename to copilot/app/supportai/retrievers/HNSWOverlapRetriever.py index 47a7b1b6..fefa6380 100644 --- a/app/supportai/retrievers/HNSWOverlapRetriever.py +++ b/copilot/app/supportai/retrievers/HNSWOverlapRetriever.py @@ -1,5 +1,5 @@ -from app.supportai.retrievers import BaseRetriever -from app.metrics.tg_proxy import TigerGraphConnectionProxy +from supportai.retrievers import BaseRetriever +from common.metrics.tg_proxy import TigerGraphConnectionProxy class HNSWOverlapRetriever(BaseRetriever): diff --git a/app/supportai/retrievers/HNSWRetriever.py b/copilot/app/supportai/retrievers/HNSWRetriever.py similarity index 91% rename from app/supportai/retrievers/HNSWRetriever.py rename to copilot/app/supportai/retrievers/HNSWRetriever.py index 39b9dd7e..7bbf28de 100644 --- a/app/supportai/retrievers/HNSWRetriever.py +++ b/copilot/app/supportai/retrievers/HNSWRetriever.py @@ -1,5 +1,5 @@ -from app.supportai.retrievers 
import BaseRetriever -from app.metrics.tg_proxy import TigerGraphConnectionProxy +from supportai.retrievers import BaseRetriever +from common.metrics.tg_proxy import TigerGraphConnectionProxy class HNSWRetriever(BaseRetriever): diff --git a/app/supportai/retrievers/HNSWSiblingRetriever.py b/copilot/app/supportai/retrievers/HNSWSiblingRetriever.py similarity index 93% rename from app/supportai/retrievers/HNSWSiblingRetriever.py rename to copilot/app/supportai/retrievers/HNSWSiblingRetriever.py index 744d89fb..d77685fa 100644 --- a/app/supportai/retrievers/HNSWSiblingRetriever.py +++ b/copilot/app/supportai/retrievers/HNSWSiblingRetriever.py @@ -1,5 +1,5 @@ -from app.metrics.tg_proxy import TigerGraphConnectionProxy -from app.supportai.retrievers import BaseRetriever +from common.metrics.tg_proxy import TigerGraphConnectionProxy +from supportai.retrievers import BaseRetriever class HNSWSiblingRetriever(BaseRetriever): diff --git a/app/supportai/retrievers/__init__.py b/copilot/app/supportai/retrievers/__init__.py similarity index 100% rename from app/supportai/retrievers/__init__.py rename to copilot/app/supportai/retrievers/__init__.py diff --git a/app/supportai/supportai_ingest.py b/copilot/app/supportai/supportai_ingest.py similarity index 96% rename from app/supportai/supportai_ingest.py rename to copilot/app/supportai/supportai_ingest.py index 6a5b2ffa..b0d39093 100644 --- a/app/supportai/supportai_ingest.py +++ b/copilot/app/supportai/supportai_ingest.py @@ -1,13 +1,13 @@ -from app.metrics.tg_proxy import TigerGraphConnectionProxy -from app.storage.azure_blob_store import AzureBlobStore -from app.storage.google_blob_store import GoogleBlobStore -from app.storage.s3_blob_store import S3BlobStore -from app.py_schemas import BatchDocumentIngest, Document, DocumentChunk, KnowledgeGraph +from common.metrics.tg_proxy import TigerGraphConnectionProxy +from common.storage.azure_blob_store import AzureBlobStore +from common.storage.google_blob_store import 
GoogleBlobStore +from common.storage.s3_blob_store import S3BlobStore +from common.py_schemas import BatchDocumentIngest, Document, DocumentChunk, KnowledgeGraph from typing import List, Union import json from datetime import datetime -from app.status import Status, IngestionProgress -from app.supportai.extractors import LLMEntityRelationshipExtractor +from common.status import Status, IngestionProgress +from common.extractors import LLMEntityRelationshipExtractor from langchain.prompts import ChatPromptTemplate from langchain.output_parsers import PydanticOutputParser @@ -32,17 +32,17 @@ def chunk_documents(self, documents, chunker, chunker_params): def chunk_document(self, document, chunker, chunker_params): if chunker.lower() == "regex": - from app.supportai.chunkers.regex_chunker import RegexChunker + from common.chunkers.regex_chunker import RegexChunker chunker = RegexChunker(chunker_params["pattern"]) elif chunker.lower() == "characters": - from app.supportai.chunkers.character_chunker import CharacterChunker + from common.chunkers.character_chunker import CharacterChunker chunker = CharacterChunker( chunker_params["chunk_size"], chunker_params.get("overlap", 0) ) elif chunker.lower() == "semantic": - from app.supportai.chunkers.semantic_chunker import SemanticChunker + from common.chunkers.semantic_chunker import SemanticChunker chunker = SemanticChunker( self.embedding_service, diff --git a/app/tg_documents/get_edge_count.json b/copilot/app/tg_documents/get_edge_count.json similarity index 100% rename from app/tg_documents/get_edge_count.json rename to copilot/app/tg_documents/get_edge_count.json diff --git a/app/tg_documents/get_edge_count_from.json b/copilot/app/tg_documents/get_edge_count_from.json similarity index 100% rename from app/tg_documents/get_edge_count_from.json rename to copilot/app/tg_documents/get_edge_count_from.json diff --git a/app/tg_documents/get_edge_stats.json b/copilot/app/tg_documents/get_edge_stats.json similarity index 100% 
rename from app/tg_documents/get_edge_stats.json rename to copilot/app/tg_documents/get_edge_stats.json diff --git a/app/tg_documents/get_edges.json b/copilot/app/tg_documents/get_edges.json similarity index 100% rename from app/tg_documents/get_edges.json rename to copilot/app/tg_documents/get_edges.json diff --git a/app/tg_documents/get_vertex_count.json b/copilot/app/tg_documents/get_vertex_count.json similarity index 100% rename from app/tg_documents/get_vertex_count.json rename to copilot/app/tg_documents/get_vertex_count.json diff --git a/app/tg_documents/get_vertex_stats.json b/copilot/app/tg_documents/get_vertex_stats.json similarity index 100% rename from app/tg_documents/get_vertex_stats.json rename to copilot/app/tg_documents/get_vertex_stats.json diff --git a/app/tg_documents/get_vertices.json b/copilot/app/tg_documents/get_vertices.json similarity index 100% rename from app/tg_documents/get_vertices.json rename to copilot/app/tg_documents/get_vertices.json diff --git a/app/tg_documents/get_vertices_by_id.json b/copilot/app/tg_documents/get_vertices_by_id.json similarity index 100% rename from app/tg_documents/get_vertices_by_id.json rename to copilot/app/tg_documents/get_vertices_by_id.json diff --git a/app/tg_documents/tg_bfs.json b/copilot/app/tg_documents/tg_bfs.json similarity index 100% rename from app/tg_documents/tg_bfs.json rename to copilot/app/tg_documents/tg_bfs.json diff --git a/app/tg_documents/tg_pagerank.json b/copilot/app/tg_documents/tg_pagerank.json similarity index 100% rename from app/tg_documents/tg_pagerank.json rename to copilot/app/tg_documents/tg_pagerank.json diff --git a/app/tg_documents/tg_shortest_ss_no_wt.json b/copilot/app/tg_documents/tg_shortest_ss_no_wt.json similarity index 100% rename from app/tg_documents/tg_shortest_ss_no_wt.json rename to copilot/app/tg_documents/tg_shortest_ss_no_wt.json diff --git a/app/tools/__init__.py b/copilot/app/tools/__init__.py similarity index 100% rename from app/tools/__init__.py 
rename to copilot/app/tools/__init__.py diff --git a/app/tools/generate_cypher.py b/copilot/app/tools/generate_cypher.py similarity index 97% rename from app/tools/generate_cypher.py rename to copilot/app/tools/generate_cypher.py index c548ce18..98170fe2 100644 --- a/app/tools/generate_cypher.py +++ b/copilot/app/tools/generate_cypher.py @@ -3,10 +3,7 @@ from langchain.prompts import PromptTemplate from langchain.tools import BaseTool from langchain.llms.base import LLM - -from app.log import req_id_cv -from app.metrics.tg_proxy import TigerGraphConnectionProxy -from app.tools.logwriter import LogWriter +from common.metrics.tg_proxy import TigerGraphConnectionProxy logger = logging.getLogger(__name__) diff --git a/app/tools/generate_function.py b/copilot/app/tools/generate_function.py similarity index 96% rename from app/tools/generate_function.py rename to copilot/app/tools/generate_function.py index c28d0b21..4ac7a2e1 100644 --- a/app/tools/generate_function.py +++ b/copilot/app/tools/generate_function.py @@ -10,12 +10,12 @@ from langchain.tools import BaseTool from langchain.tools.base import ToolException -from app.embeddings.base_embedding_store import EmbeddingStore -from app.embeddings.embedding_services import EmbeddingModel -from app.log import req_id_cv -from app.metrics.tg_proxy import TigerGraphConnectionProxy -from app.py_schemas import GenerateFunctionResponse, MapQuestionToSchemaResponse -from app.tools.logwriter import LogWriter +from common.embeddings.base_embedding_store import EmbeddingStore +from common.embeddings.embedding_services import EmbeddingModel +from common.logs.log import req_id_cv +from common.metrics.tg_proxy import TigerGraphConnectionProxy +from common.py_schemas import GenerateFunctionResponse, MapQuestionToSchemaResponse +from common.logs.logwriter import LogWriter from .validation_utils import ( InvalidFunctionCallException, diff --git a/app/tools/map_question_to_schema.py b/copilot/app/tools/map_question_to_schema.py 
similarity index 96% rename from app/tools/map_question_to_schema.py rename to copilot/app/tools/map_question_to_schema.py index 80d24dd8..376f8433 100644 --- a/app/tools/map_question_to_schema.py +++ b/copilot/app/tools/map_question_to_schema.py @@ -5,14 +5,14 @@ from langchain.prompts import PromptTemplate from langchain.output_parsers import PydanticOutputParser from langchain.pydantic_v1 import BaseModel, Field, validator -from app.metrics.tg_proxy import TigerGraphConnectionProxy -from app.py_schemas import MapQuestionToSchemaResponse, MapAttributeToAttributeResponse +from common.metrics.tg_proxy import TigerGraphConnectionProxy +from common.py_schemas import MapQuestionToSchemaResponse, MapAttributeToAttributeResponse from typing import List, Dict from .validation_utils import validate_schema, MapQuestionToSchemaException import re import logging -from app.log import req_id_cv -from app.tools.logwriter import LogWriter +from common.logs.log import req_id_cv +from common.logs.logwriter import LogWriter logger = logging.getLogger(__name__) diff --git a/app/tools/validation_utils.py b/copilot/app/tools/validation_utils.py similarity index 98% rename from app/tools/validation_utils.py rename to copilot/app/tools/validation_utils.py index 08ee2079..b00b643e 100644 --- a/app/tools/validation_utils.py +++ b/copilot/app/tools/validation_utils.py @@ -5,8 +5,8 @@ """ import logging -from app.log import req_id_cv -from app.tools.logwriter import LogWriter +from common.logs.log import req_id_cv +from common.logs.logwriter import LogWriter logger = logging.getLogger(__name__) diff --git a/docs/Contributing.md b/copilot/docs/Contributing.md similarity index 100% rename from docs/Contributing.md rename to copilot/docs/Contributing.md diff --git a/docs/DeveloperGuide.md b/copilot/docs/DeveloperGuide.md similarity index 98% rename from docs/DeveloperGuide.md rename to copilot/docs/DeveloperGuide.md index 736069b5..5a824a13 100644 --- a/docs/DeveloperGuide.md +++ 
b/copilot/docs/DeveloperGuide.md @@ -45,7 +45,7 @@ class MyEmbeddingService(BaseEmbeddingService): 2. Implement the needed methods for your service. If you utilize a LangChain-supported embedding service, you can use the `BaseEmbeddingService` class as a reference. If you are using a custom endpoint, you will need to implement the `embed_documents` and `embed_query` methods accordingly. 3. Import your service and dd your service to the `app/main.py` file where the `EmbeddingService` class is instantiated. For example: ```python -from app.embeddings.embedding_service import MyembeddingService +from common.embeddings.embedding_service import MyembeddingService if llm_config["embedding_service"]["embedding_model_service"].lower() == "MyEmbeddingService": embedding_service = MyEmbeddingService(llm_config["embedding_service"]) @@ -65,7 +65,7 @@ from .service_name import ServiceName ``` 4. Import and instantiate your service in the `app/main.py` file. For example: ```python -from app.llm_services import ServiceName +from common.llm_services import ServiceName # Within the instantiation of the Agent class elif block elif llm_config["completion_service"]["llm_service"].lower() == "my_service": diff --git a/docs/img/CoPilot-UX-Demo.png b/copilot/docs/img/CoPilot-UX-Demo.png similarity index 100% rename from docs/img/CoPilot-UX-Demo.png rename to copilot/docs/img/CoPilot-UX-Demo.png diff --git a/docs/img/InquiryAI-Architecture.png b/copilot/docs/img/InquiryAI-Architecture.png similarity index 100% rename from docs/img/InquiryAI-Architecture.png rename to copilot/docs/img/InquiryAI-Architecture.png diff --git a/docs/img/SupportAI-Architecture.png b/copilot/docs/img/SupportAI-Architecture.png similarity index 100% rename from docs/img/SupportAI-Architecture.png rename to copilot/docs/img/SupportAI-Architecture.png diff --git a/docs/img/SupportAISchema.png b/copilot/docs/img/SupportAISchema.png similarity index 100% rename from docs/img/SupportAISchema.png rename to 
copilot/docs/img/SupportAISchema.png diff --git a/docs/img/SwaggerDocUX.png b/copilot/docs/img/SwaggerDocUX.png similarity index 100% rename from docs/img/SwaggerDocUX.png rename to copilot/docs/img/SwaggerDocUX.png diff --git a/docs/img/TG-CoPilot-Architecture.png b/copilot/docs/img/TG-CoPilot-Architecture.png similarity index 100% rename from docs/img/TG-CoPilot-Architecture.png rename to copilot/docs/img/TG-CoPilot-Architecture.png diff --git a/docs/notebooks/DigitalInfraDemo.ipynb b/copilot/docs/notebooks/DigitalInfraDemo.ipynb similarity index 100% rename from docs/notebooks/DigitalInfraDemo.ipynb rename to copilot/docs/notebooks/DigitalInfraDemo.ipynb diff --git a/docs/notebooks/SupportAIDemo.ipynb b/copilot/docs/notebooks/SupportAIDemo.ipynb similarity index 100% rename from docs/notebooks/SupportAIDemo.ipynb rename to copilot/docs/notebooks/SupportAIDemo.ipynb diff --git a/docs/notebooks/TransactionFraudInvestigation.ipynb b/copilot/docs/notebooks/TransactionFraudInvestigation.ipynb similarity index 100% rename from docs/notebooks/TransactionFraudInvestigation.ipynb rename to copilot/docs/notebooks/TransactionFraudInvestigation.ipynb diff --git a/docs/notebooks/VisualizeAgent.ipynb b/copilot/docs/notebooks/VisualizeAgent.ipynb similarity index 99% rename from docs/notebooks/VisualizeAgent.ipynb rename to copilot/docs/notebooks/VisualizeAgent.ipynb index 38fcb823..2547133c 100644 --- a/docs/notebooks/VisualizeAgent.ipynb +++ b/copilot/docs/notebooks/VisualizeAgent.ipynb @@ -20,9 +20,9 @@ "metadata": {}, "outputs": [], "source": [ - "from app.llm_services import LLM_Model\n", - "from app.embeddings.base_embedding_store import EmbeddingStore\n", - "from app.embeddings.embedding_services import EmbeddingModel\n", + "from common.llm_services import LLM_Model\n", + "from common.embeddings.base_embedding_store import EmbeddingStore\n", + "from common.embeddings.embedding_services import EmbeddingModel\n", "from app.tools import GenerateFunction, 
MapQuestionToSchema, GenerateCypher\n", "from pyTigerGraph import TigerGraphConnection" ] diff --git a/requirements.txt b/copilot/requirements.txt similarity index 100% rename from requirements.txt rename to copilot/requirements.txt diff --git a/tests/app b/copilot/tests/app similarity index 100% rename from tests/app rename to copilot/tests/app diff --git a/copilot/tests/common b/copilot/tests/common new file mode 120000 index 00000000..dc879abe --- /dev/null +++ b/copilot/tests/common @@ -0,0 +1 @@ +../../common \ No newline at end of file diff --git a/copilot/tests/configs b/copilot/tests/configs new file mode 120000 index 00000000..5992d109 --- /dev/null +++ b/copilot/tests/configs @@ -0,0 +1 @@ +../../configs \ No newline at end of file diff --git a/tests/conftest.py b/copilot/tests/conftest.py similarity index 96% rename from tests/conftest.py rename to copilot/tests/conftest.py index da1d792e..6e548335 100644 --- a/tests/conftest.py +++ b/copilot/tests/conftest.py @@ -16,4 +16,4 @@ def pytest_collection_modifyitems(config, items): # Mark the test module as skipped if the error message contains the specified substring deselected_modules.add(item.module.__name__) # Remove the deselected modules from the test items list - items[:] = [item for item in items if item.module.__name__ not in deselected_modules] \ No newline at end of file + items[:] = [item for item in items if item.module.__name__ not in deselected_modules] diff --git a/tests/create_wandb_report.py b/copilot/tests/create_wandb_report.py similarity index 100% rename from tests/create_wandb_report.py rename to copilot/tests/create_wandb_report.py diff --git a/tests/parse_test_config.py b/copilot/tests/parse_test_config.py similarity index 100% rename from tests/parse_test_config.py rename to copilot/tests/parse_test_config.py diff --git a/tests/perf/.gitignore b/copilot/tests/perf/.gitignore similarity index 100% rename from tests/perf/.gitignore rename to copilot/tests/perf/.gitignore diff --git 
a/tests/perf/README.md b/copilot/tests/perf/README.md similarity index 100% rename from tests/perf/README.md rename to copilot/tests/perf/README.md diff --git a/tests/perf/customMetrics.js b/copilot/tests/perf/customMetrics.js similarity index 100% rename from tests/perf/customMetrics.js rename to copilot/tests/perf/customMetrics.js diff --git a/tests/perf/run.sh b/copilot/tests/perf/run.sh similarity index 100% rename from tests/perf/run.sh rename to copilot/tests/perf/run.sh diff --git a/tests/perf/script.js b/copilot/tests/perf/script.js similarity index 100% rename from tests/perf/script.js rename to copilot/tests/perf/script.js diff --git a/copilot/tests/run_tests.sh b/copilot/tests/run_tests.sh new file mode 100755 index 00000000..c458661c --- /dev/null +++ b/copilot/tests/run_tests.sh @@ -0,0 +1,123 @@ +#!/bin/bash +export DB_CONFIG=./configs/db_config.json +export MILVUS_CONFIG=./configs/milvus_config.json +export LOGLEVEL=INFO + +# Set default values +llm_service="all" +schema="all" +use_wandb="true" + +# Check if llm_service argument is provided +if [ "$#" -ge 1 ]; then + llm_service="$1" +fi + +# Check if schema argument is provided +if [ "$#" -ge 2 ]; then + schema="$2" +fi + +# Check if use_wandb argument is provided +if [ "$#" -ge 3 ]; then + use_wandb="$3" +fi + +# Define the mapping of Python script names to JSON config file names +azure_gpt35_script="test_azure_gpt35_turbo_instruct.py" +azure_gpt35_config="./configs/azure_llm_config.json" + +openai_gpt35_script="test_openai_gpt35-turbo.py" +openai_gpt35_config="./configs/openai_gpt3.5-turbo_config.json" + +openai_gpt4_script="test_openai_gpt4.py" +openai_gpt4_config="./configs/openai_gpt4_config.json" + +huggingface_phi3_script="test_huggingface_phi3.py" +huggingface_phi3_config="./configs/huggingface_severless_endpoint_phi3_config.json" + +openai_gpt4o_script="test_openai_gpt4o.py" +openai_gpt4o_config="./configs/openai_gpt4o_config.json" + +gcp_textbison_script="test_gcp_text-bison.py"
+gcp_textbison_config="./configs/gcp_text-bison_config.json" + +groq_mixtral_script="test_groq_mixtral8x7b.py" +groq_mixtral_config="./configs/groq_mixtral_config.json" + +aws_bedrock_script="test_bedrock.py" +aws_bedrock_config="./configs/bedrock_config.json" + +huggingface_llama3_script="test_huggingface_llama70b.py" +huggingface_llama3_config="./configs/huggingface_llama70b_config.json" + +# Function to execute a service +execute_service() { + local service="$1" + local config_file="$2" + cp $service test_service.py parse_test_config.py app + + # Export the path to the config file as an environment variable + export LLM_CONFIG="$config_file" + + if [ $use_wandb = "true" ]; then + python app/$service --schema $schema + else + python app/$service --schema $schema --no-wandb + fi + + # Unset the environment variable after the Python script execution + unset CONFIG_FILE_PATH + rm app/$service app/test_service.py app/parse_test_config.py +} + +# Check the value of llm_service and execute the corresponding Python script(s) +case "$llm_service" in +"azure_gpt35") + execute_service "$azure_gpt35_script" "$azure_gpt35_config" + ;; +"openai_gpt35") + execute_service "$openai_gpt35_script" "$openai_gpt35_config" + ;; +"openai_gpt4") + execute_service "$openai_gpt4_script" "$openai_gpt4_config" + ;; +"openai_gpt4o") + execute_service "$openai_gpt4o_script" "$openai_gpt4o_config" + ;; +"gcp_textbison") + execute_service "$gcp_textbison_script" "$gcp_textbison_config" + ;; +"huggingface_phi3") + execute_service "$huggingface_phi3_script" "$huggingface_phi3_config" + ;; +"groq_mixtral") + execute_service "$groq_mixtral_script" "$groq_mixtral_config" + ;; +"aws_bedrock") + execute_service "$aws_bedrock_script" "$aws_bedrock_config" + ;; +"huggingface_llama3") + execute_service "$huggingface_llama3_script" "$huggingface_llama3_config" + ;; +"all") + echo "Executing all services..." 
+ for service_script_pair in "$azure_gpt35_script $azure_gpt35_config" \ + "$openai_gpt35_script $openai_gpt35_config" \ + "$openai_gpt4_script $openai_gpt4_config" \ + "$gcp_textbison_script $gcp_textbison_config" \ + "$groq_mixtral_script $groq_mixtral_config" \ + "$aws_bedrock_script $aws_bedrock_config" \ + "$openai_gpt4o_script $openai_gpt4o_config" \ + "$huggingface_llama3_script $huggingface_llama3_config" \ + "$huggingface_phi3_script $huggingface_phi3_config"; do + execute_service $service_script_pair + done + ;; +*) + echo "Unknown llm_service: $llm_service" + exit 1 + ;; +esac + +python create_wandb_report.py diff --git a/tests/test_azure_gpt35_turbo_instruct.py b/copilot/tests/test_azure_gpt35_turbo_instruct.py similarity index 97% rename from tests/test_azure_gpt35_turbo_instruct.py rename to copilot/tests/test_azure_gpt35_turbo_instruct.py index a94a2f28..90dfe384 100644 --- a/tests/test_azure_gpt35_turbo_instruct.py +++ b/copilot/tests/test_azure_gpt35_turbo_instruct.py @@ -13,7 +13,7 @@ class TestWithAzure(CommonTests, unittest.TestCase): @classmethod def setUpClass(cls) -> None: - from app.main import app + from main import app cls.client = TestClient(app) cls.llm_service = "azure_gpt3.5_turbo_instruct" diff --git a/tests/test_bedrock.py b/copilot/tests/test_bedrock.py similarity index 97% rename from tests/test_bedrock.py rename to copilot/tests/test_bedrock.py index 75b6e5f4..76bc8430 100644 --- a/tests/test_bedrock.py +++ b/copilot/tests/test_bedrock.py @@ -10,7 +10,7 @@ class TestWithClaude3Bedrock(CommonTests, unittest.TestCase): @classmethod def setUpClass(cls) -> None: - from app.main import app + from main import app cls.client = TestClient(app) cls.llm_service = "claude-3-haiku" diff --git a/tests/test_character_chunker.py b/copilot/tests/test_character_chunker.py similarity index 97% rename from tests/test_character_chunker.py rename to copilot/tests/test_character_chunker.py index fad1aab2..f132ce7e 100644 --- 
a/tests/test_character_chunker.py +++ b/copilot/tests/test_character_chunker.py @@ -1,5 +1,5 @@ import unittest -from app.supportai.chunkers.character_chunker import CharacterChunker +from common.chunkers.character_chunker import CharacterChunker class TestCharacterChunker(unittest.TestCase): diff --git a/tests/test_credit_card_redaction.py b/copilot/tests/test_credit_card_redaction.py similarity index 96% rename from tests/test_credit_card_redaction.py rename to copilot/tests/test_credit_card_redaction.py index 71130a75..8d2ae80e 100644 --- a/tests/test_credit_card_redaction.py +++ b/copilot/tests/test_credit_card_redaction.py @@ -1,7 +1,7 @@ import re import unittest -from app.tools.logwriter import LogWriter +from common.logs.logwriter import LogWriter class TestCreditCardRedaction(unittest.TestCase): def setUp(self): diff --git a/tests/test_crud_endpoint.py b/copilot/tests/test_crud_endpoint.py similarity index 100% rename from tests/test_crud_endpoint.py rename to copilot/tests/test_crud_endpoint.py diff --git a/tests/test_gcp_text-bison.py b/copilot/tests/test_gcp_text-bison.py similarity index 97% rename from tests/test_gcp_text-bison.py rename to copilot/tests/test_gcp_text-bison.py index 612389af..778edf7b 100644 --- a/tests/test_gcp_text-bison.py +++ b/copilot/tests/test_gcp_text-bison.py @@ -14,7 +14,7 @@ class TestWithVertexAI(CommonTests, unittest.TestCase): @classmethod def setUpClass(cls) -> None: - from app.main import app + from main import app cls.client = TestClient(app) cls.llm_service = "gemini-1.5-flash-preview-0514" diff --git a/tests/test_groq_mixtral8x7b.py b/copilot/tests/test_groq_mixtral8x7b.py similarity index 97% rename from tests/test_groq_mixtral8x7b.py rename to copilot/tests/test_groq_mixtral8x7b.py index c03dd61a..c2298eb5 100644 --- a/tests/test_groq_mixtral8x7b.py +++ b/copilot/tests/test_groq_mixtral8x7b.py @@ -14,7 +14,7 @@ class TestWithGroq(CommonTests, unittest.TestCase): @classmethod def setUpClass(cls) -> None: - from 
app.main import app + from main import app cls.client = TestClient(app) cls.llm_service = "mixtral-8x7b-32768" diff --git a/tests/test_huggingface_llama70b.py b/copilot/tests/test_huggingface_llama70b.py similarity index 97% rename from tests/test_huggingface_llama70b.py rename to copilot/tests/test_huggingface_llama70b.py index 65112345..0298ebd5 100644 --- a/tests/test_huggingface_llama70b.py +++ b/copilot/tests/test_huggingface_llama70b.py @@ -14,7 +14,7 @@ class TestWithHuggingFace(CommonTests, unittest.TestCase): @classmethod def setUpClass(cls) -> None: - from app.main import app + from main import app cls.client = TestClient(app) cls.llm_service = "Llama3-70b" diff --git a/tests/test_huggingface_phi3.py b/copilot/tests/test_huggingface_phi3.py similarity index 97% rename from tests/test_huggingface_phi3.py rename to copilot/tests/test_huggingface_phi3.py index 2515c1af..ba3a8825 100644 --- a/tests/test_huggingface_phi3.py +++ b/copilot/tests/test_huggingface_phi3.py @@ -14,7 +14,7 @@ class TestWithHuggingFace(CommonTests, unittest.TestCase): @classmethod def setUpClass(cls) -> None: - from app.main import app + from main import app cls.client = TestClient(app) cls.llm_service = "microsoft/Phi-3-mini-4k-instruct" diff --git a/tests/test_ingest.py b/copilot/tests/test_ingest.py similarity index 99% rename from tests/test_ingest.py rename to copilot/tests/test_ingest.py index 2177c962..92dd6829 100644 --- a/tests/test_ingest.py +++ b/copilot/tests/test_ingest.py @@ -1,7 +1,7 @@ import unittest from unittest.mock import patch, MagicMock from app.supportai.supportai_ingest import BatchIngestion -from app.status import IngestionProgress +from common.status import IngestionProgress class TestBatchIngestion(unittest.TestCase): diff --git a/tests/test_inquiryai.py b/copilot/tests/test_inquiryai.py similarity index 100% rename from tests/test_inquiryai.py rename to copilot/tests/test_inquiryai.py diff --git a/tests/test_inquiryai_milvus.py 
b/copilot/tests/test_inquiryai_milvus.py similarity index 99% rename from tests/test_inquiryai_milvus.py rename to copilot/tests/test_inquiryai_milvus.py index 391383ed..efb21763 100644 --- a/tests/test_inquiryai_milvus.py +++ b/copilot/tests/test_inquiryai_milvus.py @@ -11,7 +11,7 @@ class TestInquiryAI(unittest.TestCase): def setUp(self): - from app.main import app + from main import app self.client = TestClient(app) db_config = os.getenv("DB_CONFIG") diff --git a/tests/test_log_writer.py b/copilot/tests/test_log_writer.py similarity index 74% rename from tests/test_log_writer.py rename to copilot/tests/test_log_writer.py index 06213e05..80c8e3f5 100644 --- a/tests/test_log_writer.py +++ b/copilot/tests/test_log_writer.py @@ -3,12 +3,12 @@ import os import json from unittest.mock import call, patch, MagicMock -from app.tools.logwriter import LogWriter +from common.logs.logwriter import LogWriter class TestLogWriter(unittest.TestCase): - @patch("app.tools.logwriter.os.makedirs") - @patch("app.tools.logwriter.RotatingFileHandler") + @patch("common.logs.logwriter.os.makedirs") + @patch("common.logs.logwriter.RotatingFileHandler") def test_initialization(self, mock_handler, mock_makedirs): """Test that loggers are initialized correctly.""" LogWriter.initialize_logger() @@ -24,9 +24,9 @@ def test_mask_pii(self): self.assertNotEqual(masked_email, email) self.assertIn("[EMAIL REDACTED]", masked_email) - @patch("app.tools.logwriter.os.makedirs") - @patch("app.tools.logwriter.RotatingFileHandler") - @patch("app.tools.logwriter.logging.Logger.info") + @patch("common.logs.logwriter.os.makedirs") + @patch("common.logs.logwriter.RotatingFileHandler") + @patch("common.logs.logwriter.logging.Logger.info") def test_audit_log(self, mock_info, mock_handler, mock_makedirs): """Test audit logging with structured data.""" test_message = { @@ -40,25 +40,25 @@ def test_audit_log(self, mock_info, mock_handler, mock_makedirs): logged_message = json.loads(args[0]) 
self.assertEqual(logged_message["userName"], "testUser") - @patch("app.tools.logwriter.os.makedirs") - @patch("app.tools.logwriter.RotatingFileHandler") - @patch("app.tools.logwriter.logging.Logger.info") + @patch("common.logs.logwriter.os.makedirs") + @patch("common.logs.logwriter.RotatingFileHandler") + @patch("common.logs.logwriter.logging.Logger.info") def test_info_log(self, mock_error, mock_handler, mock_makedirs): """Test info logging.""" LogWriter.log("info", "This is an info message", mask_pii=False) mock_error.assert_called_once_with("This is an info message") - @patch("app.tools.logwriter.os.makedirs") - @patch("app.tools.logwriter.RotatingFileHandler") - @patch("app.tools.logwriter.logging.Logger.warning") + @patch("common.logs.logwriter.os.makedirs") + @patch("common.logs.logwriter.RotatingFileHandler") + @patch("common.logs.logwriter.logging.Logger.warning") def test_warning_log(self, mock_warning, mock_handler, mock_makedirs): """Test warning logging.""" LogWriter.log("warning", "This is a warning message", mask_pii=False) mock_warning.assert_called_once_with("This is a warning message") - @patch("app.tools.logwriter.os.makedirs") - @patch("app.tools.logwriter.RotatingFileHandler") - @patch("app.tools.logwriter.logging.Logger.error") + @patch("common.logs.logwriter.os.makedirs") + @patch("common.logs.logwriter.RotatingFileHandler") + @patch("common.logs.logwriter.logging.Logger.error") def test_error_log(self, mock_error, mock_handler, mock_makedirs): """Test error logging.""" LogWriter.log("error", "This is an error", mask_pii=False) diff --git a/tests/test_milvus_embedding_store.py b/copilot/tests/test_milvus_embedding_store.py similarity index 87% rename from tests/test_milvus_embedding_store.py rename to copilot/tests/test_milvus_embedding_store.py index 771cd890..3efcf940 100644 --- a/tests/test_milvus_embedding_store.py +++ b/copilot/tests/test_milvus_embedding_store.py @@ -3,13 +3,13 @@ import unittest from unittest.mock import patch, 
MagicMock -from app.embeddings.milvus_embedding_store import MilvusEmbeddingStore +from common.embeddings.milvus_embedding_store import MilvusEmbeddingStore from langchain_core.documents import Document class TestMilvusEmbeddingStore(unittest.TestCase): - @patch("app.embeddings.embedding_services.EmbeddingModel") - @patch("app.embeddings.milvus_embedding_store.MilvusEmbeddingStore.connect_to_milvus") + @patch("common.embeddings.embedding_services.EmbeddingModel") + @patch("common.embeddings.milvus_embedding_store.MilvusEmbeddingStore.connect_to_milvus") def test_add_embeddings(self, mock_connect, mock_embedding_model): query = "What is the meaning of life?" embedded_query = [0.1, 0.2, 0.3] @@ -29,7 +29,7 @@ def test_add_embeddings(self, mock_connect, mock_embedding_model): embedding_store.add_embeddings(embeddings=[(query, embedded_documents)]) embedding_store.milvus.add_texts.assert_called_once_with(texts=[query], metadatas=[]) - @patch("app.embeddings.milvus_embedding_store.MilvusEmbeddingStore.connect_to_milvus") + @patch("common.embeddings.milvus_embedding_store.MilvusEmbeddingStore.connect_to_milvus") def test_retrieve_embeddings(self, mock_connect): mock_connect.return_value = None embedded_query = [0.1, 0.2, 0.3] diff --git a/tests/test_openai_gpt35-turbo.py b/copilot/tests/test_openai_gpt35-turbo.py similarity index 97% rename from tests/test_openai_gpt35-turbo.py rename to copilot/tests/test_openai_gpt35-turbo.py index f3884695..9b85a3c4 100644 --- a/tests/test_openai_gpt35-turbo.py +++ b/copilot/tests/test_openai_gpt35-turbo.py @@ -14,7 +14,7 @@ class TestWithOpenAI(CommonTests, unittest.TestCase): @classmethod def setUpClass(cls) -> None: - from app.main import app + from main import app cls.client = TestClient(app) cls.llm_service = "openai_gpt-3.5-turbo-1106" diff --git a/tests/test_openai_gpt4.py b/copilot/tests/test_openai_gpt4.py similarity index 97% rename from tests/test_openai_gpt4.py rename to copilot/tests/test_openai_gpt4.py index 
68d4ef08..df3082a2 100644 --- a/tests/test_openai_gpt4.py +++ b/copilot/tests/test_openai_gpt4.py @@ -14,7 +14,7 @@ class TestWithOpenAI(CommonTests, unittest.TestCase): @classmethod def setUpClass(cls) -> None: - from app.main import app + from main import app cls.client = TestClient(app) cls.llm_service = "openai_gpt-4-0613" diff --git a/tests/test_openai_gpt4o.py b/copilot/tests/test_openai_gpt4o.py similarity index 97% rename from tests/test_openai_gpt4o.py rename to copilot/tests/test_openai_gpt4o.py index d2f5ce4f..df4fe94f 100644 --- a/tests/test_openai_gpt4o.py +++ b/copilot/tests/test_openai_gpt4o.py @@ -14,7 +14,7 @@ class TestWithOpenAI(CommonTests, unittest.TestCase): @classmethod def setUpClass(cls) -> None: - from app.main import app + from main import app cls.client = TestClient(app) cls.llm_service = "gpt-4o-2024-05-13" diff --git a/tests/test_questions/DigitalInfra/DigitalInfraQuestions.tsv b/copilot/tests/test_questions/DigitalInfra/DigitalInfraQuestions.tsv similarity index 100% rename from tests/test_questions/DigitalInfra/DigitalInfraQuestions.tsv rename to copilot/tests/test_questions/DigitalInfra/DigitalInfraQuestions.tsv diff --git a/tests/test_questions/DigitalInfra/README.md b/copilot/tests/test_questions/DigitalInfra/README.md similarity index 100% rename from tests/test_questions/DigitalInfra/README.md rename to copilot/tests/test_questions/DigitalInfra/README.md diff --git a/tests/test_questions/DigitalInfra/gsql/create_data_source.gsql b/copilot/tests/test_questions/DigitalInfra/gsql/create_data_source.gsql similarity index 100% rename from tests/test_questions/DigitalInfra/gsql/create_data_source.gsql rename to copilot/tests/test_questions/DigitalInfra/gsql/create_data_source.gsql diff --git a/tests/test_questions/DigitalInfra/gsql/create_graph.gsql b/copilot/tests/test_questions/DigitalInfra/gsql/create_graph.gsql similarity index 100% rename from tests/test_questions/DigitalInfra/gsql/create_graph.gsql rename to 
copilot/tests/test_questions/DigitalInfra/gsql/create_graph.gsql diff --git a/tests/test_questions/DigitalInfra/gsql/create_load_job.gsql b/copilot/tests/test_questions/DigitalInfra/gsql/create_load_job.gsql similarity index 100% rename from tests/test_questions/DigitalInfra/gsql/create_load_job.gsql rename to copilot/tests/test_questions/DigitalInfra/gsql/create_load_job.gsql diff --git a/tests/test_questions/DigitalInfra/gsql/create_schema.gsql b/copilot/tests/test_questions/DigitalInfra/gsql/create_schema.gsql similarity index 100% rename from tests/test_questions/DigitalInfra/gsql/create_schema.gsql rename to copilot/tests/test_questions/DigitalInfra/gsql/create_schema.gsql diff --git a/tests/test_questions/DigitalInfra/ms_dependency_chain/ms_dependency_chain.gsql b/copilot/tests/test_questions/DigitalInfra/ms_dependency_chain/ms_dependency_chain.gsql similarity index 100% rename from tests/test_questions/DigitalInfra/ms_dependency_chain/ms_dependency_chain.gsql rename to copilot/tests/test_questions/DigitalInfra/ms_dependency_chain/ms_dependency_chain.gsql diff --git a/tests/test_questions/DigitalInfra/ms_dependency_chain/ms_dependency_chain_prompt.json b/copilot/tests/test_questions/DigitalInfra/ms_dependency_chain/ms_dependency_chain_prompt.json similarity index 100% rename from tests/test_questions/DigitalInfra/ms_dependency_chain/ms_dependency_chain_prompt.json rename to copilot/tests/test_questions/DigitalInfra/ms_dependency_chain/ms_dependency_chain_prompt.json diff --git a/tests/test_questions/DigitalInfra/run_load_jobs.json b/copilot/tests/test_questions/DigitalInfra/run_load_jobs.json similarity index 100% rename from tests/test_questions/DigitalInfra/run_load_jobs.json rename to copilot/tests/test_questions/DigitalInfra/run_load_jobs.json diff --git a/tests/test_questions/DigitalInfra/setup_dataset.py b/copilot/tests/test_questions/DigitalInfra/setup_dataset.py similarity index 100% rename from tests/test_questions/DigitalInfra/setup_dataset.py 
rename to copilot/tests/test_questions/DigitalInfra/setup_dataset.py diff --git a/tests/test_questions/OGB_MAG/OGB_MAGQuestions.tsv b/copilot/tests/test_questions/OGB_MAG/OGB_MAGQuestions.tsv similarity index 100% rename from tests/test_questions/OGB_MAG/OGB_MAGQuestions.tsv rename to copilot/tests/test_questions/OGB_MAG/OGB_MAGQuestions.tsv diff --git a/tests/test_questions/OGB_MAG/author_fields_of_study/author_fields_of_study.gsql b/copilot/tests/test_questions/OGB_MAG/author_fields_of_study/author_fields_of_study.gsql similarity index 100% rename from tests/test_questions/OGB_MAG/author_fields_of_study/author_fields_of_study.gsql rename to copilot/tests/test_questions/OGB_MAG/author_fields_of_study/author_fields_of_study.gsql diff --git a/tests/test_questions/OGB_MAG/author_fields_of_study/author_fields_of_study_prompt.json b/copilot/tests/test_questions/OGB_MAG/author_fields_of_study/author_fields_of_study_prompt.json similarity index 100% rename from tests/test_questions/OGB_MAG/author_fields_of_study/author_fields_of_study_prompt.json rename to copilot/tests/test_questions/OGB_MAG/author_fields_of_study/author_fields_of_study_prompt.json diff --git a/tests/test_questions/OGB_MAG/setup_dataset.py b/copilot/tests/test_questions/OGB_MAG/setup_dataset.py similarity index 100% rename from tests/test_questions/OGB_MAG/setup_dataset.py rename to copilot/tests/test_questions/OGB_MAG/setup_dataset.py diff --git a/tests/test_questions/OGB_MAG/tg_pagerank/tg_pagerank.gsql b/copilot/tests/test_questions/OGB_MAG/tg_pagerank/tg_pagerank.gsql similarity index 100% rename from tests/test_questions/OGB_MAG/tg_pagerank/tg_pagerank.gsql rename to copilot/tests/test_questions/OGB_MAG/tg_pagerank/tg_pagerank.gsql diff --git a/tests/test_questions/OGB_MAG/tg_pagerank/tg_pagerank_prompt.json b/copilot/tests/test_questions/OGB_MAG/tg_pagerank/tg_pagerank_prompt.json similarity index 100% rename from tests/test_questions/OGB_MAG/tg_pagerank/tg_pagerank_prompt.json rename to 
copilot/tests/test_questions/OGB_MAG/tg_pagerank/tg_pagerank_prompt.json diff --git a/copilot/tests/test_questions/OGB_MAG_SHORT/OGB_MAG_SHORTQuestions.tsv b/copilot/tests/test_questions/OGB_MAG_SHORT/OGB_MAG_SHORTQuestions.tsv new file mode 100644 index 00000000..ee4f6bb2 --- /dev/null +++ b/copilot/tests/test_questions/OGB_MAG_SHORT/OGB_MAG_SHORTQuestions.tsv @@ -0,0 +1,8 @@ +Question Type Question Theme Question Schema Mapped Question Function Call Answer +COUNT VERTEX COUNT(Paper) How many papers are there? How many Paper Vertexes are there? getVertexCount(vertexType="Paper") 736389 +COUNT VERTEX WHERE ATTRIBUTE COUNT(Paper) WHERE(y=1) How many papers where y is equal to 1? How many Paper Vertexes are there with a y attribute of 1? getVertexCount('Paper', where='y=1') 30902 +COUNT EDGE COUNT(CITES) How many citations are there? How many CITES Edges are there? getEdgeCount(edgeType="CITES") 5416271 +COUNT VERTEX WHERE EDGE COUNT(Paper) WHERE(WROTE EDGE -> Author 20) How many papers has author 20 wrote? How many Paper Vertexes has Author Vertexes 30 WROTE Edges? getEdgeCountFrom(sourceVertexType="Author", sourceVertexId="20", edgeType="WROTE") 38 +SELECT VERTEX WHERE EDGE SELECT(Paper) WHERE(WROTE EDGE -> Author 30) Can you provide a list of papers written by author 30? What Paper Vertexes has Author Vertexes 30 WROTE Edges? 
getEdges(sourceVertexType="Author", sourceVertexId="30", edgeType="WROTE", targetVertexType="Paper") [{"e_type": "WROTE","directed": true,"from_id": "30","from_type": "Author","to_id": "431327","to_type": "Paper","attributes": {}},{"e_type": "WROTE","directed": true,"from_id": "30","from_type": "Author","to_id": "366332","to_type": "Paper","attributes": {}},{"e_type": "WROTE","directed": true,"from_id": "30","from_type": "Author","to_id": "142586","to_type": "Paper","attributes": {}},{"e_type": "WROTE","directed": true,"from_id": "30","from_type": "Author","to_id": "360139","to_type": "Paper","attributes": {}}] +COUNT VERTEX COUNT(Institution) How many universities are there? How many Institution Vertexes are there? getVertexCount(vertexType="Institution") 8740 +COUNT VERTEX COUNT(FieldOfStudy) an you tell me the count of different fields of study? How many FieldOfStudy Vertexes are there? getVertexCount(vertexType="FieldOfStudy") 59965 \ No newline at end of file diff --git a/tests/test_questions/Synthea/README.md b/copilot/tests/test_questions/Synthea/README.md similarity index 100% rename from tests/test_questions/Synthea/README.md rename to copilot/tests/test_questions/Synthea/README.md diff --git a/tests/test_questions/Synthea/SyntheaQuestions.tsv b/copilot/tests/test_questions/Synthea/SyntheaQuestions.tsv similarity index 100% rename from tests/test_questions/Synthea/SyntheaQuestions.tsv rename to copilot/tests/test_questions/Synthea/SyntheaQuestions.tsv diff --git a/tests/test_questions/Synthea/gsql/create_data_source.gsql b/copilot/tests/test_questions/Synthea/gsql/create_data_source.gsql similarity index 100% rename from tests/test_questions/Synthea/gsql/create_data_source.gsql rename to copilot/tests/test_questions/Synthea/gsql/create_data_source.gsql diff --git a/tests/test_questions/Synthea/gsql/create_graph.gsql b/copilot/tests/test_questions/Synthea/gsql/create_graph.gsql similarity index 100% rename from 
tests/test_questions/Synthea/gsql/create_graph.gsql rename to copilot/tests/test_questions/Synthea/gsql/create_graph.gsql diff --git a/tests/test_questions/Synthea/gsql/create_load_job.gsql b/copilot/tests/test_questions/Synthea/gsql/create_load_job.gsql similarity index 100% rename from tests/test_questions/Synthea/gsql/create_load_job.gsql rename to copilot/tests/test_questions/Synthea/gsql/create_load_job.gsql diff --git a/tests/test_questions/Synthea/gsql/create_schema.gsql b/copilot/tests/test_questions/Synthea/gsql/create_schema.gsql similarity index 100% rename from tests/test_questions/Synthea/gsql/create_schema.gsql rename to copilot/tests/test_questions/Synthea/gsql/create_schema.gsql diff --git a/tests/test_questions/Synthea/run_load_jobs.json b/copilot/tests/test_questions/Synthea/run_load_jobs.json similarity index 100% rename from tests/test_questions/Synthea/run_load_jobs.json rename to copilot/tests/test_questions/Synthea/run_load_jobs.json diff --git a/tests/test_questions/Synthea/setup_dataset.py b/copilot/tests/test_questions/Synthea/setup_dataset.py similarity index 100% rename from tests/test_questions/Synthea/setup_dataset.py rename to copilot/tests/test_questions/Synthea/setup_dataset.py diff --git a/tests/test_regex_chunker.py b/copilot/tests/test_regex_chunker.py similarity index 95% rename from tests/test_regex_chunker.py rename to copilot/tests/test_regex_chunker.py index 2b79fa1e..d1a6b4ef 100644 --- a/tests/test_regex_chunker.py +++ b/copilot/tests/test_regex_chunker.py @@ -1,5 +1,5 @@ import unittest -from app.supportai.chunkers.regex_chunker import RegexChunker +from common.chunkers.regex_chunker import RegexChunker class TestRegexChunker(unittest.TestCase): diff --git a/tests/test_sagemaker_llama7b.py b/copilot/tests/test_sagemaker_llama7b.py similarity index 100% rename from tests/test_sagemaker_llama7b.py rename to copilot/tests/test_sagemaker_llama7b.py diff --git a/tests/test_semantic_chunker.py 
b/copilot/tests/test_semantic_chunker.py similarity index 86% rename from tests/test_semantic_chunker.py rename to copilot/tests/test_semantic_chunker.py index d230f596..92ab7de4 100644 --- a/tests/test_semantic_chunker.py +++ b/copilot/tests/test_semantic_chunker.py @@ -1,10 +1,10 @@ import unittest from unittest.mock import Mock, patch -from app.supportai.chunkers.semantic_chunker import SemanticChunker +from common.chunkers.semantic_chunker import SemanticChunker class TestSemanticChunker(unittest.TestCase): - @patch("app.embeddings.embedding_services.EmbeddingModel") + @patch("common.embeddings.embedding_services.EmbeddingModel") @patch("langchain_experimental.text_splitter.SemanticChunker.create_documents") def test_chunk_single_string(self, create_documents, MockEmbeddingModel): mock_emb_service = MockEmbeddingModel() diff --git a/tests/test_service.py b/copilot/tests/test_service.py similarity index 100% rename from tests/test_service.py rename to copilot/tests/test_service.py diff --git a/tests/test_supportai.py b/copilot/tests/test_supportai.py similarity index 100% rename from tests/test_supportai.py rename to copilot/tests/test_supportai.py diff --git a/tests/test_supportai_load_ingest_creation.py b/copilot/tests/test_supportai_load_ingest_creation.py similarity index 100% rename from tests/test_supportai_load_ingest_creation.py rename to copilot/tests/test_supportai_load_ingest_creation.py diff --git a/tests/test_validate_function_call.py b/copilot/tests/test_validate_function_call.py similarity index 99% rename from tests/test_validate_function_call.py rename to copilot/tests/test_validate_function_call.py index 25cd2b77..bab20c87 100644 --- a/tests/test_validate_function_call.py +++ b/copilot/tests/test_validate_function_call.py @@ -6,7 +6,7 @@ import app import pytest from fastapi.testclient import TestClient -from app.py_schemas.schemas import Document +from common.py_schemas.schemas import Document from app.tools.validation_utils import ( 
validate_function_call, InvalidFunctionCallException, diff --git a/udfs/milvus/rest/ExprFunctions.hpp b/copilot/udfs/milvus/rest/ExprFunctions.hpp similarity index 100% rename from udfs/milvus/rest/ExprFunctions.hpp rename to copilot/udfs/milvus/rest/ExprFunctions.hpp diff --git a/udfs/milvus/rest/ExprUtil.hpp b/copilot/udfs/milvus/rest/ExprUtil.hpp similarity index 100% rename from udfs/milvus/rest/ExprUtil.hpp rename to copilot/udfs/milvus/rest/ExprUtil.hpp diff --git a/udfs/milvus/rest/install.gsql b/copilot/udfs/milvus/rest/install.gsql similarity index 100% rename from udfs/milvus/rest/install.gsql rename to copilot/udfs/milvus/rest/install.gsql diff --git a/udfs/milvus/rest/test.gsql b/copilot/udfs/milvus/rest/test.gsql similarity index 100% rename from udfs/milvus/rest/test.gsql rename to copilot/udfs/milvus/rest/test.gsql diff --git a/docker-compose-with-apps.yml b/docker-compose-with-apps.yml new file mode 100644 index 00000000..aa4603a6 --- /dev/null +++ b/docker-compose-with-apps.yml @@ -0,0 +1,116 @@ +version: '3.8' + +services: + copilot: + container_name: copilot + build: ./copilot + ports: + - 8000:8000 + depends_on: + - milvus-standalone + environment: + LLM_CONFIG: "/code/configs/llm_config.json" + DB_CONFIG: "/code/configs/db_config.json" + MILVUS_CONFIG: "/code/configs/milvus_config.json" + LOGLEVEL: "INFO" + MILVUS_HOST: "milvus-standalone" + USE_CYPHER: "false" + volumes: + - ./configs/:/code/configs + - ./common:/code/common + + eventual-consistency-service: + container_name: eventual-consistency-service + build: ./eventual-consistency-service + ports: + - 8001:8001 + depends_on: + - milvus-standalone + environment: + LLM_CONFIG: "/code/configs/llm_config.json" + DB_CONFIG: "/code/configs/db_config.json" + MILVUS_CONFIG: "/code/configs/milvus_config.json" + LOGLEVEL: "INFO" + MILVUS_HOST: "milvus-standalone" + volumes: + - ./configs/:/code/configs + - ./common:/code/common + + etcd: + container_name: milvus-etcd + image: 
quay.io/coreos/etcd:v3.5.5 + environment: + - ETCD_AUTO_COMPACTION_MODE=revision + - ETCD_AUTO_COMPACTION_RETENTION=1000 + - ETCD_QUOTA_BACKEND_BYTES=4294967296 + - ETCD_SNAPSHOT_COUNT=50000 + volumes: + - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/etcd:/etcd + command: etcd -advertise-client-urls=http://127.0.0.1:2379 -listen-client-urls http://0.0.0.0:2379 --data-dir /etcd + healthcheck: + test: ["CMD", "etcdctl", "endpoint", "health"] + interval: 30s + timeout: 20s + retries: 3 + + minio: + container_name: milvus-minio + image: minio/minio:RELEASE.2023-03-20T20-16-18Z + environment: + MINIO_ACCESS_KEY: minioadmin + MINIO_SECRET_KEY: minioadmin + ports: + - "9001:9001" + - "9002:9000" + volumes: + - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/minio:/minio_data + command: minio server /minio_data --console-address ":9001" + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:9002/minio/health/live"] + interval: 30s + timeout: 20s + retries: 3 + + milvus-standalone: + container_name: milvus-standalone + image: milvusdb/milvus:v2.3.10 + command: ["milvus", "run", "standalone"] + security_opt: + - seccomp:unconfined + environment: + ETCD_ENDPOINTS: etcd:2379 + MINIO_ADDRESS: minio:9000 + volumes: + - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/milvus:/var/lib/milvus + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:9091/healthz"] + interval: 30s + start_period: 90s + timeout: 20s + retries: 3 + ports: + - "19530:19530" + - "9091:9091" + depends_on: + - "etcd" + - "minio" + + tigergraph: + image: tigergraph/tigergraph:latest + ports: + - "14022:22" + - "9000:9000" + - "14240:14240" + environment: + - ~/data:/home/tigergraph/mydata + - tg-data:/home/tigergraph + ulimits: + nofile: + soft: 1000000 + hard: 1000000 + +networks: + default: + name: milvus +volumes: + tg-data: {} diff --git a/docker-compose.yml b/docker-compose.yml index d61e0149..0abe54d0 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,98 +1,31 @@ -version: '3.8' - services: copilot: + 
image: tigergraphml/copilot container_name: copilot - image: tigergraphml/copilot:0.5.0 ports: - 8000:8000 depends_on: - - milvus-standalone + - eventual-consistency-service environment: LLM_CONFIG: "/code/configs/llm_config.json" DB_CONFIG: "/code/configs/db_config.json" MILVUS_CONFIG: "/code/configs/milvus_config.json" LOGLEVEL: "INFO" - MILVUS_HOST: "milvus-standalone" USE_CYPHER: "false" volumes: - ./configs/:/code/configs + - ./common:/code/common - etcd: - container_name: milvus-etcd - image: quay.io/coreos/etcd:v3.5.5 - environment: - - ETCD_AUTO_COMPACTION_MODE=revision - - ETCD_AUTO_COMPACTION_RETENTION=1000 - - ETCD_QUOTA_BACKEND_BYTES=4294967296 - - ETCD_SNAPSHOT_COUNT=50000 - volumes: - - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/etcd:/etcd - command: etcd -advertise-client-urls=http://127.0.0.1:2379 -listen-client-urls http://0.0.0.0:2379 --data-dir /etcd - healthcheck: - test: ["CMD", "etcdctl", "endpoint", "health"] - interval: 30s - timeout: 20s - retries: 3 - - minio: - container_name: milvus-minio - image: minio/minio:RELEASE.2023-03-20T20-16-18Z - environment: - MINIO_ACCESS_KEY: minioadmin - MINIO_SECRET_KEY: minioadmin + eventual-consistency-service: + image: tigergraphml/ecc + container_name: eventual-consistency-service ports: - - "9001:9001" - - "9002:9000" - volumes: - - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/minio:/minio_data - command: minio server /minio_data --console-address ":9001" - healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:9002/minio/health/live"] - interval: 30s - timeout: 20s - retries: 3 - - milvus-standalone: - container_name: milvus-standalone - image: milvusdb/milvus:v2.3.10 - command: ["milvus", "run", "standalone"] - security_opt: - - seccomp:unconfined + - 8001:8001 environment: - ETCD_ENDPOINTS: etcd:2379 - MINIO_ADDRESS: minio:9000 + LLM_CONFIG: "/code/configs/llm_config.json" + DB_CONFIG: "/code/configs/db_config.json" + MILVUS_CONFIG: "/code/configs/milvus_config.json" + LOGLEVEL: "INFO" volumes: - - 
${DOCKER_VOLUME_DIRECTORY:-.}/volumes/milvus:/var/lib/milvus - healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:9091/healthz"] - interval: 30s - start_period: 90s - timeout: 20s - retries: 3 - ports: - - "19530:19530" - - "9091:9091" - depends_on: - - "etcd" - - "minio" - - tigergraph: - image: tigergraph/tigergraph:latest - ports: - - "14022:22" - - "9000:9000" - - "14240:14240" - environment: - - ~/data:/home/tigergraph/mydata - - tg-data:/home/tigergraph - ulimits: - nofile: - soft: 1000000 - hard: 1000000 - -networks: - default: - name: milvus -volumes: - tg-data: {} + - ./configs/:/code/configs + - ./common:/code/common diff --git a/eventual-consistency-service/.dockerignore b/eventual-consistency-service/.dockerignore new file mode 100644 index 00000000..5b04df42 --- /dev/null +++ b/eventual-consistency-service/.dockerignore @@ -0,0 +1,5 @@ +Dockerfile +Dockerfile.tests +docs +tests +udfs diff --git a/eventual-consistency-service/Dockerfile b/eventual-consistency-service/Dockerfile new file mode 100644 index 00000000..55a0a6d6 --- /dev/null +++ b/eventual-consistency-service/Dockerfile @@ -0,0 +1,18 @@ +FROM python:3.11.8 +WORKDIR /code + +COPY eventual-consistency-service/requirements.txt requirements.txt + +RUN apt-get update && apt-get upgrade -y +RUN pip install -r requirements.txt + +COPY eventual-consistency-service/app /code +COPY common /code/common + +ENV LLM_CONFIG="/code/configs/llm_config.json" +ENV DB_CONFIG="/code/configs/db_config.json" +ENV MILVUS_CONFIG="/code/configs/milvus_config.json" +ENV LOGLEVEL="INFO" + +EXPOSE 8001 +CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8001"] diff --git a/Dockerfile.tests b/eventual-consistency-service/Dockerfile.tests similarity index 93% rename from Dockerfile.tests rename to eventual-consistency-service/Dockerfile.tests index 6fff0d59..842e209f 100644 --- a/Dockerfile.tests +++ b/eventual-consistency-service/Dockerfile.tests @@ -17,6 +17,6 @@ COPY ./tests /code/tests COPY ./.git 
/code/.git # INFO, DEBUG, DEBUG_PII -ENV LOGLEVEL="DEBUG" +ENV LOGLEVEL="INFO" WORKDIR /code/tests diff --git a/eventual-consistency-service/app/__init__.py b/eventual-consistency-service/app/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/eventual-consistency-service/app/common b/eventual-consistency-service/app/common new file mode 120000 index 00000000..dc879abe --- /dev/null +++ b/eventual-consistency-service/app/common @@ -0,0 +1 @@ +../../common \ No newline at end of file diff --git a/eventual-consistency-service/app/configs b/eventual-consistency-service/app/configs new file mode 120000 index 00000000..5992d109 --- /dev/null +++ b/eventual-consistency-service/app/configs @@ -0,0 +1 @@ +../../configs \ No newline at end of file diff --git a/app/sync/eventual_consistency_checker.py b/eventual-consistency-service/app/eventual_consistency_checker.py similarity index 90% rename from app/sync/eventual_consistency_checker.py rename to eventual-consistency-service/app/eventual_consistency_checker.py index dd0a565b..50aea1d0 100644 --- a/app/sync/eventual_consistency_checker.py +++ b/eventual-consistency-service/app/eventual_consistency_checker.py @@ -1,14 +1,14 @@ +import json import logging import time from typing import Dict, List -from app.config import doc_processing_config -from app.embeddings.embedding_services import EmbeddingModel -from app.embeddings.milvus_embedding_store import MilvusEmbeddingStore -from app.metrics.tg_proxy import TigerGraphConnectionProxy -from app.supportai.chunkers import BaseChunker -from app.supportai.extractors import BaseExtractor -from app.tools.logwriter import LogWriter +from common.logs.logwriter import LogWriter +from common.embeddings.embedding_services import EmbeddingModel +from common.embeddings.milvus_embedding_store import MilvusEmbeddingStore +from common.metrics.tg_proxy import TigerGraphConnectionProxy +from common.chunkers import BaseChunker +from common.extractors import BaseExtractor logger 
= logging.getLogger(__name__) @@ -25,6 +25,7 @@ def __init__( conn: TigerGraphConnectionProxy, chunker: BaseChunker, extractor: BaseExtractor, + batch_size = 10 ): self.interval_seconds = interval_seconds self.graphname = graphname @@ -36,6 +37,7 @@ def __init__( self.embedding_stores = embedding_stores self.chunker = chunker self.extractor = extractor + self.batch_size = batch_size self._check_query_install("Scan_For_Updates") self._check_query_install("Update_Vertices_Processing_Status") @@ -43,7 +45,7 @@ def __init__( def _install_query(self, query_name): LogWriter.info(f"Installing query {query_name}") - with open(f"app/gsql/supportai/{query_name}.gsql", "r") as f: + with open(f"common/gsql/supportai/{query_name}.gsql", "r") as f: query = f.read() res = self.conn.gsql( "USE GRAPH " @@ -53,6 +55,11 @@ def _install_query(self, query_name): + "\n INSTALL QUERY " + query_name ) + + if "error" in str(res).lower(): + LogWriter.error(res) + raise Exception(f"Eventual consistency checker failed to install query {query_name}") + return res def _check_query_install(self, query_name): @@ -182,11 +189,10 @@ def _upsert_rels(self, src_id, src_type, relationships): def fetch_and_process_vertex(self): v_types_to_scan = self.embedding_indices vertex_ids_content_map: dict = {} - batch_size = doc_processing_config.get("batch_size", 10) for v_type in v_types_to_scan: LogWriter.info(f"Fetching vertex ids and content for vertex type: {v_type}") vertex_ids_content_map = self.conn.runInstalledQuery( - "Scan_For_Updates", {"v_type": v_type, "num_samples": batch_size} + "Scan_For_Updates", {"v_type": v_type, "num_samples": self.batch_size} )[0]["@@v_and_text"] vertex_ids = [vertex_id for vertex_id in vertex_ids_content_map.keys()] @@ -243,18 +249,16 @@ def initialize(self): f"Eventual Consistency Check running for graphname {self.graphname} " ) self.is_initialized = True - ok = True - while ok: - ok = self.fetch_and_process_vertex() - LogWriter.info( - f"Eventual Consistency Check 
finished for graphname {self.graphname}. Success={ok}" - ) + while True: + self.fetch_and_process_vertex() + time.sleep(self.interval_seconds) def get_status(self): statuses = {} for v_type in self.embedding_indices: status = self.conn.runInstalledQuery( "ECC_Status", {"v_type": v_type} - )[0]["results"] + )[0] + LogWriter.info(f"ECC_Status for graphname {self.graphname}: {status}") statuses[v_type] = status - return self.is_initialized \ No newline at end of file + return statuses \ No newline at end of file diff --git a/eventual-consistency-service/app/main.py b/eventual-consistency-service/app/main.py new file mode 100644 index 00000000..0efb42ea --- /dev/null +++ b/eventual-consistency-service/app/main.py @@ -0,0 +1,137 @@ +import logging +from typing import Annotated + +from fastapi import Depends, FastAPI, BackgroundTasks +from fastapi.security.http import HTTPBase + +from common.config import ( + db_config, + embedding_service, + get_llm_service, + llm_config, + milvus_config, + security, + doc_processing_config, +) +from common.embeddings.milvus_embedding_store import MilvusEmbeddingStore +from common.logs.logwriter import LogWriter +from common.metrics.tg_proxy import TigerGraphConnectionProxy +from common.db.connections import elevate_db_connection_to_token +from eventual_consistency_checker import EventualConsistencyChecker +import json +from threading import Thread + +logger = logging.getLogger(__name__) +consistency_checkers = {} + +app = FastAPI() + +@app.on_event("startup") +def startup_event(): + if not db_config.get("enable_consistency_checker", True): + LogWriter.info("Eventual consistency checker disabled") + return + + startup_checkers = db_config.get("graph_names", []) + for graphname in startup_checkers: + conn = elevate_db_connection_to_token(db_config["hostname"], db_config["username"], db_config["password"], graphname) + start_ecc_in_thread(graphname, conn) + +def start_ecc_in_thread(graphname: str, conn: TigerGraphConnectionProxy): + 
thread = Thread(target=initialize_eventual_consistency_checker, args=(graphname, conn), daemon=True) + thread.start() + LogWriter.info(f"Eventual consistency checker started for graph {graphname}") + +def initialize_eventual_consistency_checker(graphname: str, conn: TigerGraphConnectionProxy): + check_interval_seconds = milvus_config.get("sync_interval_seconds", 30 * 60) + if graphname not in consistency_checkers: + vector_indices = {} + if milvus_config.get("enabled") == "true": + vertex_field = milvus_config.get("vertex_field", "vertex_id") + index_names = milvus_config.get( + "indexes", + ["Document", "DocumentChunk", "Entity", "Relationship", "Concept"], + ) + for index_name in index_names: + vector_indices[graphname + "_" + index_name] = MilvusEmbeddingStore( + embedding_service, + host=milvus_config["host"], + port=milvus_config["port"], + support_ai_instance=True, + collection_name=graphname + "_" + index_name, + username=milvus_config.get("username", ""), + password=milvus_config.get("password", ""), + vector_field=milvus_config.get("vector_field", "document_vector"), + text_field=milvus_config.get("text_field", "document_content"), + vertex_field=vertex_field, + ) + + if doc_processing_config.get("chunker") == "semantic": + from common.chunkers.semantic_chunker import SemanticChunker + + chunker = SemanticChunker( + embedding_service, + doc_processing_config["chunker_config"].get("method", "percentile"), + doc_processing_config["chunker_config"].get("threshold", 0.95), + ) + elif doc_processing_config.get("chunker") == "regex": + from common.chunkers.regex_chunker import RegexChunker + + chunker = RegexChunker( + pattern=doc_processing_config["chunker_config"].get( + "pattern", "\\r?\\n" + ) + ) + elif doc_processing_config.get("chunker") == "character": + from common.chunkers.character_chunker import CharacterChunker + + chunker = CharacterChunker( + chunk_size=doc_processing_config["chunker_config"].get( + "chunk_size", 1024 + ), + 
overlap_size=doc_processing_config["chunker_config"].get( + "overlap_size", 0 + ), + ) + else: + raise ValueError("Invalid chunker type") + + if doc_processing_config.get("extractor") == "llm": + from common.extractors import LLMEntityRelationshipExtractor + + extractor = LLMEntityRelationshipExtractor(get_llm_service(llm_config)) + else: + raise ValueError("Invalid extractor type") + + checker = EventualConsistencyChecker( + check_interval_seconds, + graphname, + vertex_field, # FIXME: if milvus is not enabled, this is not defined and will crash here (vertex_field used before assignment) + embedding_service, + index_names, + vector_indices, + conn, + chunker, + extractor, + ) + consistency_checkers[graphname] = checker + checker.initialize() + return consistency_checkers[graphname] + +@app.get("/") +def root(): + LogWriter.info(f"Healthcheck") + return {"status": "ok"} + +@app.get("/{graphname}/consistency_status") +def consistency_status(graphname: str, credentials: Annotated[HTTPBase, Depends(security)]): + if graphname in consistency_checkers: + ecc = consistency_checkers[graphname] + status = json.dumps(ecc.get_status()) + else: + conn = elevate_db_connection_to_token(db_config["hostname"], credentials.username, credentials.password, graphname) + start_ecc_in_thread(graphname, conn) + status = f"Eventual consistency checker started for graph {graphname}" + + LogWriter.info(f"Returning consistency status for {graphname}: {status}") + return status diff --git a/eventual-consistency-service/requirements.txt b/eventual-consistency-service/requirements.txt new file mode 100644 index 00000000..90cc7f2c --- /dev/null +++ b/eventual-consistency-service/requirements.txt @@ -0,0 +1,136 @@ +aiohttp==3.9.3 +aiosignal==1.3.1 +annotated-types==0.5.0 +anyio==3.7.1 +appdirs==1.4.4 +argon2-cffi==23.1.0 +argon2-cffi-bindings==21.2.0 +async-timeout==4.0.3 +attrs==23.1.0 +azure-core==1.30.1 +azure-storage-blob==12.19.1 +backoff==2.2.1 +beautifulsoup4==4.12.2 +boto3==1.28.83 
+botocore==1.31.83 +cachetools==5.3.2 +certifi==2023.7.22 +cffi==1.16.0 +chardet==5.2.0 +charset-normalizer==3.2.0 +click==8.1.7 +cryptography==42.0.5 +dataclasses-json==0.5.14 +distro==1.8.0 +docker-pycreds==0.4.0 +emoji==2.8.0 +environs==9.5.0 +exceptiongroup==1.1.3 +fastapi==0.103.1 +filetype==1.2.0 +frozenlist==1.4.0 +gitdb==4.0.11 +GitPython==3.1.40 +google-api-core==2.14.0 +google-auth==2.23.4 +google-cloud-aiplatform==1.36.1 +google-cloud-bigquery==3.13.0 +google-cloud-core==2.3.3 +google-cloud-resource-manager==1.10.4 +google-cloud-storage==2.13.0 +google-crc32c==1.5.0 +google-resumable-media==2.6.0 +googleapis-common-protos==1.61.0 +greenlet==2.0.2 +groq==0.5.0 +grpc-google-iam-v1==0.12.7 +grpcio==1.59.2 +grpcio-status==1.59.2 +h11==0.14.0 +httpcore==0.18.0 +httptools==0.6.0 +httpx==0.25.0 +huggingface_hub==0.23.0 +idna==3.4 +isodate==0.6.1 +jmespath==1.0.1 +joblib==1.3.2 +jq==1.6.0 +jsonpatch==1.33 +jsonpointer==2.4 +langchain==0.1.12 +langchain-community==0.0.28 +langchain-core==0.1.49 +langchain-experimental==0.0.54 +langchain-groq==0.1.3 +langchain-text-splitters==0.0.1 +langchainhub==0.1.14 +langdetect==1.0.9 +langgraph==0.0.40 +langsmith==0.1.24 +lxml==4.9.3 +marshmallow==3.20.1 +minio==7.2.5 +multidict==6.0.4 +mypy-extensions==1.0.0 +nltk==3.8.1 +numpy==1.26.4 +openai==1.3.7 +orjson==3.9.15 +packaging==23.2 +pandas==2.1.1 +pathtools==0.1.2 +prometheus_client==0.20.0 +proto-plus==1.22.3 +protobuf==4.24.4 +psutil==5.9.6 +pyarrow==15.0.1 +pyasn1==0.5.0 +pyasn1-modules==0.3.0 +pycparser==2.21 +pycryptodome==3.20.0 +pydantic==2.3.0 +pydantic_core==2.6.3 +pygit2==1.13.2 +pymilvus==2.3.6 +python-dateutil==2.8.2 +python-dotenv==1.0.0 +python-iso639==2023.6.15 +python-magic==0.4.27 +pyTigerDriver==1.0.15 +pyTigerGraph==1.6.1 +pytz==2023.3.post1 +PyYAML==6.0.1 +rapidfuzz==3.4.0 +regex==2023.10.3 +requests==2.31.0 +rsa==4.9 +s3transfer==0.7.0 +sentry-sdk==1.32.0 +setproctitle==1.3.3 +shapely==2.0.2 +six==1.16.0 +smmap==5.0.1 +sniffio==1.3.0 +soupsieve==2.5 
+SQLAlchemy==2.0.20 +starlette==0.27.0 +tabulate==0.9.0 +tenacity==8.2.3 +tiktoken==0.5.1 +tqdm==4.66.1 +types-requests==2.31.0.6 +types-urllib3==1.26.25.14 +typing-inspect==0.9.0 +typing_extensions==4.7.1 +tzdata==2023.3 +ujson==5.9.0 +unstructured==0.10.23 +urllib3==1.26.18 +uvicorn==0.23.2 +uvloop==0.17.0 +validators==0.22.0 +wandb==0.15.12 +watchfiles==0.20.0 +websockets==11.0.3 +yarl==1.9.2 diff --git a/eventual-consistency-service/tests/app b/eventual-consistency-service/tests/app new file mode 120000 index 00000000..5df94d99 --- /dev/null +++ b/eventual-consistency-service/tests/app @@ -0,0 +1 @@ +../app \ No newline at end of file diff --git a/eventual-consistency-service/tests/common b/eventual-consistency-service/tests/common new file mode 120000 index 00000000..dc879abe --- /dev/null +++ b/eventual-consistency-service/tests/common @@ -0,0 +1 @@ +../../common \ No newline at end of file diff --git a/tests/test_eventual_consistency_checker.py b/eventual-consistency-service/tests/test_eventual_consistency_checker.py similarity index 83% rename from tests/test_eventual_consistency_checker.py rename to eventual-consistency-service/tests/test_eventual_consistency_checker.py index 620f056c..74ca1a2e 100644 --- a/tests/test_eventual_consistency_checker.py +++ b/eventual-consistency-service/tests/test_eventual_consistency_checker.py @@ -1,14 +1,13 @@ -import asyncio import unittest import pytest from unittest.mock import Mock, patch, MagicMock -from app.sync.eventual_consistency_checker import EventualConsistencyChecker +from app.eventual_consistency_checker import EventualConsistencyChecker class TestEventualConsistencyChecker(unittest.TestCase): - @patch("app.embeddings.milvus_embedding_store.MilvusEmbeddingStore") - @patch("app.embeddings.embedding_services.EmbeddingModel") - @patch("app.util.get_db_connection_id_token", return_value=Mock()) + @patch("common.embeddings.milvus_embedding_store.MilvusEmbeddingStore") + 
@patch("common.embeddings.embedding_services.EmbeddingModel") + @patch("common.db.connections.get_db_connection_id_token", return_value=Mock()) def test_initialization( self, mock_get_db_connection, @@ -36,9 +35,9 @@ def test_initialization( checker.initialize() self.assertTrue(checker.is_initialized) - @patch("app.embeddings.milvus_embedding_store.MilvusEmbeddingStore") - @patch("app.embeddings.embedding_services.EmbeddingModel") - @patch("app.util.get_db_connection_id_token", return_value=Mock()) + @patch("common.embeddings.milvus_embedding_store.MilvusEmbeddingStore") + @patch("common.embeddings.embedding_services.EmbeddingModel") + @patch("common.db.connections.get_db_connection_id_token", return_value=Mock()) def test_fetch_and_process_vertex( self, mock_get_db_connection, mock_embedding_model, mock_embedding_store ): diff --git a/scripts/build.sh b/scripts/build.sh new file mode 100755 index 00000000..7c6b6d8f --- /dev/null +++ b/scripts/build.sh @@ -0,0 +1,3 @@ +./build_copilot.sh +./build_ecc.sh +rm -rf build diff --git a/scripts/build_copilot.sh b/scripts/build_copilot.sh new file mode 100755 index 00000000..72553801 --- /dev/null +++ b/scripts/build_copilot.sh @@ -0,0 +1,14 @@ +# setup +buildPath="build/copilot" +cd .. +rm -rf build +mkdir build + +# copy assets into build +cp -R copilot $buildPath +rm $buildPath/app/configs $buildPath/app/common +cp -R configs $buildPath/app + +# docker build +docker build -t copilot -f copilot/Dockerfile . +rm -rf build diff --git a/scripts/build_ecc.sh b/scripts/build_ecc.sh new file mode 100755 index 00000000..08d2c324 --- /dev/null +++ b/scripts/build_ecc.sh @@ -0,0 +1,15 @@ +# setup +buildPath="build/ecc" +cd .. +rootPath=`pwd` +rm -rf build +mkdir build + +# copy assets into build +cp -R eventual-consistency-service $buildPath +rm $buildPath/app/configs $buildPath/app/common +cp -R configs $buildPath/app + +# docker build +docker build -t ecc -f eventual-consistency-service/Dockerfile . 
+rm -rf build diff --git a/tests/run_tests.sh b/tests/run_tests.sh deleted file mode 100755 index 04c10157..00000000 --- a/tests/run_tests.sh +++ /dev/null @@ -1,122 +0,0 @@ -#!/bin/sh -export DB_CONFIG=../configs/db_config.json -export MILVUS_CONFIG=../configs/milvus_config.json -export LOGLEVEL=INFO - -# Set default values -llm_service="all" -schema="all" -use_wandb="true" - -# Check if llm_service argument is provided -if [ "$#" -ge 1 ]; then - llm_service="$1" -fi - -# Check if schema argument is provided -if [ "$#" -ge 2 ]; then - schema="$2" -fi - -# Check if use_wandb argument is provided -if [ "$#" -ge 3 ]; then - use_wandb="$3" -fi - -# Define the mapping of Python script names to JSON config file names -azure_gpt35_script="test_azure_gpt35_turbo_instruct.py" -azure_gpt35_config="../configs/azure_llm_config.json" - -openai_gpt35_script="test_openai_gpt35-turbo.py" -openai_gpt35_config="../configs/openai_gpt3.5-turbo_config.json" - -openai_gpt4_script="test_openai_gpt4.py" -openai_gpt4_config="../configs/openai_gpt4_config.json" - -huggingface_phi3_script="test_huggingface_phi3.py" -huggingface_phi3_config="../configs/huggingface_severless_endpoint_phi3_config.json" - -openai_gpt4o_script="test_openai_gpt4o.py" -openai_gpt4o_config="../configs/openai_gpt4o_config.json" - -gcp_textbison_script="test_gcp_text-bison.py" -gcp_textbison_config="../configs/gcp_text-bison_config.json" - -groq_mixtral_script="test_groq_mixtral8x7b.py" -groq_mixtral_config="../configs/groq_mixtral_config.json" - -aws_bedrock_script="test_bedrock.py" -aws_bedrock_config="../configs/bedrock_config.json" - -huggingface_llama3_script="test_huggingface_llama70b.py" -huggingface_llama3_config="../configs/huggingface_llama70b_config.json" - -# Function to execute a service -execute_service() { - local service="$1" - local config_file="$2" - - # Export the path to the config file as an environment variable - export LLM_CONFIG="$config_file" - - if [ "$use_wandb" = "true" ]; then - python 
"$service" --schema "$schema" - else - python "$service" --schema "$schema" --no-wandb - fi - - # Unset the environment variable after the Python script execution - unset CONFIG_FILE_PATH -} - -# Check the value of llm_service and execute the corresponding Python script(s) -case "$llm_service" in - "azure_gpt35") - execute_service "$azure_gpt35_script" "$azure_gpt35_config" - ;; - "openai_gpt35") - execute_service "$openai_gpt35_script" "$openai_gpt35_config" - ;; - "openai_gpt4") - execute_service "$openai_gpt4_script" "$openai_gpt4_config" - ;; - "openai_gpt4o") - execute_service "$openai_gpt4o_script" "$openai_gpt4o_config" - ;; - "gcp_textbison") - execute_service "$gcp_textbison_script" "$gcp_textbison_config" - ;; - "huggingface_phi3") - execute_service "$huggingface_phi3_script" "$huggingface_phi3_config" - ;; - "groq_mixtral") - execute_service "$groq_mixtral_script" "$groq_mixtral_config" - ;; - "aws_bedrock") - execute_service "$aws_bedrock_script" "$aws_bedrock_config" - ;; - "huggingface_llama3") - execute_service "$huggingface_llama3_script" "$huggingface_llama3_config" - ;; - "all") - echo "Executing all services..." 
- for service_script_pair in "$azure_gpt35_script $azure_gpt35_config" \ - "$openai_gpt35_script $openai_gpt35_config" \ - "$openai_gpt4_script $openai_gpt4_config" \ - "$gcp_textbison_script $gcp_textbison_config" \ - "$groq_mixtral_script $groq_mixtral_config" \ - "$aws_bedrock_script $aws_bedrock_config" \ - "$openai_gpt4o_script $openai_gpt4o_config" \ - "$huggingface_llama3_script $huggingface_llama3_config" \ - "$huggingface_phi3_script $huggingface_phi3_config"; do - execute_service $service_script_pair - done - ;; - *) - echo "Unknown llm_service: $llm_service" - exit 1 - ;; -esac - -python create_wandb_report.py - diff --git a/tests/test_questions/.DS_Store b/tests/test_questions/.DS_Store deleted file mode 100644 index c7fd5762063150e2e118c3dfd01ed09c62fe0eb5..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeHK!AiqG5S?wSO{hW-3OxqA7Ob@v#Y?F5;MIs8RBA$M4aTgrsX3HF&iX@siQnVQ z?p7?d7Y`z324>&v>`az<2|HN;5S?+j2T%n73ze{7(=C<$fOk>eyBsa#JlShi*N zs47vt$jktm$bSk1w73PW|bUOMS6XzPtHR^N_YG-_o*;$wyicq_w-%;TpT#Y<3 z1I)lY14TWpQvW|)e*d3O;u$l*46GCbqSW=e9W2T8)`jAz*E*