From 09c3eeb3045356892f1ee1568306f6b8673bc63a Mon Sep 17 00:00:00 2001 From: Liang Lv Date: Sat, 25 Jan 2025 14:01:57 +0800 Subject: [PATCH 1/6] Refactor docker compose of the dataprep microservice (#1227) * Refactor dataprep docker compose files Signed-off-by: lvliang-intel --- .../deployment/docker_compose/compose.yaml | 258 ++++++++++++++++++ .../docker_compose/compose_elasticsearch.yaml | 41 --- .../docker_compose/compose_milvus.yaml | 69 ----- .../compose_neo4j_langchain.yaml | 48 ---- .../compose_neo4j_llamaindex.yaml | 97 ------- .../docker_compose/compose_opensearch.yaml | 65 ----- .../docker_compose/compose_pgvector.yaml | 41 --- .../docker_compose/compose_pipecone.yaml | 40 --- .../docker_compose/compose_qdrant.yaml | 46 ---- .../compose_redis_multimodal.yaml | 29 -- .../docker_compose/compose_vdms.yaml | 28 -- .../compose_vdms_multimodal.yaml | 28 -- comps/dataprep/src/Dockerfile | 2 +- .../src/integrations/neo4j_llamaindex.py | 2 +- .../deployment/docker_compose/compose.yaml | 8 +- .../deployment/docker_compose/compose.yaml | 11 +- .../deployment/docker_compose/compose.yaml | 22 +- comps/third_parties/neo4j/src/README.md | 29 ++ comps/third_parties/neo4j/src/__init__.py | 2 + .../deployment/docker_compose/compose.yaml | 8 +- .../deployment/docker_compose/compose.yaml | 9 +- .../deployment/docker_compose/compose.yaml | 14 +- comps/third_parties/qdrant/src/README.md | 21 ++ comps/third_parties/qdrant/src/__init__.py | 2 + comps/third_parties/qdrant/src/init.sql | 1 + .../deployment/docker_compose/compose.yaml | 13 +- .../deployment/docker_compose/compose.yaml | 13 +- tests/dataprep/test_dataprep_elasticsearch.sh | 67 ++--- tests/dataprep/test_dataprep_milvus.sh | 50 ++-- .../test_dataprep_neo4j_on_intel_hpu.sh | 99 +++---- tests/dataprep/test_dataprep_opensearch.sh | 81 +++--- tests/dataprep/test_dataprep_pgvector.sh | 53 ++-- tests/dataprep/test_dataprep_pinecone.sh | 27 +- tests/dataprep/test_dataprep_qdrant.sh | 50 ++-- tests/dataprep/test_dataprep_redis.sh | 58 ++-- .../test_dataprep_redis_multimodal.sh | 95 +++---- tests/dataprep/test_dataprep_vdms.sh | 38 +-- .../dataprep/test_dataprep_vdms_multimodal.sh | 47 ++-- ...ls_hallucination_detection_on_intel_hpu.sh | 1 + .../test_retrievers_neo4j_on_intel_hpu.sh | 175 ++++++++++++ .../retrievers/test_retrievers_opensearch.sh | 2 +- 41 files changed, 863 insertions(+), 927 deletions(-) create mode 100644 comps/dataprep/deployment/docker_compose/compose.yaml delete mode 100644 comps/dataprep/deployment/docker_compose/compose_elasticsearch.yaml delete mode 100644 comps/dataprep/deployment/docker_compose/compose_milvus.yaml delete mode 100644 comps/dataprep/deployment/docker_compose/compose_neo4j_langchain.yaml delete mode 100644 comps/dataprep/deployment/docker_compose/compose_neo4j_llamaindex.yaml delete mode 100644 comps/dataprep/deployment/docker_compose/compose_opensearch.yaml delete mode 100644 comps/dataprep/deployment/docker_compose/compose_pgvector.yaml delete mode 100644 comps/dataprep/deployment/docker_compose/compose_pipecone.yaml delete mode 100644 comps/dataprep/deployment/docker_compose/compose_qdrant.yaml delete mode 100644 comps/dataprep/deployment/docker_compose/compose_redis_multimodal.yaml delete mode 100644 comps/dataprep/deployment/docker_compose/compose_vdms.yaml delete mode 100644 comps/dataprep/deployment/docker_compose/compose_vdms_multimodal.yaml create mode 100644 comps/third_parties/neo4j/src/README.md create mode 100644 comps/third_parties/neo4j/src/__init__.py create mode 100644 
comps/third_parties/qdrant/src/README.md create mode 100644 comps/third_parties/qdrant/src/__init__.py create mode 100644 comps/third_parties/qdrant/src/init.sql create mode 100644 tests/retrievers/test_retrievers_neo4j_on_intel_hpu.sh diff --git a/comps/dataprep/deployment/docker_compose/compose.yaml b/comps/dataprep/deployment/docker_compose/compose.yaml new file mode 100644 index 0000000000..ef54a69e0c --- /dev/null +++ b/comps/dataprep/deployment/docker_compose/compose.yaml @@ -0,0 +1,258 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + + +include: + - ../../../third_parties/elasticsearch/deployment/docker_compose/compose.yaml + - ../../../third_parties/neo4j/deployment/docker_compose/compose.yaml + - ../../../third_parties/opensearch/deployment/docker_compose/compose.yaml + - ../../../third_parties/pgvector/deployment/docker_compose/compose.yaml + - ../../../third_parties/qdrant/deployment/docker_compose/compose.yaml + - ../../../third_parties/redis/deployment/docker_compose/compose.yaml + - ../../../third_parties/vdms/deployment/docker_compose/compose.yaml + - ../../../third_parties/tgi/deployment/docker_compose/compose.yaml + - ../../../third_parties/tei/deployment/docker_compose/compose.yaml + +services: + + dataprep-elasticsearch: + image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} + container_name: dataprep-elasticsearch + ports: + - "${DATAPREP_PORT:-11100}:5000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_ELASTICSEARCH" + ES_CONNECTION_STRING: ${ES_CONNECTION_STRING} + INDEX_NAME: ${INDEX_NAME} + TEI_ENDPOINT: ${TEI_ENDPOINT} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + restart: unless-stopped + depends_on: + elasticsearch-vector-db: + condition: service_healthy + + dataprep-milvus: + image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} + container_name: dataprep-milvus-server + ports: + - "${DATAPREP_PORT:-11101}:5000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_MILVUS" + MILVUS_HOST: ${MILVUS_HOST} + TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + restart: unless-stopped + depends_on: + tei-embedding-serving: + condition: service_healthy + + dataprep-neo4j-llamaindex: + image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} + container_name: dataprep-neo4j-llamaindex + depends_on: + neo4j-apoc: + condition: service_healthy + tgi-gaudi-server: + condition: service_healthy + tei-embedding-serving: + condition: service_healthy + ports: + - "${DATAPREP_PORT:-11103}:5000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + host_ip: ${host_ip} + DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_NEO4J_LLAMAINDEX" + NEO4J_URL: ${NEO4J_URL} + NEO4J_USERNAME: ${NEO4J_USERNAME} + NEO4J_PASSWORD: ${NEO4J_PASSWORD} + TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} + TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} + OPENAI_API_KEY: ${OPENAI_API_KEY} + OPENAI_EMBEDDING_MODEL: ${OPENAI_EMBEDDING_MODEL} + OPENAI_LLM_MODEL: ${OPENAI_LLM_MODEL} + EMBEDDING_MODEL_ID: ${EMBEDDING_MODEL_ID} + LLM_MODEL_ID: ${LLM_MODEL_ID} + LOGFLAG: ${LOGFLAG} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + restart: unless-stopped + + dataprep-opensearch: + image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} + container_name: dataprep-opensearch-server + ports: + - "${DATAPREP_PORT:-11104}:5000" + depends_on: 
+ opensearch-vector-db: + condition: service_healthy + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + OPENSEARCH_INITIAL_ADMIN_PASSWORD: ${OPENSEARCH_INITIAL_ADMIN_PASSWORD} + DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_OPENSEARCH" + OPENSEARCH_URL: ${OPENSEARCH_URL} + INDEX_NAME: ${INDEX_NAME} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + restart: unless-stopped + security_opt: + - no-new-privileges:true + + dataprep-pgvector: + image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} + container_name: dataprep-pgvector-server + ports: + - "${DATAPREP_PORT:-11105}:5000" + depends_on: + pgvector-db: + condition: service_healthy + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_PGVECTOR" + PG_CONNECTION_STRING: ${PG_CONNECTION_STRING} + restart: unless-stopped + + dataprep-pinecone: + image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} + container_name: dataprep-pinecone-server + ports: + - "${DATAPREP_PORT:-11106}:5000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_PINECONE" + PINECONE_API_KEY: ${PINECONE_API_KEY} + PINECONE_INDEX_NAME: ${PINECONE_INDEX_NAME} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + restart: unless-stopped + + dataprep-qdrant: + image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} + container_name: dataprep-qdrant-server + depends_on: + qdrant-vector-db: + condition: service_healthy + tei-embedding-serving: + condition: service_healthy + ports: + - "${DATAPREP_PORT:-11107}:5000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_QDRANT" + QDRANT_HOST: ${QDRANT_HOST} + QDRANT_PORT: ${QDRANT_PORT} + COLLECTION_NAME: ${COLLECTION_NAME} + TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + restart: unless-stopped + + dataprep-redis: + image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} + container_name: dataprep-redis-server + depends_on: + redis-vector-db: + condition: service_healthy + tei-embedding-serving: + condition: service_healthy + ports: + - "${DATAPREP_PORT:-11108}:5000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_REDIS" + REDIS_HOST: ${REDIS_HOST} + REDIS_PORT: ${REDIS_PORT} + REDIS_URL: ${REDIS_URL} + TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} + INDEX_NAME: ${INDEX_NAME} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + restart: unless-stopped + + dataprep-multimodal-redis: + image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} + container_name: dataprep-multimodal-redis-server + ports: + - "${DATAPREP_PORT:-11109}:5000" + depends_on: + redis-vector-db: + condition: service_healthy + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + MULTIMODAL_DATAPREP: true + DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_MULTIMODALREDIS" + REDIS_HOST: ${REDIS_HOST} + REDIS_PORT: ${REDIS_PORT} + REDIS_URL: ${REDIS_URL} + INDEX_NAME: ${INDEX_NAME} + LVM_ENDPOINT: ${LVM_ENDPOINT} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + restart: unless-stopped + + dataprep-vdms: + image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} + container_name: dataprep-vdms-server + ports: + - "${DATAPREP_PORT:-11110}:5000" + depends_on: + vdms-vector-db: + condition: service_healthy + ipc: 
host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_VDMS" + VDMS_HOST: ${VDMS_HOST} + VDMS_PORT: ${VDMS_PORT} + COLLECTION_NAME: ${COLLECTION_NAME} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + restart: unless-stopped + + dataprep-vdms-multimodal: + image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} + container_name: dataprep-vdms-multimodal-server + ports: + - "${DATAPREP_PORT:-11111}:5000" + depends_on: + vdms-vector-db: + condition: service_healthy + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + MULTIMODAL_DATAPREP: true + DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_MULTIMODALVDMS" + VDMS_HOST: ${VDMS_HOST} + VDMS_PORT: ${VDMS_PORT} + COLLECTION_NAME: ${INDEX_NAME} + restart: unless-stopped + +networks: + default: + driver: bridge + opensearch-net: diff --git a/comps/dataprep/deployment/docker_compose/compose_elasticsearch.yaml b/comps/dataprep/deployment/docker_compose/compose_elasticsearch.yaml deleted file mode 100644 index 01d818eac0..0000000000 --- a/comps/dataprep/deployment/docker_compose/compose_elasticsearch.yaml +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -version: "3" -services: - elasticsearch-vector-db: - hostname: db - container_name: elasticsearch-vector-db - image: docker.elastic.co/elasticsearch/elasticsearch:8.16.0 - ports: - - "9200:9200" - - "9300:9300" - restart: always - ipc: host - environment: - - ES_JAVA_OPTS=-Xms1g -Xmx1g - - discovery.type=single-node - - xpack.security.enabled=false - - bootstrap.memory_lock=false - - no_proxy= ${no_proxy} - - http_proxy= ${http_proxy} - - https_proxy= ${https_proxy} - - dataprep-elasticsearch: - image: opea/dataprep-elasticsearch:latest - container_name: dataprep-elasticsearch - ports: - - "6011:6011" - ipc: host - environment: - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - ES_CONNECTION_STRING: ${ES_CONNECTION_STRING} - INDEX_NAME: ${INDEX_NAME} - TEI_ENDPOINT: ${TEI_ENDPOINT} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} - restart: unless-stopped - -networks: - default: - driver: bridge diff --git a/comps/dataprep/deployment/docker_compose/compose_milvus.yaml b/comps/dataprep/deployment/docker_compose/compose_milvus.yaml deleted file mode 100644 index 8dde3fed06..0000000000 --- a/comps/dataprep/deployment/docker_compose/compose_milvus.yaml +++ /dev/null @@ -1,69 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -version: '3.5' - -services: - etcd: - container_name: milvus-etcd - image: quay.io/coreos/etcd:v3.5.5 - environment: - - ETCD_AUTO_COMPACTION_MODE=revision - - ETCD_AUTO_COMPACTION_RETENTION=1000 - - ETCD_QUOTA_BACKEND_BYTES=4294967296 - - ETCD_SNAPSHOT_COUNT=50000 - volumes: - - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/etcd:/etcd - command: etcd -advertise-client-urls=http://127.0.0.1:2379 -listen-client-urls http://0.0.0.0:2379 --data-dir /etcd - healthcheck: - test: ["CMD", "etcdctl", "endpoint", "health"] - interval: 30s - timeout: 20s - retries: 3 - - minio: - container_name: milvus-minio - image: minio/minio:RELEASE.2023-03-20T20-16-18Z - environment: - MINIO_ACCESS_KEY: minioadmin - MINIO_SECRET_KEY: minioadmin - ports: - - "5044:9001" - - "5043:9000" - volumes: - - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/minio:/minio_data - command: minio server /minio_data --console-address ":9001" - healthcheck: - test: ["CMD", "curl", "-f", 
"http://localhost:9000/minio/health/live"] - interval: 30s - timeout: 20s - retries: 3 - - standalone: - container_name: milvus-standalone - image: milvusdb/milvus:v2.4.9 - command: ["milvus", "run", "standalone"] - security_opt: - - seccomp:unconfined - environment: - ETCD_ENDPOINTS: etcd:2379 - MINIO_ADDRESS: minio:9000 - volumes: - - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/milvus:/var/lib/milvus - - ${DOCKER_VOLUME_DIRECTORY:-.}/milvus.yaml:/milvus/configs/milvus.yaml - healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:9091/healthz"] - interval: 30s - start_period: 90s - timeout: 20s - retries: 3 - ports: - - "19530:19530" - - "9091:9091" - depends_on: - - "etcd" - - "minio" - -networks: - default: - name: milvus diff --git a/comps/dataprep/deployment/docker_compose/compose_neo4j_langchain.yaml b/comps/dataprep/deployment/docker_compose/compose_neo4j_langchain.yaml deleted file mode 100644 index c5c6adf23d..0000000000 --- a/comps/dataprep/deployment/docker_compose/compose_neo4j_langchain.yaml +++ /dev/null @@ -1,48 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -version: "3" -services: - neo4j-vector-db: - image: neo4j/neo4j - container_name: neo4j-graph-db - ports: - - "6337:6337" - - "6338:6338" - tgi_gaudi_service: - image: ghcr.io/huggingface/tgi-gaudi:2.0.5 - container_name: tgi-service - ports: - - "8088:80" - volumes: - - "./data:/data" - shm_size: 1g - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - HF_TOKEN: ${HF_TOKEN} - command: --model-id ${LLM_MODEL_ID} --auto-truncate --max-input-tokens 1024 --max-total-tokens 2048 - dataprep-neo4j: - image: opea/gen-ai-comps:dataprep-neo4j-xeon-server - container_name: dataprep-neo4j-server - depends_on: - - neo4j-vector-db - - tgi_gaudi_service - ports: - - "6007:6007" - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - NEO4J_URL: ${NEO4J_URL} - NEO4J_USERNAME: ${NEO4J_USERNAME} - NEO4J_PASSWORD: ${NEO4J_PASSWORD} - TGI_LLM_ENDPOINT: ${TEI_ENDPOINT} - OPENAI_KEY: ${OPENAI_API_KEY} - restart: unless-stopped - -networks: - default: - driver: bridge diff --git a/comps/dataprep/deployment/docker_compose/compose_neo4j_llamaindex.yaml b/comps/dataprep/deployment/docker_compose/compose_neo4j_llamaindex.yaml deleted file mode 100644 index ac160f6997..0000000000 --- a/comps/dataprep/deployment/docker_compose/compose_neo4j_llamaindex.yaml +++ /dev/null @@ -1,97 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -version: "3.8" -services: - neo4j-apoc: - image: neo4j:latest - container_name: neo4j-apoc - volumes: - - /$HOME/neo4j/logs:/logs - - /$HOME/neo4j/config:/config - - /$HOME/neo4j/data:/data - - /$HOME/neo4j/plugins:/plugins - ipc: host - environment: - - NEO4J_AUTH=${NEO4J_USERNAME}/${NEO4J_PASSWORD} - - NEO4J_PLUGINS=["apoc"] - - NEO4J_apoc_export_file_enabled=true - - NEO4J_apoc_import_file_enabled=true - - NEO4J_apoc_import_file_use__neo4j__config=true - - NEO4J_dbms_security_procedures_unrestricted=apoc.\* - ports: - - "7474:7474" - - "7687:7687" - restart: always - tei-embedding-service: - image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 - container_name: tei-embedding-server - ports: - - "6006:80" - volumes: - - "./data:/data" - shm_size: 1g - environment: - no_proxy: ${no_proxy} - NO_PROXY: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} - ipc: host - 
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate - tgi-gaudi-service: - image: ghcr.io/huggingface/tgi-gaudi:2.0.5 - container_name: tgi-gaudi-server - ports: - - "6005:80" - volumes: - - "./data:/data" - environment: - no_proxy: ${no_proxy} - NO_PROXY: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} - HF_HUB_DISABLE_PROGRESS_BARS: 1 - HF_HUB_ENABLE_HF_TRANSFER: 0 - HABANA_VISIBLE_DEVICES: all - OMPI_MCA_btl_vader_single_copy_mechanism: none - ENABLE_HPU_GRAPH: true - LIMIT_HPU_GRAPH: true - USE_FLASH_ATTENTION: true - FLASH_ATTENTION_RECOMPUTE: true - runtime: habana - cap_add: - - SYS_NICE - ipc: host - command: --model-id ${LLM_MODEL_ID} --max-input-length 2048 --max-total-tokens 4096 - dataprep-neo4j-llamaindex: - image: opea/dataprep-neo4j-llamaindex:latest - container_name: dataprep-neo4j-server - depends_on: - - neo4j-apoc - - tgi-gaudi-service - - tei-embedding-service - ports: - - "6004:6004" - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - host_ip: ${host_ip} - NEO4J_URL: ${NEO4J_URL} - NEO4J_USERNAME: ${NEO4J_USERNAME} - NEO4J_PASSWORD: ${NEO4J_PASSWORD} - TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} - TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} - OPENAI_API_KEY: ${OPENAI_API_KEY} - OPENAI_EMBEDDING_MODEL: ${OPENAI_EMBEDDING_MODEL} - OPENAI_LLM_MODEL: ${OPENAI_LLM_MODEL} - EMBEDDING_MODEL_ID: ${EMBEDDING_MODEL_ID} - LLM_MODEL_ID: ${LLM_MODEL_ID} - LOGFLAG: ${LOGFLAG} - restart: unless-stopped -networks: - default: - driver: bridge diff --git a/comps/dataprep/deployment/docker_compose/compose_opensearch.yaml b/comps/dataprep/deployment/docker_compose/compose_opensearch.yaml deleted file mode 100644 index 7699bee1ce..0000000000 --- a/comps/dataprep/deployment/docker_compose/compose_opensearch.yaml +++ /dev/null @@ -1,65 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -version: "3" -services: - opensearch-vector-db: - image: opensearchproject/opensearch:latest - container_name: opensearch-vector-db - environment: - - cluster.name=opensearch-cluster - - node.name=opensearch-vector-db - - discovery.seed_hosts=opensearch-vector-db - - cluster.initial_master_nodes=opensearch-vector-db - - bootstrap.memory_lock=true # along with the memlock settings below, disables swapping - - "OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m" # minimum and maximum Java heap size, recommend setting both to 50% of system RAM - - OPENSEARCH_INITIAL_ADMIN_PASSWORD=${OPENSEARCH_INITIAL_ADMIN_PASSWORD} # Sets the demo admin user password when using demo configuration, required for OpenSearch 2.12 and later - ulimits: - memlock: - soft: -1 - hard: -1 - nofile: - soft: 65536 # maximum number of open files for the OpenSearch user, set to at least 65536 on modern systems - hard: 65536 - ports: - - 9200:9200 - - 9600:9600 # required for Performance Analyzer - networks: - - opensearch-net - security_opt: - - no-new-privileges:true - tei-embedding-service: - image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 - container_name: tei-embedding-server - ports: - - "6060:80" - volumes: - - "./data:/data" - shm_size: 1g - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate - dataprep-opensearch: - image: opea/dataprep-opensearch:latest - container_name: dataprep-opensearch-server - ports: - - 6007:6007 - ipc: host - environment: - 
no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - OPENSEARCH_URL: ${OPENSEARCH_URL} - INDEX_NAME: ${INDEX_NAME} - TEI_ENDPOINT: ${TEI_ENDPOINT} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} - restart: unless-stopped - security_opt: - - no-new-privileges:true - -networks: - default: - driver: bridge - opensearch-net: diff --git a/comps/dataprep/deployment/docker_compose/compose_pgvector.yaml b/comps/dataprep/deployment/docker_compose/compose_pgvector.yaml deleted file mode 100644 index 54ff7b802e..0000000000 --- a/comps/dataprep/deployment/docker_compose/compose_pgvector.yaml +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -version: "3" -services: - pgvector-vector-db: - hostname: db - container_name: pgvector-vector-db - image: pgvector/pgvector:0.7.0-pg16 - ports: - - "5432:5432" - restart: always - ipc: host - environment: - - POSTGRES_DB=vectordb - - POSTGRES_USER=testuser - - POSTGRES_PASSWORD=testpwd - - POSTGRES_HOST_AUTH_METHOD=trust - - no_proxy= ${no_proxy} - - http_proxy= ${http_proxy} - - https_proxy= ${https_proxy} - volumes: - - ./init.sql:/docker-entrypoint-initdb.d/init.sql - - dataprep-pgvector: - image: opea/dataprep-pgvector:latest - container_name: dataprep-pgvector - ports: - - "6007:6007" - ipc: host - environment: - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - PG_CONNECTION_STRING: ${PG_CONNECTION_STRING} - INDEX_NAME: ${INDEX_NAME} - TEI_ENDPOINT: ${TEI_ENDPOINT} - restart: unless-stopped - -networks: - default: - driver: bridge diff --git a/comps/dataprep/deployment/docker_compose/compose_pipecone.yaml b/comps/dataprep/deployment/docker_compose/compose_pipecone.yaml deleted file mode 100644 index 851bad5864..0000000000 --- a/comps/dataprep/deployment/docker_compose/compose_pipecone.yaml +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -version: "3" -services: - tei-embedding-service: - image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 - container_name: tei-embedding-server - ports: - - "6006:80" - volumes: - - "./data:/data" - shm_size: 1g - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate - dataprep-pinecone: - image: opea/dataprep-pinecone:latest - container_name: dataprep-pinecone-server - ports: - - "6007:6007" - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - PINECONE_API_KEY: ${PINECONE_API_KEY} - PINECONE_INDEX_NAME: ${PINECONE_INDEX_NAME} - TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} - LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} - restart: unless-stopped - -networks: - default: - driver: bridge diff --git a/comps/dataprep/deployment/docker_compose/compose_qdrant.yaml b/comps/dataprep/deployment/docker_compose/compose_qdrant.yaml deleted file mode 100644 index aaf2a17ddc..0000000000 --- a/comps/dataprep/deployment/docker_compose/compose_qdrant.yaml +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -version: "3" -services: - qdrant-vector-db: - image: qdrant/qdrant - container_name: qdrant-vector-db - ports: - - "6333:6333" - - "6334:6334" - tei-embedding-service: - image: 
ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 - container_name: tei-embedding-server - ports: - - "6006:80" - volumes: - - "./data:/data" - shm_size: 1g - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate - dataprep-qdrant: - image: opea/gen-ai-comps:dataprep-qdrant-xeon-server - container_name: dataprep-qdrant-server - depends_on: - - qdrant-vector-db - - tei-embedding-service - ports: - - "6007:6007" - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - QDRANT_HOST: ${QDRANT_HOST} - QDRANT_PORT: ${QDRANT_PORT} - COLLECTION_NAME: ${COLLECTION_NAME} - TEI_ENDPOINT: ${TEI_ENDPOINT} - restart: unless-stopped - -networks: - default: - driver: bridge diff --git a/comps/dataprep/deployment/docker_compose/compose_redis_multimodal.yaml b/comps/dataprep/deployment/docker_compose/compose_redis_multimodal.yaml deleted file mode 100644 index e3dc78a97b..0000000000 --- a/comps/dataprep/deployment/docker_compose/compose_redis_multimodal.yaml +++ /dev/null @@ -1,29 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -version: "3" -services: - redis-vector-db: - image: redis/redis-stack:7.2.0-v9 - container_name: redis-vector-db - ports: - - "6379:6379" - - "8001:8001" - dataprep-multimodal-redis: - image: opea/dataprep-multimodal-redis:latest - container_name: dataprep-multimodal-redis - ports: - - "6007:6007" - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - REDIS_URL: ${REDIS_URL} - INDEX_NAME: ${INDEX_NAME} - LVM_ENDPOINT: ${LVM_ENDPOINT} - restart: unless-stopped - -networks: - default: - driver: bridge diff --git a/comps/dataprep/deployment/docker_compose/compose_vdms.yaml b/comps/dataprep/deployment/docker_compose/compose_vdms.yaml deleted file mode 100644 index 46880119e5..0000000000 --- a/comps/dataprep/deployment/docker_compose/compose_vdms.yaml +++ /dev/null @@ -1,28 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -version: "3" -services: - vdms-vector-db: - image: intellabs/vdms:latest - container_name: vdms-vector-db - ports: - - "55555:55555" - dataprep-vdms: - image: opea/dataprep-vdms:latest - container_name: dataprep-vdms-server - ports: - - "6007:6007" - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - VDMS_HOST: ${VDMS_HOST} - VDMS_PORT: ${VDMS_PORT} - COLLECTION_NAME: ${COLLECTION_NAME} - restart: unless-stopped - -networks: - default: - driver: bridge diff --git a/comps/dataprep/deployment/docker_compose/compose_vdms_multimodal.yaml b/comps/dataprep/deployment/docker_compose/compose_vdms_multimodal.yaml deleted file mode 100644 index 8cb0571346..0000000000 --- a/comps/dataprep/deployment/docker_compose/compose_vdms_multimodal.yaml +++ /dev/null @@ -1,28 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -version: "3" -services: - vdms-vector-db: - image: intellabs/vdms:latest - container_name: vdms-vector-db - ports: - - "55555:55555" - dataprep-vdms-multimodal: - image: opea/dataprep-vdms-multimodal:latest - container_name: dataprep-vdms--multimodal-server - ports: - - "6007:6007" - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - VDMS_HOST: ${VDMS_HOST} - VDMS_PORT: ${VDMS_PORT} - INDEX_NAME: ${INDEX_NAME} - restart: 
unless-stopped - -networks: - default: - driver: bridge diff --git a/comps/dataprep/src/Dockerfile b/comps/dataprep/src/Dockerfile index abd33d5214..6355b36a4b 100644 --- a/comps/dataprep/src/Dockerfile +++ b/comps/dataprep/src/Dockerfile @@ -42,7 +42,7 @@ RUN pip install --no-cache-dir --upgrade pip setuptools && \ fi && \ pip install --no-cache-dir torch torchvision ${PIP_EXTRA_INDEX_URL} && \ pip install --no-cache-dir ${PIP_EXTRA_INDEX_URL} -r /home/user/comps/dataprep/src/requirements.txt && \ - pip install opentelemetry-api==1.27.0 opentelemetry-exporter-otlp==1.27.0 opentelemetry-sdk==1.27.0 + pip install opentelemetry-api==1.29.0 opentelemetry-exporter-otlp==1.29.0 opentelemetry-sdk==1.29.0 ENV PYTHONPATH=$PYTHONPATH:/home/user diff --git a/comps/dataprep/src/integrations/neo4j_llamaindex.py b/comps/dataprep/src/integrations/neo4j_llamaindex.py index 77c12d236f..e970499971 100644 --- a/comps/dataprep/src/integrations/neo4j_llamaindex.py +++ b/comps/dataprep/src/integrations/neo4j_llamaindex.py @@ -67,7 +67,7 @@ # LLM/Embedding endpoints TGI_LLM_ENDPOINT = os.getenv("TGI_LLM_ENDPOINT", f"http://{host_ip}:6005") -TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_EMBEDDING_ENDPOINT ", f"http://{host_ip}:6006") +TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_EMBEDDING_ENDPOINT", f"http://{host_ip}:6006") OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") OPENAI_EMBEDDING_MODEL = os.getenv("OPENAI_EMBEDDING_MODEL", "text-embedding-3-small") diff --git a/comps/retrievers/deployment/docker_compose/compose.yaml b/comps/retrievers/deployment/docker_compose/compose.yaml index f96c076a5f..708f7819d0 100644 --- a/comps/retrievers/deployment/docker_compose/compose.yaml +++ b/comps/retrievers/deployment/docker_compose/compose.yaml @@ -65,7 +65,7 @@ services: VDMS_USE_CLIP: 0 host_ip: ${host_ip} depends_on: - neo4j-db: + neo4j-apoc: condition: service_healthy tei-embedding-serving: condition: service_healthy @@ -82,7 +82,7 @@ services: OPENSEARCH_URL: ${OPENSEARCH_URL} OPENSEARCH_INDEX_NAME: ${INDEX_NAME} depends_on: - opensearch-db: + opensearch-vector-db: condition: service_healthy tei-embedding-serving: condition: service_healthy @@ -126,7 +126,7 @@ services: QDRANT_PORT: ${QDRANT_PORT} INDEX_NAME: ${INDEX_NAME} depends_on: - qdrant-db: + qdrant-vector-db: condition: service_healthy retriever-redis: @@ -166,7 +166,7 @@ services: VDMS_PORT: ${VDMS_PORT} VDMS_USE_CLIP: ${VDMS_USE_CLIP} depends_on: - vdms-db: + vdms-vector-db: condition: service_healthy tei-embedding-serving: condition: service_healthy diff --git a/comps/third_parties/elasticsearch/deployment/docker_compose/compose.yaml b/comps/third_parties/elasticsearch/deployment/docker_compose/compose.yaml index 4d43fcb0b3..d2600ca5aa 100644 --- a/comps/third_parties/elasticsearch/deployment/docker_compose/compose.yaml +++ b/comps/third_parties/elasticsearch/deployment/docker_compose/compose.yaml @@ -9,12 +9,15 @@ services: - "${ELASTICSEARCH_PORT1:-9200}:9200" - "${ELASTICSEARCH_PORT2:-9300}:9300" environment: + - no_proxy=${no_proxy} + - http_proxy=${http_proxy} + - https_proxy=${https_proxy} - ES_JAVA_OPTS=-Xms1g -Xmx1g - discovery.type=single-node - xpack.security.enabled=false - bootstrap.memory_lock=false healthcheck: - test: ["CMD-SHELL", "sleep 10 && exit 0"] - interval: 1s - timeout: 15s - retries: 1 + test: curl -s http://localhost:9200/_health_report | grep '"status":"green"' + interval: 10s + timeout: 5s + retries: 5 diff --git a/comps/third_parties/neo4j/deployment/docker_compose/compose.yaml 
b/comps/third_parties/neo4j/deployment/docker_compose/compose.yaml
index 2050fa45a9..6a1ccc1d4a 100644
--- a/comps/third_parties/neo4j/deployment/docker_compose/compose.yaml
+++ b/comps/third_parties/neo4j/deployment/docker_compose/compose.yaml
@@ -2,22 +2,32 @@
 # SPDX-License-Identifier: Apache-2.0

 services:
-  neo4j-db:
+  neo4j-apoc:
     image: neo4j:latest
-    container_name: neo4j-db
+    container_name: neo4j-apoc
     ports:
       - "${NEO4J_PORT1:-7474}:7474"
       - "${NEO4J_PORT2:-7687}:7687"
+    volumes:
+      - /$HOME/neo4j/logs:/logs
+      - /$HOME/neo4j/config:/config
+      - /$HOME/neo4j/data:/data
+      - /$HOME/neo4j/plugins:/plugins
     ipc: host
     environment:
+      - no_proxy=${no_proxy}
+      - http_proxy=${http_proxy}
+      - https_proxy=${https_proxy}
      - NEO4J_AUTH=${NEO4J_USERNAME}/${NEO4J_PASSWORD}
      - NEO4J_PLUGINS=["apoc"]
      - NEO4J_apoc_export_file_enabled=true
      - NEO4J_apoc_import_file_enabled=true
      - NEO4J_apoc_import_file_use__neo4j__config=true
      - NEO4J_dbms_security_procedures_unrestricted=apoc.\*
+    restart: always
     healthcheck:
-      test: ["CMD-SHELL", "sleep 10 && exit 0"]
-      interval: 1s
-      timeout: 15s
-      retries: 1
+      test: wget http://localhost:7474 || exit 1
+      interval: 5s
+      timeout: 10s
+      retries: 20
+      start_period: 3s
diff --git a/comps/third_parties/neo4j/src/README.md b/comps/third_parties/neo4j/src/README.md
new file mode 100644
index 0000000000..868021d6ca
--- /dev/null
+++ b/comps/third_parties/neo4j/src/README.md
@@ -0,0 +1,29 @@
+# Start Neo4j Server
+
+## 1. Download Neo4j image
+
+```bash
+docker pull neo4j:latest
+```
+
+## 2. Configure the username and password
+
+```bash
+export NEO4J_AUTH=neo4j/password
+export NEO4J_PLUGINS=\[\"apoc\"\]
+```
+
+## 3. Run Neo4j service
+
+To launch Neo4j locally, first ensure you have Docker installed. Then, you can launch the database with the following Docker command.
+
+```bash
+docker run \
+    -p 7474:7474 -p 7687:7687 \
+    -v $PWD/data:/data -v $PWD/plugins:/plugins \
+    --name neo4j-apoc \
+    -d \
+    -e NEO4J_AUTH=neo4j/password \
+    -e NEO4J_PLUGINS=\[\"apoc\"\] \
+    neo4j:latest
+```
diff --git a/comps/third_parties/neo4j/src/__init__.py b/comps/third_parties/neo4j/src/__init__.py
new file mode 100644
index 0000000000..916f3a44b2
--- /dev/null
+++ b/comps/third_parties/neo4j/src/__init__.py
@@ -0,0 +1,2 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
diff --git a/comps/third_parties/opensearch/deployment/docker_compose/compose.yaml b/comps/third_parties/opensearch/deployment/docker_compose/compose.yaml
index ef3e8312d8..c5b2846c21 100644
--- a/comps/third_parties/opensearch/deployment/docker_compose/compose.yaml
+++ b/comps/third_parties/opensearch/deployment/docker_compose/compose.yaml
@@ -2,10 +2,14 @@
 # SPDX-License-Identifier: Apache-2.0

 services:
-  opensearch-db:
+  opensearch-vector-db:
     image: opensearchproject/opensearch:latest
-    container_name: opensearch-db
+    container_name: opensearch-vector-db
     environment:
+      - no_proxy=${no_proxy}
+      - http_proxy=${http_proxy}
+      - https_proxy=${https_proxy}
+      - host_ip=${host_ip}
       - cluster.name=opensearch-cluster
       - node.name=opensearch-vector-db
       - discovery.seed_hosts=opensearch-vector-db
diff --git a/comps/third_parties/pgvector/deployment/docker_compose/compose.yaml b/comps/third_parties/pgvector/deployment/docker_compose/compose.yaml
index 8a36b1d0de..53329642b1 100644
--- a/comps/third_parties/pgvector/deployment/docker_compose/compose.yaml
+++ b/comps/third_parties/pgvector/deployment/docker_compose/compose.yaml
@@ -17,7 +17,8 @@ services:
     volumes:
       - ${VOLUMES_PATH:-./init.sql}:/docker-entrypoint-initdb.d/init.sql
     healthcheck:
-      test: ["CMD-SHELL", "sleep 10 && exit 0"]
-      interval: 1s
-      timeout: 15s
-      retries: 1
+      test: pg_isready
+      interval: 60s
+      retries: 3
+      start_period: 10s
+      timeout: 10s
diff --git a/comps/third_parties/qdrant/deployment/docker_compose/compose.yaml b/comps/third_parties/qdrant/deployment/docker_compose/compose.yaml
index b1f1ed8e1a..9c1542425f 100644
--- a/comps/third_parties/qdrant/deployment/docker_compose/compose.yaml
+++ b/comps/third_parties/qdrant/deployment/docker_compose/compose.yaml
@@ -2,16 +2,20 @@
 # SPDX-License-Identifier: Apache-2.0

 services:
-  qdrant-db:
+  qdrant-vector-db:
     image: qdrant/qdrant
-    container_name: qdrant-db
+    container_name: qdrant-vector-db
     ports:
       - "${QDRANT_PORT:-6333}:6333"
-    environment:
-      HTTPS_PROXY: ${http_proxy}
-      HTTP_PROXY: ${https_proxy}
+      - "6334:6334"
     healthcheck:
+      #test: curl -s http://${host_ip}:6334/healthz | grep -q 'healthz check passed' || exit 1 # TODO: this is the real healthcheck, but it does not work yet
       test: ["CMD-SHELL", "sleep 10 && exit 0"]
       interval: 1s
       timeout: 15s
       retries: 1
+    environment:
+      - no_proxy=${no_proxy}
+      - http_proxy=${http_proxy}
+      - https_proxy=${https_proxy}
+      - host_ip=${host_ip}
diff --git a/comps/third_parties/qdrant/src/README.md b/comps/third_parties/qdrant/src/README.md
new file mode 100644
index 0000000000..b23060cfda
--- /dev/null
+++ b/comps/third_parties/qdrant/src/README.md
@@ -0,0 +1,21 @@
+# Start Qdrant Server
+
+## 1. Download Qdrant image
+
+```bash
+docker pull qdrant/qdrant
+```
+
+## 2. Configure the ports
+
+Qdrant serves its REST API on port 6333 and its gRPC API on port 6334.
+
+```bash
+export QDRANT_PORT=6333
+```
+
+## 3. Run Qdrant service
+
+```bash
+docker run --name qdrant-vector-db -p 6333:6333 -p 6334:6334 -d qdrant/qdrant
+```
diff --git a/comps/third_parties/qdrant/src/__init__.py b/comps/third_parties/qdrant/src/__init__.py
new file mode 100644
index 0000000000..916f3a44b2
--- /dev/null
+++ b/comps/third_parties/qdrant/src/__init__.py
@@ -0,0 +1,2 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
diff --git a/comps/third_parties/qdrant/src/init.sql b/comps/third_parties/qdrant/src/init.sql
new file mode 100644
index 0000000000..0aa0fc2255
--- /dev/null
+++ b/comps/third_parties/qdrant/src/init.sql
@@ -0,0 +1 @@
+CREATE EXTENSION IF NOT EXISTS vector;
diff --git a/comps/third_parties/redis/deployment/docker_compose/compose.yaml b/comps/third_parties/redis/deployment/docker_compose/compose.yaml
index 4633b9b1b3..4060104c6d 100644
--- a/comps/third_parties/redis/deployment/docker_compose/compose.yaml
+++ b/comps/third_parties/redis/deployment/docker_compose/compose.yaml
@@ -9,10 +9,11 @@ services:
       - "${REDIS_PORT1:-6379}:6379"
       - "${REDIS_PORT2:-8001}:8001"
     environment:
-      HTTPS_PROXY: ${http_proxy}
-      HTTP_PROXY: ${https_proxy}
+      - no_proxy=${no_proxy}
+      - http_proxy=${http_proxy}
+      - https_proxy=${https_proxy}
     healthcheck:
-      test: ["CMD-SHELL", "sleep 10 && exit 0"]
-      interval: 1s
-      timeout: 15s
-      retries: 1
+      test: ["CMD", "redis-cli", "ping"]
+      timeout: 10s
+      retries: 3
+      start_period: 10s
diff --git a/comps/third_parties/vdms/deployment/docker_compose/compose.yaml b/comps/third_parties/vdms/deployment/docker_compose/compose.yaml
index eb0cf2f9c4..455d3530ce 100644
--- a/comps/third_parties/vdms/deployment/docker_compose/compose.yaml
+++ b/comps/third_parties/vdms/deployment/docker_compose/compose.yaml
@@ -2,13 +2,14 @@
 # SPDX-License-Identifier: Apache-2.0

 services:
-  vdms-db:
+  vdms-vector-db:
     image: intellabs/vdms:latest
-    container_name: vdms-db
+    container_name: vdms-vector-db
     ports:
       - "${VDMS_PORT:-55555}:55555"
+    environment:
+      - no_proxy=${no_proxy}
+      - http_proxy=${http_proxy}
+      - https_proxy=${https_proxy}
     healthcheck:
-      test: ["CMD-SHELL", "sleep 10 && exit 0"]
-      interval: 1s
-      timeout: 15s
-      retries: 1
+      test: ["CMD-SHELL", "sleep 8 && echo 'This is a healthcheck test.' || exit 1"]
diff --git a/tests/dataprep/test_dataprep_elasticsearch.sh b/tests/dataprep/test_dataprep_elasticsearch.sh
index a5def2f047..0d712c307a 100644
--- a/tests/dataprep/test_dataprep_elasticsearch.sh
+++ b/tests/dataprep/test_dataprep_elasticsearch.sh
@@ -7,7 +7,7 @@ set -x
 WORKPATH=$(dirname "$PWD")
 LOG_PATH="$WORKPATH/tests"
 ip_address=$(hostname -I | awk '{print $1}')
-dataprep_service_port=6011
+DATAPREP_PORT=11100

 function build_docker_images() {
     cd $WORKPATH
@@ -16,48 +16,34 @@ function build_docker_images() {
     docker pull docker.elastic.co/elasticsearch/elasticsearch:8.16.0

     # build dataprep image for elasticsearch
-    docker build --no-cache -t opea/dataprep-elasticsearch:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f $WORKPATH/comps/dataprep/src/Dockerfile .
+    docker build --no-cache -t opea/dataprep:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f $WORKPATH/comps/dataprep/src/Dockerfile .
    if [ $? 
-ne 0 ]; then - echo "opea/dataprep-elasticsearch built fail" + echo "opea/dataprep built fail" exit 1 else - echo "opea/dataprep-elasticsearch built successful" + echo "opea/dataprep built successful" fi } function start_service() { - # elasticsearch - elasticsearch_port=9200 - docker run -d --name test-comps-vectorstore-elasticsearch -e ES_JAVA_OPTS="-Xms1g -Xmx1g" -e "discovery.type=single-node" -e "xpack.security.enabled=false" -p $elasticsearch_port:9200 -p 9300:9300 docker.elastic.co/elasticsearch/elasticsearch:8.16.0 - export ES_CONNECTION_STRING="http://${ip_address}:${elasticsearch_port}" - sleep 10s - - # data-prep - INDEX_NAME="test-elasticsearch" - docker run -d --name="test-comps-dataprep-elasticsearch" -p $dataprep_service_port:5000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e ES_CONNECTION_STRING=$ES_CONNECTION_STRING -e INDEX_NAME=$INDEX_NAME -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_ELASTICSEARCH" opea/dataprep-elasticsearch:comps - sleep 15s - - bash $WORKPATH/tests/utils/wait-for-it.sh $ip_address:$dataprep_service_port -s -t 100 -- echo "Dataprep service up" - DATAPREP_UP=$? - if [ ${DATAPREP_UP} -ne 0 ]; then - echo "Could not start Dataprep service." - return 1 - fi - - sleep 5s - bash $WORKPATH/tests/utils/wait-for-it.sh ${ip_address}:$dataprep_service_port -s -t 1 -- echo "Dataprep service still up" - DATAPREP_UP=$? - if [ ${DATAPREP_UP} -ne 0 ]; then - echo "Dataprep service crashed." - return 1 - fi + echo "Starting microservice" + export ELASTICSEARCH_PORT1=12300 + export ES_CONNECTION_STRING="http://${ip_address}:${ELASTICSEARCH_PORT1}" + export INDEX_NAME="test-elasticsearch" + export TAG=comps + service_name="elasticsearch-vector-db dataprep-elasticsearch" + cd $WORKPATH + cd comps/dataprep/deployment/docker_compose/ + docker compose up ${service_name} -d + sleep 1m + echo "Microservice started" } function validate_microservice() { cd $LOG_PATH # test /v1/dataprep - URL="http://${ip_address}:$dataprep_service_port/v1/dataprep/ingest" + URL="http://${ip_address}:$DATAPREP_PORT/v1/dataprep/ingest" echo "Deep learning is a subset of machine learning that utilizes neural networks with multiple layers to analyze various levels of abstract data representations. It enables computers to identify patterns and make decisions with minimal human intervention by learning from large amounts of data." > $LOG_PATH/dataprep_file.txt HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -F 'files=@./dataprep_file.txt' -H 'Content-Type: multipart/form-data' "$URL") @@ -70,17 +56,17 @@ function validate_microservice() { echo "[ dataprep ] Content is as expected." else echo "[ dataprep ] Content does not match the expected result: $CONTENT" - docker logs test-comps-dataprep-elasticsearch >> ${LOG_PATH}/dataprep.log + docker logs dataprep-elasticsearch >> ${LOG_PATH}/dataprep.log exit 1 fi else echo "[ dataprep ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs test-comps-dataprep-elasticsearch >> ${LOG_PATH}/dataprep.log + docker logs dataprep-elasticsearch >> ${LOG_PATH}/dataprep.log exit 1 fi # test /v1/dataprep/get_file - URL="http://${ip_address}:$dataprep_service_port/v1/dataprep/get" + URL="http://${ip_address}:$DATAPREP_PORT/v1/dataprep/get" HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -H 'Content-Type: application/json' "$URL") if [ "$HTTP_STATUS" -eq 200 ]; then echo "[ dataprep - file ] HTTP status is 200. Checking content..." 
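Editor's note: the refactored tests all follow the same pattern, export the variables the consolidated compose file expects, bring up only the named backend/dataprep pair, and let the new healthchecks gate startup instead of the old `wait-for-it.sh` polling. Below is a minimal standalone sketch of that flow, assuming the Elasticsearch variant, the `comps` tag, and the default `11100` port used in this patch; the sample file name is illustrative.

```bash
#!/usr/bin/env bash
# Sketch: smoke-test the consolidated dataprep compose file (Elasticsearch variant).
ip_address=$(hostname -I | awk '{print $1}')
export ELASTICSEARCH_PORT1=12300
export ES_CONNECTION_STRING="http://${ip_address}:${ELASTICSEARCH_PORT1}"
export INDEX_NAME="test-elasticsearch"
export TAG=comps

cd comps/dataprep/deployment/docker_compose
# Start only the matched backend/dataprep pair; the compose healthchecks
# replace the manual wait loops the old tests carried.
docker compose up elasticsearch-vector-db dataprep-elasticsearch -d
sleep 1m

# Exercise the three dataprep endpoints the tests validate.
echo "hello dataprep" > /tmp/dataprep_file.txt
curl -X POST -F 'files=@/tmp/dataprep_file.txt' -H 'Content-Type: multipart/form-data' \
  "http://${ip_address}:11100/v1/dataprep/ingest"
curl -X POST -H 'Content-Type: application/json' "http://${ip_address}:11100/v1/dataprep/get"
curl -X POST -d '{"file_path": "dataprep_file.txt"}' -H 'Content-Type: application/json' \
  "http://${ip_address}:11100/v1/dataprep/delete"
```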
@@ -90,33 +76,30 @@ function validate_microservice() { echo "[ dataprep - file ] Content is as expected." else echo "[ dataprep - file ] Content does not match the expected result: $CONTENT" - docker logs test-comps-dataprep-elasticsearch >> ${LOG_PATH}/dataprep_file.log + docker logs dataprep-elasticsearch >> ${LOG_PATH}/dataprep_file.log exit 1 fi else echo "[ dataprep - file ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs test-comps-dataprep-elasticsearch >> ${LOG_PATH}/dataprep_file.log + docker logs dataprep-elasticsearch >> ${LOG_PATH}/dataprep_file.log exit 1 fi # test /v1/dataprep/delete_file - URL="http://${ip_address}:$dataprep_service_port/v1/dataprep/delete" + URL="http://${ip_address}:$DATAPREP_PORT/v1/dataprep/delete" HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d '{"file_path": "dataprep_file.txt"}' -H 'Content-Type: application/json' "$URL") if [ "$HTTP_STATUS" -eq 200 ]; then echo "[ dataprep - del ] HTTP status is 200." - docker logs test-comps-dataprep-elasticsearch >> ${LOG_PATH}/dataprep_del.log + docker logs dataprep-elasticsearch >> ${LOG_PATH}/dataprep_del.log else echo "[ dataprep - del ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs test-comps-dataprep-elasticsearch >> ${LOG_PATH}/dataprep_del.log + docker logs dataprep-elasticsearch >> ${LOG_PATH}/dataprep_del.log exit 1 fi } function stop_docker() { - cid=$(docker ps -aq --filter "name=test-comps-vectorstore-elasticsearch*") - if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi - - cid=$(docker ps -aq --filter "name=test-comps-dataprep-elasticsearch*") + cid=$(docker ps -aq --filter "name=elasticsearch-vector-db" --filter "name=dataprep-elasticsearch") if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } diff --git a/tests/dataprep/test_dataprep_milvus.sh b/tests/dataprep/test_dataprep_milvus.sh index 7de1f20b5f..603fb671cd 100644 --- a/tests/dataprep/test_dataprep_milvus.sh +++ b/tests/dataprep/test_dataprep_milvus.sh @@ -7,17 +7,18 @@ set -x WORKPATH=$(dirname "$PWD") LOG_PATH="$WORKPATH/tests" ip_address=$(hostname -I | awk '{print $1}') +DATAPREP_PORT=11101 function build_docker_images() { cd $WORKPATH echo $(pwd) # dataprep milvus image - docker build --no-cache -t opea/dataprep-milvus:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/src/Dockerfile . + docker build --no-cache -t opea/dataprep:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/src/Dockerfile . if [ $? 
-ne 0 ]; then - echo "opea/dataprep-milvus built fail" + echo "opea/dataprep built fail" exit 1 else - echo "opea/dataprep-milvus built successful" + echo "opea/dataprep built successful" fi } @@ -27,19 +28,17 @@ function start_service() { # wget https://raw.githubusercontent.com/milvus-io/milvus/v2.4.9/configs/milvus.yaml # wget https://github.com/milvus-io/milvus/releases/download/v2.4.9/milvus-standalone-docker-compose.yml -O docker-compose.yml # sed '/- \${DOCKER_VOLUME_DIRECTORY:-\.}\/volumes\/milvus:\/var\/lib\/milvus/a \ \ \ \ \ \ - \${DOCKER_VOLUME_DIRECTORY:-\.}\/milvus.yaml:\/milvus\/configs\/milvus.yaml' -i docker-compose.yml - docker compose -f docker-compose.yaml up -d - - # start embedding service - embed_port=5021 - embed_model="BAAI/bge-base-en-v1.5" - docker run -d -p $embed_port:80 -v ./data:/data --name test-comps-dataprep-milvus-tei-server -e http_proxy=$http_proxy -e https_proxy=$https_proxy --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 --model-id $embed_model - export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:${embed_port}" - - # start dataprep service - MILVUS_HOST=${ip_address} - dataprep_service_port=5022 - HF_TOKEN=${HF_TOKEN} - docker run -d --name="test-comps-dataprep-milvus-server" -p ${dataprep_service_port}:5000 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e TEI_EMBEDDING_ENDPOINT=${TEI_EMBEDDING_ENDPOINT} -e MILVUS_HOST=${MILVUS_HOST} -e HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN} -e LOGFLAG=true -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_MILVUS" --ipc=host opea/dataprep-milvus:comps + docker compose up -d + sleep 30 + + export host_ip=${ip_address} + export TEI_EMBEDDER_PORT=12005 + export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" + export MILVUS_HOST=${ip_address} + export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:${TEI_EMBEDDER_PORT}" + service_name="dataprep-milvus tei-embedding-serving" + cd $WORKPATH/comps/dataprep/deployment/docker_compose/ + docker compose up ${service_name} -d sleep 1m } @@ -91,43 +90,42 @@ function validate_service() { function validate_microservice() { cd $LOG_PATH - dataprep_service_port=5022 # test /v1/dataprep/delete validate_service \ - "http://${ip_address}:${dataprep_service_port}/v1/dataprep/delete" \ + "http://${ip_address}:${DATAPREP_PORT}/v1/dataprep/delete" \ '{"status":true}' \ "dataprep_del" \ - "test-comps-dataprep-milvus-server" + "dataprep-milvus-server" # test /v1/dataprep upload file echo "Deep learning is a subset of machine learning that utilizes neural networks with multiple layers to analyze various levels of abstract data representations. It enables computers to identify patterns and make decisions with minimal human intervention by learning from large amounts of data." 
> $LOG_PATH/dataprep_file.txt validate_service \ - "http://${ip_address}:${dataprep_service_port}/v1/dataprep/ingest" \ + "http://${ip_address}:${DATAPREP_PORT}/v1/dataprep/ingest" \ "Data preparation succeeded" \ "dataprep_upload_file" \ - "test-comps-dataprep-milvus-server" + "dataprep-milvus-server" # test /v1/dataprep upload link validate_service \ - "http://${ip_address}:${dataprep_service_port}/v1/dataprep/ingest" \ + "http://${ip_address}:${DATAPREP_PORT}/v1/dataprep/ingest" \ "Data preparation succeeded" \ "dataprep_upload_link" \ - "test-comps-dataprep-milvus-server" + "dataprep-milvus-server" # test /v1/dataprep/get_file validate_service \ - "http://${ip_address}:${dataprep_service_port}/v1/dataprep/get" \ + "http://${ip_address}:${DATAPREP_PORT}/v1/dataprep/get" \ '{"name":' \ "dataprep_get" \ - "test-comps-dataprep-milvus-server" + "dataprep-milvus-server" } function stop_docker() { cd $WORKPATH rm -rf milvus/ - cid=$(docker ps -aq --filter "name=test-comps-dataprep-milvus*") + cid=$(docker ps -aq --filter "name=dataprep-milvus*") if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi cid=$(docker ps -aq --filter "name=milvus-*") if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi diff --git a/tests/dataprep/test_dataprep_neo4j_on_intel_hpu.sh b/tests/dataprep/test_dataprep_neo4j_on_intel_hpu.sh index a1d050f6ed..2b923bb66d 100755 --- a/tests/dataprep/test_dataprep_neo4j_on_intel_hpu.sh +++ b/tests/dataprep/test_dataprep_neo4j_on_intel_hpu.sh @@ -7,71 +7,44 @@ set -x WORKPATH=$(dirname "$PWD") LOG_PATH="$WORKPATH/tests" ip_address=$(hostname -I | awk '{print $1}') +DATAPREP_PORT=11103 +LLM_ENDPOINT_PORT=10510 function build_docker_images() { cd $WORKPATH echo $(pwd) - docker build --no-cache -t opea/dataprep-neo4j:comps --build-arg no_proxy=$no_proxy --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/src/Dockerfile . + docker build --no-cache -t opea/dataprep:comps --build-arg no_proxy=$no_proxy --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/src/Dockerfile . if [ $? -ne 0 ]; then - echo "opea/dataprep-neo4j built fail" + echo "opea/dataprep built fail" exit 1 else - echo "opea/dataprep-neo4j built successful" + echo "opea/dataprep built successful" fi docker pull ghcr.io/huggingface/tgi-gaudi:2.3.1 docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 } function start_service() { - # neo4j-apoc - docker run -d -p 7474:7474 -p 7687:7687 --name test-comps-neo4j-apoc --env NEO4J_AUTH=neo4j/neo4jtest -e NEO4J_apoc_export_file_enabled=true -e NEO4J_apoc_import_file_enabled=true -e NEO4J_apoc_import_file_use__neo4j__config=true -e NEO4J_PLUGINS=\[\"apoc\"\] neo4j:latest - #sleep 30s - - # tei endpoint - emb_model="BAAI/bge-base-en-v1.5" - docker run -d --name="test-comps-dataprep-neo4j-tei-endpoint" -p 6006:80 -v ./data:/data -e no_proxy=$no_proxy -e http_proxy=$http_proxy \ - -e https_proxy=$https_proxy -e HF_TOKEN=$HF_TOKEN --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 --model-id $emb_model - sleep 30s - export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:6006" - - # tgi gaudi endpoint - # "Meta-Llama-3.1-8B-Instruct" used for CI testing. 
Might not be good enough to extract high quality graph - model="meta-llama/Meta-Llama-3.1-8B-Instruct" - docker run -d --name="test-comps-dataprep-neo4j-tgi-endpoint" -p 6005:80 -v ./data:/data --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e TEXT_GENERATION_SERVER_IGNORE_EOS_TOKEN=false \ - -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e HF_TOKEN=$HF_TOKEN -e ENABLE_HPU_GRAPH=true -e LIMIT_HPU_GRAPH=true \ - -e USE_FLASH_ATTENTION=true -e FLASH_ATTENTION_RECOMPUTE=true --cap-add=sys_nice -e no_proxy=$no_proxy -e http_proxy=$http_proxy -e https_proxy=$https_proxy \ - --ipc=host --pull always ghcr.io/huggingface/tgi-gaudi:2.3.1 --model-id $model --max-input-tokens 1024 --max-total-tokens 3000 - sleep 30s - # extra time to load large model - echo "Waiting for tgi gaudi ready" - n=0 - ready=false - until [[ "$n" -ge 300 ]] || [[ $ready == true ]]; do - docker logs test-comps-dataprep-neo4j-tgi-endpoint &> ${LOG_PATH}/tgi-gaudi-service.log - n=$((n+1)) - if grep -q Connected ${LOG_PATH}/tgi-gaudi-service.log; then - ready=true - break - fi - sleep 5s - done - if [[ "$ready" == true ]]; then - echo "Service started successfully" - else - echo "Service failed to start within the expected time frame" - exit 1 - fi - export TGI_LLM_ENDPOINT="http://${ip_address}:6005" - - # dataprep neo4j - # Not testing openai code path since not able to provide key for cicd - docker run -d --name="test-comps-dataprep-neo4j-server" -p 6004:5000 -v ./data:/data --ipc=host -e TGI_LLM_ENDPOINT=$TGI_LLM_ENDPOINT \ - -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e EMBEDDING_MODEL_ID=$emb_model -e LLM_MODEL_ID=$model -e host_ip=$ip_address -e no_proxy=$no_proxy \ - -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e NEO4J_URI="bolt://${ip_address}:7687" -e NEO4J_USERNAME="neo4j" \ - -e NEO4J_PASSWORD="neo4jtest" -e HF_TOKEN=$HF_TOKEN -e LOGFLAG=True -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_NEO4J_LLAMAINDEX" opea/dataprep-neo4j:comps - sleep 30s - export DATAPREP_SERVICE_ENDPOINT="http://${ip_address}:6004" - + service_name="neo4j-apoc tei-embedding-serving tgi-gaudi-server dataprep-neo4j-llamaindex" + export host_ip=${ip_address} + export TAG="comps" + export NEO4J_AUTH="neo4j/neo4jtest" + export NEO4J_URL="bolt://${ip_address}:7687" + export NEO4J_USERNAME="neo4j" + export NEO4J_PASSWORD="neo4jtest" + export NEO4J_apoc_export_file_enabled=true + export NEO4J_apoc_import_file_use__neo4j__config=true + export NEO4J_PLUGINS=\[\"apoc\"\] + export TEI_EMBEDDER_PORT=12006 + export LLM_MODEL_ID="meta-llama/Meta-Llama-3.1-8B-Instruct" + export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" + export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:${TEI_EMBEDDER_PORT}" + export LLM_ENDPOINT_PORT=10510 + export TGI_LLM_ENDPOINT="http://${ip_address}:${LLM_ENDPOINT_PORT}" + + cd $WORKPATH/comps/dataprep/deployment/docker_compose/ + docker compose up ${service_name} -d + sleep 1m } function validate_service() { @@ -122,35 +95,35 @@ function validate_microservice() { "${ip_address}:7474" \ "200 OK" \ "neo4j-apoc" \ - "test-comps-neo4j-apoc" \ + "neo4j-apoc" \ "" sleep 1m # retrieval can't curl as expected, try to wait for more time # tgi for llm service validate_service \ - "${ip_address}:6005/generate" \ + "${ip_address}:${LLM_ENDPOINT_PORT}/generate" \ "generated_text" \ "tgi-gaudi-service" \ - "test-comps-dataprep-neo4j-tgi-endpoint" \ + "tgi-gaudi-server" \ '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' # test /v1/dataprep graph extraction echo "Like many companies in the 
O&G sector, the stock of Chevron (NYSE:CVX) has declined about 10% over the past 90-days despite the fact that Q2 consensus earnings estimates have risen sharply (~25%) during that same time frame. Over the years, Chevron has kept a very strong balance sheet. FirstEnergy (NYSE:FE – Get Rating) posted its earnings results on Tuesday. The utilities provider reported $0.53 earnings per share for the quarter, topping the consensus estimate of $0.52 by $0.01, RTT News reports. FirstEnergy had a net margin of 10.85% and a return on equity of 17.17%. The Dáil was almost suspended on Thursday afternoon after Sinn Féin TD John Brady walked across the chamber and placed an on-call pager in front of the Minister for Housing Darragh O’Brien during a debate on retained firefighters. Mr O’Brien said Mr Brady had taken part in an act of theatre that was obviously choreographed.Around 2,000 retained firefighters around the country staged a second day of industrial action on Tuesday and are due to start all out-strike action from next Tuesday. The mostly part-time workers, who keep the services going outside of Ireland’s larger urban centres, are taking industrial action in a dispute over pay and working conditions. Speaking in the Dáil, Sinn Féin deputy leader Pearse Doherty said firefighters had marched on Leinster House today and were very angry at the fact the Government will not intervene. Reintroduction of tax relief on mortgages needs to be considered, O’Brien says. Martin withdraws comment after saying People Before Profit would ‘put the jackboot on people’ Taoiseach ‘propagated fears’ farmers forced to rewet land due to nature restoration law – Cairns An intervention is required now. I’m asking you to make an improved offer in relation to pay for retained firefighters, Mr Doherty told the housing minister.I’m also asking you, and challenging you, to go outside after this Order of Business and meet with the firefighters because they are just fed up to the hilt in relation to what you said.Some of them have handed in their pagers to members of the Opposition and have challenged you to wear the pager for the next number of weeks, put up with an €8,600 retainer and not leave your community for the two and a half kilometres and see how you can stand over those type of pay and conditions. At this point, Mr Brady got up from his seat, walked across the chamber and placed the pager on the desk in front of Mr O’Brien. Ceann Comhairle Seán Ó Fearghaíl said the Sinn Féin TD was completely out of order and told him not to carry out a charade in this House, adding it was absolutely outrageous behaviour and not to be encouraged.Mr O’Brien said Mr Brady had engaged in an act of theatre here today which was obviously choreographed and was then interrupted with shouts from the Opposition benches. Mr Ó Fearghaíl said he would suspend the House if this racket continues.Mr O’Brien later said he said he was confident the dispute could be resolved and he had immense regard for firefighters. The minister said he would encourage the unions to re-engage with the State’s industrial relations process while also accusing Sinn Féin of using the issue for their own political gain." 
> $LOG_PATH/dataprep_file.txt validate_service \ - "http://${ip_address}:6004/v1/dataprep/ingest" \ + "http://${ip_address}:${DATAPREP_PORT}/v1/dataprep/ingest" \ "Data preparation succeeded" \ "extract_graph_neo4j" \ - "test-comps-dataprep-neo4j-server" + "dataprep-neo4j-llamaindex" } function stop_docker() { - cid_retrievers=$(docker ps -aq --filter "name=test-comps-dataprep-neo4j*") - if [[ ! -z "$cid_retrievers" ]]; then - docker stop $cid_retrievers && docker rm $cid_retrievers && sleep 1s + cid=$(docker ps -aq --filter "name=dataprep-neo4j*") + if [[ ! -z "$cid" ]]; then + docker stop $cid && docker rm $cid && sleep 1s fi - cid_db=$(docker ps -aq --filter "name=test-comps-neo4j-apoc") - if [[ ! -z "$cid_retrievers" ]]; then - docker stop $cid_retrievers && docker rm $cid_retrievers && sleep 1s + cid_db=$(docker ps -aq --filter "name=neo4j-apoc" --filter "name=tgi-gaudi-server") + if [[ ! -z "$cid_db" ]]; then + docker stop $cid_db && docker rm $cid_db && sleep 1s fi } diff --git a/tests/dataprep/test_dataprep_opensearch.sh b/tests/dataprep/test_dataprep_opensearch.sh index 1adddd6080..da9415dad3 100644 --- a/tests/dataprep/test_dataprep_opensearch.sh +++ b/tests/dataprep/test_dataprep_opensearch.sh @@ -7,61 +7,41 @@ set -x WORKPATH=$(dirname "$PWD") LOG_PATH="$WORKPATH/tests" ip_address=$(hostname -I | awk '{print $1}') -dataprep_service_port="6007" +DATAPREP_PORT="11104" OPENSEARCH_INITIAL_ADMIN_PASSWORD="StRoNgOpEa0)" function build_docker_images() { cd $WORKPATH echo $(pwd) - docker build -t opea/dataprep-opensearch:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/src/Dockerfile . + docker build -t opea/dataprep:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/src/Dockerfile . if [ $? 
-ne 0 ]; then - echo "opea/dataprep-opensearch built fail" + echo "opea/dataprep built fail" exit 1 else - echo "opea/dataprep-opensearch built successful" + echo "opea/dataprep built successful" fi } function start_service() { - # Start OpenSearch vector db container - docker run -d \ - --name test-comps-dataprep-opensearch-langchain \ - -e cluster.name=opensearch-cluster \ - -e node.name=opensearch-vector-db \ - -e discovery.seed_hosts=opensearch-vector-db \ - -e cluster.initial_master_nodes=opensearch-vector-db \ - -e bootstrap.memory_lock=true \ - -e "OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m" \ - -e OPENSEARCH_INITIAL_ADMIN_PASSWORD=$OPENSEARCH_INITIAL_ADMIN_PASSWORD \ - --ulimit memlock=-1:-1 \ - --ulimit nofile=65536:65536 \ - -p 9200:9200 \ - -p 9600:9600 \ - opensearchproject/opensearch:latest - # Start OpenSearch dataprep container - OPENSEARCH_URL="http://${ip_address}:9200" - echo $(OPENSEARCH_URL) - INDEX_NAME="file-index" - docker run -d \ - --name test-comps-dataprep-opensearch-langchain-server \ - -p 6007:5000 \ - -e https_proxy=$https_proxy \ - -e http_proxy=$http_proxy \ - -e OPENSEARCH_INITIAL_ADMIN_PASSWORD=$OPENSEARCH_INITIAL_ADMIN_PASSWORD \ - -e OPENSEARCH_URL=$OPENSEARCH_URL \ - -e INDEX_NAME=$INDEX_NAME \ - -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_OPENSEARCH" \ - opea/dataprep-opensearch:latest - - sleep 2m + export OPENSEARCH_INITIAL_ADMIN_PASSWORD="StRoNgOpEa0)" + export OPENSEARCH_PORT1=9200 + export OPENSEARCH_URL="http://${ip_address}:${OPENSEARCH_PORT1}" + echo ${OPENSEARCH_URL} + export INDEX_NAME="file-index" + service_name="opensearch-vector-db dataprep-opensearch" + export host_ip=${ip_address} + export TAG="comps" + cd $WORKPATH/comps/dataprep/deployment/docker_compose/ + docker compose up ${service_name} -d + sleep 1m } function validate_microservice() { cd $LOG_PATH # test /v1/dataprep upload file - URL="http://${ip_address}:$dataprep_service_port/v1/dataprep/ingest" + URL="http://${ip_address}:$DATAPREP_PORT/v1/dataprep/ingest" echo "Deep learning is a subset of machine learning that utilizes neural networks with multiple layers to analyze various levels of abstract data representations. It enables computers to identify patterns and make decisions with minimal human intervention by learning from large amounts of data." > $LOG_PATH/dataprep_file.txt HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F 'files=@./dataprep_file.txt' -H 'Content-Type: multipart/form-data' -k -u admin:$OPENSEARCH_INITIAL_ADMIN_PASSWORD "$URL") HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') @@ -70,14 +50,14 @@ function validate_microservice() { if [ "$HTTP_STATUS" -ne "200" ]; then echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs test-comps-dataprep-opensearch-langchain-server >> ${LOG_PATH}/dataprep_upload_file.log + docker logs dataprep-opensearch-server >> ${LOG_PATH}/dataprep_upload_file.log exit 1 else echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." fi if [[ "$RESPONSE_BODY" != *"Data preparation succeeded"* ]]; then echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" - docker logs test-comps-dataprep-opensearch-langchain-server >> ${LOG_PATH}/dataprep_upload_file.log + docker logs dataprep-opensearch-server >> ${LOG_PATH}/dataprep_upload_file.log exit 1 else echo "[ $SERVICE_NAME ] Content is as expected." 
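A note on the validation idiom these tests share: a single curl call appends the HTTP status code to the response body via --write-out, and the two pieces are then separated with tr/sed. A minimal standalone sketch of the same split, using bash parameter expansion instead of sed (the helper name check_http is illustrative, not part of this patch):

    # Fetch a URL once; curl appends "HTTPSTATUS:<code>" to the body.
    check_http() {
        local url=$1 response status body
        response=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" "$url")
        status=${response##*HTTPSTATUS:}   # text after the marker
        body=${response%HTTPSTATUS:*}      # text before the marker
        echo "status=${status}"
        echo "body=${body}"
    }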
@@ -85,7 +65,7 @@ function validate_microservice() { # test /v1/dataprep upload link - URL="http://${ip_address}:$dataprep_service_port/v1/dataprep/ingest" + URL="http://${ip_address}:$DATAPREP_PORT/v1/dataprep/ingest" HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F 'link_list=["https://www.ces.tech/"]' -k -u admin:$OPENSEARCH_INITIAL_ADMIN_PASSWORD "$URL") HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g') @@ -94,21 +74,21 @@ function validate_microservice() { if [ "$HTTP_STATUS" -ne "200" ]; then echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs test-comps-dataprep-opensearch-langchain-server >> ${LOG_PATH}/dataprep_upload_link.log + docker logs dataprep-opensearch-server >> ${LOG_PATH}/dataprep_upload_link.log exit 1 else echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." fi if [[ "$RESPONSE_BODY" != *"Data preparation succeeded"* ]]; then echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" - docker logs test-comps-dataprep-opensearch-langchain-server >> ${LOG_PATH}/dataprep_upload_link.log + docker logs dataprep-opensearch-server >> ${LOG_PATH}/dataprep_upload_link.log exit 1 else echo "[ $SERVICE_NAME ] Content is as expected." fi # test /v1/dataprep/get_file - URL="http://${ip_address}:$dataprep_service_port/v1/dataprep/get" + URL="http://${ip_address}:$DATAPREP_PORT/v1/dataprep/get" HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -k -u admin:$OPENSEARCH_INITIAL_ADMIN_PASSWORD "$URL") HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g') @@ -116,21 +96,21 @@ function validate_microservice() { if [ "$HTTP_STATUS" -ne "200" ]; then echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs test-comps-dataprep-opensearch-langchain-server >> ${LOG_PATH}/dataprep_file.log + docker logs dataprep-opensearch-server >> ${LOG_PATH}/dataprep_file.log exit 1 else echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." fi if [[ "$RESPONSE_BODY" -ne "null" ]]; then echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" - docker logs test-comps-dataprep-opensearch-langchain-server >> ${LOG_PATH}/dataprep_file.log + docker logs dataprep-opensearch-server >> ${LOG_PATH}/dataprep_file.log exit 1 else echo "[ $SERVICE_NAME ] Content is as expected." fi # test /v1/dataprep/delete_file - URL="http://${ip_address}:$dataprep_service_port/v1/dataprep/delete" + URL="http://${ip_address}:$DATAPREP_PORT/v1/dataprep/delete" HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -d '{"file_path": "dataprep_file.txt"}' -H 'Content-Type: application/json' -k -u admin:$OPENSEARCH_INITIAL_ADMIN_PASSWORD "$URL") HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g') @@ -139,7 +119,7 @@ function validate_microservice() { # check response status if [ "$HTTP_STATUS" -ne "404" ]; then echo "[ $SERVICE_NAME ] HTTP status is not 404. Received status was $HTTP_STATUS" - docker logs test-comps-dataprep-opensearch-langchain-server >> ${LOG_PATH}/dataprep_del.log + docker logs dataprep-opensearch-server >> ${LOG_PATH}/dataprep_del.log exit 1 else echo "[ $SERVICE_NAME ] HTTP status is 404. Checking content..." 
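Several of the rewritten stop functions in this patch pass more than one --filter name= flag to a single docker ps call; Docker ORs repeated filters on the same key, so one invocation can collect every container a test started. A condensed sketch of the pattern (the container names follow this patch's compose service naming):

    # Repeated name filters are OR'ed: containers matching either pattern are listed.
    cid=$(docker ps -aq --filter "name=dataprep-opensearch" --filter "name=opensearch-vector-db")
    if [[ -n "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi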
@@ -147,7 +127,7 @@ function validate_microservice() { # check response body if [[ "$RESPONSE_BODY" != *'{"detail":"Single file deletion is not implemented yet"}'* ]]; then echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" - docker logs test-comps-dataprep-opensearch-langchain-server >> ${LOG_PATH}/dataprep_del.log + docker logs dataprep-opensearch-server >> ${LOG_PATH}/dataprep_del.log exit 1 else echo "[ $SERVICE_NAME ] Content is as expected." @@ -155,9 +135,10 @@ function validate_microservice() { } function stop_service() { - cid=$(docker ps -aq --filter "name=test-comps-dataprep-opensearch-langchain*") + cid=$(docker ps -aq --filter "name=dataprep-opensearch-*") + if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi + cid=$(docker ps -aq --filter "name=opensearch-vector-db") if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi - } function main() { @@ -169,7 +150,7 @@ function main() { validate_microservice stop_service - # echo y | docker system prune + echo y | docker system prune } main diff --git a/tests/dataprep/test_dataprep_pgvector.sh b/tests/dataprep/test_dataprep_pgvector.sh index b768f05936..e8f7e2cf7a 100644 --- a/tests/dataprep/test_dataprep_pgvector.sh +++ b/tests/dataprep/test_dataprep_pgvector.sh @@ -7,7 +7,7 @@ set -x WORKPATH=$(dirname "$PWD") LOG_PATH="$WORKPATH/tests" ip_address=$(hostname -I | awk '{print $1}') -dataprep_service_port=5013 +DATAPREP_PORT="11105" function build_docker_images() { cd $WORKPATH @@ -16,34 +16,35 @@ function build_docker_images() { docker pull pgvector/pgvector:0.7.0-pg16 # build dataprep image for pgvector - docker build --no-cache -t opea/dataprep-pgvector:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f $WORKPATH/comps/dataprep/src/Dockerfile . + docker build --no-cache -t opea/dataprep:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f $WORKPATH/comps/dataprep/src/Dockerfile . if [ $? 
-ne 0 ]; then - echo "opea/dataprep-pgvector built fail" + echo "opea/dataprep built fail" exit 1 else - echo "opea/dataprep-pgvector built successful" + echo "opea/dataprep built successful" fi } function start_service() { + export VOLUMES_PATH=$WORKPATH/comps/third_parties/pgvector/src/init.sql export POSTGRES_USER=testuser export POSTGRES_PASSWORD=testpwd export POSTGRES_DB=vectordb - - docker run --name test-comps-vectorstore-postgres -e POSTGRES_USER=${POSTGRES_USER} -e POSTGRES_HOST_AUTH_METHOD=trust -e POSTGRES_DB=${POSTGRES_DB} -e POSTGRES_PASSWORD=${POSTGRES_PASSWORD} -p 5432:5432 -d -v $WORKPATH/comps/third_parties/pgvector/src/init.sql:/docker-entrypoint-initdb.d/init.sql pgvector/pgvector:0.7.0-pg16 - - sleep 10s - - docker run -d --name="test-comps-dataprep-pgvector" -p ${dataprep_service_port}:5000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e PG_CONNECTION_STRING=postgresql+psycopg2://${POSTGRES_USER}:${POSTGRES_PASSWORD}@$ip_address:5432/${POSTGRES_DB} -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_PGVECTOR" opea/dataprep-pgvector:comps - - sleep 3m + export PG_CONNECTION_STRING=postgresql+psycopg2://${POSTGRES_USER}:${POSTGRES_PASSWORD}@$ip_address:5432/${POSTGRES_DB} + + service_name="pgvector-db dataprep-pgvector" + export host_ip=${ip_address} + export TAG="comps" + cd $WORKPATH/comps/dataprep/deployment/docker_compose/ + docker compose up ${service_name} -d + sleep 1m } function validate_microservice() { cd $LOG_PATH - # test /v1/dataprep - URL="http://${ip_address}:$dataprep_service_port/v1/dataprep/ingest" + # test /v1/dataprep/ingest + URL="http://${ip_address}:$DATAPREP_PORT/v1/dataprep/ingest" echo "Deep learning is a subset of machine learning that utilizes neural networks with multiple layers to analyze various levels of abstract data representations. It enables computers to identify patterns and make decisions with minimal human intervention by learning from large amounts of data." > $LOG_PATH/dataprep_file.txt HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -F 'files=@./dataprep_file.txt' -H 'Content-Type: multipart/form-data' "$URL") if [ "$HTTP_STATUS" -eq 200 ]; then @@ -55,17 +56,17 @@ function validate_microservice() { echo "[ dataprep ] Content is as expected." else echo "[ dataprep ] Content does not match the expected result: $CONTENT" - docker logs test-comps-dataprep-pgvector >> ${LOG_PATH}/dataprep.log + docker logs dataprep-pgvector-server >> ${LOG_PATH}/dataprep.log exit 1 fi else echo "[ dataprep ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs test-comps-dataprep-pgvector >> ${LOG_PATH}/dataprep.log + docker logs dataprep-pgvector-server >> ${LOG_PATH}/dataprep.log exit 1 fi - # test /v1/dataprep/get_file - URL="http://${ip_address}:$dataprep_service_port/v1/dataprep/get" + # test /v1/dataprep/get + URL="http://${ip_address}:$DATAPREP_PORT/v1/dataprep/get" HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -H 'Content-Type: application/json' "$URL") if [ "$HTTP_STATUS" -eq 200 ]; then echo "[ dataprep - file ] HTTP status is 200. Checking content..." @@ -75,33 +76,33 @@ function validate_microservice() { echo "[ dataprep - file ] Content is as expected." else echo "[ dataprep - file ] Content does not match the expected result: $CONTENT" - docker logs test-comps-dataprep-pgvector >> ${LOG_PATH}/dataprep_file.log + docker logs dataprep-pgvector-server >> ${LOG_PATH}/dataprep_file.log exit 1 fi else echo "[ dataprep - file ] HTTP status is not 200. 
Received status was $HTTP_STATUS" - docker logs test-comps-dataprep-pgvector >> ${LOG_PATH}/dataprep_file.log + docker logs dataprep-pgvector-server >> ${LOG_PATH}/dataprep_file.log exit 1 fi - # test /v1/dataprep/delete_file - URL="http://${ip_address}:$dataprep_service_port/v1/dataprep/delete" + # test /v1/dataprep/delete + URL="http://${ip_address}:$DATAPREP_PORT/v1/dataprep/delete" HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d '{"file_path": "dataprep_file.txt"}' -H 'Content-Type: application/json' "$URL") if [ "$HTTP_STATUS" -eq 200 ]; then echo "[ dataprep - del ] HTTP status is 200." - docker logs test-comps-dataprep-pgvector >> ${LOG_PATH}/dataprep_del.log + docker logs dataprep-pgvector-server >> ${LOG_PATH}/dataprep_del.log else echo "[ dataprep - del ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs test-comps-dataprep-pgvector >> ${LOG_PATH}/dataprep_del.log + docker logs dataprep-pgvector-server >> ${LOG_PATH}/dataprep_del.log exit 1 fi } function stop_docker() { - cid=$(docker ps -aq --filter "name=test-comps-vectorstore-postgres*") + cid=$(docker ps -aq --filter "name=dataprep-pgvector-server") if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi - cid=$(docker ps -aq --filter "name=test-comps-dataprep-pgvector*") + cid=$(docker ps -aq --filter "name=pgvector-db") if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } diff --git a/tests/dataprep/test_dataprep_pinecone.sh b/tests/dataprep/test_dataprep_pinecone.sh index 3ba0b3b882..6afde25c9d 100644 --- a/tests/dataprep/test_dataprep_pinecone.sh +++ b/tests/dataprep/test_dataprep_pinecone.sh @@ -6,16 +6,18 @@ set -x WORKPATH=$(dirname "$PWD") ip_address=$(hostname -I | awk '{print $1}') +DATAPREP_PORT="11106" + function build_docker_images() { cd $WORKPATH # build dataprep image for pinecone - docker build --no-cache -t opea/dataprep-pinecone:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f $WORKPATH/comps/dataprep/src/Dockerfile . + docker build --no-cache -t opea/dataprep:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f $WORKPATH/comps/dataprep/src/Dockerfile . if [ $? 
-ne 0 ]; then - echo "opea/dataprep-pinecone built fail" + echo "opea/dataprep built fail" exit 1 else - echo "opea/dataprep-pinecone built successful" + echo "opea/dataprep built successful" fi } @@ -24,13 +26,15 @@ function start_service() { export PINECONE_INDEX_NAME="test-index" export HUGGINGFACEHUB_API_TOKEN=$HF_TOKEN - docker run -d --name="test-comps-dataprep-pinecone" -p 5039:5000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e PINECONE_API_KEY=$PINECONE_API_KEY -e PINECONE_INDEX_NAME=$PINECONE_INDEX_NAME -e LOGFLAG=true -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_PINECONE" opea/dataprep-pinecone:comps - + service_name="dataprep-pinecone" + export TAG="comps" + cd $WORKPATH/comps/dataprep/deployment/docker_compose/ + docker compose up ${service_name} -d sleep 1m } function validate_microservice() { - URL="http://$ip_address:5039/v1/dataprep/ingest" + URL="http://$ip_address:${DATAPREP_PORT}/v1/dataprep/ingest" echo 'The OPEA platform includes: Detailed framework of composable building blocks for state-of-the-art generative AI systems including LLMs, data stores, and prompt engines' > ./dataprep_file.txt result=$(curl --noproxy $ip_address --location --request POST \ --form 'files=@./dataprep_file.txt' $URL) @@ -38,26 +42,23 @@ function validate_microservice() { echo "Result correct." else echo "Result wrong. Received was $result" - docker logs test-comps-dataprep-pinecone + docker logs dataprep-pinecone-server exit 1 fi - DELETE_URL="http://$ip_address:5039/v1/dataprep/delete" + DELETE_URL="http://$ip_address:${DATAPREP_PORT}/v1/dataprep/delete" result=$(curl --noproxy $ip_address --location --request POST \ -d '{"file_path": "all"}' -H 'Content-Type: application/json' $DELETE_URL) if [[ $result == *"true"* ]]; then echo "Result correct." else echo "Result wrong. Received was $result" - docker logs test-comps-dataprep-pinecone + docker logs dataprep-pinecone-server exit 1 fi } function stop_docker() { - cid=$(docker ps -aq --filter "name=vectorstore-pinecone*") - if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi - - cid=$(docker ps -aq --filter "name=test-comps-dataprep-pinecone*") + cid=$(docker ps -aq --filter "name=dataprep-pinecone-server*") if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } diff --git a/tests/dataprep/test_dataprep_qdrant.sh b/tests/dataprep/test_dataprep_qdrant.sh index 17edbc2556..818f99da24 100644 --- a/tests/dataprep/test_dataprep_qdrant.sh +++ b/tests/dataprep/test_dataprep_qdrant.sh @@ -7,30 +7,34 @@ set -x WORKPATH=$(dirname "$PWD") LOG_PATH="$WORKPATH/tests" ip_address=$(hostname -I | awk '{print $1}') +DATAPREP_PORT="11107" +TEI_EMBEDDER_PORT="10220" function build_docker_images() { cd $WORKPATH # dataprep qdrant image - docker build --no-cache -t opea/dataprep-qdrant:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/src/Dockerfile . + docker build --no-cache -t opea/dataprep:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/src/Dockerfile . if [ $? 
-ne 0 ]; then - echo "opea/dataprep-qdrant built fail" + echo "opea/dataprep built fail" exit 1 else - echo "opea/dataprep-qdrant built successful" + echo "opea/dataprep built successful" fi } function start_service() { - QDRANT_PORT=6360 - docker run -d --name="test-comps-dataprep-qdrant-langchain" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p $QDRANT_PORT:6333 -p 6334:6334 --ipc=host qdrant/qdrant - tei_embedding_port=6361 - model="BAAI/bge-base-en-v1.5" - docker run -d --name="test-comps-dataprep-qdrant-langchain-tei" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p $tei_embedding_port:80 -v ./data:/data --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 --model-id $model - dataprep_service_port=6362 - TEI_EMBEDDING_ENDPOINT="http://${ip_address}:${tei_embedding_port}" - COLLECTION_NAME="rag-qdrant" - docker run -d --name="test-comps-dataprep-qdrant-langchain-server" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e QDRANT_HOST=$ip_address -e QDRANT_PORT=$QDRANT_PORT -e COLLECTION_NAME=$COLLECTION_NAME -e TEI_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -p ${dataprep_service_port}:5000 --ipc=host -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_QDRANT" opea/dataprep-qdrant:comps + export host_ip=${ip_address} + export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" + export TEI_EMBEDDER_PORT="10224" + export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:${TEI_EMBEDDER_PORT}" + export COLLECTION_NAME="rag-qdrant" + export QDRANT_HOST=$ip_address + export QDRANT_PORT=6360 + export TAG="comps" + service_name="qdrant-vector-db tei-embedding-serving dataprep-qdrant" + cd $WORKPATH/comps/dataprep/deployment/docker_compose/ + docker compose up ${service_name} -d sleep 1m } @@ -57,8 +61,8 @@ function validate_services() { # check response status if [ "$HTTP_STATUS" -ne "200" ]; then echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs test-comps-dataprep-qdrant-langchain-tei >> ${LOG_PATH}/tei-endpoint.log - docker logs test-comps-dataprep-qdrant-langchain-server >> ${LOG_PATH}/dataprep-qdrant.log + docker logs tei-embedding-serving >> ${LOG_PATH}/tei-endpoint.log + docker logs dataprep-qdrant-server >> ${LOG_PATH}/dataprep-qdrant.log exit 1 else echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." @@ -66,8 +70,8 @@ function validate_services() { # check response body if [[ "$RESPONSE_BODY" != *"$EXPECTED_RESULT"* ]]; then echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" - docker logs test-comps-dataprep-qdrant-langchain-tei >> ${LOG_PATH}/tei-endpoint.log - docker logs test-comps-dataprep-qdrant-langchain-server >> ${LOG_PATH}/dataprep-qdrant.log + docker logs tei-embedding-serving >> ${LOG_PATH}/tei-endpoint.log + docker logs dataprep-qdrant-server >> ${LOG_PATH}/dataprep-qdrant.log exit 1 else echo "[ $SERVICE_NAME ] Content is as expected." @@ -79,31 +83,31 @@ function validate_services() { function validate_microservice() { # tei for embedding service validate_services \ - "${ip_address}:6361/embed" \ + "${ip_address}:${TEI_EMBEDDER_PORT}/embed" \ "[[" \ "tei_embedding" \ - "test-comps-dataprep-qdrant-langchain-tei" \ + "tei-embedding-serving" \ '{"inputs":"What is Deep Learning?"}' # dataprep upload file echo "Deep learning is a subset of machine learning that utilizes neural networks with multiple layers to analyze various levels of abstract data representations. 
It enables computers to identify patterns and make decisions with minimal human intervention by learning from large amounts of data." > $LOG_PATH/dataprep_file.txt validate_services \ - "${ip_address}:6362/v1/dataprep/ingest" \ + "${ip_address}:${DATAPREP_PORT}/v1/dataprep/ingest" \ "Data preparation succeeded" \ "dataprep_upload_file" \ - "test-comps-dataprep-qdrant-langchain-server" + "dataprep-qdrant-server" # dataprep upload link validate_services \ - "${ip_address}:6362/v1/dataprep/ingest" \ + "${ip_address}:${DATAPREP_PORT}/v1/dataprep/ingest" \ "Data preparation succeeded" \ "dataprep_upload_link" \ - "test-comps-dataprep-qdrant-langchain-server" + "dataprep-qdrant-server" } function stop_docker() { - cid=$(docker ps -aq --filter "name=test-comps-dataprep-qdrant-langchain*") + cid=$(docker ps -aq --filter "name=dataprep-qdrant-server*" --filter "name=tei-embedding-serving*" --filter "name=qdrant-vector-db") if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi rm $LOG_PATH/dataprep_file.txt diff --git a/tests/dataprep/test_dataprep_redis.sh b/tests/dataprep/test_dataprep_redis.sh index 7e8af5b005..3cbd6b02b5 100644 --- a/tests/dataprep/test_dataprep_redis.sh +++ b/tests/dataprep/test_dataprep_redis.sh @@ -7,42 +7,44 @@ set -x WORKPATH=$(dirname "$PWD") LOG_PATH="$WORKPATH/tests" ip_address=$(hostname -I | awk '{print $1}') +DATAPREP_PORT="11108" +TEI_EMBEDDER_PORT="10221" function build_docker_images() { cd $WORKPATH echo $(pwd) - docker build -t opea/dataprep-redis:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/src/Dockerfile . + docker build -t opea/dataprep:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/src/Dockerfile . if [ $? 
-ne 0 ]; then - echo "opea/dataprep-redis built fail" + echo "opea/dataprep built fail" exit 1 else - echo "opea/dataprep-redis built successful" + echo "opea/dataprep built successful" fi } function start_service() { - REDIS_PORT=6380 - docker run -d --name="test-comps-dataprep-redis-langchain" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p $REDIS_PORT:6379 -p 8002:8001 --ipc=host redis/redis-stack:7.2.0-v9 - embed_port=5439 - embed_model="BAAI/bge-base-en-v1.5" - docker run -d -p $embed_port:80 -v ./data:/data --name test-comps-dataprep-redis-langchain-tei-server -e http_proxy=$http_proxy -e https_proxy=$https_proxy --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 --model-id $embed_model - export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:${embed_port}" - - export dataprep_service_port=5013 - REDIS_URL="redis://${ip_address}:${REDIS_PORT}" + export host_ip=${ip_address} + export REDIS_HOST=$ip_address + export REDIS_PORT=6379 + export DATAPREP_PORT="11108" + export TEI_EMBEDDER_PORT="10221" + export REDIS_URL="redis://${ip_address}:${REDIS_PORT}" + export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" + export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:${TEI_EMBEDDER_PORT}" export INDEX_NAME="rag_redis" - export HF_TOKEN=${HF_TOKEN} - docker run -d --name="test-comps-dataprep-redis-langchain-server" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e REDIS_HOST=$ip_address -e REDIS_PORT=$REDIS_PORT -e TEI_EMBEDDING_ENDPOINT=${TEI_EMBEDDING_ENDPOINT} -e INDEX_NAME=$INDEX_NAME -e HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN} -e LOGFLAG=true -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_REDIS" -p ${dataprep_service_port}:5000 --ipc=host opea/dataprep-redis:comps + export TAG="comps" + service_name="redis-vector-db tei-embedding-serving dataprep-redis" + cd $WORKPATH/comps/dataprep/deployment/docker_compose/ + docker compose up ${service_name} -d sleep 1m } function validate_microservice() { cd $LOG_PATH - export dataprep_service_port=5013 # test /v1/dataprep/delete - URL="http://${ip_address}:$dataprep_service_port/v1/dataprep/delete" + URL="http://${ip_address}:${DATAPREP_PORT}/v1/dataprep/delete" HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -d '{"file_path": "all"}' -H 'Content-Type: application/json' "$URL") HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g') @@ -50,7 +52,7 @@ function validate_microservice() { if [ "$HTTP_STATUS" -ne "200" ]; then echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs test-comps-dataprep-redis-langchain-server >> ${LOG_PATH}/dataprep_del.log + docker logs dataprep-redis-server >> ${LOG_PATH}/dataprep_del.log exit 1 else echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." @@ -58,14 +60,14 @@ function validate_microservice() { # check response body if [[ "$RESPONSE_BODY" != *'{"status":true}'* ]]; then echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" - docker logs test-comps-dataprep-redis-langchain-server >> ${LOG_PATH}/dataprep_del.log + docker logs dataprep-redis-server >> ${LOG_PATH}/dataprep_del.log exit 1 else echo "[ $SERVICE_NAME ] Content is as expected." 
fi # test /v1/dataprep/ingest upload file - URL="http://${ip_address}:$dataprep_service_port/v1/dataprep/ingest" + URL="http://${ip_address}:$DATAPREP_PORT/v1/dataprep/ingest" echo "Deep learning is a subset of machine learning that utilizes neural networks with multiple layers to analyze various levels of abstract data representations. It enables computers to identify patterns and make decisions with minimal human intervention by learning from large amounts of data." > $LOG_PATH/dataprep_file.txt HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F 'files=@./dataprep_file.txt' -H 'Content-Type: multipart/form-data' "$URL") HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') @@ -74,21 +76,21 @@ function validate_microservice() { if [ "$HTTP_STATUS" -ne "200" ]; then echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs test-comps-dataprep-redis-langchain-server >> ${LOG_PATH}/dataprep_upload_file.log + docker logs dataprep-redis-server >> ${LOG_PATH}/dataprep_upload_file.log exit 1 else echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." fi if [[ "$RESPONSE_BODY" != *"Data preparation succeeded"* ]]; then echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" - docker logs test-comps-dataprep-redis-langchain-server >> ${LOG_PATH}/dataprep_upload_file.log + docker logs dataprep-redis-server >> ${LOG_PATH}/dataprep_upload_file.log exit 1 else echo "[ $SERVICE_NAME ] Content is as expected." fi # test /v1/dataprep/ingest upload link - URL="http://${ip_address}:$dataprep_service_port/v1/dataprep/ingest" + URL="http://${ip_address}:$DATAPREP_PORT/v1/dataprep/ingest" HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F 'link_list=["https://www.ces.tech/"]' "$URL") HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g') @@ -97,21 +99,21 @@ function validate_microservice() { if [ "$HTTP_STATUS" -ne "200" ]; then echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs test-comps-dataprep-redis-langchain-server >> ${LOG_PATH}/dataprep_upload_link.log + docker logs dataprep-redis-server >> ${LOG_PATH}/dataprep_upload_link.log exit 1 else echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." fi if [[ "$RESPONSE_BODY" != *"Data preparation succeeded"* ]]; then echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" - docker logs test-comps-dataprep-redis-langchain-server >> ${LOG_PATH}/dataprep_upload_link.log + docker logs dataprep-redis-server >> ${LOG_PATH}/dataprep_upload_link.log exit 1 else echo "[ $SERVICE_NAME ] Content is as expected." fi # test /v1/dataprep/get - URL="http://${ip_address}:$dataprep_service_port/v1/dataprep/get" + URL="http://${ip_address}:$DATAPREP_PORT/v1/dataprep/get" HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST "$URL") HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g') @@ -119,14 +121,14 @@ function validate_microservice() { if [ "$HTTP_STATUS" -ne "200" ]; then echo "[ $SERVICE_NAME ] HTTP status is not 200. 
Received status was $HTTP_STATUS" - docker logs test-comps-dataprep-redis-langchain-server >> ${LOG_PATH}/dataprep_file.log + docker logs dataprep-redis-server >> ${LOG_PATH}/dataprep_file.log exit 1 else echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." fi if [[ "$RESPONSE_BODY" != *'{"name":'* ]]; then echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" - docker logs test-comps-dataprep-redis-langchain-server >> ${LOG_PATH}/dataprep_file.log + docker logs dataprep-redis-server >> ${LOG_PATH}/dataprep_file.log exit 1 else echo "[ $SERVICE_NAME ] Content is as expected." @@ -135,7 +137,7 @@ function validate_microservice() { } function stop_docker() { - cid=$(docker ps -aq --filter "name=test-comps-dataprep-redis-langchain*") + cid=$(docker ps -aq --filter "name=dataprep-redis-server*" --filter "name=redis-vector-*" --filter "name=tei-embedding-*") if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } diff --git a/tests/dataprep/test_dataprep_redis_multimodal.sh b/tests/dataprep/test_dataprep_redis_multimodal.sh index a5c4701bc9..f99a3878e8 100644 --- a/tests/dataprep/test_dataprep_redis_multimodal.sh +++ b/tests/dataprep/test_dataprep_redis_multimodal.sh @@ -22,17 +22,18 @@ image_fn="${tmp_dir}/${image_name}.png" caption_fn="${tmp_dir}/${image_name}.txt" pdf_name="nke-10k-2023" pdf_fn="${tmp_dir}/${pdf_name}.pdf" +DATAPREP_PORT="11109" function build_docker_images() { cd $WORKPATH echo $(pwd) - docker build --no-cache -t opea/dataprep-multimodal-redis:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/src/Dockerfile . + docker build --no-cache -t opea/dataprep:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/src/Dockerfile . if [ $? 
-ne 0 ]; then - echo "opea/dataprep-multimodal-redis built fail" + echo "opea/dataprep built fail" exit 1 else - echo "opea/dataprep-multimodal-redis built successful" + echo "opea/dataprep built successful" fi } @@ -74,17 +75,17 @@ function start_lvm() { } function start_service() { - # start redis - echo "Starting Redis server" - REDIS_PORT=6380 - docker run -d --name="test-redis" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p $REDIS_PORT:6379 -p 8002:8001 --ipc=host redis/redis-stack:7.2.0-v9 - - # start dataprep microservice - echo "Starting dataprep microservice" - dataprep_service_port=5013 - REDIS_URL="redis://${ip_address}:${REDIS_PORT}" - docker run -d --name="test-comps-dataprep-multimodal-redis" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME -e LVM_ENDPOINT=$LVM_ENDPOINT -p ${dataprep_service_port}:5000 --runtime=runc --ipc=host -e MULTIMODAL_DATAPREP=true -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_MULTIMODALREDIS" opea/dataprep-multimodal-redis:comps - + export host_ip=${ip_address} + export REDIS_HOST=$ip_address + export REDIS_PORT=6379 + export REDIS_URL="redis://${ip_address}:${REDIS_PORT}" + export LVM_PORT=5028 + export LVM_ENDPOINT="http://${ip_address}:${LVM_PORT}/v1/lvm" + export INDEX_NAME="dataprep" + export TAG="comps" + service_name="redis-vector-db dataprep-multimodal-redis" + cd $WORKPATH/comps/dataprep/deployment/docker_compose/ + docker compose up ${service_name} -d sleep 1m } @@ -145,7 +146,7 @@ function validate_microservice() { # test v1/generate_transcripts upload file echo "Testing generate_transcripts API" - URL="http://${ip_address}:$dataprep_service_port/v1/dataprep/generate_transcripts" + URL="http://${ip_address}:$DATAPREP_PORT/v1/dataprep/generate_transcripts" HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F "files=@$video_fn" -F "files=@$audio_fn" -H 'Content-Type: multipart/form-data' "$URL") HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g') @@ -153,14 +154,14 @@ function validate_microservice() { if [ "$HTTP_STATUS" -ne "200" ]; then echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs test-comps-dataprep-multimodal-redis >> ${LOG_PATH}/dataprep_upload_file.log + docker logs dataprep-multimodal-redis-server >> ${LOG_PATH}/dataprep_upload_file.log exit 1 else echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." fi if [[ "$RESPONSE_BODY" != *"Data preparation succeeded"* ]]; then echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" - docker logs test-comps-dataprep-multimodal-redis >> ${LOG_PATH}/dataprep_upload_file.log + docker logs dataprep-multimodal-redis-server >> ${LOG_PATH}/dataprep_upload_file.log exit 1 else echo "[ $SERVICE_NAME ] Content is as expected." 
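Every refactored start_service follows the same recipe: export the variables the consolidated compose file reads, then bring up only the services the test needs. A condensed sketch of that flow for this multimodal test, assuming the variable names match those defined in comps/dataprep/deployment/docker_compose/compose.yaml:

    export host_ip=$(hostname -I | awk '{print $1}')
    export TAG="comps"                                   # tag of the image built above
    export REDIS_URL="redis://${host_ip}:6379"
    export LVM_ENDPOINT="http://${host_ip}:5028/v1/lvm"
    service_name="redis-vector-db dataprep-multimodal-redis"
    cd comps/dataprep/deployment/docker_compose/
    docker compose up ${service_name} -d                 # only the named services start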
@@ -168,7 +169,7 @@ function validate_microservice() { # test ingest upload video file echo "Testing ingest API with video+transcripts" - URL="http://${ip_address}:$dataprep_service_port/v1/dataprep/ingest" + URL="http://${ip_address}:$DATAPREP_PORT/v1/dataprep/ingest" HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F "files=@$video_fn" -F "files=@$transcript_fn" -H 'Content-Type: multipart/form-data' "$URL") HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') @@ -177,14 +178,14 @@ function validate_microservice() { if [ "$HTTP_STATUS" -ne "200" ]; then echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs test-comps-dataprep-multimodal-redis >> ${LOG_PATH}/dataprep_upload_file.log + docker logs dataprep-multimodal-redis-server >> ${LOG_PATH}/dataprep_upload_file.log exit 1 else echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." fi if [[ "$RESPONSE_BODY" != *"Data preparation succeeded"* ]]; then echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" - docker logs test-comps-dataprep-multimodal-redis >> ${LOG_PATH}/dataprep_upload_file.log + docker logs dataprep-multimodal-redis-server >> ${LOG_PATH}/dataprep_upload_file.log exit 1 else echo "[ $SERVICE_NAME ] Content is as expected." @@ -192,7 +193,7 @@ function validate_microservice() { # test ingest upload image file echo "Testing ingest API with image+caption" - URL="http://${ip_address}:$dataprep_service_port/v1/dataprep/ingest" + URL="http://${ip_address}:$DATAPREP_PORT/v1/dataprep/ingest" HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F "files=@$image_fn" -F "files=@$caption_fn" -H 'Content-Type: multipart/form-data' "$URL") HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') @@ -201,14 +202,14 @@ function validate_microservice() { if [ "$HTTP_STATUS" -ne "200" ]; then echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs test-comps-dataprep-multimodal-redis >> ${LOG_PATH}/dataprep_upload_file.log + docker logs dataprep-multimodal-redis-server >> ${LOG_PATH}/dataprep_upload_file.log exit 1 else echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." fi if [[ "$RESPONSE_BODY" != *"Data preparation succeeded"* ]]; then echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" - docker logs test-comps-dataprep-multimodal-redis >> ${LOG_PATH}/dataprep_upload_file.log + docker logs dataprep-multimodal-redis-server >> ${LOG_PATH}/dataprep_upload_file.log exit 1 else echo "[ $SERVICE_NAME ] Content is as expected." @@ -216,7 +217,7 @@ function validate_microservice() { # test ingest with video and image echo "Testing ingest API with both video+transcript and image+caption" - URL="http://${ip_address}:$dataprep_service_port/v1/dataprep/ingest" + URL="http://${ip_address}:$DATAPREP_PORT/v1/dataprep/ingest" HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F "files=@$image_fn" -F "files=@$caption_fn" -F "files=@$video_fn" -F "files=@$transcript_fn" -H 'Content-Type: multipart/form-data' "$URL") HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') @@ -225,14 +226,14 @@ function validate_microservice() { if [ "$HTTP_STATUS" -ne "200" ]; then echo "[ $SERVICE_NAME ] HTTP status is not 200. 
Received status was $HTTP_STATUS" - docker logs test-comps-dataprep-multimodal-redis >> ${LOG_PATH}/dataprep_upload_file.log + docker logs dataprep-multimodal-redis-server >> ${LOG_PATH}/dataprep_upload_file.log exit 1 else echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." fi if [[ "$RESPONSE_BODY" != *"Data preparation succeeded"* ]]; then echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" - docker logs test-comps-dataprep-multimodal-redis >> ${LOG_PATH}/dataprep_upload_file.log + docker logs dataprep-multimodal-redis-server >> ${LOG_PATH}/dataprep_upload_file.log exit 1 else echo "[ $SERVICE_NAME ] Content is as expected." @@ -240,7 +241,7 @@ function validate_microservice() { # test ingest with invalid input (.png image with .vtt transcript) echo "Testing ingest API with invalid input (.png and .vtt)" - URL="http://${ip_address}:$dataprep_service_port/v1/dataprep/ingest" + URL="http://${ip_address}:$DATAPREP_PORT/v1/dataprep/ingest" HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F "files=@$image_fn" -F "files=@$transcript_fn" -H 'Content-Type: multipart/form-data' "$URL") HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') @@ -249,14 +250,14 @@ function validate_microservice() { if [ "$HTTP_STATUS" -ne "400" ]; then echo "[ $SERVICE_NAME ] HTTP status is not 400. Received status was $HTTP_STATUS" - docker logs test-comps-dataprep-multimodal-redis >> ${LOG_PATH}/dataprep_upload_file.log + docker logs dataprep-multimodal-redis-server >> ${LOG_PATH}/dataprep_upload_file.log exit 1 else echo "[ $SERVICE_NAME ] HTTP status is 400. Checking content..." fi if [[ "$RESPONSE_BODY" != *"No caption file found for $image_name"* ]]; then echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" - docker logs test-comps-dataprep-multimodal-redis >> ${LOG_PATH}/dataprep_upload_file.log + docker logs dataprep-multimodal-redis-server >> ${LOG_PATH}/dataprep_upload_file.log exit 1 else echo "[ $SERVICE_NAME ] Content is as expected." @@ -264,7 +265,7 @@ function validate_microservice() { # test ingest with a PDF file echo "Testing ingest API with a PDF file" - URL="http://${ip_address}:$dataprep_service_port/v1/dataprep/ingest" + URL="http://${ip_address}:$DATAPREP_PORT/v1/dataprep/ingest" HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F "files=@$pdf_fn" -H 'Content-Type: multipart/form-data' "$URL") HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') @@ -273,14 +274,14 @@ function validate_microservice() { if [ "$HTTP_STATUS" -ne "200" ]; then echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs test-comps-dataprep-multimodal-redis >> ${LOG_PATH}/dataprep_upload_file.log + docker logs dataprep-multimodal-redis-server >> ${LOG_PATH}/dataprep_upload_file.log exit 1 else echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." fi if [[ "$RESPONSE_BODY" != *"Data preparation succeeded"* ]]; then echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" - docker logs test-comps-dataprep-multimodal-redis >> ${LOG_PATH}/dataprep_upload_file.log + docker logs dataprep-multimodal-redis-server >> ${LOG_PATH}/dataprep_upload_file.log exit 1 else echo "[ $SERVICE_NAME ] Content is as expected." 
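For manual debugging, the multimodal routes exercised above can be hit directly with curl; the file variables are the fixtures defined near the top of this script, and DATAPREP_PORT is the 11109 it sets:

    # Ingest a video together with its transcript
    curl -X POST -F "files=@${video_fn}" -F "files=@${transcript_fn}" \
        "http://${ip_address}:${DATAPREP_PORT}/v1/dataprep/ingest"
    # Let the service generate the transcript itself
    curl -X POST -F "files=@${video_fn}" \
        "http://${ip_address}:${DATAPREP_PORT}/v1/dataprep/generate_transcripts"
    # Caption a standalone image
    curl -X POST -F "files=@${image_fn}" \
        "http://${ip_address}:${DATAPREP_PORT}/v1/dataprep/generate_captions"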
@@ -288,7 +289,7 @@ function validate_microservice() { # test generate_captions upload video file echo "Testing generate_captions API with video" - URL="http://${ip_address}:$dataprep_service_port/v1/dataprep/generate_captions" + URL="http://${ip_address}:$DATAPREP_PORT/v1/dataprep/generate_captions" HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F "files=@$video_fn" -H 'Content-Type: multipart/form-data' "$URL") HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') @@ -297,14 +298,14 @@ function validate_microservice() { if [ "$HTTP_STATUS" -ne "200" ]; then echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs test-comps-dataprep-multimodal-redis >> ${LOG_PATH}/dataprep_upload_file.log + docker logs dataprep-multimodal-redis-server >> ${LOG_PATH}/dataprep_upload_file.log exit 1 else echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." fi if [[ "$RESPONSE_BODY" != *"Data preparation succeeded"* ]]; then echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" - docker logs test-comps-dataprep-multimodal-redis >> ${LOG_PATH}/dataprep_upload_file.log + docker logs dataprep-multimodal-redis-server >> ${LOG_PATH}/dataprep_upload_file.log exit 1 else echo "[ $SERVICE_NAME ] Content is as expected." @@ -312,7 +313,7 @@ function validate_microservice() { # test v1/generate_captions upload image file echo "Testing generate_captions API with image" - URL="http://${ip_address}:$dataprep_service_port/v1/dataprep/generate_captions" + URL="http://${ip_address}:$DATAPREP_PORT/v1/dataprep/generate_captions" HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F "files=@$image_fn" -H 'Content-Type: multipart/form-data' "$URL") HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') @@ -321,14 +322,14 @@ function validate_microservice() { if [ "$HTTP_STATUS" -ne "200" ]; then echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs test-comps-dataprep-multimodal-redis >> ${LOG_PATH}/dataprep_upload_file.log + docker logs dataprep-multimodal-redis-server >> ${LOG_PATH}/dataprep_upload_file.log exit 1 else echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." fi if [[ "$RESPONSE_BODY" != *"Data preparation succeeded"* ]]; then echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" - docker logs test-comps-dataprep-multimodal-redis >> ${LOG_PATH}/dataprep_upload_file.log + docker logs dataprep-multimodal-redis-server >> ${LOG_PATH}/dataprep_upload_file.log exit 1 else echo "[ $SERVICE_NAME ] Content is as expected." @@ -336,7 +337,7 @@ function validate_microservice() { # test /v1/dataprep/get_files echo "Testing get_files API" - URL="http://${ip_address}:$dataprep_service_port/v1/dataprep/get" + URL="http://${ip_address}:$DATAPREP_PORT/v1/dataprep/get" HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST "$URL") HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g') @@ -344,14 +345,14 @@ function validate_microservice() { if [ "$HTTP_STATUS" -ne "200" ]; then echo "[ $SERVICE_NAME ] HTTP status is not 200. 
Received status was $HTTP_STATUS" - docker logs test-comps-dataprep-multimodal-redis >> ${LOG_PATH}/dataprep_file.log + docker logs dataprep-multimodal-redis-server >> ${LOG_PATH}/dataprep_file.log exit 1 else echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." fi if [[ "$RESPONSE_BODY" != *${image_name}* || "$RESPONSE_BODY" != *${video_name}* || "$RESPONSE_BODY" != *${audio_name}* || "$RESPONSE_BODY" != *${pdf_name}* ]]; then echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" - docker logs test-comps-dataprep-multimodal-redis >> ${LOG_PATH}/dataprep_file.log + docker logs dataprep-multimodal-redis-server >> ${LOG_PATH}/dataprep_file.log exit 1 else echo "[ $SERVICE_NAME ] Content is as expected." @@ -359,7 +360,7 @@ function validate_microservice() { # test /v1/dataprep/delete echo "Testing delete API" - URL="http://${ip_address}:$dataprep_service_port/v1/dataprep/delete" + URL="http://${ip_address}:$DATAPREP_PORT/v1/dataprep/delete" HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -d '{"file_path": "dataprep_file.txt"}' -H 'Content-Type: application/json' "$URL") HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g') @@ -368,7 +369,7 @@ function validate_microservice() { # check response status if [ "$HTTP_STATUS" -ne "200" ]; then echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs test-comps-dataprep-multimodal-redis >> ${LOG_PATH}/dataprep_del.log + docker logs dataprep-multimodal-redis-server >> ${LOG_PATH}/dataprep_del.log exit 1 else echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." @@ -376,7 +377,7 @@ function validate_microservice() { # check response body if [[ "$RESPONSE_BODY" != *'{"status":true}'* ]]; then echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" - docker logs test-comps-dataprep-multimodal-redis >> ${LOG_PATH}/dataprep_del.log + docker logs dataprep-multimodal-redis-server >> ${LOG_PATH}/dataprep_del.log exit 1 else echo "[ $SERVICE_NAME ] Content is as expected." @@ -384,10 +385,10 @@ function validate_microservice() { } function stop_docker() { - cid=$(docker ps -aq --filter "name=test-*") + cid=$(docker ps -aq --filter "name=dataprep-multimodal-redis-server*" --filter "name=redis-vector-*") + if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi + cid=$(docker ps -aq --filter "name=test-comps-lvm*") if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi - # cid=$(docker ps -aq --filter "name=test-comps-lvm*") - # if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } diff --git a/tests/dataprep/test_dataprep_vdms.sh b/tests/dataprep/test_dataprep_vdms.sh index 0fe806e080..2409e13f70 100644 --- a/tests/dataprep/test_dataprep_vdms.sh +++ b/tests/dataprep/test_dataprep_vdms.sh @@ -7,28 +7,34 @@ set -x WORKPATH=$(dirname "$PWD") LOG_PATH="$WORKPATH/tests" ip_address=$(hostname -I | awk '{print $1}') +DATAPREP_PORT="11110" function build_docker_images() { cd $WORKPATH echo $(pwd) - docker build --no-cache -t opea/dataprep-vdms:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/src/Dockerfile . + docker build --no-cache -t opea/dataprep:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/src/Dockerfile . if [ $? 
-ne 0 ]; then - echo "opea/dataprep-vdms built fail" + echo "opea/dataprep built fail" exit 1 else - echo "opea/dataprep-vdms built successful" + echo "opea/dataprep built successful" fi docker pull intellabs/vdms:latest } function start_service() { - VDMS_PORT=5043 - docker run -d --name="test-comps-dataprep-vdms" -p $VDMS_PORT:55555 intellabs/vdms:latest - dataprep_service_port=5013 - COLLECTION_NAME="test-comps" - docker run -d --name="test-comps-dataprep-vdms-server" -e COLLECTION_NAME=$COLLECTION_NAME -e no_proxy=$no_proxy -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e VDMS_HOST=$ip_address -e VDMS_PORT=$VDMS_PORT -p ${dataprep_service_port}:5000 -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_VDMS" --ipc=host opea/dataprep-vdms:comps - sleep 30s + export host_ip=${ip_address} + export VDMS_HOST=$ip_address + export VDMS_PORT=55555 + export COLLECTION_NAME="test-comps" + export QDRANT_HOST=$ip_address + export QDRANT_PORT=$QDRANT_PORT + export TAG="comps" + service_name="vdms-vector-db dataprep-vdms" + cd $WORKPATH/comps/dataprep/deployment/docker_compose/ + docker compose up ${service_name} -d + sleep 1m } function validate_microservice() { @@ -36,9 +42,7 @@ function validate_microservice() { echo "Deep learning is a subset of machine learning that utilizes neural networks with multiple layers to analyze various levels of abstract data representations. It enables computers to identify patterns and make decisions with minimal human intervention by learning from large amounts of data." > $LOG_PATH/dataprep_file.txt - dataprep_service_port=5013 - - URL="http://$ip_address:$dataprep_service_port/v1/dataprep/ingest" + URL="http://$ip_address:$DATAPREP_PORT/v1/dataprep/ingest" HTTP_STATUS=$(http_proxy="" curl -s -o /dev/null -w "%{http_code}" -X POST -F 'files=@./dataprep_file.txt' -H 'Content-Type: multipart/form-data' ${URL} ) if [ "$HTTP_STATUS" -eq 200 ]; then echo "[ dataprep-upload-file ] HTTP status is 200. Checking content..." @@ -47,14 +51,14 @@ function validate_microservice() { echo "[ dataprep-upload-file ] Content is correct." else echo "[ dataprep-upload-file ] Content is not correct. Received content was $CONTENT" - docker logs test-comps-dataprep-vdms-server >> ${LOG_PATH}/dataprep-upload-file.log - docker logs test-comps-dataprep-vdms >> ${LOG_PATH}/dataprep-upload-file_vdms.log + docker logs dataprep-vdms-server >> ${LOG_PATH}/dataprep-upload-file.log + docker logs vdms-vector-db >> ${LOG_PATH}/dataprep-upload-file_vdms.log exit 1 fi else echo "[ dataprep-upload-file ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs test-comps-dataprep-vdms-server >> ${LOG_PATH}/dataprep-upload-file.log - docker logs test-comps-dataprep-vdms >> ${LOG_PATH}/dataprep-upload-file_vdms.log + docker logs dataprep-vdms-server >> ${LOG_PATH}/dataprep-upload-file.log + docker logs vdms-vector-db >> ${LOG_PATH}/dataprep-upload-file_vdms.log exit 1 fi rm ./dataprep_file.txt @@ -62,7 +66,7 @@ function validate_microservice() { } function stop_docker() { - cid=$(docker ps -aq --filter "name=test-comps-dataprep-vdms*") + cid=$(docker ps -aq --filter "name=dataprep-vdms*" --filter "name=vdms-vector*") if [[ ! 
-z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } diff --git a/tests/dataprep/test_dataprep_vdms_multimodal.sh b/tests/dataprep/test_dataprep_vdms_multimodal.sh index ec28f30ab5..a3af8dd5e0 100755 --- a/tests/dataprep/test_dataprep_vdms_multimodal.sh +++ b/tests/dataprep/test_dataprep_vdms_multimodal.sh @@ -7,28 +7,35 @@ set -x WORKPATH=$(dirname "$PWD") LOG_PATH="$WORKPATH/tests" ip_address=$(hostname -I | awk '{print $1}') +DATAPREP_PORT="11111" function build_docker_images() { cd $WORKPATH echo $(pwd) - docker build --no-cache -t opea/dataprep-vdms:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/src/Dockerfile . + docker build --no-cache -t opea/dataprep:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/src/Dockerfile . if [ $? -ne 0 ]; then - echo "opea/dataprep-vdms built fail" + echo "opea/dataprep built fail" exit 1 else - echo "opea/dataprep-vdms built successful" + echo "opea/dataprep built successful" fi docker pull intellabs/vdms:latest } function start_service() { - VDMS_PORT=5043 - docker run -d --name="test-comps-dataprep-vdms" -p $VDMS_PORT:55555 intellabs/vdms:latest - dataprep_service_port=5013 - COLLECTION_NAME="test-comps" - docker run -d --name="test-comps-dataprep-vdms-server" -e COLLECTION_NAME=$COLLECTION_NAME -e no_proxy=$no_proxy -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e VDMS_HOST=$ip_address -e VDMS_PORT=$VDMS_PORT -p ${dataprep_service_port}:5000 --ipc=host -e MULTIMODAL_DATAPREP=true opea/dataprep-vdms:comps - sleep 30s + export host_ip=${ip_address} + export VDMS_HOST=$ip_address + export VDMS_PORT=55555 + export COLLECTION_NAME="test-comps" + export QDRANT_HOST=$ip_address + export QDRANT_PORT=$QDRANT_PORT + export TAG="comps" + service_name="vdms-vector-db dataprep-vdms-multimodal" + cd $WORKPATH/comps/dataprep/deployment/docker_compose/ + docker compose up ${service_name} -d + sleep 1m + } function validate_microservice() { @@ -37,7 +44,7 @@ function validate_microservice() { sleep 5 # test /v1/dataprep upload file - URL="http://$ip_address:$dataprep_service_port/v1/dataprep/ingest_videos" + URL="http://$ip_address:$DATAPREP_PORT/v1/dataprep/ingest_videos" response=$(http_proxy="" curl -s -w "\n%{http_code}" -X POST -F 'files=@./silence_girl.mp4' -H 'Content-Type: multipart/form-data' ${URL}) CONTENT=$(echo "$response" | sed -e '$ d') @@ -49,14 +56,14 @@ function validate_microservice() { echo "[ dataprep-upload-videos ] Content is correct." else echo "[ dataprep-upload-videos ] Content is not correct. Received content was $CONTENT" - docker logs test-comps-dataprep-vdms-server >> ${LOG_PATH}/dataprep-upload-videos.log - docker logs test-comps-dataprep-vdms >> ${LOG_PATH}/dataprep-upload-videos_vdms.log + docker logs dataprep-vdms-multimodal-server >> ${LOG_PATH}/dataprep-upload-videos.log + docker logs vdms-vector-db >> ${LOG_PATH}/dataprep-upload-videos_vdms.log exit 1 fi else echo "[ dataprep-upload-videos ] HTTP status is not 200. 
Received status was $HTTP_STATUS" - docker logs test-comps-dataprep-vdms-server >> ${LOG_PATH}/dataprep-get-videos.log - docker logs test-comps-dataprep-vdms >> ${LOG_PATH}/dataprep-upload-videos_vdms.log + docker logs dataprep-vdms-multimodal-server >> ${LOG_PATH}/dataprep-upload-videos.log + docker logs vdms-vector-db >> ${LOG_PATH}/dataprep-upload-videos_vdms.log exit 1 fi @@ -64,7 +71,7 @@ function validate_microservice() { rm ./silence_girl.mp4 # test /v1/dataprep/get_videos - URL="http://$ip_address:$dataprep_service_port/v1/dataprep/get_videos" + URL="http://$ip_address:$DATAPREP_PORT/v1/dataprep/get_videos" response=$(http_proxy="" curl -s -w "\n%{http_code}" -X GET ${URL}) CONTENT=$(echo "$response" | sed -e '$ d') @@ -76,19 +83,19 @@ function validate_microservice() { echo "[ dataprep-get-videos ] Content is correct." else echo "[ dataprep-get-videos ] Content is not correct. Received content was $CONTENT" - docker logs test-comps-dataprep-vdms-server >> ${LOG_PATH}/dataprep-get-videos.log + docker logs dataprep-vdms-multimodal-server >> ${LOG_PATH}/dataprep-get-videos.log exit 1 fi else echo "[ dataprep-get-videos ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs test-comps-dataprep-vdms-server >> ${LOG_PATH}/dataprep-get-videos.log + docker logs dataprep-vdms-multimodal-server >> ${LOG_PATH}/dataprep-get-videos.log exit 1 fi # test /v1/dataprep/get_file/{filename} file_list=$CONTENT filename=$(echo $file_list | sed 's/^\[//;s/\]$//;s/,.*//;s/"//g') - URL="http://$ip_address:$dataprep_service_port/v1/dataprep/get/${filename}" + URL="http://$ip_address:$DATAPREP_PORT/v1/dataprep/get/${filename}" http_proxy="" wget ${URL} CONTENT=$(ls) @@ -96,14 +103,14 @@ function validate_microservice() { echo "[ download_file ] Content is correct." else echo "[ download_file ] Content is not correct. $CONTENT" - docker logs test-comps-dataprep-vdms-server >> ${LOG_PATH}/download_file.log + docker logs dataprep-vdms-multimodal-server >> ${LOG_PATH}/download_file.log exit 1 fi } function stop_docker() { - cid=$(docker ps -aq --filter "name=test-comps-dataprep-vdms*") + cid=$(docker ps -aq --filter "name=vdms-vector-db*" --filter "name=dataprep-vdms-multimodal*") if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } diff --git a/tests/guardrails/test_guardrails_hallucination_detection_on_intel_hpu.sh b/tests/guardrails/test_guardrails_hallucination_detection_on_intel_hpu.sh index d040f954a1..92b29827fe 100644 --- a/tests/guardrails/test_guardrails_hallucination_detection_on_intel_hpu.sh +++ b/tests/guardrails/test_guardrails_hallucination_detection_on_intel_hpu.sh @@ -13,6 +13,7 @@ function build_docker_images() { git clone https://github.com/HabanaAI/vllm-fork.git cd vllm-fork/ git checkout v0.6.4.post2+Gaudi-1.19.0 + sed -i 's/triton/triton==3.1.0/g' requirements-hpu.txt docker build --no-cache --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile.hpu -t opea/vllm-gaudi:comps --shm-size=128g . if [ $?
-ne 0 ]; then echo "opea/vllm-gaudi built fail" diff --git a/tests/retrievers/test_retrievers_neo4j_on_intel_hpu.sh b/tests/retrievers/test_retrievers_neo4j_on_intel_hpu.sh new file mode 100644 index 0000000000..f6857f35cb --- /dev/null +++ b/tests/retrievers/test_retrievers_neo4j_on_intel_hpu.sh @@ -0,0 +1,175 @@ +#!/usr/bin/env bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -x + +IMAGE_REPO=${IMAGE_REPO:-"opea"} +export REGISTRY=${IMAGE_REPO} +export TAG="comps" +echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" +echo "TAG=${TAG}" + +WORKPATH=$(dirname "$PWD") +LOG_PATH="$WORKPATH/tests" +export host_ip=$(hostname -I | awk '{print $1}') +service_name="retriever-neo4j" + +function build_docker_images() { + cd $WORKPATH + echo "current dir: $PWD" + docker build --no-cache -t ${REGISTRY:-opea}/retriever:${TAG:-latest} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/src/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/retriever built fail" + exit 1 + else + echo "opea/retriever built successful" + fi + + docker build --no-cache -t opea/dataprep-neo4j-llamaindex:comps --build-arg no_proxy=$no_proxy --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/src/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/dataprep-neo4j-llamaindex built fail" + exit 1 + else + echo "opea/dataprep-neo4j-llamaindex built successful" + fi + + docker pull ghcr.io/huggingface/tgi-gaudi:2.3.1 + docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 +} + +function start_service() { + export NEO4J_PORT1=11631 + export NEO4J_PORT2=11632 + export TEI_EMBEDDER_PORT=11633 + export LLM_ENDPOINT_PORT=11634 + export RETRIEVER_PORT=11635 + export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" + export DATA_PATH="/data2/cache" + export MAX_INPUT_TOKENS=1024 + export MAX_TOTAL_TOKENS=3000 + export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:${TEI_EMBEDDER_PORT}" + export LLM_MODEL_ID="meta-llama/Meta-Llama-3.1-8B-Instruct" + export TGI_LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}" + export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6004" + export NEO4J_URI="bolt://${host_ip}:${NEO4J_PORT2}" + export NEO4J_USERNAME="neo4j" + export NEO4J_PASSWORD="neo4jtest" + export no_proxy="localhost,127.0.0.1,"${host_ip} + export LOGFLAG=True + + cd $WORKPATH/comps/retrievers/deployment/docker_compose + docker compose -f compose.yaml up ${service_name} -d > ${LOG_PATH}/start_services_with_compose.log + sleep 1m + + # dataprep neo4j + # Not testing openai code path since not able to provide key for cicd + docker run -d --name="test-comps-retrievers-neo4j-llama-index-dataprep" -p 6004:5000 -v ./data:/data --ipc=host -e TGI_LLM_ENDPOINT=$TGI_LLM_ENDPOINT \ + -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e TEI_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e EMBEDDING_MODEL_ID=$EMBEDDING_MODEL_ID -e LLM_MODEL_ID=$LLM_MODEL_ID -e host_ip=$host_ip -e no_proxy=$no_proxy \ + -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e NEO4J_URL="bolt://${host_ip}:${NEO4J_PORT2}" -e NEO4J_USERNAME="neo4j" \ + -e NEO4J_PASSWORD="neo4jtest" -e HF_TOKEN=$HF_TOKEN -e LOGFLAG=True -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_NEO4J_LLAMAINDEX" opea/dataprep-neo4j-llamaindex:comps + + sleep 1m + +} + +function validate_service() { + local URL="$1" + local EXPECTED_RESULT="$2" + local SERVICE_NAME="$3" + local DOCKER_NAME="$4" + local INPUT_DATA="$5" + + if [[ $SERVICE_NAME == *"extract_graph_neo4j"* ]]; then + cd $LOG_PATH + 
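+        # The graph-extraction endpoint ingests an uploaded file, so this branch
+        # posts multipart/form-data; the JSON payload path below covers the other services.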
HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F 'files=@./dataprep_file.txt' -H 'Content-Type: multipart/form-data' "$URL") + elif [[ $SERVICE_NAME == *"neo4j-apoc"* ]]; then + HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" "$URL") + else + HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") + fi + HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') + RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g') + + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + + # check response status + if [ "$HTTP_STATUS" -ne "200" ]; then + echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" + exit 1 + else + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." + fi + # check response body + if [[ "$SERVICE_NAME" == "neo4j-apoc" ]]; then + echo "[ $SERVICE_NAME ] Skipping content check for neo4j-apoc." + else + if [[ "$RESPONSE_BODY" != *"$EXPECTED_RESULT"* ]]; then + echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" + exit 1 + else + echo "[ $SERVICE_NAME ] Content is as expected." + fi + fi + + sleep 1s +} + +function validate_microservice() { + # validate neo4j-apoc + validate_service \ + "${host_ip}:${NEO4J_PORT1}" \ + "200 OK" \ + "neo4j-apoc" \ + "neo4j-apoc" \ + "" + sleep 1m # retrieval can't curl as expected, try to wait for more time + + # tgi for llm service + validate_service \ + "${host_ip}:${LLM_ENDPOINT_PORT}/generate" \ + "generated_text" \ + "tgi-gaudi-service" \ + "tgi-gaudi-server" \ + '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' + + # test /v1/dataprep graph extraction + echo "The stock of company Chevron has declined about 10% over the past 90-days despite the fact that Q2 consensus earnings estimates have risen sharply (~25%) during that same time frame. Over the years, Chevron has kept a very strong balance sheet. FirstEnergy company posted its earnings results on Tuesday. The utilities provider reported $0.53 earnings per share for the quarter, topping the consensus estimate of $0.52 by $0.01, RTT News reports. FirstEnergy had a net margin of 10.85% and a return on equity of 17.17%. The Dáil was almost suspended on Thursday afternoon after Sinn Féin TD John Brady walked across the chamber and placed an on-call pager in front of the Minister for Housing Darragh O’Brien during a debate on retained firefighters. Darragh O’Brien said John Brady had taken part in an act of theatre that was obviously choreographed. Around 2,000 retained firefighters around the country staged a second day of industrial action on Tuesday and are due to start all out-strike action from next Tuesday. The mostly part-time workers, who keep the services going outside of Ireland’s larger urban centres, are taking industrial action in a dispute over pay and working conditions. Speaking in the Dáil, Sinn Féin deputy leader Pearse Doherty said firefighters had marched on Leinster House today and were very angry at the fact the Government will not intervene. Reintroduction of tax relief on mortgages needs to be considered, Darragh O’Brien says. Martin withdraws comment after saying People Before Profit would ‘put the jackboot on people’ Taoiseach ‘propagated fears’ farmers forced to rewet land due to nature restoration law – Cairns An intervention is required now. 
I’m asking you to make an improved offer in relation to pay for retained firefighters, Mr Doherty told the housing minister. I’m also asking you, and challenging you, to go outside after this Order of Business and meet with the firefighters because they are just fed up to the hilt in relation to what you said. Some of them have handed in their pagers to members of the Opposition and have challenged you to wear the pager for the next number of weeks, put up with an €8,600 retainer and not leave your community for the two and a half kilometres and see how you can stand over those type of pay and conditions. At this point, John Brady got up from his seat, walked across the chamber and placed the pager on the desk in front of Darragh O’Brien. Ceann Comhairle Seán Ó Fearghaíl said the Sinn Féin TD was completely out of order and told him not to carry out a charade in this House, adding it was absolutely outrageous behaviour and not to be encouraged. Darragh O’Brien said John Brady had engaged in an act of theatre here today which was obviously choreographed and was then interrupted with shouts from the Opposition benches. Mr Ó Fearghaíl said he would suspend the House if this racket continues. Darragh O’Brien later said he was confident the dispute could be resolved and he had immense regard for firefighters. The minister said he would encourage the unions to re-engage with the State’s industrial relations process while also accusing Sinn Féin of using the issue for their own political gain." > $LOG_PATH/dataprep_file.txt + validate_service \ + "http://${host_ip}:6004/v1/dataprep/ingest" \ + "Data preparation succeeded" \ + "extract_graph_neo4j" \ + "test-comps-retrievers-neo4j-llama-index-dataprep" + + # retrieval microservice + validate_service \ + "${host_ip}:${RETRIEVER_PORT}/v1/retrieval" \ + "documents" \ + "retriever_community_answers_neo4j" \ + "${service_name}" \ + "{\"messages\": [{\"role\": \"user\",\"content\": \"Who is John Brady and has he had any confrontations?\"}]}" + +} + +function stop_docker() { + cid=$(docker ps -aq --filter "name=test-comps-*") + if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi + cd $WORKPATH/comps/retrievers/deployment/docker_compose + docker compose -f compose.yaml down ${service_name} --remove-orphans +} + +function main() { + + stop_docker + + build_docker_images + start_service + + validate_microservice + + stop_docker + echo y | docker system prune + +} + +main diff --git a/tests/retrievers/test_retrievers_opensearch.sh b/tests/retrievers/test_retrievers_opensearch.sh index 87385a6374..7a5fc0aeb2 100644 --- a/tests/retrievers/test_retrievers_opensearch.sh +++ b/tests/retrievers/test_retrievers_opensearch.sh @@ -13,7 +13,7 @@ echo "TAG=${TAG}" WORKPATH=$(dirname "$PWD") LOG_PATH="$WORKPATH/tests" export host_ip=$(hostname -I | awk '{print $1}') -service_name="retriever-opensearch" +service_name="opensearch-vector-db tei-embedding-serving retriever-opensearch" function build_docker_images() { cd $WORKPATH From f51f3224d3bde474235235c579d5040d47102988 Mon Sep 17 00:00:00 2001 From: ZePan110 Date: Mon, 27 Jan 2025 14:32:41 +0800 Subject: [PATCH 2/6] Fix trivy scan issue of setuptools. 
(#1240) Signed-off-by: ZePan110 --- comps/agent/src/Dockerfile | 4 ---- comps/animation/src/Dockerfile | 3 +-- comps/asr/src/Dockerfile | 3 +-- comps/asr/src/integrations/dependency/whisper/Dockerfile | 4 +--- .../src/integrations/dependency/whisper/Dockerfile.intel_hpu | 3 +-- comps/chathistory/src/Dockerfile | 4 ++-- comps/dataprep/src/Dockerfile | 4 ---- comps/feedback_management/src/Dockerfile | 4 ++-- comps/finetuning/src/Dockerfile | 4 ++-- comps/finetuning/src/Dockerfile.intel_hpu | 4 ++-- comps/guardrails/src/bias_detection/Dockerfile | 4 ++-- comps/guardrails/src/guardrails/Dockerfile | 4 ++-- comps/guardrails/src/hallucination_detection/Dockerfile | 4 ++-- comps/guardrails/src/toxicity_detection/Dockerfile | 4 ++-- comps/image2image/src/Dockerfile.intel_hpu | 4 ++-- comps/image2video/src/Dockerfile.intel_hpu | 4 ++-- comps/llms/src/doc-summarization/Dockerfile | 4 ++-- comps/llms/src/faq-generation/Dockerfile | 4 ++-- comps/llms/src/text-generation/Dockerfile | 4 ++-- comps/llms/src/text-generation/Dockerfile.intel_hpu | 4 ++-- comps/llms/utils/lm-eval/Dockerfile | 4 ++-- comps/lvms/src/Dockerfile | 4 ++-- .../lvms/src/integrations/dependency/llama-vision/Dockerfile | 4 ++-- .../integrations/dependency/llama-vision/Dockerfile.guard | 4 ++-- comps/lvms/src/integrations/dependency/llava/Dockerfile | 2 +- .../src/integrations/dependency/llava/Dockerfile.intel_hpu | 4 ++-- .../lvms/src/integrations/dependency/video-llama/Dockerfile | 3 +-- comps/prompt_registry/src/Dockerfile | 4 ++-- comps/rerankings/src/Dockerfile | 5 ++--- comps/retrievers/src/Dockerfile | 4 ++-- comps/text2image/src/Dockerfile.intel_hpu | 4 ++-- comps/text2sql/src/Dockerfile | 4 ++-- comps/third_parties/bridgetower/src/Dockerfile | 4 ++-- comps/third_parties/bridgetower/src/Dockerfile.intel_hpu | 4 ++-- comps/third_parties/clip/src/Dockerfile | 4 ++-- comps/tts/src/Dockerfile | 4 ++-- comps/tts/src/integrations/dependency/speecht5/Dockerfile | 4 ++-- .../integrations/dependency/speecht5/Dockerfile.intel_hpu | 4 ++-- comps/web_retrievers/src/Dockerfile | 4 ++-- 39 files changed, 68 insertions(+), 83 deletions(-) diff --git a/comps/agent/src/Dockerfile b/comps/agent/src/Dockerfile index b9772ae983..5c2982f6b2 100644 --- a/comps/agent/src/Dockerfile +++ b/comps/agent/src/Dockerfile @@ -15,8 +15,6 @@ RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ chown -R user /home/user/ -USER user - COPY comps /home/user/comps RUN pip install --no-cache-dir --upgrade pip setuptools && \ @@ -28,8 +26,6 @@ RUN pip install --no-cache-dir --upgrade pip setuptools && \ ENV PYTHONPATH=/home/user -USER root - RUN mkdir -p /home/user/comps/agent/src/status && chown -R user /home/user/comps/agent/src/status USER user diff --git a/comps/animation/src/Dockerfile b/comps/animation/src/Dockerfile index 2608178272..a025d3f6c0 100644 --- a/comps/animation/src/Dockerfile +++ b/comps/animation/src/Dockerfile @@ -7,7 +7,6 @@ FROM python:3.11-slim RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ chown -R user /home/user/ -USER user ENV LANG=C.UTF-8 ARG ARCH=cpu @@ -18,7 +17,7 @@ RUN pip install --no-cache-dir --upgrade pip && \ pip install --no-cache-dir -r /home/user/comps/animation/src/requirements.txt ; ENV PYTHONPATH=$PYTHONPATH:/home/user - +USER user WORKDIR /home/user/comps/animation/src ENTRYPOINT ["python3", "opea_animation_microservice.py"] diff --git a/comps/asr/src/Dockerfile b/comps/asr/src/Dockerfile index ab0228098f..4ee860c110 100644 --- a/comps/asr/src/Dockerfile +++ b/comps/asr/src/Dockerfile @@ -6,7 
+6,6 @@ FROM python:3.11-slim RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ chown -R user /home/user/ -USER user ENV LANG=C.UTF-8 ARG ARCH=cpu @@ -22,7 +21,7 @@ RUN pip install --no-cache-dir --upgrade pip setuptools && \ fi ENV PYTHONPATH=$PYTHONPATH:/home/user - +USER user WORKDIR /home/user/comps/asr/src ENTRYPOINT ["python", "opea_asr_microservice.py"] diff --git a/comps/asr/src/integrations/dependency/whisper/Dockerfile b/comps/asr/src/integrations/dependency/whisper/Dockerfile index e11dc1a9ba..d0e09cfba3 100644 --- a/comps/asr/src/integrations/dependency/whisper/Dockerfile +++ b/comps/asr/src/integrations/dependency/whisper/Dockerfile @@ -17,8 +17,6 @@ RUN apt-get update \ COPY --chown=user:user comps /home/user/comps -USER user - RUN pip install --no-cache-dir --upgrade pip setuptools && \ pip install --no-cache-dir -r /home/user/comps/asr/src/requirements.txt && \ if [ "${ARCH}" = "cpu" ]; then \ @@ -29,7 +27,7 @@ RUN pip install --no-cache-dir --upgrade pip setuptools && \ fi ENV PYTHONPATH=$PYTHONPATH:/home/user - +USER user WORKDIR /home/user/comps/asr/src/integrations/dependency/whisper ENTRYPOINT ["python", "whisper_server.py", "--device", "cpu"] diff --git a/comps/asr/src/integrations/dependency/whisper/Dockerfile.intel_hpu b/comps/asr/src/integrations/dependency/whisper/Dockerfile.intel_hpu index 2ab4f098da..fc1d93a1de 100644 --- a/comps/asr/src/integrations/dependency/whisper/Dockerfile.intel_hpu +++ b/comps/asr/src/integrations/dependency/whisper/Dockerfile.intel_hpu @@ -19,8 +19,6 @@ RUN apt-get update \ COPY --chown=user:user comps /home/user/comps -USER user - # Install requirements and optimum habana RUN pip install --no-cache-dir --upgrade pip && \ pip install --no-cache-dir -r /home/user/comps/asr/src/requirements.txt && \ @@ -28,6 +26,7 @@ RUN pip install --no-cache-dir --upgrade pip && \ ENV PYTHONPATH=$PYTHONPATH:/home/users +USER user WORKDIR /home/user/comps/asr/src/integrations/dependency/whisper ENTRYPOINT ["python", "whisper_server.py", "--device", "hpu"] diff --git a/comps/chathistory/src/Dockerfile b/comps/chathistory/src/Dockerfile index f1ef18b459..3b5e21df5c 100644 --- a/comps/chathistory/src/Dockerfile +++ b/comps/chathistory/src/Dockerfile @@ -14,8 +14,6 @@ RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ chown -R user /home/user/ -USER user - COPY comps /home/user/comps COPY requirements.txt /home/user/ @@ -25,6 +23,8 @@ RUN pip install --no-cache-dir --upgrade pip setuptools && \ ENV PYTHONPATH=$PYTHONPATH:/home/user +USER user + WORKDIR /home/user/comps/chathistory/src ENTRYPOINT ["python", "opea_chathistory_microservice.py"] diff --git a/comps/dataprep/src/Dockerfile b/comps/dataprep/src/Dockerfile index 6355b36a4b..3752b36119 100644 --- a/comps/dataprep/src/Dockerfile +++ b/comps/dataprep/src/Dockerfile @@ -30,8 +30,6 @@ RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ chown -R user /home/user/ -USER user - COPY comps /home/user/comps RUN pip install --no-cache-dir --upgrade pip setuptools && \ @@ -46,8 +44,6 @@ RUN pip install --no-cache-dir --upgrade pip setuptools && \ ENV PYTHONPATH=$PYTHONPATH:/home/user -USER root - RUN mkdir -p /home/user/comps/dataprep/src/uploaded_files && chown -R user /home/user/comps/dataprep/src/uploaded_files USER user diff --git a/comps/feedback_management/src/Dockerfile b/comps/feedback_management/src/Dockerfile index cc5641e2bc..a3c5242514 100644 --- a/comps/feedback_management/src/Dockerfile +++ b/comps/feedback_management/src/Dockerfile @@ -14,8 +14,6 @@ RUN useradd 
-m -s /bin/bash user && \ mkdir -p /home/user && \ chown -R user /home/user/ -USER user - COPY comps /home/user/comps COPY requirements.txt /home/user/ @@ -25,6 +23,8 @@ RUN pip install --no-cache-dir --upgrade pip setuptools && \ ENV PYTHONPATH=$PYTHONPATH:/home/user +USER user + WORKDIR /home/user/comps/feedback_management/src ENTRYPOINT ["python", "opea_feedback_microservice.py"] diff --git a/comps/finetuning/src/Dockerfile b/comps/finetuning/src/Dockerfile index 1edc8c3f5c..0f55a441be 100644 --- a/comps/finetuning/src/Dockerfile +++ b/comps/finetuning/src/Dockerfile @@ -16,8 +16,6 @@ COPY comps /home/user/comps RUN chown -R user /home/user/comps/finetuning -USER user - ENV PATH=$PATH:/home/user/.local/bin RUN python -m pip install --no-cache-dir --upgrade pip && \ @@ -28,6 +26,8 @@ RUN python -m pip install --no-cache-dir --upgrade pip && \ ENV PYTHONPATH=$PYTHONPATH:/home/user +USER user + WORKDIR /home/user/comps/finetuning/src RUN echo PKGPATH=$(python3 -c "import pkg_resources; print(pkg_resources.get_distribution('oneccl-bind-pt').location)") >> run.sh && \ diff --git a/comps/finetuning/src/Dockerfile.intel_hpu b/comps/finetuning/src/Dockerfile.intel_hpu index ab40f9c48e..4324de9999 100644 --- a/comps/finetuning/src/Dockerfile.intel_hpu +++ b/comps/finetuning/src/Dockerfile.intel_hpu @@ -14,8 +14,6 @@ COPY comps /home/user/comps RUN chown -R user /home/user/comps/finetuning -USER user - ENV PATH=$PATH:/home/user/.local/bin RUN python -m pip install --no-cache-dir --upgrade pip && \ @@ -24,6 +22,8 @@ RUN python -m pip install --no-cache-dir --upgrade pip && \ ENV PYTHONPATH=$PYTHONPATH:/home/user +USER user + WORKDIR /home/user/comps/finetuning/src ENTRYPOINT ["/bin/bash", "launch.sh"] diff --git a/comps/guardrails/src/bias_detection/Dockerfile b/comps/guardrails/src/bias_detection/Dockerfile index 0ed299b6b7..6eb97a7b8c 100644 --- a/comps/guardrails/src/bias_detection/Dockerfile +++ b/comps/guardrails/src/bias_detection/Dockerfile @@ -16,8 +16,6 @@ RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ chown -R user /home/user/ -USER user - COPY comps /home/user/comps RUN pip install --no-cache-dir --upgrade pip && \ @@ -26,6 +24,8 @@ RUN pip install --no-cache-dir --upgrade pip && \ ENV PYTHONPATH=$PYTHONPATH:/home/user +USER user + WORKDIR /home/user/comps/guardrails/src/bias_detection/ ENTRYPOINT ["python", "opea_bias_detection_microservice.py"] diff --git a/comps/guardrails/src/guardrails/Dockerfile b/comps/guardrails/src/guardrails/Dockerfile index c89fbb5cb8..890dd23790 100644 --- a/comps/guardrails/src/guardrails/Dockerfile +++ b/comps/guardrails/src/guardrails/Dockerfile @@ -15,8 +15,6 @@ RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ chown -R user /home/user/ -USER user - COPY comps /home/user/comps RUN pip install --no-cache-dir --upgrade pip setuptools && \ @@ -25,6 +23,8 @@ RUN pip install --no-cache-dir --upgrade pip setuptools && \ ENV PYTHONPATH=$PYTHONPATH:/home/user +USER user + WORKDIR /home/user/comps/guardrails/src/guardrails/ ENTRYPOINT ["python", "opea_guardrails_microservice.py"] diff --git a/comps/guardrails/src/hallucination_detection/Dockerfile b/comps/guardrails/src/hallucination_detection/Dockerfile index 0b66f9bc1e..73d075f4bc 100644 --- a/comps/guardrails/src/hallucination_detection/Dockerfile +++ b/comps/guardrails/src/hallucination_detection/Dockerfile @@ -13,8 +13,6 @@ RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ chown -R user /home/user/ -USER user - COPY comps /home/user/comps RUN pip install 
--no-cache-dir --upgrade pip && \ @@ -26,6 +24,8 @@ RUN pip install --no-cache-dir --upgrade pip && \ ENV PYTHONPATH=$PYTHONPATH:/home/user +USER user + WORKDIR /home/user/comps/guardrails/src/hallucination_detection ENTRYPOINT ["bash", "entrypoint.sh"] diff --git a/comps/guardrails/src/toxicity_detection/Dockerfile b/comps/guardrails/src/toxicity_detection/Dockerfile index 5f8bf60c82..fd397c9f61 100644 --- a/comps/guardrails/src/toxicity_detection/Dockerfile +++ b/comps/guardrails/src/toxicity_detection/Dockerfile @@ -16,8 +16,6 @@ RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ chown -R user /home/user/ -USER user - COPY comps /home/user/comps RUN pip install --no-cache-dir --upgrade pip setuptools && \ @@ -26,6 +24,8 @@ RUN pip install --no-cache-dir --upgrade pip setuptools && \ ENV PYTHONPATH=$PYTHONPATH:/home/user +USER user + WORKDIR /home/user/comps/guardrails/src/toxicity_detection/ ENTRYPOINT ["python", "opea_toxicity_detection_microservice.py"] diff --git a/comps/image2image/src/Dockerfile.intel_hpu b/comps/image2image/src/Dockerfile.intel_hpu index dd0d29f523..f9090fa191 100644 --- a/comps/image2image/src/Dockerfile.intel_hpu +++ b/comps/image2image/src/Dockerfile.intel_hpu @@ -12,7 +12,7 @@ COPY comps /home/user/comps RUN chown -R user /home/user/comps/image2image RUN rm -rf /etc/ssh/ssh_host* -USER user + # Set environment variables ENV LANG=en_US.UTF-8 ENV PYTHONPATH=/home/user:/usr/lib/habanalabs/:/home/user/optimum-habana @@ -21,7 +21,7 @@ ENV PYTHONPATH=/home/user:/usr/lib/habanalabs/:/home/user/optimum-habana RUN pip install --no-cache-dir --upgrade pip && \ pip install --no-cache-dir -r /home/user/comps/image2image/src/requirements.txt && \ pip install --no-cache-dir optimum[habana] - +USER user WORKDIR /home/user/comps/image2image/src RUN echo python opea_image2image_microservice.py --device hpu --use_hpu_graphs --bf16 >> run.sh diff --git a/comps/image2video/src/Dockerfile.intel_hpu b/comps/image2video/src/Dockerfile.intel_hpu index 67be7913ca..9b8f7f8362 100644 --- a/comps/image2video/src/Dockerfile.intel_hpu +++ b/comps/image2video/src/Dockerfile.intel_hpu @@ -25,7 +25,7 @@ COPY comps /home/user/comps RUN chown -R user /home/user/comps/image2video RUN rm -rf /etc/ssh/ssh_host* -USER user + # Set environment variables ENV LANG=en_US.UTF-8 ENV PYTHONPATH=/home/user:/usr/lib/habanalabs/:/optimum-habana @@ -36,7 +36,7 @@ RUN pip install --no-cache-dir --upgrade pip && \ pip install --no-cache-dir optimum[habana] ENV PYTHONPATH=$PYTHONPATH:/home/user - +USER user WORKDIR /home/user/comps/image2video/src ENTRYPOINT ["python", "opea_image2video_microservice.py", "--device", "hpu"] diff --git a/comps/llms/src/doc-summarization/Dockerfile b/comps/llms/src/doc-summarization/Dockerfile index a7c07df449..c1e0686b30 100644 --- a/comps/llms/src/doc-summarization/Dockerfile +++ b/comps/llms/src/doc-summarization/Dockerfile @@ -13,8 +13,6 @@ RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ chown -R user /home/user/ -USER user - COPY comps /home/user/comps RUN pip install --no-cache-dir --upgrade pip setuptools && \ @@ -23,6 +21,8 @@ RUN pip install --no-cache-dir --upgrade pip setuptools && \ ENV PYTHONPATH=$PYTHONPATH:/home/user +USER user + WORKDIR /home/user/comps/llms/src/doc-summarization ENTRYPOINT ["bash", "entrypoint.sh"] diff --git a/comps/llms/src/faq-generation/Dockerfile b/comps/llms/src/faq-generation/Dockerfile index 90439a6542..73ac91aa96 100644 --- a/comps/llms/src/faq-generation/Dockerfile +++ b/comps/llms/src/faq-generation/Dockerfile 
@@ -11,8 +11,6 @@ RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ chown -R user /home/user/ -USER user - COPY comps /home/user/comps RUN pip install --no-cache-dir --upgrade pip setuptools && \ @@ -20,6 +18,8 @@ RUN pip install --no-cache-dir --upgrade pip setuptools && \ ENV PYTHONPATH=$PYTHONPATH:/home/user +USER user + WORKDIR /home/user/comps/llms/src/faq-generation ENTRYPOINT ["bash", "entrypoint.sh"] diff --git a/comps/llms/src/text-generation/Dockerfile b/comps/llms/src/text-generation/Dockerfile index 5c0ad6e52f..463c1f7428 100644 --- a/comps/llms/src/text-generation/Dockerfile +++ b/comps/llms/src/text-generation/Dockerfile @@ -11,8 +11,6 @@ RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ chown -R user /home/user/ -USER user - COPY comps /home/user/comps RUN pip install --no-cache-dir --upgrade pip setuptools && \ @@ -20,6 +18,8 @@ RUN pip install --no-cache-dir --upgrade pip setuptools && \ ENV PYTHONPATH=$PYTHONPATH:/home/user +USER user + WORKDIR /home/user/comps/llms/src/text-generation ENTRYPOINT ["bash", "entrypoint.sh"] diff --git a/comps/llms/src/text-generation/Dockerfile.intel_hpu b/comps/llms/src/text-generation/Dockerfile.intel_hpu index 9b6f50f5e4..3705475211 100644 --- a/comps/llms/src/text-generation/Dockerfile.intel_hpu +++ b/comps/llms/src/text-generation/Dockerfile.intel_hpu @@ -17,8 +17,6 @@ RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ chown -R user /home/user/ -USER user - RUN git lfs install COPY comps /home/user/comps @@ -35,6 +33,8 @@ RUN git clone ${REPO} /home/user/optimum-habana && \ ENV PYTHONPATH=/root:/home/user +USER user + WORKDIR /home/user/comps/llms/src/text-generation/ ENTRYPOINT ["bash", "entrypoint.sh"] diff --git a/comps/llms/utils/lm-eval/Dockerfile b/comps/llms/utils/lm-eval/Dockerfile index 57a4270df0..9d535af1e7 100644 --- a/comps/llms/utils/lm-eval/Dockerfile +++ b/comps/llms/utils/lm-eval/Dockerfile @@ -21,8 +21,6 @@ RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ python3-pip \ wget -USER user - COPY comps /home/user/comps RUN pip install --no-cache-dir --upgrade pip && \ @@ -30,6 +28,8 @@ RUN pip install --no-cache-dir --upgrade pip && \ ENV PYTHONPATH=$PYTHONPATH:/home/user +USER user + WORKDIR /home/user/comps/llms/utils/lm-eval diff --git a/comps/lvms/src/Dockerfile b/comps/lvms/src/Dockerfile index 37a5d7c782..9cf9d03410 100644 --- a/comps/lvms/src/Dockerfile +++ b/comps/lvms/src/Dockerfile @@ -7,8 +7,6 @@ RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ chown -R user /home/user/ -USER user - ENV LANG=C.UTF-8 COPY comps /home/user/comps @@ -18,6 +16,8 @@ RUN pip install --no-cache-dir --upgrade pip setuptools && \ ENV PYTHONPATH=$PYTHONPATH:/home/user +USER user + WORKDIR /home/user/comps/lvms/src ENTRYPOINT ["python", "opea_lvm_microservice.py"] \ No newline at end of file diff --git a/comps/lvms/src/integrations/dependency/llama-vision/Dockerfile b/comps/lvms/src/integrations/dependency/llama-vision/Dockerfile index 8bfc81afb6..293fe94ef7 100644 --- a/comps/lvms/src/integrations/dependency/llama-vision/Dockerfile +++ b/comps/lvms/src/integrations/dependency/llama-vision/Dockerfile @@ -16,8 +16,6 @@ RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ chown -R user /home/user/ -USER user - RUN git lfs install COPY comps /home/user/comps @@ -28,6 +26,8 @@ RUN cd /home/user/comps/lvms/src/integrations/dependency/llama-vision/ && \ ENV PYTHONPATH=/root:/home/user +USER user + WORKDIR 
/home/user/comps/lvms/src/integrations/dependency/llama-vision/ ENTRYPOINT ["python", "lvm.py"] diff --git a/comps/lvms/src/integrations/dependency/llama-vision/Dockerfile.guard b/comps/lvms/src/integrations/dependency/llama-vision/Dockerfile.guard index 103b656b1c..507b58d4ba 100644 --- a/comps/lvms/src/integrations/dependency/llama-vision/Dockerfile.guard +++ b/comps/lvms/src/integrations/dependency/llama-vision/Dockerfile.guard @@ -16,8 +16,6 @@ RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ chown -R user /home/user/ -USER user - RUN git lfs install COPY comps /home/user/comps @@ -28,6 +26,8 @@ RUN cd /home/user/comps/lvms/src/integrations/dependency/llama-vision/ && \ ENV PYTHONPATH=/root:/home/user +USER user + WORKDIR /home/user/comps/lvms/src/integrations/dependency/llama-vision/ ENTRYPOINT ["python", "lvm_guard.py"] diff --git a/comps/lvms/src/integrations/dependency/llava/Dockerfile b/comps/lvms/src/integrations/dependency/llava/Dockerfile index d89f34cf8c..4f337ded6b 100644 --- a/comps/lvms/src/integrations/dependency/llava/Dockerfile +++ b/comps/lvms/src/integrations/dependency/llava/Dockerfile @@ -10,7 +10,6 @@ RUN useradd -m -s /bin/bash user && \ RUN apt-get update \ && apt-get install -y curl -USER user # Set environment variables ENV LANG=en_US.UTF-8 ENV PYTHONPATH=/home/user:/usr/lib/habanalabs/:/optimum-habana @@ -22,6 +21,7 @@ RUN pip install --no-cache-dir --upgrade pip setuptools && \ ENV PYTHONPATH=$PYTHONPATH:/home/user +USER user WORKDIR /home/user/comps/lvms/src/integrations/dependency/llava ENTRYPOINT ["python", "llava_server.py", "--device", "cpu"] diff --git a/comps/lvms/src/integrations/dependency/llava/Dockerfile.intel_hpu b/comps/lvms/src/integrations/dependency/llava/Dockerfile.intel_hpu index f3129f64ee..dda62f78f2 100644 --- a/comps/lvms/src/integrations/dependency/llava/Dockerfile.intel_hpu +++ b/comps/lvms/src/integrations/dependency/llava/Dockerfile.intel_hpu @@ -12,7 +12,7 @@ RUN apt-get update \ && apt-get install -y curl RUN rm -rf /etc/ssh/ssh_host* -USER user + # Set environment variables ENV LANG=en_US.UTF-8 ENV PYTHONPATH=/home/user:/usr/lib/habanalabs/:/optimum-habana @@ -25,7 +25,7 @@ RUN pip install --no-cache-dir --upgrade pip && \ pip install --no-cache-dir optimum[habana] ENV PYTHONPATH=$PYTHONPATH:/home/user - +USER user WORKDIR /home/user/comps/lvms/src/integrations/dependency/llava/ ENTRYPOINT ["python", "llava_server.py"] diff --git a/comps/lvms/src/integrations/dependency/video-llama/Dockerfile b/comps/lvms/src/integrations/dependency/video-llama/Dockerfile index 85657b1a27..94e9213202 100644 --- a/comps/lvms/src/integrations/dependency/video-llama/Dockerfile +++ b/comps/lvms/src/integrations/dependency/video-llama/Dockerfile @@ -14,8 +14,6 @@ RUN useradd -m -s /bin/bash user && \ chown -R user:user /home/user/ RUN mkdir /home/user/model && chown user:user -R /home/user/model -USER user - COPY --chown=user:user comps /home/user/comps WORKDIR /home/user/comps/lvms/src/integrations/dependency/video-llama/ @@ -31,6 +29,7 @@ RUN tar -xvf video-llama.patch.tar && \ mv video_llama ../ && \ cd ../ && rm -rf Video-LLaMA +USER user ENV PYTHONPATH=/home/user diff --git a/comps/prompt_registry/src/Dockerfile b/comps/prompt_registry/src/Dockerfile index b00a8d56c7..9c9d0a79c5 100644 --- a/comps/prompt_registry/src/Dockerfile +++ b/comps/prompt_registry/src/Dockerfile @@ -14,8 +14,6 @@ RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ chown -R user /home/user/ -USER user - COPY comps /home/user/comps COPY 
requirements.txt /home/user/ @@ -25,6 +23,8 @@ RUN pip install --no-cache-dir --upgrade pip setuptools && \ ENV PYTHONPATH=$PYTHONPATH:/home/user +USER user + WORKDIR /home/user/comps/prompt_registry/src/ ENTRYPOINT ["python", "opea_prompt_microservice.py"] diff --git a/comps/rerankings/src/Dockerfile b/comps/rerankings/src/Dockerfile index c70dee8106..8fbe8c920d 100644 --- a/comps/rerankings/src/Dockerfile +++ b/comps/rerankings/src/Dockerfile @@ -17,8 +17,6 @@ RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ chown -R user /home/user/ -USER user - COPY comps /home/user/comps RUN if [ ${ARCH} = "cpu" ]; then \ @@ -38,9 +36,10 @@ fi && \ pip install --no-cache-dir --upgrade pip setuptools && \ pip install --no-cache-dir -r /home/user/comps/rerankings/src/requirements.txt; - ENV PYTHONPATH=$PYTHONPATH:/home/user +USER user + WORKDIR /home/user/comps/rerankings/src ENTRYPOINT ["python", "opea_reranking_microservice.py"] diff --git a/comps/retrievers/src/Dockerfile b/comps/retrievers/src/Dockerfile index 77910b8691..3fb6b3650e 100644 --- a/comps/retrievers/src/Dockerfile +++ b/comps/retrievers/src/Dockerfile @@ -18,8 +18,6 @@ RUN useradd -m -s /bin/bash user && \ COPY comps /home/user/comps -USER user - RUN pip install --no-cache-dir --upgrade pip setuptools && \ if [ ${ARCH} = "cpu" ]; then \ PIP_EXTRA_INDEX_URL="--extra-index-url https://download.pytorch.org/whl/cpu"; \ @@ -32,6 +30,8 @@ RUN pip install --no-cache-dir --upgrade pip setuptools && \ ENV PYTHONPATH=$PYTHONPATH:/home/user +USER user + WORKDIR /home/user/comps/retrievers/src ENTRYPOINT ["python", "opea_retrievers_microservice.py"] diff --git a/comps/text2image/src/Dockerfile.intel_hpu b/comps/text2image/src/Dockerfile.intel_hpu index b142bbb441..a8cbbfb478 100644 --- a/comps/text2image/src/Dockerfile.intel_hpu +++ b/comps/text2image/src/Dockerfile.intel_hpu @@ -12,7 +12,7 @@ COPY comps /home/user/comps RUN chown -R user /home/user/comps/text2image RUN rm -rf /etc/ssh/ssh_host* -USER user + # Set environment variables ENV LANG=en_US.UTF-8 ENV PYTHONPATH=/home/user:/usr/lib/habanalabs/:/home/user/optimum-habana @@ -21,7 +21,7 @@ ENV PYTHONPATH=/home/user:/usr/lib/habanalabs/:/home/user/optimum-habana RUN pip install --no-cache-dir --upgrade pip && \ pip install --no-cache-dir -r /home/user/comps/text2image/src/requirements.txt && \ pip install --no-cache-dir optimum[habana] - +USER user WORKDIR /home/user/comps/text2image/src RUN echo python opea_text2image_microservice.py --device hpu --use_hpu_graphs --bf16 >> run.sh diff --git a/comps/text2sql/src/Dockerfile b/comps/text2sql/src/Dockerfile index d77e5522dd..592b96d7c3 100644 --- a/comps/text2sql/src/Dockerfile +++ b/comps/text2sql/src/Dockerfile @@ -15,8 +15,6 @@ RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ chown -R user /home/user/ -USER user - COPY comps /home/user/comps RUN pip install --no-cache-dir --upgrade pip setuptools && \ @@ -28,6 +26,8 @@ RUN pip install --no-cache-dir --upgrade pip setuptools && \ ENV PYTHONPATH=$PYTHONPATH:/home/user +USER user + WORKDIR /home/user/comps/text2sql/src/ ENTRYPOINT ["python", "opea_text2sql_microservice.py"] \ No newline at end of file diff --git a/comps/third_parties/bridgetower/src/Dockerfile b/comps/third_parties/bridgetower/src/Dockerfile index f8b6dd7b2c..2992b24cbd 100644 --- a/comps/third_parties/bridgetower/src/Dockerfile +++ b/comps/third_parties/bridgetower/src/Dockerfile @@ -6,7 +6,7 @@ RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ chown -R user /home/user/ RUN apt-get update 
&& apt-get install -y curl -USER user + # Set environment variables ENV LANG=en_US.UTF-8 ENV PYTHONPATH=/home/user:/usr/lib/habanalabs/:/optimum-habana @@ -18,7 +18,7 @@ RUN pip install --no-cache-dir --upgrade pip && \ ENV PYTHONPATH=$PYTHONPATH:/home/user - +USER user ARG EMBEDDER_PORT=8080 ENV PORT=$EMBEDDER_PORT diff --git a/comps/third_parties/bridgetower/src/Dockerfile.intel_hpu b/comps/third_parties/bridgetower/src/Dockerfile.intel_hpu index 8ee4633346..648776c2dc 100644 --- a/comps/third_parties/bridgetower/src/Dockerfile.intel_hpu +++ b/comps/third_parties/bridgetower/src/Dockerfile.intel_hpu @@ -9,7 +9,7 @@ RUN useradd -m -s /bin/bash user && \ RUN rm -rf /etc/ssh/ssh_host* RUN apt-get update && apt-get install -y curl -USER user + # Set environment variables ENV LANG=en_US.UTF-8 ENV PYTHONPATH=/home/user:/usr/lib/habanalabs/:/optimum-habana @@ -22,7 +22,7 @@ RUN pip install --no-cache-dir --upgrade pip && \ pip install --no-cache-dir optimum[habana] ENV PYTHONPATH=$PYTHONPATH:/home/user - +USER user ARG EMBEDDER_PORT=8080 ENV PORT=$EMBEDDER_PORT diff --git a/comps/third_parties/clip/src/Dockerfile b/comps/third_parties/clip/src/Dockerfile index 5479eee297..26f08e64c9 100644 --- a/comps/third_parties/clip/src/Dockerfile +++ b/comps/third_parties/clip/src/Dockerfile @@ -13,8 +13,6 @@ RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ chown -R user /home/user/ -USER user - COPY comps /home/user/comps RUN pip install --no-cache-dir --upgrade pip setuptools && \ @@ -23,6 +21,8 @@ RUN pip install --no-cache-dir --upgrade pip setuptools && \ ENV PYTHONPATH=$PYTHONPATH:/home/user +USER user + WORKDIR /home/user/comps/third_parties/clip/src/ ENTRYPOINT ["python", "clip_server.py"] diff --git a/comps/tts/src/Dockerfile b/comps/tts/src/Dockerfile index de3bbce35d..3ec7555144 100644 --- a/comps/tts/src/Dockerfile +++ b/comps/tts/src/Dockerfile @@ -5,7 +5,7 @@ FROM python:3.11-slim RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ chown -R user /home/user/ -USER user + ENV LANG=C.UTF-8 ARG ARCH=cpu @@ -20,7 +20,7 @@ RUN pip install --no-cache-dir --upgrade pip setuptools && \ fi ENV PYTHONPATH=$PYTHONPATH:/home/user - +USER user WORKDIR /home/user/comps/tts/src ENTRYPOINT ["python", "opea_tts_microservice.py"] diff --git a/comps/tts/src/integrations/dependency/speecht5/Dockerfile b/comps/tts/src/integrations/dependency/speecht5/Dockerfile index 34ade3576e..161a44505a 100644 --- a/comps/tts/src/integrations/dependency/speecht5/Dockerfile +++ b/comps/tts/src/integrations/dependency/speecht5/Dockerfile @@ -18,8 +18,6 @@ RUN apt-get update \ COPY --chown=user:user comps /home/user/comps -USER user - RUN pip install --no-cache-dir --upgrade pip setuptools && \ if [ "${ARCH}" = "cpu" ]; then \ pip install --no-cache-dir torch torchvision --index-url https://download.pytorch.org/whl/cpu ; \ @@ -30,6 +28,8 @@ RUN pip install --no-cache-dir --upgrade pip setuptools && \ ENV PYTHONPATH=$PYTHONPATH:/home/user +USER user + WORKDIR /home/user/comps/tts/src/integrations/dependency/speecht5 ENTRYPOINT ["python", "speecht5_server.py", "--device", "cpu"] diff --git a/comps/tts/src/integrations/dependency/speecht5/Dockerfile.intel_hpu b/comps/tts/src/integrations/dependency/speecht5/Dockerfile.intel_hpu index 895118d1d3..eee2aa2d6c 100644 --- a/comps/tts/src/integrations/dependency/speecht5/Dockerfile.intel_hpu +++ b/comps/tts/src/integrations/dependency/speecht5/Dockerfile.intel_hpu @@ -20,8 +20,6 @@ RUN apt-get update \ COPY --chown=user:user comps /home/user/comps -USER user - # 
Install requirements and optimum habana RUN pip install --no-cache-dir --upgrade pip && \ pip install --no-cache-dir -r /home/user/comps/tts/src/integrations/dependency/speecht5/requirements.txt && \ @@ -29,6 +27,8 @@ RUN pip install --no-cache-dir --upgrade pip && \ ENV PYTHONPATH=$PYTHONPATH:/home/user +USER user + WORKDIR /home/user/comps/tts/src/integrations/dependency/speecht5 ENTRYPOINT ["python", "speecht5_server.py", "--device", "hpu"] diff --git a/comps/web_retrievers/src/Dockerfile b/comps/web_retrievers/src/Dockerfile index 6548d4ef22..d9e1ded610 100644 --- a/comps/web_retrievers/src/Dockerfile +++ b/comps/web_retrievers/src/Dockerfile @@ -14,8 +14,6 @@ RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missin libjemalloc-dev \ curl -USER user - COPY comps /home/user/comps RUN pip install --no-cache-dir --upgrade pip setuptools && \ @@ -27,6 +25,8 @@ RUN pip install --no-cache-dir --upgrade pip setuptools && \ ENV PYTHONPATH=$PYTHONPATH:/home/user +USER user + WORKDIR /home/user/comps/web_retrievers/src ENTRYPOINT ["python", "opea_web_retrievers_microservice.py"] From fda471e7a1506b83d50e372cde1994fbb12811ed Mon Sep 17 00:00:00 2001 From: ZePan110 Date: Mon, 27 Jan 2025 15:58:58 +0800 Subject: [PATCH 3/6] Opt build steps (#1238) * Add null value judgment Signed-off-by: ZePan110 --- .github/workflows/pr-helm-test.yaml | 2 +- .github/workflows/push-image-build.yml | 22 +++++++++++++++------- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/.github/workflows/pr-helm-test.yaml b/.github/workflows/pr-helm-test.yaml index 3ae7a23958..4e7fdbf4fc 100644 --- a/.github/workflows/pr-helm-test.yaml +++ b/.github/workflows/pr-helm-test.yaml @@ -63,7 +63,7 @@ jobs: Chart-test: needs: [job1] - if: always() && ${{ needs.job1.outputs.run_matrix.service.length }} > 0 + if: always() && ${{ fromJSON(needs.job1.outputs.run_matrix).length != 0 }} uses: ./.github/workflows/_run-helm-chart.yml strategy: matrix: ${{ fromJSON(needs.job1.outputs.run_matrix) }} diff --git a/.github/workflows/push-image-build.yml b/.github/workflows/push-image-build.yml index 98c76269ff..fda1528065 100644 --- a/.github/workflows/push-image-build.yml +++ b/.github/workflows/push-image-build.yml @@ -32,22 +32,28 @@ jobs: - name: Get Test Services id: get-services run: | + set -x base_commit=$(git rev-parse HEAD~1) merged_commit=$(git log -1 --format='%H') # git diff --name-only ${base_commit} ${merged_commit} | grep -E "cores|comps/__init__.py" | grep -Ev ".md" - # if [ $? -eq 0 ]; then + if git diff --name-only ${base_commit} ${merged_commit} | grep -E "cores|comps/__init__.py" | grep -Ev ".md"; then echo "ALL image build!!!" services=$(basename -a .github/workflows/docker/compose/*-compose.yaml | sed 's/-compose.yaml//' | jq -R '.' ) else changed_src="$(git diff --name-only ${base_commit} ${merged_commit} | grep 'src/' | grep -vE '\.md')" || true changed_yamls="$(git diff --name-only ${base_commit} ${merged_commit} | grep '.github/workflows/docker/compose/')" || true - services=$(printf '%s\n' "${changed_src[@]}" | cut -d'/' -f2 | grep -vE '\.py' | sort -u | jq -R '.' ) || true - while IFS= read -r line; do - filename=$(basename "$line" -compose.yaml) - echo "$line $(printf '%s\n' "$filename" | jq -R '.' )" - services+=" $(printf '%s\n' "$filename" | jq -R '.' )" || true - done <<< "$changed_yamls" + [[ -n "$changed_src" ]] && services=$(printf '%s\n' "${changed_src[@]}" | cut -d'/' -f2 | grep -vE '\.py' | sort -u | jq -R '.' 
) || true + + if [[ -n "$changed_yamls" ]]; then + while IFS= read -r line; do + filename=$(basename "$line" -compose.yaml) + echo "$line $(printf '%s\n' "$filename" | jq -R '.' )" + services+=" $(printf '%s\n' "$filename" | jq -R '.' )" || true + done <<< "$changed_yamls" + else + echo "No changes in YAML files." + fi fi echo "services=$(echo "$services" | jq -sc 'unique | sort')" @@ -56,6 +62,7 @@ jobs: image-build: needs: get-build-matrix + if: ${{ fromJSON(needs.get-build-matrix.outputs.services).length != 0 }} strategy: matrix: service: ${{ fromJSON(needs.get-build-matrix.outputs.services) }} @@ -65,6 +72,7 @@ jobs: steps: - name: Clean up Working Directory run: | + echo "matrix.service=${{ matrix.service }}" sudo rm -rf ${{github.workspace}}/* - name: Checkout out Repo From 2c7c6113ee43eee8745542c303d2e477c5544fe7 Mon Sep 17 00:00:00 2001 From: "chen, suyue" Date: Mon, 27 Jan 2025 16:55:56 +0800 Subject: [PATCH 4/6] remove duplicate images (#1243) Signed-off-by: chensuyue --- .github/workflows/docker/compose/animation-compose.yaml | 8 -------- 1 file changed, 8 deletions(-) diff --git a/.github/workflows/docker/compose/animation-compose.yaml b/.github/workflows/docker/compose/animation-compose.yaml index 650f4b1c07..ab9149b79b 100644 --- a/.github/workflows/docker/compose/animation-compose.yaml +++ b/.github/workflows/docker/compose/animation-compose.yaml @@ -7,11 +7,3 @@ services: build: dockerfile: comps/animation/src/Dockerfile image: ${REGISTRY:-opea}/animation:${TAG:-latest} - wav2lip: - build: - dockerfile: comps/third_parties/wav2lip/src/Dockerfile - image: ${REGISTRY:-opea}/wav2lip:${TAG:-latest} - wav2lip-gaudi: - build: - dockerfile: comps/third_parties/wav2lip/src/Dockerfile.intel_hpu - image: ${REGISTRY:-opea}/wav2lip-gaudi:${TAG:-latest} From b38d9f32ecf970178b1520bc5ba32261ed310760 Mon Sep 17 00:00:00 2001 From: rbrugaro Date: Mon, 27 Jan 2025 15:14:03 -0800 Subject: [PATCH 5/6] GraphRAG README/compose fixes post refactor (#1221) * GraphRAG README/compose fixes post refactor - Move env settings from set_env.sh script to README body - Description improvements - Fix tgi settings in docker compose file Signed-off-by: rbrygaro * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * remove compose_neo4j_llamaindex.yaml from PR #1221 and fix filename in README Signed-off-by: rbrugaro --------- Signed-off-by: rbrygaro Signed-off-by: rbrugaro Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Abolfazl Shahbazi <12436063+ashahba@users.noreply.github.com> --- comps/dataprep/src/README_neo4j_llamaindex.md | 22 ++++++++++++++----- 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/comps/dataprep/src/README_neo4j_llamaindex.md b/comps/dataprep/src/README_neo4j_llamaindex.md index 906303d6ef..c80fc6256a 100644 --- a/comps/dataprep/src/README_neo4j_llamaindex.md +++ b/comps/dataprep/src/README_neo4j_llamaindex.md @@ -2,14 +2,14 @@ This Dataprep microservice performs: -- Graph extraction (entities, relationships and descripttions) using LLM +- Graph extraction (entities, relationships and descriptions) using LLM - Performs hierarchical_leiden clustering to identify communities in the knowledge graph - Generates a community symmary for each community - Stores all of the above in Neo4j Graph DB -This microservice follows the graphRAG approached defined by Microsoft paper ["From Local to Global: A Graph RAG Approach to Query-Focused 
Summarization"](https://www.microsoft.com/en-us/research/publication/from-local-to-global-a-graph-rag-approach-to-query-focused-summarization/) with some differences such as: 1) only level zero cluster summaries are leveraged, 2) The input context to the final answer generation is trimmed to fit maximum context length. +This microservice follows the graphRAG approached defined by Microsoft paper ["From Local to Global: A Graph RAG Approach to Query-Focused Summarization"](https://www.microsoft.com/en-us/research/publication/from-local-to-global-a-graph-rag-approach-to-query-focused-summarization/) with some differences such as: 1) no node degree prioritization is used in populating the LLM context window for community summaries, 2) no ranking of sub-communities is applied in generating higher level communities summaries. -This dataprep microservice ingests the input files and uses LLM (TGI or OpenAI model when OPENAI_API_KEY is set) to extract entities, relationships and descriptions of those to build a graph-based text index. +This dataprep microservice ingests the input files and uses LLM (TGI, VLLM or OpenAI model when OPENAI_API_KEY is set) to extract entities, relationships and descriptions of those to build a graph-based text index. Compose yaml file deploys TGI but works also with vLLM inference endpoint. ## Setup Environment Variables @@ -23,10 +23,20 @@ export NEO4J_URI=${your_neo4j_url} export NEO4J_USERNAME=${your_neo4j_username} export NEO4J_PASSWORD=${your_neo4j_password} # should match what was used in NEO4J_AUTH when running the neo4j-apoc export PYTHONPATH=${path_to_comps} -export OPENAI_KEY=${your_openai_api_key} # optional, when not provided will use smaller models TGI/TEI +export OPENAI_KEY=${your_openai_api_key} # optional, when not provided will use open models TGI/TEI export HUGGINGFACEHUB_API_TOKEN=${your_hf_token} + # set additional environment settings -source ./set_env.sh +export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" +export OPENAI_EMBEDDING_MODEL="text-embedding-3-small" +export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct" +export OPENAI_LLM_MODEL="gpt-4o" +export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:6006" +export TGI_LLM_ENDPOINT="http://${host_ip}:6005" +export NEO4J_URL="bolt://${host_ip}:7687" +export NEO4J_USERNAME=neo4j +export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6004/v1/dataprep" +export LOGFLAG=True ``` ## 🚀Start Microservice with Docker @@ -62,7 +72,7 @@ Docker compose will start 4 microservices: dataprep-neo4j-llamaindex, neo4j-apoc ```bash cd comps/dataprep/deployment/docker_compose -docker compose -f ompose_neo4j_llamaindex.yaml up -d +docker compose -f compose.yaml up -d ``` ## Invoke Microservice From 63c66a03c906b12c8198c8bd1dbb0fe86a88dd7a Mon Sep 17 00:00:00 2001 From: Ruoyu Ying Date: Tue, 28 Jan 2025 16:14:26 +0800 Subject: [PATCH 6/6] doc: fix minor issue in vllm doc (#1242) Signed-off-by: Ruoyu Ying Co-authored-by: sdp --- comps/third_parties/vllm/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/comps/third_parties/vllm/README.md b/comps/third_parties/vllm/README.md index baf71b08b4..899ccb0870 100644 --- a/comps/third_parties/vllm/README.md +++ b/comps/third_parties/vllm/README.md @@ -44,7 +44,7 @@ bash ./launch_vllm_service.sh ${port_number} ${model_name} ```bash cd deplopyment/docker_compose -docker compose -f compose.yaml vllm-server up -d +docker compose -f compose.yaml up vllm-server -d ``` ### 2.2 vLLM on Gaudi