diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index e51dd2d56b..0110f0b93d 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -16,7 +16,6 @@ /comps/guardrails/ liang1.lv@intel.com letong.han@intel.com /comps/image2image/ qing.yao@intel.com xinyu.ye@intel.com /comps/image2video/ xinyu.ye@intel.com qing.yao@intel.com -/comps/intent_detection/ liang1.lv@intel.com xinyao.wang@intel.com /comps/llms/ liang1.lv@intel.com letong.han@intel.com /comps/lvms/ sihan.chen@intel.com liang1.lv@intel.com /comps/prompt_registry/ hoong.tee.yeoh@intel.com xinyao.wang@intel.com @@ -26,5 +25,4 @@ /comps/text2image/ xinyu.ye@intel.com liang1.lv@intel.com /comps/text2sql/ yogesh.pandey@intel.com qing.yao@intel.com /comps/tts/ sihan.chen@intel.com letong.han@intel.com -/comps/vectorstores/ xinyu.ye@intel.com letong.han@intel.com /comps/web_retrievers/ sihan.chen@intel.com liang1.lv@intel.com diff --git a/.github/ISSUE_TEMPLATE/1_bug_template.yml b/.github/ISSUE_TEMPLATE/1_bug_template.yml index 9661eb7780..820e83681c 100644 --- a/.github/ISSUE_TEMPLATE/1_bug_template.yml +++ b/.github/ISSUE_TEMPLATE/1_bug_template.yml @@ -66,6 +66,7 @@ body: options: - label: Pull docker images from hub.docker.com - label: Build docker images from source + - label: Other validations: required: true @@ -74,10 +75,10 @@ body: attributes: label: Deploy method options: - - label: Docker compose - label: Docker - - label: Kubernetes - - label: Helm + - label: Docker Compose + - label: Kubernetes Helm Charts + - label: Other validations: required: true @@ -88,6 +89,7 @@ body: options: - Single Node - Multiple Nodes + - Other default: 0 validations: required: true diff --git a/.github/ISSUE_TEMPLATE/2_feature_template.yml b/.github/ISSUE_TEMPLATE/2_feature_template.yml index 2381f94563..0b54952b9b 100644 --- a/.github/ISSUE_TEMPLATE/2_feature_template.yml +++ b/.github/ISSUE_TEMPLATE/2_feature_template.yml @@ -66,6 +66,7 @@ body: options: - Single Node - Multiple Nodes + - Other default: 0 validations: required: true diff --git a/.github/workflows/_comps-workflow.yml b/.github/workflows/_comps-workflow.yml index 12db04eb74..964d2d7284 100644 --- a/.github/workflows/_comps-workflow.yml +++ b/.github/workflows/_comps-workflow.yml @@ -17,11 +17,16 @@ on: default: true required: false type: boolean - test: + test_compose: default: true description: "Test comps with docker compose" required: false type: boolean + test_helmchart: + default: true + description: "Test comps with helm chart" + required: false + type: boolean mode: default: "CD" description: "Whether the test range is CI, CD or CICD" @@ -33,6 +38,7 @@ jobs: # Image Build #################################################################################################### build-images: + if: ${{ !(fromJSON(inputs.test_helmchart)) }} runs-on: "docker-build-gaudi" continue-on-error: true outputs: @@ -64,8 +70,8 @@ jobs: cd ./vllm-openvino && git checkout v0.6.1 && git rev-parse HEAD && cd ../ fi if [[ $(grep -c "vllm-gaudi:" ${docker_compose_yml}) != 0 ]]; then - git clone https://github.com/HabanaAI/vllm-fork.git vllm-fork - cd vllm-fork && git checkout 3c39626 && cd ../ + git clone --depth 1 --branch v0.6.4.post2+Gaudi-1.19.0 https://github.com/HabanaAI/vllm-fork.git + sed -i 's/triton/triton==3.1.0/g' vllm-fork/requirements-hpu.txt fi - name: Get build list id: get-build-list @@ -98,9 +104,21 @@ jobs: #################################################################################################### test-service-compose: needs: [build-images] - if: ${{ 
fromJSON(inputs.test) }} + if: ${{ fromJSON(inputs.test_compose) }} uses: ./.github/workflows/_run-docker-compose.yml with: tag: ${{ inputs.tag }} service: ${{ inputs.service }} secrets: inherit + + #################################################################################################### + # Helm Chart Test + #################################################################################################### + test-service-helmchart: + if: ${{ fromJSON(inputs.test_helmchart) }} + uses: ./.github/workflows/_run-helm-chart.yml + with: + tag: ${{ inputs.tag }} + mode: ${{ inputs.mode }} + service: ${{ inputs.service }} + secrets: inherit diff --git a/.github/workflows/_run-helm-chart.yml b/.github/workflows/_run-helm-chart.yml new file mode 100644 index 0000000000..08fc71db66 --- /dev/null +++ b/.github/workflows/_run-helm-chart.yml @@ -0,0 +1,242 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +name: Helm Chart Test on GenAIComps For Call +permissions: read-all +on: + workflow_call: + inputs: + service: + default: "chatqna" + required: true + type: string + description: "service to test, e.g. asr" + dockerhub: + default: false + required: false + type: boolean + description: "Set to true if you want to use released docker images at dockerhub. By default using internal docker registry." + tag: + default: "latest" + required: false + type: string + mode: + default: "CD" + description: "Whether the test range is CI, CD or CICD" + required: false + type: string + version: + default: "0-latest" + required: false + type: string + +jobs: + get-test-case: + runs-on: ubuntu-latest + outputs: + run_matrix: ${{ steps.get-test-files.outputs.run_matrix }} + CHECKOUT_REF: ${{ steps.get-checkout-ref.outputs.CHECKOUT_REF }} + steps: + - name: Get checkout ref + id: get-checkout-ref + run: | + if [ "${{ github.event_name }}" == "pull_request" ] || [ "${{ github.event_name }}" == "pull_request_target" ]; then + CHECKOUT_REF=refs/pull/${{ github.event.number }}/merge + else + CHECKOUT_REF=${{ github.ref }} + fi + echo "CHECKOUT_REF=${CHECKOUT_REF}" >> $GITHUB_OUTPUT + echo "checkout ref ${CHECKOUT_REF}" + + - name: Checkout Repo + uses: actions/checkout@v4 + with: + ref: ${{ steps.get-checkout-ref.outputs.CHECKOUT_REF }} + fetch-depth: 0 + + - name: Get test Services + id: get-test-files + run: | + set -x + if [ "${{ inputs.mode }}" = "CI" ]; then + base_commit=${{ github.event.pull_request.base.sha }} + merged_commit=$(git log -1 --format='%H') + values_files=$(git diff --name-only ${base_commit} ${merged_commit} | \ + grep "values.yaml" | \ + grep "${{ inputs.service }}" | \ + sort -u ) + echo $values_files + elif [ "${{ inputs.mode }}" = "CD" ]; then + values_files=$(ls ${{ github.workspace }}/comps/${{ inputs.service }}/deployment/kubernetes/*values.yaml) + fi + run_matrix="[" + run_matrix="{\"include\":[" + for file in ${values_files}; do + if [ -f "$file" ]; then + filename=$(basename "$file") + if [[ "$filename" == *"gaudi"* ]]; then + hardware="gaudi" + value_file="$filename" + elif [[ "$filename" == *"nv"* ]]; then + continue + else + hardware="xeon" + value_file="$filename" + fi + echo "service=${{ inputs.service }}, hardware=${hardware}, value_file=${value_file}" + if [[ $(echo ${run_matrix} | grep -c "{\"value_file\":\"${value_file}\",\"hardware\":\"${hardware}\"},") == 0 ]]; then + run_matrix="${run_matrix}{\"value_file\":\"${value_file}\",\"hardware\":\"${hardware}\"}," + echo "------------------ add one values file ------------------" + fi + fi + 
done + run_matrix="${run_matrix%,}]}" + + echo "run_matrix=${run_matrix}" + echo "run_matrix=${run_matrix}" >> $GITHUB_OUTPUT + + helm-test: + needs: [get-test-case] + strategy: + matrix: ${{ fromJSON(needs.get-test-case.outputs.run_matrix) }} + if: ${{ needs.get-test-case.outputs.run_matrix != '[]' }} + runs-on: k8s-${{ matrix.hardware }} + continue-on-error: true + steps: + - name: Clean Up Working Directory + run: | + echo "service=${{ inputs.service }} hardware=${{ matrix.hardware }} value_file=${{ matrix.value_file }}" + echo "value_file=${{ matrix.value_file }}" + sudo rm -rf ${{github.workspace}}/* + + - name: Get checkout ref + id: get-checkout-ref + run: | + if [ "${{ github.event_name }}" == "pull_request" ] || [ "${{ github.event_name }}" == "pull_request_target" ]; then + CHECKOUT_REF=refs/pull/${{ github.event.number }}/merge + else + CHECKOUT_REF=${{ github.ref }} + fi + echo "CHECKOUT_REF=${CHECKOUT_REF}" >> $GITHUB_OUTPUT + echo "checkout ref ${CHECKOUT_REF}" + + - name: Checkout Repo + uses: actions/checkout@v4 + with: + ref: ${{ steps.get-checkout-ref.outputs.CHECKOUT_REF }} + fetch-depth: 0 + + - name: Set variables + env: + service: ${{ inputs.service }} + run: | + service_name="${service,,}" # third_parties/bridgetower + if [[ "${service,,}" == *"third_parties"* ]]; then + CHART_NAME="$(echo "${service,,}"|cut -d'/' -f2)" # bridgetower + else + CHART_NAME="${service_name}" # agent + fi + echo "service_name=$service_name" >> $GITHUB_ENV + echo "CHART_NAME=$CHART_NAME" >> $GITHUB_ENV + echo "RELEASE_NAME=${CHART_NAME}$(date +%d%H%M%S)" >> $GITHUB_ENV + echo "NAMESPACE=comp-${CHART_NAME}-$(date +%d%H%M%S)" >> $GITHUB_ENV + echo "ROLLOUT_TIMEOUT_SECONDS=600s" >> $GITHUB_ENV + echo "TEST_TIMEOUT_SECONDS=600s" >> $GITHUB_ENV + echo "KUBECTL_TIMEOUT_SECONDS=60s" >> $GITHUB_ENV + echo "should_cleanup=false" >> $GITHUB_ENV + echo "skip_validate=false" >> $GITHUB_ENV + echo "CHART_FOLDER=comps/${service}/deployment/kubernetes" >> $GITHUB_ENV + + - name: get template name + run: | + echo "template_link=$(grep 'helm install' comps/${service_name}/deployment/kubernetes/README.md | awk -F' ' '{print $4}' | head -n 1)" + echo "template_link=$(grep 'helm install' comps/${service_name}/deployment/kubernetes/README.md | awk -F' ' '{print $4}' | head -n 1)" >> $GITHUB_ENV + + - name: Helm install + id: install + env: + GOOGLE_CSE_ID: ${{ secrets.GOOGLE_CSE_ID }} + GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }} + HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }} + HFTOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }} + value_file: ${{ matrix.value_file }} + run: | + set -xe + echo "should_cleanup=true" >> $GITHUB_ENV + if [[ ! -f ${{ github.workspace }}/${{ env.CHART_FOLDER }}/${value_file} ]]; then + echo "No value file found, exiting test!" + echo "skip_validate=true" >> $GITHUB_ENV + echo "should_cleanup=false" >> $GITHUB_ENV + exit 0 + fi + + for img in `helm template -n $NAMESPACE $RELEASE_NAME ${template_link} -f comps/${{ inputs.service }}/deployment/kubernetes/${value_file} --version ${{ inputs.version }} | grep 'image:' | grep 'opea/' | awk '{print $2}' | xargs`; + do + # increase helm install wait for for vllm-gaudi case + if [[ $img == *"vllm-gaudi"* ]]; then + ROLLOUT_TIMEOUT_SECONDS=900s + fi + done + # oci://ghcr.io/opea-project/charts/${CHART_NAME} \ + if ! 
helm install --create-namespace --namespace $NAMESPACE $RELEASE_NAME \ + ${template_link} \ + --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} \ + --set global.modelUseHostPath=/home/sdp/.cache/huggingface/hub \ + --set GOOGLE_API_KEY=${{ env.GOOGLE_API_KEY}} \ + --set GOOGLE_CSE_ID=${{ env.GOOGLE_CSE_ID}} \ + -f comps/${{ inputs.service }}/deployment/kubernetes/${value_file} \ + --version ${{ inputs.version }} \ + --wait --timeout "$ROLLOUT_TIMEOUT_SECONDS"; then + echo "Failed to install chart ${{ inputs.service }}" + echo "skip_validate=true" >> $GITHUB_ENV + .github/workflows/scripts/k8s-utils.sh dump_pods_status $NAMESPACE + exit 1 + fi + helm list -A + kubectl get pods -n $NAMESPACE + + - name: Validate e2e test + if: always() + run: | + set -xe + if $skip_validate; then + echo "Skip validate" + else + LOG_PATH=/home/$(whoami)/helm-logs + chart=${{ env.CHART_NAME }} + helm test -n $NAMESPACE $RELEASE_NAME --logs --timeout "$TEST_TIMEOUT_SECONDS" | tee ${LOG_PATH}/charts-${chart}.log + exit_code=$? + if [ $exit_code -ne 0 ]; then + echo "Chart ${chart} test failed, please check the logs in ${LOG_PATH}!" + exit 1 + fi + + echo "Checking response results, make sure the output is reasonable. " + teststatus=false + if [[ -f $LOG_PATH/charts-${chart}.log ]] && \ + [[ $(grep -c "^Phase:.*Failed" $LOG_PATH/charts-${chart}.log) != 0 ]]; then + teststatus=false + ${{ github.workspace }}/.github/workflows/scripts/k8s-utils.sh dump_all_pod_logs $NAMESPACE + else + teststatus=true + fi + + if [ $teststatus == false ]; then + echo "Response check failed, please check the logs in artifacts!" + exit 1 + else + echo "Response check succeeded!" + exit 0 + fi + fi + + - name: Helm uninstall + if: always() + run: | + if $should_cleanup ; then + helm uninstall $RELEASE_NAME --namespace $NAMESPACE + if ! 
kubectl delete ns $NAMESPACE --timeout=$KUBECTL_TIMEOUT_SECONDS; then + kubectl delete pods --namespace $NAMESPACE --force --grace-period=0 --all + kubectl delete ns $NAMESPACE --force --grace-period=0 --timeout=$KUBECTL_TIMEOUT_SECONDS + fi + fi diff --git a/.github/workflows/check-online-doc-build.yml b/.github/workflows/check-online-doc-build.yml index 4b3ebe066b..bf3e0867a2 100644 --- a/.github/workflows/check-online-doc-build.yml +++ b/.github/workflows/check-online-doc-build.yml @@ -13,7 +13,7 @@ on: jobs: build: - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 steps: - name: Checkout diff --git a/.github/workflows/docker/compose/animation-compose.yaml b/.github/workflows/docker/compose/animation-compose.yaml index 650f4b1c07..ab9149b79b 100644 --- a/.github/workflows/docker/compose/animation-compose.yaml +++ b/.github/workflows/docker/compose/animation-compose.yaml @@ -7,11 +7,3 @@ services: build: dockerfile: comps/animation/src/Dockerfile image: ${REGISTRY:-opea}/animation:${TAG:-latest} - wav2lip: - build: - dockerfile: comps/third_parties/wav2lip/src/Dockerfile - image: ${REGISTRY:-opea}/wav2lip:${TAG:-latest} - wav2lip-gaudi: - build: - dockerfile: comps/third_parties/wav2lip/src/Dockerfile.intel_hpu - image: ${REGISTRY:-opea}/wav2lip-gaudi:${TAG:-latest} diff --git a/.github/workflows/docker/compose/chathistory-compose.yaml b/.github/workflows/docker/compose/chathistory-compose.yaml index 987447feea..3991a99734 100644 --- a/.github/workflows/docker/compose/chathistory-compose.yaml +++ b/.github/workflows/docker/compose/chathistory-compose.yaml @@ -5,5 +5,5 @@ services: chathistory-mongo-server: build: - dockerfile: comps/chathistory/mongo/Dockerfile + dockerfile: comps/chathistory/src/Dockerfile image: ${REGISTRY:-opea}/chathistory-mongo-server:${TAG:-latest} diff --git a/.github/workflows/docker/compose/dataprep-compose.yaml b/.github/workflows/docker/compose/dataprep-compose.yaml index 69cbadd534..893f6947ea 100644 --- a/.github/workflows/docker/compose/dataprep-compose.yaml +++ b/.github/workflows/docker/compose/dataprep-compose.yaml @@ -7,59 +7,3 @@ services: build: dockerfile: comps/dataprep/src/Dockerfile image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} - dataprep-redis: - build: - dockerfile: comps/dataprep/redis/langchain/Dockerfile - image: ${REGISTRY:-opea}/dataprep-redis:${TAG:-latest} - dataprep-qdrant: - build: - dockerfile: comps/dataprep/qdrant/langchain/Dockerfile - image: ${REGISTRY:-opea}/dataprep-qdrant:${TAG:-latest} - dataprep-on-ray-redis: - build: - dockerfile: comps/dataprep/redis/langchain_ray/Dockerfile - image: ${REGISTRY:-opea}/dataprep-on-ray-redis:${TAG:-latest} - dataprep-multimodal-vdms: - build: - dockerfile: comps/dataprep/vdms/multimodal_langchain/Dockerfile - image: ${REGISTRY:-opea}/dataprep-multimodal-vdms:${TAG:-latest} - dataprep-multimodal-redis: - build: - dockerfile: comps/dataprep/multimodal/redis/langchain/Dockerfile - image: ${REGISTRY:-opea}/dataprep-multimodal-redis:${TAG:-latest} - dataprep-redis-llama-index: - build: - dockerfile: comps/dataprep/redis/llama_index/Dockerfile - image: ${REGISTRY:-opea}/dataprep-redis-llama-index:${TAG:-latest} - dataprep-milvus: - build: - dockerfile: comps/dataprep/milvus/langchain/Dockerfile - image: ${REGISTRY:-opea}/dataprep-milvus:${TAG:-latest} - dataprep-pgvector: - build: - dockerfile: comps/dataprep/pgvector/langchain/Dockerfile - image: ${REGISTRY:-opea}/dataprep-pgvector:${TAG:-latest} - dataprep-pinecone: - build: - dockerfile: comps/dataprep/pinecone/langchain/Dockerfile - image: 
${REGISTRY:-opea}/dataprep-pinecone:${TAG:-latest} - dataprep-vdms: - build: - dockerfile: comps/dataprep/vdms/langchain/Dockerfile - image: ${REGISTRY:-opea}/dataprep-vdms:${TAG:-latest} - dataprep-neo4j: - build: - dockerfile: comps/dataprep/neo4j/langchain/Dockerfile - image: ${REGISTRY:-opea}/dataprep-neo4j:${TAG:-latest} - dataprep-neo4j-llamaindex: - build: - dockerfile: comps/dataprep/neo4j/llama_index/Dockerfile - image: ${REGISTRY:-opea}/dataprep-neo4j-llamaindex:${TAG:-latest} - dataprep-elasticsearch: - build: - dockerfile: comps/dataprep/elasticsearch/langchain/Dockerfile - image: ${REGISTRY:-opea}/dataprep-elasticsearch:${TAG:-latest} - dataprep-opensearch: - build: - dockerfile: comps/dataprep/opensearch/langchain/Dockerfile - image: ${REGISTRY:-opea}/dataprep-opensearch:${TAG:-latest} diff --git a/.github/workflows/docker/compose/feedback_management-compose.yaml b/.github/workflows/docker/compose/feedback_management-compose.yaml index 29fa00d73b..9ad21d151b 100644 --- a/.github/workflows/docker/compose/feedback_management-compose.yaml +++ b/.github/workflows/docker/compose/feedback_management-compose.yaml @@ -3,7 +3,7 @@ # this file should be run in the root of the repo services: - feedbackmanagement: + feedbackmanagement-mongo: build: dockerfile: comps/feedback_management/src/Dockerfile - image: ${REGISTRY:-opea}/feedbackmanagement:${TAG:-latest} + image: ${REGISTRY:-opea}/feedbackmanagement-mongo:${TAG:-latest} diff --git a/.github/workflows/docker/compose/guardrails-compose.yaml b/.github/workflows/docker/compose/guardrails-compose.yaml index fd8c2e7a76..5c8ccf621f 100644 --- a/.github/workflows/docker/compose/guardrails-compose.yaml +++ b/.github/workflows/docker/compose/guardrails-compose.yaml @@ -11,6 +11,10 @@ services: build: dockerfile: comps/guardrails/src/bias_detection/Dockerfile image: ${REGISTRY:-opea}/guardrails-bias-detection:${TAG:-latest} + guardrails-hallucination-detection: + build: + dockerfile: comps/guardrails/src/hallucination_detection/Dockerfile + image: ${REGISTRY:-opea}/guardrails-hallucination-detection:${TAG:-latest} guardrails-pii-predictionguard: build: dockerfile: comps/guardrails/src/pii_detection/Dockerfile diff --git a/.github/workflows/docker/compose/intent_detection-compose.yaml b/.github/workflows/docker/compose/intent_detection-compose.yaml deleted file mode 100644 index 12b7a12b38..0000000000 --- a/.github/workflows/docker/compose/intent_detection-compose.yaml +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -# this file should be run in the root of the repo -services: - intent-detection-tgi: - build: - dockerfile: comps/intent_detection/langchain/Dockerfile - image: ${REGISTRY:-opea}/intent-detection-tgi:${TAG:-latest} diff --git a/.github/workflows/docker/compose/llms-compose.yaml b/.github/workflows/docker/compose/llms-compose.yaml index 864d74bd80..f50507542e 100644 --- a/.github/workflows/docker/compose/llms-compose.yaml +++ b/.github/workflows/docker/compose/llms-compose.yaml @@ -7,50 +7,19 @@ services: build: dockerfile: comps/llms/src/text-generation/Dockerfile image: ${REGISTRY:-opea}/llm-textgen:${TAG:-latest} - llm-ollama: + llm-textgen-gaudi: build: - dockerfile: comps/llms/text-generation/ollama/langchain/Dockerfile - image: ${REGISTRY:-opea}/llm-ollama:${TAG:-latest} - llm-docsum-tgi: + dockerfile: comps/llms/src/text-generation/Dockerfile.intel_hpu + image: ${REGISTRY:-opea}/llm-textgen-gaudi:${TAG:-latest} + llm-docsum: build: - dockerfile: 
comps/llms/summarization/tgi/langchain/Dockerfile + dockerfile: comps/llms/src/doc-summarization/Dockerfile image: ${REGISTRY:-opea}/llm-docsum-tgi:${TAG:-latest} llm-faqgen: build: dockerfile: comps/llms/src/faq-generation/Dockerfile image: ${REGISTRY:-opea}/llm-faqgen:${TAG:-latest} - llm-native: - build: - dockerfile: comps/llms/text-generation/native/langchain/Dockerfile - image: ${REGISTRY:-opea}/llm-native:${TAG:-latest} - llm-native-llamaindex: - build: - dockerfile: comps/llms/text-generation/native/llama_index/Dockerfile - image: ${REGISTRY:-opea}/llm-native-llamaindex:${TAG:-latest} - vllm-openvino: - build: - context: vllm-openvino - dockerfile: Dockerfile.openvino - image: ${REGISTRY:-opea}/vllm-openvino:${TAG:-latest} - vllm-gaudi: - build: - context: vllm-fork - dockerfile: Dockerfile.hpu - shm_size: '128g' - image: ${REGISTRY:-opea}/vllm-gaudi:${TAG:-latest} - vllm-arc: - build: - dockerfile: comps/llms/text-generation/vllm/langchain/dependency/Dockerfile.intel_gpu - image: ${REGISTRY:-opea}/vllm-arc:${TAG:-latest} llm-eval: build: dockerfile: comps/llms/utils/lm-eval/Dockerfile image: ${REGISTRY:-opea}/llm-eval:${TAG:-latest} - llm-textgen-predictionguard: - build: - dockerfile: comps/llms/text-generation/predictionguard/Dockerfile - image: ${REGISTRY:-opea}/llm-textgen-predictionguard:${TAG:-latest} - llm-docsum-vllm: - build: - dockerfile: comps/llms/summarization/vllm/langchain/Dockerfile - image: ${REGISTRY:-opea}/llm-docsum-vllm:${TAG:-latest} diff --git a/.github/workflows/docker/compose/lvms-compose.yaml b/.github/workflows/docker/compose/lvms-compose.yaml index 6e89db0e35..9424db0440 100644 --- a/.github/workflows/docker/compose/lvms-compose.yaml +++ b/.github/workflows/docker/compose/lvms-compose.yaml @@ -2,44 +2,40 @@ # SPDX-License-Identifier: Apache-2.0 # this file should be run in the root of the repo +# Deprecated original wrappers opea/lvm-tgi, opea/lvm-llava-svc, opea/lvm-video-llama +# and merged functionalities in opea/lvm +# Original video-llama-lvm-server renamed as lvm-video-llama + services: - lvm-tgi: + lvm: build: - dockerfile: comps/lvms/tgi-llava/Dockerfile - image: ${REGISTRY:-opea}/lvm-tgi:${TAG:-latest} + dockerfile: comps/lvms/src/Dockerfile + image: ${REGISTRY:-opea}/lvm:${TAG:-latest} lvm-video-llama: build: - dockerfile: comps/lvms/video-llama/Dockerfile + dockerfile: comps/lvms/src/integrations/dependency/video-llama/Dockerfile image: ${REGISTRY:-opea}/lvm-video-llama:${TAG:-latest} - video-llama-lvm-server: - build: - dockerfile: comps/lvms/video-llama/dependency/Dockerfile - image: ${REGISTRY:-opea}/video-llama-lvm-server:${TAG:-latest} lvm-llava: build: - dockerfile: comps/lvms/llava/dependency/Dockerfile + dockerfile: comps/lvms/src/integrations/dependency/llava/Dockerfile image: ${REGISTRY:-opea}/lvm-llava:${TAG:-latest} - lvm-llava-svc: - build: - dockerfile: comps/lvms/llava/Dockerfile - image: ${REGISTRY:-opea}/lvm-llava-svc:${TAG:-latest} llava-gaudi: build: - dockerfile: comps/lvms/llava/dependency/Dockerfile.intel_hpu + dockerfile: comps/lvms/src/integrations/dependency/llava/Dockerfile.intel_hpu image: ${REGISTRY:-opea}/llava-gaudi:${TAG:-latest} lvm-predictionguard: build: - dockerfile: comps/lvms/predictionguard/Dockerfile + dockerfile: comps/lvms/src/integrations/dependency/predictionguard/Dockerfile image: ${REGISTRY:-opea}/lvm-predictionguard:${TAG:-latest} lvm-llama-vision: build: - dockerfile: comps/lvms/llama-vision/Dockerfile + dockerfile: comps/lvms/src/integrations/dependency/llama-vision/Dockerfile image: 
${REGISTRY:-opea}/lvm-llama-vision:${TAG:-latest} lvm-llama-vision-tp: build: - dockerfile: comps/lvms/llama-vision/Dockerfile_tp + dockerfile: comps/lvms/src/integrations/dependency/llama-vision/Dockerfile.tp image: ${REGISTRY:-opea}/lvm-llama-vision-tp:${TAG:-latest} lvm-llama-vision-guard: build: - dockerfile: comps/lvms/llama-vision/Dockerfile_guard + dockerfile: comps/lvms/src/integrations/dependency/llama-vision/Dockerfile.guard image: ${REGISTRY:-opea}/lvm-llama-vision-guard:${TAG:-latest} diff --git a/.github/workflows/docker/compose/prompt_registry-compose.yaml b/.github/workflows/docker/compose/prompt_registry-compose.yaml index f50daa42c4..b2fde5a59b 100644 --- a/.github/workflows/docker/compose/prompt_registry-compose.yaml +++ b/.github/workflows/docker/compose/prompt_registry-compose.yaml @@ -3,7 +3,7 @@ # this file should be run in the root of the repo services: - promptregistry-server: + promptregistry-mongo: build: dockerfile: comps/prompt_registry/src/Dockerfile - image: ${REGISTRY:-opea}/promptregistry-server:${TAG:-latest} + image: ${REGISTRY:-opea}/promptregistry-mongo:${TAG:-latest} diff --git a/.github/workflows/docker/compose/ragas-compose.yaml b/.github/workflows/docker/compose/ragas-compose.yaml deleted file mode 100644 index 9e4d557985..0000000000 --- a/.github/workflows/docker/compose/ragas-compose.yaml +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -services: - ragas: - build: - dockerfile: comps/ragas/src/tgi/langchain/Dockerfile - image: ${REGISTRY:-opea}/ragas:${TAG:-latest} diff --git a/.github/workflows/docker/compose/retrievers-compose.yaml b/.github/workflows/docker/compose/retrievers-compose.yaml index a81b6a9952..72ce523367 100644 --- a/.github/workflows/docker/compose/retrievers-compose.yaml +++ b/.github/workflows/docker/compose/retrievers-compose.yaml @@ -7,51 +7,3 @@ services: build: dockerfile: comps/retrievers/src/Dockerfile image: ${REGISTRY:-opea}/retriever:${TAG:-latest} - retriever-redis: - build: - dockerfile: comps/retrievers/redis/langchain/Dockerfile - image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest} - retriever-qdrant: - build: - dockerfile: comps/retrievers/qdrant/haystack/Dockerfile - image: ${REGISTRY:-opea}/retriever-qdrant:${TAG:-latest} - retriever-vdms: - build: - dockerfile: comps/retrievers/vdms/langchain/Dockerfile - image: ${REGISTRY:-opea}/retriever-vdms:${TAG:-latest} - retriever-pgvector: - build: - dockerfile: comps/retrievers/pgvector/langchain/Dockerfile - image: ${REGISTRY:-opea}/retriever-pgvector:${TAG:-latest} - retriever-pinecone: - build: - dockerfile: comps/retrievers/pinecone/langchain/Dockerfile - image: ${REGISTRY:-opea}/retriever-pinecone:${TAG:-latest} - retriever-milvus: - build: - dockerfile: comps/retrievers/milvus/langchain/Dockerfile - image: ${REGISTRY:-opea}/retriever-milvus:${TAG:-latest} - retriever-redis-llamaindex: - build: - dockerfile: comps/retrievers/redis/llama_index/Dockerfile - image: ${REGISTRY:-opea}/retriever-redis-llamaindex:${TAG:-latest} - retriever-pathway: - build: - dockerfile: comps/retrievers/pathway/langchain/Dockerfile - image: ${REGISTRY:-opea}/retriever-pathway:${TAG:-latest} - retriever-neo4j: - build: - dockerfile: comps/retrievers/neo4j/langchain/Dockerfile - image: ${REGISTRY:-opea}/retriever-neo4j:${TAG:-latest} - retriever-neo4j-llamaindex: - build: - dockerfile: comps/retrievers/neo4j/llama_index/Dockerfile - image: ${REGISTRY:-opea}/retriever-neo4j-llamaindex:${TAG:-latest} - retriever-elasticsearch: - 
build: - dockerfile: comps/retrievers/elasticsearch/langchain/Dockerfile - image: ${REGISTRY:-opea}/retriever-elasticsearch:${TAG:-latest} - retriever-opensearch: - build: - dockerfile: comps/retrievers/opensearch/langchain/Dockerfile - image: ${REGISTRY:-opea}/retriever-opensearch:${TAG:-latest} diff --git a/.github/workflows/docker/compose/third_parties-compose.yaml b/.github/workflows/docker/compose/third_parties-compose.yaml index b028d47e57..93c968c20a 100644 --- a/.github/workflows/docker/compose/third_parties-compose.yaml +++ b/.github/workflows/docker/compose/third_parties-compose.yaml @@ -18,6 +18,10 @@ services: build: dockerfile: comps/third_parties/bridgetower/src/Dockerfile.intel_hpu image: ${REGISTRY:-opea}/embedding-multimodal-bridgetower-gaudi:${TAG:-latest} + pathway: + build: + dockerfile: comps/third_parties/pathway/src/Dockerfile + image: ${REGISTRY:-opea}/pathway:${TAG:-latest} wav2lip: build: dockerfile: comps/third_parties/wav2lip/src/Dockerfile @@ -26,3 +30,18 @@ services: build: dockerfile: comps/third_parties/wav2lip/src/Dockerfile.intel_hpu image: ${REGISTRY:-opea}/wav2lip-gaudi:${TAG:-latest} + vllm-arc: + build: + dockerfile: comps/third_parties/vllm/src/Dockerfile.intel_gpu + image: ${REGISTRY:-opea}/vllm-arc:${TAG:-latest} + vllm-openvino: + build: + context: vllm-openvino + dockerfile: Dockerfile.openvino + image: ${REGISTRY:-opea}/vllm-openvino:${TAG:-latest} + vllm-gaudi: + build: + context: vllm-fork + dockerfile: Dockerfile.hpu + shm_size: '128g' + image: ${REGISTRY:-opea}/vllm-gaudi:${TAG:-latest} diff --git a/.github/workflows/docker/compose/vectorstores-compose.yaml b/.github/workflows/docker/compose/vectorstores-compose.yaml deleted file mode 100644 index 584927e952..0000000000 --- a/.github/workflows/docker/compose/vectorstores-compose.yaml +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -# this file should be run in the root of the repo -services: - vectorstore-pathway: - build: - dockerfile: comps/vectorstores/pathway/Dockerfile - image: ${REGISTRY:-opea}/vectorstore-pathway:${TAG:-latest} diff --git a/.github/workflows/manual-comps-test.yml b/.github/workflows/manual-comps-test.yml index 34b0b84101..c3f73fb25f 100644 --- a/.github/workflows/manual-comps-test.yml +++ b/.github/workflows/manual-comps-test.yml @@ -7,7 +7,7 @@ on: inputs: services: default: "asr" - description: "List of services to test [agent,asr,chathistory,dataprep,embeddings,feedback_management,finetuning,guardrails,intent_detection,knowledgegraphs,llms,lvms,nginx,prompt_registry,ragas,rerankings,retrievers,tts,vectorstores,web_retrievers]" + description: "List of services to test [agent,asr,chathistory,dataprep,embeddings,feedback_management,finetuning,guardrails,knowledgegraphs,llms,lvms,nginx,prompt_registry,ragas,rerankings,retrievers,tts,web_retrievers]" required: true type: string build: @@ -15,11 +15,16 @@ on: description: "Build test required images for Comps" required: false type: boolean - test: + test_compose: default: true description: "Test comps with docker compose" required: false type: boolean + test_helmchart: + default: true + description: "Test comps with helm chart" + required: false + type: boolean tag: default: "rc" description: "Tag to apply to images" @@ -57,5 +62,6 @@ jobs: service: ${{ matrix.service }} tag: ${{ inputs.tag }} mode: ${{ inputs.mode }} - test: ${{ inputs.test }} + test_compose: ${{ inputs.test_compose }} + test_helmchart: ${{ inputs.test_helmchart }} secrets: inherit diff --git 
a/.github/workflows/manual-docker-publish.yml b/.github/workflows/manual-docker-publish.yml index 6f51a29e3f..b7e770dedb 100644 --- a/.github/workflows/manual-docker-publish.yml +++ b/.github/workflows/manual-docker-publish.yml @@ -7,12 +7,12 @@ on: inputs: services: default: "" - description: "List of services to test [agent,asr,chathistory,dataprep,embeddings,feedback_management,finetuning,guardrails,intent_detection,knowledgegraphs,llms,lvms,nginx,prompt_registry,ragas,rerankings,retrievers,tts,vectorstores,web_retrievers]" + description: "List of services to test [agent,asr,chathistory,dataprep,embeddings,feedback_management,finetuning,guardrails,knowledgegraphs,llms,lvms,nginx,prompt_registry,ragas,rerankings,retrievers,tts,web_retrievers]" required: false type: string images: default: "" - description: "List of images to scan [dataprep-pgvector,dataprep-pinecone...]" + description: "List of images to publish [dataprep-pgvector,dataprep-pinecone...]" required: false type: string tag: @@ -45,6 +45,7 @@ jobs: strategy: matrix: image: ${{ fromJSON(needs.get-image-list.outputs.matrix) }} + fail-fast: false runs-on: "docker-build-gaudi" steps: - uses: docker/login-action@v3.2.0 diff --git a/.github/workflows/manual-docker-scan.yml b/.github/workflows/manual-docker-scan.yml index c82b49987e..be11ee93cf 100644 --- a/.github/workflows/manual-docker-scan.yml +++ b/.github/workflows/manual-docker-scan.yml @@ -7,7 +7,7 @@ on: inputs: services: default: "asr" - description: "List of services to test [agent_langchain,asr,chathistory_mongo,dataprep_milvus...]" #,embeddings,guardrails,llms,lvms,prompt_registry,ragas,rerankings,retrievers,tts,vectorstores,web_retrievers]" + description: "List of services to test [agent_langchain,asr,chathistory_mongo,dataprep_milvus...]" #,embeddings,guardrails,llms,lvms,prompt_registry,ragas,rerankings,retrievers,tts,web_retrievers]" required: false type: string images: diff --git a/.github/workflows/pr-helm-test.yaml b/.github/workflows/pr-helm-test.yaml new file mode 100644 index 0000000000..4e7fdbf4fc --- /dev/null +++ b/.github/workflows/pr-helm-test.yaml @@ -0,0 +1,73 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +name: Microservice Test With Helm Charts + +on: + pull_request_target: + branches: [main] + types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped + paths: + - "!**.md" + - "**/deployment/kubernetes/**" + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +jobs: + job1: + name: Get-test-matrix + runs-on: ubuntu-latest + outputs: + run_matrix: ${{ steps.get-test-matrix.outputs.run_matrix }} + steps: + - name: Checkout out Repo + uses: actions/checkout@v4 + with: + ref: "refs/pull/${{ github.event.number }}/merge" + fetch-depth: 0 + + - name: Get test matrix + id: get-test-matrix + run: | + set -x + base_commit=${{ github.event.pull_request.base.sha }} + merged_commit=$(git log -1 --format='%H') + values_files=$(git diff --name-only ${base_commit} ${merged_commit} | \ + grep "values.yaml" | \ + sort -u) # comps/agent/deployment/kubernetes/cpu-values.yaml + + run_matrix="{\"include\":[" + for values_file in ${values_files}; do + if [ -f "$values_file" ]; then + valuefile=$(basename "$values_file") # cpu-values.yaml + if [[ "$values_file" == *"third_parties"* ]]; then #comps/third_parties/bridgetower/deployment/kubernetes/gaudi-values.yaml + service=$(echo 
"$values_file" | cut -d'/' -f2-3) # third_parties/bridgetower + else + service=$(echo "$values_file" | cut -d'/' -f2) # agent + fi + + echo "service=${service}" + if [[ $(echo ${run_matrix} | grep -c "{\"service\":\"${service}\"},") == 0 ]]; then + run_matrix="${run_matrix}{\"service\":\"${service}\"}," + echo "------------------ add one values file ------------------" + fi + fi + done + run_matrix="${run_matrix%,}" + run_matrix=$run_matrix"]}" + echo "run_matrix=${run_matrix}" + echo "run_matrix=${run_matrix}" >> $GITHUB_OUTPUT + + Chart-test: + needs: [job1] + if: always() && ${{ fromJSON(needs.job1.outputs.run_matrix).length != 0 }} + uses: ./.github/workflows/_run-helm-chart.yml + strategy: + matrix: ${{ fromJSON(needs.job1.outputs.run_matrix) }} + with: + service: ${{ matrix.service }} + mode: "CI" + secrets: inherit diff --git a/.github/workflows/pr-link-path-scan.yaml b/.github/workflows/pr-link-path-scan.yaml index 5ba0143a9a..32a5a47385 100644 --- a/.github/workflows/pr-link-path-scan.yaml +++ b/.github/workflows/pr-link-path-scan.yaml @@ -139,7 +139,7 @@ jobs: fi fi else - echo "$check_path does not exist" + echo "Invalid reference path from $refer_path, reference path: $(echo $png_line | cut -d ']' -f2)" fail="TRUE" fi done diff --git a/.github/workflows/pr-microservice-test.yml b/.github/workflows/pr-microservice-test.yml index d0a56cdacf..e7cb81403f 100644 --- a/.github/workflows/pr-microservice-test.yml +++ b/.github/workflows/pr-microservice-test.yml @@ -5,7 +5,7 @@ name: MicroService-test on: pull_request_target: - branches: ["main", "*rc"] + branches: ["main", "*rc", "pre-ci"] types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped paths: - comps/** @@ -27,20 +27,33 @@ jobs: strategy: matrix: ${{ fromJSON(needs.job1.outputs.run_matrix) }} runs-on: ${{ matrix.hardware }} + permissions: + id-token: write + contents: read continue-on-error: true if: ${{ !github.event.pull_request.draft }} steps: - name: Clean Up Working Directory run: | sudo rm -rf ${{github.workspace}}/* - docker system prune -f + cid=$(docker ps -aq --filter "name=test-comps-*") + if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi docker rmi $(docker images --filter reference="*/*:comps" -q) || true + docker system prune -f + docker ps - name: Checkout out Repo uses: actions/checkout@v4 with: ref: "refs/pull/${{ github.event.number }}/merge" + - name: Get AWS Credentials + uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 + if: ${{ contains(matrix.service, 'bedrock') }} + with: + role-to-assume: ${{ secrets.AWS_IAM_ROLE_ARN }} + aws-region: us-east-1 + - name: Run microservice test env: HF_TOKEN: ${{ secrets.HF_TOKEN }} @@ -56,7 +69,6 @@ jobs: timeout 60m bash $(find . -type f -name test_${service}.sh) - name: Clean up container - if: cancelled() || failure() run: | cid=$(docker ps -aq --filter "name=test-comps-*") if [[ ! 
-z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi diff --git a/.github/workflows/push-image-build.yml b/.github/workflows/push-image-build.yml index 503907770d..fda1528065 100644 --- a/.github/workflows/push-image-build.yml +++ b/.github/workflows/push-image-build.yml @@ -1,17 +1,18 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -# Test name: Build latest images on push event on: push: branches: ["main"] paths: - - comps/** - - "!**.md" - - "!**.txt" - - "**requirements.txt" - - .github/workflows/push-image-build.yml + - 'comps/**' + - 'src/**' + - '.github/workflows/docker/compose/**' + - '!**/*.md' + - '!**/*.txt' + - '**/requirements.txt' + - '.github/workflows/push-image-build.yml' concurrency: group: ${{ github.workflow }}-${{ github.ref }}-on-push @@ -23,7 +24,7 @@ jobs: outputs: services: ${{ steps.get-services.outputs.services }} steps: - - name: Checkout out Repo + - name: Checkout Repo uses: actions/checkout@v4 with: fetch-depth: 0 @@ -31,24 +32,47 @@ jobs: - name: Get Test Services id: get-services run: | + set -x base_commit=$(git rev-parse HEAD~1) merged_commit=$(git log -1 --format='%H') - changed_files="$(git diff --name-only ${base_commit} ${merged_commit} | grep 'comps/' | grep -vE '\.md|\.txt|comps/cores')" || true - services=$(printf '%s\n' "${changed_files[@]}" | cut -d'/' -f2 | grep -vE '\.py' | sort -u | jq -R '.' | jq -sc '.') || true - echo "services=$services" - echo "services=$services" >> $GITHUB_OUTPUT + # git diff --name-only ${base_commit} ${merged_commit} | grep -E "cores|comps/__init__.py" | grep -Ev ".md" + + if git diff --name-only ${base_commit} ${merged_commit} | grep -E "cores|comps/__init__.py" | grep -Ev ".md"; then + echo "ALL image build!!!" + services=$(basename -a .github/workflows/docker/compose/*-compose.yaml | sed 's/-compose.yaml//' | jq -R '.' ) + else + changed_src="$(git diff --name-only ${base_commit} ${merged_commit} | grep 'src/' | grep -vE '\.md')" || true + changed_yamls="$(git diff --name-only ${base_commit} ${merged_commit} | grep '.github/workflows/docker/compose/')" || true + [[ -n "$changed_src" ]] && services=$(printf '%s\n' "${changed_src[@]}" | cut -d'/' -f2 | grep -vE '\.py' | sort -u | jq -R '.' ) || true + + if [[ -n "$changed_yamls" ]]; then + while IFS= read -r line; do + filename=$(basename "$line" -compose.yaml) + echo "$line $(printf '%s\n' "$filename" | jq -R '.' )" + services+=" $(printf '%s\n' "$filename" | jq -R '.' )" || true + done <<< "$changed_yamls" + else + echo "No changes in YAML files." + fi + fi + + echo "services=$(echo "$services" | jq -sc 'unique | sort')" + echo "services=$(echo "$services" | jq -sc 'unique | sort')" >> $GITHUB_OUTPUT + continue-on-error: true image-build: needs: get-build-matrix + if: ${{ fromJSON(needs.get-build-matrix.outputs.services).length != 0 }} strategy: matrix: service: ${{ fromJSON(needs.get-build-matrix.outputs.services) }} - node: [docker-build-xeon, docker-build-gaudi] - runs-on: ${{ matrix.node }} + node: [xeon, gaudi] + runs-on: docker-build-${{ matrix.node }} continue-on-error: true steps: - name: Clean up Working Directory run: | + echo "matrix.service=${{ matrix.service }}" sudo rm -rf ${{github.workspace}}/* - name: Checkout out Repo @@ -66,14 +90,14 @@ jobs: echo "file_exists=false" >> $GITHUB_ENV echo "docker_compose_path=${docker_compose_path} for this service does not exist, so skipping image build for this service!!!" 
fi - - name: Get build list - id: get-build-list - env: - docker_compose_path: ${{ env.docker_compose_path }} - mode: CI - run: | - build_list=$(bash ${{ github.workspace }}/.github/workflows/scripts/get_cicd_list.sh "${mode}" ${docker_compose_path}) - echo "build_list=${build_list}" >> $GITHUB_OUTPUT + if [[ $(grep -c "vllm-openvino:" ${docker_compose_path}) != 0 ]]; then + git clone https://github.com/vllm-project/vllm.git vllm-openvino + cd ./vllm-openvino && git checkout v0.6.1 && git rev-parse HEAD && cd ../ + fi + if [[ $(grep -c "vllm-gaudi:" ${docker_compose_path}) != 0 ]]; then + git clone --depth 1 --branch v0.6.4.post2+Gaudi-1.19.0 https://github.com/HabanaAI/vllm-fork.git + sed -i 's/triton/triton==3.1.0/g' vllm-fork/requirements-hpu.txt + fi - name: Build Image if: env.file_exists == 'true' @@ -82,4 +106,3 @@ jobs: work_dir: ${{ github.workspace }} docker_compose_path: ${{ env.docker_compose_path }} registry: ${OPEA_IMAGE_REPO}opea - service_list: ${{ steps.get-build-list.outputs.build_list }} diff --git a/.github/workflows/scripts/get_test_matrix.sh b/.github/workflows/scripts/get_test_matrix.sh index d7f99fc97b..8df06d2b57 100644 --- a/.github/workflows/scripts/get_test_matrix.sh +++ b/.github/workflows/scripts/get_test_matrix.sh @@ -102,12 +102,31 @@ function find_test_2() { test_files=$(printf '%s\n' "${changed_files[@]}" | grep -E "\.sh") || true for test_file in ${test_files}; do if [ -f $test_file ]; then - _service=$(echo $test_file | cut -d'/' -f3 | cut -d'.' -f1 | cut -c6-) + _service=$(echo $test_file | cut -d'/' -f3 | grep -E "\.sh" | cut -d'.' -f1 | cut -c6-) _fill_in_matrix $_service fi done } +function find_test_3() { + yaml_files=${changed_files} + for yaml_file in ${yaml_files}; do + if [ -f $yaml_file ]; then + _service=$(echo $yaml_file | cut -d'/' -f2) + yaml_name=$(echo $yaml_file | cut -d'/' -f5) + if [ "$yaml_name" != "compose.yaml" ]; then + _domain=${yaml_name%.yaml} + _domain=${_domain#compose_} + _service=${_service}_${_domain} + fi + find_test=$(find ./tests -type f -name test_${_service}*.sh) || true + if [ "$find_test" ]; then + fill_in_matrix "$find_test" + fi + fi + done +} + function main() { changed_files=$(printf '%s\n' "${changed_files_full[@]}" | grep 'comps/' | grep -vE '\.md|comps/cores|comps/third_parties|deployment|\.yaml') || true @@ -126,6 +145,14 @@ function main() { echo "run_matrix=${run_matrix}" echo "===========finish find_test_2============" + changed_files=$(printf '%s\n' "${changed_files_full[@]}" | grep 'deployment/docker_compose/compose' | grep '.yaml') || true + echo "===========start find_test_3============" + echo "changed_files=${changed_files}" + find_test_3 + sleep 1s + echo "run_matrix=${run_matrix}" + echo "===========finish find_test_3============" + run_matrix=$run_matrix"]}" echo "run_matrix=${run_matrix}" >> $GITHUB_OUTPUT } diff --git a/.github/workflows/scripts/k8s-utils.sh b/.github/workflows/scripts/k8s-utils.sh new file mode 100755 index 0000000000..ba58e1a152 --- /dev/null +++ b/.github/workflows/scripts/k8s-utils.sh @@ -0,0 +1,71 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -e + +function dump_pod_log() { + pod_name=$1 + namespace=$2 + echo "-----------Pod: $pod_name---------" + echo "#kubectl describe pod $pod_name -n $namespace" + kubectl describe pod $pod_name -n $namespace + echo "-----------------------------------" + echo "#kubectl logs $pod_name -n $namespace" + kubectl logs $pod_name -n $namespace + echo "-----------------------------------" 
+} + +function dump_pods_status() { + namespace=$1 + echo "-----DUMP POD STATUS in NS $namespace------" + kubectl get pods -n $namespace -o wide + echo "-----------------------------------" + + # Get all pods in the namespace and their statuses + pods=$(kubectl get pods -n $namespace --no-headers) + + # Loop through each pod + echo "$pods" | while read -r line; do + pod_name=$(echo $line | awk '{print $1}') + ready=$(echo $line | awk '{print $2}') + status=$(echo $line | awk '{print $3}') + + # Extract the READY count + ready_count=$(echo $ready | cut -d'/' -f1) + required_count=$(echo $ready | cut -d'/' -f2) + + # Check if the pod is not in "Running" status or READY count is less than required + if [[ "$status" != "Running" || "$ready_count" -lt "$required_count" ]]; then + dump_pod_log $pod_name $namespace + fi + done +} + +function dump_all_pod_logs() { + namespace=$1 + echo "-----DUMP POD STATUS AND LOG in NS $namespace------" + + pods=$(kubectl get pods -n $namespace -o jsonpath='{.items[*].metadata.name}') + for pod_name in $pods + do + dump_pod_log $pod_name $namespace + done +} + +if [ $# -eq 0 ]; then + echo "Usage: $0 " + exit 1 +fi + +case "$1" in + dump_pods_status) + dump_pods_status $2 + ;; + dump_all_pod_logs) + dump_all_pod_logs $2 + ;; + *) + echo "Unknown function: $1" + ;; +esac diff --git a/README.md b/README.md index 0132144122..86f58da8d2 100644 --- a/README.md +++ b/README.md @@ -40,17 +40,17 @@ The initially supported `Microservices` are described in the below table. More ` | ------------------------------------------------- | ------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------- | ------ | ------------------------------------- | | [Embedding](./comps/embeddings/src/README.md) | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [BAAI/bge-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) | [TEI-Gaudi](https://github.com/huggingface/tei-gaudi) | Gaudi2 | Embedding on Gaudi2 | | [Embedding](./comps/embeddings/src/README.md) | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [BAAI/bge-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) | [TEI](https://github.com/huggingface/text-embeddings-inference) | Xeon | Embedding on Xeon CPU | -| [Retriever](./comps/retrievers/src/README.md) | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [BAAI/bge-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) | [TEI](https://github.com/huggingface/text-embeddings-inference) | Xeon | Retriever on Xeon CPU | +| [Retriever](./comps/retrievers/README.md) | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [BAAI/bge-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) | [TEI](https://github.com/huggingface/text-embeddings-inference) | Xeon | Retriever on Xeon CPU | | [Reranking](./comps/rerankings/src/README.md) | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [BAAI/bge-reranker-base](https://huggingface.co/BAAI/bge-reranker-base) | [TEI-Gaudi](https://github.com/huggingface/tei-gaudi) | Gaudi2 | Reranking on Gaudi2 | | [Reranking](./comps/rerankings/src/README.md) | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | 
[BBAAI/bge-reranker-base](https://huggingface.co/BAAI/bge-reranker-base) | [TEI](https://github.com/huggingface/text-embeddings-inference) | Xeon | Reranking on Xeon CPU | | [ASR](./comps/asr/src/README.md) | NA | [openai/whisper-small](https://huggingface.co/openai/whisper-small) | NA | Gaudi2 | Audio-Speech-Recognition on Gaudi2 | | [ASR](./comps/asr/src/README.md) | NA | [openai/whisper-small](https://huggingface.co/openai/whisper-small) | NA | Xeon | Audio-Speech-RecognitionS on Xeon CPU | | [TTS](./comps/tts/src/README.md) | NA | [microsoft/speecht5_tts](https://huggingface.co/microsoft/speecht5_tts) | NA | Gaudi2 | Text-To-Speech on Gaudi2 | | [TTS](./comps/tts/src/README.md) | NA | [microsoft/speecht5_tts](https://huggingface.co/microsoft/speecht5_tts) | NA | Xeon | Text-To-Speech on Xeon CPU | -| [Dataprep](./comps/dataprep/src/README.md) | [Qdrant](https://qdrant.tech/) | [sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) | NA | Gaudi2 | Dataprep on Gaudi2 | -| [Dataprep](./comps/dataprep/src/README.md) | [Qdrant](https://qdrant.tech/) | [sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) | NA | Xeon | Dataprep on Xeon CPU | -| [Dataprep](./comps/dataprep/src/README.md) | [Redis](https://redis.io/) | [BAAI/bge-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) | NA | Gaudi2 | Dataprep on Gaudi2 | -| [Dataprep](./comps/dataprep/src/README.md) | [Redis](https://redis.io/) | [BAAI/bge-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) | NA | Xeon | Dataprep on Xeon CPU | +| [Dataprep](./comps/dataprep/README.md) | [Qdrant](https://qdrant.tech/) | [sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) | NA | Gaudi2 | Dataprep on Gaudi2 | +| [Dataprep](./comps/dataprep/README.md) | [Qdrant](https://qdrant.tech/) | [sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) | NA | Xeon | Dataprep on Xeon CPU | +| [Dataprep](./comps/dataprep/README.md) | [Redis](https://redis.io/) | [BAAI/bge-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) | NA | Gaudi2 | Dataprep on Gaudi2 | +| [Dataprep](./comps/dataprep/README.md) | [Redis](https://redis.io/) | [BAAI/bge-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) | NA | Xeon | Dataprep on Xeon CPU | | [LLM](./comps/llms/src/text-generation/README.md) | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [Intel/neural-chat-7b-v3-3](https://huggingface.co/Intel/neural-chat-7b-v3-3) | [TGI Gaudi](https://github.com/huggingface/tgi-gaudi) | Gaudi2 | LLM on Gaudi2 | | [LLM](./comps/llms/src/text-generation/README.md) | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [Intel/neural-chat-7b-v3-3](https://huggingface.co/Intel/neural-chat-7b-v3-3) | [TGI](https://github.com/huggingface/text-generation-inference) | Xeon | LLM on Xeon CPU | | [LLM](./comps/llms/src/text-generation/README.md) | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [Intel/neural-chat-7b-v3-3](https://huggingface.co/Intel/neural-chat-7b-v3-3) | [Ray Serve](https://github.com/ray-project/ray) | Gaudi2 | LLM on Gaudi2 | diff --git a/comps/__init__.py b/comps/__init__.py index babd491edd..a35e703675 100644 --- a/comps/__init__.py +++ b/comps/__init__.py @@ -3,6 +3,8 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import os + 
# Document from comps.cores.proto.docarray import ( Audio2TextDoc, @@ -38,7 +40,6 @@ PIIResponseDoc, Audio2text, DocSumDoc, - DocSumLLMParams, ) # Constants @@ -50,7 +51,8 @@ from comps.cores.mega.micro_service import MicroService, register_microservice, opea_microservices # Telemetry -from comps.cores.telemetry.opea_telemetry import opea_telemetry +if os.getenv("ENABLE_OPEA_TELEMETRY", "false").lower() == "true": + from comps.cores.telemetry.opea_telemetry import opea_telemetry # Common from comps.cores.common.component import OpeaComponent, OpeaComponentRegistry, OpeaComponentLoader diff --git a/comps/agent/deployment/kubernetes/README.md b/comps/agent/deployment/kubernetes/README.md index e69de29bb2..158ee40818 100644 --- a/comps/agent/deployment/kubernetes/README.md +++ b/comps/agent/deployment/kubernetes/README.md @@ -0,0 +1,11 @@ +# Deploy Agent microservice on Kubernetes cluster + +- You should have Helm (version >= 3.15) installed. Refer to the [Helm Installation Guide](https://helm.sh/docs/intro/install/) for more information. +- For more deployment options, refer to [helm charts README](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts#readme). + +## Deploy on Kubernetes + +``` +export HFTOKEN="insert-your-huggingface-token-here" +helm install agent oci://ghcr.io/opea-project/charts/agent --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f gaudi-values.yaml +``` diff --git a/comps/agent/deployment/kubernetes/gaudi-values.yaml b/comps/agent/deployment/kubernetes/gaudi-values.yaml new file mode 100644 index 0000000000..f5e8313b95 --- /dev/null +++ b/comps/agent/deployment/kubernetes/gaudi-values.yaml @@ -0,0 +1,11 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Accelerate inferencing in heaviest components to improve performance +# by overriding their subchart values + +vllm: + enabled: true + image: + repository: opea/vllm-gaudi +llm_endpoint_url: http://{{ .Release.Name }}-vllm diff --git a/comps/agent/src/Dockerfile b/comps/agent/src/Dockerfile index be889676c8..5c2982f6b2 100644 --- a/comps/agent/src/Dockerfile +++ b/comps/agent/src/Dockerfile @@ -15,8 +15,6 @@ RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ chown -R user /home/user/ -USER user - COPY comps /home/user/comps RUN pip install --no-cache-dir --upgrade pip setuptools && \ @@ -26,9 +24,7 @@ RUN pip install --no-cache-dir --upgrade pip setuptools && \ pip install --no-cache-dir -r /home/user/comps/agent/src/requirements.txt; \ fi -ENV PYTHONPATH=$PYTHONPATH:/home/user - -USER root +ENV PYTHONPATH=/home/user RUN mkdir -p /home/user/comps/agent/src/status && chown -R user /home/user/comps/agent/src/status diff --git a/comps/agent/src/README.md b/comps/agent/src/README.md index 6d1bc47933..8d8360a962 100644 --- a/comps/agent/src/README.md +++ b/comps/agent/src/README.md @@ -121,6 +121,8 @@ docker logs comps-agent-endpoint Once microservice starts, user can use below script to invoke. +### 3.1 Use chat completions API + ```bash curl http://${ip_address}:9090/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{ "query": "What is OPEA project?" @@ -132,6 +134,41 @@ data: 'The OPEA project is .....' # just showing partial example here. 
data: [DONE] +``` + +### 3.2 Use assistants APIs + +```bash + +# step1 create assistant to get `asssistant_id` + +curl http://${ip_address}:9090/v1/assistants -X POST -H "Content-Type: application/json" -d '{ + "agent_config": {"llm_engine": "tgi", "llm_endpoint_url": "http://${ip_address}:8080", "tools": "/home/user/comps/agent/src/tools/custom_tools.yaml"} + }' + +## if want to persist your agent messages, set store config like this: +curl http://${ip_address}:9090/v1/assistants -X POST -H "Content-Type: application/json" -d '{ + "agent_config": {"llm_engine": "tgi", "llm_endpoint_url": "http://${ip_address}:8080", "tools": "/home/user/comps/agent/src/tools/custom_tools.yaml","with_store":true, "store_config":{"redis_uri":"redis://${ip_address}:6379"}} + }' + +# step2 create thread to get `thread_id` + +curl http://${ip_address}:9090/v1/threads -X POST -H "Content-Type: application/json" -d '{}' + +# step3 create messages + +curl http://${ip_address}:9091/v1/threads/{thread_id}/messages -X POST -H "Content-Type: application/json" -d '{"role": "user", "content": "What is OPEA project?"}' + + +## if agent is set with `with_store`, should add `assistant_id` in the messages for store + +curl http://${ip_address}:9091/v1/threads/{thread_id}/messages -X POST -H "Content-Type: application/json" -d '{"role": "user", "content": "What is OPEA project?", "assistant_id": "{assistant_id}"}' + +# step4 run + +curl http://${ip_address}:9091/v1/threads/{thread_id}/runs -X POST -H "Content-Type: application/json" -d '{"assistant_id": "{assistant_id}"}' + + ``` ## 🚀 4. Provide your own tools diff --git a/comps/agent/src/agent.py b/comps/agent/src/agent.py index fc47c3132e..baeb179d2c 100644 --- a/comps/agent/src/agent.py +++ b/comps/agent/src/agent.py @@ -5,9 +5,10 @@ import pathlib import sys from datetime import datetime -from typing import Union +from typing import List, Optional, Union from fastapi.responses import StreamingResponse +from pydantic import BaseModel cur_path = pathlib.Path(__file__).parent.resolve() comps_path = os.path.join(cur_path, "../../../") @@ -17,7 +18,7 @@ from comps.agent.src.integrations.agent import instantiate_agent from comps.agent.src.integrations.global_var import assistants_global_kv, threads_global_kv from comps.agent.src.integrations.thread import instantiate_thread_memory, thread_completion_callback -from comps.agent.src.integrations.utils import get_args +from comps.agent.src.integrations.utils import assemble_store_messages, get_args from comps.cores.proto.api_protocol import ( AssistantsObject, ChatCompletionRequest, @@ -35,12 +36,17 @@ args, _ = get_args() +db_client = None + logger.info("========initiating agent============") logger.info(f"args: {args}") agent_inst = instantiate_agent(args, args.strategy, with_memory=args.with_memory) -class AgentCompletionRequest(LLMParamsDoc): +class AgentCompletionRequest(ChatCompletionRequest): + # rewrite, specify tools in this turn of conversation + tool_choice: Optional[List[str]] = None + # for short/long term in-memory thread_id: str = "0" user_id: str = "0" @@ -52,42 +58,85 @@ class AgentCompletionRequest(LLMParamsDoc): host="0.0.0.0", port=args.port, ) -async def llm_generate(input: Union[LLMParamsDoc, ChatCompletionRequest, AgentCompletionRequest]): +async def llm_generate(input: AgentCompletionRequest): if logflag: logger.info(input) - input.stream = args.stream - config = {"recursion_limit": args.recursion_limit} + # don't use global stream setting + # input.stream = args.stream + config = {"recursion_limit": 
args.recursion_limit, "tool_choice": input.tool_choice} if args.with_memory: - if isinstance(input, AgentCompletionRequest): - config["configurable"] = {"thread_id": input.thread_id} - else: - config["configurable"] = {"thread_id": "0"} + config["configurable"] = {"thread_id": input.thread_id} if logflag: logger.info(type(agent_inst)) - if isinstance(input, LLMParamsDoc): - # use query as input - input_query = input.query + # openai compatible input + if isinstance(input.messages, str): + messages = input.messages else: - # openai compatible input - if isinstance(input.messages, str): - input_query = input.messages - else: - input_query = input.messages[-1]["content"] + # TODO: need handle multi-turn messages + messages = input.messages[-1]["content"] # 2. prepare the input for the agent if input.stream: logger.info("-----------STREAMING-------------") - return StreamingResponse(agent_inst.stream_generator(input_query, config), media_type="text/event-stream") + return StreamingResponse( + agent_inst.stream_generator(messages, config), + media_type="text/event-stream", + ) else: logger.info("-----------NOT STREAMING-------------") - response = await agent_inst.non_streaming_run(input_query, config) + response = await agent_inst.non_streaming_run(messages, config) logger.info("-----------Response-------------") - return GeneratedDoc(text=response, prompt=input_query) + return GeneratedDoc(text=response, prompt=messages) + + +class RedisConfig(BaseModel): + redis_uri: Optional[str] = "redis://127.0.0.1:6379" + + +class AgentConfig(BaseModel): + stream: Optional[bool] = False + agent_name: Optional[str] = "OPEA_Default_Agent" + strategy: Optional[str] = "react_llama" + role_description: Optional[str] = "LLM enhanced agent" + tools: Optional[str] = None + recursion_limit: Optional[int] = 5 + + model: Optional[str] = "meta-llama/Meta-Llama-3-8B-Instruct" + llm_engine: Optional[str] = None + llm_endpoint_url: Optional[str] = None + max_new_tokens: Optional[int] = 1024 + top_k: Optional[int] = 10 + top_p: Optional[float] = 0.95 + temperature: Optional[float] = 0.01 + repetition_penalty: Optional[float] = 1.03 + return_full_text: Optional[bool] = False + custom_prompt: Optional[str] = None + + # short/long term memory + with_memory: Optional[bool] = False + # persistence + with_store: Optional[bool] = False + store_config: Optional[RedisConfig] = None + + timeout: Optional[int] = 60 + + # sql agent config + db_path: Optional[str] = None + db_name: Optional[str] = None + use_hints: Optional[bool] = False + hints_file: Optional[str] = None + + # specify tools in this turn of conversation + tool_choice: Optional[List[str]] = None + + +class CreateAssistant(CreateAssistantsRequest): + agent_config: AgentConfig @register_microservice( @@ -96,17 +145,32 @@ async def llm_generate(input: Union[LLMParamsDoc, ChatCompletionRequest, AgentCo host="0.0.0.0", port=args.port, ) -def create_assistants(input: CreateAssistantsRequest): +def create_assistants(input: CreateAssistant): # 1. 
initialize the agent - agent_id = agent_inst.id + agent_inst = instantiate_agent( + input.agent_config, input.agent_config.strategy, with_memory=input.agent_config.with_memory + ) + assistant_id = agent_inst.id created_at = int(datetime.now().timestamp()) with assistants_global_kv as g_assistants: - g_assistants[agent_id] = (agent_inst, created_at) - logger.info(f"Record assistant inst {agent_id} in global KV") + g_assistants[assistant_id] = (agent_inst, created_at) + logger.info(f"Record assistant inst {assistant_id} in global KV") + + if input.agent_config.with_store: + logger.info("Save Agent Config to database") + agent_inst.with_store = input.agent_config.with_store + print(input) + global db_client + if db_client is None: + from comps.agent.src.integrations.storage.persistence_redis import RedisPersistence + + db_client = RedisPersistence(input.agent_config.store_config.redis_uri) + # save + db_client.put(assistant_id, {"config": input.model_dump_json(), "created_at": created_at}, "agent_config") # get current time in string format return AssistantsObject( - id=agent_id, + id=assistant_id, created_at=created_at, ) @@ -151,14 +215,27 @@ def create_messages(thread_id, input: CreateMessagesRequest): msg_id, created_at = thread_inst.add_query(query) structured_content = MessageContent(text=query) - return MessageObject( + message = MessageObject( id=msg_id, created_at=created_at, thread_id=thread_id, role=role, content=[structured_content], + assistant_id=input.assistant_id, ) + # save messages using assistant_id as key + if input.assistant_id is not None: + with assistants_global_kv as g_assistants: + agent_inst, _ = g_assistants[input.assistant_id] + if agent_inst.with_store: + logger.info(f"Save Agent Messages, assistant_id: {input.assistant_id}, thread_id: {thread_id}") + # if with store, db_client initialized already + global db_client + db_client.put(msg_id, message.model_dump_json(), input.assistant_id) + + return message + @register_microservice( name="opea_service@comps-chat-agent", @@ -173,12 +250,19 @@ def create_run(thread_id, input: CreateRunResponse): if status == "running": return "[error] Thread is already running, need to cancel the current run or wait for it to finish" - agent_id = input.assistant_id + assistant_id = input.assistant_id with assistants_global_kv as g_assistants: - agent_inst, _ = g_assistants[agent_id] + agent_inst, _ = g_assistants[assistant_id] config = {"recursion_limit": args.recursion_limit} - input_query = thread_inst.get_query() + + if agent_inst.with_store: + # assemble multi-turn messages + global db_client + input_query = assemble_store_messages(db_client.get_all(assistant_id)) + else: + input_query = thread_inst.get_query() + try: return StreamingResponse( thread_completion_callback(agent_inst.stream_generator(input_query, config, thread_id), thread_id), diff --git a/comps/agent/src/integrations/persistence.py b/comps/agent/src/integrations/storage/persistence_memory.py similarity index 100% rename from comps/agent/src/integrations/persistence.py rename to comps/agent/src/integrations/storage/persistence_memory.py diff --git a/comps/agent/src/integrations/storage/persistence_redis.py b/comps/agent/src/integrations/storage/persistence_redis.py new file mode 100644 index 0000000000..449a0b1b82 --- /dev/null +++ b/comps/agent/src/integrations/storage/persistence_redis.py @@ -0,0 +1,146 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import json +from collections import OrderedDict +from typing import Any, 
Dict, List, Optional, Tuple + +from redis import Redis +from redis.asyncio import Redis as AsyncRedis + +DEFAULT_COLLECTION = "data" +DEFAULT_BATCH_SIZE = 1 + + +class RedisPersistence: + def __init__( + self, + redis_uri: Optional[str] = "redis://127.0.0.1:6379", + **kwargs: Any, + ): + try: + # connect to redis from url + self._redis_client = Redis.from_url(redis_uri, **kwargs) + self._async_redis_client = AsyncRedis.from_url(redis_uri, **kwargs) + except ValueError as e: + raise ValueError(f"Redis failed to connect: {e}") + + def put(self, key: str, val: dict, collection: str = DEFAULT_COLLECTION) -> None: + """Put a key-value pair into the store. + + Args: + key (str): key + val (dict): value + collection (str): collection name + """ + self._redis_client.hset(name=collection, key=key, value=json.dumps(val)) + + async def aput(self, key: str, val: dict, collection: str = DEFAULT_COLLECTION) -> None: + """Put a key-value pair into the store. + + Args: + key (str): key + val (dict): value + collection (str): collection name + """ + await self._async_redis_client.hset(name=collection, key=key, value=json.dumps(val)) + + def put_all( + self, + kv_pairs: List[Tuple[str, dict]], + collection: str = DEFAULT_COLLECTION, + batch_size: int = DEFAULT_BATCH_SIZE, + ) -> None: + """Put a dictionary of key-value pairs into the store. + + Args: + kv_pairs (List[Tuple[str, dict]]): key-value pairs + collection (str): collection name + """ + with self._redis_client.pipeline() as pipe: + cur_batch = 0 + for key, val in kv_pairs: + pipe.hset(name=collection, key=key, value=json.dumps(val)) + cur_batch += 1 + + if cur_batch >= batch_size: + cur_batch = 0 + pipe.execute() + + if cur_batch > 0: + pipe.execute() + + def get(self, key: str, collection: str = DEFAULT_COLLECTION) -> Optional[dict]: + """Get a value from the store. + + Args: + key (str): key + collection (str): collection name + """ + val_str = self._redis_client.hget(name=collection, key=key) + if val_str is None: + return None + return json.loads(val_str) + + async def aget(self, key: str, collection: str = DEFAULT_COLLECTION) -> Optional[dict]: + """Get a value from the store. + + Args: + key (str): key + collection (str): collection name + """ + val_str = await self._async_redis_client.hget(name=collection, key=key) + if val_str is None: + return None + return json.loads(val_str) + + def get_all(self, collection: str = DEFAULT_COLLECTION) -> Dict[str, dict]: + """Get all values from the store.""" + collection_kv_dict = OrderedDict() + for key, val_str in self._redis_client.hscan_iter(name=collection): + value = json.loads(val_str) + collection_kv_dict[key.decode()] = value + return collection_kv_dict + + async def aget_all(self, collection: str = DEFAULT_COLLECTION) -> Dict[str, dict]: + """Get all values from the store.""" + collection_kv_dict = OrderedDict() + async for key, val_str in self._async_redis_client.hscan_iter(name=collection): + value = json.loads(val_str) + collection_kv_dict[key.decode()] = value + return collection_kv_dict + + def delete(self, key: str, collection: str = DEFAULT_COLLECTION) -> bool: + """Delete a value from the store. + + Args: + key (str): key + collection (str): collection name + """ + deleted_num = self._redis_client.hdel(collection, key) + return bool(deleted_num > 0) + + async def adelete(self, key: str, collection: str = DEFAULT_COLLECTION) -> bool: + """Delete a value from the store. 
+ + Args: + key (str): key + collection (str): collection name + """ + deleted_num = await self._async_redis_client.hdel(collection, key) + return bool(deleted_num > 0) + + @classmethod + def from_host_and_port( + cls, + host: str, + port: int, + ) -> "RedisPersistence": + """Load a RedisPersistence from a Redis host and port. + + Args: + host (str): Redis host + port (int): Redis port + """ + url = f"redis://{host}:{port}".format(host=host, port=port) + return cls(redis_uri=url) diff --git a/comps/agent/src/integrations/strategy/react/planner.py b/comps/agent/src/integrations/strategy/react/planner.py index 773cc199ce..d500412171 100644 --- a/comps/agent/src/integrations/strategy/react/planner.py +++ b/comps/agent/src/integrations/strategy/react/planner.py @@ -11,7 +11,7 @@ from langgraph.prebuilt import create_react_agent from ...global_var import threads_global_kv -from ...utils import has_multi_tool_inputs, tool_renderer +from ...utils import filter_tools, has_multi_tool_inputs, tool_renderer from ..base_agent import BaseAgent from .prompt import REACT_SYS_MESSAGE, hwchase17_react_prompt @@ -136,7 +136,8 @@ async def non_streaming_run(self, query, config): # does not rely on langchain bind_tools API # since tgi and vllm still do not have very good support for tool calling like OpenAI -from typing import Annotated, Sequence, TypedDict +import json +from typing import Annotated, List, Optional, Sequence, TypedDict from langchain_core.messages import AIMessage, BaseMessage from langchain_core.prompts import PromptTemplate @@ -145,7 +146,7 @@ async def non_streaming_run(self, query, config): from langgraph.managed import IsLastStep from langgraph.prebuilt import ToolNode -from ...persistence import AgentPersistence, PersistenceConfig +from ...storage.persistence_memory import AgentPersistence, PersistenceConfig from ...utils import setup_chat_model from .utils import assemble_history, assemble_memory, convert_json_to_tool_call @@ -154,6 +155,7 @@ class AgentState(TypedDict): """The state of the agent.""" messages: Annotated[Sequence[BaseMessage], add_messages] + tool_choice: Optional[List[str]] = None is_last_step: IsLastStep @@ -191,7 +193,11 @@ def __call__(self, state): history = assemble_history(messages) print("@@@ History: ", history) - tools_descriptions = tool_renderer(self.tools) + tools_used = self.tools + if state.get("tool_choice") is not None: + tools_used = filter_tools(self.tools, state["tool_choice"]) + + tools_descriptions = tool_renderer(tools_used) print("@@@ Tools description: ", tools_descriptions) # invoke chain @@ -277,23 +283,47 @@ def should_continue(self, state: AgentState): def prepare_initial_state(self, query): return {"messages": [HumanMessage(content=query)]} - async def stream_generator(self, query, config): + async def stream_generator(self, query, config, thread_id=None): initial_state = self.prepare_initial_state(query) + if "tool_choice" in config: + initial_state["tool_choice"] = config.pop("tool_choice") + try: - async for event in self.app.astream(initial_state, config=config): - for node_name, node_state in event.items(): - yield f"--- CALL {node_name} ---\n" - for k, v in node_state.items(): - if v is not None: - yield f"{k}: {v}\n" + async for event in self.app.astream(initial_state, config=config, stream_mode=["updates"]): + event_type = event[0] + data = event[1] + if event_type == "updates": + for node_name, node_state in data.items(): + print(f"--- CALL {node_name} node ---\n") + for k, v in node_state.items(): + if v is not None: + 
print(f"------- {k}, {v} -------\n\n") + if node_name == "agent": + if v[0].content == "": + tool_names = [] + for tool_call in v[0].tool_calls: + tool_names.append(tool_call["name"]) + result = {"tool": tool_names} + else: + result = {"content": [v[0].content.replace("\n\n", "\n")]} + # ui needs this format + yield f"data: {json.dumps(result)}\n\n" + elif node_name == "tools": + full_content = v[0].content + tool_name = v[0].name + result = {"tool": tool_name, "content": [full_content]} + yield f"data: {json.dumps(result)}\n\n" + if not full_content: + continue - yield f"data: {repr(event)}\n\n" yield "data: [DONE]\n\n" except Exception as e: yield str(e) async def non_streaming_run(self, query, config): initial_state = self.prepare_initial_state(query) + if "tool_choice" in config: + initial_state["tool_choice"] = config.pop("tool_choice") try: async for s in self.app.astream(initial_state, config=config, stream_mode="values"): message = s["messages"][-1] diff --git a/comps/agent/src/integrations/utils.py b/comps/agent/src/integrations/utils.py index 6440671421..b39b1f603a 100644 --- a/comps/agent/src/integrations/utils.py +++ b/comps/agent/src/integrations/utils.py @@ -3,6 +3,7 @@ import argparse import importlib +import json from .config import env_config @@ -86,6 +87,14 @@ def tool_renderer(tools): return "\n".join(tool_strings) +def filter_tools(tools, tools_choices): + tool_used = [] + for tool in tools: + if tool.name in tools_choices: + tool_used.append(tool) + return tool_used + + def has_multi_tool_inputs(tools): ret = False for tool in tools: @@ -112,6 +121,18 @@ def adapt_custom_prompt(local_vars, custom_prompt): local_vars[k] = v +def assemble_store_messages(messages): + + inputs = [] + for mid in messages: + message = json.loads(messages[mid]) + # TODO: format messages + inputs.append("### " + message["role"].upper() + ":" + "\n" + message["content"][0]["text"]) + + # revert messages + return "\n".join(inputs) + + def get_args(): parser = argparse.ArgumentParser() # llm args diff --git a/comps/agent/src/requirements.txt b/comps/agent/src/requirements.txt index 431a5060a4..4a018c421a 100644 --- a/comps/agent/src/requirements.txt +++ b/comps/agent/src/requirements.txt @@ -22,14 +22,6 @@ pandas prometheus_fastapi_instrumentator pyarrow pydantic #==1.10.13 -shortuuid -tavily-python - -# used by agents -transformers -transformers[sentencepiece] - -uvicorn # used by document loader # beautifulsoup4 @@ -44,3 +36,13 @@ uvicorn # used by Ray # ray # virtualenv + +redis +shortuuid +tavily-python + +# used by agents +transformers +transformers[sentencepiece] + +uvicorn diff --git a/comps/agent/src/test_assistant_api.py b/comps/agent/src/test_assistant_api.py index b4c252d232..2a1be1883c 100644 --- a/comps/agent/src/test_assistant_api.py +++ b/comps/agent/src/test_assistant_api.py @@ -33,7 +33,15 @@ def process_request(api, query, is_stream=False): return False # step 1. create assistants - query = {} + # query = {} + query = { + "agent_config": { + "llm_engine": "tgi", + "llm_endpoint_url": args.llm_endpoint_url, + "tools": "/home/user/tools/custom_tools.yaml", + } + } + if ret := process_request("assistants", query): assistant_id = ret.get("id") print("Created Assistant Id: ", assistant_id) @@ -68,6 +76,57 @@ def process_request(api, query, is_stream=False): query = {"assistant_id": assistant_id} process_request(f"threads/{thread_id}/runs", query, is_stream=True) + # ---------------------------------------- test persistent + # step 1. 
create assistants + + query = { + "agent_config": { + "llm_engine": "tgi", + "llm_endpoint_url": args.llm_endpoint_url, + "tools": "/home/user/comps/agent/src/tools/custom_tools.yaml", + "with_store": True, + "store_config": {"redis_uri": f"redis://{args.ip_addr}:6379"}, + } + } + + if ret := process_request("assistants", query): + assistant_id = ret.get("id") + print("Created Assistant Id: ", assistant_id) + else: + print("Error when creating assistants !!!!") + return + + # step 2. create threads + query = {} + if ret := process_request("threads", query): + thread_id = ret.get("id") + print("Created Thread Id: ", thread_id) + else: + print("Error when creating threads !!!!") + return + + # step 3. add messages + if args.query is None: + query = { + "role": "user", + "content": "How old was Bill Gates when he built Microsoft?", + "assistant_id": assistant_id, + } + else: + query = {"role": "user", "content": args.query, "assistant_id": assistant_id} + if ret := process_request(f"threads/{thread_id}/messages", query): + pass + else: + print("Error when add messages !!!!") + return + + # step 4. run + print("You may cancel the running process with cmdline") + print(f"curl {url}/threads/{thread_id}/runs/cancel -X POST -H 'Content-Type: application/json'") + + query = {"assistant_id": assistant_id} + process_request(f"threads/{thread_id}/runs", query, is_stream=True) + if __name__ == "__main__": args1, _ = get_args() @@ -84,6 +143,7 @@ def process_request(api, query, is_stream=False): parser.add_argument("--filename", type=str, default="query.csv", help="query_list_file") parser.add_argument("--output", type=str, default="output.csv", help="query_list_file") parser.add_argument("--ut", action="store_true", help="ut") + parser.add_argument("--llm_endpoint_url", type=str, default="http://localhost:8085", help="tgi/vllm endpoint") args, _ = parser.parse_known_args() diff --git a/comps/agent/src/tools/custom_tools.py b/comps/agent/src/tools/custom_tools.py index d87a99374c..2b11d91047 100644 --- a/comps/agent/src/tools/custom_tools.py +++ b/comps/agent/src/tools/custom_tools.py @@ -4,9 +4,17 @@ # tool for unit test def search_web(query: str) -> str: - """Search the web for a given query.""" + """Search the web knowledge for a given query.""" ret_text = """ The Linux Foundation AI & Data announced the Open Platform for Enterprise AI (OPEA) as its latest Sandbox Project. OPEA aims to accelerate secure, cost-effective generative AI (GenAI) deployments for businesses by driving interoperability across a diverse and heterogeneous ecosystem, starting with retrieval-augmented generation (RAG). """ return ret_text + + +def search_weather(query: str) -> str: + """Search the weather for a given query.""" + ret_text = """ + It's clear. 
+ """ + return ret_text diff --git a/comps/animation/deployment/docker_compose/compose.yaml b/comps/animation/deployment/docker_compose/compose.yaml new file mode 100644 index 0000000000..1bc8ec75e7 --- /dev/null +++ b/comps/animation/deployment/docker_compose/compose.yaml @@ -0,0 +1,41 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +include: + - ../../../third_parties/wav2lip/deployment/docker_compose/compose.yaml + +services: + animation: + image: ${REGISTRY:-opea}/animation:${TAG:-latest} + container_name: animation-server + ports: + - ${ANIMATION_PORT:-9066}:9066 + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + WAV2LIP_ENDPOINT: ${WAV2LIP_ENDPOINT} + restart: unless-stopped + depends_on: + - wav2lip-server + + animation-gaudi: + image: ${REGISTRY:-opea}/animation:${TAG:-latest} + container_name: animation-gaudi-server + ports: + - ${ANIMATION_PORT:-9066}:9066 + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + HABANA_VISIBLE_DEVICES: all + OMPI_MCA_btl_vader_single_copy_mechanism: none + WAV2LIP_ENDPOINT: ${WAV2LIP_ENDPOINT} + runtime: habana + cap_add: + - SYS_NICE + restart: unless-stopped + depends_on: + - wav2lip-gaudi-server diff --git a/comps/animation/src/Dockerfile b/comps/animation/src/Dockerfile index 2608178272..a025d3f6c0 100644 --- a/comps/animation/src/Dockerfile +++ b/comps/animation/src/Dockerfile @@ -7,7 +7,6 @@ FROM python:3.11-slim RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ chown -R user /home/user/ -USER user ENV LANG=C.UTF-8 ARG ARCH=cpu @@ -18,7 +17,7 @@ RUN pip install --no-cache-dir --upgrade pip && \ pip install --no-cache-dir -r /home/user/comps/animation/src/requirements.txt ; ENV PYTHONPATH=$PYTHONPATH:/home/user - +USER user WORKDIR /home/user/comps/animation/src ENTRYPOINT ["python3", "opea_animation_microservice.py"] diff --git a/comps/animation/src/README.md b/comps/animation/src/README.md index a4d832041a..58eb2e6f5e 100644 --- a/comps/animation/src/README.md +++ b/comps/animation/src/README.md @@ -93,18 +93,37 @@ docker run --privileged -d --name "wav2lip-gaudi-service" -p 7860:7860 --runtime docker run -d -p 9066:9066 --ipc=host --name "animation-service" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e WAV2LIP_ENDPOINT=http://$ip_address:7860 opea/animation:latest ``` -# 🚀3. Validate Microservice +# 🚀3. Start Microservice with Docker Compose + +Alternatively, you can also start the Animation microservice with Docker Compose. + +- Xeon CPU + +```bash +cd comps/animation/deployment/docker_compose +docker compose -f compose.yaml up animation -d + +``` + +- Gaudi2 HPU + +```bash +cd comps/animation/deployment/docker_compose +docker compose -f compose.yaml up animation-gaudi -d +``` + +# 🚀4. Validate Microservice Once microservice starts, user can use below script to validate the running microservice. 
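Before running the validation scripts in the subsections below, a quick liveness probe can confirm the animation container is up. This is a minimal sketch, assuming the default `ANIMATION_PORT` (9066) from the compose file above and the generic `/health` route added to the shared OPEA HTTP service in this PR:

```bash
# Probe the animation microservice's health endpoint (added via http_service.py in this PR).
# A 200 response means the FastAPI server is up and accepting requests.
curl -f http://localhost:${ANIMATION_PORT:-9066}/health
```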
-## 3.1 Validate Wav2Lip service +## 4.1 Validate Wav2Lip service ```bash cd GenAIComps python3 comps/third_parties/wav2lip/src/check_wav2lip_server.py ``` -## 3.2 Validate Animation service +## 4.2 Validate Animation service ```bash cd GenAIComps diff --git a/comps/animation/src/requirements.txt b/comps/animation/src/requirements.txt index f58a320a32..0c6cda50ed 100644 --- a/comps/animation/src/requirements.txt +++ b/comps/animation/src/requirements.txt @@ -11,7 +11,7 @@ huggingface-hub librosa lmdb numba -numpy +numpy==1.23.5 opencv-python opentelemetry-api opentelemetry-exporter-otlp diff --git a/comps/asr/assets/ljspeech_30s_audio.wav b/comps/asr/assets/ljspeech_30s_audio.wav new file mode 100644 index 0000000000..85193b7f6a Binary files /dev/null and b/comps/asr/assets/ljspeech_30s_audio.wav differ diff --git a/comps/asr/assets/ljspeech_60s_audio.wav b/comps/asr/assets/ljspeech_60s_audio.wav new file mode 100644 index 0000000000..0c71f5d908 Binary files /dev/null and b/comps/asr/assets/ljspeech_60s_audio.wav differ diff --git a/comps/asr/deployment/docker_compose/compose.yaml b/comps/asr/deployment/docker_compose/compose.yaml new file mode 100644 index 0000000000..e9cb743482 --- /dev/null +++ b/comps/asr/deployment/docker_compose/compose.yaml @@ -0,0 +1,70 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +services: + whisper-service: + image: ${REGISTRY:-opea}/whisper:${TAG:-latest} + container_name: whisper-service + ports: + - ${WHISPER_PORT:-7066}:7066 + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + restart: unless-stopped + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:7066/health"] + interval: 10s + timeout: 6s + retries: 18 + whisper-gaudi-service: + image: ${REGISTRY:-opea}/whisper-gaudi:${TAG:-latest} + container_name: whisper-gaudi-service + ports: + - ${WHISPER_PORT:-7066}:7066 + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + HABANA_VISIBLE_DEVICES: all + OMPI_MCA_btl_vader_single_copy_mechanism: none + runtime: habana + cap_add: + - SYS_NICE + restart: unless-stopped + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:7066/health"] + interval: 10s + timeout: 6s + retries: 60 + asr: + image: ${REGISTRY:-opea}/asr:${TAG:-latest} + container_name: asr-service + ports: + - ${ASR_PORT:-9099}:9099 + ipc: host + environment: + ASR_ENDPOINT: ${ASR_ENDPOINT} + ASR_COMPONENT_NAME: ${ASR_COMPONENT_NAME:-OPEA_WHISPER_ASR} + asr-whisper: + extends: asr + container_name: asr-whisper-service + environment: + ASR_COMPONENT_NAME: ${ASR_COMPONENT_NAME:-OPEA_WHISPER_ASR} + depends_on: + whisper-service: + condition: service_healthy + asr-whisper-gaudi: + extends: asr + container_name: asr-whisper-gaudi-service + environment: + ASR_COMPONENT_NAME: ${ASR_COMPONENT_NAME:-OPEA_WHISPER_ASR} + depends_on: + whisper-gaudi-service: + condition: service_healthy + +networks: + default: + driver: bridge diff --git a/comps/asr/deployment/docker_compose/compose_whisper.yaml b/comps/asr/deployment/docker_compose/compose_whisper.yaml deleted file mode 100644 index d64ecffc32..0000000000 --- a/comps/asr/deployment/docker_compose/compose_whisper.yaml +++ /dev/null @@ -1,35 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -services: - whisper-service: - image: ${REGISTRY:-opea}/whisper:${TAG:-latest} - container_name: whisper-service - ports: - - "7066:7066" - ipc: host - environment: - 
no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - restart: unless-stopped - healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:7066/health"] - interval: 10s - timeout: 6s - retries: 18 - asr: - image: ${REGISTRY:-opea}/asr:${TAG:-latest} - container_name: asr-service - ports: - - "9099:9099" - ipc: host - environment: - ASR_ENDPOINT: ${ASR_ENDPOINT} - dependes_on: - speecht5-service: - condition: service_healthy - -networks: - default: - driver: bridge diff --git a/comps/asr/deployment/docker_compose/compose_whisper_hpu.yaml b/comps/asr/deployment/docker_compose/compose_whisper_hpu.yaml deleted file mode 100644 index a27d219086..0000000000 --- a/comps/asr/deployment/docker_compose/compose_whisper_hpu.yaml +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -services: - whisper-service: - image: ${REGISTRY:-opea}/whisper-gaudi:${TAG:-latest} - container_name: whisper-service - ports: - - "7066:7066" - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - HABANA_VISIBLE_DEVICES: all - OMPI_MCA_btl_vader_single_copy_mechanism: none - runtime: habana - cap_add: - - SYS_NICE - restart: unless-stopped - healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:7066/health"] - interval: 10s - timeout: 6s - retries: 18 - asr: - image: ${REGISTRY:-opea}/asr:${TAG:-latest} - container_name: asr-service - ports: - - "3001:9099" - ipc: host - environment: - ASR_ENDPOINT: ${ASR_ENDPOINT} - dependes_on: - speecht5-service: - condition: service_healthy - -networks: - default: - driver: bridge diff --git a/comps/asr/deployment/kubernetes/README.md b/comps/asr/deployment/kubernetes/README.md index e69de29bb2..54f5676832 100644 --- a/comps/asr/deployment/kubernetes/README.md +++ b/comps/asr/deployment/kubernetes/README.md @@ -0,0 +1,11 @@ +# Deploy ASR microservice on Kubernetes cluster + +- You should have Helm (version >= 3.15) installed. Refer to the [Helm Installation Guide](https://helm.sh/docs/intro/install/) for more information. +- For more deployment options, refer to [helm charts README](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts#readme). 
+ +## Deploy on Kubernetes + +``` +export HFTOKEN="insert-your-huggingface-token-here" +helm install asr oci://ghcr.io/opea-project/charts/asr --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f cpu-values.yaml +``` diff --git a/comps/asr/deployment/kubernetes/cpu-values.yaml b/comps/asr/deployment/kubernetes/cpu-values.yaml new file mode 100644 index 0000000000..221ea994d5 --- /dev/null +++ b/comps/asr/deployment/kubernetes/cpu-values.yaml @@ -0,0 +1,5 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +whisper: + enabled: true diff --git a/comps/asr/src/Dockerfile b/comps/asr/src/Dockerfile index ab0228098f..4ee860c110 100644 --- a/comps/asr/src/Dockerfile +++ b/comps/asr/src/Dockerfile @@ -6,7 +6,6 @@ FROM python:3.11-slim RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ chown -R user /home/user/ -USER user ENV LANG=C.UTF-8 ARG ARCH=cpu @@ -22,7 +21,7 @@ RUN pip install --no-cache-dir --upgrade pip setuptools && \ fi ENV PYTHONPATH=$PYTHONPATH:/home/user - +USER user WORKDIR /home/user/comps/asr/src ENTRYPOINT ["python", "opea_asr_microservice.py"] diff --git a/comps/asr/src/README.md b/comps/asr/src/README.md index 406b7c5414..d38c05d0c2 100644 --- a/comps/asr/src/README.md +++ b/comps/asr/src/README.md @@ -73,15 +73,15 @@ Alternatively, you can also start the ASR microservice with Docker. - Xeon CPU ```bash -cd ../.. -docker build -t opea/whisper:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/src/integrations/dependency/Dockerfile . +cd ../../.. +docker build -t opea/whisper:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/src/integrations/dependency/whisper/Dockerfile . ``` - Gaudi2 HPU ```bash -cd ../.. -docker build -t opea/whisper-gaudi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/src/integrations/dependency/Dockerfile.intel_hpu . +cd ../../.. +docker build -t opea/whisper-gaudi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/src/integrations/dependency/whisper/Dockerfile.intel_hpu . ``` #### 2.1.2 ASR Service Image @@ -129,3 +129,26 @@ curl http://localhost:9099/v1/audio/transcriptions \ # python python check_asr_server.py ``` + +## 🚀3. Start Microservice with Docker Compose (Option 3) + +Alternatively, you can also start the ASR microservice with Docker Compose. 
+ +```bash +export ip_address=$(hostname -I | awk '{print $1}') +export ASR_ENDPOINT=http://$ip_address:7066 +export no_proxy=localhost,$no_proxy + +# cpu +docker compose -f ../deployment/docker_compose/compose.yaml up whisper-service asr-whisper -d + +# hpu +docker compose -f ../deployment/docker_compose/compose.yaml up whisper-gaudi-service asr-whisper-gaudi -d + +# Test +wget https://github.com/intel/intel-extension-for-transformers/raw/main/intel_extension_for_transformers/neural_chat/assets/audio/sample.wav +curl http://localhost:9099/v1/audio/transcriptions \ + -H "Content-Type: multipart/form-data" \ + -F file="@./sample.wav" \ + -F model="openai/whisper-small" +``` diff --git a/comps/asr/src/integrations/dependency/whisper/Dockerfile b/comps/asr/src/integrations/dependency/whisper/Dockerfile index a403e6cd68..d0e09cfba3 100644 --- a/comps/asr/src/integrations/dependency/whisper/Dockerfile +++ b/comps/asr/src/integrations/dependency/whisper/Dockerfile @@ -13,12 +13,10 @@ ARG ARCH=cpu # Install system dependencies RUN apt-get update \ - && apt-get install -y ffmpeg + && apt-get install -y ffmpeg curl COPY --chown=user:user comps /home/user/comps -USER user - RUN pip install --no-cache-dir --upgrade pip setuptools && \ pip install --no-cache-dir -r /home/user/comps/asr/src/requirements.txt && \ if [ "${ARCH}" = "cpu" ]; then \ @@ -29,7 +27,7 @@ RUN pip install --no-cache-dir --upgrade pip setuptools && \ fi ENV PYTHONPATH=$PYTHONPATH:/home/user - +USER user WORKDIR /home/user/comps/asr/src/integrations/dependency/whisper ENTRYPOINT ["python", "whisper_server.py", "--device", "cpu"] diff --git a/comps/asr/src/integrations/dependency/whisper/Dockerfile.intel_hpu b/comps/asr/src/integrations/dependency/whisper/Dockerfile.intel_hpu index ec591ea3d7..fc1d93a1de 100644 --- a/comps/asr/src/integrations/dependency/whisper/Dockerfile.intel_hpu +++ b/comps/asr/src/integrations/dependency/whisper/Dockerfile.intel_hpu @@ -15,12 +15,10 @@ ENV LD_LIBRARY_PATH=/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH # Install system dependencies RUN apt-get update \ - && apt-get install -y ffmpeg + && apt-get install -y ffmpeg curl COPY --chown=user:user comps /home/user/comps -USER user - # Install requirements and optimum habana RUN pip install --no-cache-dir --upgrade pip && \ pip install --no-cache-dir -r /home/user/comps/asr/src/requirements.txt && \ @@ -28,6 +26,7 @@ RUN pip install --no-cache-dir --upgrade pip && \ ENV PYTHONPATH=$PYTHONPATH:/home/users +USER user WORKDIR /home/user/comps/asr/src/integrations/dependency/whisper ENTRYPOINT ["python", "whisper_server.py", "--device", "hpu"] diff --git a/comps/asr/src/integrations/dependency/whisper/whisper_model.py b/comps/asr/src/integrations/dependency/whisper/whisper_model.py index c3e8108031..47e3edbcd7 100644 --- a/comps/asr/src/integrations/dependency/whisper/whisper_model.py +++ b/comps/asr/src/integrations/dependency/whisper/whisper_model.py @@ -41,8 +41,12 @@ def __init__( self.return_timestamps = return_timestamps if device == "hpu": - self._warmup_whisper_hpu_graph("https://github.com/Spycsh/assets/raw/main/ljspeech_60s_audio.wav") - self._warmup_whisper_hpu_graph("https://github.com/Spycsh/assets/raw/main/ljspeech_30s_audio.wav") + self._warmup_whisper_hpu_graph( + os.path.dirname(os.path.abspath(__file__)) + "/../../../../assets/ljspeech_30s_audio.wav" + ) + self._warmup_whisper_hpu_graph( + os.path.dirname(os.path.abspath(__file__)) + "/../../../../assets/ljspeech_60s_audio.wav" + ) def _audiosegment_to_librosawav(self, audiosegment): # 
https://github.com/jiaaro/pydub/blob/master/API.markdown#audiosegmentget_array_of_samples @@ -56,14 +60,9 @@ def _audiosegment_to_librosawav(self, audiosegment): return fp_arr - def _warmup_whisper_hpu_graph(self, url): - print("[ASR] fetch warmup audio...") - urllib.request.urlretrieve( - url, - "warmup.wav", - ) + def _warmup_whisper_hpu_graph(self, path_to_audio): print("[ASR] warmup...") - waveform = AudioSegment.from_file("warmup.wav").set_frame_rate(16000) + waveform = AudioSegment.from_file(path_to_audio).set_frame_rate(16000) waveform = self._audiosegment_to_librosawav(waveform) try: @@ -206,7 +205,3 @@ def audio2text(self, audio_path): "sample.wav", ) text = asr.audio2text("sample.wav") - - for i in [5, 10, 30, 60]: - urllib.request.urlretrieve(f"https://github.com/Spycsh/assets/raw/main/ljspeech_{i}s_audio.wav", "sample.wav") - text = asr.audio2text("sample.wav") diff --git a/comps/asr/src/integrations/opea_whisper.py b/comps/asr/src/integrations/whisper.py similarity index 99% rename from comps/asr/src/integrations/opea_whisper.py rename to comps/asr/src/integrations/whisper.py index 72f688e888..e9c681f69e 100644 --- a/comps/asr/src/integrations/opea_whisper.py +++ b/comps/asr/src/integrations/whisper.py @@ -2,7 +2,6 @@ # SPDX-License-Identifier: Apache-2.0 import os -import time from typing import List import requests diff --git a/comps/asr/src/opea_asr_microservice.py b/comps/asr/src/opea_asr_microservice.py index decf602a69..8210149613 100644 --- a/comps/asr/src/opea_asr_microservice.py +++ b/comps/asr/src/opea_asr_microservice.py @@ -6,7 +6,7 @@ from typing import List from fastapi import File, Form, UploadFile -from integrations.opea_whisper import OpeaWhisperAsr +from integrations.whisper import OpeaWhisperAsr from comps import ( Base64ByteStrDoc, diff --git a/comps/chathistory/README.md b/comps/chathistory/README.md index 4f7bcbf717..5ba529774b 100644 --- a/comps/chathistory/README.md +++ b/comps/chathistory/README.md @@ -23,4 +23,4 @@ The Chat History microservice able to support various database backends for stor ### Chat History with MongoDB -For more detail, please refer to this [README](./mongo/README.md) +For more detail, please refer to this [README](src/README.md) diff --git a/comps/chathistory/deployment/docker_compose/monga.yaml b/comps/chathistory/deployment/docker_compose/compose.yaml similarity index 86% rename from comps/chathistory/deployment/docker_compose/monga.yaml rename to comps/chathistory/deployment/docker_compose/compose.yaml index e272d4f91c..db9812e692 100644 --- a/comps/chathistory/deployment/docker_compose/monga.yaml +++ b/comps/chathistory/deployment/docker_compose/compose.yaml @@ -15,10 +15,10 @@ services: command: mongod --quiet --logpath /dev/null chathistory-mongo: - image: opea/chathistory-mongo-server:latest + image: ${REGISTRY:-opea}/chathistory-mongo-server:${TAG:-latest} container_name: chathistory-mongo-server ports: - - "6012:6012" + - "${CHATHISTORY_PORT:-6012}:6012" ipc: host environment: http_proxy: ${http_proxy} diff --git a/comps/chathistory/deployment/kubernetes/README.md b/comps/chathistory/deployment/kubernetes/README.md index e69de29bb2..cb105bb7db 100644 --- a/comps/chathistory/deployment/kubernetes/README.md +++ b/comps/chathistory/deployment/kubernetes/README.md @@ -0,0 +1,11 @@ +# Deploy chathistory microservice on Kubernetes cluster + +- You should have Helm (version >= 3.15) installed. Refer to the [Helm Installation Guide](https://helm.sh/docs/intro/install/) for more information. 
+- For more deployment options, refer to [helm charts README](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts#readme). + +## Deploy on Kubernetes + +``` +export HFTOKEN="insert-your-huggingface-token-here" +helm install chathistory-usvc oci://ghcr.io/opea-project/charts/chathistory-usvc --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f cpu-values.yaml +``` diff --git a/comps/chathistory/deployment/kubernetes/cpu-values.yaml b/comps/chathistory/deployment/kubernetes/cpu-values.yaml new file mode 100644 index 0000000000..7850c0ee9d --- /dev/null +++ b/comps/chathistory/deployment/kubernetes/cpu-values.yaml @@ -0,0 +1,5 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +mongodb: + enabled: true diff --git a/comps/chathistory/mongo/Dockerfile b/comps/chathistory/src/Dockerfile similarity index 76% rename from comps/chathistory/mongo/Dockerfile rename to comps/chathistory/src/Dockerfile index 58b1c5aa4d..3b5e21df5c 100644 --- a/comps/chathistory/mongo/Dockerfile +++ b/comps/chathistory/src/Dockerfile @@ -14,17 +14,17 @@ RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ chown -R user /home/user/ -USER user - COPY comps /home/user/comps COPY requirements.txt /home/user/ RUN pip install --no-cache-dir --upgrade pip setuptools && \ - pip install --no-cache-dir -r /home/user/comps/chathistory/mongo/requirements.txt && \ + pip install --no-cache-dir -r /home/user/comps/chathistory/src/requirements.txt && \ pip install --no-cache-dir -r /home/user/requirements.txt ENV PYTHONPATH=$PYTHONPATH:/home/user -WORKDIR /home/user/comps/chathistory/mongo +USER user + +WORKDIR /home/user/comps/chathistory/src -ENTRYPOINT ["python", "chathistory_mongo.py"] +ENTRYPOINT ["python", "opea_chathistory_microservice.py"] diff --git a/comps/chathistory/mongo/README.md b/comps/chathistory/src/README.md similarity index 81% rename from comps/chathistory/mongo/README.md rename to comps/chathistory/src/README.md index 0eb949cfc6..5d753fdec3 100644 --- a/comps/chathistory/mongo/README.md +++ b/comps/chathistory/src/README.md @@ -17,13 +17,13 @@ export COLLECTION_NAME=${COLLECTION_NAME} --- -## 🚀Start Microservice with Docker +## 🚀 Start Microservice with Docker (Option 1) ### Build Docker Image ```bash cd ../../../../ -docker build -t opea/chathistory-mongo-server:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/chathistory/mongo/Dockerfile . +docker build -t opea/chathistory-mongo-server:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/chathistory/src/Dockerfile . 
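# Note: the `docker run` example below starts the container from opea/chathistory-mongo:latest,
# while the command above tags the image as opea/chathistory-mongo-server:latest; adjust one of
# the two names so they match before running.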
``` ### Run Docker with CLI @@ -37,11 +37,19 @@ docker build -t opea/chathistory-mongo-server:latest --build-arg https_proxy=$ht - Run the Chat History microservice ```bash - docker run -d --name="chathistory-mongo-server" -p 6012:6012 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e MONGO_HOST=${MONGO_HOST} -e MONGO_PORT=${MONGO_PORT} -e DB_NAME=${DB_NAME} -e COLLECTION_NAME=$ {COLLECTION_NAME} opea/chathistory-mongo-server:latest + docker run -d --name="chathistory-mongo" -p 6012:6012 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e MONGO_HOST=${MONGO_HOST} -e MONGO_PORT=${MONGO_PORT} -e DB_NAME=${DB_NAME} -e COLLECTION_NAME=${COLLECTION_NAME} opea/chathistory-mongo:latest ``` --- +## 🚀 Start Microservice with Docker Compose (Option 2) + +```bash +docker compose -f ../deployment/docker_compose/compose.yaml up -d +``` + +--- + ## ✅ Invoke Microservice The Chat History microservice exposes the following API endpoints: diff --git a/comps/chathistory/src/__init__.py b/comps/chathistory/src/__init__.py new file mode 100644 index 0000000000..4057dc0163 --- /dev/null +++ b/comps/chathistory/src/__init__.py @@ -0,0 +1,2 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 diff --git a/comps/chathistory/mongo/mongo_store.py b/comps/chathistory/src/document_store.py similarity index 97% rename from comps/chathistory/mongo/mongo_store.py rename to comps/chathistory/src/document_store.py index 03d53af25c..ae2501cfea 100644 --- a/comps/chathistory/mongo/mongo_store.py +++ b/comps/chathistory/src/document_store.py @@ -4,8 +4,9 @@ import bson.errors as BsonError from bson.objectid import ObjectId -from config import COLLECTION_NAME -from mongo_conn import MongoClient + +from comps.chathistory.src.integrations.mongo.config import COLLECTION_NAME +from comps.chathistory.src.integrations.mongo.mongo_conn import MongoClient class DocumentStore: diff --git a/comps/chathistory/src/integrations/__init__.py b/comps/chathistory/src/integrations/__init__.py new file mode 100644 index 0000000000..4057dc0163 --- /dev/null +++ b/comps/chathistory/src/integrations/__init__.py @@ -0,0 +1,2 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 diff --git a/comps/chathistory/src/integrations/mongo/__init__.py b/comps/chathistory/src/integrations/mongo/__init__.py new file mode 100644 index 0000000000..4057dc0163 --- /dev/null +++ b/comps/chathistory/src/integrations/mongo/__init__.py @@ -0,0 +1,2 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 diff --git a/comps/chathistory/mongo/config.py b/comps/chathistory/src/integrations/mongo/config.py similarity index 100% rename from comps/chathistory/mongo/config.py rename to comps/chathistory/src/integrations/mongo/config.py diff --git a/comps/chathistory/mongo/mongo_conn.py b/comps/chathistory/src/integrations/mongo/mongo_conn.py similarity index 83% rename from comps/chathistory/mongo/mongo_conn.py rename to comps/chathistory/src/integrations/mongo/mongo_conn.py index 6bbcc16c31..952aac95c9 100644 --- a/comps/chathistory/mongo/mongo_conn.py +++ b/comps/chathistory/src/integrations/mongo/mongo_conn.py @@ -4,7 +4,8 @@ from typing import Any import motor.motor_asyncio as motor -from config import DB_NAME, MONGO_HOST, MONGO_PORT + +from comps.chathistory.src.integrations.mongo.config import DB_NAME, MONGO_HOST, MONGO_PORT class MongoClient: diff --git a/comps/chathistory/mongo/chathistory_mongo.py 
b/comps/chathistory/src/opea_chathistory_microservice.py similarity index 97% rename from comps/chathistory/mongo/chathistory_mongo.py rename to comps/chathistory/src/opea_chathistory_microservice.py index 29f5d41cb6..4bae2ab926 100644 --- a/comps/chathistory/mongo/chathistory_mongo.py +++ b/comps/chathistory/src/opea_chathistory_microservice.py @@ -1,13 +1,13 @@ -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 import os from typing import Optional from fastapi import HTTPException -from mongo_store import DocumentStore from pydantic import BaseModel from comps import CustomLogger +from comps.chathistory.src.document_store import DocumentStore from comps.cores.mega.micro_service import opea_microservices, register_microservice from comps.cores.proto.api_protocol import ChatCompletionRequest diff --git a/comps/chathistory/mongo/requirements.txt b/comps/chathistory/src/requirements.txt similarity index 100% rename from comps/chathistory/mongo/requirements.txt rename to comps/chathistory/src/requirements.txt diff --git a/comps/cores/assets/img/opea_telemetry.jpg b/comps/cores/assets/img/opea_telemetry.jpg new file mode 100644 index 0000000000..f472126645 Binary files /dev/null and b/comps/cores/assets/img/opea_telemetry.jpg differ diff --git a/comps/cores/mega/http_service.py b/comps/cores/mega/http_service.py index 799cc5c80c..2b5a472f70 100644 --- a/comps/cores/mega/http_service.py +++ b/comps/cores/mega/http_service.py @@ -8,6 +8,7 @@ from typing import Optional from fastapi import FastAPI +from fastapi.responses import Response from prometheus_fastapi_instrumentator import Instrumentator from uvicorn import Config, Server @@ -73,6 +74,11 @@ async def _health_check(): """Get the health status of this GenAI microservice.""" return {"Service Title": self.title, "Service Description": self.description} + @app.get("/health") + async def _health() -> Response: + """Health check.""" + return Response(status_code=200) + @app.get( path="/v1/statistics", summary="Get the statistics of GenAI services", diff --git a/comps/cores/mega/orchestrator.py b/comps/cores/mega/orchestrator.py index 851fb67d03..4053988566 100644 --- a/comps/cores/mega/orchestrator.py +++ b/comps/cores/mega/orchestrator.py @@ -2,6 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 import asyncio +import contextlib import copy import json import os @@ -16,6 +17,7 @@ from pydantic import BaseModel from ..proto.docarray import LLMParams +from ..telemetry.opea_telemetry import opea_telemetry, tracer from .constants import ServiceType from .dag import DAG from .logger import CustomLogger @@ -80,6 +82,7 @@ def flow_to(self, from_service, to_service): logger.error(e) return False + @opea_telemetry async def schedule(self, initial_inputs: Dict | BaseModel, llm_parameters: LLMParams = LLMParams(), **kwargs): req_start = time.time() self.metrics.pending_update(True) @@ -166,6 +169,26 @@ def process_outputs(self, prev_nodes: List, result_dict: Dict) -> Dict: all_outputs.update(result_dict[prev_node]) return all_outputs + def wrap_iterable(self, iterable, is_first=True): + + with tracer.start_as_current_span("llm_generate_stream"): + while True: + with ( + tracer.start_as_current_span("llm_generate_stream_first_token") + if is_first + else contextlib.nullcontext() + ): # else tracer.start_as_current_span(f"llm_generate_stream_next_token") + try: + token = next(iterable) + yield token + is_first = False + except StopIteration: + # Exiting the iterable loop cleanly + break + 
except Exception as e: + raise e + + @opea_telemetry async def execute( self, session: aiohttp.client.ClientSession, @@ -193,14 +216,15 @@ async def execute( # Still leave to sync requests.post for StreamingResponse if LOGFLAG: logger.info(inputs) - response = requests.post( - url=endpoint, - data=json.dumps(inputs), - headers={"Content-type": "application/json"}, - proxies={"http": None}, - stream=True, - timeout=1000, - ) + with tracer.start_as_current_span(f"{cur_node}_asyn_generate"): + response = requests.post( + url=endpoint, + data=json.dumps(inputs), + headers={"Content-type": "application/json"}, + proxies={"http": None}, + stream=True, + timeout=1000, + ) downstream = runtime_graph.downstream(cur_node) if downstream: assert len(downstream) == 1, "Not supported multiple stream downstreams yet!" @@ -214,7 +238,9 @@ def generate(): # response.elapsed = time until first headers received buffered_chunk_str = "" is_first = True - for chunk in response.iter_content(chunk_size=None): + + for chunk in self.wrap_iterable(response.iter_content(chunk_size=None)): + if chunk: if downstream: chunk = chunk.decode("utf-8") @@ -240,6 +266,7 @@ def generate(): token_start = self.metrics.token_update(token_start, is_first) yield chunk is_first = False + self.metrics.request_update(req_start) self.metrics.pending_update(False) @@ -256,19 +283,18 @@ def generate(): input_data = {k: v for k, v in input_data.items() if v is not None} else: input_data = inputs - async with session.post(endpoint, json=input_data) as response: - if response.content_type == "audio/wav": - audio_data = await response.read() - data = self.align_outputs( - audio_data, cur_node, inputs, runtime_graph, llm_parameters_dict, **kwargs - ) - else: - # Parse as JSON - data = await response.json() - # post process - data = self.align_outputs(data, cur_node, inputs, runtime_graph, llm_parameters_dict, **kwargs) - - return data, cur_node + with tracer.start_as_current_span(f"{cur_node}_generate"): + response = await session.post(endpoint, json=input_data) + if response.content_type == "audio/wav": + audio_data = await response.read() + data = self.align_outputs(audio_data, cur_node, inputs, runtime_graph, llm_parameters_dict, **kwargs) + else: + # Parse as JSON + data = await response.json() + # post process + data = self.align_outputs(data, cur_node, inputs, runtime_graph, llm_parameters_dict, **kwargs) + + return data, cur_node def align_inputs(self, inputs, *args, **kwargs): """Override this method in megaservice definition.""" diff --git a/comps/cores/proto/api_protocol.py b/comps/cores/proto/api_protocol.py index 44972acd61..f8fec8d3a9 100644 --- a/comps/cores/proto/api_protocol.py +++ b/comps/cores/proto/api_protocol.py @@ -269,11 +269,10 @@ class ChatCompletionRequest(BaseModel): request_type: Literal["chat"] = "chat" -class DocSumChatCompletionRequest(BaseModel): - llm_params: Optional[ChatCompletionRequest] = None - text: Optional[str] = None - audio: Optional[str] = None - video: Optional[str] = None +class DocSumChatCompletionRequest(ChatCompletionRequest): + summary_type: str = "auto" # can be "auto", "stuff", "truncate", "map_reduce", "refine" + chunk_size: int = -1 + chunk_overlap: int = -1 type: Optional[str] = None @@ -495,6 +494,7 @@ class CreateMessagesRequest(BaseModel): role: str = "user" content: Union[str, List[MessageContent]] attachments: Attachments = None + assistant_id: str = None class CreateThreadsRequest(BaseModel): diff --git a/comps/cores/proto/docarray.py b/comps/cores/proto/docarray.py index 
d26d48a119..5a86d3c90a 100644 --- a/comps/cores/proto/docarray.py +++ b/comps/cores/proto/docarray.py @@ -212,12 +212,6 @@ def chat_template_must_contain_variables(cls, v): return v -class DocSumLLMParams(LLMParamsDoc): - summary_type: str = "auto" # can be "auto", "stuff", "truncate", "map_reduce", "refine" - chunk_size: int = -1 - chunk_overlap: int = -1 - - class LLMParams(BaseDoc): model: Optional[str] = None max_tokens: int = 1024 @@ -284,7 +278,7 @@ class GraphDoc(BaseDoc): class LVMDoc(BaseDoc): - image: str + image: Union[str, List[str]] prompt: str max_new_tokens: conint(ge=0, le=1024) = 512 top_k: int = 10 diff --git a/comps/cores/telemetry/README.md b/comps/cores/telemetry/README.md index 4474d52456..9a99e7f000 100644 --- a/comps/cores/telemetry/README.md +++ b/comps/cores/telemetry/README.md @@ -2,7 +2,7 @@ OPEA Comps currently provides telemetry functionalities for metrics and tracing using Prometheus, Grafana, and Jaeger. Here’s a basic introduction to these tools: -![opea telemetry](https://raw.githubusercontent.com/Spycsh/assets/main/OPEA%20Telemetry.jpg) +![opea telemetry](../assets/img/opea_telemetry.jpg) ## Metrics diff --git a/comps/cores/telemetry/opea_telemetry.py b/comps/cores/telemetry/opea_telemetry.py index 4d66b9c16b..5f08d4bd4d 100644 --- a/comps/cores/telemetry/opea_telemetry.py +++ b/comps/cores/telemetry/opea_telemetry.py @@ -6,12 +6,29 @@ from functools import wraps from opentelemetry import trace +from opentelemetry.context.contextvars_context import ContextVarsRuntimeContext from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter as HTTPSpanExporter from opentelemetry.sdk.resources import SERVICE_NAME, Resource from opentelemetry.sdk.trace import TracerProvider from opentelemetry.sdk.trace.export import BatchSpanProcessor from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter + +def detach_ignore_err(self, token: object) -> None: + """Resets Context to a previous value. + + Args: + token: A reference to a previous Context. + """ + try: + self._current_context.reset(token) # type: ignore + except Exception as e: + pass + + +# bypass the ValueError that ContextVar context was created in a different Context from StreamingResponse +ContextVarsRuntimeContext.detach = detach_ignore_err + telemetry_endpoint = os.environ.get("TELEMETRY_ENDPOINT", "http://localhost:4318/v1/traces") resource = Resource.create({SERVICE_NAME: "opea"}) @@ -26,7 +43,6 @@ def opea_telemetry(func): - print(f"[*** telemetry ***] {func.__name__} under telemetry.") if inspect.iscoroutinefunction(func): @wraps(func) diff --git a/comps/dataprep/README.md b/comps/dataprep/README.md index 46a57d37da..0d09545d21 100644 --- a/comps/dataprep/README.md +++ b/comps/dataprep/README.md @@ -11,7 +11,7 @@ apt-get install libreoffice ## Use LVM (Large Vision Model) for Summarizing Image Data -Occasionally unstructured data will contain image data, to convert the image data to the text data, LVM can be used to summarize the image. To leverage LVM, please refer to this [readme](../lvms/llava/README.md) to start the LVM microservice first and then set the below environment variable, before starting any dataprep microservice. +Occasionally unstructured data will contain image data, to convert the image data to the text data, LVM can be used to summarize the image. 
To leverage LVM, please refer to this [readme](../lvms/src/README.md) to start the LVM microservice first and then set the below environment variable, before starting any dataprep microservice. ```bash export SUMMARIZE_IMAGE_VIA_LVM=1 @@ -19,28 +19,40 @@ export SUMMARIZE_IMAGE_VIA_LVM=1 ## Dataprep Microservice with Redis -For details, please refer to this [readme](redis/README.md) +For details, please refer to this [readme](src/README_redis.md) ## Dataprep Microservice with Milvus -For details, please refer to this [readme](milvus/langchain/README.md) +For details, please refer to this [readme](src/README_milvus.md) ## Dataprep Microservice with Qdrant -For details, please refer to this [readme](qdrant/langchain/README.md) +For details, please refer to this [readme](src/README_qdrant.md) ## Dataprep Microservice with Pinecone -For details, please refer to this [readme](pinecone/langchain/README.md) +For details, please refer to this [readme](src/README_pinecone.md) ## Dataprep Microservice with PGVector -For details, please refer to this [readme](pgvector/langchain/README.md) +For details, please refer to this [readme](src/README_pgvector.md) ## Dataprep Microservice with VDMS -For details, please refer to this [readme](vdms/README.md) +For details, please refer to this [readme](src/README_vdms.md) ## Dataprep Microservice with Multimodal -For details, please refer to this [readme](multimodal/redis/langchain/README.md) +For details, please refer to this [readme](src/README_multimodal.md) + +## Dataprep Microservice with ElasticSearch + +For details, please refer to this [readme](src/README_elasticsearch.md) + +## Dataprep Microservice with OpenSearch + +For details, please refer to this [readme](src/README_opensearch.md) + +## Dataprep Microservice with neo4j + +For details, please refer to this [readme](src/README_neo4j_llamaindex.md) diff --git a/comps/dataprep/deployment/docker_compose/compose.yaml b/comps/dataprep/deployment/docker_compose/compose.yaml new file mode 100644 index 0000000000..ef54a69e0c --- /dev/null +++ b/comps/dataprep/deployment/docker_compose/compose.yaml @@ -0,0 +1,258 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + + +include: + - ../../../third_parties/elasticsearch/deployment/docker_compose/compose.yaml + - ../../../third_parties/neo4j/deployment/docker_compose/compose.yaml + - ../../../third_parties/opensearch/deployment/docker_compose/compose.yaml + - ../../../third_parties/pgvector/deployment/docker_compose/compose.yaml + - ../../../third_parties/qdrant/deployment/docker_compose/compose.yaml + - ../../../third_parties/redis/deployment/docker_compose/compose.yaml + - ../../../third_parties/vdms/deployment/docker_compose/compose.yaml + - ../../../third_parties/tgi/deployment/docker_compose/compose.yaml + - ../../../third_parties/tei/deployment/docker_compose/compose.yaml + +services: + + dataprep-elasticsearch: + image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} + container_name: dataprep-elasticsearch + ports: + - "${DATAPREP_PORT:-11100}:5000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_ELASTICSEARCH" + ES_CONNECTION_STRING: ${ES_CONNECTION_STRING} + INDEX_NAME: ${INDEX_NAME} + TEI_ENDPOINT: ${TEI_ENDPOINT} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + restart: unless-stopped + depends_on: + elasticsearch-vector-db: + condition: service_healthy + + dataprep-milvus: + image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} + 
container_name: dataprep-milvus-server + ports: + - "${DATAPREP_PORT:-11101}:5000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_MILVUS" + MILVUS_HOST: ${MILVUS_HOST} + TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + restart: unless-stopped + depends_on: + tei-embedding-serving: + condition: service_healthy + + dataprep-neo4j-llamaindex: + image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} + container_name: dataprep-neo4j-llamaindex + depends_on: + neo4j-apoc: + condition: service_healthy + tgi-gaudi-server: + condition: service_healthy + tei-embedding-serving: + condition: service_healthy + ports: + - "${DATAPREP_PORT:-11103}:5000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + host_ip: ${host_ip} + DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_NEO4J_LLAMAINDEX" + NEO4J_URL: ${NEO4J_URL} + NEO4J_USERNAME: ${NEO4J_USERNAME} + NEO4J_PASSWORD: ${NEO4J_PASSWORD} + TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} + TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} + OPENAI_API_KEY: ${OPENAI_API_KEY} + OPENAI_EMBEDDING_MODEL: ${OPENAI_EMBEDDING_MODEL} + OPENAI_LLM_MODEL: ${OPENAI_LLM_MODEL} + EMBEDDING_MODEL_ID: ${EMBEDDING_MODEL_ID} + LLM_MODEL_ID: ${LLM_MODEL_ID} + LOGFLAG: ${LOGFLAG} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + restart: unless-stopped + + dataprep-opensearch: + image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} + container_name: dataprep-opensearch-server + ports: + - "${DATAPREP_PORT:-11104}:5000" + depends_on: + opensearch-vector-db: + condition: service_healthy + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + OPENSEARCH_INITIAL_ADMIN_PASSWORD: ${OPENSEARCH_INITIAL_ADMIN_PASSWORD} + DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_OPENSEARCH" + OPENSEARCH_URL: ${OPENSEARCH_URL} + INDEX_NAME: ${INDEX_NAME} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + restart: unless-stopped + security_opt: + - no-new-privileges:true + + dataprep-pgvector: + image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} + container_name: dataprep-pgvector-server + ports: + - "${DATAPREP_PORT:-11105}:5000" + depends_on: + pgvector-db: + condition: service_healthy + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_PGVECTOR" + PG_CONNECTION_STRING: ${PG_CONNECTION_STRING} + restart: unless-stopped + + dataprep-pinecone: + image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} + container_name: dataprep-pinecone-server + ports: + - "${DATAPREP_PORT:-11106}:5000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_PINECONE" + PINECONE_API_KEY: ${PINECONE_API_KEY} + PINECONE_INDEX_NAME: ${PINECONE_INDEX_NAME} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + restart: unless-stopped + + dataprep-qdrant: + image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} + container_name: dataprep-qdrant-server + depends_on: + qdrant-vector-db: + condition: service_healthy + tei-embedding-serving: + condition: service_healthy + ports: + - "${DATAPREP_PORT:-11107}:5000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_QDRANT" + QDRANT_HOST: ${QDRANT_HOST} + QDRANT_PORT: ${QDRANT_PORT} + COLLECTION_NAME: 
${COLLECTION_NAME} + TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + restart: unless-stopped + + dataprep-redis: + image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} + container_name: dataprep-redis-server + depends_on: + redis-vector-db: + condition: service_healthy + tei-embedding-serving: + condition: service_healthy + ports: + - "${DATAPREP_PORT:-11108}:5000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_REDIS" + REDIS_HOST: ${REDIS_HOST} + REDIS_PORT: ${REDIS_PORT} + REDIS_URL: ${REDIS_URL} + TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} + INDEX_NAME: ${INDEX_NAME} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + restart: unless-stopped + + dataprep-multimodal-redis: + image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} + container_name: dataprep-multimodal-redis-server + ports: + - "${DATAPREP_PORT:-11109}:5000" + depends_on: + redis-vector-db: + condition: service_healthy + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + MULTIMODAL_DATAPREP: true + DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_MULTIMODALREDIS" + REDIS_HOST: ${REDIS_HOST} + REDIS_PORT: ${REDIS_PORT} + REDIS_URL: ${REDIS_URL} + INDEX_NAME: ${INDEX_NAME} + LVM_ENDPOINT: ${LVM_ENDPOINT} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + restart: unless-stopped + + dataprep-vdms: + image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} + container_name: dataprep-vdms-server + ports: + - "${DATAPREP_PORT:-11110}:5000" + depends_on: + vdms-vector-db: + condition: service_healthy + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_VDMS" + VDMS_HOST: ${VDMS_HOST} + VDMS_PORT: ${VDMS_PORT} + COLLECTION_NAME: ${COLLECTION_NAME} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + restart: unless-stopped + + dataprep-vdms-multimodal: + image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} + container_name: dataprep-vdms-multimodal-server + ports: + - "${DATAPREP_PORT:-11111}:5000" + depends_on: + vdms-vector-db: + condition: service_healthy + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + MULTIMODAL_DATAPREP: true + DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_MULTIMODALVDMS" + VDMS_HOST: ${VDMS_HOST} + VDMS_PORT: ${VDMS_PORT} + COLLECTION_NAME: ${INDEX_NAME} + restart: unless-stopped + +networks: + default: + driver: bridge + opensearch-net: diff --git a/comps/dataprep/deployment/kubernetes/README.md b/comps/dataprep/deployment/kubernetes/README.md new file mode 100644 index 0000000000..fc9d9ab0bf --- /dev/null +++ b/comps/dataprep/deployment/kubernetes/README.md @@ -0,0 +1,18 @@ +# Deploy dataprep microservice on Kubernetes cluster + +- You should have Helm (version >= 3.15) installed. Refer to the [Helm Installation Guide](https://helm.sh/docs/intro/install/) for more information. +- For more deployment options, refer to [helm charts README](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts#readme). 
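Every dataprep variant in the compose file above runs the same `opea/dataprep` image, publishes container port 5000 on its own `DATAPREP_PORT`, and selects its backend through `DATAPREP_COMPONENT_NAME`; the Helm values files below provide the equivalent wiring on Kubernetes. A rough sketch of exercising a deployed instance from Python follows; the host port matches the `dataprep-redis` mapping above, while the `/v1/dataprep/ingest` route and the `files`/`chunk_size` form fields are assumptions based on the dataprep API elsewhere in this diff and may differ in your build:

```python
import requests

# Host-side port of the dataprep-redis service from the compose file above (default 11108).
DATAPREP_URL = "http://localhost:11108/v1/dataprep/ingest"  # route name is an assumption

# Upload one PDF for ingestion; the chunking form fields mirror those used by the
# dataprep endpoints elsewhere in this diff.
with open("sample.pdf", "rb") as f:
    resp = requests.post(
        DATAPREP_URL,
        files={"files": ("sample.pdf", f, "application/pdf")},
        data={"chunk_size": 1500, "chunk_overlap": 100},
        timeout=300,
    )
resp.raise_for_status()
print(resp.json())
```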
+ +## Deploy on Kubernetes with redis VectorDB + +``` +export HFTOKEN="insert-your-huggingface-token-here" +helm install data-prep oci://ghcr.io/opea-project/charts/data-prep --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f redis-values.yaml +``` + +## Deploy on Kubernetes with milvus VectorDB + +``` +export HFTOKEN="insert-your-huggingface-token-here" +helm install data-prep oci://ghcr.io/opea-project/charts/data-prep --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f milvus-values.yaml +``` diff --git a/comps/dataprep/deployment/kubernetes/milvus-values.yaml b/comps/dataprep/deployment/kubernetes/milvus-values.yaml new file mode 100644 index 0000000000..d0d458d14a --- /dev/null +++ b/comps/dataprep/deployment/kubernetes/milvus-values.yaml @@ -0,0 +1,34 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +milvus: + enabled: true + # Milvus config for standalone mode with no PVC which has minimum requirements for the K8s cluster. + # Check https://github.com/zilliztech/milvus-helm/tree/milvus-4.2.12/charts/milvus for more production level configuration. + cluster: + enabled: false + etcd: + replicaCount: 1 + persistence: + enabled: false + pulsar: + enabled: false + minio: + mode: standalone + persistence: + enabled: false + standalone: + persistence: + enabled: false +redis-vector-db: + enabled: false +tei: + enabled: true + +DATAPREP_BACKEND: "MILVUS" +# text embedding inference service URL, e.g. http://: +# TEI_EMBEDDING_ENDPOINT: "http://data-prep-tei:80" +# milvus DB configurations +# MILVUS_HOST: "data-prep-milvus" +MILVUS_PORT: 19530 +COLLECTION_NAME: "rag_milvus" diff --git a/comps/dataprep/deployment/kubernetes/redis-values.yaml b/comps/dataprep/deployment/kubernetes/redis-values.yaml new file mode 100644 index 0000000000..54853db043 --- /dev/null +++ b/comps/dataprep/deployment/kubernetes/redis-values.yaml @@ -0,0 +1,9 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +tei: + enabled: true +redis-vector-db: + enabled: true +milvus: + enabled: false diff --git a/comps/dataprep/elasticsearch/langchain/Dockerfile b/comps/dataprep/elasticsearch/langchain/Dockerfile deleted file mode 100644 index 016a5d04d9..0000000000 --- a/comps/dataprep/elasticsearch/langchain/Dockerfile +++ /dev/null @@ -1,38 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -FROM python:3.11-slim - -ENV LANG=C.UTF-8 - -ARG ARCH="cpu" - -RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ - build-essential \ - default-jre \ - libgl1-mesa-glx \ - libjemalloc-dev - -RUN useradd -m -s /bin/bash user && \ - mkdir -p /home/user && \ - chown -R user /home/user/ - -USER user - -COPY comps /home/user/comps - -RUN pip install --no-cache-dir --upgrade pip setuptools && \ - if [ ${ARCH} = "cpu" ]; then pip install --no-cache-dir torch torchvision --index-url https://download.pytorch.org/whl/cpu; fi && \ - pip install --no-cache-dir -r /home/user/comps/dataprep/elasticsearch/langchain/requirements.txt - -ENV PYTHONPATH=$PYTHONPATH:/home/user - -USER root - -RUN mkdir -p /home/user/comps/dataprep/elasticsearch/langchain/uploaded_files && chown -R user /home/user/comps/dataprep/elasticsearch/langchain/uploaded_files - -USER user - -WORKDIR /home/user/comps/dataprep/elasticsearch/langchain - -ENTRYPOINT ["python", "prepare_doc_elasticsearch.py"] diff --git a/comps/dataprep/elasticsearch/langchain/config.py b/comps/dataprep/elasticsearch/langchain/config.py deleted file mode 100644 index 
3167e480dd..0000000000 --- a/comps/dataprep/elasticsearch/langchain/config.py +++ /dev/null @@ -1,23 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import os - -ES_CONNECTION_STRING = os.getenv("ES_CONNECTION_STRING", "http://localhost:9200") -UPLOADED_FILES_PATH = os.getenv("UPLOADED_FILES_PATH", "./uploaded_files/") - -# Embedding model -EMBED_MODEL = os.getenv("EMBED_MODEL", "BAAI/bge-base-en-v1.5") - -# TEI Embedding endpoints -TEI_ENDPOINT = os.getenv("TEI_ENDPOINT", "") - -# Vector Index Configuration -INDEX_NAME = os.getenv("INDEX_NAME", "rag-elastic") - -# chunk parameters -CHUNK_SIZE = os.getenv("CHUNK_SIZE", 1500) -CHUNK_OVERLAP = os.getenv("CHUNK_OVERLAP", 100) - -# Logging enabled -LOG_FLAG = os.getenv("LOGFLAG", False) diff --git a/comps/dataprep/elasticsearch/langchain/docker-compose.yaml b/comps/dataprep/elasticsearch/langchain/docker-compose.yaml deleted file mode 100644 index 01d818eac0..0000000000 --- a/comps/dataprep/elasticsearch/langchain/docker-compose.yaml +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -version: "3" -services: - elasticsearch-vector-db: - hostname: db - container_name: elasticsearch-vector-db - image: docker.elastic.co/elasticsearch/elasticsearch:8.16.0 - ports: - - "9200:9200" - - "9300:9300" - restart: always - ipc: host - environment: - - ES_JAVA_OPTS=-Xms1g -Xmx1g - - discovery.type=single-node - - xpack.security.enabled=false - - bootstrap.memory_lock=false - - no_proxy= ${no_proxy} - - http_proxy= ${http_proxy} - - https_proxy= ${https_proxy} - - dataprep-elasticsearch: - image: opea/dataprep-elasticsearch:latest - container_name: dataprep-elasticsearch - ports: - - "6011:6011" - ipc: host - environment: - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - ES_CONNECTION_STRING: ${ES_CONNECTION_STRING} - INDEX_NAME: ${INDEX_NAME} - TEI_ENDPOINT: ${TEI_ENDPOINT} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} - restart: unless-stopped - -networks: - default: - driver: bridge diff --git a/comps/dataprep/elasticsearch/langchain/prepare_doc_elasticsearch.py b/comps/dataprep/elasticsearch/langchain/prepare_doc_elasticsearch.py deleted file mode 100644 index 0d5c67824b..0000000000 --- a/comps/dataprep/elasticsearch/langchain/prepare_doc_elasticsearch.py +++ /dev/null @@ -1,373 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import json -import os -from pathlib import Path -from typing import List, Optional, Union - -from config import ( - CHUNK_OVERLAP, - CHUNK_SIZE, - EMBED_MODEL, - ES_CONNECTION_STRING, - INDEX_NAME, - LOG_FLAG, - TEI_ENDPOINT, - UPLOADED_FILES_PATH, -) -from elasticsearch import Elasticsearch -from fastapi import Body, File, Form, HTTPException, UploadFile -from langchain.text_splitter import HTMLHeaderTextSplitter, RecursiveCharacterTextSplitter -from langchain_community.embeddings import HuggingFaceBgeEmbeddings -from langchain_core.documents import Document -from langchain_elasticsearch import ElasticsearchStore -from langchain_huggingface.embeddings import HuggingFaceEndpointEmbeddings - -from comps import CustomLogger, DocPath, opea_microservices, register_microservice -from comps.dataprep.src.utils import ( - create_upload_folder, - document_loader, - encode_filename, - get_file_structure, - get_separators, - get_tables_result, - parse_html, - remove_folder_with_ignore, - save_content_to_local_disk, -) - -logger = CustomLogger(__name__) - - -def create_index() -> 
None: - if not es_client.indices.exists(index=INDEX_NAME): - es_client.indices.create(index=INDEX_NAME) - - -def get_embedder() -> Union[HuggingFaceEndpointEmbeddings, HuggingFaceBgeEmbeddings]: - """Obtain required Embedder.""" - - if TEI_ENDPOINT: - return HuggingFaceEndpointEmbeddings(model=TEI_ENDPOINT) - else: - return HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) - - -def get_elastic_store(embedder: Union[HuggingFaceEndpointEmbeddings, HuggingFaceBgeEmbeddings]) -> ElasticsearchStore: - """Get Elasticsearch vector store.""" - - return ElasticsearchStore(index_name=INDEX_NAME, embedding=embedder, es_connection=es_client) - - -def delete_embeddings(doc_name: str) -> bool: - """Delete documents from Elasticsearch.""" - - try: - if doc_name == "all": - if LOG_FLAG: - logger.info("Deleting all documents from vectorstore") - - query = {"query": {"match_all": {}}} - else: - if LOG_FLAG: - logger.info(f"Deleting {doc_name} from vectorstore") - - query = {"query": {"match": {"metadata.doc_name": {"query": doc_name, "operator": "AND"}}}} - - es_client.delete_by_query(index=INDEX_NAME, body=query) - - return True - - except Exception as e: - if LOG_FLAG: - logger.info(f"An unexpected error occurred: {e}") - - return False - - -def search_by_filename(file_name: str) -> bool: - """Search Elasticsearch by file name.""" - - query = {"query": {"match": {"metadata.doc_name": {"query": file_name, "operator": "AND"}}}} - results = es_client.search(index=INDEX_NAME, body=query) - - if LOG_FLAG: - logger.info(f"[ search by file ] searched by {file_name}") - logger.info(f"[ search by file ] {len(results['hits'])} results: {results}") - - return results["hits"]["total"]["value"] > 0 - - -def ingest_doc_to_elastic(doc_path: DocPath) -> None: - """Ingest documents to Elasticsearch.""" - - path = doc_path.path - file_name = path.split("/")[-1] - if LOG_FLAG: - logger.info(f"Parsing document {path}, file name: {file_name}.") - - if path.endswith(".html"): - headers_to_split_on = [ - ("h1", "Header 1"), - ("h2", "Header 2"), - ("h3", "Header 3"), - ] - text_splitter = HTMLHeaderTextSplitter(headers_to_split_on=headers_to_split_on) - else: - text_splitter = RecursiveCharacterTextSplitter( - chunk_size=doc_path.chunk_size, - chunk_overlap=doc_path.chunk_overlap, - add_start_index=True, - separators=get_separators(), - ) - - content = document_loader(path) - - structured_types = [".xlsx", ".csv", ".json", "jsonl"] - _, ext = os.path.splitext(path) - - if ext in structured_types: - chunks = content - else: - chunks = text_splitter.split_text(content) - - if doc_path.process_table and path.endswith(".pdf"): - table_chunks = get_tables_result(path, doc_path.table_strategy) - chunks = chunks + table_chunks - - if LOG_FLAG: - logger.info(f"Done preprocessing. 
Created {len(chunks)} chunks of the original file.") - - batch_size = 32 - num_chunks = len(chunks) - - metadata = dict({"doc_name": str(file_name)}) - - for i in range(0, num_chunks, batch_size): - batch_chunks = chunks[i : i + batch_size] - batch_texts = batch_chunks - - documents = [Document(page_content=text, metadata=metadata) for text in batch_texts] - _ = es_store.add_documents(documents=documents) - if LOG_FLAG: - logger.info(f"Processed batch {i // batch_size + 1}/{(num_chunks - 1) // batch_size + 1}") - - -async def ingest_link_to_elastic(link_list: List[str]) -> None: - """Ingest data scraped from website links into Elasticsearch.""" - - text_splitter = RecursiveCharacterTextSplitter( - chunk_size=CHUNK_SIZE, - chunk_overlap=CHUNK_OVERLAP, - add_start_index=True, - separators=get_separators(), - ) - - batch_size = 32 - - for link in link_list: - content = parse_html([link])[0][0] - if LOG_FLAG: - logger.info(f"[ ingest link ] link: {link} content: {content}") - - encoded_link = encode_filename(link) - save_path = UPLOADED_FILES_PATH + encoded_link + ".txt" - doc_path = UPLOADED_FILES_PATH + link + ".txt" - if LOG_FLAG: - logger.info(f"[ ingest link ] save_path: {save_path}") - - await save_content_to_local_disk(save_path, content) - - chunks = text_splitter.split_text(content) - - num_chunks = len(chunks) - metadata = [dict({"doc_name": str(doc_path)})] - - for i in range(0, num_chunks, batch_size): - batch_chunks = chunks[i : i + batch_size] - batch_texts = batch_chunks - - documents = [Document(page_content=text, metadata=metadata) for text in batch_texts] - _ = es_store.add_documents(documents=documents) - - if LOG_FLAG: - logger.info(f"Processed batch {i // batch_size + 1}/{(num_chunks - 1) // batch_size + 1}") - - -@register_microservice(name="opea_service@prepare_doc_elastic", endpoint="/v1/dataprep", host="0.0.0.0", port=6011) -async def ingest_documents( - files: Optional[Union[UploadFile, List[UploadFile]]] = File(None), - link_list: Optional[str] = Form(None), - chunk_size: int = Form(1500), - chunk_overlap: int = Form(100), - process_table: bool = Form(False), - table_strategy: str = Form("fast"), -): - """Ingest documents for RAG.""" - - if LOG_FLAG: - logger.info(f"files:{files}") - logger.info(f"link_list:{link_list}") - - if files and link_list: - raise HTTPException(status_code=400, detail="Provide either a file or a string list, not both.") - - if files: - if not isinstance(files, list): - files = [files] - - if not os.path.exists(UPLOADED_FILES_PATH): - Path(UPLOADED_FILES_PATH).mkdir(parents=True, exist_ok=True) - - for file in files: - encode_file = encode_filename(file.filename) - save_path = UPLOADED_FILES_PATH + encode_file - filename = save_path.split("/")[-1] - - try: - exists = search_by_filename(filename) - except Exception as e: - raise HTTPException( - status_code=500, - detail=f"Failed when searching in Elasticsearch for file {file.filename}.", - ) - - if exists: - if LOG_FLAG: - logger.info(f"[ upload ] File {file.filename} already exists.") - - raise HTTPException( - status_code=400, - detail=f"Uploaded file {file.filename} already exists. 
Please change file name.", - ) - - await save_content_to_local_disk(save_path, file) - - ingest_doc_to_elastic( - DocPath( - path=save_path, - chunk_size=chunk_size, - chunk_overlap=chunk_overlap, - process_table=process_table, - table_strategy=table_strategy, - ) - ) - if LOG_FLAG: - logger.info(f"Successfully saved file {save_path}") - - result = {"status": 200, "message": "Data preparation succeeded"} - - if LOG_FLAG: - logger.info(result) - return result - - if link_list: - try: - link_list = json.loads(link_list) # Parse JSON string to list - if not isinstance(link_list, list): - raise HTTPException(status_code=400, detail="link_list should be a list.") - - await ingest_link_to_elastic(link_list) - - if LOG_FLAG: - logger.info(f"Successfully saved link list {link_list}") - - result = {"status": 200, "message": "Data preparation succeeded"} - - if LOG_FLAG: - logger.info(result) - return result - - except json.JSONDecodeError: - raise HTTPException(status_code=400, detail="Invalid JSON format for link_list.") - - raise HTTPException(status_code=400, detail="Must provide either a file or a string list.") - - -@register_microservice( - name="opea_service@prepare_doc_elastic", - endpoint="/v1/dataprep/get_file", - host="0.0.0.0", - port=6011, -) -async def rag_get_file_structure(): - """Obtain uploaded file list.""" - - if LOG_FLAG: - logger.info("[ dataprep - get file ] start to get file structure") - - if not Path(UPLOADED_FILES_PATH).exists(): - if LOG_FLAG: - logger.info("No file uploaded, return empty list.") - return [] - - file_content = get_file_structure(UPLOADED_FILES_PATH) - - if LOG_FLAG: - logger.info(file_content) - - return file_content - - -@register_microservice( - name="opea_service@prepare_doc_elastic", - endpoint="/v1/dataprep/delete_file", - host="0.0.0.0", - port=6011, -) -async def delete_single_file(file_path: str = Body(..., embed=True)): - """Delete file according to `file_path`. - - `file_path`: - - specific file path (e.g. /path/to/file.txt) - - folder path (e.g. /path/to/folder) - - "all": delete all files uploaded - """ - if file_path == "all": - if LOG_FLAG: - logger.info("[dataprep - del] delete all files") - remove_folder_with_ignore(UPLOADED_FILES_PATH) - assert delete_embeddings(file_path) - if LOG_FLAG: - logger.info("[dataprep - del] successfully delete all files.") - create_upload_folder(UPLOADED_FILES_PATH) - if LOG_FLAG: - logger.info({"status": True}) - return {"status": True} - - delete_path = Path(UPLOADED_FILES_PATH + "/" + encode_filename(file_path)) - - if LOG_FLAG: - logger.info(f"[dataprep - del] delete_path: {delete_path}") - - if delete_path.exists(): - # delete file - if delete_path.is_file(): - try: - assert delete_embeddings(file_path) - delete_path.unlink() - except Exception as e: - if LOG_FLAG: - logger.info(f"[dataprep - del] fail to delete file {delete_path}: {e}") - logger.info({"status": False}) - return {"status": False} - # delete folder - else: - if LOG_FLAG: - logger.info("[dataprep - del] delete folder is not supported for now.") - logger.info({"status": False}) - return {"status": False} - if LOG_FLAG: - logger.info({"status": True}) - return {"status": True} - else: - raise HTTPException(status_code=404, detail="File/folder not found. 
Please check del_path.") - - -if __name__ == "__main__": - es_client = Elasticsearch(hosts=ES_CONNECTION_STRING) - es_store = get_elastic_store(get_embedder()) - create_upload_folder(UPLOADED_FILES_PATH) - create_index() - opea_microservices["opea_service@prepare_doc_elastic"].start() diff --git a/comps/dataprep/elasticsearch/langchain/requirements.txt b/comps/dataprep/elasticsearch/langchain/requirements.txt deleted file mode 100644 index 95bfc46be0..0000000000 --- a/comps/dataprep/elasticsearch/langchain/requirements.txt +++ /dev/null @@ -1,30 +0,0 @@ -beautifulsoup4 -cairosvg -docarray[full] -docx2txt -easyocr -elasticsearch -fastapi -huggingface_hub -langchain -langchain-community -langchain-elasticsearch -langchain-huggingface -langchain-text-splitters -markdown -numpy -opentelemetry-api -opentelemetry-exporter-otlp -opentelemetry-sdk -pandas -Pillow -prometheus-fastapi-instrumentator -pymupdf -pytesseract -python-bidi -python-docx -python-pptx -sentence_transformers -shortuuid -unstructured[all-docs] -uvicorn diff --git a/comps/dataprep/milvus/langchain/Dockerfile b/comps/dataprep/milvus/langchain/Dockerfile deleted file mode 100644 index fbf308720e..0000000000 --- a/comps/dataprep/milvus/langchain/Dockerfile +++ /dev/null @@ -1,38 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -FROM python:3.11-slim - -ENV LANG=C.UTF-8 - -ARG ARCH="cpu" - -RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ - build-essential \ - default-jre \ - libgl1-mesa-glx \ - libjemalloc-dev \ - tesseract-ocr - -RUN useradd -m -s /bin/bash user && \ - mkdir -p /home/user && \ - chown -R user /home/user/ - -USER user - -COPY comps /home/user/comps - -RUN pip install --no-cache-dir --upgrade pip setuptools && \ - if [ ${ARCH} = "cpu" ]; then pip install --no-cache-dir torch torchvision --index-url https://download.pytorch.org/whl/cpu; fi && \ - pip install --no-cache-dir -r /home/user/comps/dataprep/milvus/langchain/requirements.txt - -ENV PYTHONPATH=$PYTHONPATH:/home/user - -USER root - -RUN mkdir -p /home/user/comps/dataprep/milvus/langchain/uploaded_files && chown -R user /home/user/comps/dataprep/milvus/langchain/uploaded_files - -USER user -WORKDIR /home/user/comps/dataprep/milvus/langchain - -ENTRYPOINT ["python", "prepare_doc_milvus.py"] diff --git a/comps/dataprep/milvus/langchain/config.py b/comps/dataprep/milvus/langchain/config.py deleted file mode 100644 index da037a0d99..0000000000 --- a/comps/dataprep/milvus/langchain/config.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import os - -# Local Embedding model -LOCAL_EMBEDDING_MODEL = os.getenv("LOCAL_EMBEDDING_MODEL", "maidalun1020/bce-embedding-base_v1") - -# MILVUS configuration -MILVUS_HOST = os.getenv("MILVUS_HOST", "localhost") -MILVUS_PORT = int(os.getenv("MILVUS_PORT", 19530)) -COLLECTION_NAME = os.getenv("COLLECTION_NAME", "rag_milvus") -# TEI configuration -TEI_EMBEDDING_MODEL = os.environ.get("TEI_EMBEDDING_MODEL", "/home/user/bge-large-zh-v1.5") -TEI_EMBEDDING_ENDPOINT = os.environ.get("TEI_EMBEDDING_ENDPOINT", "") -os.environ["OPENAI_API_BASE"] = TEI_EMBEDDING_ENDPOINT -os.environ["OPENAI_API_KEY"] = "Dummy key" diff --git a/comps/dataprep/milvus/langchain/docker-compose.yaml b/comps/dataprep/milvus/langchain/docker-compose.yaml deleted file mode 100644 index 8dde3fed06..0000000000 --- a/comps/dataprep/milvus/langchain/docker-compose.yaml +++ /dev/null @@ -1,69 +0,0 @@ -# Copyright (C) 2024 Intel 
Corporation -# SPDX-License-Identifier: Apache-2.0 - -version: '3.5' - -services: - etcd: - container_name: milvus-etcd - image: quay.io/coreos/etcd:v3.5.5 - environment: - - ETCD_AUTO_COMPACTION_MODE=revision - - ETCD_AUTO_COMPACTION_RETENTION=1000 - - ETCD_QUOTA_BACKEND_BYTES=4294967296 - - ETCD_SNAPSHOT_COUNT=50000 - volumes: - - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/etcd:/etcd - command: etcd -advertise-client-urls=http://127.0.0.1:2379 -listen-client-urls http://0.0.0.0:2379 --data-dir /etcd - healthcheck: - test: ["CMD", "etcdctl", "endpoint", "health"] - interval: 30s - timeout: 20s - retries: 3 - - minio: - container_name: milvus-minio - image: minio/minio:RELEASE.2023-03-20T20-16-18Z - environment: - MINIO_ACCESS_KEY: minioadmin - MINIO_SECRET_KEY: minioadmin - ports: - - "5044:9001" - - "5043:9000" - volumes: - - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/minio:/minio_data - command: minio server /minio_data --console-address ":9001" - healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"] - interval: 30s - timeout: 20s - retries: 3 - - standalone: - container_name: milvus-standalone - image: milvusdb/milvus:v2.4.9 - command: ["milvus", "run", "standalone"] - security_opt: - - seccomp:unconfined - environment: - ETCD_ENDPOINTS: etcd:2379 - MINIO_ADDRESS: minio:9000 - volumes: - - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/milvus:/var/lib/milvus - - ${DOCKER_VOLUME_DIRECTORY:-.}/milvus.yaml:/milvus/configs/milvus.yaml - healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:9091/healthz"] - interval: 30s - start_period: 90s - timeout: 20s - retries: 3 - ports: - - "19530:19530" - - "9091:9091" - depends_on: - - "etcd" - - "minio" - -networks: - default: - name: milvus diff --git a/comps/dataprep/milvus/langchain/milvus.yaml b/comps/dataprep/milvus/langchain/milvus.yaml deleted file mode 100644 index 52962b8342..0000000000 --- a/comps/dataprep/milvus/langchain/milvus.yaml +++ /dev/null @@ -1,1031 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -# Licensed to the LF AI & Data foundation under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Related configuration of etcd, used to store Milvus metadata & service discovery. -etcd: - # Endpoints used to access etcd service. You can change this parameter as the endpoints of your own etcd cluster. - # Environment variable: ETCD_ENDPOINTS - # etcd preferentially acquires valid address from environment variable ETCD_ENDPOINTS when Milvus is started. - endpoints: localhost:2379 - # Root prefix of the key to where Milvus stores data in etcd. - # It is recommended to change this parameter before starting Milvus for the first time. - # To share an etcd instance among multiple Milvus instances, consider changing this to a different value for each Milvus instance before you start them. 
- # Set an easy-to-identify root path for Milvus if etcd service already exists. - # Changing this for an already running Milvus instance may result in failures to read legacy data. - rootPath: by-dev - # Sub-prefix of the key to where Milvus stores metadata-related information in etcd. - # Caution: Changing this parameter after using Milvus for a period of time will affect your access to old data. - # It is recommended to change this parameter before starting Milvus for the first time. - metaSubPath: meta - # Sub-prefix of the key to where Milvus stores timestamps in etcd. - # Caution: Changing this parameter after using Milvus for a period of time will affect your access to old data. - # It is recommended not to change this parameter if there is no specific reason. - kvSubPath: kv - log: - level: info # Only supports debug, info, warn, error, panic, or fatal. Default 'info'. - # path is one of: - # - "default" as os.Stderr, - # - "stderr" as os.Stderr, - # - "stdout" as os.Stdout, - # - file path to append server logs to. - # please adjust in embedded Milvus: /tmp/milvus/logs/etcd.log - path: stdout - ssl: - enabled: false # Whether to support ETCD secure connection mode - tlsCert: /path/to/etcd-client.pem # path to your cert file - tlsKey: /path/to/etcd-client-key.pem # path to your key file - tlsCACert: /path/to/ca.pem # path to your CACert file - # TLS min version - # Optional values: 1.0, 1.1, 1.2, 1.3。 - # We recommend using version 1.2 and above. - tlsMinVersion: 1.3 - requestTimeout: 10000 # Etcd operation timeout in milliseconds - use: - embed: false # Whether to enable embedded Etcd (an in-process EtcdServer). - data: - dir: default.etcd # Embedded Etcd only. please adjust in embedded Milvus: /tmp/milvus/etcdData/ - auth: - enabled: false # Whether to enable authentication - userName: # username for etcd authentication - password: # password for etcd authentication - -metastore: - type: etcd # Default value: etcd, Valid values: [etcd, tikv] - -# Related configuration of tikv, used to store Milvus metadata. -# Notice that when TiKV is enabled for metastore, you still need to have etcd for service discovery. -# TiKV is a good option when the metadata size requires better horizontal scalability. -tikv: - endpoints: 127.0.0.1:2389 # Note that the default pd port of tikv is 2379, which conflicts with etcd. - rootPath: by-dev # The root path where data is stored in tikv - metaSubPath: meta # metaRootPath = rootPath + '/' + metaSubPath - kvSubPath: kv # kvRootPath = rootPath + '/' + kvSubPath - requestTimeout: 10000 # ms, tikv request timeout - snapshotScanSize: 256 # batch size of tikv snapshot scan - ssl: - enabled: false # Whether to support TiKV secure connection mode - tlsCert: # path to your cert file - tlsKey: # path to your key file - tlsCACert: # path to your CACert file - -localStorage: - # Local path to where vector data are stored during a search or a query to avoid repetitve access to MinIO or S3 service. - # Caution: Changing this parameter after using Milvus for a period of time will affect your access to old data. - # It is recommended to change this parameter before starting Milvus for the first time. - path: /var/lib/milvus/data/ - -# Related configuration of MinIO/S3/GCS or any other service supports S3 API, which is responsible for data persistence for Milvus. -# We refer to the storage service as MinIO/S3 in the following description for simplicity. -minio: - # IP address of MinIO or S3 service. 
- # Environment variable: MINIO_ADDRESS - # minio.address and minio.port together generate the valid access to MinIO or S3 service. - # MinIO preferentially acquires the valid IP address from the environment variable MINIO_ADDRESS when Milvus is started. - # Default value applies when MinIO or S3 is running on the same network with Milvus. - address: localhost - port: 9000 # Port of MinIO or S3 service. - # Access key ID that MinIO or S3 issues to user for authorized access. - # Environment variable: MINIO_ACCESS_KEY_ID or minio.accessKeyID - # minio.accessKeyID and minio.secretAccessKey together are used for identity authentication to access the MinIO or S3 service. - # This configuration must be set identical to the environment variable MINIO_ACCESS_KEY_ID, which is necessary for starting MinIO or S3. - # The default value applies to MinIO or S3 service that started with the default docker-compose.yml file. - accessKeyID: minioadmin - # Secret key used to encrypt the signature string and verify the signature string on server. It must be kept strictly confidential and accessible only to the MinIO or S3 server and users. - # Environment variable: MINIO_SECRET_ACCESS_KEY or minio.secretAccessKey - # minio.accessKeyID and minio.secretAccessKey together are used for identity authentication to access the MinIO or S3 service. - # This configuration must be set identical to the environment variable MINIO_SECRET_ACCESS_KEY, which is necessary for starting MinIO or S3. - # The default value applies to MinIO or S3 service that started with the default docker-compose.yml file. - secretAccessKey: minioadmin - useSSL: false # Switch value to control if to access the MinIO or S3 service through SSL. - ssl: - tlsCACert: /path/to/public.crt # path to your CACert file - # Name of the bucket where Milvus stores data in MinIO or S3. - # Milvus 2.0.0 does not support storing data in multiple buckets. - # Bucket with this name will be created if it does not exist. If the bucket already exists and is accessible, it will be used directly. Otherwise, there will be an error. - # To share an MinIO instance among multiple Milvus instances, consider changing this to a different value for each Milvus instance before you start them. For details, see Operation FAQs. - # The data will be stored in the local Docker if Docker is used to start the MinIO service locally. Ensure that there is sufficient storage space. - # A bucket name is globally unique in one MinIO or S3 instance. - bucketName: a-bucket - # Root prefix of the key to where Milvus stores data in MinIO or S3. - # It is recommended to change this parameter before starting Milvus for the first time. - # To share an MinIO instance among multiple Milvus instances, consider changing this to a different value for each Milvus instance before you start them. For details, see Operation FAQs. - # Set an easy-to-identify root key prefix for Milvus if etcd service already exists. - # Changing this for an already running Milvus instance may result in failures to read legacy data. 
- rootPath: files - # Whether to useIAM role to access S3/GCS instead of access/secret keys - # For more information, refer to - # aws: https://docs.aws.amazon.com/IAM/latest/UserGuide/id_roles_use.html - # gcp: https://cloud.google.com/storage/docs/access-control/iam - # aliyun (ack): https://www.alibabacloud.com/help/en/container-service-for-kubernetes/latest/use-rrsa-to-enforce-access-control - # aliyun (ecs): https://www.alibabacloud.com/help/en/elastic-compute-service/latest/attach-an-instance-ram-role - useIAM: false - # Cloud Provider of S3. Supports: "aws", "gcp", "aliyun". - # You can use "aws" for other cloud provider supports S3 API with signature v4, e.g.: minio - # You can use "gcp" for other cloud provider supports S3 API with signature v2 - # You can use "aliyun" for other cloud provider uses virtual host style bucket - # When useIAM enabled, only "aws", "gcp", "aliyun" is supported for now - cloudProvider: aws - # Custom endpoint for fetch IAM role credentials. when useIAM is true & cloudProvider is "aws". - # Leave it empty if you want to use AWS default endpoint - iamEndpoint: - logLevel: fatal # Log level for aws sdk log. Supported level: off, fatal, error, warn, info, debug, trace - region: # Specify minio storage system location region - useVirtualHost: false # Whether use virtual host mode for bucket - requestTimeoutMs: 10000 # minio timeout for request time in milliseconds - # The maximum number of objects requested per batch in minio ListObjects rpc, - # 0 means using oss client by default, decrease these configuration if ListObjects timeout - listObjectsMaxKeys: 0 - -# Milvus supports four MQ: rocksmq(based on RockDB), natsmq(embedded nats-server), Pulsar and Kafka. -# You can change your mq by setting mq.type field. -# If you don't set mq.type field as default, there is a note about enabling priority if we config multiple mq in this file. -# 1. standalone(local) mode: rocksmq(default) > natsmq > Pulsar > Kafka -# 2. cluster mode: Pulsar(default) > Kafka (rocksmq and natsmq is unsupported in cluster mode) -mq: - # Default value: "default" - # Valid values: [default, pulsar, kafka, rocksmq, natsmq] - type: default - enablePursuitMode: true # Default value: "true" - pursuitLag: 10 # time tick lag threshold to enter pursuit mode, in seconds - pursuitBufferSize: 8388608 # pursuit mode buffer size in bytes - mqBufSize: 16 # MQ client consumer buffer length - dispatcher: - mergeCheckInterval: 1 # the interval time(in seconds) for dispatcher to check whether to merge - targetBufSize: 16 # the length of channel buffer for targe - maxTolerantLag: 3 # Default value: "3", the timeout(in seconds) that target sends msgPack - -# Related configuration of pulsar, used to manage Milvus logs of recent mutation operations, output streaming log, and provide log publish-subscribe services. -pulsar: - # IP address of Pulsar service. - # Environment variable: PULSAR_ADDRESS - # pulsar.address and pulsar.port together generate the valid access to Pulsar. - # Pulsar preferentially acquires the valid IP address from the environment variable PULSAR_ADDRESS when Milvus is started. - # Default value applies when Pulsar is running on the same network with Milvus. - address: localhost - port: 6650 # Port of Pulsar service. - webport: 80 # Web port of of Pulsar service. If you connect directly without proxy, should use 8080. - # The maximum size of each message in Pulsar. Unit: Byte. - # By default, Pulsar can transmit at most 5 MB of data in a single message. 
When the size of inserted data is greater than this value, proxy fragments the data into multiple messages to ensure that they can be transmitted correctly. - # If the corresponding parameter in Pulsar remains unchanged, increasing this configuration will cause Milvus to fail, and reducing it produces no advantage. - maxMessageSize: 5242880 - # Pulsar can be provisioned for specific tenants with appropriate capacity allocated to the tenant. - # To share a Pulsar instance among multiple Milvus instances, you can change this to an Pulsar tenant rather than the default one for each Milvus instance before you start them. However, if you do not want Pulsar multi-tenancy, you are advised to change msgChannel.chanNamePrefix.cluster to the different value. - tenant: public - namespace: default # A Pulsar namespace is the administrative unit nomenclature within a tenant. - requestTimeout: 60 # pulsar client global request timeout in seconds - enableClientMetrics: false # Whether to register pulsar client metrics into milvus metrics path. - -# If you want to enable kafka, needs to comment the pulsar configs -# kafka: -# brokerList: -# saslUsername: -# saslPassword: -# saslMechanisms: -# securityProtocol: -# ssl: -# enabled: false # whether to enable ssl mode -# tlsCert: # path to client's public key (PEM) used for authentication -# tlsKey: # path to client's private key (PEM) used for authentication -# tlsCaCert: # file or directory path to CA certificate(s) for verifying the broker's key -# tlsKeyPassword: # private key passphrase for use with ssl.key.location and set_ssl_cert(), if any -# readTimeout: 10 - -rocksmq: - # Prefix of the key to where Milvus stores data in RocksMQ. - # Caution: Changing this parameter after using Milvus for a period of time will affect your access to old data. - # It is recommended to change this parameter before starting Milvus for the first time. - # Set an easy-to-identify root key prefix for Milvus if etcd service already exists. - path: /var/lib/milvus/rdb_data - lrucacheratio: 0.06 # rocksdb cache memory ratio - rocksmqPageSize: 67108864 # The maximum size of messages in each page in RocksMQ. Messages in RocksMQ are checked and cleared (when expired) in batch based on this parameters. Unit: Byte. - retentionTimeInMinutes: 4320 # The maximum retention time of acked messages in RocksMQ. Acked messages in RocksMQ are retained for the specified period of time and then cleared. Unit: Minute. - retentionSizeInMB: 8192 # The maximum retention size of acked messages of each topic in RocksMQ. Acked messages in each topic are cleared if their size exceed this parameter. Unit: MB. - compactionInterval: 86400 # Time interval to trigger rocksdb compaction to remove deleted data. Unit: Second - compressionTypes: 0,0,7,7,7 # compaction compression type, only support use 0,7. 0 means not compress, 7 will use zstd. Length of types means num of rocksdb level. - -# natsmq configuration. -# more detail: https://docs.nats.io/running-a-nats-service/configuration -natsmq: - server: - port: 4222 # Listening port of the NATS server. 
- storeDir: /var/lib/milvus/nats # Directory to use for JetStream storage of nats - maxFileStore: 17179869184 # Maximum size of the 'file' storage - maxPayload: 8388608 # Maximum number of bytes in a message payload - maxPending: 67108864 # Maximum number of bytes buffered for a connection Applies to client connections - initializeTimeout: 4000 # waiting for initialization of natsmq finished - monitor: - trace: false # If true enable protocol trace log messages - debug: false # If true enable debug log messages - logTime: true # If set to false, log without timestamps. - logFile: /tmp/milvus/logs/nats.log # Log file path relative to .. of milvus binary if use relative path - logSizeLimit: 536870912 # Size in bytes after the log file rolls over to a new one - retention: - maxAge: 4320 # Maximum age of any message in the P-channel - maxBytes: # How many bytes the single P-channel may contain. Removing oldest messages if the P-channel exceeds this size - maxMsgs: # How many message the single P-channel may contain. Removing oldest messages if the P-channel exceeds this limit - -# Related configuration of rootCoord, used to handle data definition language (DDL) and data control language (DCL) requests -rootCoord: - dmlChannelNum: 16 # The number of DML-Channels to create at the root coord startup. - # The maximum number of partitions in each collection. - # New partitions cannot be created if this parameter is set as 0 or 1. - # Range: [0, INT64MAX] - maxPartitionNum: 1024 - # The minimum row count of a segment required for creating index. - # Segments with smaller size than this parameter will not be indexed, and will be searched with brute force. - minSegmentSizeToEnableIndex: 1024 - enableActiveStandby: false - maxDatabaseNum: 64 # Maximum number of database - maxGeneralCapacity: 65536 # upper limit for the sum of of product of partitionNumber and shardNumber - gracefulStopTimeout: 5 # seconds. force stop node without graceful stop - ip: # TCP/IP address of rootCoord. If not specified, use the first unicastable address - port: 53100 # TCP port of rootCoord - grpc: - serverMaxSendSize: 536870912 # The maximum size of each RPC request that the rootCoord can send, unit: byte - serverMaxRecvSize: 268435456 # The maximum size of each RPC request that the rootCoord can receive, unit: byte - clientMaxSendSize: 268435456 # The maximum size of each RPC request that the clients on rootCoord can send, unit: byte - clientMaxRecvSize: 536870912 # The maximum size of each RPC request that the clients on rootCoord can receive, unit: byte - -# Related configuration of proxy, used to validate client requests and reduce the returned results. -proxy: - timeTickInterval: 200 # The interval at which proxy synchronizes the time tick, unit: ms. - healthCheckTimeout: 3000 # ms, the interval that to do component healthy check - msgStream: - timeTick: - bufSize: 512 # The maximum number of messages can be buffered in the timeTick message stream of the proxy when producing messages. - maxNameLength: 255 # The maximum length of the name or alias that can be created in Milvus, including the collection name, collection alias, partition name, and field name. - maxFieldNum: 64 # The maximum number of field can be created when creating in a collection. It is strongly DISCOURAGED to set maxFieldNum >= 64. - maxVectorFieldNum: 4 # The maximum number of vector fields that can be specified in a collection. Value range: [1, 10]. - maxShardNum: 16 # The maximum number of shards can be created when creating in a collection. 
- maxDimension: 32768 # The maximum number of dimensions of a vector can have when creating in a collection. - # Whether to produce gin logs.\n - # please adjust in embedded Milvus: false - ginLogging: true - ginLogSkipPaths: / # skip url path for gin log - maxTaskNum: 1024 # The maximum number of tasks in the task queue of the proxy. - mustUsePartitionKey: false # switch for whether proxy must use partition key for the collection - accessLog: - enable: false # Whether to enable the access log feature. - minioEnable: false # Whether to upload local access log files to MinIO. This parameter can be specified when proxy.accessLog.filename is not empty. - localPath: /tmp/milvus_access # The local folder path where the access log file is stored. This parameter can be specified when proxy.accessLog.filename is not empty. - filename: # The name of the access log file. If you leave this parameter empty, access logs will be printed to stdout. - maxSize: 64 # The maximum size allowed for a single access log file. If the log file size reaches this limit, a rotation process will be triggered. This process seals the current access log file, creates a new log file, and clears the contents of the original log file. Unit: MB. - rotatedTime: 0 # The maximum time interval allowed for rotating a single access log file. Upon reaching the specified time interval, a rotation process is triggered, resulting in the creation of a new access log file and sealing of the previous one. Unit: seconds - remotePath: access_log/ # The path of the object storage for uploading access log files. - remoteMaxTime: 0 # The time interval allowed for uploading access log files. If the upload time of a log file exceeds this interval, the file will be deleted. Setting the value to 0 disables this feature. - formatters: - base: - format: "[$time_now] [ACCESS] <$user_name: $user_addr> $method_name [status: $method_status] [code: $error_code] [sdk: $sdk_version] [msg: $error_msg] [traceID: $trace_id] [timeCost: $time_cost]" - query: - format: "[$time_now] [ACCESS] <$user_name: $user_addr> $method_name [status: $method_status] [code: $error_code] [sdk: $sdk_version] [msg: $error_msg] [traceID: $trace_id] [timeCost: $time_cost] [database: $database_name] [collection: $collection_name] [partitions: $partition_name] [expr: $method_expr]" - methods: "Query,Search,Delete" - cacheSize: 0 # Size of log of write cache, in byte. (Close write cache if size was 0) - cacheFlushInterval: 3 # time interval of auto flush write cache, in seconds. (Close auto flush if interval was 0) - connectionCheckIntervalSeconds: 120 # the interval time(in seconds) for connection manager to scan inactive client info - connectionClientInfoTTLSeconds: 86400 # inactive client info TTL duration, in seconds - maxConnectionNum: 10000 # the max client info numbers that proxy should manage, avoid too many client infos - gracefulStopTimeout: 30 # seconds. force stop node without graceful stop - slowQuerySpanInSeconds: 5 # query whose executed time exceeds the `slowQuerySpanInSeconds` can be considered slow, in seconds. - queryNodePooling: - size: 10 # the size for shardleader(querynode) client pool - http: - enabled: true # Whether to enable the http server - debug_mode: false # Whether to enable http server debug mode - port: # high-level restful api - acceptTypeAllowInt64: true # high-level restful api, whether http client can deal with int64 - enablePprof: true # Whether to enable pprof middleware on the metrics port - ip: # TCP/IP address of proxy. 
If not specified, use the first unicastable address - port: 19530 # TCP port of proxy - internalPort: 19529 - grpc: - serverMaxSendSize: 268435456 # The maximum size of each RPC request that the proxy can send, unit: byte - serverMaxRecvSize: 67108864 # The maximum size of each RPC request that the proxy can receive, unit: byte - clientMaxSendSize: 268435456 # The maximum size of each RPC request that the clients on proxy can send, unit: byte - clientMaxRecvSize: 67108864 # The maximum size of each RPC request that the clients on proxy can receive, unit: byte - -# Related configuration of queryCoord, used to manage topology and load balancing for the query nodes, and handoff from growing segments to sealed segments. -queryCoord: - taskMergeCap: 1 - taskExecutionCap: 256 - # Switch value to control if to automatically replace a growing segment with the corresponding indexed sealed segment when the growing segment reaches the sealing threshold. - # If this parameter is set false, Milvus simply searches the growing segments with brute force. - autoHandoff: true - autoBalance: true # Switch value to control if to automatically balance the memory usage among query nodes by distributing segment loading and releasing operations evenly. - autoBalanceChannel: true # Enable auto balance channel - balancer: ScoreBasedBalancer # auto balancer used for segments on queryNodes - globalRowCountFactor: 0.1 # the weight used when balancing segments among queryNodes - scoreUnbalanceTolerationFactor: 0.05 # the least value for unbalanced extent between from and to nodes when doing balance - reverseUnBalanceTolerationFactor: 1.3 # the largest value for unbalanced extent between from and to nodes after doing balance - overloadedMemoryThresholdPercentage: 90 # The threshold of memory usage (in percentage) in a query node to trigger the sealed segment balancing. - balanceIntervalSeconds: 60 # The interval at which query coord balances the memory usage among query nodes. - memoryUsageMaxDifferencePercentage: 30 # The threshold of memory usage difference (in percentage) between any two query nodes to trigger the sealed segment balancing. 
- rowCountFactor: 0.4 # the row count weight used when balancing segments among queryNodes - segmentCountFactor: 0.4 # the segment count weight used when balancing segments among queryNodes - globalSegmentCountFactor: 0.1 # the segment count weight used when balancing segments among queryNodes - segmentCountMaxSteps: 50 # segment count based plan generator max steps - rowCountMaxSteps: 50 # segment count based plan generator max steps - randomMaxSteps: 10 # segment count based plan generator max steps - growingRowCountWeight: 4 # the memory weight of growing segment row count - delegatorMemoryOverloadFactor: 0.1 # the factor of delegator overloaded memory - balanceCostThreshold: 0.001 # the threshold of balance cost, if the difference of cluster's cost after executing the balance plan is less than this value, the plan will not be executed - checkSegmentInterval: 1000 - checkChannelInterval: 1000 - checkBalanceInterval: 10000 - checkIndexInterval: 10000 - channelTaskTimeout: 60000 # 1 minute - segmentTaskTimeout: 120000 # 2 minute - distPullInterval: 500 - heartbeatAvailableInterval: 10000 # 10s, Only QueryNodes which fetched heartbeats within the duration are available - loadTimeoutSeconds: 600 - distRequestTimeout: 5000 # the request timeout for querycoord fetching data distribution from querynodes, in milliseconds - heatbeatWarningLag: 5000 # the lag value for querycoord report warning when last heartbeat is too old, in milliseconds - checkHandoffInterval: 5000 - enableActiveStandby: false - checkInterval: 1000 - checkHealthInterval: 3000 # 3s, the interval when query coord try to check health of query node - checkHealthRPCTimeout: 2000 # 100ms, the timeout of check health rpc to query node - brokerTimeout: 5000 # 5000ms, querycoord broker rpc timeout - collectionRecoverTimes: 3 # if collection recover times reach the limit during loading state, release it - observerTaskParallel: 16 # the parallel observer dispatcher task number - checkAutoBalanceConfigInterval: 10 # the interval of check auto balance config - checkNodeSessionInterval: 60 # the interval(in seconds) of check querynode cluster session - gracefulStopTimeout: 5 # seconds. force stop node without graceful stop - enableStoppingBalance: true # whether enable stopping balance - channelExclusiveNodeFactor: 4 # the least node number for enable channel's exclusive mode - collectionObserverInterval: 200 # the interval of collection observer - checkExecutedFlagInterval: 100 # the interval of check executed flag to force to pull dist - updateCollectionLoadStatusInterval: 5 # 5m, max interval for updating collection loaded status - cleanExcludeSegmentInterval: 60 # the time duration of clean pipeline exclude segment which used for filter invalid data, in seconds - ip: # TCP/IP address of queryCoord. If not specified, use the first unicastable address - port: 19531 # TCP port of queryCoord - grpc: - serverMaxSendSize: 536870912 # The maximum size of each RPC request that the queryCoord can send, unit: byte - serverMaxRecvSize: 268435456 # The maximum size of each RPC request that the queryCoord can receive, unit: byte - clientMaxSendSize: 268435456 # The maximum size of each RPC request that the clients on queryCoord can send, unit: byte - clientMaxRecvSize: 536870912 # The maximum size of each RPC request that the clients on queryCoord can receive, unit: byte - -# Related configuration of queryNode, used to run hybrid search between vector and scalar data. 
-queryNode: - stats: - publishInterval: 1000 # The interval that query node publishes the node statistics information, including segment status, cpu usage, memory usage, health status, etc. Unit: ms. - segcore: - knowhereThreadPoolNumRatio: 4 # The number of threads in knowhere's thread pool. If disk is enabled, the pool size will multiply with knowhereThreadPoolNumRatio([1, 32]). - chunkRows: 128 # Row count by which Segcore divides a segment into chunks. - interimIndex: - # Whether to create a temporary index for growing segments and sealed segments not yet indexed, improving search performance. - # Milvus will eventually seals and indexes all segments, but enabling this optimizes search performance for immediate queries following data insertion. - # This defaults to true, indicating that Milvus creates temporary index for growing segments and the sealed segments that are not indexed upon searches. - enableIndex: true - nlist: 128 # temp index nlist, recommend to set sqrt(chunkRows), must smaller than chunkRows/8 - nprobe: 16 # nprobe to search small index, based on your accuracy requirement, must smaller than nlist - memExpansionRate: 1.15 # extra memory needed by building interim index - buildParallelRate: 0.5 # the ratio of building interim index parallel matched with cpu num - knowhereScoreConsistency: false # Enable knowhere strong consistency score computation logic - loadMemoryUsageFactor: 1 # The multiply factor of calculating the memory usage while loading segments - enableDisk: false # enable querynode load disk index, and search on disk index - maxDiskUsagePercentage: 95 - cache: - enabled: true - memoryLimit: 2147483648 # 2 GB, 2 * 1024 *1024 *1024 - readAheadPolicy: willneed # The read ahead policy of chunk cache, options: `normal, random, sequential, willneed, dontneed` - # options: async, sync, disable. - # Specifies the necessity for warming up the chunk cache. - # 1. If set to "sync" or "async" the original vector data will be synchronously/asynchronously loaded into the - # chunk cache during the load process. This approach has the potential to substantially reduce query/search latency - # for a specific duration post-load, albeit accompanied by a concurrent increase in disk usage; - # 2. If set to "disable" original vector data will only be loaded into the chunk cache during search/query. - warmup: disable - mmap: - mmapEnabled: false # Enable mmap for loading data - growingMmapEnabled: false # Enable mmap for using in growing raw data - fixedFileSizeForMmapAlloc: 1 # tmp file size for mmap chunk manager - maxDiskUsagePercentageForMmapAlloc: 50 # disk percentage used in mmap chunk manager - lazyload: - enabled: false # Enable lazyload for loading data - waitTimeout: 30000 # max wait timeout duration in milliseconds before start to do lazyload search and retrieve - requestResourceTimeout: 5000 # max timeout in milliseconds for waiting request resource for lazy load, 5s by default - requestResourceRetryInterval: 2000 # retry interval in milliseconds for waiting request resource for lazy load, 2s by default - maxRetryTimes: 1 # max retry times for lazy load, 1 by default - maxEvictPerRetry: 1 # max evict count for lazy load, 1 by default - grouping: - enabled: true - maxNQ: 1000 - topKMergeRatio: 20 - scheduler: - receiveChanSize: 10240 - unsolvedQueueSize: 10240 - # maxReadConcurrentRatio is the concurrency ratio of read task (search task and query task). - # Max read concurrency would be the value of hardware.GetCPUNum * maxReadConcurrentRatio. 
- # It defaults to 2.0, which means max read concurrency would be the value of hardware.GetCPUNum * 2. - # Max read concurrency must greater than or equal to 1, and less than or equal to hardware.GetCPUNum * 100. - # (0, 100] - maxReadConcurrentRatio: 1 - cpuRatio: 10 # ratio used to estimate read task cpu usage. - maxTimestampLag: 86400 - scheduleReadPolicy: - # fifo: A FIFO queue support the schedule. - # user-task-polling: - # The user's tasks will be polled one by one and scheduled. - # Scheduling is fair on task granularity. - # The policy is based on the username for authentication. - # And an empty username is considered the same user. - # When there are no multi-users, the policy decay into FIFO" - name: fifo - taskQueueExpire: 60 # Control how long (many seconds) that queue retains since queue is empty - enableCrossUserGrouping: false # Enable Cross user grouping when using user-task-polling policy. (Disable it if user's task can not merge each other) - maxPendingTaskPerUser: 1024 # Max pending task per user in scheduler - dataSync: - flowGraph: - maxQueueLength: 16 # The maximum size of task queue cache in flow graph in query node. - maxParallelism: 1024 # Maximum number of tasks executed in parallel in the flowgraph - enableSegmentPrune: false # use partition stats to prune data in search/query on shard delegator - bloomFilterApplyParallelFactor: 4 # parallel factor when to apply pk to bloom filter, default to 4*CPU_CORE_NUM - queryStreamBatchSize: 4194304 # return batch size of stream query - workerPooling: - size: 10 # the size for worker querynode client pool - ip: # TCP/IP address of queryNode. If not specified, use the first unicastable address - port: 21123 # TCP port of queryNode - grpc: - serverMaxSendSize: 536870912 # The maximum size of each RPC request that the queryNode can send, unit: byte - serverMaxRecvSize: 268435456 # The maximum size of each RPC request that the queryNode can receive, unit: byte - clientMaxSendSize: 268435456 # The maximum size of each RPC request that the clients on queryNode can send, unit: byte - clientMaxRecvSize: 536870912 # The maximum size of each RPC request that the clients on queryNode can receive, unit: byte - -indexCoord: - bindIndexNodeMode: - enable: false - address: localhost:22930 - withCred: false - nodeID: 0 - segment: - minSegmentNumRowsToEnableIndex: 1024 # It's a threshold. When the segment num rows is less than this value, the segment will not be indexed - -indexNode: - scheduler: - buildParallel: 1 - enableDisk: true # enable index node build disk vector index - maxDiskUsagePercentage: 95 - ip: # TCP/IP address of indexNode. If not specified, use the first unicastable address - port: 21121 # TCP port of indexNode - grpc: - serverMaxSendSize: 536870912 # The maximum size of each RPC request that the indexNode can send, unit: byte - serverMaxRecvSize: 268435456 # The maximum size of each RPC request that the indexNode can receive, unit: byte - clientMaxSendSize: 268435456 # The maximum size of each RPC request that the clients on indexNode can send, unit: byte - clientMaxRecvSize: 536870912 # The maximum size of each RPC request that the clients on indexNode can receive, unit: byte - -dataCoord: - channel: - watchTimeoutInterval: 300 # Timeout on watching channels (in seconds). Datanode tickler update watch progress will reset timeout timer. 
- balanceWithRpc: true # Whether to enable balance with RPC, default to use etcd watch - legacyVersionWithoutRPCWatch: 2.4.1 # Datanodes <= this version are considered as legacy nodes, which doesn't have rpc based watch(). This is only used during rolling upgrade where legacy nodes won't get new channels - balanceSilentDuration: 300 # The duration after which the channel manager start background channel balancing - balanceInterval: 360 # The interval with which the channel manager check dml channel balance status - checkInterval: 1 # The interval in seconds with which the channel manager advances channel states - notifyChannelOperationTimeout: 5 # Timeout notifing channel operations (in seconds). - segment: - maxSize: 1024 # The maximum size of a segment, unit: MB. datacoord.segment.maxSize and datacoord.segment.sealProportion together determine if a segment can be sealed. - diskSegmentMaxSize: 2048 # Maximum size of a segment in MB for collection which has Disk index - sealProportion: 0.12 # The minimum proportion to datacoord.segment.maxSize to seal a segment. datacoord.segment.maxSize and datacoord.segment.sealProportion together determine if a segment can be sealed. - assignmentExpiration: 2000 # Expiration time of the segment assignment, unit: ms - allocLatestExpireAttempt: 200 # The time attempting to alloc latest lastExpire from rootCoord after restart - maxLife: 86400 # The max lifetime of segment in seconds, 24*60*60 - # If a segment didn't accept dml records in maxIdleTime and the size of segment is greater than - # minSizeFromIdleToSealed, Milvus will automatically seal it. - # The max idle time of segment in seconds, 10*60. - maxIdleTime: 600 - minSizeFromIdleToSealed: 16 # The min size in MB of segment which can be idle from sealed. - # The max number of binlog file for one segment, the segment will be sealed if - # the number of binlog file reaches to max value. - maxBinlogFileNumber: 32 - smallProportion: 0.5 # The segment is considered as "small segment" when its # of rows is smaller than - # (smallProportion * segment max # of rows). - # A compaction will happen on small segments if the segment after compaction will have - compactableProportion: 0.85 - # over (compactableProportion * segment max # of rows) rows. - # MUST BE GREATER THAN OR EQUAL TO !!! - # During compaction, the size of segment # of rows is able to exceed segment max # of rows by (expansionRate-1) * 100%. - expansionRate: 1.25 - sealPolicy: - channel: - # The size threshold in MB, if the total size of growing segments of each shard - # exceeds this threshold, the largest growing segment will be sealed. - growingSegmentsMemSize: 4096 - autoUpgradeSegmentIndex: false # whether auto upgrade segment index to index engine's version - segmentFlushInterval: 2 # the minimal interval duration(unit: Seconds) between flushing operation on same segment - # Switch value to control if to enable segment compaction. - # Compaction merges small-size segments into a large segment, and clears the entities deleted beyond the rentention duration of Time Travel. - enableCompaction: true - compaction: - # Switch value to control if to enable automatic segment compaction during which data coord locates and merges compactable segments in the background. - # This configuration takes effect only when dataCoord.enableCompaction is set as true. 
- enableAutoCompaction: true - indexBasedCompaction: true - rpcTimeout: 10 - maxParallelTaskNum: 10 - workerMaxParallelTaskNum: 2 - clustering: - enable: true # Enable clustering compaction - autoEnable: false # Enable auto clustering compaction - triggerInterval: 600 # clustering compaction trigger interval in seconds - minInterval: 3600 # The minimum interval between clustering compaction executions of one collection, to avoid redundant compaction - maxInterval: 259200 # If a collection hasn't been clustering compacted for longer than maxInterval, force compact - newDataSizeThreshold: 512m # If new data size is larger than newDataSizeThreshold, execute clustering compaction - preferSegmentSizeRatio: 0.8 - maxSegmentSizeRatio: 1 - maxTrainSizeRatio: 0.8 # max data size ratio in Kmeans train, if larger than it, will downsample to meet this limit - maxCentroidsNum: 10240 # maximum centroids number in Kmeans train - minCentroidsNum: 16 # minimum centroids number in Kmeans train - minClusterSizeRatio: 0.01 # minimum cluster size / avg size in Kmeans train - maxClusterSizeRatio: 10 # maximum cluster size / avg size in Kmeans train - maxClusterSize: 5g # maximum cluster size in Kmeans train - levelzero: - forceTrigger: - minSize: 8388608 # The minimum size in bytes to force trigger a LevelZero Compaction, default as 8MB - maxSize: 67108864 # The maximum size in bytes to force trigger a LevelZero Compaction, default as 64MB - deltalogMinNum: 10 # The minimum number of deltalog files to force trigger a LevelZero Compaction - deltalogMaxNum: 30 # The maximum number of deltalog files to force trigger a LevelZero Compaction, default as 30 - syncSegmentsInterval: 300 # The time interval for regularly syncing segments - enableGarbageCollection: true # Switch value to control whether to enable garbage collection to clear the discarded data in MinIO or S3 service. - gc: - interval: 3600 # The interval at which data coord performs garbage collection, unit: second. - missingTolerance: 86400 # The retention duration of the unrecorded binary log (binlog) files. Setting a reasonably large value for this parameter avoids erroneously deleting the newly created binlog files that lack metadata. Unit: second. - dropTolerance: 10800 # The retention duration of the binlog files of the deleted segments before they are cleared, unit: second. - removeConcurrent: 32 # number of concurrent goroutines to remove dropped s3 objects - scanInterval: 168 # scanning interval, in hours, for garbage collection of orphan files (files on object storage that are not registered in meta) - enableActiveStandby: false - brokerTimeout: 5000 # 5000ms, dataCoord broker rpc timeout - autoBalance: true # Enable auto balance - checkAutoBalanceConfigInterval: 10 # the interval for checking the auto balance config - import: - filesPerPreImportTask: 2 # The maximum number of files allowed per pre-import task. - taskRetention: 10800 # The retention period in seconds for tasks in the Completed or Failed state. - maxSizeInMBPerImportTask: 6144 # To prevent generating small segments, we will re-group imported files. This parameter represents the sum of file sizes in each group (each ImportTask). - scheduleInterval: 2 # The interval for scheduling import, measured in seconds. - checkIntervalHigh: 2 # The interval for checking import, measured in seconds, is set to a high frequency for the import checker. - checkIntervalLow: 120 # The interval for checking import, measured in seconds, is set to a low frequency for the import checker.
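For illustration only: the re-grouping behavior described by the maxSizeInMBPerImportTask comment above can be modeled as a greedy grouping of files by cumulative size. This is a hypothetical sketch of what the comment describes, not dataCoord's actual import scheduler; the file names and the 6144 MB cap are assumptions taken from the defaults shown above.

    # Hypothetical sketch: greedily re-group import files so that the total size of
    # each group (one ImportTask) stays under maxSizeInMBPerImportTask.
    from typing import List, Tuple

    def regroup_import_files(files: List[Tuple[str, int]], max_task_size_mb: int = 6144) -> List[List[str]]:
        """files: (name, size_in_mb) pairs; returns one list of file names per ImportTask."""
        tasks, current, current_size = [], [], 0
        for name, size_mb in files:
            if current and current_size + size_mb > max_task_size_mb:
                tasks.append(current)          # close the current group and start a new one
                current, current_size = [], 0
            current.append(name)
            current_size += size_mb
        if current:
            tasks.append(current)
        return tasks

    # Example: three small files fit into one task, the next file spills into a second task.
    print(regroup_import_files([("a.parquet", 2000), ("b.parquet", 2000), ("c.parquet", 2000), ("d.parquet", 5000)]))

Under these assumptions the call prints [['a.parquet', 'b.parquet', 'c.parquet'], ['d.parquet']], matching the stated goal of avoiding many small segments per import task.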
- maxImportFileNumPerReq: 1024 # The maximum number of files allowed per single import request. - waitForIndex: true # Indicates whether the import operation waits for the completion of index building. - gracefulStopTimeout: 5 # seconds. force stop node without graceful stop - slot: - clusteringCompactionUsage: 16 # slot usage of clustering compaction job. - mixCompactionUsage: 8 # slot usage of mix compaction job. - l0DeleteCompactionUsage: 8 # slot usage of l0 compaction job. - ip: # TCP/IP address of dataCoord. If not specified, use the first unicastable address - port: 13333 # TCP port of dataCoord - grpc: - serverMaxSendSize: 536870912 # The maximum size of each RPC request that the dataCoord can send, unit: byte - serverMaxRecvSize: 268435456 # The maximum size of each RPC request that the dataCoord can receive, unit: byte - clientMaxSendSize: 268435456 # The maximum size of each RPC request that the clients on dataCoord can send, unit: byte - clientMaxRecvSize: 536870912 # The maximum size of each RPC request that the clients on dataCoord can receive, unit: byte - -dataNode: - dataSync: - flowGraph: - maxQueueLength: 16 # Maximum length of task queue in flowgraph - maxParallelism: 1024 # Maximum number of tasks executed in parallel in the flowgraph - maxParallelSyncMgrTasks: 256 # The max concurrent sync task number of datanode sync mgr globally - skipMode: - enable: true # Support skip some timetick message to reduce CPU usage - skipNum: 4 # Consume one for every n records skipped - coldTime: 60 # Turn on skip mode after there are only timetick msg for x seconds - segment: - # The maximum size of each binlog file in a segment buffered in memory. Binlog files whose size exceeds this value are then flushed to MinIO or S3 service. - # Unit: Byte - # Setting this parameter too small causes the system to store a small amount of data too frequently. Setting it too large increases the system's demand for memory. - insertBufSize: 16777216 - deleteBufBytes: 16777216 # Max buffer size in bytes to flush del for a single channel, default as 16MB - syncPeriod: 600 # The period to sync segments if buffer is not empty. - memory: - forceSyncEnable: true # Set true to force sync if memory usage is too high - forceSyncSegmentNum: 1 # number of segments to sync, segments with top largest buffer will be synced. - checkInterval: 3000 # the interval to check datanode memory usage, in milliseconds - forceSyncWatermark: 0.5 # memory watermark for standalone, upon reaching this watermark, segments will be synced. - timetick: - byRPC: true - interval: 500 - channel: - # specify the size of global work pool of all channels - # if this parameter <= 0, will set it as the maximum number of CPUs that can be executing - # suggest to set it bigger on large collection numbers to avoid blocking - workPoolSize: -1 - # specify the size of global work pool for channel checkpoint updating - # if this parameter <= 0, will set it as 10 - updateChannelCheckpointMaxParallel: 10 - updateChannelCheckpointInterval: 60 # the interval duration(in seconds) for datanode to update channel checkpoint of each channel - updateChannelCheckpointRPCTimeout: 20 # timeout in seconds for UpdateChannelCheckpoint RPC call - maxChannelCheckpointsPerPRC: 128 # The maximum number of channel checkpoints per UpdateChannelCheckpoint RPC. - channelCheckpointUpdateTickInSeconds: 10 # The frequency, in seconds, at which the channel checkpoint updater executes updates. 
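As a rough illustration of how the dataNode buffer settings above interact, the sketch below models the flush decision the comments describe: a segment buffer is flushed once it reaches insertBufSize, and when memory usage crosses forceSyncWatermark the largest buffers are force-synced as well. This is a simplified toy model under stated assumptions, not the dataNode's actual sync manager; the segment names and buffer sizes in the example are made up.

    # Toy model of the flush triggers described by the comments above (not the real dataNode code).
    from typing import Dict, List

    INSERT_BUF_SIZE = 16 * 1024 * 1024       # dataNode.segment.insertBufSize, in bytes
    FORCE_SYNC_WATERMARK = 0.5               # dataNode.segment.memory.forceSyncWatermark
    FORCE_SYNC_SEGMENT_NUM = 1               # dataNode.segment.memory.forceSyncSegmentNum

    def segments_to_flush(buffers: Dict[str, int], memory_usage_ratio: float,
                          force_sync_enable: bool = True) -> List[str]:
        """buffers: segment id -> buffered bytes; returns segment ids to flush/sync."""
        # Rule 1: any buffer that reached insertBufSize is flushed.
        to_flush = {seg for seg, size in buffers.items() if size >= INSERT_BUF_SIZE}
        # Rule 2: if memory usage is at or above the watermark, force-sync the largest buffers.
        if force_sync_enable and memory_usage_ratio >= FORCE_SYNC_WATERMARK:
            largest = sorted(buffers, key=buffers.get, reverse=True)[:FORCE_SYNC_SEGMENT_NUM]
            to_flush.update(largest)
        return sorted(to_flush)

    # Example: only seg-b exceeds insertBufSize; with memory at 0.6 the watermark rule
    # also picks the largest buffer, which is seg-b here as well.
    print(segments_to_flush({"seg-a": 4 << 20, "seg-b": 20 << 20}, memory_usage_ratio=0.6))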
- import: - maxConcurrentTaskNum: 16 # The maximum number of import/pre-import tasks allowed to run concurrently on a datanode. - maxImportFileSizeInGB: 16 # The maximum file size (in GB) for an import file, where an import file refers to either a Row-Based file or a set of Column-Based files. - readBufferSizeInMB: 16 # The data block size (in MB) read from chunk manager by the datanode during import. - compaction: - levelZeroBatchMemoryRatio: 0.05 # The minimal memory ratio of free memory for level zero compaction executing in batch mode - levelZeroMaxBatchSize: -1 # Max batch size refers to the max number of L1/L2 segments in a batch when executing L0 compaction. Default to -1, any value that is less than 1 means no limit. Valid range: >= 1. - gracefulStopTimeout: 1800 # seconds. force stop node without graceful stop - slot: - slotCap: 16 # The maximum number of tasks(e.g. compaction, importing) allowed to run concurrently on a datanode - clusteringCompaction: - memoryBufferRatio: 0.1 # The ratio of memory buffer of clustering compaction. Data larger than threshold will be flushed to storage. - workPoolSize: 8 # worker pool size for one clustering compaction job. - ip: # TCP/IP address of dataNode. If not specified, use the first unicastable address - port: 21124 # TCP port of dataNode - grpc: - serverMaxSendSize: 536870912 # The maximum size of each RPC request that the dataNode can send, unit: byte - serverMaxRecvSize: 268435456 # The maximum size of each RPC request that the dataNode can receive, unit: byte - clientMaxSendSize: 268435456 # The maximum size of each RPC request that the clients on dataNode can send, unit: byte - clientMaxRecvSize: 536870912 # The maximum size of each RPC request that the clients on dataNode can receive, unit: byte - -# This topic introduces the message channel-related configurations of Milvus. -msgChannel: - chanNamePrefix: - # Root name prefix of the channel when a message channel is created. - # It is recommended to change this parameter before starting Milvus for the first time. - # To share a Pulsar instance among multiple Milvus instances, consider changing this to a name rather than the default one for each Milvus instance before you start them. - cluster: by-dev - # Sub-name prefix of the message channel where the root coord publishes time tick messages. - # The complete channel name prefix is ${msgChannel.chanNamePrefix.cluster}-${msgChannel.chanNamePrefix.rootCoordTimeTick} - # Caution: Changing this parameter after using Milvus for a period of time will affect your access to old data. - # It is recommended to change this parameter before starting Milvus for the first time. - rootCoordTimeTick: rootcoord-timetick - # Sub-name prefix of the message channel where the root coord publishes its own statistics messages. - # The complete channel name prefix is ${msgChannel.chanNamePrefix.cluster}-${msgChannel.chanNamePrefix.rootCoordStatistics} - # Caution: Changing this parameter after using Milvus for a period of time will affect your access to old data. - # It is recommended to change this parameter before starting Milvus for the first time. - rootCoordStatistics: rootcoord-statistics - # Sub-name prefix of the message channel where the root coord publishes Data Manipulation Language (DML) messages. - # The complete channel name prefix is ${msgChannel.chanNamePrefix.cluster}-${msgChannel.chanNamePrefix.rootCoordDml} - # Caution: Changing this parameter after using Milvus for a period of time will affect your access to old data. 
- # It is recommended to change this parameter before starting Milvus for the first time. - rootCoordDml: rootcoord-dml - replicateMsg: replicate-msg - # Sub-name prefix of the message channel where the query node publishes time tick messages. - # The complete channel name prefix is ${msgChannel.chanNamePrefix.cluster}-${msgChannel.chanNamePrefix.queryTimeTick} - # Caution: Changing this parameter after using Milvus for a period of time will affect your access to old data. - # It is recommended to change this parameter before starting Milvus for the first time. - queryTimeTick: queryTimeTick - # Sub-name prefix of the message channel where the data coord publishes time tick messages. - # The complete channel name prefix is ${msgChannel.chanNamePrefix.cluster}-${msgChannel.chanNamePrefix.dataCoordTimeTick} - # Caution: Changing this parameter after using Milvus for a period of time will affect your access to old data. - # It is recommended to change this parameter before starting Milvus for the first time. - dataCoordTimeTick: datacoord-timetick-channel - # Sub-name prefix of the message channel where the data coord publishes segment information messages. - # The complete channel name prefix is ${msgChannel.chanNamePrefix.cluster}-${msgChannel.chanNamePrefix.dataCoordSegmentInfo} - # Caution: Changing this parameter after using Milvus for a period of time will affect your access to old data. - # It is recommended to change this parameter before starting Milvus for the first time. - dataCoordSegmentInfo: segment-info-channel - subNamePrefix: - # Subscription name prefix of the data coord. - # Caution: Changing this parameter after using Milvus for a period of time will affect your access to old data. - # It is recommended to change this parameter before starting Milvus for the first time. - dataCoordSubNamePrefix: dataCoord - # Subscription name prefix of the data node. - # Caution: Changing this parameter after using Milvus for a period of time will affect your access to old data. - # It is recommended to change this parameter before starting Milvus for the first time. - dataNodeSubNamePrefix: dataNode - -# Configures the system log output. -log: - # Milvus log level. Option: debug, info, warn, error, panic, and fatal. - # It is recommended to use debug level under test and development environments, and info level in production environment. - level: info - file: - # Root path to the log files. - # The default value is set empty, indicating to output log files to standard output (stdout) and standard error (stderr). - # If this parameter is set to a valid local path, Milvus writes and stores log files in this path. - # Set this parameter as the path that you have permission to write. - rootPath: - maxSize: 300 # The maximum size of a log file, unit: MB. - maxAge: 10 # The maximum retention time before a log file is automatically cleared, unit: day. The minimum value is 1. - maxBackups: 20 # The maximum number of log files to back up, unit: day. The minimum value is 1. - format: text # Milvus log format. Option: text and JSON - stdout: true # Stdout enable or not - -grpc: - log: - level: WARNING - gracefulStopTimeout: 10 # second, time to wait graceful stop finish - client: - compressionEnabled: false - dialTimeout: 200 - keepAliveTime: 10000 - keepAliveTimeout: 20000 - maxMaxAttempts: 10 - initialBackoff: 0.2 - maxBackoff: 10 - minResetInterval: 1000 - maxCancelError: 32 - minSessionCheckInterval: 200 - -# Configure the proxy tls enable. 
-tls: - serverPemPath: configs/cert/server.pem - serverKeyPath: configs/cert/server.key - caPemPath: configs/cert/ca.pem - -common: - defaultPartitionName: _default # Name of the default partition when a collection is created - defaultIndexName: _default_idx # Name of the index when it is created with name unspecified - entityExpiration: -1 # Entity expiration in seconds, CAUTION -1 means never expire - indexSliceSize: 16 # Index slice size in MB - threadCoreCoefficient: - highPriority: 10 # This parameter specify how many times the number of threads is the number of cores in high priority pool - middlePriority: 5 # This parameter specify how many times the number of threads is the number of cores in middle priority pool - lowPriority: 1 # This parameter specify how many times the number of threads is the number of cores in low priority pool - buildIndexThreadPoolRatio: 0.75 - DiskIndex: - MaxDegree: 56 - SearchListSize: 100 - PQCodeBudgetGBRatio: 0.125 - BuildNumThreadsRatio: 1 - SearchCacheBudgetGBRatio: 0.1 - LoadNumThreadRatio: 8 - BeamWidthRatio: 4 - gracefulTime: 5000 # milliseconds. it represents the interval (in ms) by which the request arrival time needs to be subtracted in the case of Bounded Consistency. - gracefulStopTimeout: 1800 # seconds. it will force quit the server if the graceful stop process is not completed during this time. - storageType: remote # please adjust in embedded Milvus: local, available values are [local, remote, opendal], value minio is deprecated, use remote instead - # Default value: auto - # Valid values: [auto, avx512, avx2, avx, sse4_2] - # This configuration is only used by querynode and indexnode, it selects CPU instruction set for Searching and Index-building. - simdType: auto - security: - authorizationEnabled: false - # The superusers will ignore some system check processes, - # like the old password verification when updating the credential - superUsers: - defaultRootPassword: Milvus # default password for root user - tlsMode: 0 - session: - ttl: 30 # ttl value when session granting a lease to register service - retryTimes: 30 # retry times when session sending etcd requests - locks: - metrics: - enable: false # whether gather statistics for metrics locks - threshold: - info: 500 # minimum milliseconds for printing durations in info level - warn: 1000 # minimum milliseconds for printing durations in warn level - storage: - scheme: s3 - enablev2: false - # Whether to disable the internal time messaging mechanism for the system. - # If disabled (set to false), the system will not allow DML operations, including insertion, deletion, queries, and searches. - # This helps Milvus-CDC synchronize incremental data - ttMsgEnabled: true - traceLogMode: 0 # trace request info - bloomFilterSize: 100000 # bloom filter initial size - maxBloomFalsePositive: 0.001 # max false positive rate for bloom filter - bloomFilterType: BasicBloomFilter # bloom filter type, support BasicBloomFilter and BlockedBloomFilter - bloomFilterApplyBatchSize: 1000 # batch size when to apply pk to bloom filter - usePartitionKeyAsClusteringKey: false # if true, do clustering compaction and segment prune on partition key field - useVectorAsClusteringKey: false # if true, do clustering compaction and segment prune on vector field - enableVectorClusteringKey: false # if true, enable vector clustering key and vector clustering compaction - -# QuotaConfig, configurations of Milvus quota and limits. -# By default, we enable: -# 1. TT protection; -# 2. Memory protection. -# 3. 
Disk quota protection. -# You can enable: -# 1. DML throughput limitation; -# 2. DDL, DQL qps/rps limitation; -# 3. DQL Queue length/latency protection; -# 4. DQL result rate protection; -# If necessary, you can also manually force to deny RW requests. -quotaAndLimits: - enabled: true # `true` to enable quota and limits, `false` to disable. - # quotaCenterCollectInterval is the time interval that quotaCenter - # collects metrics from Proxies, Query cluster and Data cluster. - # seconds, (0 ~ 65536) - quotaCenterCollectInterval: 3 - limits: - allocRetryTimes: 15 # retry times when delete alloc forward data from rate limit failed - allocWaitInterval: 1000 # retry wait duration when delete alloc forward data rate failed, in millisecond - complexDeleteLimitEnable: false # whether complex delete check forward data by limiter - maxCollectionNum: 65536 - maxCollectionNumPerDB: 65536 # Maximum number of collections per database. - maxInsertSize: -1 # maximum size of a single insert request, in bytes, -1 means no limit - maxResourceGroupNumOfQueryNode: 1024 # maximum number of resource groups of query nodes - ddl: - enabled: false # Whether DDL request throttling is enabled. - # Maximum number of collection-related DDL requests per second. - # Setting this item to 10 indicates that Milvus processes no more than 10 collection-related DDL requests per second, including collection creation requests, collection drop requests, collection load requests, and collection release requests. - # To use this setting, set quotaAndLimits.ddl.enabled to true at the same time. - collectionRate: -1 - # Maximum number of partition-related DDL requests per second. - # Setting this item to 10 indicates that Milvus processes no more than 10 partition-related requests per second, including partition creation requests, partition drop requests, partition load requests, and partition release requests. - # To use this setting, set quotaAndLimits.ddl.enabled to true at the same time. - partitionRate: -1 - db: - collectionRate: -1 # qps of db level , default no limit, rate for CreateCollection, DropCollection, LoadCollection, ReleaseCollection - partitionRate: -1 # qps of db level, default no limit, rate for CreatePartition, DropPartition, LoadPartition, ReleasePartition - indexRate: - enabled: false # Whether index-related request throttling is enabled. - # Maximum number of index-related requests per second. - # Setting this item to 10 indicates that Milvus processes no more than 10 partition-related requests per second, including index creation requests and index drop requests. - # To use this setting, set quotaAndLimits.indexRate.enabled to true at the same time. - max: -1 - db: - max: -1 # qps of db level, default no limit, rate for CreateIndex, DropIndex - flushRate: - enabled: true # Whether flush request throttling is enabled. - # Maximum number of flush requests per second. - # Setting this item to 10 indicates that Milvus processes no more than 10 flush requests per second. - # To use this setting, set quotaAndLimits.flushRate.enabled to true at the same time. - max: -1 - collection: - max: 0.1 # qps, default no limit, rate for flush at collection level. - db: - max: -1 # qps of db level, default no limit, rate for flush - compactionRate: - enabled: false # Whether manual compaction request throttling is enabled. - # Maximum number of manual-compaction requests per second. - # Setting this item to 10 indicates that Milvus processes no more than 10 manual-compaction requests per second. 
- # To use this setting, set quotaAndLimits.compaction.enabled to true at the same time. - max: -1 - db: - max: -1 # qps of db level, default no limit, rate for manualCompaction - dml: - enabled: false # Whether DML request throttling is enabled. - insertRate: - # Highest data insertion rate per second. - # Setting this item to 5 indicates that Milvus only allows data insertion at the rate of 5 MB/s. - # To use this setting, set quotaAndLimits.dml.enabled to true at the same time. - max: -1 - db: - max: -1 # MB/s, default no limit - collection: - # Highest data insertion rate per collection per second. - # Setting this item to 5 indicates that Milvus only allows data insertion to any collection at the rate of 5 MB/s. - # To use this setting, set quotaAndLimits.dml.enabled to true at the same time. - max: -1 - partition: - max: -1 # MB/s, default no limit - upsertRate: - max: -1 # MB/s, default no limit - db: - max: -1 # MB/s, default no limit - collection: - max: -1 # MB/s, default no limit - partition: - max: -1 # MB/s, default no limit - deleteRate: - # Highest data deletion rate per second. - # Setting this item to 0.1 indicates that Milvus only allows data deletion at the rate of 0.1 MB/s. - # To use this setting, set quotaAndLimits.dml.enabled to true at the same time. - max: -1 - db: - max: -1 # MB/s, default no limit - collection: - # Highest data deletion rate per second. - # Setting this item to 0.1 indicates that Milvus only allows data deletion from any collection at the rate of 0.1 MB/s. - # To use this setting, set quotaAndLimits.dml.enabled to true at the same time. - max: -1 - partition: - max: -1 # MB/s, default no limit - bulkLoadRate: - max: -1 # MB/s, default no limit, not support yet. TODO: limit bulkLoad rate - db: - max: -1 # MB/s, default no limit, not support yet. TODO: limit db bulkLoad rate - collection: - max: -1 # MB/s, default no limit, not support yet. TODO: limit collection bulkLoad rate - partition: - max: -1 # MB/s, default no limit, not support yet. TODO: limit partition bulkLoad rate - dql: - enabled: false # Whether DQL request throttling is enabled. - searchRate: - # Maximum number of vectors to search per second. - # Setting this item to 100 indicates that Milvus only allows searching 100 vectors per second no matter whether these 100 vectors are all in one search or scattered across multiple searches. - # To use this setting, set quotaAndLimits.dql.enabled to true at the same time. - max: -1 - db: - max: -1 # vps (vectors per second), default no limit - collection: - # Maximum number of vectors to search per collection per second. - # Setting this item to 100 indicates that Milvus only allows searching 100 vectors per second per collection no matter whether these 100 vectors are all in one search or scattered across multiple searches. - # To use this setting, set quotaAndLimits.dql.enabled to true at the same time. - max: -1 - partition: - max: -1 # vps (vectors per second), default no limit - queryRate: - # Maximum number of queries per second. - # Setting this item to 100 indicates that Milvus only allows 100 queries per second. - # To use this setting, set quotaAndLimits.dql.enabled to true at the same time. - max: -1 - db: - max: -1 # qps, default no limit - collection: - # Maximum number of queries per collection per second. - # Setting this item to 100 indicates that Milvus only allows 100 queries per collection per second. - # To use this setting, set quotaAndLimits.dql.enabled to true at the same time. 
- max: -1 - partition: - max: -1 # qps, default no limit - limitWriting: - # forceDeny false means dml requests are allowed (except for some - # specific conditions, such as memory of nodes to water marker), true means always reject all dml requests. - forceDeny: false - ttProtection: - enabled: false - # maxTimeTickDelay indicates the backpressure for DML Operations. - # DML rates would be reduced according to the ratio of time tick delay to maxTimeTickDelay, - # if time tick delay is greater than maxTimeTickDelay, all DML requests would be rejected. - # seconds - maxTimeTickDelay: 300 - memProtection: - # When memory usage > memoryHighWaterLevel, all dml requests would be rejected; - # When memoryLowWaterLevel < memory usage < memoryHighWaterLevel, reduce the dml rate; - # When memory usage < memoryLowWaterLevel, no action. - enabled: true - dataNodeMemoryLowWaterLevel: 0.85 # (0, 1], memoryLowWaterLevel in DataNodes - dataNodeMemoryHighWaterLevel: 0.95 # (0, 1], memoryHighWaterLevel in DataNodes - queryNodeMemoryLowWaterLevel: 0.85 # (0, 1], memoryLowWaterLevel in QueryNodes - queryNodeMemoryHighWaterLevel: 0.95 # (0, 1], memoryHighWaterLevel in QueryNodes - growingSegmentsSizeProtection: - # No action will be taken if the growing segments size is less than the low watermark. - # When the growing segments size exceeds the low watermark, the dml rate will be reduced, - # but the rate will not be lower than minRateRatio * dmlRate. - enabled: false - minRateRatio: 0.5 - lowWaterLevel: 0.2 - highWaterLevel: 0.4 - diskProtection: - enabled: true # When the total file size of object storage is greater than `diskQuota`, all dml requests would be rejected; - diskQuota: -1 # MB, (0, +inf), default no limit - diskQuotaPerDB: -1 # MB, (0, +inf), default no limit - diskQuotaPerCollection: -1 # MB, (0, +inf), default no limit - diskQuotaPerPartition: -1 # MB, (0, +inf), default no limit - l0SegmentsRowCountProtection: - enabled: false # switch to enable l0 segment row count quota - lowWaterLevel: 32768 # l0 segment row count quota, low water level - highWaterLevel: 65536 # l0 segment row count quota, low water level - limitReading: - # forceDeny false means dql requests are allowed (except for some - # specific conditions, such as collection has been dropped), true means always reject all dql requests. - forceDeny: false - queueProtection: - enabled: false - # nqInQueueThreshold indicated that the system was under backpressure for Search/Query path. - # If NQ in any QueryNode's queue is greater than nqInQueueThreshold, search&query rates would gradually cool off - # until the NQ in queue no longer exceeds nqInQueueThreshold. We think of the NQ of query request as 1. - # int, default no limit - nqInQueueThreshold: -1 - # queueLatencyThreshold indicated that the system was under backpressure for Search/Query path. - # If dql latency of queuing is greater than queueLatencyThreshold, search&query rates would gradually cool off - # until the latency of queuing no longer exceeds queueLatencyThreshold. - # The latency here refers to the averaged latency over a period of time. - # milliseconds, default no limit - queueLatencyThreshold: -1 - resultProtection: - enabled: false - # maxReadResultRate indicated that the system was under backpressure for Search/Query path. - # If dql result rate is greater than maxReadResultRate, search&query rates would gradually cool off - # until the read result rate no longer exceeds maxReadResultRate. 
- # MB/s, default no limit - maxReadResultRate: -1 - maxReadResultRatePerDB: -1 - maxReadResultRatePerCollection: -1 - # colOffSpeed is the speed of search&query rates cool off. - # (0, 1] - coolOffSpeed: 0.9 - -trace: - # trace exporter type, default is stdout, - # optional values: ['noop','stdout', 'jaeger', 'otlp'] - exporter: noop - # fraction of traceID based sampler, - # optional values: [0, 1] - # Fractions >= 1 will always sample. Fractions < 0 are treated as zero. - sampleFraction: 0 - jaeger: - url: # when exporter is jaeger should set the jaeger's URL - otlp: - endpoint: # example: "127.0.0.1:4317" for grpc, "127.0.0.1:4318" for http - method: # otlp export method, acceptable values: ["grpc", "http"], using "grpc" by default - secure: true - initTimeoutSeconds: 10 # segcore initialization timeout in seconds, preventing otlp grpc hangs forever - -#when using GPU indexing, Milvus will utilize a memory pool to avoid frequent memory allocation and deallocation. -#here, you can set the size of the memory occupied by the memory pool, with the unit being MB. -#note that there is a possibility of Milvus crashing when the actual memory demand exceeds the value set by maxMemSize. -#if initMemSize and MaxMemSize both set zero, -#milvus will automatically initialize half of the available GPU memory, -#maxMemSize will the whole available GPU memory. -gpu: - initMemSize: # Gpu Memory Pool init size - maxMemSize: # Gpu Memory Pool Max size diff --git a/comps/dataprep/milvus/langchain/prepare_doc_milvus.py b/comps/dataprep/milvus/langchain/prepare_doc_milvus.py deleted file mode 100644 index a741fc634e..0000000000 --- a/comps/dataprep/milvus/langchain/prepare_doc_milvus.py +++ /dev/null @@ -1,459 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import json -import os -from pathlib import Path -from typing import List, Optional, Union - -from config import ( - COLLECTION_NAME, - LOCAL_EMBEDDING_MODEL, - MILVUS_HOST, - MILVUS_PORT, - TEI_EMBEDDING_ENDPOINT, - TEI_EMBEDDING_MODEL, -) -from fastapi import Body, File, Form, HTTPException, UploadFile -from langchain.text_splitter import RecursiveCharacterTextSplitter -from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceHubEmbeddings, OpenAIEmbeddings -from langchain_core.documents import Document -from langchain_milvus.vectorstores import Milvus -from langchain_text_splitters import HTMLHeaderTextSplitter - -from comps import CustomLogger, DocPath, opea_microservices, register_microservice -from comps.dataprep.src.utils import ( - create_upload_folder, - decode_filename, - document_loader, - encode_filename, - get_separators, - get_tables_result, - parse_html_new, - remove_folder_with_ignore, - save_content_to_local_disk, -) - -logger = CustomLogger("prepare_doc_milvus") -logflag = os.getenv("LOGFLAG", False) - -# workaround notes: cp comps/dataprep/utils.py ./milvus/utils.py -index_params = {"index_type": "FLAT", "metric_type": "IP", "params": {}} -partition_field_name = "filename" -upload_folder = "./uploaded_files/" -milvus_uri = f"http://{MILVUS_HOST}:{MILVUS_PORT}" - - -def ingest_chunks_to_milvus(file_name: str, chunks: List): - if logflag: - logger.info(f"[ ingest chunks ] file name: {file_name}") - - # insert documents to Milvus - insert_docs = [] - for chunk in chunks: - insert_docs.append(Document(page_content=chunk, metadata={partition_field_name: file_name})) - - # Batch size - batch_size = 32 - num_chunks = len(chunks) - - for i in range(0, num_chunks, batch_size): - if 
logflag: - logger.info(f"[ ingest chunks ] Current batch: {i}") - batch_docs = insert_docs[i : i + batch_size] - - try: - _ = Milvus.from_documents( - batch_docs, - embeddings, - collection_name=COLLECTION_NAME, - connection_args={"uri": milvus_uri}, - partition_key_field=partition_field_name, - ) - except Exception as e: - if logflag: - logger.info(f"[ ingest chunks ] fail to ingest chunks into Milvus. error: {e}") - raise HTTPException(status_code=500, detail=f"Fail to store chunks of file {file_name}.") - - if logflag: - logger.info(f"[ ingest chunks ] Docs ingested file {file_name} to Milvus collection {COLLECTION_NAME}.") - - return True - - -def ingest_data_to_milvus(doc_path: DocPath): - """Ingest document to Milvus.""" - path = doc_path.path - file_name = path.split("/")[-1] - if logflag: - logger.info(f"[ ingest data ] Parsing document {path}, file name: {file_name}.") - - if path.endswith(".html"): - headers_to_split_on = [ - ("h1", "Header 1"), - ("h2", "Header 2"), - ("h3", "Header 3"), - ] - text_splitter = HTMLHeaderTextSplitter(headers_to_split_on=headers_to_split_on) - else: - text_splitter = RecursiveCharacterTextSplitter( - chunk_size=doc_path.chunk_size, - chunk_overlap=doc_path.chunk_overlap, - add_start_index=True, - separators=get_separators(), - ) - - content = document_loader(path) - - if logflag: - logger.info("[ ingest data ] file content loaded") - - structured_types = [".xlsx", ".csv", ".json", "jsonl"] - _, ext = os.path.splitext(path) - - if ext in structured_types: - chunks = content - else: - chunks = text_splitter.split_text(content) - - if doc_path.process_table and path.endswith(".pdf"): - table_chunks = get_tables_result(path, doc_path.table_strategy) - chunks = chunks + table_chunks - if logflag: - logger.info(f"[ ingest data ] Done preprocessing. 
Created {len(chunks)} chunks of the original file.") - - return ingest_chunks_to_milvus(file_name, chunks) - - -def search_by_file(collection, file_name): - query = f"{partition_field_name} == '{file_name}'" - results = collection.query( - expr=query, - output_fields=[partition_field_name, "pk"], - ) - if logflag: - logger.info(f"[ search by file ] searched by {file_name}") - logger.info(f"[ search by file ] {len(results)} results: {results}") - return results - - -def search_all(collection): - results = collection.query(expr="pk >= 0", output_fields=[partition_field_name, "pk"]) - if logflag: - logger.info(f"[ search all ] {len(results)} results: {results}") - return results - - -def delete_all_data(my_milvus): - if logflag: - logger.info("[ delete all ] deleting all data in milvus") - if my_milvus.col: - my_milvus.col.drop() - if logflag: - logger.info("[ delete all ] delete success: all data") - - -def delete_by_partition_field(my_milvus, partition_field): - if logflag: - logger.info(f"[ delete partition ] deleting {partition_field_name} {partition_field}") - pks = my_milvus.get_pks(f'{partition_field_name} == "{partition_field}"') - if logflag: - logger.info(f"[ delete partition ] target pks: {pks}") - res = my_milvus.delete(pks) - my_milvus.col.flush() - if logflag: - logger.info(f"[ delete partition ] delete success: {res}") - - -@register_microservice(name="opea_service@prepare_doc_milvus", endpoint="/v1/dataprep", host="0.0.0.0", port=6010) -async def ingest_documents( - files: Optional[Union[UploadFile, List[UploadFile]]] = File(None), - link_list: Optional[str] = Form(None), - chunk_size: int = Form(1000), - chunk_overlap: int = Form(100), - process_table: bool = Form(False), - table_strategy: str = Form("fast"), -): - if logflag: - logger.info(f"[ upload ] files:{files}") - logger.info(f"[ upload ] link_list:{link_list}") - - if files and link_list: - raise HTTPException(status_code=400, detail="Provide either a file or a string list, not both.") - - # define Milvus obj - my_milvus = Milvus( - embedding_function=embeddings, - collection_name=COLLECTION_NAME, - connection_args={"uri": milvus_uri}, - index_params=index_params, - auto_id=True, - ) - - if files: - if not isinstance(files, list): - files = [files] - uploaded_files = [] - - for file in files: - encode_file = encode_filename(file.filename) - save_path = upload_folder + encode_file - if logflag: - logger.info(f"[ upload ] processing file {save_path}") - - if my_milvus.col: - # check whether the file is already uploaded - try: - search_res = search_by_file(my_milvus.col, encode_file) - except Exception as e: - raise HTTPException( - status_code=500, detail=f"Failed when searching in Milvus db for file {file.filename}." - ) - if len(search_res) > 0: - if logflag: - logger.info(f"[ upload ] File {file.filename} already exists.") - raise HTTPException( - status_code=400, - detail=f"Uploaded file {file.filename} already exists. 
Please change file name.", - ) - - await save_content_to_local_disk(save_path, file) - ingest_data_to_milvus( - DocPath( - path=save_path, - chunk_size=chunk_size, - chunk_overlap=chunk_overlap, - process_table=process_table, - table_strategy=table_strategy, - ), - ) - uploaded_files.append(save_path) - if logflag: - logger.info(f"Saved file {save_path} into local disk.") - - # def process_files_wrapper(files): - # if not isinstance(files, list): - # files = [files] - # for file in files: - # encode_file = encode_filename(file.filename) - # save_path = upload_folder + encode_file - # ingest_data_to_milvus( - # DocPath( - # path=save_path, - # chunk_size=chunk_size, - # chunk_overlap=chunk_overlap, - # process_table=process_table, - # table_strategy=table_strategy, - # ), - # ) - - # try: - # # Create a SparkContext - # conf = SparkConf().setAppName("Parallel-dataprep").setMaster("local[*]") - # sc = SparkContext(conf=conf) - # # Create an RDD with parallel processing - # parallel_num = min(len(uploaded_files), os.cpu_count()) - # rdd = sc.parallelize(uploaded_files, parallel_num) - # print(uploaded_files) - # # Perform a parallel operation - # rdd_trans = rdd.map(process_files_wrapper) - # rdd_trans.collect() - # # Stop the SparkContext - # sc.stop() - # except: - # # Stop the SparkContext - # sc.stop() - results = {"status": 200, "message": "Data preparation succeeded"} - if logflag: - logger.info(results) - return results - - if link_list: - link_list = json.loads(link_list) # Parse JSON string to list - if not isinstance(link_list, list): - raise HTTPException(status_code=400, detail="link_list should be a list.") - - for link in link_list: - encoded_link = encode_filename(link) - if logflag: - logger.info(f"[ upload ] processing link {encoded_link}") - - # check whether the link file already exists - if my_milvus.col: - try: - search_res = search_by_file(my_milvus.col, encoded_link + ".txt") - except Exception as e: - raise HTTPException(status_code=500, detail=f"Failed when searching in Milvus db for link {link}.") - if len(search_res) > 0: - if logflag: - logger.info(f"[ upload ] Link {link} already exists.") - raise HTTPException( - status_code=400, detail=f"Uploaded link {link} already exists. Please change link." 
- ) - - save_path = upload_folder + encoded_link + ".txt" - content = parse_html_new([link], chunk_size=chunk_size, chunk_overlap=chunk_overlap) - await save_content_to_local_disk(save_path, content) - ingest_data_to_milvus( - DocPath( - path=save_path, - chunk_size=chunk_size, - chunk_overlap=chunk_overlap, - process_table=process_table, - table_strategy=table_strategy, - ), - ) - if logflag: - logger.info(f"[ upload ] Successfully saved link list {link_list}") - return {"status": 200, "message": "Data preparation succeeded"} - - raise HTTPException(status_code=400, detail="Must provide either a file or a string list.") - - -@register_microservice( - name="opea_service@prepare_doc_milvus", endpoint="/v1/dataprep/get_file", host="0.0.0.0", port=6010 -) -async def rag_get_file_structure(): - if logflag: - logger.info("[ get ] start to get file structure") - - # define Milvus obj - my_milvus = Milvus( - embedding_function=embeddings, - collection_name=COLLECTION_NAME, - connection_args={"uri": milvus_uri}, - index_params=index_params, - auto_id=True, - ) - - # collection does not exist - if not my_milvus.col: - logger.info(f"[ get ] collection {COLLECTION_NAME} does not exist.") - return [] - - # get all files from db - try: - all_data = search_all(my_milvus.col) - except Exception as e: - raise HTTPException(status_code=500, detail="Failed when searching in Milvus db for all files.") - - # return [] if no data in db - if len(all_data) == 0: - return [] - - res_file = [res["filename"] for res in all_data] - unique_list = list(set(res_file)) - if logflag: - logger.info(f"[ get ] unique list from db: {unique_list}") - - # construct result file list in format - file_list = [] - for file_name in unique_list: - file_dict = { - "name": decode_filename(file_name), - "id": decode_filename(file_name), - "type": "File", - "parent": "", - } - file_list.append(file_dict) - - if logflag: - logger.info(f"[ get ] final file list: {file_list}") - return file_list - - -@register_microservice( - name="opea_service@prepare_doc_milvus", endpoint="/v1/dataprep/delete_file", host="0.0.0.0", port=6010 -) -async def delete_single_file(file_path: str = Body(..., embed=True)): - """Delete file according to `file_path`. - - `file_path`: - - file/link path (e.g. /path/to/file.txt) - - "all": delete all files uploaded - """ - if logflag: - logger.info(file_path) - - # define Milvus obj - my_milvus = Milvus( - embedding_function=embeddings, - collection_name=COLLECTION_NAME, - connection_args={"uri": milvus_uri}, - index_params=index_params, - auto_id=True, - ) - - # delete all uploaded files - if file_path == "all": - if logflag: - logger.info("[ delete ] deleting all files") - - delete_all_data(my_milvus) - - # delete files on local disk - try: - remove_folder_with_ignore(upload_folder) - except Exception as e: - if logflag: - logger.info(f"[ delete ] {e}. 
Fail to delete {upload_folder}.") - raise HTTPException(status_code=500, detail=f"Fail to delete {upload_folder}.") - - if logflag: - logger.info("[ delete ] successfully delete all files.") - - create_upload_folder(upload_folder) - if logflag: - logger.info("[ delete ] new upload folder created.") - return {"status": True} - - encode_file_name = encode_filename(file_path) - delete_path = Path(upload_folder + "/" + encode_file_name) - if logflag: - logger.info(f"[delete] delete_path: {delete_path}") - - # partially delete files - if delete_path.exists(): - - # TODO: check existence before delete - - # delete file - if delete_path.is_file(): - if logflag: - logger.info(f"[delete] deleting file {encode_file_name}") - try: - delete_by_partition_field(my_milvus, encode_file_name) - except Exception as e: - if logflag: - logger.info(f"[delete] fail to delete file {delete_path}: {e}") - return {"status": False} - delete_path.unlink() - if logflag: - logger.info(f"[delete] file {file_path} deleted") - return {"status": True} - - # delete folder - else: - if logflag: - logger.info(f"[delete] delete folder {file_path} is not supported for now.") - raise HTTPException(status_code=404, detail=f"Delete folder {file_path} is not supported for now.") - else: - raise HTTPException(status_code=404, detail="File/folder not found. Please check del_path.") - - -if __name__ == "__main__": - create_upload_folder(upload_folder) - - # Create vectorstore - if TEI_EMBEDDING_ENDPOINT: - # create embeddings using TEI endpoint service - if logflag: - logger.info(f"[ prepare_doc_milvus ] TEI_EMBEDDING_ENDPOINT:{TEI_EMBEDDING_ENDPOINT}") - embeddings = HuggingFaceHubEmbeddings(model=TEI_EMBEDDING_ENDPOINT) - else: - # create embeddings using local embedding model - if logflag: - logger.info(f"[ prepare_doc_milvus ] LOCAL_EMBEDDING_MODEL:{LOCAL_EMBEDDING_MODEL}") - embeddings = HuggingFaceBgeEmbeddings(model_name=LOCAL_EMBEDDING_MODEL) - - opea_microservices["opea_service@prepare_doc_milvus"].start() diff --git a/comps/dataprep/milvus/langchain/requirements.txt b/comps/dataprep/milvus/langchain/requirements.txt deleted file mode 100644 index 611c95a15d..0000000000 --- a/comps/dataprep/milvus/langchain/requirements.txt +++ /dev/null @@ -1,31 +0,0 @@ -beautifulsoup4 -cairosvg -docarray[full] -docx2txt -easyocr -fastapi -html2text -huggingface_hub -langchain -langchain-community -langchain-text-splitters -langchain_milvus -markdown -numpy -openai -opentelemetry-api -opentelemetry-exporter-otlp -opentelemetry-sdk -pandas -Pillow -prometheus-fastapi-instrumentator -pymupdf -pyspark -pytesseract -python-docx -python-pptx -sentence_transformers -shortuuid -tiktoken -unstructured[all-docs]==0.15.7 -uvicorn diff --git a/comps/dataprep/multimodal/redis/langchain/Dockerfile b/comps/dataprep/multimodal/redis/langchain/Dockerfile deleted file mode 100644 index b12c11c7bf..0000000000 --- a/comps/dataprep/multimodal/redis/langchain/Dockerfile +++ /dev/null @@ -1,35 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -FROM python:3.11-slim - -ENV LANG=C.UTF-8 - -ARG ARCH="cpu" - -RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ - build-essential \ - default-jre \ - libgl1-mesa-glx \ - libjemalloc-dev \ - wget - -# Install ffmpeg static build -RUN cd /root && wget https://johnvansickle.com/ffmpeg/builds/ffmpeg-git-amd64-static.tar.xz && \ - mkdir ffmpeg-git-amd64-static && tar -xvf ffmpeg-git-amd64-static.tar.xz -C ffmpeg-git-amd64-static --strip-components 1 && \ - 
export PATH=/root/ffmpeg-git-amd64-static:$PATH && \ - cp /root/ffmpeg-git-amd64-static/ffmpeg /usr/local/bin/ - -RUN mkdir -p /home/user - -COPY comps /home/user/comps - -RUN pip install --no-cache-dir --upgrade pip setuptools && \ - if [ ${ARCH} = "cpu" ]; then pip install --no-cache-dir torch torchvision --index-url https://download.pytorch.org/whl/cpu; fi && \ - pip install --no-cache-dir -r /home/user/comps/dataprep/multimodal/redis/langchain/requirements.txt - -ENV PYTHONPATH=$PYTHONPATH:/home/user - -WORKDIR /home/user/comps/dataprep/multimodal/redis/langchain - -ENTRYPOINT ["python", "prepare_videodoc_redis.py"] diff --git a/comps/dataprep/multimodal/redis/langchain/config.py b/comps/dataprep/multimodal/redis/langchain/config.py deleted file mode 100644 index 90a73d5a96..0000000000 --- a/comps/dataprep/multimodal/redis/langchain/config.py +++ /dev/null @@ -1,70 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import os - -# Models -EMBED_MODEL = os.getenv("EMBEDDING_MODEL_ID", "BridgeTower/bridgetower-large-itm-mlm-itc") -WHISPER_MODEL = os.getenv("WHISPER_MODEL", "small") - -# Redis Connection Information -REDIS_HOST = os.getenv("REDIS_HOST", "localhost") -REDIS_PORT = int(os.getenv("REDIS_PORT", 6379)) - -# Lvm Microservice Information -LVM_ENDPOINT = os.getenv("LVM_ENDPOINT", "http://localhost:9399/v1/lvm") - - -def get_boolean_env_var(var_name, default_value=False): - """Retrieve the boolean value of an environment variable. - - Args: - var_name (str): The name of the environment variable to retrieve. - default_value (bool): The default value to return if the variable - is not found. - Returns: - bool: The value of the environment variable, interpreted as a boolean. - """ - true_values = {"true", "1", "t", "y", "yes"} - false_values = {"false", "0", "f", "n", "no"} - - # Retrieve the environment variable's value - value = os.getenv(var_name, "").lower() - - # Decide the boolean value based on the content of the string - if value in true_values: - return True - elif value in false_values: - return False - else: - return default_value - - -def format_redis_conn_from_env(): - redis_url = os.getenv("REDIS_URL", None) - if redis_url: - return redis_url - else: - using_ssl = get_boolean_env_var("REDIS_SSL", False) - start = "rediss://" if using_ssl else "redis://" - - # if using RBAC - password = os.getenv("REDIS_PASSWORD", None) - username = os.getenv("REDIS_USERNAME", "default") - if password is not None: - start += f"{username}:{password}@" - - return start + f"{REDIS_HOST}:{REDIS_PORT}" - - -REDIS_URL = format_redis_conn_from_env() - -# Vector Index Configuration -INDEX_NAME = os.getenv("INDEX_NAME", "mm-rag-redis") - -current_file_path = os.path.abspath(__file__) -parent_dir = os.path.dirname(current_file_path) -REDIS_SCHEMA = os.getenv("REDIS_SCHEMA", "schema.yml") -TIMEOUT_SECONDS = int(os.getenv("TIMEOUT_SECONDS", 600)) -schema_path = os.path.join(parent_dir, REDIS_SCHEMA) -INDEX_SCHEMA = schema_path diff --git a/comps/dataprep/multimodal/redis/langchain/docker-compose-dataprep-redis.yaml b/comps/dataprep/multimodal/redis/langchain/docker-compose-dataprep-redis.yaml deleted file mode 100644 index e3dc78a97b..0000000000 --- a/comps/dataprep/multimodal/redis/langchain/docker-compose-dataprep-redis.yaml +++ /dev/null @@ -1,29 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -version: "3" -services: - redis-vector-db: - image: redis/redis-stack:7.2.0-v9 - container_name: redis-vector-db - ports: - - 
"6379:6379" - - "8001:8001" - dataprep-multimodal-redis: - image: opea/dataprep-multimodal-redis:latest - container_name: dataprep-multimodal-redis - ports: - - "6007:6007" - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - REDIS_URL: ${REDIS_URL} - INDEX_NAME: ${INDEX_NAME} - LVM_ENDPOINT: ${LVM_ENDPOINT} - restart: unless-stopped - -networks: - default: - driver: bridge diff --git a/comps/dataprep/multimodal/redis/langchain/prepare_videodoc_redis.py b/comps/dataprep/multimodal/redis/langchain/prepare_videodoc_redis.py deleted file mode 100644 index 960dcf3dbb..0000000000 --- a/comps/dataprep/multimodal/redis/langchain/prepare_videodoc_redis.py +++ /dev/null @@ -1,639 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import os -import shutil -import time -import uuid -from pathlib import Path -from typing import Any, Dict, Iterable, List, Optional, Type, Union - -from config import EMBED_MODEL, INDEX_NAME, INDEX_SCHEMA, LVM_ENDPOINT, REDIS_URL, WHISPER_MODEL -from fastapi import File, HTTPException, UploadFile -from langchain_community.utilities.redis import _array_to_buffer -from langchain_community.vectorstores import Redis -from langchain_community.vectorstores.redis.base import _generate_field_schema, _prepare_metadata -from langchain_core.embeddings import Embeddings -from langchain_core.utils import get_from_dict_or_env -from multimodal_utils import ( - clear_upload_folder, - convert_video_to_audio, - create_upload_folder, - delete_audio_file, - extract_frames_and_annotations_from_transcripts, - extract_frames_and_generate_captions, - extract_transcript_from_audio, - generate_annotations_from_transcript, - generate_id, - load_json_file, - load_whisper_model, - write_vtt, -) -from PIL import Image - -from comps import opea_microservices, register_microservice -from comps.third_parties.bridgetower.src.bridgetower_embedding import BridgeTowerEmbedding - -device = "cpu" -upload_folder = "./uploaded_files/" - - -class MultimodalRedis(Redis): - """Redis vector database to process multimodal data.""" - - @classmethod - def from_text_image_pairs_return_keys( - cls: Type[Redis], - texts: List[str], - images: List[str] = None, - embedding: Embeddings = BridgeTowerEmbedding, - metadatas: Optional[List[dict]] = None, - index_name: Optional[str] = None, - index_schema: Optional[Union[Dict[str, str], str, os.PathLike]] = None, - vector_schema: Optional[Dict[str, Union[str, int]]] = None, - **kwargs: Any, - ): - """ - Args: - texts (List[str]): List of texts to add to the vectorstore. - images (List[str]): Optional list of path-to-images to add to the vectorstore. If provided, the length of - the list of images must match the length of the list of text strings. - embedding (Embeddings): Embeddings to use for the vectorstore. - metadatas (Optional[List[dict]], optional): Optional list of metadata - dicts to add to the vectorstore. Defaults to None. - index_name (Optional[str], optional): Optional name of the index to - create or add to. Defaults to None. - index_schema (Optional[Union[Dict[str, str], str, os.PathLike]], optional): - Optional fields to index within the metadata. Overrides generated - schema. Defaults to None. - vector_schema (Optional[Dict[str, Union[str, int]]], optional): Optional - vector schema to use. Defaults to None. - **kwargs (Any): Additional keyword arguments to pass to the Redis client. 
- Returns: - Tuple[Redis, List[str]]: Tuple of the Redis instance and the keys of - the newly created documents. - Raises: - ValueError: If the number of texts does not equal the number of images. - ValueError: If the number of metadatas does not match the number of texts. - """ - # If images are provided, the length of texts must be equal to the length of images - if images and len(texts) != len(images): - raise ValueError(f"the len of captions {len(texts)} does not equal the len of images {len(images)}") - - redis_url = get_from_dict_or_env(kwargs, "redis_url", "REDIS_URL") - - if "redis_url" in kwargs: - kwargs.pop("redis_url") - - # flag to use generated schema - if "generate" in kwargs: - kwargs.pop("generate") - - # see if the user specified keys - keys = None - if "keys" in kwargs: - keys = kwargs.pop("keys") - - # Name of the search index if not given - if not index_name: - index_name = uuid.uuid4().hex - - # type check for metadata - if metadatas: - if isinstance(metadatas, list) and len(metadatas) != len(texts): # type: ignore # noqa: E501 - raise ValueError("Number of metadatas must match number of texts") - if not (isinstance(metadatas, list) and isinstance(metadatas[0], dict)): - raise ValueError("Metadatas must be a list of dicts") - generated_schema = _generate_field_schema(metadatas[0]) - - if not index_schema: - index_schema = generated_schema - - # Create instance - instance = cls( - redis_url, - index_name, - embedding, - index_schema=index_schema, - vector_schema=vector_schema, - **kwargs, - ) - # Add data to Redis - keys = ( - instance.add_text_image_pairs(texts, images, metadatas, keys=keys) - if images - else instance.add_text(texts, metadatas, keys=keys) - ) - return instance, keys - - def add_text_image_pairs( - self, - texts: Iterable[str], - images: Iterable[str], - metadatas: Optional[List[dict]] = None, - embeddings: Optional[List[List[float]]] = None, - batch_size: int = 2, - clean_metadata: bool = True, - **kwargs: Any, - ) -> List[str]: - """Add more embeddings of text-image pairs to the vectorstore. - - Args: - texts (Iterable[str]): Iterable of strings/text to add to the vectorstore. - images: Iterable[str]: Iterable of strings/text of path-to-image to add to the vectorstore. - metadatas (Optional[List[dict]], optional): Optional list of metadatas. - Defaults to None. - embeddings (Optional[List[List[float]]], optional): Optional pre-generated - embeddings. Defaults to None. - keys (List[str]) or ids (List[str]): Identifiers of entries. - Defaults to None. - batch_size (int, optional): Batch size to use for writes. Defaults to 1000. 
- Returns: - List[str]: List of ids added to the vectorstore - """ - ids = [] - # Get keys or ids from kwargs - # Other vectorstores use ids - keys_or_ids = kwargs.get("keys", kwargs.get("ids")) - - # type check for metadata - if metadatas: - if isinstance(metadatas, list) and len(metadatas) != len(texts): # type: ignore # noqa: E501 - raise ValueError("Number of metadatas must match number of texts") - if not (isinstance(metadatas, list) and isinstance(metadatas[0], dict)): - raise ValueError("Metadatas must be a list of dicts") - pil_imgs = [Image.open(img) for img in images] - if not embeddings: - embeddings = self._embeddings.embed_image_text_pairs(list(texts), pil_imgs, batch_size=batch_size) - self._create_index_if_not_exist(dim=len(embeddings[0])) - - # Write data to redis - pipeline = self.client.pipeline(transaction=False) - for i, text in enumerate(texts): - # Use provided values by default or fallback - key = keys_or_ids[i] if keys_or_ids else str(uuid.uuid4().hex) - if not key.startswith(self.key_prefix + ":"): - key = self.key_prefix + ":" + key - metadata = metadatas[i] if metadatas else {} - metadata = _prepare_metadata(metadata) if clean_metadata else metadata - pipeline.hset( - key, - mapping={ - self._schema.content_key: text, - self._schema.content_vector_key: _array_to_buffer(embeddings[i], self._schema.vector_dtype), - **metadata, - }, - ) - ids.append(key) - - # Write batch - if i % batch_size == 0: - pipeline.execute() - - # Cleanup final batch - pipeline.execute() - return ids - - def add_text( - self, - texts: Iterable[str], - metadatas: Optional[List[dict]] = None, - embeddings: Optional[List[List[float]]] = None, - clean_metadata: bool = True, - **kwargs: Any, - ) -> List[str]: - """Add more embeddings of text to the vectorstore. - - Args: - texts (Iterable[str]): Iterable of strings/text to add to the vectorstore. - metadatas (Optional[List[dict]], optional): Optional list of metadatas. - Defaults to None. - embeddings (Optional[List[List[float]]], optional): Optional pre-generated - embeddings. Defaults to None. - keys (List[str]) or ids (List[str]): Identifiers of entries. - Defaults to None. 
- Returns: - List[str]: List of ids added to the vectorstore - """ - ids = [] - # Get keys or ids from kwargs - # Other vectorstores use ids - keys_or_ids = kwargs.get("keys", kwargs.get("ids")) - - # type check for metadata - if metadatas: - if isinstance(metadatas, list) and len(metadatas) != len(texts): # type: ignore # noqa: E501 - raise ValueError("Number of metadatas must match number of texts") - if not (isinstance(metadatas, list) and isinstance(metadatas[0], dict)): - raise ValueError("Metadatas must be a list of dicts") - - if not embeddings: - embeddings = self._embeddings.embed_documents(list(texts)) - self._create_index_if_not_exist(dim=len(embeddings[0])) - - # Write data to redis - pipeline = self.client.pipeline(transaction=False) - for i, text in enumerate(texts): - # Use provided values by default or fallback - key = keys_or_ids[i] if keys_or_ids else str(uuid.uuid4().hex) - if not key.startswith(self.key_prefix + ":"): - key = self.key_prefix + ":" + key - metadata = metadatas[i] if metadatas else {} - metadata = _prepare_metadata(metadata) if clean_metadata else metadata - pipeline.hset( - key, - mapping={ - self._schema.content_key: text, - self._schema.content_vector_key: _array_to_buffer(embeddings[i], self._schema.vector_dtype), - **metadata, - }, - ) - ids.append(key) - - # Cleanup final batch - pipeline.execute() - return ids - - -def prepare_data_and_metadata_from_annotation( - annotation, path_to_frames, title, num_transcript_concat_for_ingesting=2, num_transcript_concat_for_inference=7 -): - text_list = [] - image_list = [] - metadatas = [] - for i, frame in enumerate(annotation): - frame_index = frame["sub_video_id"] - path_to_frame = os.path.join(path_to_frames, f"frame_{frame_index}.png") - # augment this frame's transcript with a reasonable number of neighboring frames' transcripts helps semantic retrieval - lb_ingesting = max(0, i - num_transcript_concat_for_ingesting) - ub_ingesting = min(len(annotation), i + num_transcript_concat_for_ingesting + 1) - caption_for_ingesting = " ".join([annotation[j]["caption"] for j in range(lb_ingesting, ub_ingesting)]) - - # augment this frame's transcript with more neighboring frames' transcript to provide more context to LVM for question answering - lb_inference = max(0, i - num_transcript_concat_for_inference) - ub_inference = min(len(annotation), i + num_transcript_concat_for_inference + 1) - caption_for_inference = " ".join([annotation[j]["caption"] for j in range(lb_inference, ub_inference)]) - - video_id = frame["video_id"] - b64_img_str = frame["b64_img_str"] - time_of_frame = frame["time"] - embedding_type = "pair" if b64_img_str else "text" - source_video = frame["video_name"] - - text_list.append(caption_for_ingesting) - - if b64_img_str: - image_list.append(path_to_frame) - - metadatas.append( - { - "content": caption_for_ingesting, - "b64_img_str": b64_img_str, - "video_id": video_id, - "source_video": source_video, - "time_of_frame_ms": float(time_of_frame), - "embedding_type": embedding_type, - "title": title, - "transcript_for_inference": caption_for_inference, - } - ) - - return text_list, image_list, metadatas - - -def ingest_multimodal(videoname, data_folder, embeddings): - """Ingest text image pairs to Redis from the data/ directory that consists of frames and annotations.""" - data_folder = os.path.abspath(data_folder) - annotation_file_path = os.path.join(data_folder, "annotations.json") - path_to_frames = os.path.join(data_folder, "frames") - - annotation = load_json_file(annotation_file_path) - - 
# prepare data to ingest - text_list, image_list, metadatas = prepare_data_and_metadata_from_annotation(annotation, path_to_frames, videoname) - - MultimodalRedis.from_text_image_pairs_return_keys( - texts=[f"From {videoname}. " + text for text in text_list], - images=image_list, - embedding=embeddings, - metadatas=metadatas, - index_name=INDEX_NAME, - index_schema=INDEX_SCHEMA, - redis_url=REDIS_URL, - ) - - -def drop_index(index_name, redis_url=REDIS_URL): - print(f"dropping index {index_name}") - try: - assert Redis.drop_index(index_name=index_name, delete_documents=True, redis_url=redis_url) - print(f"index {index_name} deleted") - except Exception as e: - print(f"index {index_name} delete failed: {e}") - return False - return True - - -@register_microservice( - name="opea_service@prepare_videodoc_redis", endpoint="/v1/generate_transcripts", host="0.0.0.0", port=6007 -) -async def ingest_generate_transcripts(files: List[UploadFile] = File(None)): - """Upload videos or audio files with speech, generate transcripts using whisper and ingest into redis.""" - - if files: - files_to_ingest = [] - uploaded_files_map = {} - for file in files: - if os.path.splitext(file.filename)[1] in [".mp4", ".wav"]: - files_to_ingest.append(file) - else: - raise HTTPException( - status_code=400, detail=f"File {file.filename} is not an mp4 file. Please upload mp4 files only." - ) - - for file_to_ingest in files_to_ingest: - st = time.time() - file_extension = os.path.splitext(file_to_ingest.filename)[1] - is_video = file_extension == ".mp4" - file_type_str = "video" if is_video else "audio file" - print(f"Processing {file_type_str} {file_to_ingest.filename}") - - # Assign unique identifier to video - file_id = generate_id() - - # Create video file name by appending identifier - base_file_name = os.path.splitext(file_to_ingest.filename)[0] - file_name_with_id = f"{base_file_name}_{file_id}{file_extension}" - dir_name = os.path.splitext(file_name_with_id)[0] - - # Save file in upload_directory - with open(os.path.join(upload_folder, file_name_with_id), "wb") as f: - shutil.copyfileobj(file_to_ingest.file, f) - - uploaded_files_map[base_file_name] = file_name_with_id - - if is_video: - # Extract temporary audio wav file from video mp4 - audio_file = dir_name + ".wav" - print(f"Extracting {audio_file}") - convert_video_to_audio( - os.path.join(upload_folder, file_name_with_id), os.path.join(upload_folder, audio_file) - ) - print(f"Done extracting {audio_file}") - else: - # We already have an audio file - audio_file = file_name_with_id - - # Load whisper model - print("Loading whisper model....") - whisper_model = load_whisper_model(model_name=WHISPER_MODEL) - print("Done loading whisper!") - - # Extract transcript from audio - print("Extracting transcript from audio") - transcripts = extract_transcript_from_audio(whisper_model, os.path.join(upload_folder, audio_file)) - - # Save transcript as vtt file and delete audio file - vtt_file = dir_name + ".vtt" - write_vtt(transcripts, os.path.join(upload_folder, vtt_file)) - if is_video: - delete_audio_file(os.path.join(upload_folder, audio_file)) - print("Done extracting transcript.") - - if is_video: - # Store frames and caption annotations in a new directory - print("Extracting frames and generating annotation") - extract_frames_and_annotations_from_transcripts( - file_id, - os.path.join(upload_folder, file_name_with_id), - os.path.join(upload_folder, vtt_file), - os.path.join(upload_folder, dir_name), - ) - else: - # Generate annotations based on the transcript - 
print("Generating annotations for the transcription") - generate_annotations_from_transcript( - file_id, - os.path.join(upload_folder, file_name_with_id), - os.path.join(upload_folder, vtt_file), - os.path.join(upload_folder, dir_name), - ) - - print("Done extracting frames and generating annotation") - # Delete temporary vtt file - os.remove(os.path.join(upload_folder, vtt_file)) - - # Ingest multimodal data into redis - print("Ingesting data to redis vector store") - ingest_multimodal(base_file_name, os.path.join(upload_folder, dir_name), embeddings) - - # Delete temporary video directory containing frames and annotations - shutil.rmtree(os.path.join(upload_folder, dir_name)) - - print(f"Processed file {file_to_ingest.filename}") - end = time.time() - print(str(end - st)) - - return { - "status": 200, - "message": "Data preparation succeeded", - "file_id_maps": uploaded_files_map, - } - - raise HTTPException(status_code=400, detail="Must provide at least one video (.mp4) or audio (.wav) file.") - - -@register_microservice( - name="opea_service@prepare_videodoc_redis", endpoint="/v1/generate_captions", host="0.0.0.0", port=6007 -) -async def ingest_generate_caption(files: List[UploadFile] = File(None)): - """Upload images and videos without speech (only background music or no audio), generate captions using lvm microservice and ingest into redis.""" - - if files: - file_paths = [] - uploaded_files_saved_files_map = {} - for file in files: - if os.path.splitext(file.filename)[1] in [".mp4", ".png", ".jpg", ".jpeg", ".gif"]: - file_paths.append(file) - else: - raise HTTPException( - status_code=400, - detail=f"File {file.filename} is not a supported file type. Please upload mp4, png, jpg, jpeg, and gif files only.", - ) - - for file in file_paths: - print(f"Processing file {file.filename}") - - # Assign unique identifier to file - id = generate_id() - - # Create file name by appending identifier - name, ext = os.path.splitext(file.filename) - file_name = f"{name}_{id}{ext}" - dir_name = os.path.splitext(file_name)[0] - - # Save file in upload_directory - with open(os.path.join(upload_folder, file_name), "wb") as f: - shutil.copyfileobj(file.file, f) - uploaded_files_saved_files_map[name] = file_name - - # Store frames and caption annotations in a new directory - extract_frames_and_generate_captions( - id, - os.path.join(upload_folder, file_name), - LVM_ENDPOINT, - os.path.join(upload_folder, dir_name), - ) - - # Ingest multimodal data into redis - ingest_multimodal(name, os.path.join(upload_folder, dir_name), embeddings) - - # Delete temporary directory containing frames and annotations - # shutil.rmtree(os.path.join(upload_folder, dir_name)) - - print(f"Processed file {file.filename}") - - return { - "status": 200, - "message": "Data preparation succeeded", - "file_id_maps": uploaded_files_saved_files_map, - } - - raise HTTPException(status_code=400, detail="Must provide at least one file.") - - -@register_microservice( - name="opea_service@prepare_videodoc_redis", - endpoint="/v1/ingest_with_text", - host="0.0.0.0", - port=6007, -) -async def ingest_with_text(files: List[UploadFile] = File(None)): - if files: - accepted_media_formats = [".mp4", ".png", ".jpg", ".jpeg", ".gif"] - # Create a lookup dictionary containing all media files - matched_files = {f.filename: [f] for f in files if os.path.splitext(f.filename)[1] in accepted_media_formats} - uploaded_files_map = {} - - # Go through files again and match caption files to media files - for file in files: - file_base, file_extension = 
os.path.splitext(file.filename) - if file_extension == ".vtt": - if "{}.mp4".format(file_base) in matched_files: - matched_files["{}.mp4".format(file_base)].append(file) - else: - print(f"No video was found for caption file {file.filename}.") - elif file_extension == ".txt": - if "{}.png".format(file_base) in matched_files: - matched_files["{}.png".format(file_base)].append(file) - elif "{}.jpg".format(file_base) in matched_files: - matched_files["{}.jpg".format(file_base)].append(file) - elif "{}.jpeg".format(file_base) in matched_files: - matched_files["{}.jpeg".format(file_base)].append(file) - elif "{}.gif".format(file_base) in matched_files: - matched_files["{}.gif".format(file_base)].append(file) - else: - print(f"No image was found for caption file {file.filename}.") - elif file_extension not in accepted_media_formats: - print(f"Skipping file {file.filename} because of unsupported format.") - - # Check if every media file has a caption file - for media_file_name, file_pair in matched_files.items(): - if len(file_pair) != 2: - raise HTTPException(status_code=400, detail=f"No caption file found for {media_file_name}") - - if len(matched_files.keys()) == 0: - return HTTPException( - status_code=400, - detail="The uploaded files have unsupported formats. Please upload at least one video file (.mp4) with captions (.vtt) or one image (.png, .jpg, .jpeg, or .gif) with caption (.txt)", - ) - - for media_file in matched_files: - print(f"Processing file {media_file}") - - # Assign unique identifier to file - file_id = generate_id() - - # Create file name by appending identifier - file_name, file_extension = os.path.splitext(media_file) - media_file_name = f"{file_name}_{file_id}{file_extension}" - media_dir_name = os.path.splitext(media_file_name)[0] - - # Save file in upload_directory - with open(os.path.join(upload_folder, media_file_name), "wb") as f: - shutil.copyfileobj(matched_files[media_file][0].file, f) - uploaded_files_map[file_name] = media_file_name - - # Save caption file in upload directory - caption_file_extension = os.path.splitext(matched_files[media_file][1].filename)[1] - caption_file = f"{media_dir_name}{caption_file_extension}" - with open(os.path.join(upload_folder, caption_file), "wb") as f: - shutil.copyfileobj(matched_files[media_file][1].file, f) - - # Store frames and caption annotations in a new directory - extract_frames_and_annotations_from_transcripts( - file_id, - os.path.join(upload_folder, media_file_name), - os.path.join(upload_folder, caption_file), - os.path.join(upload_folder, media_dir_name), - ) - - # Delete temporary caption file - os.remove(os.path.join(upload_folder, caption_file)) - - # Ingest multimodal data into redis - ingest_multimodal(file_name, os.path.join(upload_folder, media_dir_name), embeddings) - - # Delete temporary media directory containing frames and annotations - shutil.rmtree(os.path.join(upload_folder, media_dir_name)) - - print(f"Processed file {media_file}") - - return { - "status": 200, - "message": "Data preparation succeeded", - "file_id_maps": uploaded_files_map, - } - - raise HTTPException( - status_code=400, - detail="Must provide at least one pair consisting of video (.mp4) and captions (.vtt) or image (.png, .jpg, .jpeg, .gif) with caption (.txt)", - ) - - -@register_microservice( - name="opea_service@prepare_videodoc_redis", endpoint="/v1/dataprep/get_files", host="0.0.0.0", port=6007 -) -async def rag_get_file_structure(): - """Returns list of names of uploaded videos saved on the server.""" - - if not 
Path(upload_folder).exists(): - print("No file uploaded, return empty list.") - return [] - - uploaded_videos = os.listdir(upload_folder) - return uploaded_videos - - -@register_microservice( - name="opea_service@prepare_videodoc_redis", endpoint="/v1/dataprep/delete_files", host="0.0.0.0", port=6007 -) -async def delete_files(): - """Delete all uploaded files along with redis index.""" - index_deleted = drop_index(index_name=INDEX_NAME) - - if not index_deleted: - raise HTTPException(status_code=409, detail="Uploaded files could not be deleted. Index does not exist") - - clear_upload_folder(upload_folder) - print("Successfully deleted all uploaded files.") - return {"status": True} - - -if __name__ == "__main__": - create_upload_folder(upload_folder) - # Load embeddings model - print("Initializing BridgeTower model as embedder...") - embeddings = BridgeTowerEmbedding(model_name=EMBED_MODEL, device=device) - print("Done initialization of embedder!") - opea_microservices["opea_service@prepare_videodoc_redis"].start() diff --git a/comps/dataprep/multimodal/redis/langchain/requirements.txt b/comps/dataprep/multimodal/redis/langchain/requirements.txt deleted file mode 100644 index b368bb2336..0000000000 --- a/comps/dataprep/multimodal/redis/langchain/requirements.txt +++ /dev/null @@ -1,19 +0,0 @@ -docarray[full] -fastapi -langchain -langchain-community -moviepy -openai-whisper -opencv-python -opentelemetry-api -opentelemetry-exporter-otlp -opentelemetry-sdk -Pillow -prometheus-fastapi-instrumentator -pydantic -python-multipart -redis -shortuuid -transformers -uvicorn -webvtt-py diff --git a/comps/dataprep/neo4j/langchain/Dockerfile b/comps/dataprep/neo4j/langchain/Dockerfile deleted file mode 100644 index eedc0075f2..0000000000 --- a/comps/dataprep/neo4j/langchain/Dockerfile +++ /dev/null @@ -1,38 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -FROM python:3.11-slim - -ENV LANG=C.UTF-8 - -ARG ARCH="cpu" - -RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ - build-essential \ - default-jre \ - libgl1-mesa-glx \ - libjemalloc-dev - -RUN useradd -m -s /bin/bash user && \ - mkdir -p /home/user && \ - chown -R user /home/user/ - -USER user - -COPY comps /home/user/comps - -RUN pip install --no-cache-dir --upgrade pip setuptools && \ - if [ ${ARCH} = "cpu" ]; then pip install --no-cache-dir torch torchvision --index-url https://download.pytorch.org/whl/cpu; fi && \ - pip install --no-cache-dir -r /home/user/comps/dataprep/neo4j/langchain/requirements.txt - -ENV PYTHONPATH=$PYTHONPATH:/home/user - -USER root - -RUN mkdir -p /home/user/comps/dataprep/neo4j/langchain/uploaded_files && chown -R user /home/user/comps/dataprep/neo4j/langchain/uploaded_files - -USER user - -WORKDIR /home/user/comps/dataprep/neo4j/langchain - -ENTRYPOINT ["python", "prepare_doc_neo4j.py"] diff --git a/comps/dataprep/neo4j/langchain/config.py b/comps/dataprep/neo4j/langchain/config.py deleted file mode 100644 index bb21d57e3d..0000000000 --- a/comps/dataprep/neo4j/langchain/config.py +++ /dev/null @@ -1,15 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import os - -# Neo4J configuration -NEO4J_URL = os.getenv("NEO4J_URI", "bolt://localhost:7687") -NEO4J_USERNAME = os.getenv("NEO4J_USERNAME", "neo4j") -NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD", "test") - -# LLM/Embedding endpoints -TGI_LLM_ENDPOINT = os.getenv("TGI_LLM_ENDPOINT", "http://localhost:8080") -TGI_LLM_ENDPOINT_NO_RAG = 
os.getenv("TGI_LLM_ENDPOINT_NO_RAG", "http://localhost:8081") -TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_ENDPOINT") -OPENAI_KEY = os.getenv("OPENAI_API_KEY") diff --git a/comps/dataprep/neo4j/langchain/docker-compose-dataprep-neo4j.yaml b/comps/dataprep/neo4j/langchain/docker-compose-dataprep-neo4j.yaml deleted file mode 100644 index c5c6adf23d..0000000000 --- a/comps/dataprep/neo4j/langchain/docker-compose-dataprep-neo4j.yaml +++ /dev/null @@ -1,48 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -version: "3" -services: - neo4j-vector-db: - image: neo4j/neo4j - container_name: neo4j-graph-db - ports: - - "6337:6337" - - "6338:6338" - tgi_gaudi_service: - image: ghcr.io/huggingface/tgi-gaudi:2.0.5 - container_name: tgi-service - ports: - - "8088:80" - volumes: - - "./data:/data" - shm_size: 1g - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - HF_TOKEN: ${HF_TOKEN} - command: --model-id ${LLM_MODEL_ID} --auto-truncate --max-input-tokens 1024 --max-total-tokens 2048 - dataprep-neo4j: - image: opea/gen-ai-comps:dataprep-neo4j-xeon-server - container_name: dataprep-neo4j-server - depends_on: - - neo4j-vector-db - - tgi_gaudi_service - ports: - - "6007:6007" - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - NEO4J_URL: ${NEO4J_URL} - NEO4J_USERNAME: ${NEO4J_USERNAME} - NEO4J_PASSWORD: ${NEO4J_PASSWORD} - TGI_LLM_ENDPOINT: ${TEI_ENDPOINT} - OPENAI_KEY: ${OPENAI_API_KEY} - restart: unless-stopped - -networks: - default: - driver: bridge diff --git a/comps/dataprep/neo4j/langchain/prepare_doc_neo4j.py b/comps/dataprep/neo4j/langchain/prepare_doc_neo4j.py deleted file mode 100644 index efe1fa4aad..0000000000 --- a/comps/dataprep/neo4j/langchain/prepare_doc_neo4j.py +++ /dev/null @@ -1,190 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import json -import os -from typing import List, Optional, Union - -import openai -from config import NEO4J_PASSWORD, NEO4J_URL, NEO4J_USERNAME, OPENAI_KEY, TGI_LLM_ENDPOINT -from fastapi import File, Form, HTTPException, UploadFile -from langchain.text_splitter import RecursiveCharacterTextSplitter -from langchain_community.graphs import Neo4jGraph -from langchain_community.llms import HuggingFaceEndpoint -from langchain_core.documents import Document -from langchain_experimental.graph_transformers import LLMGraphTransformer -from langchain_openai import ChatOpenAI -from langchain_text_splitters import HTMLHeaderTextSplitter - -from comps import CustomLogger, DocPath, opea_microservices, register_microservice -from comps.dataprep.src.utils import ( - document_loader, - encode_filename, - get_separators, - get_tables_result, - parse_html, - save_content_to_local_disk, -) - -logger = CustomLogger("prepare_doc_neo4j") -logflag = os.getenv("LOGFLAG", False) - -upload_folder = "./uploaded_files/" - - -def ingest_data_to_neo4j(doc_path: DocPath): - """Ingest document to Neo4J.""" - path = doc_path.path - if logflag: - logger.info(f"Parsing document {path}.") - - if path.endswith(".html"): - headers_to_split_on = [ - ("h1", "Header 1"), - ("h2", "Header 2"), - ("h3", "Header 3"), - ] - text_splitter = HTMLHeaderTextSplitter(headers_to_split_on=headers_to_split_on) - else: - text_splitter = RecursiveCharacterTextSplitter( - chunk_size=doc_path.chunk_size, - chunk_overlap=doc_path.chunk_overlap, - add_start_index=True, - separators=get_separators(), - ) - - content = document_loader(path) 
- - structured_types = [".xlsx", ".csv", ".json", "jsonl"] - _, ext = os.path.splitext(path) - - if ext in structured_types: - chunks = content - else: - chunks = text_splitter.split_text(content) - - if doc_path.process_table and path.endswith(".pdf"): - table_chunks = get_tables_result(path, doc_path.table_strategy) - chunks = chunks + table_chunks - if logflag: - logger.info("Done preprocessing. Created ", len(chunks), " chunks of the original file.") - - if OPENAI_KEY: - logger.info("OpenAI API Key is set. Verifying its validity...") - openai.api_key = OPENAI_KEY - - try: - response = openai.Engine.list() - logger.info("OpenAI API Key is valid.") - llm = ChatOpenAI(temperature=0, model_name="gpt-4o") - except openai.error.AuthenticationError: - logger.info("OpenAI API Key is invalid.") - except Exception as e: - logger.info(f"An error occurred while verifying the API Key: {e}") - else: - llm = HuggingFaceEndpoint( - endpoint_url=TGI_LLM_ENDPOINT, - max_new_tokens=512, - top_k=40, - top_p=0.9, - temperature=0.8, - timeout=600, - ) - - llm_transformer = LLMGraphTransformer( - llm=llm, node_properties=["description"], relationship_properties=["description"] - ) - - doc_list = [Document(page_content=text) for text in chunks] - graph_doc = llm_transformer.convert_to_graph_documents(doc_list) - - graph = Neo4jGraph(url=NEO4J_URL, username=NEO4J_USERNAME, password=NEO4J_PASSWORD) - - graph.add_graph_documents(graph_doc, baseEntityLabel=True, include_source=True) - - if logflag: - logger.info("The graph is built.") - - return True - - -@register_microservice( - name="opea_service@prepare_doc_neo4j", - endpoint="/v1/dataprep", - host="0.0.0.0", - port=6007, - input_datatype=DocPath, - output_datatype=None, -) -async def ingest_documents( - files: Optional[Union[UploadFile, List[UploadFile]]] = File(None), - link_list: Optional[str] = Form(None), - chunk_size: int = Form(1500), - chunk_overlap: int = Form(100), - process_table: bool = Form(False), - table_strategy: str = Form("fast"), -): - if logflag: - logger.info(f"files:{files}") - logger.info(f"link_list:{link_list}") - - if files: - if not isinstance(files, list): - files = [files] - uploaded_files = [] - for file in files: - encode_file = encode_filename(file.filename) - save_path = upload_folder + encode_file - await save_content_to_local_disk(save_path, file) - ingest_data_to_neo4j( - DocPath( - path=save_path, - chunk_size=chunk_size, - chunk_overlap=chunk_overlap, - process_table=process_table, - table_strategy=table_strategy, - ) - ) - uploaded_files.append(save_path) - if logflag: - logger.info(f"Successfully saved file {save_path}") - result = {"status": 200, "message": "Data preparation succeeded"} - if logflag: - logger.info(result) - return result - - if link_list: - link_list = json.loads(link_list) # Parse JSON string to list - if not isinstance(link_list, list): - raise HTTPException(status_code=400, detail="link_list should be a list.") - for link in link_list: - encoded_link = encode_filename(link) - save_path = upload_folder + encoded_link + ".txt" - content = parse_html([link])[0][0] - try: - await save_content_to_local_disk(save_path, content) - ingest_data_to_neo4j( - DocPath( - path=save_path, - chunk_size=chunk_size, - chunk_overlap=chunk_overlap, - process_table=process_table, - table_strategy=table_strategy, - ) - ) - except json.JSONDecodeError: - raise HTTPException(status_code=500, detail="Fail to ingest data into qdrant.") - - if logflag: - logger.info(f"Successfully saved link {link}") - - result = {"status": 
200, "message": "Data preparation succeeded"} - if logflag: - logger.info(result) - return result - - raise HTTPException(status_code=400, detail="Must provide either a file or a string list.") - - -if __name__ == "__main__": - opea_microservices["opea_service@prepare_doc_neo4j"].start() diff --git a/comps/dataprep/neo4j/langchain/requirements.txt b/comps/dataprep/neo4j/langchain/requirements.txt deleted file mode 100644 index f0f825b310..0000000000 --- a/comps/dataprep/neo4j/langchain/requirements.txt +++ /dev/null @@ -1,31 +0,0 @@ -beautifulsoup4 -cairosvg -docarray[full] -docx2txt -easyocr -fastapi -huggingface_hub -langchain -langchain-community -langchain-experimental -langchain-openai -langchain-text-splitters -langchain_huggingface -markdown -neo4j -numpy -openai -opentelemetry-api -opentelemetry-exporter-otlp -opentelemetry-sdk -pandas -Pillow -prometheus-fastapi-instrumentator -pymupdf -pytesseract -python-docx -python-pptx -sentence_transformers -shortuuid -unstructured[all-docs]==0.15.7 -uvicorn diff --git a/comps/dataprep/neo4j/llama_index/Dockerfile b/comps/dataprep/neo4j/llama_index/Dockerfile deleted file mode 100644 index 77f912ed12..0000000000 --- a/comps/dataprep/neo4j/llama_index/Dockerfile +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -FROM python:3.11-slim - -ENV LANG=C.UTF-8 - -ARG ARCH="cpu" - -RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ - build-essential \ - default-jre \ - libgl1-mesa-glx \ - libjemalloc-dev \ - vim - -RUN useradd -m -s /bin/bash user && \ - mkdir -p /home/user && \ - chown -R user /home/user/ - -USER user - -COPY comps /home/user/comps - -RUN pip install --no-cache-dir --upgrade pip setuptools && \ - if [ ${ARCH} = "cpu" ]; then pip install --no-cache-dir torch torchvision --index-url https://download.pytorch.org/whl/cpu; fi && \ - pip install --no-cache-dir -r /home/user/comps/dataprep/neo4j/llama_index/requirements.txt - -ENV PYTHONPATH=$PYTHONPATH:/home/user - -USER root - -RUN mkdir -p /home/user/comps/dataprep/neo4j/llama_index/uploaded_files && chown -R user /home/user/comps/dataprep/neo4j/llama_index/uploaded_files - -USER user - -WORKDIR /home/user/comps/dataprep/neo4j/llama_index - -ENTRYPOINT ["python", "extract_graph_neo4j.py"] diff --git a/comps/dataprep/neo4j/llama_index/config.py b/comps/dataprep/neo4j/llama_index/config.py deleted file mode 100644 index 3037b8f9fb..0000000000 --- a/comps/dataprep/neo4j/llama_index/config.py +++ /dev/null @@ -1,18 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import os - -host_ip = os.getenv("host_ip") -# Neo4J configuration -NEO4J_URL = os.getenv("NEO4J_URL", f"bolt://{host_ip}:7687") -NEO4J_USERNAME = os.getenv("NEO4J_USERNAME", "neo4j") -NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD", "neo4jtest") - -# LLM/Embedding endpoints -TGI_LLM_ENDPOINT = os.getenv("TGI_LLM_ENDPOINT", f"http://{host_ip}:6005") -TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_EMBEDDING_ENDPOINT ", f"http://{host_ip}:6006") - -OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") -OPENAI_EMBEDDING_MODEL = os.getenv("OPENAI_EMBEDDING_MODEL", "text-embedding-3-small") -OPENAI_LLM_MODEL = os.getenv("OPENAI_LLM_MODEL", "gpt-4o") diff --git a/comps/dataprep/neo4j/llama_index/neo4j_llama_index.yaml b/comps/dataprep/neo4j/llama_index/neo4j_llama_index.yaml deleted file mode 100644 index ac160f6997..0000000000 --- a/comps/dataprep/neo4j/llama_index/neo4j_llama_index.yaml +++ /dev/null @@ -1,97 +0,0 @@ -# 
Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -version: "3.8" -services: - neo4j-apoc: - image: neo4j:latest - container_name: neo4j-apoc - volumes: - - /$HOME/neo4j/logs:/logs - - /$HOME/neo4j/config:/config - - /$HOME/neo4j/data:/data - - /$HOME/neo4j/plugins:/plugins - ipc: host - environment: - - NEO4J_AUTH=${NEO4J_USERNAME}/${NEO4J_PASSWORD} - - NEO4J_PLUGINS=["apoc"] - - NEO4J_apoc_export_file_enabled=true - - NEO4J_apoc_import_file_enabled=true - - NEO4J_apoc_import_file_use__neo4j__config=true - - NEO4J_dbms_security_procedures_unrestricted=apoc.\* - ports: - - "7474:7474" - - "7687:7687" - restart: always - tei-embedding-service: - image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 - container_name: tei-embedding-server - ports: - - "6006:80" - volumes: - - "./data:/data" - shm_size: 1g - environment: - no_proxy: ${no_proxy} - NO_PROXY: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} - ipc: host - command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate - tgi-gaudi-service: - image: ghcr.io/huggingface/tgi-gaudi:2.0.5 - container_name: tgi-gaudi-server - ports: - - "6005:80" - volumes: - - "./data:/data" - environment: - no_proxy: ${no_proxy} - NO_PROXY: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} - HF_HUB_DISABLE_PROGRESS_BARS: 1 - HF_HUB_ENABLE_HF_TRANSFER: 0 - HABANA_VISIBLE_DEVICES: all - OMPI_MCA_btl_vader_single_copy_mechanism: none - ENABLE_HPU_GRAPH: true - LIMIT_HPU_GRAPH: true - USE_FLASH_ATTENTION: true - FLASH_ATTENTION_RECOMPUTE: true - runtime: habana - cap_add: - - SYS_NICE - ipc: host - command: --model-id ${LLM_MODEL_ID} --max-input-length 2048 --max-total-tokens 4096 - dataprep-neo4j-llamaindex: - image: opea/dataprep-neo4j-llamaindex:latest - container_name: dataprep-neo4j-server - depends_on: - - neo4j-apoc - - tgi-gaudi-service - - tei-embedding-service - ports: - - "6004:6004" - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - host_ip: ${host_ip} - NEO4J_URL: ${NEO4J_URL} - NEO4J_USERNAME: ${NEO4J_USERNAME} - NEO4J_PASSWORD: ${NEO4J_PASSWORD} - TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} - TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} - OPENAI_API_KEY: ${OPENAI_API_KEY} - OPENAI_EMBEDDING_MODEL: ${OPENAI_EMBEDDING_MODEL} - OPENAI_LLM_MODEL: ${OPENAI_LLM_MODEL} - EMBEDDING_MODEL_ID: ${EMBEDDING_MODEL_ID} - LLM_MODEL_ID: ${LLM_MODEL_ID} - LOGFLAG: ${LOGFLAG} - restart: unless-stopped -networks: - default: - driver: bridge diff --git a/comps/dataprep/neo4j/llama_index/requirements.txt b/comps/dataprep/neo4j/llama_index/requirements.txt deleted file mode 100644 index c183ecf3dc..0000000000 --- a/comps/dataprep/neo4j/llama_index/requirements.txt +++ /dev/null @@ -1,39 +0,0 @@ -beautifulsoup4 -cairosvg -docarray[full] -docx2txt -easyocr -fastapi -future -graspologic -html2text -huggingface_hub -ipython -langchain -langchain-text-splitters -langchain_community -llama-index -llama-index-core -llama-index-embeddings-text-embeddings-inference -llama-index-llms-openai -llama-index-llms-text-generation-inference -llama_index_graph_stores_neo4j==0.3.3 -markdown -neo4j -numpy -openai -opentelemetry-api -opentelemetry-exporter-otlp -opentelemetry-sdk -pandas -Pillow -prometheus-fastapi-instrumentator -pymupdf -pytesseract -python-docx -python-pptx -scipy -sentence_transformers -shortuuid -unstructured[all-docs]==0.15.7 
-uvicorn diff --git a/comps/dataprep/neo4j/llama_index/set_env.sh b/comps/dataprep/neo4j/llama_index/set_env.sh deleted file mode 100644 index 58980ebbe1..0000000000 --- a/comps/dataprep/neo4j/llama_index/set_env.sh +++ /dev/null @@ -1,18 +0,0 @@ -#!/usr/bin/env bash - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -# Remember to set your private variables mentioned in README -# host_ip, OPENAI_KEY, HUGGINGFACEHUB_API_TOKEN, proxies... - -export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" -export OPENAI_EMBEDDING_MODEL="text-embedding-3-small" -export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct" -export OPENAI_LLM_MODEL="gpt-4o" -export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:6006" -export TGI_LLM_ENDPOINT="http://${host_ip}:6005" -export NEO4J_URL="bolt://${host_ip}:7687" -export NEO4J_USERNAME=neo4j -export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6004/v1/dataprep" -export LOGFLAG=True diff --git a/comps/dataprep/opensearch/langchain/Dockerfile b/comps/dataprep/opensearch/langchain/Dockerfile deleted file mode 100644 index f29a753bcd..0000000000 --- a/comps/dataprep/opensearch/langchain/Dockerfile +++ /dev/null @@ -1,42 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -FROM python:3.11-slim - -ENV LANG=C.UTF-8 - -ARG ARCH="cpu" - -RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ - build-essential \ - default-jre \ - libgl1-mesa-glx \ - libjemalloc-dev \ - libreoffice \ - poppler-utils \ - tesseract-ocr - -RUN useradd -m -s /bin/bash user && \ - mkdir -p /home/user && \ - chown -R user /home/user/ - -USER user - -COPY comps /home/user/comps - -RUN pip install --no-cache-dir --upgrade pip setuptools && \ - if [ ${ARCH} = "cpu" ]; then pip install --no-cache-dir torch torchvision --index-url https://download.pytorch.org/whl/cpu; fi && \ - pip install --no-cache-dir -r /home/user/comps/dataprep/opensearch/langchain/requirements.txt - -ENV PYTHONPATH=$PYTHONPATH:/home/user - -USER root - -RUN mkdir -p /home/user/comps/dataprep/opensearch/langchain/uploaded_files && chown -R user /home/user/comps/dataprep/opensearch/langchain/uploaded_files - -USER user - -WORKDIR /home/user/comps/dataprep/opensearch/langchain - -ENTRYPOINT ["python", "prepare_doc_opensearch.py"] - diff --git a/comps/dataprep/opensearch/langchain/config.py b/comps/dataprep/opensearch/langchain/config.py deleted file mode 100644 index 767cd84da7..0000000000 --- a/comps/dataprep/opensearch/langchain/config.py +++ /dev/null @@ -1,60 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import os - -# Embedding model -EMBED_MODEL = os.getenv("EMBED_MODEL", "BAAI/bge-base-en-v1.5") - -# OpenSearch Connection Information -OPENSEARCH_HOST = os.getenv("OPENSEARCH_HOST", "localhost") -OPENSEARCH_PORT = int(os.getenv("OPENSEARCH_PORT", 9200)) -OPENSEARCH_INITIAL_ADMIN_PASSWORD = os.getenv("OPENSEARCH_INITIAL_ADMIN_PASSWORD", "") - - -def get_boolean_env_var(var_name, default_value=False): - """Retrieve the boolean value of an environment variable. - - Args: - var_name (str): The name of the environment variable to retrieve. - default_value (bool): The default value to return if the variable - is not found. - - Returns: - bool: The value of the environment variable, interpreted as a boolean. 
- """ - true_values = {"true", "1", "t", "y", "yes"} - false_values = {"false", "0", "f", "n", "no"} - - # Retrieve the environment variable's value - value = os.getenv(var_name, "").lower() - - # Decide the boolean value based on the content of the string - if value in true_values: - return True - elif value in false_values: - return False - else: - return default_value - - -def format_opensearch_conn_from_env(): - opensearch_url = os.getenv("OPENSEARCH_URL", None) - if opensearch_url: - return opensearch_url - else: - using_ssl = get_boolean_env_var("OPENSEARCH_SSL", False) - start = "https://" if using_ssl else "http://" - - return start + f"{OPENSEARCH_HOST}:{OPENSEARCH_PORT}" - - -OPENSEARCH_URL = format_opensearch_conn_from_env() - -# Vector Index Configuration -INDEX_NAME = os.getenv("INDEX_NAME", "rag-opensearch") -KEY_INDEX_NAME = os.getenv("KEY_INDEX_NAME", "file-keys") - -TIMEOUT_SECONDS = int(os.getenv("TIMEOUT_SECONDS", 600)) - -SEARCH_BATCH_SIZE = int(os.getenv("SEARCH_BATCH_SIZE", 10)) diff --git a/comps/dataprep/opensearch/langchain/docker-compose-dataprep-opensearch.yaml b/comps/dataprep/opensearch/langchain/docker-compose-dataprep-opensearch.yaml deleted file mode 100644 index 7699bee1ce..0000000000 --- a/comps/dataprep/opensearch/langchain/docker-compose-dataprep-opensearch.yaml +++ /dev/null @@ -1,65 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -version: "3" -services: - opensearch-vector-db: - image: opensearchproject/opensearch:latest - container_name: opensearch-vector-db - environment: - - cluster.name=opensearch-cluster - - node.name=opensearch-vector-db - - discovery.seed_hosts=opensearch-vector-db - - cluster.initial_master_nodes=opensearch-vector-db - - bootstrap.memory_lock=true # along with the memlock settings below, disables swapping - - "OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m" # minimum and maximum Java heap size, recommend setting both to 50% of system RAM - - OPENSEARCH_INITIAL_ADMIN_PASSWORD=${OPENSEARCH_INITIAL_ADMIN_PASSWORD} # Sets the demo admin user password when using demo configuration, required for OpenSearch 2.12 and later - ulimits: - memlock: - soft: -1 - hard: -1 - nofile: - soft: 65536 # maximum number of open files for the OpenSearch user, set to at least 65536 on modern systems - hard: 65536 - ports: - - 9200:9200 - - 9600:9600 # required for Performance Analyzer - networks: - - opensearch-net - security_opt: - - no-new-privileges:true - tei-embedding-service: - image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 - container_name: tei-embedding-server - ports: - - "6060:80" - volumes: - - "./data:/data" - shm_size: 1g - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate - dataprep-opensearch: - image: opea/dataprep-opensearch:latest - container_name: dataprep-opensearch-server - ports: - - 6007:6007 - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - OPENSEARCH_URL: ${OPENSEARCH_URL} - INDEX_NAME: ${INDEX_NAME} - TEI_ENDPOINT: ${TEI_ENDPOINT} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} - restart: unless-stopped - security_opt: - - no-new-privileges:true - -networks: - default: - driver: bridge - opensearch-net: diff --git a/comps/dataprep/opensearch/langchain/prepare_doc_opensearch.py b/comps/dataprep/opensearch/langchain/prepare_doc_opensearch.py deleted file mode 100644 index 499b4ba636..0000000000 --- 
a/comps/dataprep/opensearch/langchain/prepare_doc_opensearch.py +++ /dev/null @@ -1,471 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import json -import os -from pathlib import Path -from typing import List, Optional, Union - -from config import ( - EMBED_MODEL, - INDEX_NAME, - KEY_INDEX_NAME, - OPENSEARCH_INITIAL_ADMIN_PASSWORD, - OPENSEARCH_URL, - SEARCH_BATCH_SIZE, -) -from fastapi import Body, File, Form, HTTPException, UploadFile -from langchain.text_splitter import RecursiveCharacterTextSplitter -from langchain_community.embeddings import HuggingFaceBgeEmbeddings -from langchain_community.vectorstores import OpenSearchVectorSearch -from langchain_huggingface import HuggingFaceEndpointEmbeddings -from langchain_text_splitters import HTMLHeaderTextSplitter - -# from pyspark import SparkConf, SparkContext -from opensearchpy import OpenSearch, helpers - -from comps import CustomLogger, DocPath, opea_microservices, register_microservice -from comps.dataprep.src.utils import ( - create_upload_folder, - document_loader, - encode_filename, - format_search_results, - get_separators, - get_tables_result, - parse_html, - remove_folder_with_ignore, - save_content_to_local_disk, -) - -logger = CustomLogger("prepare_doc_opensearch") -logflag = os.getenv("LOGFLAG", False) - -upload_folder = "./uploaded_files/" -tei_embedding_endpoint = os.getenv("TEI_ENDPOINT") -if tei_embedding_endpoint: - # create embeddings using TEI endpoint service - embeddings = HuggingFaceEndpointEmbeddings(model=tei_embedding_endpoint) -else: - # create embeddings using local embedding model - embeddings = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) -auth = ("admin", OPENSEARCH_INITIAL_ADMIN_PASSWORD) -opensearch_client = OpenSearchVectorSearch( - opensearch_url=OPENSEARCH_URL, - index_name=INDEX_NAME, - embedding_function=embeddings, - http_auth=auth, - use_ssl=True, - verify_certs=False, - ssl_assert_hostname=False, - ssl_show_warn=False, -) - - -def check_index_existence(client, index_name): - if logflag: - logger.info(f"[ check index existence ] checking {client}") - try: - exists = client.index_exists(index_name) - exists = False if exists is None else exists - if exists: - if logflag: - logger.info(f"[ check index existence ] index of client exists: {client}") - else: - if logflag: - logger.info("[ check index existence ] index does not exist") - return exists - except Exception as e: - if logflag: - logger.info(f"[ check index existence ] error checking index for client: {e}") - return False - - -def create_index(client, index_name: str = KEY_INDEX_NAME): - if logflag: - logger.info(f"[ create index ] creating index {index_name}") - try: - index_body = { - "mappings": { - "properties": { - "file_name": {"type": "text"}, - "key_ids": {"type": "text"}, - } - } - } - - # Create the index - client.client.indices.create(index_name, body=index_body) - - if logflag: - logger.info(f"[ create index ] index {index_name} successfully created") - return True - except Exception as e: - if logflag: - logger.info(f"[ create index ] fail to create index {index_name}: {e}") - return False - - -def store_by_id(client, key, value): - if logflag: - logger.info(f"[ store by id ] storing ids of {key}") - try: - client.client.index( - index=KEY_INDEX_NAME, body={"file_name": f"file:${key}", "key_ids:": value}, id="file:" + key, refresh=True - ) - if logflag: - logger.info(f"[ store by id ] store document success. 
id: file:{key}") - except Exception as e: - if logflag: - logger.info(f"[ store by id ] fail to store document file:{key}: {e}") - return False - return True - - -def search_by_id(client, doc_id): - if logflag: - logger.info(f"[ search by id ] searching docs of {doc_id}") - try: - result = client.client.get(index=KEY_INDEX_NAME, id=doc_id) - if result["found"]: - if logflag: - logger.info(f"[ search by id ] search success of {doc_id}: {result}") - return result - return None - except Exception as e: - if logflag: - logger.info(f"[ search by id ] fail to search docs of {doc_id}: {e}") - return None - - -def drop_index(client, index_name): - if logflag: - logger.info(f"[ drop index ] dropping index {index_name}") - try: - client.client.indices.delete(index=index_name) - if logflag: - logger.info(f"[ drop index ] index {index_name} deleted") - except Exception as e: - if logflag: - logger.info(f"[ drop index ] index {index_name} delete failed: {e}") - return False - return True - - -def delete_by_id(client, doc_id): - try: - response = client.client.delete(index=KEY_INDEX_NAME, id=doc_id) - if response["result"] == "deleted": - if logflag: - logger.info(f"[ delete by id ] delete id success: {doc_id}") - return True - else: - if logflag: - logger.info(f"[ delete by id ] delete id failed: {doc_id}") - return False - except Exception as e: - if logflag: - logger.info(f"[ delete by id ] fail to delete ids {doc_id}: {e}") - return False - - -def ingest_chunks_to_opensearch(file_name: str, chunks: List): - if logflag: - logger.info(f"[ ingest chunks ] file name: {file_name}") - - # Batch size - batch_size = 32 - num_chunks = len(chunks) - - file_ids = [] - for i in range(0, num_chunks, batch_size): - if logflag: - logger.info(f"[ ingest chunks ] Current batch: {i}") - batch_chunks = chunks[i : i + batch_size] - - keys = opensearch_client.add_texts(texts=batch_chunks, metadatas=[{"source": file_name} for _ in batch_chunks]) - if logflag: - logger.info(f"[ ingest chunks ] keys: {keys}") - file_ids.extend(keys) - if logflag: - logger.info(f"[ ingest chunks ] Processed batch {i//batch_size + 1}/{(num_chunks-1)//batch_size + 1}") - - # store file_ids into index file-keys - if not check_index_existence(opensearch_client, KEY_INDEX_NAME): - assert create_index(opensearch_client) - - try: - assert store_by_id(opensearch_client, key=file_name, value="#".join(file_ids)) - except Exception as e: - if logflag: - logger.info(f"[ ingest chunks ] {e}. 
Fail to store chunks of file {file_name}.") - raise HTTPException(status_code=500, detail=f"Fail to store chunks of file {file_name}.") - return True - - -def ingest_data_to_opensearch(doc_path: DocPath): - """Ingest document to OpenSearch.""" - path = doc_path.path - if logflag: - logger.info(f"[ ingest data ] Parsing document {path}.") - - if path.endswith(".html"): - headers_to_split_on = [ - ("h1", "Header 1"), - ("h2", "Header 2"), - ("h3", "Header 3"), - ] - text_splitter = HTMLHeaderTextSplitter(headers_to_split_on=headers_to_split_on) - else: - text_splitter = RecursiveCharacterTextSplitter( - chunk_size=doc_path.chunk_size, - chunk_overlap=doc_path.chunk_overlap, - add_start_index=True, - separators=get_separators(), - ) - - content = document_loader(path) - if logflag: - logger.info("[ ingest data ] file content loaded") - - structured_types = [".xlsx", ".csv", ".json", "jsonl"] - _, ext = os.path.splitext(path) - - if ext in structured_types: - chunks = content - else: - chunks = text_splitter.split_text(content) - - ### Specially processing for the table content in PDFs - if doc_path.process_table and path.endswith(".pdf"): - table_chunks = get_tables_result(path, doc_path.table_strategy) - chunks = chunks + table_chunks - if logflag: - logger.info(f"[ ingest data ] Done preprocessing. Created {len(chunks)} chunks of the given file.") - - file_name = doc_path.path.split("/")[-1] - return ingest_chunks_to_opensearch(file_name, chunks) - - -def search_all_documents(index_name, offset, search_batch_size): - try: - response = opensearch_client.client.search( - index=index_name, - body={ - "query": {"match_all": {}}, - "from": offset, # Starting position - "size": search_batch_size, # Number of results to return - }, - ) - # Get total number of matching documents - total_hits = response["hits"]["total"]["value"] - # Get the documents from the current batch - documents = response["hits"]["hits"] - - return {"total_hits": total_hits, "documents": documents} - - except Exception as e: - print(f"Error performing search: {e}") - return None - - -@register_microservice(name="opea_service@prepare_doc_opensearch", endpoint="/v1/dataprep", host="0.0.0.0", port=6007) -async def ingest_documents( - files: Optional[Union[UploadFile, List[UploadFile]]] = File(None), - link_list: Optional[str] = Form(None), - chunk_size: int = Form(1500), - chunk_overlap: int = Form(100), - process_table: bool = Form(False), - table_strategy: str = Form("fast"), -): - if logflag: - logger.info(f"[ upload ] files:{files}") - logger.info(f"[ upload ] link_list:{link_list}") - - if files: - if not isinstance(files, list): - files = [files] - uploaded_files = [] - - for file in files: - encode_file = encode_filename(file.filename) - doc_id = "file:" + encode_file - if logflag: - logger.info(f"[ upload ] processing file {doc_id}") - - # check whether the file already exists - key_ids = None - try: - document = search_by_id(opensearch_client, doc_id) - if document: - if logflag: - logger.info(f"[ upload ] File {file.filename} already exists.") - key_ids = document["_id"] - except Exception as e: - logger.info(f"[ upload ] File {file.filename} does not exist.") - if key_ids: - raise HTTPException( - status_code=400, detail=f"Uploaded file {file.filename} already exists. Please change file name." 
- ) - - save_path = upload_folder + encode_file - await save_content_to_local_disk(save_path, file) - ingest_data_to_opensearch( - DocPath( - path=save_path, - chunk_size=chunk_size, - chunk_overlap=chunk_overlap, - process_table=process_table, - table_strategy=table_strategy, - ) - ) - uploaded_files.append(save_path) - if logflag: - logger.info(f"[ upload ] Successfully saved file {save_path}") - - result = {"status": 200, "message": "Data preparation succeeded"} - if logflag: - logger.info(result) - return result - - if link_list: - link_list = json.loads(link_list) # Parse JSON string to list - if not isinstance(link_list, list): - raise HTTPException(status_code=400, detail=f"Link_list {link_list} should be a list.") - for link in link_list: - encoded_link = encode_filename(link) - doc_id = "file:" + encoded_link + ".txt" - if logflag: - logger.info(f"[ upload ] processing link {doc_id}") - - # check whether the link file already exists - key_ids = None - try: - document = search_by_id(opensearch_client, doc_id) - if document: - if logflag: - logger.info(f"[ upload ] Link {link} already exists.") - key_ids = document["_id"] - except Exception as e: - logger.info(f"[ upload ] Link {link} does not exist. Keep storing.") - if key_ids: - raise HTTPException( - status_code=400, detail=f"Uploaded link {link} already exists. Please change another link." - ) - - save_path = upload_folder + encoded_link + ".txt" - content = parse_html([link])[0][0] - await save_content_to_local_disk(save_path, content) - ingest_data_to_opensearch( - DocPath( - path=save_path, - chunk_size=chunk_size, - chunk_overlap=chunk_overlap, - process_table=process_table, - table_strategy=table_strategy, - ) - ) - if logflag: - logger.info(f"[ upload ] Successfully saved link list {link_list}") - return {"status": 200, "message": "Data preparation succeeded"} - - raise HTTPException(status_code=400, detail="Must provide either a file or a string list.") - - -@register_microservice( - name="opea_service@prepare_doc_opensearch", endpoint="/v1/dataprep/get_file", host="0.0.0.0", port=6007 -) -async def rag_get_file_structure(): - if logflag: - logger.info("[ get ] start to get file structure") - - offset = 0 - file_list = [] - - # check index existence - res = check_index_existence(opensearch_client, KEY_INDEX_NAME) - if not res: - if logflag: - logger.info(f"[ get ] index {KEY_INDEX_NAME} does not exist") - return file_list - - while True: - response = search_all_documents(KEY_INDEX_NAME, offset, SEARCH_BATCH_SIZE) - # no doc retrieved - if len(response) < 2: - break - - def format_opensearch_results(response, file_list): - for document in response["documents"]: - file_id = document["_id"] - file_list.append({"name": file_id, "id": file_id, "type": "File", "parent": ""}) - - file_list = format_opensearch_results(response, file_list) - offset += SEARCH_BATCH_SIZE - # last batch - if (len(response) - 1) // 2 < SEARCH_BATCH_SIZE: - break - if logflag: - logger.info(f"[get] final file_list: {file_list}") - return file_list - - -@register_microservice( - name="opea_service@prepare_doc_opensearch", endpoint="/v1/dataprep/delete_file", host="0.0.0.0", port=6007 -) -async def delete_single_file(file_path: str = Body(..., embed=True)): - """Delete file according to `file_path`. - - `file_path`: - - specific file path (e.g. 
/path/to/file.txt) - - "all": delete all files uploaded - """ - - # delete all uploaded files - if file_path == "all": - if logflag: - logger.info("[ delete ] delete all files") - - # drop index KEY_INDEX_NAME - if check_index_existence(opensearch_client, KEY_INDEX_NAME): - try: - assert drop_index(index_name=KEY_INDEX_NAME) - except Exception as e: - if logflag: - logger.info(f"[ delete ] {e}. Fail to drop index {KEY_INDEX_NAME}.") - raise HTTPException(status_code=500, detail=f"Fail to drop index {KEY_INDEX_NAME}.") - else: - logger.info(f"[ delete ] Index {KEY_INDEX_NAME} does not exits.") - - # drop index INDEX_NAME - if check_index_existence(opensearch_client, INDEX_NAME): - try: - assert drop_index(index_name=INDEX_NAME) - except Exception as e: - if logflag: - logger.info(f"[ delete ] {e}. Fail to drop index {INDEX_NAME}.") - raise HTTPException(status_code=500, detail=f"Fail to drop index {INDEX_NAME}.") - else: - if logflag: - logger.info(f"[ delete ] Index {INDEX_NAME} does not exits.") - - # delete files on local disk - try: - remove_folder_with_ignore(upload_folder) - except Exception as e: - if logflag: - logger.info(f"[ delete ] {e}. Fail to delete {upload_folder}.") - raise HTTPException(status_code=500, detail=f"Fail to delete {upload_folder}.") - - if logflag: - logger.info("[ delete ] successfully delete all files.") - create_upload_folder(upload_folder) - if logflag: - logger.info({"status": True}) - return {"status": True} - else: - raise HTTPException(status_code=404, detail="Single file deletion is not implemented yet") - - -if __name__ == "__main__": - create_upload_folder(upload_folder) - opea_microservices["opea_service@prepare_doc_opensearch"].start() diff --git a/comps/dataprep/opensearch/langchain/requirements.txt b/comps/dataprep/opensearch/langchain/requirements.txt deleted file mode 100644 index fa242973e8..0000000000 --- a/comps/dataprep/opensearch/langchain/requirements.txt +++ /dev/null @@ -1,30 +0,0 @@ -beautifulsoup4 -cairosvg -docarray[full] -docx2txt -easyocr -fastapi -huggingface_hub -langchain -langchain-community -langchain-text-splitters -langchain_huggingface -markdown -numpy -opensearch-py -opentelemetry-api -opentelemetry-exporter-otlp -opentelemetry-sdk -pandas -Pillow -prometheus-fastapi-instrumentator -pymupdf -pyspark -pytesseract -python-bidi -python-docx -python-pptx -sentence_transformers -shortuuid -unstructured[all-docs] -uvicorn diff --git a/comps/dataprep/pgvector/langchain/Dockerfile b/comps/dataprep/pgvector/langchain/Dockerfile deleted file mode 100644 index 2898293809..0000000000 --- a/comps/dataprep/pgvector/langchain/Dockerfile +++ /dev/null @@ -1,38 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -FROM python:3.11-slim - -ENV LANG=C.UTF-8 - -ARG ARCH="cpu" - -RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ - build-essential \ - default-jre \ - libgl1-mesa-glx \ - libjemalloc-dev - -RUN useradd -m -s /bin/bash user && \ - mkdir -p /home/user && \ - chown -R user /home/user/ - -USER user - -COPY comps /home/user/comps - -RUN pip install --no-cache-dir --upgrade pip setuptools && \ - if [ ${ARCH} = "cpu" ]; then pip install --no-cache-dir torch torchvision --index-url https://download.pytorch.org/whl/cpu; fi && \ - pip install --no-cache-dir -r /home/user/comps/dataprep/pgvector/langchain/requirements.txt - -ENV PYTHONPATH=$PYTHONPATH:/home/user - -USER root - -RUN mkdir -p /home/user/comps/dataprep/pgvector/langchain/uploaded_files && chown -R user 
/home/user/comps/dataprep/pgvector/langchain/uploaded_files - -USER user - -WORKDIR /home/user/comps/dataprep/pgvector/langchain - -ENTRYPOINT ["python", "prepare_doc_pgvector.py"] diff --git a/comps/dataprep/pgvector/langchain/config.py b/comps/dataprep/pgvector/langchain/config.py deleted file mode 100644 index 1206a8ed8d..0000000000 --- a/comps/dataprep/pgvector/langchain/config.py +++ /dev/null @@ -1,20 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import os - -# Embedding model - -EMBED_MODEL = os.getenv("EMBED_MODEL", "BAAI/bge-base-en-v1.5") - -PG_CONNECTION_STRING = os.getenv("PG_CONNECTION_STRING", "localhost") - -# Vector Index Configuration -INDEX_NAME = os.getenv("INDEX_NAME", "rag-pgvector") - -# chunk parameters -CHUNK_SIZE = os.getenv("CHUNK_SIZE", 1500) -CHUNK_OVERLAP = os.getenv("CHUNK_OVERLAP", 100) - -current_file_path = os.path.abspath(__file__) -parent_dir = os.path.dirname(current_file_path) diff --git a/comps/dataprep/pgvector/langchain/pgvector_langchain.yaml b/comps/dataprep/pgvector/langchain/pgvector_langchain.yaml deleted file mode 100644 index 54ff7b802e..0000000000 --- a/comps/dataprep/pgvector/langchain/pgvector_langchain.yaml +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -version: "3" -services: - pgvector-vector-db: - hostname: db - container_name: pgvector-vector-db - image: pgvector/pgvector:0.7.0-pg16 - ports: - - "5432:5432" - restart: always - ipc: host - environment: - - POSTGRES_DB=vectordb - - POSTGRES_USER=testuser - - POSTGRES_PASSWORD=testpwd - - POSTGRES_HOST_AUTH_METHOD=trust - - no_proxy= ${no_proxy} - - http_proxy= ${http_proxy} - - https_proxy= ${https_proxy} - volumes: - - ./init.sql:/docker-entrypoint-initdb.d/init.sql - - dataprep-pgvector: - image: opea/dataprep-pgvector:latest - container_name: dataprep-pgvector - ports: - - "6007:6007" - ipc: host - environment: - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - PG_CONNECTION_STRING: ${PG_CONNECTION_STRING} - INDEX_NAME: ${INDEX_NAME} - TEI_ENDPOINT: ${TEI_ENDPOINT} - restart: unless-stopped - -networks: - default: - driver: bridge diff --git a/comps/dataprep/pgvector/langchain/prepare_doc_pgvector.py b/comps/dataprep/pgvector/langchain/prepare_doc_pgvector.py deleted file mode 100644 index 9893b9628f..0000000000 --- a/comps/dataprep/pgvector/langchain/prepare_doc_pgvector.py +++ /dev/null @@ -1,317 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import json -import os -from pathlib import Path -from typing import List, Optional, Union -from urllib.parse import urlparse - -import psycopg2 -from config import CHUNK_OVERLAP, CHUNK_SIZE, EMBED_MODEL, INDEX_NAME, PG_CONNECTION_STRING -from fastapi import Body, File, Form, HTTPException, UploadFile -from langchain.text_splitter import RecursiveCharacterTextSplitter -from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceHubEmbeddings -from langchain_community.vectorstores import PGVector - -from comps import CustomLogger, DocPath, opea_microservices, register_microservice -from comps.dataprep.src.utils import ( - create_upload_folder, - document_loader, - encode_filename, - get_file_structure, - get_separators, - parse_html_new, - remove_folder_with_ignore, - save_content_to_local_disk, -) - -logger = CustomLogger("prepare_doc_pgvector") -logflag = os.getenv("LOGFLAG", False) - -tei_embedding_endpoint = os.getenv("TEI_ENDPOINT") -upload_folder = 
"./uploaded_files/" - - -async def save_file_to_local_disk(save_path: str, file): - save_path = Path(save_path) - with save_path.open("wb") as fout: - try: - content = await file.read() - fout.write(content) - except Exception as e: - if logflag: - logger.info(f"Write file failed. Exception: {e}") - raise HTTPException(status_code=500, detail=f"Write file {save_path} failed. Exception: {e}") - - -def delete_embeddings(doc_name): - """Get all ids from a vectorstore.""" - try: - result = urlparse(PG_CONNECTION_STRING) - username = result.username - password = result.password - database = result.path[1:] - hostname = result.hostname - port = result.port - - connection = psycopg2.connect(database=database, user=username, password=password, host=hostname, port=port) - - # Create a cursor object to execute SQL queries - - if logflag: - logger.info(f"Deleting {doc_name} from vectorstore") - - cur = connection.cursor() - if doc_name == "all": - cur.execute( - "DELETE FROM langchain_pg_collection lpe WHERE lpe.name = %(index_name)s", - {"index_name": INDEX_NAME}, - ) - else: - cur.execute( - "DELETE FROM langchain_pg_embedding lpe WHERE lpe.uuid in (SELECT lpc.uuid\ - FROM langchain_pg_embedding lpc where lpc.cmetadata ->> 'doc_name' = %(doc_name)s)", - {"doc_name": doc_name}, - ) - - connection.commit() # commit the transaction - cur.close() - - return True - - except psycopg2.Error as e: - if logflag: - logger.info(f"Error deleting document from vectorstore: {e}") - return False - - except Exception as e: - if logflag: - logger.info(f"An unexpected error occurred: {e}") - return False - - -def ingest_doc_to_pgvector(doc_path: DocPath): - """Ingest document to PGVector.""" - doc_path = doc_path.path - if logflag: - logger.info(f"Parsing document {doc_path}.") - - text_splitter = RecursiveCharacterTextSplitter( - chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP, add_start_index=True, separators=get_separators() - ) - - content = document_loader(doc_path) - - structured_types = [".xlsx", ".csv", ".json", "jsonl"] - _, ext = os.path.splitext(doc_path) - - if ext in structured_types: - chunks = content - else: - chunks = text_splitter.split_text(content) - - if logflag: - logger.info("Done preprocessing. 
Created ", len(chunks), " chunks of the original file.") - logger.info("PG Connection", PG_CONNECTION_STRING) - metadata = [dict({"doc_name": str(doc_path)})] - - # Create vectorstore - if tei_embedding_endpoint: - # create embeddings using TEI endpoint service - embedder = HuggingFaceHubEmbeddings(model=tei_embedding_endpoint) - else: - # create embeddings using local embedding model - embedder = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) - - # Batch size - batch_size = 32 - num_chunks = len(chunks) - for i in range(0, num_chunks, batch_size): - batch_chunks = chunks[i : i + batch_size] - batch_texts = batch_chunks - - _ = PGVector.from_texts( - texts=batch_texts, - embedding=embedder, - metadatas=metadata, - collection_name=INDEX_NAME, - connection_string=PG_CONNECTION_STRING, - ) - if logflag: - logger.info(f"Processed batch {i//batch_size + 1}/{(num_chunks-1)//batch_size + 1}") - return True - - -async def ingest_link_to_pgvector(link_list: List[str]): - # Create vectorstore - if tei_embedding_endpoint: - # create embeddings using TEI endpoint service - embedder = HuggingFaceHubEmbeddings(model=tei_embedding_endpoint) - else: - # create embeddings using local embedding model - embedder = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) - - text_splitter = RecursiveCharacterTextSplitter( - chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP, add_start_index=True, separators=get_separators() - ) - - for link in link_list: - texts = [] - content = parse_html_new([link], chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP) - if logflag: - logger.info(f"[ ingest link ] link: {link} content: {content}") - encoded_link = encode_filename(link) - save_path = upload_folder + encoded_link + ".txt" - doc_path = upload_folder + link + ".txt" - if logflag: - logger.info(f"[ ingest link ] save_path: {save_path}") - await save_content_to_local_disk(save_path, content) - metadata = [dict({"doc_name": str(doc_path)})] - - chunks = text_splitter.split_text(content) - - batch_size = 32 - num_chunks = len(chunks) - for i in range(0, num_chunks, batch_size): - batch_chunks = chunks[i : i + batch_size] - batch_texts = batch_chunks - - _ = PGVector.from_texts( - texts=batch_texts, - embedding=embedder, - metadatas=metadata, - collection_name=INDEX_NAME, - connection_string=PG_CONNECTION_STRING, - ) - if logflag: - logger.info(f"Processed batch {i//batch_size + 1}/{(num_chunks-1)//batch_size + 1}") - - return True - - -@register_microservice( - name="opea_service@prepare_doc_pgvector", - endpoint="/v1/dataprep", - host="0.0.0.0", - port=6007, -) -async def ingest_documents( - files: Optional[Union[UploadFile, List[UploadFile]]] = File(None), link_list: Optional[str] = Form(None) -): - if logflag: - logger.info(f"files:{files}") - logger.info(f"link_list:{link_list}") - if files and link_list: - raise HTTPException(status_code=400, detail="Provide either a file or a string list, not both.") - - if files: - if not isinstance(files, list): - files = [files] - - if not os.path.exists(upload_folder): - Path(upload_folder).mkdir(parents=True, exist_ok=True) - for file in files: - save_path = upload_folder + file.filename - await save_file_to_local_disk(save_path, file) - - ingest_doc_to_pgvector(DocPath(path=save_path)) - if logflag: - logger.info(f"Successfully saved file {save_path}") - result = {"status": 200, "message": "Data preparation succeeded"} - if logflag: - logger.info(result) - return result - - if link_list: - try: - link_list = json.loads(link_list) # Parse JSON string to list - if not 
isinstance(link_list, list): - raise HTTPException(status_code=400, detail="link_list should be a list.") - await ingest_link_to_pgvector(link_list) - if logflag: - logger.info(f"Successfully saved link list {link_list}") - result = {"status": 200, "message": "Data preparation succeeded"} - if logflag: - logger.info(result) - return result - except json.JSONDecodeError: - raise HTTPException(status_code=400, detail="Invalid JSON format for link_list.") - - raise HTTPException(status_code=400, detail="Must provide either a file or a string list.") - - -@register_microservice( - name="opea_service@prepare_doc_pgvector", endpoint="/v1/dataprep/get_file", host="0.0.0.0", port=6007 -) -async def rag_get_file_structure(): - if logflag: - logger.info("[ dataprep - get file ] start to get file structure") - - if not Path(upload_folder).exists(): - if logflag: - logger.info("No file uploaded, return empty list.") - return [] - - file_content = get_file_structure(upload_folder) - if logflag: - logger.info(file_content) - return file_content - - -@register_microservice( - name="opea_service@prepare_doc_pgvector", endpoint="/v1/dataprep/delete_file", host="0.0.0.0", port=6007 -) -async def delete_single_file(file_path: str = Body(..., embed=True)): - """Delete file according to `file_path`. - - `file_path`: - - specific file path (e.g. /path/to/file.txt) - - folder path (e.g. /path/to/folder) - - "all": delete all files uploaded - """ - if file_path == "all": - if logflag: - logger.info("[dataprep - del] delete all files") - remove_folder_with_ignore(upload_folder) - assert delete_embeddings(file_path) - if logflag: - logger.info("[dataprep - del] successfully delete all files.") - create_upload_folder(upload_folder) - if logflag: - logger.info({"status": True}) - return {"status": True} - - delete_path = Path(upload_folder + "/" + encode_filename(file_path)) - doc_path = upload_folder + file_path - if logflag: - logger.info(f"[dataprep - del] delete_path: {delete_path}") - - # partially delete files/folders - if delete_path.exists(): - # delete file - if delete_path.is_file(): - try: - assert delete_embeddings(doc_path) - delete_path.unlink() - except Exception as e: - if logflag: - logger.info(f"[dataprep - del] fail to delete file {delete_path}: {e}") - logger.info({"status": False}) - return {"status": False} - # delete folder - else: - if logflag: - logger.info("[dataprep - del] delete folder is not supported for now.") - logger.info({"status": False}) - return {"status": False} - if logflag: - logger.info({"status": True}) - return {"status": True} - else: - raise HTTPException(status_code=404, detail="File/folder not found. 
Please check del_path.") - - -if __name__ == "__main__": - create_upload_folder(upload_folder) - opea_microservices["opea_service@prepare_doc_pgvector"].start() diff --git a/comps/dataprep/pgvector/langchain/requirements.txt b/comps/dataprep/pgvector/langchain/requirements.txt deleted file mode 100644 index ab3d19db4e..0000000000 --- a/comps/dataprep/pgvector/langchain/requirements.txt +++ /dev/null @@ -1,32 +0,0 @@ -beautifulsoup4 -cairosvg -docarray[full] -docx2txt -easyocr -fastapi -html2text -huggingface_hub -langchain -langchain-community -langchain-text-splitters -markdown -numpy -opentelemetry-api -opentelemetry-exporter-otlp -opentelemetry-sdk -pandas -pgvector==0.2.5 -Pillow -prometheus-fastapi-instrumentator -psycopg2-binary -pymupdf -pyspark -pytesseract -python-docx -python-multipart -python-pptx -sentence_transformers -shortuuid -tiktoken -unstructured[all-docs]==0.15.7 -uvicorn diff --git a/comps/dataprep/pinecone/langchain/Dockerfile b/comps/dataprep/pinecone/langchain/Dockerfile deleted file mode 100644 index fb6e56fa2e..0000000000 --- a/comps/dataprep/pinecone/langchain/Dockerfile +++ /dev/null @@ -1,42 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -FROM python:3.11-slim - -ENV LANG=C.UTF-8 - -ARG ARCH="cpu" - -RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ - build-essential \ - default-jre \ - libcairo2 \ - libgl1-mesa-glx \ - libjemalloc-dev - -RUN useradd -m -s /bin/bash user && \ - mkdir -p /home/user && \ - chown -R user /home/user/ - -USER user - -COPY comps /home/user/comps - -RUN pip install --no-cache-dir --upgrade pip setuptools && \ - if [ ${ARCH} = "cpu" ]; then \ - pip install --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cpu -r /home/user/comps/dataprep/pinecone/langchain/requirements.txt; \ - else \ - pip install --no-cache-dir -r /home/user/comps/dataprep/pinecone/langchain/requirements.txt; \ - fi; - -ENV PYTHONPATH=$PYTHONPATH:/home/user - -USER root - -RUN mkdir -p /home/user/comps/dataprep/pinecone/langchain/uploaded_files && chown -R user /home/user/comps/dataprep/pinecone/langchain/uploaded_files - -USER user - -WORKDIR /home/user/comps/dataprep/pinecone/langchain - -ENTRYPOINT ["python", "prepare_doc_pinecone.py"] diff --git a/comps/dataprep/pinecone/langchain/config.py b/comps/dataprep/pinecone/langchain/config.py deleted file mode 100644 index 7a761a09c2..0000000000 --- a/comps/dataprep/pinecone/langchain/config.py +++ /dev/null @@ -1,16 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import os - -# Embedding model -EMBED_MODEL = os.getenv("EMBED_MODEL", "BAAI/bge-base-en-v1.5") - -# Pinecone configuration -PINECONE_API_KEY = os.getenv("PINECONE_API_KEY", "xxx_xxx") -PINECONE_INDEX_NAME = os.getenv("PINECONE_INDEX_NAME", "langchain-test") - -# LLM/Embedding endpoints -TGI_LLM_ENDPOINT = os.getenv("TGI_LLM_ENDPOINT", "http://localhost:8080") -TGI_LLM_ENDPOINT_NO_RAG = os.getenv("TGI_LLM_ENDPOINT_NO_RAG", "http://localhost:8081") -TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_EMBEDDING_ENDPOINT") diff --git a/comps/dataprep/pinecone/langchain/pinecone_langchain.yaml b/comps/dataprep/pinecone/langchain/pinecone_langchain.yaml deleted file mode 100644 index 851bad5864..0000000000 --- a/comps/dataprep/pinecone/langchain/pinecone_langchain.yaml +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: 
Apache-2.0 - -version: "3" -services: - tei-embedding-service: - image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 - container_name: tei-embedding-server - ports: - - "6006:80" - volumes: - - "./data:/data" - shm_size: 1g - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate - dataprep-pinecone: - image: opea/dataprep-pinecone:latest - container_name: dataprep-pinecone-server - ports: - - "6007:6007" - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - PINECONE_API_KEY: ${PINECONE_API_KEY} - PINECONE_INDEX_NAME: ${PINECONE_INDEX_NAME} - TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} - LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} - restart: unless-stopped - -networks: - default: - driver: bridge diff --git a/comps/dataprep/pinecone/langchain/prepare_doc_pinecone.py b/comps/dataprep/pinecone/langchain/prepare_doc_pinecone.py deleted file mode 100644 index 877b7f89e6..0000000000 --- a/comps/dataprep/pinecone/langchain/prepare_doc_pinecone.py +++ /dev/null @@ -1,299 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import json -import os -import shutil -import uuid -from pathlib import Path -from typing import List, Optional, Union - -from config import EMBED_MODEL, PINECONE_API_KEY, PINECONE_INDEX_NAME -from fastapi import Body, File, Form, HTTPException, UploadFile -from langchain.text_splitter import RecursiveCharacterTextSplitter -from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceEmbeddings, HuggingFaceHubEmbeddings -from langchain_pinecone import PineconeVectorStore -from langchain_text_splitters import HTMLHeaderTextSplitter -from pinecone import Pinecone, ServerlessSpec - -from comps import CustomLogger, DocPath, opea_microservices, opea_telemetry, register_microservice -from comps.dataprep.src.utils import ( - create_upload_folder, - document_loader, - encode_filename, - get_file_structure, - get_separators, - get_tables_result, - parse_html_new, - remove_folder_with_ignore, - save_content_to_local_disk, -) - -logger = CustomLogger("prepare_doc_pinecone") -logflag = os.getenv("LOGFLAG", False) - -tei_embedding_endpoint = os.getenv("TEI_EMBEDDING_ENDPOINT") -upload_folder = "./uploaded_files/" - - -def check_index_existance(): - if logflag: - logger.info(f"[ check index existence ] checking {PINECONE_INDEX_NAME}") - pc = Pinecone(api_key=PINECONE_API_KEY) - existing_indexes = [index_info["name"] for index_info in pc.list_indexes()] - if PINECONE_INDEX_NAME not in existing_indexes: - if logflag: - logger.info("[ check index existence ] index does not exist") - return None - else: - return True - - -def create_index(client): - if logflag: - logger.info(f"[ create index ] creating index {PINECONE_INDEX_NAME}") - try: - client.create_index( - name=PINECONE_INDEX_NAME, - dimension=768, - metric="cosine", - spec=ServerlessSpec(cloud="aws", region="us-east-1"), - ) - if logflag: - logger.info(f"[ create index ] index {PINECONE_INDEX_NAME} successfully created") - except Exception as e: - if logflag: - logger.info(f"[ create index ] fail to create index {PINECONE_INDEX_NAME}: {e}") - return False - return True - - -def drop_index(index_name): - if logflag: - logger.info(f"[ drop index ] dropping index {index_name}") - pc = Pinecone(api_key=PINECONE_API_KEY) - try: - pc.delete_index(index_name) - if 
logflag: - logger.info(f"[ drop index ] index {index_name} deleted") - except Exception as e: - if logflag: - logger.info(f"[ drop index ] index {index_name} delete failed: {e}") - return False - return True - - -def ingest_data_to_pinecone(doc_path: DocPath): - """Ingest document to Pinecone.""" - path = doc_path.path - if logflag: - logger.info(f"Parsing document {path}.") - - if path.endswith(".html"): - headers_to_split_on = [ - ("h1", "Header 1"), - ("h2", "Header 2"), - ("h3", "Header 3"), - ] - text_splitter = HTMLHeaderTextSplitter(headers_to_split_on=headers_to_split_on) - else: - text_splitter = RecursiveCharacterTextSplitter( - chunk_size=doc_path.chunk_size, - chunk_overlap=doc_path.chunk_overlap, - add_start_index=True, - separators=get_separators(), - ) - - content = document_loader(path) - - structured_types = [".xlsx", ".csv", ".json", "jsonl"] - _, ext = os.path.splitext(path) - - if ext in structured_types: - chunks = content - else: - chunks = text_splitter.split_text(content) - - if doc_path.process_table and path.endswith(".pdf"): - table_chunks = get_tables_result(path, doc_path.table_strategy) - chunks = chunks + table_chunks - if logflag: - logger.info(f"Done preprocessing. Created {len(chunks)} chunks of the original file.") - - # Create vectorstore - if tei_embedding_endpoint: - # create embeddings using TEI endpoint service - embedder = HuggingFaceHubEmbeddings(model=tei_embedding_endpoint) - else: - # create embeddings using local embedding model - embedder = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) - - pc = Pinecone(api_key=PINECONE_API_KEY) - - # Checking Index existence - if not check_index_existance(): - # Creating the index - create_index(pc) - if logflag: - logger.info(f"Successfully created the index {PINECONE_INDEX_NAME}") - - # Batch size - batch_size = 32 - num_chunks = len(chunks) - file_ids = [] - - for i in range(0, num_chunks, batch_size): - batch_chunks = chunks[i : i + batch_size] - batch_texts = batch_chunks - - vectorstore = PineconeVectorStore.from_texts( - texts=batch_texts, - embedding=embedder, - index_name=PINECONE_INDEX_NAME, - ) - if logflag: - logger.info(f"Processed batch {i//batch_size + 1}/{(num_chunks-1)//batch_size + 1}") - - # store file_ids into index file-keys - pc = Pinecone(api_key=PINECONE_API_KEY) - - -async def ingest_link_to_pinecone(link_list: List[str], chunk_size, chunk_overlap): - # Create embedding obj - if tei_embedding_endpoint: - # create embeddings using TEI endpoint service - embedder = HuggingFaceHubEmbeddings(model=tei_embedding_endpoint) - else: - # create embeddings using local embedding model - embedder = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) - - pc = Pinecone(api_key=PINECONE_API_KEY) - - # Checking Index existence - if not check_index_existance(): - # Creating the index - create_index(pc) - if logflag: - logger.info(f"Successfully created the index {PINECONE_INDEX_NAME}") - - # save link contents and doc_ids one by one - for link in link_list: - content = parse_html_new([link], chunk_size=chunk_size, chunk_overlap=chunk_overlap) - if logflag: - logger.info(f"[ ingest link ] link: {link} content: {content}") - encoded_link = encode_filename(link) - save_path = upload_folder + encoded_link + ".txt" - if logflag: - logger.info(f"[ ingest link ] save_path: {save_path}") - await save_content_to_local_disk(save_path, content) - - vectorstore = PineconeVectorStore.from_texts( - texts=content, - embedding=embedder, - index_name=PINECONE_INDEX_NAME, - ) - - return True - - 
-@register_microservice(name="opea_service@prepare_doc_pinecone", endpoint="/v1/dataprep", host="0.0.0.0", port=6007) -async def ingest_documents( - files: Optional[Union[UploadFile, List[UploadFile]]] = File(None), - link_list: Optional[str] = Form(None), - chunk_size: int = Form(1500), - chunk_overlap: int = Form(100), - process_table: bool = Form(False), - table_strategy: str = Form("fast"), -): - if logflag: - logger.info(f"files:{files}") - logger.info(f"link_list:{link_list}") - - if files: - if not isinstance(files, list): - files = [files] - uploaded_files = [] - for file in files: - encode_file = encode_filename(file.filename) - save_path = upload_folder + encode_file - await save_content_to_local_disk(save_path, file) - ingest_data_to_pinecone( - DocPath( - path=save_path, - chunk_size=chunk_size, - chunk_overlap=chunk_overlap, - process_table=process_table, - table_strategy=table_strategy, - ) - ) - uploaded_files.append(save_path) - if logflag: - logger.info(f"Successfully saved file {save_path}") - result = {"status": 200, "message": "Data preparation succeeded"} - if logflag: - logger.info(result) - return result - - if link_list: - try: - link_list = json.loads(link_list) # Parse JSON string to list - if not isinstance(link_list, list): - raise HTTPException(status_code=400, detail="link_list should be a list.") - await ingest_link_to_pinecone(link_list, chunk_size, chunk_overlap) - result = {"status": 200, "message": "Data preparation succeeded"} - if logflag: - logger.info(f"Successfully saved link list {link_list}") - logger.info(result) - return result - except json.JSONDecodeError: - raise HTTPException(status_code=400, detail="Invalid JSON format for link_list.") - - raise HTTPException(status_code=400, detail="Must provide either a file or a string list.") - - -@register_microservice( - name="opea_service@prepare_doc_pinecone", endpoint="/v1/dataprep/get_file", host="0.0.0.0", port=6007 -) -async def rag_get_file_structure(): - if logflag: - logger.info("[ dataprep - get file ] start to get file structure") - - if not Path(upload_folder).exists(): - if logflag: - logger.info("No file uploaded, return empty list.") - return [] - - file_content = get_file_structure(upload_folder) - if logflag: - logger.info(file_content) - return file_content - - -@register_microservice( - name="opea_service@prepare_doc_pinecone", endpoint="/v1/dataprep/delete_file", host="0.0.0.0", port=6007 -) -async def delete_all(file_path: str = Body(..., embed=True)): - """Delete file according to `file_path`. 
- - `file_path`: - - "all": delete all files uploaded - """ - # delete all uploaded files - if file_path == "all": - if logflag: - logger.info("[dataprep - del] delete all files") - remove_folder_with_ignore(upload_folder) - assert drop_index(index_name=PINECONE_INDEX_NAME) - if logflag: - logger.info("[dataprep - del] successfully delete all files.") - create_upload_folder(upload_folder) - if logflag: - logger.info('{"status": True}') - return {"status": True} - else: - raise HTTPException(status_code=404, detail="Single file deletion is not implemented yet") - - -if __name__ == "__main__": - create_upload_folder(upload_folder) - opea_microservices["opea_service@prepare_doc_pinecone"].start() diff --git a/comps/dataprep/pinecone/langchain/requirements.txt b/comps/dataprep/pinecone/langchain/requirements.txt deleted file mode 100644 index 27bbac44b3..0000000000 --- a/comps/dataprep/pinecone/langchain/requirements.txt +++ /dev/null @@ -1,32 +0,0 @@ -beautifulsoup4 -cairosvg -docarray[full] -docx2txt -easyocr -fastapi -html2text -huggingface_hub -langchain -langchain-community -langchain-openai -langchain-pinecone -langchain-text-splitters -markdown -numpy -opentelemetry-api -opentelemetry-exporter-otlp -opentelemetry-sdk -pandas -Pillow -pinecone-client -prometheus-fastapi-instrumentator -pymupdf -pyspark -pytesseract -python-bidi==0.4.2 -python-docx -python-pptx -sentence_transformers -shortuuid -unstructured[all-docs]==0.15.7 -uvicorn diff --git a/comps/dataprep/qdrant/langchain/Dockerfile b/comps/dataprep/qdrant/langchain/Dockerfile deleted file mode 100644 index fa885b9e88..0000000000 --- a/comps/dataprep/qdrant/langchain/Dockerfile +++ /dev/null @@ -1,38 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -FROM python:3.11-slim - -ENV LANG=C.UTF-8 - -ARG ARCH="cpu" - -RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ - build-essential \ - default-jre \ - libgl1-mesa-glx \ - libjemalloc-dev - -RUN useradd -m -s /bin/bash user && \ - mkdir -p /home/user && \ - chown -R user /home/user/ - -USER user - -COPY comps /home/user/comps - -RUN pip install --no-cache-dir --upgrade pip setuptools && \ - if [ ${ARCH} = "cpu" ]; then pip install --no-cache-dir torch torchvision --index-url https://download.pytorch.org/whl/cpu; fi && \ - pip install --no-cache-dir -r /home/user/comps/dataprep/qdrant/langchain/requirements.txt - -ENV PYTHONPATH=$PYTHONPATH:/home/user - -USER root - -RUN mkdir -p /home/user/comps/dataprep/qdrant/langchain/uploaded_files && chown -R user /home/user/comps/dataprep/qdrant/langchain/uploaded_files - -USER user - -WORKDIR /home/user/comps/dataprep/qdrant/langchain - -ENTRYPOINT ["python", "prepare_doc_qdrant.py"] diff --git a/comps/dataprep/qdrant/langchain/config.py b/comps/dataprep/qdrant/langchain/config.py deleted file mode 100644 index 7cf37f404a..0000000000 --- a/comps/dataprep/qdrant/langchain/config.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import os - -# Embedding model -EMBED_MODEL = os.getenv("EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2") - -# Qdrant configuration -QDRANT_HOST = os.getenv("QDRANT_HOST", "localhost") -QDRANT_PORT = int(os.getenv("QDRANT_PORT", 6333)) -COLLECTION_NAME = os.getenv("COLLECTION_NAME", "rag-qdrant") - -# LLM/Embedding endpoints -TGI_LLM_ENDPOINT = os.getenv("TGI_LLM_ENDPOINT", "http://localhost:8080") -TGI_LLM_ENDPOINT_NO_RAG = os.getenv("TGI_LLM_ENDPOINT_NO_RAG", 
"http://localhost:8081") -TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_ENDPOINT") diff --git a/comps/dataprep/qdrant/langchain/prepare_doc_qdrant.py b/comps/dataprep/qdrant/langchain/prepare_doc_qdrant.py deleted file mode 100644 index 6c74f5cbb2..0000000000 --- a/comps/dataprep/qdrant/langchain/prepare_doc_qdrant.py +++ /dev/null @@ -1,179 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import json -import os -from typing import List, Optional, Union - -from config import COLLECTION_NAME, EMBED_MODEL, QDRANT_HOST, QDRANT_PORT, TEI_EMBEDDING_ENDPOINT -from fastapi import File, Form, HTTPException, UploadFile -from langchain.text_splitter import RecursiveCharacterTextSplitter -from langchain_community.embeddings import HuggingFaceBgeEmbeddings -from langchain_community.vectorstores import Qdrant -from langchain_huggingface import HuggingFaceEndpointEmbeddings -from langchain_text_splitters import HTMLHeaderTextSplitter - -from comps import CustomLogger, DocPath, opea_microservices, register_microservice -from comps.dataprep.src.utils import ( - document_loader, - encode_filename, - get_separators, - get_tables_result, - parse_html_new, - save_content_to_local_disk, -) - -logger = CustomLogger("prepare_doc_qdrant") -logflag = os.getenv("LOGFLAG", False) - -upload_folder = "./uploaded_files/" - - -def ingest_data_to_qdrant(doc_path: DocPath): - """Ingest document to Qdrant.""" - path = doc_path.path - if logflag: - logger.info(f"Parsing document {path}.") - - if path.endswith(".html"): - headers_to_split_on = [ - ("h1", "Header 1"), - ("h2", "Header 2"), - ("h3", "Header 3"), - ] - text_splitter = HTMLHeaderTextSplitter(headers_to_split_on=headers_to_split_on) - else: - text_splitter = RecursiveCharacterTextSplitter( - chunk_size=doc_path.chunk_size, - chunk_overlap=doc_path.chunk_overlap, - add_start_index=True, - separators=get_separators(), - ) - - content = document_loader(path) - - structured_types = [".xlsx", ".csv", ".json", "jsonl"] - _, ext = os.path.splitext(path) - - if ext in structured_types: - chunks = content - else: - chunks = text_splitter.split_text(content) - - if doc_path.process_table and path.endswith(".pdf"): - table_chunks = get_tables_result(path, doc_path.table_strategy) - chunks = chunks + table_chunks - if logflag: - logger.info("Done preprocessing. 
Created ", len(chunks), " chunks of the original file.") - - # Create vectorstore - if TEI_EMBEDDING_ENDPOINT: - # create embeddings using TEI endpoint service - embedder = HuggingFaceEndpointEmbeddings(model=TEI_EMBEDDING_ENDPOINT) - else: - # create embeddings using local embedding model - embedder = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) - - if logflag: - logger.info("embedder created.") - - # Batch size - batch_size = 32 - num_chunks = len(chunks) - for i in range(0, num_chunks, batch_size): - batch_chunks = chunks[i : i + batch_size] - batch_texts = batch_chunks - - _ = Qdrant.from_texts( - texts=batch_texts, - embedding=embedder, - collection_name=COLLECTION_NAME, - host=QDRANT_HOST, - port=QDRANT_PORT, - ) - if logflag: - logger.info(f"Processed batch {i//batch_size + 1}/{(num_chunks-1)//batch_size + 1}") - - return True - - -@register_microservice( - name="opea_service@prepare_doc_qdrant", - endpoint="/v1/dataprep", - host="0.0.0.0", - port=6007, - input_datatype=DocPath, - output_datatype=None, -) -async def ingest_documents( - files: Optional[Union[UploadFile, List[UploadFile]]] = File(None), - link_list: Optional[str] = Form(None), - chunk_size: int = Form(1500), - chunk_overlap: int = Form(100), - process_table: bool = Form(False), - table_strategy: str = Form("fast"), -): - if logflag: - logger.info(f"files:{files}") - logger.info(f"link_list:{link_list}") - - if files: - if not isinstance(files, list): - files = [files] - uploaded_files = [] - for file in files: - encode_file = encode_filename(file.filename) - save_path = upload_folder + encode_file - await save_content_to_local_disk(save_path, file) - ingest_data_to_qdrant( - DocPath( - path=save_path, - chunk_size=chunk_size, - chunk_overlap=chunk_overlap, - process_table=process_table, - table_strategy=table_strategy, - ) - ) - uploaded_files.append(save_path) - if logflag: - logger.info(f"Successfully saved file {save_path}") - result = {"status": 200, "message": "Data preparation succeeded"} - if logflag: - logger.info(result) - return result - - if link_list: - link_list = json.loads(link_list) # Parse JSON string to list - if not isinstance(link_list, list): - raise HTTPException(status_code=400, detail="link_list should be a list.") - for link in link_list: - encoded_link = encode_filename(link) - save_path = upload_folder + encoded_link + ".txt" - content = parse_html_new([link], chunk_size=chunk_size, chunk_overlap=chunk_overlap) - try: - await save_content_to_local_disk(save_path, content) - ingest_data_to_qdrant( - DocPath( - path=save_path, - chunk_size=chunk_size, - chunk_overlap=chunk_overlap, - process_table=process_table, - table_strategy=table_strategy, - ) - ) - except json.JSONDecodeError: - raise HTTPException(status_code=500, detail="Fail to ingest data into qdrant.") - - if logflag: - logger.info(f"Successfully saved link {link}") - - result = {"status": 200, "message": "Data preparation succeeded"} - if logflag: - logger.info(result) - return result - - raise HTTPException(status_code=400, detail="Must provide either a file or a string list.") - - -if __name__ == "__main__": - opea_microservices["opea_service@prepare_doc_qdrant"].start() diff --git a/comps/dataprep/qdrant/langchain/qdrant_langchain.yaml b/comps/dataprep/qdrant/langchain/qdrant_langchain.yaml deleted file mode 100644 index aaf2a17ddc..0000000000 --- a/comps/dataprep/qdrant/langchain/qdrant_langchain.yaml +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -version: 
"3" -services: - qdrant-vector-db: - image: qdrant/qdrant - container_name: qdrant-vector-db - ports: - - "6333:6333" - - "6334:6334" - tei-embedding-service: - image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 - container_name: tei-embedding-server - ports: - - "6006:80" - volumes: - - "./data:/data" - shm_size: 1g - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate - dataprep-qdrant: - image: opea/gen-ai-comps:dataprep-qdrant-xeon-server - container_name: dataprep-qdrant-server - depends_on: - - qdrant-vector-db - - tei-embedding-service - ports: - - "6007:6007" - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - QDRANT_HOST: ${QDRANT_HOST} - QDRANT_PORT: ${QDRANT_PORT} - COLLECTION_NAME: ${COLLECTION_NAME} - TEI_ENDPOINT: ${TEI_ENDPOINT} - restart: unless-stopped - -networks: - default: - driver: bridge diff --git a/comps/dataprep/qdrant/langchain/requirements.txt b/comps/dataprep/qdrant/langchain/requirements.txt deleted file mode 100644 index 8f92c8ca8d..0000000000 --- a/comps/dataprep/qdrant/langchain/requirements.txt +++ /dev/null @@ -1,29 +0,0 @@ -beautifulsoup4 -cairosvg -docarray[full] -docx2txt -easyocr -fastapi -html2text -huggingface_hub -langchain -langchain-community -langchain-text-splitters -langchain_huggingface -markdown -numpy -opentelemetry-api -opentelemetry-exporter-otlp -opentelemetry-sdk -pandas -Pillow -prometheus-fastapi-instrumentator -pymupdf -pytesseract -python-docx -python-pptx -qdrant-client -sentence_transformers -shortuuid -unstructured[all-docs]==0.15.7 -uvicorn diff --git a/comps/dataprep/redis/langchain/Dockerfile b/comps/dataprep/redis/langchain/Dockerfile deleted file mode 100644 index 66b06dee38..0000000000 --- a/comps/dataprep/redis/langchain/Dockerfile +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -FROM python:3.11-slim - -ENV LANG=C.UTF-8 - -ARG ARCH="cpu" - -RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ - build-essential \ - default-jre \ - libgl1-mesa-glx \ - libjemalloc-dev \ - libreoffice \ - poppler-utils \ - tesseract-ocr - -RUN useradd -m -s /bin/bash user && \ - mkdir -p /home/user && \ - chown -R user /home/user/ - -USER user - -COPY comps /home/user/comps - -RUN pip install --no-cache-dir --upgrade pip setuptools && \ - if [ ${ARCH} = "cpu" ]; then pip install --no-cache-dir torch torchvision --index-url https://download.pytorch.org/whl/cpu; fi && \ - pip install --no-cache-dir -r /home/user/comps/dataprep/redis/langchain/requirements.txt - -ENV PYTHONPATH=$PYTHONPATH:/home/user - -USER root - -RUN mkdir -p /home/user/comps/dataprep/redis/langchain/uploaded_files && chown -R user /home/user/comps/dataprep/redis/langchain/uploaded_files - -USER user - -WORKDIR /home/user/comps/dataprep/redis/langchain - -ENTRYPOINT ["python", "prepare_doc_redis.py"] diff --git a/comps/dataprep/redis/langchain/config.py b/comps/dataprep/redis/langchain/config.py deleted file mode 100644 index 2d722a84a6..0000000000 --- a/comps/dataprep/redis/langchain/config.py +++ /dev/null @@ -1,66 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import os - -# Embedding model - -EMBED_MODEL = os.getenv("EMBED_MODEL", "BAAI/bge-base-en-v1.5") - -# Redis Connection Information -REDIS_HOST = os.getenv("REDIS_HOST", "localhost") -REDIS_PORT = 
int(os.getenv("REDIS_PORT", 6379)) - - -def get_boolean_env_var(var_name, default_value=False): - """Retrieve the boolean value of an environment variable. - - Args: - var_name (str): The name of the environment variable to retrieve. - default_value (bool): The default value to return if the variable - is not found. - - Returns: - bool: The value of the environment variable, interpreted as a boolean. - """ - true_values = {"true", "1", "t", "y", "yes"} - false_values = {"false", "0", "f", "n", "no"} - - # Retrieve the environment variable's value - value = os.getenv(var_name, "").lower() - - # Decide the boolean value based on the content of the string - if value in true_values: - return True - elif value in false_values: - return False - else: - return default_value - - -def format_redis_conn_from_env(): - redis_url = os.getenv("REDIS_URL", None) - if redis_url: - return redis_url - else: - using_ssl = get_boolean_env_var("REDIS_SSL", False) - start = "rediss://" if using_ssl else "redis://" - - # if using RBAC - password = os.getenv("REDIS_PASSWORD", None) - username = os.getenv("REDIS_USERNAME", "default") - if password is not None: - start += f"{username}:{password}@" - - return start + f"{REDIS_HOST}:{REDIS_PORT}" - - -REDIS_URL = format_redis_conn_from_env() - -# Vector Index Configuration -INDEX_NAME = os.getenv("INDEX_NAME", "rag-redis") -KEY_INDEX_NAME = os.getenv("KEY_INDEX_NAME", "file-keys") - -TIMEOUT_SECONDS = int(os.getenv("TIMEOUT_SECONDS", 600)) - -SEARCH_BATCH_SIZE = int(os.getenv("SEARCH_BATCH_SIZE", 10)) diff --git a/comps/dataprep/redis/langchain/prepare_doc_redis.py b/comps/dataprep/redis/langchain/prepare_doc_redis.py deleted file mode 100644 index ed73d56754..0000000000 --- a/comps/dataprep/redis/langchain/prepare_doc_redis.py +++ /dev/null @@ -1,500 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import json -import os -from pathlib import Path -from typing import List, Optional, Union - -# from pyspark import SparkConf, SparkContext -import redis -from config import EMBED_MODEL, INDEX_NAME, KEY_INDEX_NAME, REDIS_URL, SEARCH_BATCH_SIZE -from fastapi import Body, File, Form, HTTPException, UploadFile -from langchain.text_splitter import RecursiveCharacterTextSplitter -from langchain_community.embeddings import HuggingFaceBgeEmbeddings -from langchain_community.vectorstores import Redis -from langchain_huggingface import HuggingFaceEndpointEmbeddings -from langchain_text_splitters import HTMLHeaderTextSplitter -from redis.commands.search.field import TextField -from redis.commands.search.indexDefinition import IndexDefinition, IndexType - -from comps import CustomLogger, DocPath, opea_microservices, register_microservice -from comps.dataprep.src.utils import ( - create_upload_folder, - document_loader, - encode_filename, - format_search_results, - get_separators, - get_tables_result, - parse_html_new, - remove_folder_with_ignore, - save_content_to_local_disk, -) - -logger = CustomLogger("prepare_doc_redis") -logflag = os.getenv("LOGFLAG", False) - -tei_embedding_endpoint = os.getenv("TEI_ENDPOINT") -upload_folder = "./uploaded_files/" -redis_pool = redis.ConnectionPool.from_url(REDIS_URL) - - -def check_index_existance(client): - if logflag: - logger.info(f"[ check index existence ] checking {client}") - try: - results = client.search("*") - if logflag: - logger.info(f"[ check index existence ] index of client exists: {client}") - return results - except Exception as e: - if logflag: - logger.info(f"[ check index existence ] 
index does not exist: {e}") - return None - - -def create_index(client, index_name: str = KEY_INDEX_NAME): - if logflag: - logger.info(f"[ create index ] creating index {index_name}") - try: - definition = IndexDefinition(index_type=IndexType.HASH, prefix=["file:"]) - client.create_index((TextField("file_name"), TextField("key_ids")), definition=definition) - if logflag: - logger.info(f"[ create index ] index {index_name} successfully created") - except Exception as e: - if logflag: - logger.info(f"[ create index ] fail to create index {index_name}: {e}") - return False - return True - - -def store_by_id(client, key, value): - if logflag: - logger.info(f"[ store by id ] storing ids of {key}") - try: - client.add_document(doc_id="file:" + key, file_name=key, key_ids=value) - if logflag: - logger.info(f"[ store by id ] store document success. id: file:{key}") - except Exception as e: - if logflag: - logger.info(f"[ store by id ] fail to store document file:{key}: {e}") - return False - return True - - -def search_by_id(client, doc_id): - if logflag: - logger.info(f"[ search by id ] searching docs of {doc_id}") - try: - results = client.load_document(doc_id) - if logflag: - logger.info(f"[ search by id ] search success of {doc_id}: {results}") - return results - except Exception as e: - if logflag: - logger.info(f"[ search by id ] fail to search docs of {doc_id}: {e}") - return None - - -def drop_index(index_name, redis_url=REDIS_URL): - if logflag: - logger.info(f"[ drop index ] dropping index {index_name}") - try: - assert Redis.drop_index(index_name=index_name, delete_documents=True, redis_url=redis_url) - if logflag: - logger.info(f"[ drop index ] index {index_name} deleted") - except Exception as e: - if logflag: - logger.info(f"[ drop index ] index {index_name} delete failed: {e}") - return False - return True - - -def delete_by_id(client, id): - try: - assert client.delete_document(id) - if logflag: - logger.info(f"[ delete by id ] delete id success: {id}") - except Exception as e: - if logflag: - logger.info(f"[ delete by id ] fail to delete ids {id}: {e}") - return False - return True - - -def ingest_chunks_to_redis(file_name: str, chunks: List): - if logflag: - logger.info(f"[ ingest chunks ] file name: {file_name}") - # Create vectorstore - if tei_embedding_endpoint: - # create embeddings using TEI endpoint service - embedder = HuggingFaceEndpointEmbeddings(model=tei_embedding_endpoint) - else: - # create embeddings using local embedding model - embedder = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) - - # Batch size - batch_size = 32 - num_chunks = len(chunks) - - file_ids = [] - for i in range(0, num_chunks, batch_size): - if logflag: - logger.info(f"[ ingest chunks ] Current batch: {i}") - batch_chunks = chunks[i : i + batch_size] - batch_texts = batch_chunks - - _, keys = Redis.from_texts_return_keys( - texts=batch_texts, - embedding=embedder, - index_name=INDEX_NAME, - redis_url=REDIS_URL, - ) - if logflag: - logger.info(f"[ ingest chunks ] keys: {keys}") - file_ids.extend(keys) - if logflag: - logger.info(f"[ ingest chunks ] Processed batch {i//batch_size + 1}/{(num_chunks-1)//batch_size + 1}") - - # store file_ids into index file-keys - r = redis.Redis(connection_pool=redis_pool) - client = r.ft(KEY_INDEX_NAME) - if not check_index_existance(client): - assert create_index(client) - - try: - assert store_by_id(client, key=file_name, value="#".join(file_ids)) - except Exception as e: - if logflag: - logger.info(f"[ ingest chunks ] {e}. 
Fail to store chunks of file {file_name}.") - raise HTTPException(status_code=500, detail=f"Fail to store chunks of file {file_name}.") - return True - - -def ingest_data_to_redis(doc_path: DocPath): - """Ingest document to Redis.""" - path = doc_path.path - if logflag: - logger.info(f"[ ingest data ] Parsing document {path}.") - - if path.endswith(".html"): - headers_to_split_on = [ - ("h1", "Header 1"), - ("h2", "Header 2"), - ("h3", "Header 3"), - ] - text_splitter = HTMLHeaderTextSplitter(headers_to_split_on=headers_to_split_on) - else: - text_splitter = RecursiveCharacterTextSplitter( - chunk_size=doc_path.chunk_size, - chunk_overlap=doc_path.chunk_overlap, - add_start_index=True, - separators=get_separators(), - ) - - content = document_loader(path) - if logflag: - logger.info("[ ingest data ] file content loaded") - - structured_types = [".xlsx", ".csv", ".json", "jsonl"] - _, ext = os.path.splitext(path) - - if ext in structured_types: - chunks = content - else: - chunks = text_splitter.split_text(content) - - ### Specially processing for the table content in PDFs - if doc_path.process_table and path.endswith(".pdf"): - table_chunks = get_tables_result(path, doc_path.table_strategy) - chunks = chunks + table_chunks - if logflag: - logger.info(f"[ ingest data ] Done preprocessing. Created {len(chunks)} chunks of the given file.") - - file_name = doc_path.path.split("/")[-1] - return ingest_chunks_to_redis(file_name, chunks) - - -@register_microservice(name="opea_service@prepare_doc_redis", endpoint="/v1/dataprep", host="0.0.0.0", port=6007) -async def ingest_documents( - files: Optional[Union[UploadFile, List[UploadFile]]] = File(None), - link_list: Optional[str] = Form(None), - chunk_size: int = Form(1500), - chunk_overlap: int = Form(100), - process_table: bool = Form(False), - table_strategy: str = Form("fast"), -): - if logflag: - logger.info(f"[ upload ] files:{files}") - logger.info(f"[ upload ] link_list:{link_list}") - - r = redis.Redis(connection_pool=redis_pool) - client = r.ft(KEY_INDEX_NAME) - - if files: - if not isinstance(files, list): - files = [files] - uploaded_files = [] - - for file in files: - encode_file = encode_filename(file.filename) - doc_id = "file:" + encode_file - if logflag: - logger.info(f"[ upload ] processing file {doc_id}") - - # check whether the file already exists - key_ids = None - try: - key_ids = search_by_id(client, doc_id).key_ids - if logflag: - logger.info(f"[ upload ] File {file.filename} already exists.") - except Exception as e: - logger.info(f"[ upload ] File {file.filename} does not exist.") - if key_ids: - raise HTTPException( - status_code=400, detail=f"Uploaded file {file.filename} already exists. Please change file name." 
- ) - - save_path = upload_folder + encode_file - await save_content_to_local_disk(save_path, file) - ingest_data_to_redis( - DocPath( - path=save_path, - chunk_size=chunk_size, - chunk_overlap=chunk_overlap, - process_table=process_table, - table_strategy=table_strategy, - ) - ) - uploaded_files.append(save_path) - if logflag: - logger.info(f"[ upload ] Successfully saved file {save_path}") - - # def process_files_wrapper(files): - # if not isinstance(files, list): - # files = [files] - # for file in files: - # ingest_data_to_redis(DocPath(path=file, chunk_size=chunk_size, chunk_overlap=chunk_overlap)) - - # try: - # # Create a SparkContext - # conf = SparkConf().setAppName("Parallel-dataprep").setMaster("local[*]") - # sc = SparkContext(conf=conf) - # # Create an RDD with parallel processing - # parallel_num = min(len(uploaded_files), os.cpu_count()) - # rdd = sc.parallelize(uploaded_files, parallel_num) - # # Perform a parallel operation - # rdd_trans = rdd.map(process_files_wrapper) - # rdd_trans.collect() - # # Stop the SparkContext - # sc.stop() - # except: - # # Stop the SparkContext - # sc.stop() - result = {"status": 200, "message": "Data preparation succeeded"} - if logflag: - logger.info(result) - return result - - if link_list: - link_list = json.loads(link_list) # Parse JSON string to list - if not isinstance(link_list, list): - raise HTTPException(status_code=400, detail=f"Link_list {link_list} should be a list.") - for link in link_list: - encoded_link = encode_filename(link) - doc_id = "file:" + encoded_link + ".txt" - if logflag: - logger.info(f"[ upload ] processing link {doc_id}") - - # check whether the link file already exists - key_ids = None - try: - key_ids = search_by_id(client, doc_id).key_ids - if logflag: - logger.info(f"[ upload ] Link {link} already exists.") - except Exception as e: - logger.info(f"[ upload ] Link {link} does not exist. Keep storing.") - if key_ids: - raise HTTPException( - status_code=400, detail=f"Uploaded link {link} already exists. Please change another link." 
- ) - - save_path = upload_folder + encoded_link + ".txt" - content = parse_html_new([link], chunk_size=chunk_size, chunk_overlap=chunk_overlap) - await save_content_to_local_disk(save_path, content) - ingest_data_to_redis( - DocPath( - path=save_path, - chunk_size=chunk_size, - chunk_overlap=chunk_overlap, - process_table=process_table, - table_strategy=table_strategy, - ) - ) - if logflag: - logger.info(f"[ upload ] Successfully saved link list {link_list}") - return {"status": 200, "message": "Data preparation succeeded"} - - raise HTTPException(status_code=400, detail="Must provide either a file or a string list.") - - -@register_microservice( - name="opea_service@prepare_doc_redis", endpoint="/v1/dataprep/get_file", host="0.0.0.0", port=6007 -) -async def rag_get_file_structure(): - if logflag: - logger.info("[ get ] start to get file structure") - - # define redis client - r = redis.Redis(connection_pool=redis_pool) - offset = 0 - file_list = [] - - # check index existence - res = check_index_existance(r.ft(KEY_INDEX_NAME)) - if not res: - if logflag: - logger.info(f"[ get ] index {KEY_INDEX_NAME} does not exist") - return file_list - - while True: - response = r.execute_command("FT.SEARCH", KEY_INDEX_NAME, "*", "LIMIT", offset, offset + SEARCH_BATCH_SIZE) - # no doc retrieved - if len(response) < 2: - break - file_list = format_search_results(response, file_list) - offset += SEARCH_BATCH_SIZE - # last batch - if (len(response) - 1) // 2 < SEARCH_BATCH_SIZE: - break - if logflag: - logger.info(f"[get] final file_list: {file_list}") - return file_list - - -@register_microservice( - name="opea_service@prepare_doc_redis", endpoint="/v1/dataprep/delete_file", host="0.0.0.0", port=6007 -) -async def delete_single_file(file_path: str = Body(..., embed=True)): - """Delete file according to `file_path`. - - `file_path`: - - specific file path (e.g. /path/to/file.txt) - - "all": delete all files uploaded - """ - - # define redis client - r = redis.Redis(connection_pool=redis_pool) - client = r.ft(KEY_INDEX_NAME) - client2 = r.ft(INDEX_NAME) - - # delete all uploaded files - if file_path == "all": - if logflag: - logger.info("[ delete ] delete all files") - - # drop index KEY_INDEX_NAME - if check_index_existance(client): - try: - assert drop_index(index_name=KEY_INDEX_NAME) - except Exception as e: - if logflag: - logger.info(f"[ delete ] {e}. Fail to drop index {KEY_INDEX_NAME}.") - raise HTTPException(status_code=500, detail=f"Fail to drop index {KEY_INDEX_NAME}.") - else: - logger.info(f"[ delete ] Index {KEY_INDEX_NAME} does not exits.") - - # drop index INDEX_NAME - if check_index_existance(client2): - try: - assert drop_index(index_name=INDEX_NAME) - except Exception as e: - if logflag: - logger.info(f"[ delete ] {e}. Fail to drop index {INDEX_NAME}.") - raise HTTPException(status_code=500, detail=f"Fail to drop index {INDEX_NAME}.") - else: - if logflag: - logger.info(f"[ delete ] Index {INDEX_NAME} does not exits.") - - # delete files on local disk - try: - remove_folder_with_ignore(upload_folder) - except Exception as e: - if logflag: - logger.info(f"[ delete ] {e}. 
Fail to delete {upload_folder}.") - raise HTTPException(status_code=500, detail=f"Fail to delete {upload_folder}.") - - if logflag: - logger.info("[ delete ] successfully delete all files.") - create_upload_folder(upload_folder) - if logflag: - logger.info({"status": True}) - return {"status": True} - - delete_path = Path(upload_folder + "/" + encode_filename(file_path)) - if logflag: - logger.info(f"[ delete ] delete_path: {delete_path}") - - # partially delete files - doc_id = "file:" + encode_filename(file_path) - logger.info(f"[ delete ] doc id: {doc_id}") - - # determine whether this file exists in db KEY_INDEX_NAME - try: - key_ids = search_by_id(client, doc_id).key_ids - except Exception as e: - if logflag: - logger.info(f"[ delete ] {e}, File {file_path} does not exists.") - raise HTTPException(status_code=404, detail=f"File not found in db {KEY_INDEX_NAME}. Please check file_path.") - file_ids = key_ids.split("#") - - # delete file keys id in db KEY_INDEX_NAME - try: - assert delete_by_id(client, doc_id) - except Exception as e: - if logflag: - logger.info(f"[ delete ] {e}. File {file_path} delete failed for db {KEY_INDEX_NAME}.") - raise HTTPException(status_code=500, detail=f"File {file_path} delete failed for key index.") - - # delete file content in db INDEX_NAME - for file_id in file_ids: - # determine whether this file exists in db INDEX_NAME - try: - search_by_id(client2, file_id) - except Exception as e: - if logflag: - logger.info(f"[ delete ] {e}. File {file_path} does not exists.") - raise HTTPException(status_code=404, detail=f"File not found in db {INDEX_NAME}. Please check file_path.") - - # delete file content - try: - assert delete_by_id(client2, file_id) - except Exception as e: - if logflag: - logger.info(f"[ delete ] {e}. 
File {file_path} delete failed for db {INDEX_NAME}") - raise HTTPException(status_code=500, detail=f"File {file_path} delete failed for index.") - - # local file does not exist (restarted docker container) - if not delete_path.exists(): - if logflag: - logger.info(f"[ delete ] File {file_path} not saved locally.") - return {"status": True} - - # delete local file - if delete_path.is_file(): - # delete file on local disk - delete_path.unlink() - if logflag: - logger.info(f"[ delete ] File {file_path} deleted successfully.") - return {"status": True} - - # delete folder - else: - if logflag: - logger.info(f"[ delete ] Delete folder {file_path} is not supported for now.") - raise HTTPException(status_code=404, detail=f"Delete folder {file_path} is not supported for now.") - - -if __name__ == "__main__": - create_upload_folder(upload_folder) - opea_microservices["opea_service@prepare_doc_redis"].start() diff --git a/comps/dataprep/redis/langchain/redis_langchain.yaml b/comps/dataprep/redis/langchain/redis_langchain.yaml deleted file mode 100644 index ea716cc2a9..0000000000 --- a/comps/dataprep/redis/langchain/redis_langchain.yaml +++ /dev/null @@ -1,44 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -version: "3" -services: - redis-vector-db: - image: redis/redis-stack:7.2.0-v9 - container_name: redis-vector-db - ports: - - "6379:6379" - - "8001:8001" - tei-embedding-service: - image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 - container_name: tei-embedding-server - ports: - - "6006:80" - volumes: - - "./data:/data" - shm_size: 1g - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate - dataprep-redis: - image: opea/dataprep-redis:latest - container_name: dataprep-redis-server - ports: - - "5000:5000" - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - REDIS_URL: ${REDIS_URL} - INDEX_NAME: ${INDEX_NAME} - TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} - LOGFLAG: ${LOGFLAG} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} - restart: unless-stopped - -networks: - default: - driver: bridge diff --git a/comps/dataprep/redis/langchain/requirements.txt b/comps/dataprep/redis/langchain/requirements.txt deleted file mode 100644 index 43ff2f93bd..0000000000 --- a/comps/dataprep/redis/langchain/requirements.txt +++ /dev/null @@ -1,31 +0,0 @@ -beautifulsoup4 -cairosvg -docarray[full] -docx2txt -easyocr -fastapi -html2text -huggingface_hub -langchain -langchain-community -langchain-text-splitters -langchain_huggingface -markdown -numpy -opentelemetry-api -opentelemetry-exporter-otlp -opentelemetry-sdk -pandas -Pillow -prometheus-fastapi-instrumentator -pymupdf -pyspark -pytesseract -python-bidi -python-docx -python-pptx -redis -sentence_transformers -shortuuid -unstructured[all-docs] -uvicorn diff --git a/comps/dataprep/redis/langchain_ray/Dockerfile b/comps/dataprep/redis/langchain_ray/Dockerfile deleted file mode 100644 index 3de8693a5d..0000000000 --- a/comps/dataprep/redis/langchain_ray/Dockerfile +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -FROM python:3.11-slim - -ENV LANG=C.UTF-8 - -ARG ARCH="cpu" - -RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ - build-essential \ - libcairo2 \ - libgl1-mesa-glx \ - libjemalloc-dev \ - poppler-utils \ - tesseract-ocr - -RUN useradd -m -s /bin/bash 
user && \ - mkdir -p /home/user && \ - chown -R user /home/user/ - -USER user - -COPY comps /home/user/comps - -RUN pip install --no-cache-dir --upgrade pip setuptools && \ - if [ ${ARCH} = "cpu" ]; then pip install --no-cache-dir torch torchvision --index-url https://download.pytorch.org/whl/cpu; fi && \ - pip install --no-cache-dir -r /home/user/comps/dataprep/redis/langchain_ray/requirements.txt - -ENV PYTHONPATH=$PYTHONPATH:/home/user - -USER root - -RUN mkdir -p /home/user/comps/dataprep/redis/langchain_ray/uploaded_files && chown -R user /home/user/comps/dataprep/redis/langchain_ray/uploaded_files -RUN mkdir -p /home/user/comps/dataprep/redis/langchain_ray/status && chown -R user /home/user/comps/dataprep/redis/langchain_ray/status - -USER user - -WORKDIR /home/user/comps/dataprep/redis/langchain_ray - -ENTRYPOINT ["python", "prepare_doc_redis_on_ray.py"] diff --git a/comps/dataprep/redis/langchain_ray/config.py b/comps/dataprep/redis/langchain_ray/config.py deleted file mode 100644 index 7fd0b26786..0000000000 --- a/comps/dataprep/redis/langchain_ray/config.py +++ /dev/null @@ -1,63 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import os - -# Embedding model - -EMBED_MODEL = os.getenv("EMBED_MODEL", "BAAI/bge-base-en-v1.5") - -# Redis Connection Information -REDIS_HOST = os.getenv("REDIS_HOST", "localhost") -REDIS_PORT = int(os.getenv("REDIS_PORT", 6379)) - - -def get_boolean_env_var(var_name, default_value=False): - """Retrieve the boolean value of an environment variable. - - Args: - var_name (str): The name of the environment variable to retrieve. - default_value (bool): The default value to return if the variable - is not found. - - Returns: - bool: The value of the environment variable, interpreted as a boolean. - """ - true_values = {"true", "1", "t", "y", "yes"} - false_values = {"false", "0", "f", "n", "no"} - - # Retrieve the environment variable's value - value = os.getenv(var_name, "").lower() - - # Decide the boolean value based on the content of the string - if value in true_values: - return True - elif value in false_values: - return False - else: - return default_value - - -def format_redis_conn_from_env(): - redis_url = os.getenv("REDIS_URL", None) - if redis_url: - return redis_url - else: - using_ssl = get_boolean_env_var("REDIS_SSL", False) - start = "rediss://" if using_ssl else "redis://" - - # if using RBAC - password = os.getenv("REDIS_PASSWORD", None) - username = os.getenv("REDIS_USERNAME", "default") - if password is not None: - start += f"{username}:{password}@" - - return start + f"{REDIS_HOST}:{REDIS_PORT}" - - -REDIS_URL = format_redis_conn_from_env() - -# Vector Index Configuration -INDEX_NAME = os.getenv("INDEX_NAME", "rag-redis") - -TIMEOUT_SECONDS = int(os.getenv("TIMEOUT_SECONDS", 600)) diff --git a/comps/dataprep/redis/langchain_ray/prepare_doc_redis_on_ray.py b/comps/dataprep/redis/langchain_ray/prepare_doc_redis_on_ray.py deleted file mode 100644 index 83bd3e5856..0000000000 --- a/comps/dataprep/redis/langchain_ray/prepare_doc_redis_on_ray.py +++ /dev/null @@ -1,418 +0,0 @@ -# Copyright (c) 2024 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import json -import os -import pathlib -import shutil -import sys -from pathlib import Path -from typing import Callable, List, Optional, Union - -import pandas as pd -from config import EMBED_MODEL, INDEX_NAME, REDIS_URL, TIMEOUT_SECONDS -from fastapi import Body, File, Form, HTTPException, UploadFile -from langchain.text_splitter import RecursiveCharacterTextSplitter -from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceHubEmbeddings -from langchain_community.vectorstores import Redis - -cur_path = pathlib.Path(__file__).parent.resolve() -comps_path = os.path.join(cur_path, "../../../../") -sys.path.append(comps_path) -import hashlib -import timeit -from typing import Any, Dict, Iterator - -import pyarrow -import ray -from ray.data.block import Block -from ray.data.datasource import FileBasedDatasource -from tqdm import tqdm - -from comps import CustomLogger, DocPath, opea_microservices, register_microservice -from comps.dataprep.src.utils import ( - Timer, - create_upload_folder, - document_loader, - encode_filename, - get_file_structure, - get_separators, - parse_html_new, - remove_folder_with_ignore, - save_content_to_local_disk, - timeout, -) - -logger = CustomLogger("prepare_doc_redis") -logflag = os.getenv("LOGFLAG", False) - -tei_embedding_endpoint = os.getenv("TEI_ENDPOINT") -debug = False -upload_folder = "./uploaded_files/" - - -def prepare_env(enable_ray=False, pip_requirements=None): - if enable_ray: - import ray - - if ray.is_initialized(): - ray.shutdown() - if pip_requirements is not None: - ray.init(runtime_env={"pip": pip_requirements, "env_vars": {"PYTHONPATH": comps_path}}) - else: - ray.init(runtime_env={"env_vars": {"PYTHONPATH": comps_path}}) - - -def generate_log_name(file_list): - file_set = f"{sorted(file_list)}" - # if logflag: - # logger.info(f"file_set: {file_set}") - md5_str = hashlib.md5(file_set.encode(), usedforsecurity=False).hexdigest() - return f"status/status_{md5_str}.log" - - -def get_failable_with_time(callable): - def failable_callable(*args, **kwargs): - start_time = timeit.default_timer() - try: - content = callable(*args, **kwargs) - error = None - except Exception as e: - content = None - error = str(e) - end_time = timeit.default_timer() - return content, error, f"{'%.3f' % (end_time - start_time)}" - - return failable_callable - - -def get_max_cpus(total_num_tasks): - num_cpus_available = os.cpu_count() - num_cpus_per_task = num_cpus_available // total_num_tasks - if num_cpus_per_task == 0: - return 8 - return num_cpus_per_task - - -def save_logs(log_name, data): - df = pd.DataFrame.from_records(data) - try: - dir_path = os.path.dirname(log_name) - if not os.path.exists(dir_path): - os.makedirs(dir_path, exist_ok=True) - df.to_csv(log_name) - except: - pass - return df - - -def generate_ray_dataset(file_paths, dataloader_callable, lazy_mode=True, num_cpus=20): - decorated_dataloader_callable = get_failable_with_time(dataloader_callable) - if lazy_mode: - if num_cpus is None: - return ray.data.read_datasource(RayDataLoader(file_paths, decorated_dataloader_callable)) - else: - return 
ray.data.read_datasource( - RayDataLoader(file_paths, decorated_dataloader_callable), ray_remote_args={"num_cpus": num_cpus} - ) - else: - data = [] - for file in tqdm(file_paths, total=len(file_paths)): - content, error, elapse_time = decorated_dataloader_callable(file) - item = {"data": content, "filename": file, "error": error, "read_time": f"{elapse_time} secs"} - data.append(item) - return ray.data.from_items(data) - - -def ray_execute(ds, log_name): - with Timer(f"execute with Ray, status log: {log_name}"): - ret_with_status = ds.take_all() - df = save_logs(log_name, ret_with_status) - ret = df.to_dict(orient="records") - return ret - - -@timeout(seconds=TIMEOUT_SECONDS) -def data_to_redis_ray(data): - content = data["data"] - if content is None: - return { - "filename": data["filename"], - "content": content, - "status": "failed", - "num_chunks": -1, - "error": data["error"], - "read_time": data["read_time"], - "elaspe_time": "0.0 secs", - } - - decorated_callable = get_failable_with_time(data_to_redis) - num_chunks, error, elapse_time = decorated_callable(content) - status = "success" if not error else "failed" - if not debug: - content = None - return { - "filename": data["filename"], - "content": content, - "status": status, - "num_chunks": num_chunks, - "error": error, - "read_time": data["read_time"], - "elaspe_time": f"{elapse_time} secs", - } - - -def data_to_redis(data): - text_splitter = RecursiveCharacterTextSplitter( - chunk_size=1500, chunk_overlap=100, add_start_index=True, separators=get_separators(), is_separator_regex=False - ) - if isinstance(data, list): - chunks = data - elif isinstance(data, str): - chunks = text_splitter.split_text(data) - else: - raise TypeError("The content must be either a list or a string.") - - # Create vectorstore - if tei_embedding_endpoint: - # create embeddings using TEI endpoint service - embedder = HuggingFaceHubEmbeddings(model=tei_embedding_endpoint) - else: - # create embeddings using local embedding model - embedder = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) - - # Batch size - batch_size = 32 - num_chunks = len(chunks) - for i in range(0, num_chunks, batch_size): - batch_chunks = chunks[i : i + batch_size] - batch_texts = batch_chunks - - _ = Redis.from_texts( - texts=batch_texts, - embedding=embedder, - index_name=INDEX_NAME, - redis_url=REDIS_URL, - ) - # if logflag: - # logger.info(f"Processed batch {i//batch_size + 1}/{(num_chunks-1)//batch_size + 1}") - return num_chunks - - -class RayDataLoader(FileBasedDatasource): - def __init__( - self, - paths: Union[str, List[str]], - dataloader_callable: Optional[Callable], - document_ld_args: Optional[Dict[str, Any]] = None, - **file_based_datasource_kwargs, - ): - super().__init__(paths, **file_based_datasource_kwargs) - self.dataloader_callable = dataloader_callable - self.args = document_ld_args or {} - - def _read_stream(self, f: "pyarrow.NativeFile", path: str) -> Iterator[Block]: - from ray.data._internal.arrow_block import ArrowBlockBuilder - - builder = ArrowBlockBuilder() - path = f"{path}" - data, error, read_time = self.dataloader_callable(path) - item = {"data": data, "filename": path, "error": error, "read_time": f"{read_time} secs"} - builder.add(item) - yield builder.build() - - -def ingest_data_to_redis(file_list: List[DocPath], enable_ray=False, num_cpus=20): - """Ingest document to Redis.""" - file_list = [f.path for f in file_list] - - if enable_ray: - log_name = generate_log_name(file_list) - ds = generate_ray_dataset(file_list, document_loader, 
lazy_mode=True, num_cpus=num_cpus) - ds = ds.map(data_to_redis_ray, num_cpus=num_cpus) - return ray_execute(ds, log_name) - else: - for file in tqdm(file_list, total=len(file_list)): - with Timer(f"read document {file}."): - data = document_loader(file) - with Timer(f"ingest document {file} to Redis."): - data_to_redis(data) - return True - - -def ingest_link_to_redis(link_list: List[str], enable_ray=False, num_cpus=20): - link_list = [str(f) for f in link_list] - - def _parse_html(link): - data = parse_html_new([link], chunk_size=1500, chunk_overlap=100) - return data[0][0] - - if enable_ray: - log_name = generate_log_name(link_list) - ds = generate_ray_dataset(link_list, _parse_html, lazy_mode=True, num_cpus=num_cpus) - ds = ds.map(data_to_redis_ray, num_cpus=num_cpus) - return ray_execute(ds, log_name) - else: - for link in tqdm(link_list, total=len(link_list)): - with Timer(f"read document {link}."): - data = _parse_html(link) - if logflag: - logger.info("content is: ", data) - with Timer(f"ingest document {link} to Redis."): - data_to_redis(data) - return True - - -@register_microservice(name="opea_service@prepare_doc_redis", endpoint="/v1/dataprep", host="0.0.0.0", port=6007) -async def ingest_documents(files: List[UploadFile] = File(None), link_list: str = Form(None)): - if logflag: - logger.info(files) - logger.info(link_list) - if files and link_list: - raise HTTPException(status_code=400, detail="Provide either a file or a string list, not both.") - - if not files and not link_list: - raise HTTPException(status_code=400, detail="Must provide either a file or a string list.") - - saved_path_list = [] - if files: - try: - if not isinstance(files, list): - files = [files] - if not os.path.exists(upload_folder): - Path(upload_folder).mkdir(parents=True, exist_ok=True) - - # TODO: use ray to parallelize the file saving - for file in files: - save_path = upload_folder + file.filename - await save_content_to_local_disk(save_path, file) - saved_path_list.append(DocPath(path=save_path)) - - if len(saved_path_list) <= 10: - enable_ray = False - else: - enable_ray = True - prepare_env(enable_ray=enable_ray) - num_cpus = get_max_cpus(len(saved_path_list)) - if logflag: - logger.info(f"per task num_cpus: {num_cpus}") - ret = ingest_data_to_redis(saved_path_list, enable_ray=enable_ray, num_cpus=num_cpus) - result = {"status": 200, "message": f"Data preparation succeeded. ret msg is {ret}"} - if logflag: - logger.info(result) - return result - except Exception as e: - raise HTTPException(status_code=400, detail=f"An error occurred: {e}") - - if link_list: - try: - link_list = json.loads(link_list) # Parse JSON string to list - if not isinstance(link_list, list): - raise HTTPException(status_code=400, detail="link_list should be a list.") - if len(link_list) <= 10: - enable_ray = False - else: - enable_ray = True - prepare_env(enable_ray=enable_ray) - num_cpus = get_max_cpus(len(link_list)) - if logflag: - logger.info(f"per task num_cpus: {num_cpus}") - ret = ingest_link_to_redis(link_list, enable_ray=enable_ray, num_cpus=num_cpus) - result = {"status": 200, "message": f"Data preparation succeeded. 
ret msg is {ret}"} - if logflag: - logger.info(result) - return result - except json.JSONDecodeError: - raise HTTPException(status_code=400, detail="Invalid JSON format for link_list.") - except Exception as e: - raise HTTPException(status_code=400, detail=f"An error occurred: {e}") - - -@register_microservice( - name="opea_service@prepare_doc_redis", endpoint="/v1/dataprep/get_file", host="0.0.0.0", port=6007 -) -async def rag_get_file_structure(): - if logflag: - logger.info("[ get_file_structure] ") - - if not Path(upload_folder).exists(): - if logflag: - logger.info("No file uploaded, return empty list.") - return [] - - file_content = get_file_structure(upload_folder) - if logflag: - logger.info(file_content) - return file_content - - -@register_microservice( - name="opea_service@prepare_doc_redis", endpoint="/v1/dataprep/delete_file", host="0.0.0.0", port=6007 -) -async def delete_single_file(file_path: str = Body(..., embed=True)): - """Delete file according to `file_path`. - - `file_path`: - - specific file path (e.g. /path/to/file.txt) - - folder path (e.g. /path/to/folder) - - "all": delete all files uploaded - """ - if logflag: - logger.info(file_path) - # delete all uploaded files - if file_path == "all": - if logflag: - logger.info("[dataprep - del] delete all files") - remove_folder_with_ignore(upload_folder) - if logflag: - logger.info("[dataprep - del] successfully delete all files.") - create_upload_folder(upload_folder) - if logflag: - logger.info({"status": True}) - return {"status": True} - - delete_path = Path(upload_folder + "/" + encode_filename(file_path)) - if logflag: - logger.info(f"[dataprep - del] delete_path: {delete_path}") - - # partially delete files/folders - if delete_path.exists(): - # delete file - if delete_path.is_file(): - try: - delete_path.unlink() - except Exception as e: - if logflag: - logger.info(f"[dataprep - del] fail to delete file {delete_path}: {e}") - logger.info({"status": False}) - return {"status": False} - # delete folder - else: - try: - shutil.rmtree(delete_path) - except Exception as e: - if logflag: - logger.info(f"[dataprep - del] fail to delete folder {delete_path}: {e}") - logger.info({"status": False}) - return {"status": False} - if logflag: - logger.info({"status": True}) - return {"status": True} - else: - raise HTTPException(status_code=404, detail="File/folder not found. 
Please check del_path.") - - -if __name__ == "__main__": - - opea_microservices["opea_service@prepare_doc_redis"].start() diff --git a/comps/dataprep/redis/langchain_ray/redis_langchain_ray.yaml b/comps/dataprep/redis/langchain_ray/redis_langchain_ray.yaml deleted file mode 100644 index fe6bf2be5f..0000000000 --- a/comps/dataprep/redis/langchain_ray/redis_langchain_ray.yaml +++ /dev/null @@ -1,43 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -version: "3" -services: - redis-vector-db: - image: redis/redis-stack:7.2.0-v9 - container_name: redis-vector-db - ports: - - "6379:6379" - - "8001:8001" - tei-embedding-service: - image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 - container_name: tei-embedding-server - ports: - - "6006:80" - volumes: - - "./data:/data" - shm_size: 1g - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate - dataprep-redis: - image: opea/dataprep-on-ray-redis:latest - container_name: dataprep-redis-server - ports: - - "6007:6007" - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - REDIS_URL: ${REDIS_URL} - INDEX_NAME: ${INDEX_NAME} - TEI_ENDPOINT: ${TEI_ENDPOINT} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} - restart: unless-stopped - -networks: - default: - driver: bridge diff --git a/comps/dataprep/redis/langchain_ray/requirements.txt b/comps/dataprep/redis/langchain_ray/requirements.txt deleted file mode 100644 index 8533045420..0000000000 --- a/comps/dataprep/redis/langchain_ray/requirements.txt +++ /dev/null @@ -1,31 +0,0 @@ -beautifulsoup4 -cairosvg -docarray[full] -docx2txt -easyocr -fastapi -html2text -huggingface_hub -langchain -langchain-community -numpy -opentelemetry-api -opentelemetry-exporter-otlp -opentelemetry-sdk -pandas -Pillow -prometheus-fastapi-instrumentator -pyarrow -pymupdf -pytesseract -python-bidi==0.4.2 -python-docx -python-multipart -python-pptx -ray -redis -sentence_transformers -shortuuid -unstructured[all-docs]==0.15.7 -uvicorn -virtualenv diff --git a/comps/dataprep/redis/langchain_ray/schema_dim_768.yml b/comps/dataprep/redis/langchain_ray/schema_dim_768.yml deleted file mode 100644 index adacf98656..0000000000 --- a/comps/dataprep/redis/langchain_ray/schema_dim_768.yml +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -text: - - name: content - - name: source -numeric: - - name: start_index -vector: - - name: content_vector - algorithm: HNSW - datatype: FLOAT32 - dims: 768 - distance_metric: COSINE diff --git a/comps/dataprep/redis/llama_index/Dockerfile b/comps/dataprep/redis/llama_index/Dockerfile deleted file mode 100644 index 568fb9d6cf..0000000000 --- a/comps/dataprep/redis/llama_index/Dockerfile +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -FROM python:3.11-slim - -ENV LANG=C.UTF-8 - -ARG ARCH="cpu" - -RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ - build-essential \ - libcairo2 \ - libgl1-mesa-glx \ - libjemalloc-dev \ - poppler-utils \ - tesseract-ocr - -RUN useradd -m -s /bin/bash user && \ - mkdir -p /home/user && \ - chown -R user /home/user/ - -USER user - -COPY comps /home/user/comps - -RUN pip install --no-cache-dir --upgrade pip setuptools && \ - if [ ${ARCH} = "cpu" ]; then pip install --no-cache-dir torch torchvision --index-url 
https://download.pytorch.org/whl/cpu; fi && \ - pip install --no-cache-dir -r /home/user/comps/dataprep/redis/llama_index/requirements.txt - -ENV PYTHONPATH=$PYTHONPATH:/home/user - -USER root - -RUN mkdir -p /home/user/comps/dataprep/redis/llama_index/uploaded_files && chown -R user /home/user/comps/dataprep/redis/llama_index/uploaded_files - -USER user - -WORKDIR /home/user/comps/dataprep/redis/llama_index - -ENTRYPOINT ["python", "prepare_doc_redis.py"] diff --git a/comps/dataprep/redis/llama_index/config.py b/comps/dataprep/redis/llama_index/config.py deleted file mode 100644 index 0f99cc05eb..0000000000 --- a/comps/dataprep/redis/llama_index/config.py +++ /dev/null @@ -1,57 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import os - -EMBED_MODEL = os.getenv("EMBED_MODEL", "BAAI/bge-base-en-v1.5") - -# Redis Connection Information -REDIS_HOST = os.getenv("REDIS_HOST", "localhost") -REDIS_PORT = int(os.getenv("REDIS_PORT", 6379)) - - -def get_boolean_env_var(var_name, default_value=False): - """Retrieve the boolean value of an environment variable. - - Args: - var_name (str): The name of the environment variable to retrieve. - default_value (bool): The default value to return if the variable - is not found. - - Returns: - bool: The value of the environment variable, interpreted as a boolean. - """ - true_values = {"true", "1", "t", "y", "yes"} - false_values = {"false", "0", "f", "n", "no"} - - # Retrieve the environment variable's value - value = os.getenv(var_name, "").lower() - - # Decide the boolean value based on the content of the string - if value in true_values: - return True - elif value in false_values: - return False - else: - return default_value - - -def format_redis_conn_from_env(): - redis_url = os.getenv("REDIS_URL", None) - if redis_url: - return redis_url - else: - using_ssl = get_boolean_env_var("REDIS_SSL", False) - start = "rediss://" if using_ssl else "redis://" - - # if using RBAC - password = os.getenv("REDIS_PASSWORD", None) - username = os.getenv("REDIS_USERNAME", "default") - if password is not None: - start += f"{username}:{password}@" - - return start + f"{REDIS_HOST}:{REDIS_PORT}" - - -INDEX_NAME = os.getenv("INDEX_NAME", "rag-redis") -REDIS_URL = format_redis_conn_from_env() diff --git a/comps/dataprep/redis/llama_index/prepare_doc_redis.py b/comps/dataprep/redis/llama_index/prepare_doc_redis.py deleted file mode 100644 index 546f951d90..0000000000 --- a/comps/dataprep/redis/llama_index/prepare_doc_redis.py +++ /dev/null @@ -1,162 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import os -import shutil -from pathlib import Path -from typing import List, Optional, Union - -from config import EMBED_MODEL, INDEX_NAME, REDIS_URL -from fastapi import Body, File, HTTPException, UploadFile -from llama_index.core import SimpleDirectoryReader, StorageContext, VectorStoreIndex -from llama_index.core.settings import Settings -from llama_index.embeddings.huggingface import HuggingFaceEmbedding -from llama_index.vector_stores.redis import RedisVectorStore -from redis import Redis -from redisvl.schema import IndexSchema -from utils import * - -from comps import CustomLogger, DocPath, opea_microservices, register_microservice - -logger = CustomLogger("prepare_doc_redis") -logflag = os.getenv("LOGFLAG", False) - -upload_folder = "./uploaded_files/" - - -async def ingest_data_to_redis(doc_path: DocPath): - embedder = HuggingFaceEmbedding(model_name=EMBED_MODEL) - print(f"embedder: 
{embedder}") - Settings.embed_model = embedder - doc_path = doc_path.path - content = SimpleDirectoryReader(input_files=[doc_path]).load_data() - redis_client = Redis.from_url(REDIS_URL) - schema = IndexSchema.from_dict( - { - "index": {"name": INDEX_NAME, "prefix": f"doc:{INDEX_NAME}"}, - "fields": [ - {"name": "id", "type": "tag"}, - {"name": "doc_id", "type": "tag"}, - {"name": "text", "type": "text"}, - {"name": "content", "type": "text"}, - {"name": "source", "type": "text"}, - {"name": "start_index", "type": "numeric"}, - { - "name": "vector", - "type": "vector", - "attrs": {"dims": 768, "algorithm": "HNSW", "date_type": "FLOAT32"}, - }, - ], - } - ) - vector_store = RedisVectorStore(redis_client=redis_client, schema=schema) - storage_context = StorageContext.from_defaults(vector_store=vector_store) - _ = VectorStoreIndex.from_documents(content, storage_context=storage_context) - if logflag: - logger.info("[ ingest data ] data ingested into Redis DB.") - return True - - -@register_microservice(name="opea_service@prepare_doc_redis", endpoint="/v1/dataprep", host="0.0.0.0", port=6007) -# llama index only support upload files now -async def ingest_documents(files: Optional[Union[UploadFile, List[UploadFile]]] = File(None)): - if logflag: - logger.info(f"files:{files}") - if not files: - raise HTTPException(status_code=400, detail="Please provide at least one file.") - - if not isinstance(files, list): - files = [files] - if not os.path.exists(upload_folder): - Path(upload_folder).mkdir(parents=True, exist_ok=True) - try: - for file in files: - save_path = upload_folder + file.filename - await save_content_to_local_disk(save_path, file) - await ingest_data_to_redis(DocPath(path=save_path)) - if logflag: - logger.info(f"Successfully saved file {save_path}") - logger.info({"status": 200, "message": "Data preparation succeeded"}) - return {"status": 200, "message": "Data preparation succeeded"} - except Exception as e: - if logflag: - logger.info(f"Data preparation failed. Exception: {e}") - raise HTTPException(status_code=500, detail=f"Data preparation failed. Exception: {e}") - - -@register_microservice( - name="opea_service@prepare_doc_redis", endpoint="/v1/dataprep/get_file", host="0.0.0.0", port=6007 -) -async def rag_get_file_structure(): - if logflag: - logger.info("[ get_file_structure] ") - - if not Path(upload_folder).exists(): - if logflag: - logger.info("No file uploaded, return empty list.") - return [] - - file_content = get_file_structure(upload_folder) - if logflag: - logger.info(file_content) - return file_content - - -@register_microservice( - name="opea_service@prepare_doc_redis", endpoint="/v1/dataprep/delete_file", host="0.0.0.0", port=6007 -) -async def delete_single_file(file_path: str = Body(..., embed=True)): - """Delete file according to `file_path`. - - `file_path`: - - specific file path (e.g. /path/to/file.txt) - - folder path (e.g. 
/path/to/folder) - - "all": delete all files uploaded - """ - if logflag: - logger.info(file_path) - # delete all uploaded files - if file_path == "all": - if logflag: - logger.info("[dataprep - del] delete all files") - remove_folder_with_ignore(upload_folder) - if logflag: - logger.info("[dataprep - del] successfully delete all files.") - create_upload_folder(upload_folder) - if logflag: - logger.info({"status": True}) - return {"status": True} - - delete_path = Path(upload_folder + "/" + encode_filename(file_path)) - if logflag: - logger.info(f"[dataprep - del] delete_path: {delete_path}") - - # partially delete files/folders - if delete_path.exists(): - # delete file - if delete_path.is_file(): - try: - delete_path.unlink() - except Exception as e: - if logflag: - logger.info(f"[dataprep - del] fail to delete file {delete_path}: {e}") - logger.info({"status": False}) - return {"status": False} - # delete folder - else: - try: - shutil.rmtree(delete_path) - except Exception as e: - if logflag: - logger.info(f"[dataprep - del] fail to delete folder {delete_path}: {e}") - logger.info({"status": False}) - return {"status": False} - if logflag: - logger.info({"status": True}) - return {"status": True} - else: - raise HTTPException(status_code=404, detail="File/folder not found. Please check del_path.") - - -if __name__ == "__main__": - opea_microservices["opea_service@prepare_doc_redis"].start() diff --git a/comps/dataprep/redis/llama_index/redis_llama_index.yaml b/comps/dataprep/redis/llama_index/redis_llama_index.yaml deleted file mode 100644 index ecb1bf4bd3..0000000000 --- a/comps/dataprep/redis/llama_index/redis_llama_index.yaml +++ /dev/null @@ -1,29 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -version: "3" -services: - redis-vector-db: - image: redis/redis-stack:7.2.0-v9 - container_name: redis-vector-db - ports: - - "6379:6379" - - "8001:8001" - dataprep-redis: - image: opea/dataprep-redis:latest - container_name: dataprep-redis-server - ports: - - "6007:6007" - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - REDIS_URL: ${REDIS_URL} - INDEX_NAME: ${INDEX_NAME} - LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} - restart: unless-stopped - -networks: - default: - driver: bridge diff --git a/comps/dataprep/redis/llama_index/requirements.txt b/comps/dataprep/redis/llama_index/requirements.txt deleted file mode 100644 index 46640180e0..0000000000 --- a/comps/dataprep/redis/llama_index/requirements.txt +++ /dev/null @@ -1,19 +0,0 @@ -docarray[full] -fastapi -huggingface_hub -llama-index -llama-index-embeddings-huggingface==0.2.0 -llama-index-readers-file -llama-index-vector-stores-redis -numpy -opentelemetry-api -opentelemetry-exporter-otlp -opentelemetry-sdk -prometheus-fastapi-instrumentator -pytesseract -python-bidi==0.4.2 -python-multipart -redis -sentence_transformers -shortuuid -uvicorn diff --git a/comps/dataprep/redis/llama_index/utils.py b/comps/dataprep/redis/llama_index/utils.py deleted file mode 100644 index a977fae0dd..0000000000 --- a/comps/dataprep/redis/llama_index/utils.py +++ /dev/null @@ -1,90 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - - -import os -import urllib.parse -from pathlib import Path -from typing import Dict, List, Union - - -def create_upload_folder(upload_path): - if not os.path.exists(upload_path): - Path(upload_path).mkdir(parents=True, exist_ok=True) - - -def encode_filename(filename): - return 
urllib.parse.quote(filename, safe="") - - -def decode_filename(encoded_filename): - return urllib.parse.unquote(encoded_filename) - - -def get_file_structure(root_path: str, parent_path: str = "") -> List[Dict[str, Union[str, List]]]: - result = [] - for path in os.listdir(root_path): - complete_path = parent_path + "/" + path if parent_path else path - file_path = root_path + "/" + path - p = Path(file_path) - # append file into result - if p.is_file(): - file_dict = { - "name": decode_filename(path), - "id": decode_filename(complete_path), - "type": "File", - "parent": "", - } - result.append(file_dict) - else: - # append folder and inner files/folders into result using recursive function - folder_dict = { - "name": decode_filename(path), - "id": decode_filename(complete_path), - "type": "Directory", - "children": get_file_structure(file_path, complete_path), - "parent": "", - } - result.append(folder_dict) - - return result - - -def remove_folder_with_ignore(folder_path: str, except_patterns: List = []): - """Remove the specific folder, and ignore some files/folders. - - :param folder_path: file path to delete - :param except_patterns: files/folder name to ignore - """ - print(f"except patterns: {except_patterns}") - for root, dirs, files in os.walk(folder_path, topdown=False): - for name in files: - # delete files except ones that match patterns - file_path = os.path.join(root, name) - if except_patterns != [] and any(pattern in file_path for pattern in except_patterns): - continue - os.remove(file_path) - - # delete empty folder - for name in dirs: - dir_path = os.path.join(root, name) - # delete folders except ones that match patterns - if except_patterns != [] and any(pattern in dir_path for pattern in except_patterns): - continue - if not os.listdir(dir_path): - os.rmdir(dir_path) - - -async def save_content_to_local_disk(save_path: str, content): - save_path = Path(save_path) - try: - if isinstance(content, str): - with open(save_path, "w", encoding="utf-8") as file: - file.write(content) - else: - with save_path.open("wb") as fout: - content = await content.read() - fout.write(content) - except Exception as e: - print(f"Write file failed. Exception: {e}") - raise Exception(status_code=500, detail=f"Write file {save_path} failed. 
Exception: {e}") diff --git a/comps/dataprep/src/Dockerfile b/comps/dataprep/src/Dockerfile index 547ad30277..3752b36119 100644 --- a/comps/dataprep/src/Dockerfile +++ b/comps/dataprep/src/Dockerfile @@ -14,28 +14,40 @@ RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missin libjemalloc-dev \ libreoffice \ poppler-utils \ - tesseract-ocr + tesseract-ocr \ + libpq-dev \ + libcairo2 \ + wget + +# Install ffmpeg static build +RUN cd /root && wget https://johnvansickle.com/ffmpeg/builds/ffmpeg-git-amd64-static.tar.xz && \ +mkdir ffmpeg-git-amd64-static && tar -xvf ffmpeg-git-amd64-static.tar.xz -C ffmpeg-git-amd64-static --strip-components 1 && \ +export PATH=/root/ffmpeg-git-amd64-static:$PATH && \ +cp /root/ffmpeg-git-amd64-static/ffmpeg /usr/local/bin/ + RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ chown -R user /home/user/ -USER user - COPY comps /home/user/comps RUN pip install --no-cache-dir --upgrade pip setuptools && \ - if [ ${ARCH} = "cpu" ]; then pip install --no-cache-dir torch torchvision --index-url https://download.pytorch.org/whl/cpu; fi && \ - pip install --no-cache-dir -r /home/user/comps/dataprep/src/requirements.txt + if [ ${ARCH} = "cpu" ]; then \ + PIP_EXTRA_INDEX_URL="--extra-index-url https://download.pytorch.org/whl/cpu"; \ + else \ + PIP_EXTRA_INDEX_URL=""; \ + fi && \ + pip install --no-cache-dir torch torchvision ${PIP_EXTRA_INDEX_URL} && \ + pip install --no-cache-dir ${PIP_EXTRA_INDEX_URL} -r /home/user/comps/dataprep/src/requirements.txt && \ + pip install opentelemetry-api==1.29.0 opentelemetry-exporter-otlp==1.29.0 opentelemetry-sdk==1.29.0 ENV PYTHONPATH=$PYTHONPATH:/home/user -USER root - RUN mkdir -p /home/user/comps/dataprep/src/uploaded_files && chown -R user /home/user/comps/dataprep/src/uploaded_files USER user WORKDIR /home/user/comps/dataprep/src -ENTRYPOINT ["python", "opea_dataprep_microservice.py"] +ENTRYPOINT ["sh", "-c", "python $( [ \"$MULTIMODAL_DATAPREP\" = \"true\" ] && echo 'opea_dataprep_multimodal_microservice.py' || echo 'opea_dataprep_microservice.py')"] diff --git a/comps/dataprep/src/README.md b/comps/dataprep/src/README.md deleted file mode 100644 index 2550ec2489..0000000000 --- a/comps/dataprep/src/README.md +++ /dev/null @@ -1,18 +0,0 @@ -# Dataprep Microservice - -The Dataprep Microservice aims to preprocess the data from various sources (either structured or unstructured data) to text data, and convert the text data to embedding vectors then store them in the database. - -## Install Requirements - -```bash -apt-get update -apt-get install libreoffice -``` - -## Use LVM (Large Vision Model) for Summarizing Image Data - -Occasionally unstructured data will contain image data, to convert the image data to the text data, LVM can be used to summarize the image. To leverage LVM, please refer to this [readme](../../lvms/llava/README.md) to start the LVM microservice first and then set the below environment variable, before starting any dataprep microservice. 
- -```bash -export SUMMARIZE_IMAGE_VIA_LVM=1 -``` diff --git a/comps/dataprep/elasticsearch/langchain/README.md b/comps/dataprep/src/README_elasticsearch.md similarity index 74% rename from comps/dataprep/elasticsearch/langchain/README.md rename to comps/dataprep/src/README_elasticsearch.md index 296a1d6db7..ab4b8547b5 100644 --- a/comps/dataprep/elasticsearch/langchain/README.md +++ b/comps/dataprep/src/README_elasticsearch.md @@ -17,7 +17,7 @@ export INDEX_NAME=${your_index_name} ### 1.3 Start Elasticsearch -Please refer to this [readme](../../../vectorstores/elasticsearch/README.md). +Please refer to this [readme](../../third_parties/elasticsearch/src/README.md). ### 1.4 Start Document Preparation Microservice for Elasticsearch with Python Script @@ -31,7 +31,7 @@ python prepare_doc_elastic.py ### 2.1 Start Elasticsearch -Please refer to this [readme](../../../vectorstores/elasticsearch/README.md). +Please refer to this [readme](../../third_parties/elasticsearch/src/README.md). ### 2.2 Setup Environment Variables @@ -44,20 +44,20 @@ export INDEX_NAME=${your_index_name} ```bash cd GenAIComps -docker build -t opea/dataprep-elasticsearch:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/elasticsearch/langchain/Dockerfile . +docker build -t opea/dataprep:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/src/Dockerfile . ``` ### 2.4 Run Docker with CLI (Option A) ```bash -docker run --name="dataprep-elasticsearch" -p 6011:6011 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e ES_CONNECTION_STRING=$ES_CONNECTION_STRING -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT opea/dataprep-elastic:latest +docker run --name="dataprep-elasticsearch" -p 6011:6011 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e ES_CONNECTION_STRING=$ES_CONNECTION_STRING -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_ELASTICSEARCH" opea/dataprep:latest ``` ### 2.5 Run with Docker Compose (Option B) ```bash -cd comps/dataprep/elasticsearch/langchain -docker compose -f docker-compose-dataprep-elastic.yaml up -d +cd comps/dataprep/deployment/docker_compose/ +docker compose -f compose_elasticsearch.yaml up -d ``` ## 🚀3. Consume Microservice @@ -71,17 +71,17 @@ microservice to convert the document to embedding and save to the database. curl -X POST \ -H "Content-Type: application/json" \ -d '{"path":"/path/to/document"}' \ - http://localhost:6011/v1/dataprep + http://localhost:6011/v1/dataprep/ingest ``` -### 3.2 Consume get_file API +### 3.2 Consume get API To get uploaded file structures, use the following command: ```bash curl -X POST \ -H "Content-Type: application/json" \ - http://localhost:6011/v1/dataprep/get_file + http://localhost:6011/v1/dataprep/get ``` Then you will get the response JSON like this: @@ -103,28 +103,28 @@ Then you will get the response JSON like this: ] ``` -### 4.3 Consume delete_file API +### 4.3 Consume delete API To delete uploaded file/link, use the following command. -The `file_path` here should be the `id` get from `/v1/dataprep/get_file` API. +The `file_path` here should be the `id` get from `/v1/dataprep/get` API. 
```bash # delete link curl -X POST \ -H "Content-Type: application/json" \ -d '{"file_path": "https://www.ces.tech/.txt"}' \ - http://localhost:6011/v1/dataprep/delete_file + http://localhost:6011/v1/dataprep/delete # delete file curl -X POST \ -H "Content-Type: application/json" \ -d '{"file_path": "uploaded_file_1.txt"}' \ - http://localhost:6011/v1/dataprep/delete_file + http://localhost:6011/v1/dataprep/delete # delete all files and links curl -X POST \ -H "Content-Type: application/json" \ -d '{"file_path": "all"}' \ - http://localhost:6011/v1/dataprep/delete_file + http://localhost:6011/v1/dataprep/delete ``` diff --git a/comps/dataprep/milvus/langchain/README.md b/comps/dataprep/src/README_milvus.md similarity index 86% rename from comps/dataprep/milvus/langchain/README.md rename to comps/dataprep/src/README_milvus.md index 7fa5fe2b3f..bcf545f72b 100644 --- a/comps/dataprep/milvus/langchain/README.md +++ b/comps/dataprep/src/README_milvus.md @@ -13,7 +13,7 @@ apt-get install poppler-utils -y ### 1.2 Start Milvus Server -Please refer to this [readme](../../../vectorstores/milvus/README.md). +Please refer to this [readme](../../third_parties/milvus/src/README.md). ### 1.3 Setup Environment Variables @@ -56,14 +56,14 @@ python prepare_doc_milvus.py ### 2.1 Start Milvus Server -Please refer to this [readme](../../../vectorstores/milvus/README.md). +Please refer to this [readme](../../third_parties/milvus/src/README.md). ### 2.2 Build Docker Image ```bash cd ../../.. # build dataprep milvus docker image -docker build -t opea/dataprep-milvus:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy --build-arg no_proxy=$no_proxy -f comps/dataprep/milvus/langchain/Dockerfile . +docker build -t opea/dataprep:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy --build-arg no_proxy=$no_proxy -f comps/dataprep/src/Dockerfile . ``` ### 2.3 Setup Environment Variables @@ -76,7 +76,7 @@ export MILVUS_HOST=${your_host_ip} ### 2.3 Run Docker with CLI (Option A) ```bash -docker run -d --name="dataprep-milvus-server" -p 6010:6010 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e TEI_EMBEDDING_ENDPOINT=${TEI_EMBEDDING_ENDPOINT} -e MILVUS_HOST=${MILVUS_HOST} opea/dataprep-milvus:latest +docker run -d --name="dataprep-milvus-server" -p 6010:6010 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e TEI_EMBEDDING_ENDPOINT=${TEI_EMBEDDING_ENDPOINT} -e MILVUS_HOST=${MILVUS_HOST} -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_MILVUS" opea/dataprep:latest ``` ### 2.4 Run with Docker Compose (Option B) @@ -88,7 +88,7 @@ git clone https://huggingface.co/BAAI/bge-base-en-v1.5 cd ../ # Update `host_ip` and `HUGGINGFACEHUB_API_TOKEN` in set_env.sh . set_env.sh -docker compose -f docker-compose-dataprep-milvus.yaml up -d +docker compose -f compose_milvus.yaml up -d ``` ## 🚀3. Consume Microservice @@ -105,7 +105,7 @@ Make sure the file path after `files=@` is correct. curl -X POST \ -H "Content-Type: multipart/form-data" \ -F "files=@./file.pdf" \ - http://localhost:6010/v1/dataprep + http://localhost:6010/v1/dataprep/ingest ``` You can specify chunk_size and chunk_size by the following commands. To avoid big chunks, pass a small chun_size like 500 as below (default 1500). 
@@ -116,7 +116,7 @@ curl -X POST \ -F "files=@./file.pdf" \ -F "chunk_size=500" \ -F "chunk_overlap=100" \ - http://localhost:6010/v1/dataprep + http://localhost:6010/v1/dataprep/ingest ``` - Multiple file upload @@ -127,7 +127,7 @@ curl -X POST \ -F "files=@./file1.pdf" \ -F "files=@./file2.pdf" \ -F "files=@./file3.pdf" \ - http://localhost:6010/v1/dataprep + http://localhost:6010/v1/dataprep/ingest ``` - Links upload (not supported for llama_index now) @@ -135,7 +135,7 @@ curl -X POST \ ```bash curl -X POST \ -F 'link_list=["https://www.ces.tech/"]' \ - http://localhost:6010/v1/dataprep + http://localhost:6010/v1/dataprep/ingest ``` or @@ -145,7 +145,7 @@ import requests import json proxies = {"http": ""} -url = "http://localhost:6010/v1/dataprep" +url = "http://localhost:6010/v1/dataprep/ingest" urls = [ "https://towardsdatascience.com/no-gpu-no-party-fine-tune-bert-for-sentiment-analysis-with-vertex-ai-custom-jobs-d8fc410e908b?source=rss----7f60cf5620c9---4" ] @@ -173,17 +173,17 @@ We support table extraction from pdf documents. You can specify process_table an Note: If you specify "table_strategy=llm", You should first start TGI Service, please refer to 1.2.1, 1.3.1 in https://github.com/opea-project/GenAIComps/tree/main/comps/llms/README.md, and then `export TGI_LLM_ENDPOINT="http://${your_ip}:8008"`. ```bash -curl -X POST -H "Content-Type: application/json" -d '{"path":"/home/user/doc/your_document_name","process_table":true,"table_strategy":"hq"}' http://localhost:6010/v1/dataprep +curl -X POST -H "Content-Type: application/json" -d '{"path":"/home/user/doc/your_document_name","process_table":true,"table_strategy":"hq"}' http://localhost:6010/v1/dataprep/ingest ``` -### 3.2 Consume get_file API +### 3.2 Consume get API To get uploaded file structures, use the following command: ```bash curl -X POST \ -H "Content-Type: application/json" \ - http://localhost:6010/v1/dataprep/get_file + http://localhost:6010/v1/dataprep/get ``` Then you will get the response JSON like this: @@ -205,30 +205,30 @@ Then you will get the response JSON like this: ] ``` -### 3.3 Consume delete_file API +### 3.3 Consume delete API To delete uploaded file/link, use the following command. -The `file_path` here should be the `id` get from `/v1/dataprep/get_file` API. +The `file_path` here should be the `id` get from `/v1/dataprep/get` API. ```bash # delete link curl -X POST \ -H "Content-Type: application/json" \ -d '{"file_path": "https://www.ces.tech/.txt"}' \ - http://localhost:6010/v1/dataprep/delete_file + http://localhost:6010/v1/dataprep/delete # delete file curl -X POST \ -H "Content-Type: application/json" \ -d '{"file_path": "uploaded_file_1.txt"}' \ - http://localhost:6010/v1/dataprep/delete_file + http://localhost:6010/v1/dataprep/delete # delete all files and links, will drop the entire db collection curl -X POST \ -H "Content-Type: application/json" \ -d '{"file_path": "all"}' \ - http://localhost:6010/v1/dataprep/delete_file + http://localhost:6010/v1/dataprep/delete ``` ## 🚀4. 
Troubleshooting @@ -240,5 +240,5 @@ curl -X POST \ -H "Content-Type: multipart/form-data" \ -F "files=@./file.pdf" \ -F "chunk_size=500" \ - http://localhost:6010/v1/dataprep + http://localhost:6010/v1/dataprep/ingest ``` diff --git a/comps/dataprep/multimodal/redis/langchain/README.md b/comps/dataprep/src/README_multimodal.md similarity index 75% rename from comps/dataprep/multimodal/redis/langchain/README.md rename to comps/dataprep/src/README_multimodal.md index db24b431fd..637ef9ca22 100644 --- a/comps/dataprep/multimodal/redis/langchain/README.md +++ b/comps/dataprep/src/README_multimodal.md @@ -5,6 +5,7 @@ This `dataprep` microservice accepts the following from the user and ingests the - Videos (mp4 files) and their transcripts (optional) - Images (gif, jpg, jpeg, and png files) and their captions (optional) - Audio (wav files) +- PDFs (with text and images) ## 🚀1. Start Microservice with Python(Option 1) @@ -23,7 +24,7 @@ pip install -r requirements.txt ### 1.2 Start Redis Stack Server -Please refer to this [readme](../../../../vectorstores/redis/README.md). +Please refer to this [readme](../../third_parties/redis/src/README.md). ### 1.3 Setup Environment Variables @@ -38,7 +39,7 @@ export PYTHONPATH=${path_to_comps} This is required only if you are going to consume the _generate_captions_ API of this microservice as in [Section 4.3](#43-consume-generate_captions-api). -Please refer to this [readme](../../../../lvms/llava/README.md) to start the LVM microservice. +Please refer to this [readme](../../lvms/src/README.md) to start the LVM microservice. After LVM is up, set up environment variables. ```bash @@ -58,13 +59,13 @@ python prepare_videodoc_redis.py ### 2.1 Start Redis Stack Server -Please refer to this [readme](../../../../vectorstores/redis/README.md). +Please refer to this [readme](../../third_parties/redis/src/README.md). ### 2.2 Start LVM Microservice (Optional) This is required only if you are going to consume the _generate_captions_ API of this microservice as described [here](#43-consume-generate_captions-api). -Please refer to this [readme](../../../../lvms/llava/README.md) to start the LVM microservice. +Please refer to this [readme](../../lvms/src/README.md) to start the LVM microservice. After LVM is up, set up environment variables. ```bash @@ -87,20 +88,20 @@ export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} ```bash cd ../../../../ -docker build -t opea/dataprep-multimodal-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/multimodal/redis/langchain/Dockerfile . +docker build -t opea/dataprep-multimodal-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/src/Dockerfile . 
``` ### 2.5 Run Docker with CLI (Option A) ```bash -docker run -d --name="dataprep-multimodal-redis" -p 6007:6007 --runtime=runc --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME -e LVM_ENDPOINT=$LVM_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN opea/dataprep-multimodal-redis:latest +docker run -d --name="dataprep-multimodal-redis" -p 6007:5000 --runtime=runc --ipc=host -e no_proxy=$no_proxy -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_HOST=$your_ip -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME -e LVM_ENDPOINT=$LVM_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN -e MULTIMODAL_DATAPREP=true -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_MULTIMODALREDIS" opea/dataprep-multimodal-redis:latest ``` ### 2.6 Run with Docker Compose (Option B - deprecated, will move to genAIExample in future) ```bash cd comps/dataprep/multimodal/redis/langchain -docker compose -f docker-compose-dataprep-redis.yaml up -d +docker compose -f compose_redis_multimodal.yaml up -d ``` ## 🚀3. Status Microservice @@ -111,18 +112,19 @@ docker container logs -f dataprep-multimodal-redis ## 🚀4. Consume Microservice -Once this dataprep microservice is started, user can use the below commands to invoke the microservice to convert images and videos and their transcripts (optional) to embeddings and save to the Redis vector store. +Once this dataprep microservice is started, user can use the below commands to invoke the microservice to convert images, videos, text, and PDF files to embeddings and save to the Redis vector store. This microservice provides 3 different ways for users to ingest files into Redis vector store corresponding to the 3 use cases. ### 4.1 Consume _ingest_with_text_ API -**Use case:** This API is used when videos are accompanied by transcript files (`.vtt` format) or images are accompanied by text caption files (`.txt` format). +**Use case:** This API is used for videos accompanied by transcript files (`.vtt` format), images accompanied by text caption files (`.txt` format), and PDF files containing a mix of text and images. **Important notes:** - Make sure the file paths after `files=@` are correct. - Every transcript or caption file's name must be identical to its corresponding video or image file's name (except their extension - .vtt goes with .mp4 and .txt goes with .jpg, .jpeg, .png, or .gif). For example, `video1.mp4` and `video1.vtt`. Otherwise, if `video1.vtt` is not included correctly in the API call, the microservice will return an error `No captions file video1.vtt found for video1.mp4`. +- It is assumed that PDFs will contain at least one image. Each image in the file will be embedded along with the text that appears on the same page as the image. 
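For scripted uploads, the same pairing rules can be exercised from Python with `requests`. The snippet below is a minimal sketch that mirrors the curl examples in this section; the file names and the `localhost:6007` address are placeholders and assume the multimodal dataprep container is already running.

```python
# Minimal sketch: send paired media/caption files plus a PDF to the multimodal
# ingest endpoint, mirroring the curl examples in this section.
# Assumes the service listens on localhost:6007 and the files exist locally.
import requests

url = "http://localhost:6007/v1/dataprep/ingest"

# Transcripts/captions must share the base name of their media file
# (video1.mp4 + video1.vtt, image1.jpg + image1.txt); PDFs go in on their own.
paths = ["video1.mp4", "video1.vtt", "image1.jpg", "image1.txt", "example.pdf"]
files = [("files", (path, open(path, "rb"))) for path in paths]

try:
    response = requests.post(url, files=files, proxies={"http": ""})
    response.raise_for_status()
    print(response.json())
finally:
    for _, (_, handle) in files:
        handle.close()
```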
#### Single video-transcript pair upload @@ -131,7 +133,7 @@ curl -X POST \ -H "Content-Type: multipart/form-data" \ -F "files=@./video1.mp4" \ -F "files=@./video1.vtt" \ - http://localhost:6007/v1/ingest_with_text + http://localhost:6007/v1/dataprep/ingest ``` #### Single image-caption pair upload @@ -141,7 +143,7 @@ curl -X POST \ -H "Content-Type: multipart/form-data" \ -F "files=@./image.jpg" \ -F "files=@./image.txt" \ - http://localhost:6007/v1/ingest_with_text + http://localhost:6007/v1/dataprep/ingest ``` #### Multiple file pair upload @@ -157,7 +159,8 @@ curl -X POST \ -F "files=@./image1.txt" \ -F "files=@./image2.jpg" \ -F "files=@./image2.txt" \ - http://localhost:6007/v1/ingest_with_text + -F "files=@./example.pdf" \ + http://localhost:6007/v1/dataprep/ingest ``` ### 4.2 Consume _generate_transcripts_ API @@ -172,7 +175,7 @@ In this use case, this microservice will use [`whisper`](https://openai.com/inde curl -X POST \ -H "Content-Type: multipart/form-data" \ -F "files=@./video1.mp4" \ - http://localhost:6007/v1/generate_transcripts + http://localhost:6007/v1/dataprep/generate_transcripts ``` #### Multiple file upload @@ -183,7 +186,7 @@ curl -X POST \ -F "files=@./video1.mp4" \ -F "files=@./video2.mp4" \ -F "files=@./audio1.wav" \ - http://localhost:6007/v1/generate_transcripts + http://localhost:6007/v1/dataprep/generate_transcripts ``` ### 4.3 Consume _generate_captions_ API @@ -198,7 +201,7 @@ In this use case, there is no meaningful language transcription. Thus, it is pre curl -X POST \ -H "Content-Type: multipart/form-data" \ -F "files=@./video1.mp4" \ - http://localhost:6007/v1/generate_captions + http://localhost:6007/v1/dataprep/generate_captions ``` - Multiple video upload @@ -208,7 +211,7 @@ curl -X POST \ -H "Content-Type: multipart/form-data" \ -F "files=@./video1.mp4" \ -F "files=@./video2.mp4" \ - http://localhost:6007/v1/generate_captions + http://localhost:6007/v1/dataprep/generate_captions ``` - Single image upload @@ -217,25 +220,26 @@ curl -X POST \ curl -X POST \ -H "Content-Type: multipart/form-data" \ -F "files=@./image.jpg" \ - http://localhost:6007/v1/generate_captions + http://localhost:6007/v1/dataprep/generate_captions ``` -### 4.4 Consume get_files API +### 4.4 Consume get API To get names of uploaded files, use the following command. ```bash curl -X POST \ -H "Content-Type: application/json" \ - http://localhost:6007/v1/dataprep/get_files + http://localhost:6007/v1/dataprep/get ``` -### 4.5 Consume delete_files API +### 4.5 Consume delete API To delete uploaded files and clear the database, use the following command. ```bash curl -X POST \ -H "Content-Type: application/json" \ - http://localhost:6007/v1/dataprep/delete_files + -d '{"file_path": "all"}' \ + http://localhost:6007/v1/dataprep/delete ``` diff --git a/comps/dataprep/neo4j/langchain/README.md b/comps/dataprep/src/README_neo4j_langchain.md similarity index 89% rename from comps/dataprep/neo4j/langchain/README.md rename to comps/dataprep/src/README_neo4j_langchain.md index 31f92548b4..7c5e3ed476 100644 --- a/comps/dataprep/neo4j/langchain/README.md +++ b/comps/dataprep/src/README_neo4j_langchain.md @@ -51,13 +51,13 @@ python prepare_doc_neo4j.py ```bash cd ../../../../ -docker build -t opea/dataprep-neo4j:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/neo4j/langchain/Dockerfile . +docker build -t opea/dataprep-neo4j:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/src/Dockerfile . 
``` ### Run Docker with CLI ```bash -docker run -d --name="dataprep-neo4j-server" -p 6007:6007 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy opea/dataprep-neo4j:latest +docker run -d --name="dataprep-neo4j-server" -p 6007:6007 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_NEO4J_LANGCHAIN" opea/dataprep-neo4j:latest ``` ### Setup Environment Variables @@ -74,8 +74,8 @@ export NEO4J_PASSWORD=${your_neo4j_password} ### Run Docker with Docker Compose ```bash -cd comps/dataprep/neo4j/langchain -docker compose -f docker-compose-dataprep-neo4j.yaml up -d +cd comps/dataprep/deployment/docker_compose/ +docker compose -f compose_neo4j_langchain.yaml up -d ``` ## Invoke Microservice @@ -86,7 +86,7 @@ Once document preparation microservice for Neo4J is started, user can use below curl -X POST \ -H "Content-Type: multipart/form-data" \ -F "files=@./file1.txt" \ - http://localhost:6007/v1/dataprep + http://localhost:6007/v1/dataprep/ingest ``` You can specify chunk_size and chunk_size by the following commands. @@ -97,7 +97,7 @@ curl -X POST \ -F "files=@./file1.txt" \ -F "chunk_size=1500" \ -F "chunk_overlap=100" \ - http://localhost:6007/v1/dataprep + http://localhost:6007/v1/dataprep/ingest ``` We support table extraction from pdf documents. You can specify process_table and table_strategy by the following commands. "table_strategy" refers to the strategies to understand tables for table retrieval. As the setting progresses from "fast" to "hq" to "llm," the focus shifts towards deeper table understanding at the expense of processing speed. The default strategy is "fast". @@ -112,5 +112,5 @@ curl -X POST \ -F "files=@./your_file.pdf" \ -F "process_table=true" \ -F "table_strategy=hq" \ - http://localhost:6007/v1/dataprep + http://localhost:6007/v1/dataprep/ingest ``` diff --git a/comps/dataprep/neo4j/llama_index/README.md b/comps/dataprep/src/README_neo4j_llamaindex.md similarity index 78% rename from comps/dataprep/neo4j/llama_index/README.md rename to comps/dataprep/src/README_neo4j_llamaindex.md index 74822f1898..c80fc6256a 100644 --- a/comps/dataprep/neo4j/llama_index/README.md +++ b/comps/dataprep/src/README_neo4j_llamaindex.md @@ -2,14 +2,14 @@ This Dataprep microservice performs: -- Graph extraction (entities, relationships and descripttions) using LLM +- Graph extraction (entities, relationships and descriptions) using LLM - Performs hierarchical_leiden clustering to identify communities in the knowledge graph - Generates a community symmary for each community - Stores all of the above in Neo4j Graph DB -This microservice follows the graphRAG approached defined by Microsoft paper ["From Local to Global: A Graph RAG Approach to Query-Focused Summarization"](https://www.microsoft.com/en-us/research/publication/from-local-to-global-a-graph-rag-approach-to-query-focused-summarization/) with some differences such as: 1) only level zero cluster summaries are leveraged, 2) The input context to the final answer generation is trimmed to fit maximum context length. 
+This microservice follows the graphRAG approached defined by Microsoft paper ["From Local to Global: A Graph RAG Approach to Query-Focused Summarization"](https://www.microsoft.com/en-us/research/publication/from-local-to-global-a-graph-rag-approach-to-query-focused-summarization/) with some differences such as: 1) no node degree prioritization is used in populating the LLM context window for community summaries, 2) no ranking of sub-communities is applied in generating higher level communities summaries. -This dataprep microservice ingests the input files and uses LLM (TGI or OpenAI model when OPENAI_API_KEY is set) to extract entities, relationships and descriptions of those to build a graph-based text index. +This dataprep microservice ingests the input files and uses LLM (TGI, VLLM or OpenAI model when OPENAI_API_KEY is set) to extract entities, relationships and descriptions of those to build a graph-based text index. Compose yaml file deploys TGI but works also with vLLM inference endpoint. ## Setup Environment Variables @@ -23,10 +23,20 @@ export NEO4J_URI=${your_neo4j_url} export NEO4J_USERNAME=${your_neo4j_username} export NEO4J_PASSWORD=${your_neo4j_password} # should match what was used in NEO4J_AUTH when running the neo4j-apoc export PYTHONPATH=${path_to_comps} -export OPENAI_KEY=${your_openai_api_key} # optional, when not provided will use smaller models TGI/TEI +export OPENAI_KEY=${your_openai_api_key} # optional, when not provided will use open models TGI/TEI export HUGGINGFACEHUB_API_TOKEN=${your_hf_token} + # set additional environment settings -source ./set_env.sh +export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" +export OPENAI_EMBEDDING_MODEL="text-embedding-3-small" +export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct" +export OPENAI_LLM_MODEL="gpt-4o" +export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:6006" +export TGI_LLM_ENDPOINT="http://${host_ip}:6005" +export NEO4J_URL="bolt://${host_ip}:7687" +export NEO4J_USERNAME=neo4j +export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6004/v1/dataprep" +export LOGFLAG=True ``` ## 🚀Start Microservice with Docker @@ -35,7 +45,7 @@ source ./set_env.sh ```bash cd ../../../../ -docker build -t opea/dataprep-neo4j-llamaindex:latest --build-arg no_proxy=$no_proxy --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/neo4j/llama_index/Dockerfile . +docker build -t opea/dataprep-neo4j-llamaindex:latest --build-arg no_proxy=$no_proxy --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/src/Dockerfile . ``` ### 2. Setup Environment Variables @@ -61,7 +71,7 @@ source ./set_env.sh Docker compose will start 4 microservices: dataprep-neo4j-llamaindex, neo4j-apoc, tgi-gaudi-service and tei-embedding-service. The reason TGI and TEI are needed is because dataprep relies on LLM to extract entities and relationships from text to build the graph and Neo4j Property Graph Index. Neo4j database supports embeddings natively so we do not need a separate vector store. Checkout the blog [Introducing the Property Graph Index: A Powerful New Way to Build Knowledge Graphs with LLMs](https://www.llamaindex.ai/blog/introducing-the-property-graph-index-a-powerful-new-way-to-build-knowledge-graphs-with-llms) for a better understanding of Property Graph Store and Index. 
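For orientation, the sketch below shows roughly what the graph-building step looks like when expressed with the llama-index Property Graph API, reusing the environment variables defined earlier. It is an illustrative outline only, not the microservice's actual implementation: it assumes llama-index >= 0.10 style packages, uses the OpenAI wrapper as a stand-in for the TGI/vLLM-backed LLM, and omits the community clustering and summarization steps.

```python
# Illustrative sketch: documents -> LLM-extracted entities/relationships ->
# Neo4j Property Graph Index with natively stored embeddings.
# Assumes llama-index >= 0.10 style packages and the environment variables set above;
# hierarchical_leiden clustering and community summarization are not shown here.
import os

from llama_index.core import PropertyGraphIndex, SimpleDirectoryReader
from llama_index.core.indices.property_graph import SimpleLLMPathExtractor
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.graph_stores.neo4j import Neo4jPropertyGraphStore
from llama_index.llms.openai import OpenAI  # stand-in; swap for a TGI/vLLM wrapper with open models

llm = OpenAI(model=os.getenv("OPENAI_LLM_MODEL", "gpt-4o"))

graph_store = Neo4jPropertyGraphStore(
    username=os.environ["NEO4J_USERNAME"],
    password=os.environ["NEO4J_PASSWORD"],
    url=os.environ["NEO4J_URL"],  # e.g. bolt://<host_ip>:7687
)

documents = SimpleDirectoryReader("./uploaded_files").load_data()

index = PropertyGraphIndex.from_documents(
    documents,
    kg_extractors=[SimpleLLMPathExtractor(llm=llm)],  # entity/relationship extraction
    embed_model=HuggingFaceEmbedding(model_name=os.getenv("EMBEDDING_MODEL_ID", "BAAI/bge-base-en-v1.5")),
    property_graph_store=graph_store,
    show_progress=True,
)
```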
```bash -cd comps/dataprep/neo4j/llama_index +cd comps/dataprep/deployment/docker_compose docker compose -f compose.yaml up -d ``` @@ -73,7 +83,7 @@ Once document preparation microservice for Neo4J is started, user can use below curl -X POST \ -H "Content-Type: multipart/form-data" \ -F "files=@./file1.txt" \ - http://${host_ip}:6004/v1/dataprep + http://${host_ip}:6004/v1/dataprep/ingest ``` You can specify chunk_size and chunk_size by the following commands. @@ -84,7 +94,7 @@ curl -X POST \ -F "files=@./file1.txt" \ -F "chunk_size=1500" \ -F "chunk_overlap=100" \ - http://${host_ip}:6004/v1/dataprep + http://${host_ip}:6004/v1/dataprep/ingest ``` Please note that clustering of extracted entities and summarization happens in this data preparation step. The result of this is: @@ -104,5 +114,5 @@ curl -X POST \ -F "files=@./your_file.pdf" \ -F "process_table=true" \ -F "table_strategy=hq" \ - http://localhost:6004/v1/dataprep + http://localhost:6004/v1/dataprep/ingest ``` diff --git a/comps/dataprep/opensearch/README.md b/comps/dataprep/src/README_opensearch.md similarity index 86% rename from comps/dataprep/opensearch/README.md rename to comps/dataprep/src/README_opensearch.md index a4067b7eaa..b5d14c9a9d 100644 --- a/comps/dataprep/opensearch/README.md +++ b/comps/dataprep/src/README_opensearch.md @@ -18,7 +18,7 @@ pip install -r requirements.txt ### 1.2 Start OpenSearch Stack Server -Please refer to this [readme](../../vectorstores/opensearch/README.md). +Please refer to this [readme](../../third_parties/opensearch/src/README.md). ### 1.3 Setup Environment Variables @@ -69,7 +69,7 @@ python prepare_doc_opensearch.py ### 2.1 Start OpenSearch Stack Server -Please refer to this [readme](../../vectorstores/opensearch/README.md). +Please refer to this [readme](../../third_parties/opensearch/src/README.md). ### 2.2 Setup Environment Variables @@ -89,7 +89,7 @@ export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} ```bash cd ../../ -docker build -t opea/dataprep-opensearch:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/opensearch/langchain/Dockerfile . +docker build -t opea/dataprep:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/src/Dockerfile . ``` ### 2.4 Run Docker with CLI (Option A) @@ -97,16 +97,16 @@ docker build -t opea/dataprep-opensearch:latest --build-arg https_proxy=$https_p - option 1: Start single-process version (for processing up to 10 files) ```bash -docker run -d --name="dataprep-opensearch-server" -p 6007:6007 --runtime=runc --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e OPENSEARCH_URL=$OPENSEARCH_URL -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN opea/dataprep-opensearch:latest +docker run -d --name="dataprep-opensearch-server" -p 6007:6007 --runtime=runc --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e OPENSEARCH_URL=$OPENSEARCH_URL -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_OPENSEARCH" opea/dataprep:latest ``` ### 2.5 Run with Docker Compose (Option B - deprecated, will move to genAIExample in future) ```bash # for langchain -cd comps/dataprep/opensearch/langchain +cd comps/dataprep/deployment/docker_compose # common command -docker compose -f docker-compose-dataprep-opensearch.yaml up -d +docker compose -f compose_opensearch.yaml up -d ``` ## 🚀3. 
Status Microservice @@ -129,7 +129,7 @@ Make sure the file path after `files=@` is correct. curl -X POST \ -H "Content-Type: multipart/form-data" \ -F "files=@./file1.txt" \ - http://localhost:6007/v1/dataprep + http://localhost:6007/v1/dataprep/ingest ``` You can specify chunk_size and chunk_overlap by the following commands. @@ -140,7 +140,7 @@ curl -X POST \ -F "files=@./file1.txt" \ -F "chunk_size=1500" \ -F "chunk_overlap=100" \ - http://localhost:6007/v1/dataprep + http://localhost:6007/v1/dataprep/ingest ``` We support table extraction from pdf documents. You can specify process_table and table_strategy by the following commands. "table_strategy" refers to the strategies to understand tables for table retrieval. As the setting progresses from "fast" to "hq" to "llm," the focus shifts towards deeper table understanding at the expense of processing speed. The default strategy is "fast". @@ -153,7 +153,7 @@ curl -X POST \ -F "files=@./your_file.pdf" \ -F "process_table=true" \ -F "table_strategy=hq" \ - http://localhost:6007/v1/dataprep + http://localhost:6007/v1/dataprep/ingest ``` - Multiple file upload @@ -164,7 +164,7 @@ curl -X POST \ -F "files=@./file1.txt" \ -F "files=@./file2.txt" \ -F "files=@./file3.txt" \ - http://localhost:6007/v1/dataprep + http://localhost:6007/v1/dataprep/ingest ``` - Links upload (not supported for llama_index now) @@ -172,7 +172,7 @@ ```bash curl -X POST \ -F 'link_list=["https://www.ces.tech/"]' \ - http://localhost:6007/v1/dataprep + http://localhost:6007/v1/dataprep/ingest ``` or @@ -182,7 +182,7 @@ import requests import json proxies = {"http": ""} -url = "http://localhost:6007/v1/dataprep" +url = "http://localhost:6007/v1/dataprep/ingest" urls = [ "https://towardsdatascience.com/no-gpu-no-party-fine-tune-bert-for-sentiment-analysis-with-vertex-ai-custom-jobs-d8fc410e908b?source=rss----7f60cf5620c9---4" ] @@ -204,7 +204,7 @@ To get uploaded file structures, use the following command: ```bash curl -X POST \ -H "Content-Type: application/json" \ - http://localhost:6007/v1/dataprep/get_file + http://localhost:6007/v1/dataprep/get ``` Then you will get the response JSON like this: @@ -230,24 +230,24 @@ Then you will get the response JSON like this: To delete uploaded file/link, use the following command. -The `file_path` here should be the `id` get from `/v1/dataprep/get_file` API. +The `file_path` here should be the `id` obtained from the `/v1/dataprep/get` API.
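For scripted maintenance, the same `get` and `delete` endpoints can be driven from Python. This is a minimal sketch, assuming the service is reachable at localhost:6007 as in the curl examples and that the get API returns the documented list of entries with an `id` field; the equivalent raw curl commands follow below.

```python
# List uploaded files/links, then delete the first one by its id.
import requests

base = "http://localhost:6007/v1/dataprep"

# Same call as the get curl: POST with no payload.
entries = requests.post(f"{base}/get", headers={"Content-Type": "application/json"}).json()
print("uploaded:", [entry["id"] for entry in entries])

if entries:
    # `file_path` must be the `id` returned by the get API; use "all" to remove everything.
    resp = requests.post(f"{base}/delete", json={"file_path": entries[0]["id"]})
    print(resp.status_code, resp.text)
```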
```bash # delete link curl -X POST \ -H "Content-Type: application/json" \ -d '{"file_path": "https://www.ces.tech/.txt"}' \ - http://localhost:6007/v1/dataprep/delete_file + http://localhost:6007/v1/dataprep/delete # delete file curl -X POST \ -H "Content-Type: application/json" \ -d '{"file_path": "uploaded_file_1.txt"}' \ - http://localhost:6007/v1/dataprep/delete_file + http://localhost:6007/v1/dataprep/delete # delete all files and links curl -X POST \ -H "Content-Type: application/json" \ -d '{"file_path": "all"}' \ - http://localhost:6007/v1/dataprep/delete_file + http://localhost:6007/v1/dataprep/delete ``` diff --git a/comps/dataprep/pgvector/langchain/README.md b/comps/dataprep/src/README_pgvector.md similarity index 75% rename from comps/dataprep/pgvector/langchain/README.md rename to comps/dataprep/src/README_pgvector.md index 37a47d1c38..92d0db577a 100644 --- a/comps/dataprep/pgvector/langchain/README.md +++ b/comps/dataprep/src/README_pgvector.md @@ -17,7 +17,7 @@ export INDEX_NAME=${your_index_name} ### 1.3 Start PGVector -Please refer to this [readme](../../../vectorstores/pgvector/README.md). +Please refer to this [readme](../../third_parties/pgvector/src/README.md). ### 1.4 Start Document Preparation Microservice for PGVector with Python Script @@ -31,7 +31,7 @@ python prepare_doc_pgvector.py ### 2.1 Start PGVector -Please refer to this [readme](../../../vectorstores/pgvector/README.md). +Please refer to this [readme](../../third_parties/pgvector/src/README.md). ### 2.2 Setup Environment Variables @@ -44,20 +44,20 @@ export INDEX_NAME=${your_index_name} ```bash cd GenAIComps -docker build -t opea/dataprep-pgvector:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/pgvector/langchain/Dockerfile . +docker build -t opea/dataprep:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/src/Dockerfile . ``` ### 2.4 Run Docker with CLI (Option A) ```bash -docker run --name="dataprep-pgvector" -p 6007:6007 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e PG_CONNECTION_STRING=$PG_CONNECTION_STRING -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT opea/dataprep-pgvector:latest +docker run --name="dataprep-pgvector" -p 6007:6007 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e PG_CONNECTION_STRING=$PG_CONNECTION_STRING -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_PGVECTOR" opea/dataprep:latest ``` ### 2.5 Run with Docker Compose (Option B) ```bash -cd comps/dataprep/pgvector/langchain -docker compose -f docker-compose-dataprep-pgvector.yaml up -d +cd comps/dataprep/deployment/docker_compose +docker compose -f compose_pgvector.yaml up -d ``` ## 🚀3. 
Consume Microservice @@ -70,17 +70,17 @@ Once document preparation microservice for PGVector is started, user can use bel curl -X POST \ -H "Content-Type: application/json" \ -d '{"path":"/path/to/document"}' \ - http://localhost:6007/v1/dataprep + http://localhost:6007/v1/dataprep/ingest ``` -### 3.2 Consume get_file API +### 3.2 Consume get API To get uploaded file structures, use the following command: ```bash curl -X POST \ -H "Content-Type: application/json" \ - http://localhost:6007/v1/dataprep/get_file + http://localhost:6007/v1/dataprep/get ``` Then you will get the response JSON like this: @@ -102,28 +102,28 @@ Then you will get the response JSON like this: ] ``` -### 4.3 Consume delete_file API +### 4.3 Consume delete API To delete uploaded file/link, use the following command. -The `file_path` here should be the `id` get from `/v1/dataprep/get_file` API. +The `file_path` here should be the `id` obtained from the `/v1/dataprep/get` API. ```bash # delete link curl -X POST \ -H "Content-Type: application/json" \ -d '{"file_path": "https://www.ces.tech/.txt"}' \ - http://localhost:6007/v1/dataprep/delete_file + http://localhost:6007/v1/dataprep/delete # delete file curl -X POST \ -H "Content-Type: application/json" \ -d '{"file_path": "uploaded_file_1.txt"}' \ - http://localhost:6007/v1/dataprep/delete_file + http://localhost:6007/v1/dataprep/delete # delete all files and links curl -X POST \ -H "Content-Type: application/json" \ -d '{"file_path": "all"}' \ - http://localhost:6007/v1/dataprep/delete_file + http://localhost:6007/v1/dataprep/delete ``` diff --git a/comps/dataprep/pinecone/langchain/README.md b/comps/dataprep/src/README_pinecone.md similarity index 58% rename from comps/dataprep/pinecone/langchain/README.md rename to comps/dataprep/src/README_pinecone.md index 980772c4e7..3beeef9699 100644 --- a/comps/dataprep/pinecone/langchain/README.md +++ b/comps/dataprep/src/README_pinecone.md @@ -10,7 +10,24 @@ pip install -r requirements.txt ### Start Pinecone Server -Please refer to this [readme](../../../vectorstores/pinecone/README.md). +1. Create a Pinecone account from the link below + +https://app.pinecone.io/ + +More details are in the Pinecone quick start guide: https://docs.pinecone.io/guides/get-started/quickstart + +2. Get API key + +An API key is needed to make the API calls. The API key can be obtained from Project -> Manage -> API keys + +3. Create the index in https://app.pinecone.io/ + +The following details are to be provided + + - Index name + - Based on the embedding model selected, the following has to be provided: a. Dimensions b. Metric ### Setup Environment Variables @@ -35,13 +52,13 @@ python prepare_doc_pinecone.py ```bash cd ../../../../ -docker build -t opea/dataprep-pinecone:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/pinecone/langchain/Dockerfile . +docker build -t opea/dataprep:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/src/Dockerfile .
``` ### Run Docker with CLI ```bash -docker run -d --name="dataprep-pinecone-server" -p 6007:6007 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy opea/dataprep-pinecone:latest +docker run -d --name="dataprep-pinecone-server" -p 6007:6007 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_PINECONE" opea/dataprep:latest ``` ### Setup Environment Variables @@ -56,8 +73,8 @@ export PINECONE_INDEX_NAME=${PINECONE_INDEX_NAME} ### Run Docker with Docker Compose ```bash -cd comps/dataprep/pinecone/langchain -docker compose -f docker-compose-dataprep-pinecone.yaml up -d +cd comps/dataprep/deployment/docker_compose +docker compose -f compose_pipecone.yaml up -d ``` ## Invoke Microservice @@ -65,5 +82,5 @@ docker compose -f docker-compose-dataprep-pinecone.yaml up -d Once document preparation microservice for Pinecone is started, user can use below command to invoke the microservice to convert the document to embedding and save to the database. ```bash -curl -X POST -H "Content-Type: application/json" -d '{"path":"/path/to/document"}' http://localhost:6007/v1/dataprep +curl -X POST -H "Content-Type: application/json" -d '{"path":"/path/to/document"}' http://localhost:6007/v1/dataprep/ingest ``` diff --git a/comps/dataprep/qdrant/langchain/README.md b/comps/dataprep/src/README_qdrant.md similarity index 81% rename from comps/dataprep/qdrant/langchain/README.md rename to comps/dataprep/src/README_qdrant.md index 9d58ffa779..fac7349ef3 100644 --- a/comps/dataprep/qdrant/langchain/README.md +++ b/comps/dataprep/src/README_qdrant.md @@ -13,7 +13,7 @@ apt-get install poppler-utils -y ### Start Qdrant Server -Please refer to this [readme](../../../vectorstores/qdrant/README.md). +docker run -p 6333:6333 -p 6334:6334 -v ./qdrant_storage:/qdrant/storage:z qdrant/qdrant ### Setup Environment Variables @@ -41,13 +41,13 @@ python prepare_doc_qdrant.py ```bash cd ../../../../ -docker build -t opea/dataprep-qdrant:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/qdrant/langchain/Dockerfile . +docker build -t opea/dataprep:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/src/Dockerfile . ``` ### Run Docker with CLI ```bash -docker run -d --name="dataprep-qdrant-server" -p 6007:6007 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy opea/dataprep-qdrant:latest +docker run -d --name="dataprep-qdrant-server" -p 6007:6007 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_QDRANT" opea/dataprep:latest ``` ### Setup Environment Variables @@ -63,8 +63,8 @@ export COLLECTION_NAME=${your_collection_name} ### Run Docker with Docker Compose ```bash -cd comps/dataprep/qdrant/langchain -docker compose -f docker-compose-dataprep-qdrant.yaml up -d +cd comps/dataprep/deployment/docker_compose +docker compose -f compose_qdrant.yaml up -d ``` ## Invoke Microservice @@ -75,7 +75,7 @@ Once document preparation microservice for Qdrant is started, user can use below curl -X POST \ -H "Content-Type: multipart/form-data" \ -F "files=@./file1.txt" \ - http://localhost:6007/v1/dataprep + http://localhost:6007/v1/dataprep/ingest ``` You can specify chunk_size and chunk_size by the following commands. 
@@ -86,7 +86,7 @@ curl -X POST \ -F "files=@./file1.txt" \ -F "chunk_size=1500" \ -F "chunk_overlap=100" \ - http://localhost:6007/v1/dataprep + http://localhost:6007/v1/dataprep/ingest ``` We support table extraction from pdf documents. You can specify process_table and table_strategy by the following commands. "table_strategy" refers to the strategies to understand tables for table retrieval. As the setting progresses from "fast" to "hq" to "llm," the focus shifts towards deeper table understanding at the expense of processing speed. The default strategy is "fast". @@ -99,5 +99,5 @@ curl -X POST \ -F "files=@./your_file.pdf" \ -F "process_table=true" \ -F "table_strategy=hq" \ - http://localhost:6007/v1/dataprep + http://localhost:6007/v1/dataprep/ingest ``` diff --git a/comps/dataprep/redis/README.md b/comps/dataprep/src/README_redis.md similarity index 74% rename from comps/dataprep/redis/README.md rename to comps/dataprep/src/README_redis.md index 384d9018f4..c6e4555295 100644 --- a/comps/dataprep/redis/README.md +++ b/comps/dataprep/src/README_redis.md @@ -1,6 +1,6 @@ # Dataprep Microservice with Redis -We have provided dataprep microservice for multimodal data input (e.g., text and image) [here](../multimodal/redis/langchain/README.md). +We have provided dataprep microservice for multimodal data input (e.g., text and image) [here](./README_multimodal.md). For dataprep microservice for text input, we provide here two frameworks: `Langchain` and `LlamaIndex`. We also provide `Langchain_ray` which uses ray to parallel the data prep for multi-file performance improvement(observed 5x - 15x speedup by processing 1000 files/links.). @@ -33,7 +33,7 @@ cd langchain_ray; pip install -r requirements_ray.txt ### 1.2 Start Redis Stack Server -Please refer to this [readme](../../vectorstores/redis/README.md). +Please refer to this [readme](../../third_parties/redis/src/README.md). ### 1.3 Setup Environment Variables @@ -90,7 +90,7 @@ python prepare_doc_redis_on_ray.py ### 2.1 Start Redis Stack Server -Please refer to this [readme](../../vectorstores/redis/README.md). +Please refer to this [readme](../../third_parties/redis/src/README.md). ### 2.2 Setup Environment Variables @@ -104,54 +104,23 @@ export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} ### 2.3 Build Docker Image -- Build docker image with langchain - -- option 1: Start single-process version (for 1-10 files processing) - -```bash -cd ../../ -docker build -t opea/dataprep-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/redis/langchain/Dockerfile . -``` - -- Build docker image with llama_index - ```bash cd ../../ -docker build -t opea/dataprep-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/redis/llama_index/Dockerfile . -``` - -- option 2: Start multi-process version (for >10 files processing) - -```bash -cd ../../../ -docker build -t opea/dataprep-on-ray-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/redis/langchain_ray/Dockerfile . +docker build -t opea/dataprep:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/src/Dockerfile . 
``` ### 2.4 Run Docker with CLI (Option A) -- option 1: Start single-process version (for 1-10 files processing) - ```bash -docker run -d --name="dataprep-redis-server" -p 6007:6007 --runtime=runc --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN opea/dataprep-redis:latest -``` - -- option 2: Start multi-process version (for >10 files processing) - -```bash -docker run -d --name="dataprep-redis-server" -p 6007:6007 --runtime=runc --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN -e TIMEOUT_SECONDS=600 opea/dataprep-on-ray-redis:latest +docker run -d --name="dataprep-redis-server" -p 6007:6007 --runtime=runc --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN opea/dataprep:latest ``` ### 2.5 Run with Docker Compose (Option B - deprecated, will move to genAIExample in future) ```bash -# for langchain -cd comps/dataprep/redis/langchain -# for langchain_ray -cd comps/dataprep/redis/langchain_ray -# for llama_index -cd comps/dataprep/redis/llama_index -# common command -docker compose -f docker-compose-dataprep-redis.yaml up -d + +cd comps/dataprep/deployment/docker_compose +docker compose -f compose_redis.yaml up -d ``` ## 🚀3. Status Microservice @@ -174,7 +143,7 @@ Make sure the file path after `files=@` is correct. curl -X POST \ -H "Content-Type: multipart/form-data" \ -F "files=@./file1.txt" \ - http://localhost:6007/v1/dataprep + http://localhost:6007/v1/dataprep/ingest ``` You can specify chunk_size and chunk_overlap by the following commands. @@ -185,7 +154,7 @@ curl -X POST \ -F "files=@./file1.txt" \ -F "chunk_size=1500" \ -F "chunk_overlap=100" \ - http://localhost:6007/v1/dataprep + http://localhost:6007/v1/dataprep/ingest ``` We support table extraction from pdf documents. You can specify process_table and table_strategy by the following commands. "table_strategy" refers to the strategies to understand tables for table retrieval. As the setting progresses from "fast" to "hq" to "llm," the focus shifts towards deeper table understanding at the expense of processing speed. The default strategy is "fast".
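The same multipart request can also be sent from Python. Below is a minimal sketch mirroring the curl calls in this section (service assumed at localhost:6007); the raw curl invocation with `process_table` and `table_strategy` follows right after.

```python
# Upload a PDF with custom chunking and table extraction options.
import requests

url = "http://localhost:6007/v1/dataprep/ingest"

with open("./your_file.pdf", "rb") as f:
    form = {
        "chunk_size": "1500",
        "chunk_overlap": "100",
        "process_table": "true",   # enable table extraction for PDFs
        "table_strategy": "hq",    # "fast" (default), "hq" or "llm"
    }
    resp = requests.post(url, files={"files": f}, data=form)

print(resp.status_code, resp.text)
```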
@@ -198,7 +167,7 @@ curl -X POST \ -F "files=@./your_file.pdf" \ -F "process_table=true" \ -F "table_strategy=hq" \ - http://localhost:6007/v1/dataprep + http://localhost:6007/v1/dataprep/ingest ``` - Multiple file upload @@ -209,7 +178,7 @@ curl -X POST \ -F "files=@./file1.txt" \ -F "files=@./file2.txt" \ -F "files=@./file3.txt" \ - http://localhost:6007/v1/dataprep + http://localhost:6007/v1/dataprep/ingest ``` - Links upload (not supported for llama_index now) @@ -217,7 +186,7 @@ curl -X POST \ ```bash curl -X POST \ -F 'link_list=["https://www.ces.tech/"]' \ - http://localhost:6007/v1/dataprep + http://localhost:6007/v1/dataprep/ingest ``` or @@ -227,7 +196,7 @@ import requests import json proxies = {"http": ""} -url = "http://localhost:6007/v1/dataprep" +url = "http://localhost:6007/v1/dataprep/ingest" urls = [ "https://towardsdatascience.com/no-gpu-no-party-fine-tune-bert-for-sentiment-analysis-with-vertex-ai-custom-jobs-d8fc410e908b?source=rss----7f60cf5620c9---4" ] @@ -242,14 +211,14 @@ except requests.exceptions.RequestException as e: print("An error occurred:", e) ``` -### 4.2 Consume get_file API +### 4.2 Consume get API To get uploaded file structures, use the following command: ```bash curl -X POST \ -H "Content-Type: application/json" \ - http://localhost:6007/v1/dataprep/get_file + http://localhost:6007/v1/dataprep/get ``` Then you will get the response JSON like this: @@ -271,28 +240,28 @@ Then you will get the response JSON like this: ] ``` -### 4.3 Consume delete_file API +### 4.3 Consume delete API To delete uploaded file/link, use the following command. -The `file_path` here should be the `id` get from `/v1/dataprep/get_file` API. +The `file_path` here should be the `id` get from `/v1/dataprep/get` API. ```bash # delete link curl -X POST \ -H "Content-Type: application/json" \ -d '{"file_path": "https://www.ces.tech/.txt"}' \ - http://localhost:6007/v1/dataprep/delete_file + http://localhost:6007/v1/dataprep/delete # delete file curl -X POST \ -H "Content-Type: application/json" \ -d '{"file_path": "uploaded_file_1.txt"}' \ - http://localhost:6007/v1/dataprep/delete_file + http://localhost:6007/v1/dataprep/delete # delete all files and links curl -X POST \ -H "Content-Type: application/json" \ -d '{"file_path": "all"}' \ - http://localhost:6007/v1/dataprep/delete_file + http://localhost:6007/v1/dataprep/delete ``` diff --git a/comps/dataprep/vdms/README.md b/comps/dataprep/src/README_vdms.md similarity index 76% rename from comps/dataprep/vdms/README.md rename to comps/dataprep/src/README_vdms.md index 132a8816b3..7571ca80d2 100644 --- a/comps/dataprep/vdms/README.md +++ b/comps/dataprep/src/README_vdms.md @@ -27,7 +27,7 @@ cd langchain_ray; pip install -r requirements_ray.txt ### 1.2 Start VDMS Server -Refer to this [readme](../../vectorstores/vdms/README.md). +Refer to this [readme](../../third_parties/vdms/src/README.md). ### 1.3 Setup Environment Variables @@ -60,7 +60,7 @@ python prepare_doc_redis_on_ray.py ### 2.1 Start VDMS Server -Refer to this [readme](../../vectorstores/vdms/README.md). +Refer to this [readme](../../third_parties/vdms/src/README.md). ### 2.2 Setup Environment Variables @@ -78,20 +78,10 @@ export PYTHONPATH=${path_to_comps} ### 2.3 Build Docker Image -- Build docker image with langchain - - Start single-process version (for 1-10 files processing) - - ```bash - cd ../../../ - docker build -t opea/dataprep-vdms:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/vdms/langchain/Dockerfile . 
- ``` - - +cd ../../../ +docker build -t opea/dataprep:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/src/Dockerfile . +``` ### 2.4 Run Docker with CLI @@ -101,18 +91,9 @@ Start single-process version (for 1-10 files processing) docker run -d --name="dataprep-vdms-server" -p 6007:6007 --runtime=runc --ipc=host \ -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e TEI_ENDPOINT=$TEI_ENDPOINT \ -e COLLECTION_NAME=$COLLECTION_NAME -e VDMS_HOST=$VDMS_HOST -e VDMS_PORT=$VDMS_PORT \ -opea/dataprep-vdms:latest +-e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_VDMS" opea/dataprep:latest ``` - - ## 🚀3. Status Microservice ```bash @@ -131,7 +112,7 @@ Make sure the file path after `files=@` is correct. curl -X POST \ -H "Content-Type: multipart/form-data" \ -F "files=@./file1.txt" \ - http://localhost:6007/v1/dataprep + http://localhost:6007/v1/dataprep/ingest ``` You can specify `chunk_size` and `chunk_overlap` by the following commands. @@ -142,7 +123,7 @@ Make sure the file path after `files=@` is correct. -F "files=@./LLAMA2_page6.pdf" \ -F "chunk_size=1500" \ -F "chunk_overlap=100" \ - http://localhost:6007/v1/dataprep + http://localhost:6007/v1/dataprep/ingest ``` - Multiple file upload @@ -153,7 +134,7 @@ Make sure the file path after `files=@` is correct. -F "files=@./file1.txt" \ -F "files=@./file2.txt" \ -F "files=@./file3.txt" \ - http://localhost:6007/v1/dataprep + http://localhost:6007/v1/dataprep/ingest ``` - Links upload (not supported for `llama_index` now) @@ -161,7 +142,7 @@ Make sure the file path after `files=@` is correct. ```bash curl -X POST \ -F 'link_list=["https://www.ces.tech/"]' \ - http://localhost:6007/v1/dataprep + http://localhost:6007/v1/dataprep/ingest ``` or @@ -171,7 +152,7 @@ Make sure the file path after `files=@` is correct. import json proxies = {"http": ""} - url = "http://localhost:6007/v1/dataprep" + url = "http://localhost:6007/v1/dataprep/ingest" urls = [ "https://towardsdatascience.com/no-gpu-no-party-fine-tune-bert-for-sentiment-analysis-with-vertex-ai-custom-jobs-d8fc410e908b?source=rss----7f60cf5620c9---4" ] diff --git a/comps/dataprep/src/integrations/config.py b/comps/dataprep/src/integrations/config.py deleted file mode 100644 index 43a43471f1..0000000000 --- a/comps/dataprep/src/integrations/config.py +++ /dev/null @@ -1,91 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import os - - -####################################################### -# Common Functions # -####################################################### -def get_boolean_env_var(var_name, default_value=False): - """Retrieve the boolean value of an environment variable. - - Args: - var_name (str): The name of the environment variable to retrieve. - default_value (bool): The default value to return if the variable - is not found. - - Returns: - bool: The value of the environment variable, interpreted as a boolean. 
- """ - true_values = {"true", "1", "t", "y", "yes"} - false_values = {"false", "0", "f", "n", "no"} - - # Retrieve the environment variable's value - value = os.getenv(var_name, "").lower() - - # Decide the boolean value based on the content of the string - if value in true_values: - return True - elif value in false_values: - return False - else: - return default_value - - -# Embedding model -EMBED_MODEL = os.getenv("EMBED_MODEL", "BAAI/bge-base-en-v1.5") -# TEI Embedding endpoints -TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_EMBEDDING_ENDPOINT", "") - -# Vector Index Configuration -INDEX_NAME = os.getenv("INDEX_NAME", "rag_redis") -KEY_INDEX_NAME = os.getenv("KEY_INDEX_NAME", "file-keys") -TIMEOUT_SECONDS = int(os.getenv("TIMEOUT_SECONDS", 600)) -SEARCH_BATCH_SIZE = int(os.getenv("SEARCH_BATCH_SIZE", 10)) - - -####################################################### -# Redis # -####################################################### -# Redis Connection Information -REDIS_HOST = os.getenv("REDIS_HOST", "localhost") -REDIS_PORT = int(os.getenv("REDIS_PORT", 6379)) - - -def format_redis_conn_from_env(): - redis_url = os.getenv("REDIS_URL", None) - if redis_url: - return redis_url - else: - using_ssl = get_boolean_env_var("REDIS_SSL", False) - start = "rediss://" if using_ssl else "redis://" - - # if using RBAC - password = os.getenv("REDIS_PASSWORD", None) - username = os.getenv("REDIS_USERNAME", "default") - if password is not None: - start += f"{username}:{password}@" - - return start + f"{REDIS_HOST}:{REDIS_PORT}" - - -REDIS_URL = format_redis_conn_from_env() - - -####################################################### -# Milvus # -####################################################### -# Local Embedding model -LOCAL_EMBEDDING_MODEL = os.getenv("LOCAL_EMBEDDING_MODEL", "maidalun1020/bce-embedding-base_v1") -# TEI configuration -TEI_EMBEDDING_MODEL = os.environ.get("TEI_EMBEDDING_MODEL", "/home/user/bge-large-zh-v1.5") -TEI_EMBEDDING_ENDPOINT = os.environ.get("TEI_EMBEDDING_ENDPOINT", "") -os.environ["OPENAI_API_BASE"] = TEI_EMBEDDING_ENDPOINT -os.environ["OPENAI_API_KEY"] = "Dummy key" -# MILVUS configuration -MILVUS_HOST = os.getenv("MILVUS_HOST", "localhost") -MILVUS_PORT = int(os.getenv("MILVUS_PORT", 19530)) -MILVUS_URI = f"http://{MILVUS_HOST}:{MILVUS_PORT}" -INDEX_PARAMS = {"index_type": "FLAT", "metric_type": "IP", "params": {}} -COLLECTION_NAME = os.getenv("COLLECTION_NAME", "rag_milvus") diff --git a/comps/dataprep/src/integrations/config/__init__.py b/comps/dataprep/src/integrations/config/__init__.py new file mode 100644 index 0000000000..4057dc0163 --- /dev/null +++ b/comps/dataprep/src/integrations/config/__init__.py @@ -0,0 +1,2 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 diff --git a/comps/dataprep/vdms/multimodal_langchain/config.yaml b/comps/dataprep/src/integrations/config/config.yaml similarity index 100% rename from comps/dataprep/vdms/multimodal_langchain/config.yaml rename to comps/dataprep/src/integrations/config/config.yaml diff --git a/comps/dataprep/multimodal/redis/langchain/schema.yml b/comps/dataprep/src/integrations/config/schema.yml similarity index 100% rename from comps/dataprep/multimodal/redis/langchain/schema.yml rename to comps/dataprep/src/integrations/config/schema.yml diff --git a/comps/dataprep/src/integrations/elasticsearch.py b/comps/dataprep/src/integrations/elasticsearch.py new file mode 100644 index 0000000000..ed07d157ea --- /dev/null +++ b/comps/dataprep/src/integrations/elasticsearch.py @@ -0,0 +1,389 @@ 
+# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import json +import os +from pathlib import Path +from typing import List, Optional, Union + +from elasticsearch import Elasticsearch +from fastapi import Body, File, Form, HTTPException, UploadFile +from langchain.text_splitter import HTMLHeaderTextSplitter, RecursiveCharacterTextSplitter +from langchain_community.embeddings import HuggingFaceBgeEmbeddings +from langchain_core.documents import Document +from langchain_elasticsearch import ElasticsearchStore +from langchain_huggingface.embeddings import HuggingFaceEndpointEmbeddings + +from comps import CustomLogger, DocPath, OpeaComponent, OpeaComponentRegistry, ServiceType +from comps.dataprep.src.utils import ( + create_upload_folder, + document_loader, + encode_filename, + get_file_structure, + get_separators, + get_tables_result, + parse_html, + remove_folder_with_ignore, + save_content_to_local_disk, +) + +logger = CustomLogger("opea_dataprep_elasticsearch") +logflag = os.getenv("LOGFLAG", False) + +ES_CONNECTION_STRING = os.getenv("ES_CONNECTION_STRING", "http://localhost:9200") +UPLOADED_FILES_PATH = os.getenv("UPLOADED_FILES_PATH", "./uploaded_files/") + +# Embedding model +EMBED_MODEL = os.getenv("EMBED_MODEL", "BAAI/bge-base-en-v1.5") + +# TEI Embedding endpoints +TEI_ENDPOINT = os.getenv("TEI_ENDPOINT", "") + +# Vector Index Configuration +INDEX_NAME = os.getenv("INDEX_NAME", "rag-elastic") + +# chunk parameters +CHUNK_SIZE = os.getenv("CHUNK_SIZE", 1500) +CHUNK_OVERLAP = os.getenv("CHUNK_OVERLAP", 100) + + +@OpeaComponentRegistry.register("OPEA_DATAPREP_ELASTICSEARCH") +class OpeaElasticSearchDataprep(OpeaComponent): + """Dataprep component for ElasticSearch ingestion and search services.""" + + def __init__(self, name: str, description: str, config: dict = None): + super().__init__(name, ServiceType.DATAPREP.name.lower(), description, config) + self.es_client = Elasticsearch(hosts=ES_CONNECTION_STRING) + self.es_store = self.get_elastic_store(self.get_embedder()) + self.create_index() + + # Perform health check + health_status = self.check_health() + if not health_status: + logger.error("OpeaElasticSearchDataprep health check failed.") + + def check_health(self) -> bool: + """Checks the health of the ElasticSearch service.""" + if self.es_client is None: + logger.error("ElasticSearch client is not initialized.") + return False + + return True + + def invoke(self, *args, **kwargs): + pass + + def create_index(self) -> None: + if not self.es_client.indices.exists(index=INDEX_NAME): + self.es_client.indices.create(index=INDEX_NAME) + + def get_embedder(self) -> Union[HuggingFaceEndpointEmbeddings, HuggingFaceBgeEmbeddings]: + """Obtain required Embedder.""" + if TEI_ENDPOINT: + return HuggingFaceEndpointEmbeddings(model=TEI_ENDPOINT) + else: + return HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) + + def get_elastic_store( + self, embedder: Union[HuggingFaceEndpointEmbeddings, HuggingFaceBgeEmbeddings] + ) -> ElasticsearchStore: + """Get Elasticsearch vector store.""" + return ElasticsearchStore(index_name=INDEX_NAME, embedding=embedder, es_connection=self.es_client) + + def delete_embeddings(self, doc_name: str) -> bool: + """Delete documents from Elasticsearch.""" + try: + if doc_name == "all": + if logflag: + logger.info("Deleting all documents from vectorstore") + + query = {"query": {"match_all": {}}} + else: + if logflag: + logger.info(f"Deleting {doc_name} from vectorstore") + + query = {"query": {"match": {"metadata.doc_name": {"query": 
doc_name, "operator": "AND"}}}} + + self.es_client.delete_by_query(index=INDEX_NAME, body=query) + return True + + except Exception as e: + if logflag: + logger.info(f"An unexpected error occurred: {e}") + + return False + + def search_by_filename(self, file_name: str) -> bool: + """Search Elasticsearch by file name.""" + + query = {"query": {"match": {"metadata.doc_name": {"query": file_name, "operator": "AND"}}}} + results = self.es_client.search(index=INDEX_NAME, body=query) + + if logflag: + logger.info(f"[ search by file ] searched by {file_name}") + logger.info(f"[ search by file ] {len(results['hits'])} results: {results}") + + return results["hits"]["total"]["value"] > 0 + + def ingest_doc_to_elastic(self, doc_path: DocPath) -> None: + """Ingest documents to Elasticsearch.""" + + path = doc_path.path + file_name = path.split("/")[-1] + if logflag: + logger.info(f"Parsing document {path}, file name: {file_name}.") + + if path.endswith(".html"): + headers_to_split_on = [ + ("h1", "Header 1"), + ("h2", "Header 2"), + ("h3", "Header 3"), + ] + text_splitter = HTMLHeaderTextSplitter(headers_to_split_on=headers_to_split_on) + else: + text_splitter = RecursiveCharacterTextSplitter( + chunk_size=doc_path.chunk_size, + chunk_overlap=doc_path.chunk_overlap, + add_start_index=True, + separators=get_separators(), + ) + + content = document_loader(path) + + structured_types = [".xlsx", ".csv", ".json", "jsonl"] + _, ext = os.path.splitext(path) + + if ext in structured_types: + chunks = content + else: + chunks = text_splitter.split_text(content) + + if doc_path.process_table and path.endswith(".pdf"): + table_chunks = get_tables_result(path, doc_path.table_strategy) + chunks = chunks + table_chunks + + if logflag: + logger.info(f"Done preprocessing. Created {len(chunks)} chunks of the original file.") + + batch_size = 32 + num_chunks = len(chunks) + + metadata = dict({"doc_name": str(file_name)}) + + for i in range(0, num_chunks, batch_size): + batch_chunks = chunks[i : i + batch_size] + batch_texts = batch_chunks + + documents = [Document(page_content=text, metadata=metadata) for text in batch_texts] + _ = self.es_store.add_documents(documents=documents) + if logflag: + logger.info(f"Processed batch {i // batch_size + 1}/{(num_chunks - 1) // batch_size + 1}") + + async def ingest_link_to_elastic(self, link_list: List[str]) -> None: + """Ingest data scraped from website links into Elasticsearch.""" + + text_splitter = RecursiveCharacterTextSplitter( + chunk_size=CHUNK_SIZE, + chunk_overlap=CHUNK_OVERLAP, + add_start_index=True, + separators=get_separators(), + ) + + batch_size = 32 + + for link in link_list: + content = parse_html([link])[0][0] + if logflag: + logger.info(f"[ ingest link ] link: {link} content: {content}") + + encoded_link = encode_filename(link) + save_path = UPLOADED_FILES_PATH + encoded_link + ".txt" + doc_path = UPLOADED_FILES_PATH + link + ".txt" + if logflag: + logger.info(f"[ ingest link ] save_path: {save_path}") + + await save_content_to_local_disk(save_path, content) + + chunks = text_splitter.split_text(content) + + num_chunks = len(chunks) + metadata = [dict({"doc_name": str(doc_path)})] + + for i in range(0, num_chunks, batch_size): + batch_chunks = chunks[i : i + batch_size] + batch_texts = batch_chunks + + documents = [Document(page_content=text, metadata=metadata) for text in batch_texts] + _ = self.es_store.add_documents(documents=documents) + + if logflag: + logger.info(f"Processed batch {i // batch_size + 1}/{(num_chunks - 1) // batch_size + 1}") + + async 
def ingest_files( + self, + files: Optional[Union[UploadFile, List[UploadFile]]] = File(None), + link_list: Optional[str] = Form(None), + chunk_size: int = Form(1500), + chunk_overlap: int = Form(100), + process_table: bool = Form(False), + table_strategy: str = Form("fast"), + ): + """Ingest files/links content into ElasticSearch database. + + Save in the format of vector[768]. + Returns '{"status": 200, "message": "Data preparation succeeded"}' if successful. + Args: + files (Union[UploadFile, List[UploadFile]], optional): A file or a list of files to be ingested. Defaults to File(None). + link_list (str, optional): A list of links to be ingested. Defaults to Form(None). + chunk_size (int, optional): The size of the chunks to be split. Defaults to Form(1500). + chunk_overlap (int, optional): The overlap between chunks. Defaults to Form(100). + process_table (bool, optional): Whether to process tables in PDFs. Defaults to Form(False). + table_strategy (str, optional): The strategy to process tables in PDFs. Defaults to Form("fast"). + """ + if logflag: + logger.info(f"files:{files}") + logger.info(f"link_list:{link_list}") + + if files and link_list: + raise HTTPException(status_code=400, detail="Provide either a file or a string list, not both.") + + if files: + if not isinstance(files, list): + files = [files] + + if not os.path.exists(UPLOADED_FILES_PATH): + Path(UPLOADED_FILES_PATH).mkdir(parents=True, exist_ok=True) + + for file in files: + encode_file = encode_filename(file.filename) + save_path = UPLOADED_FILES_PATH + encode_file + filename = save_path.split("/")[-1] + + try: + exists = self.search_by_filename(filename) + except Exception as e: + raise HTTPException( + status_code=500, + detail=f"Failed when searching in Elasticsearch for file {file.filename}.", + ) + + if exists: + if logflag: + logger.info(f"[ upload ] File {file.filename} already exists.") + + raise HTTPException( + status_code=400, + detail=f"Uploaded file {file.filename} already exists. 
Please change file name.", + ) + + await save_content_to_local_disk(save_path, file) + + self.ingest_doc_to_elastic( + DocPath( + path=save_path, + chunk_size=chunk_size, + chunk_overlap=chunk_overlap, + process_table=process_table, + table_strategy=table_strategy, + ) + ) + if logflag: + logger.info(f"Successfully saved file {save_path}") + + result = {"status": 200, "message": "Data preparation succeeded"} + + if logflag: + logger.info(result) + return result + + if link_list: + try: + link_list = json.loads(link_list) # Parse JSON string to list + if not isinstance(link_list, list): + raise HTTPException(status_code=400, detail="link_list should be a list.") + + await self.ingest_link_to_elastic(link_list) + + if logflag: + logger.info(f"Successfully saved link list {link_list}") + + result = {"status": 200, "message": "Data preparation succeeded"} + + if logflag: + logger.info(result) + return result + + except json.JSONDecodeError: + raise HTTPException(status_code=400, detail="Invalid JSON format for link_list.") + + raise HTTPException(status_code=400, detail="Must provide either a file or a string list.") + + async def get_files(self): + """Get file structure from pipecone database in the format of + { + "name": "File Name", + "id": "File Name", + "type": "File", + "parent": "", + }""" + if logflag: + logger.info("[ dataprep - get file ] start to get file structure") + + if not Path(UPLOADED_FILES_PATH).exists(): + if logflag: + logger.info("No file uploaded, return empty list.") + return [] + + file_content = get_file_structure(UPLOADED_FILES_PATH) + + if logflag: + logger.info(file_content) + + return file_content + + async def delete_files(self, file_path: str = Body(..., embed=True)): + """Delete file according to `file_path`. + + `file_path`: + - specific file path (e.g. /path/to/file.txt) + - "all": delete all files uploaded + """ + if file_path == "all": + if logflag: + logger.info("[dataprep - del] delete all files") + remove_folder_with_ignore(UPLOADED_FILES_PATH) + assert self.delete_embeddings(file_path) + if logflag: + logger.info("[dataprep - del] successfully delete all files.") + create_upload_folder(UPLOADED_FILES_PATH) + if logflag: + logger.info({"status": True}) + return {"status": True} + + delete_path = Path(UPLOADED_FILES_PATH + "/" + encode_filename(file_path)) + + if logflag: + logger.info(f"[dataprep - del] delete_path: {delete_path}") + + if delete_path.exists(): + # delete file + if delete_path.is_file(): + try: + assert self.delete_embeddings(file_path) + delete_path.unlink() + except Exception as e: + if logflag: + logger.info(f"[dataprep - del] fail to delete file {delete_path}: {e}") + logger.info({"status": False}) + return {"status": False} + # delete folder + else: + if logflag: + logger.info("[dataprep - del] delete folder is not supported for now.") + logger.info({"status": False}) + return {"status": False} + if logflag: + logger.info({"status": True}) + return {"status": True} + else: + raise HTTPException(status_code=404, detail="File/folder not found. 
Please check del_path.") diff --git a/comps/dataprep/src/integrations/milvus.py b/comps/dataprep/src/integrations/milvus.py index 2eb517e40a..c3e3e57309 100644 --- a/comps/dataprep/src/integrations/milvus.py +++ b/comps/dataprep/src/integrations/milvus.py @@ -28,13 +28,23 @@ save_content_to_local_disk, ) -from .config import COLLECTION_NAME, INDEX_PARAMS, LOCAL_EMBEDDING_MODEL, MILVUS_URI, TEI_EMBEDDING_ENDPOINT - logger = CustomLogger("milvus_dataprep") logflag = os.getenv("LOGFLAG", False) partition_field_name = "filename" upload_folder = "./uploaded_files/" +# Local Embedding model +LOCAL_EMBEDDING_MODEL = os.getenv("LOCAL_EMBEDDING_MODEL", "maidalun1020/bce-embedding-base_v1") +# TEI configuration +TEI_EMBEDDING_MODEL = os.environ.get("TEI_EMBEDDING_MODEL", "/home/user/bge-large-zh-v1.5") +TEI_EMBEDDING_ENDPOINT = os.environ.get("TEI_EMBEDDING_ENDPOINT", "") +# MILVUS configuration +MILVUS_HOST = os.getenv("MILVUS_HOST", "localhost") +MILVUS_PORT = int(os.getenv("MILVUS_PORT", 19530)) +MILVUS_URI = f"http://{MILVUS_HOST}:{MILVUS_PORT}" +INDEX_PARAMS = {"index_type": "FLAT", "metric_type": "IP", "params": {}} +COLLECTION_NAME = os.getenv("COLLECTION_NAME", "rag_milvus") + def ingest_chunks_to_milvus(embeddings, file_name: str, chunks: List): if logflag: diff --git a/comps/dataprep/src/integrations/neo4j_langchain.py b/comps/dataprep/src/integrations/neo4j_langchain.py new file mode 100644 index 0000000000..ba03437972 --- /dev/null +++ b/comps/dataprep/src/integrations/neo4j_langchain.py @@ -0,0 +1,240 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import json +import os +from typing import List, Optional, Union + +import openai +from fastapi import Body, File, Form, HTTPException, UploadFile +from langchain.text_splitter import RecursiveCharacterTextSplitter +from langchain_community.graphs import Neo4jGraph +from langchain_community.llms import HuggingFaceEndpoint +from langchain_core.documents import Document +from langchain_experimental.graph_transformers import LLMGraphTransformer +from langchain_openai import ChatOpenAI +from langchain_text_splitters import HTMLHeaderTextSplitter + +from comps import CustomLogger, DocPath, OpeaComponent, OpeaComponentRegistry, ServiceType +from comps.dataprep.src.utils import ( + document_loader, + encode_filename, + get_separators, + get_tables_result, + parse_html, + save_content_to_local_disk, +) + +logger = CustomLogger("opea_dataprep_neo4j_langchain") +logflag = os.getenv("LOGFLAG", False) + + +# Neo4J configuration +NEO4J_URL = os.getenv("NEO4J_URI", "bolt://localhost:7687") +NEO4J_USERNAME = os.getenv("NEO4J_USERNAME", "neo4j") +NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD", "test") + +# LLM/Embedding endpoints +TGI_LLM_ENDPOINT = os.getenv("TGI_LLM_ENDPOINT", "http://localhost:8080") +TGI_LLM_ENDPOINT_NO_RAG = os.getenv("TGI_LLM_ENDPOINT_NO_RAG", "http://localhost:8081") +TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_ENDPOINT") +OPENAI_KEY = os.getenv("OPENAI_API_KEY") + + +@OpeaComponentRegistry.register("OPEA_DATAPREP_NEO4J_LANGCHAIN") +class OpeaNeo4jDataprep(OpeaComponent): + """Dataprep component for Neo4j ingestion and search services.""" + + def __init__(self, name: str, description: str, config: dict = None): + super().__init__(name, ServiceType.DATAPREP.name.lower(), description, config) + self.upload_folder = "./uploaded_files/" + + if OPENAI_KEY: + logger.info("OpenAI API Key is set. 
Verifying its validity...") + openai.api_key = OPENAI_KEY + + try: + response = openai.Engine.list() + logger.info("OpenAI API Key is valid.") + llm = ChatOpenAI(temperature=0, model_name="gpt-4o") + except openai.error.AuthenticationError: + logger.info("OpenAI API Key is invalid.") + except Exception as e: + logger.info(f"An error occurred while verifying the API Key: {e}") + else: + llm = HuggingFaceEndpoint( + endpoint_url=TGI_LLM_ENDPOINT, + max_new_tokens=512, + top_k=40, + top_p=0.9, + temperature=0.8, + timeout=600, + ) + + self.llm_transformer = LLMGraphTransformer( + llm=llm, node_properties=["description"], relationship_properties=["description"] + ) + self.graph = Neo4jGraph(url=NEO4J_URL, username=NEO4J_USERNAME, password=NEO4J_PASSWORD) + + # Perform health check + health_status = self.check_health() + if not health_status: + logger.error("OpeaNeo4jDataprep health check failed.") + + def check_health(self) -> bool: + """Checks the health of the Neo4j service.""" + if self.graph is None: + logger.error("Neo4j graph is not initialized.") + return False + + return True + + def invoke(self, *args, **kwargs): + pass + + def ingest_data_to_neo4j(self, doc_path: DocPath): + """Ingest document to Neo4J.""" + path = doc_path.path + if logflag: + logger.info(f"Parsing document {path}.") + + if path.endswith(".html"): + headers_to_split_on = [ + ("h1", "Header 1"), + ("h2", "Header 2"), + ("h3", "Header 3"), + ] + text_splitter = HTMLHeaderTextSplitter(headers_to_split_on=headers_to_split_on) + else: + text_splitter = RecursiveCharacterTextSplitter( + chunk_size=doc_path.chunk_size, + chunk_overlap=doc_path.chunk_overlap, + add_start_index=True, + separators=get_separators(), + ) + + content = document_loader(path) + + structured_types = [".xlsx", ".csv", ".json", "jsonl"] + _, ext = os.path.splitext(path) + + if ext in structured_types: + chunks = content + else: + chunks = text_splitter.split_text(content) + + if doc_path.process_table and path.endswith(".pdf"): + table_chunks = get_tables_result(path, doc_path.table_strategy) + chunks = chunks + table_chunks + if logflag: + logger.info("Done preprocessing. Created ", len(chunks), " chunks of the original file.") + + doc_list = [Document(page_content=text) for text in chunks] + graph_doc = self.llm_transformer.convert_to_graph_documents(doc_list) + self.graph.add_graph_documents(graph_doc, baseEntityLabel=True, include_source=True) + + if logflag: + logger.info("The graph is built.") + + return True + + async def ingest_files( + self, + files: Optional[Union[UploadFile, List[UploadFile]]] = File(None), + link_list: Optional[str] = Form(None), + chunk_size: int = Form(1500), + chunk_overlap: int = Form(100), + process_table: bool = Form(False), + table_strategy: str = Form("fast"), + ): + """Ingest files/links content into Neo4j database. + + Save in the format of vector[768]. + Returns '{"status": 200, "message": "Data preparation succeeded"}' if successful. + Args: + files (Union[UploadFile, List[UploadFile]], optional): A file or a list of files to be ingested. Defaults to File(None). + link_list (str, optional): A list of links to be ingested. Defaults to Form(None). + chunk_size (int, optional): The size of the chunks to be split. Defaults to Form(1500). + chunk_overlap (int, optional): The overlap between chunks. Defaults to Form(100). + process_table (bool, optional): Whether to process tables in PDFs. Defaults to Form(False). + table_strategy (str, optional): The strategy to process tables in PDFs. Defaults to Form("fast"). 
+ """ + if logflag: + logger.info(f"files:{files}") + logger.info(f"link_list:{link_list}") + + if files: + if not isinstance(files, list): + files = [files] + uploaded_files = [] + for file in files: + encode_file = encode_filename(file.filename) + save_path = self.upload_folder + encode_file + await save_content_to_local_disk(save_path, file) + self.ingest_data_to_neo4j( + DocPath( + path=save_path, + chunk_size=chunk_size, + chunk_overlap=chunk_overlap, + process_table=process_table, + table_strategy=table_strategy, + ) + ) + uploaded_files.append(save_path) + if logflag: + logger.info(f"Successfully saved file {save_path}") + result = {"status": 200, "message": "Data preparation succeeded"} + if logflag: + logger.info(result) + return result + + if link_list: + link_list = json.loads(link_list) # Parse JSON string to list + if not isinstance(link_list, list): + raise HTTPException(status_code=400, detail="link_list should be a list.") + for link in link_list: + encoded_link = encode_filename(link) + save_path = self.upload_folder + encoded_link + ".txt" + content = parse_html([link])[0][0] + try: + await save_content_to_local_disk(save_path, content) + self.ingest_data_to_neo4j( + DocPath( + path=save_path, + chunk_size=chunk_size, + chunk_overlap=chunk_overlap, + process_table=process_table, + table_strategy=table_strategy, + ) + ) + except json.JSONDecodeError: + raise HTTPException(status_code=500, detail="Fail to ingest data into Neo4j.") + + if logflag: + logger.info(f"Successfully saved link {link}") + + result = {"status": 200, "message": "Data preparation succeeded"} + if logflag: + logger.info(result) + return result + + raise HTTPException(status_code=400, detail="Must provide either a file or a string list.") + + async def get_files(self): + """Get file structure from pipecone database in the format of + { + "name": "File Name", + "id": "File Name", + "type": "File", + "parent": "", + }""" + pass + + async def delete_files(self, file_path: str = Body(..., embed=True)): + """Delete file according to `file_path`. + + `file_path`: + - specific file path (e.g. 
/path/to/file.txt) + - "all": delete all files uploaded + """ + pass diff --git a/comps/dataprep/neo4j/llama_index/extract_graph_neo4j.py b/comps/dataprep/src/integrations/neo4j_llamaindex.py similarity index 59% rename from comps/dataprep/neo4j/llama_index/extract_graph_neo4j.py rename to comps/dataprep/src/integrations/neo4j_llamaindex.py index 7785778a41..e970499971 100644 --- a/comps/dataprep/neo4j/llama_index/extract_graph_neo4j.py +++ b/comps/dataprep/src/integrations/neo4j_llamaindex.py @@ -6,6 +6,11 @@ import json import os +# Add the directory containing config.py to the Python path +import sys + +sys.path.append(os.path.dirname(__file__)) + # GraphRAGStore dependencies import re from collections import defaultdict @@ -15,17 +20,6 @@ import networkx as nx import openai import requests -from config import ( - NEO4J_PASSWORD, - NEO4J_URL, - NEO4J_USERNAME, - OPENAI_API_KEY, - OPENAI_EMBEDDING_MODEL, - OPENAI_LLM_MODEL, - TEI_EMBEDDING_ENDPOINT, - TGI_LLM_ENDPOINT, - host_ip, -) from fastapi import File, Form, HTTPException, UploadFile from graspologic.partition import hierarchical_leiden from langchain.text_splitter import RecursiveCharacterTextSplitter @@ -37,12 +31,11 @@ from llama_index.embeddings.text_embeddings_inference import TextEmbeddingsInference from llama_index.graph_stores.neo4j import Neo4jPropertyGraphStore from llama_index.llms.openai import OpenAI -from llama_index.llms.text_generation_inference import TextGenerationInference +from llama_index.llms.openai_like import OpenAILike from neo4j import GraphDatabase -from openai import Client from transformers import AutoTokenizer -from comps import CustomLogger, DocPath, opea_microservices, register_microservice +from comps import CustomLogger, DocPath, OpeaComponent, OpeaComponentRegistry, ServiceType from comps.dataprep.src.utils import ( document_loader, encode_filename, @@ -54,6 +47,9 @@ nest_asyncio.apply() +import time +import traceback + from llama_index.core.async_utils import run_jobs from llama_index.core.bridge.pydantic import BaseModel, Field from llama_index.core.graph_stores.types import KG_NODES_KEY, KG_RELATIONS_KEY, EntityNode, Relation @@ -63,6 +59,24 @@ from llama_index.core.prompts.default_prompts import DEFAULT_KG_TRIPLET_EXTRACT_PROMPT from llama_index.core.schema import BaseNode, TransformComponent +host_ip = os.getenv("host_ip") +# Neo4J configuration +NEO4J_URL = os.getenv("NEO4J_URL", f"bolt://{host_ip}:7687") +NEO4J_USERNAME = os.getenv("NEO4J_USERNAME", "neo4j") +NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD", "neo4jtest") + +# LLM/Embedding endpoints +TGI_LLM_ENDPOINT = os.getenv("TGI_LLM_ENDPOINT", f"http://{host_ip}:6005") +TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_EMBEDDING_ENDPOINT", f"http://{host_ip}:6006") + +OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") +OPENAI_EMBEDDING_MODEL = os.getenv("OPENAI_EMBEDDING_MODEL", "text-embedding-3-small") +OPENAI_LLM_MODEL = os.getenv("OPENAI_LLM_MODEL", "gpt-4o") + +LLM_MODEL_ID = os.getenv("LLM_MODEL_ID", "meta-llama/Meta-Llama-3.1-70B-Instruct") +MAX_INPUT_LEN = os.getenv("MAX_INPUT_LEN", "8192") +MAX_OUTPUT_TOKENS = os.getenv("MAX_OUTPUT_TOKENS", "1024") + class GraphRAGStore(Neo4jPropertyGraphStore): # https://github.com/run-llama/llama_index/blob/main/docs/docs/examples/cookbooks/GraphRAG_v2.ipynb @@ -71,19 +85,17 @@ class GraphRAGStore(Neo4jPropertyGraphStore): max_cluster_size = 100 def __init__(self, username: str, password: str, url: str, llm: LLM): - super().__init__(username=username, password=password, url=url) + 
super().__init__(username=username, password=password, url=url, refresh_schema=False) self.llm = llm self.driver = GraphDatabase.driver(NEO4J_URL, auth=(NEO4J_USERNAME, NEO4J_PASSWORD)) - def generate_community_summary(self, text): + async def generate_community_summary(self, text): """Generate summary for a given text using an LLM.""" - # Get model information from the TGI endpoint - model_name = get_attribute_from_tgi_endpoint(TGI_LLM_ENDPOINT, "model_id") - max_input_length = get_attribute_from_tgi_endpoint(TGI_LLM_ENDPOINT, "max_input_length") + model_name = LLM_MODEL_ID + max_input_length = int(MAX_INPUT_LEN) if not model_name or not max_input_length: raise ValueError(f"Could not retrieve model information from TGI endpoint: {TGI_LLM_ENDPOINT}") - # Get the tokenizer tokenizer = AutoTokenizer.from_pretrained(model_name) messages = [ @@ -105,14 +117,14 @@ def generate_community_summary(self, text): trimmed_messages = trim_messages_to_token_limit(tokenizer, messages, max_input_length) if OPENAI_API_KEY: - response = OpenAI().chat(messages) + response = OpenAI().achat(messages) else: - response = self.llm.chat(trimmed_messages) + response = await self.llm.achat(trimmed_messages) clean_response = re.sub(r"^assistant:\s*", "", str(response)).strip() return clean_response - def build_communities(self): + async def build_communities(self): """Builds communities from the graph and summarizes them.""" nx_graph = self._create_nx_graph() community_hierarchical_clusters = hierarchical_leiden(nx_graph, max_cluster_size=self.max_cluster_size) @@ -124,7 +136,7 @@ def build_communities(self): # self._print_cluster_info(self.entity_info, community_info) self.save_entity_info(self.entity_info) # entity_from_db = self.read_entity_info() # to verify if the data is stored in db - self._summarize_communities(community_info) + await self._summarize_communities(community_info) # sum = self.read_all_community_summaries() # to verify summaries are stored in db def _create_nx_graph(self): @@ -232,29 +244,37 @@ def read_entity_info(self) -> dict: entity_info[record["entity_id"]] = [int(cluster_id) for cluster_id in record["cluster_ids"]] return entity_info - def _summarize_communities(self, community_info): + async def _summarize_communities(self, community_info, num_workers=5): """Generate and store summaries for each community.""" + # Run tasks concurrently with a limited number of workers + tasks = [] for community_id, details in community_info.items(): logger.info(f"Summarizing community {community_id}") details_text = "\n".join(details) + "." 
# Ensure it ends with a period - self.community_summary[community_id] = self.generate_community_summary(details_text) + tasks.append(self._process_community(community_id, details_text)) + await run_jobs( + tasks, + workers=num_workers, + show_progress=True, + desc="Summarize communities", + ) - # To store summaries in neo4j - summary = self.generate_community_summary(details_text) - self.store_community_summary_in_neo4j(community_id, summary) - # self.community_summary[ - # community_id - # ] = self.store_community_summary_in_neo4j(community_id, summary) + async def _process_community(self, community_id, details_text): + """Process a single community and store the summary.""" + summary = await self.generate_community_summary(details_text) + self.store_community_summary_in_neo4j(community_id, summary) def store_community_summary_in_neo4j(self, community_id, summary): """Store the community summary in Neo4j.""" + logger.info(f"Community_id: {community_id} type: {type(community_id)}") with self.driver.session() as session: session.run( """ - MERGE (c:Cluster {id: $community_id}) - SET c.summary = $summary + MATCH (c:Cluster {id: $community_id, name: $community_name}) + SET c.summary = $summary """, - community_id=int(community_id), + community_id=str(community_id), + community_name=str(community_id), summary=summary, ) @@ -347,7 +367,7 @@ def __call__(self, nodes: List[BaseNode], show_progress: bool = False, **kwargs: async def _aextract(self, node: BaseNode) -> BaseNode: """Extract triples from a node.""" assert hasattr(node, "text") - + start = time.time() text = node.get_content(metadata_mode="llm") try: llm_response = await self.llm.apredict( @@ -359,7 +379,9 @@ async def _aextract(self, node: BaseNode) -> BaseNode: except ValueError: entities = [] entities_relationship = [] + logger.info(f"Time taken to LLM and parse: {time.time() - start}") + start = time.time() existing_nodes = node.metadata.pop(KG_NODES_KEY, []) existing_relations = node.metadata.pop(KG_RELATIONS_KEY, []) entity_metadata = node.metadata.copy() @@ -383,6 +405,7 @@ async def _aextract(self, node: BaseNode) -> BaseNode: node.metadata[KG_NODES_KEY] = existing_nodes node.metadata[KG_RELATIONS_KEY] = existing_relations + logger.info(f"Time taken to process entities and relations: {time.time() - start}") logger.info(f"number of extracted nodes {len(existing_nodes), existing_nodes}") logger.info(f"number of extracted relations {len(existing_relations), existing_relations}") return node @@ -469,218 +492,264 @@ def trim_messages_to_token_limit(tokenizer, messages, max_tokens): """Trim the messages to fit within the token limit.""" total_tokens = 0 trimmed_messages = [] + buffer = 100 + effective_max_tokens = max_tokens - buffer for message in messages: tokens = tokenizer.tokenize(message.content) - total_tokens += len(tokens) - if total_tokens > max_tokens: + message_token_count = len(tokens) + if total_tokens + message_token_count > effective_max_tokens: # Trim the message to fit within the remaining token limit - logger.info(f"Trimming messages: {total_tokens} > {max_tokens}") - remaining_tokens = max_tokens - (total_tokens - len(tokens)) + logger.info(f"Trimming messages: {total_tokens + message_token_count} > {effective_max_tokens}") + logger.info(f"message_token_count: {message_token_count}") + remaining_tokens = effective_max_tokens - total_tokens + logger.info(f"remaining_tokens: {remaining_tokens}") tokens = tokens[:remaining_tokens] message.content = tokenizer.convert_tokens_to_string(tokens) 
trimmed_messages.append(message) break else: + total_tokens += message_token_count trimmed_messages.append(message) return trimmed_messages -logger = CustomLogger("prepare_doc_neo4j") +logger = CustomLogger("opea_dataprep_neo4j_llamaindex") logflag = os.getenv("LOGFLAG", False) -upload_folder = "./uploaded_files/" -client = OpenAI() +@OpeaComponentRegistry.register("OPEA_DATAPREP_NEO4J_LLAMAINDEX") +class OpeaNeo4jLlamaIndexDataprep(OpeaComponent): + """Dataprep component for Neo4j ingestion and search services.""" -def ingest_data_to_neo4j(doc_path: DocPath): - """Ingest document to Neo4J.""" - path = doc_path.path - if logflag: - logger.info(f"Parsing document {path}.") + def __init__(self, name: str, description: str, config: dict = None): + super().__init__(name, ServiceType.DATAPREP.name.lower(), description, config) + self.upload_folder = "./uploaded_files/" + self.initialize_graph_store_and_models() - if path.endswith(".html"): - headers_to_split_on = [ - ("h1", "Header 1"), - ("h2", "Header 2"), - ("h3", "Header 3"), - ] - text_splitter = HTMLHeaderTextSplitter(headers_to_split_on=headers_to_split_on) - else: - text_splitter = RecursiveCharacterTextSplitter( - chunk_size=doc_path.chunk_size, - chunk_overlap=doc_path.chunk_overlap, - add_start_index=True, - separators=get_separators(), + # Perform health check + health_status = self.check_health() + if not health_status: + logger.error("OpeaNeo4jDataprep health check failed.") + + def initialize_graph_store_and_models(self): + starttime = time.time() + if OPENAI_API_KEY: + logger.info("OpenAI API Key is set. Verifying its validity...") + openai.api_key = OPENAI_API_KEY + try: + self.llm = OpenAI(temperature=0, model=OPENAI_LLM_MODEL) + self.embed_model = OpenAIEmbedding(model=OPENAI_EMBEDDING_MODEL, embed_batch_size=100) + logger.info("OpenAI API Key is valid.") + except openai.AuthenticationError: + logger.info("OpenAI API Key is invalid.") + except Exception as e: + logger.info(f"An error occurred while verifying the API Key: {e}") + else: + logger.info("NO OpenAI API Key. 
TGI/VLLM/TEI endpoints will be used.") + # works with TGI and VLLM endpoints + self.llm = OpenAILike( + model=LLM_MODEL_ID, + api_base=TGI_LLM_ENDPOINT + "/v1", + api_key="fake", + temperature=0.7, + max_tokens=int(MAX_OUTPUT_TOKENS), # 1512 + timeout=1200, # timeout in seconds) + ) + emb_name = get_attribute_from_tgi_endpoint(TEI_EMBEDDING_ENDPOINT, "model_id") + self.embed_model = TextEmbeddingsInference( + base_url=TEI_EMBEDDING_ENDPOINT, + model_name=emb_name, + timeout=600, # timeout in seconds + embed_batch_size=10, # batch size for embedding + ) + Settings.embed_model = self.embed_model + Settings.llm = self.llm + self.kg_extractor = GraphRAGExtractor( + llm=self.llm, + extract_prompt=KG_TRIPLET_EXTRACT_TMPL, + max_paths_per_chunk=2, + parse_fn=parse_fn, ) + self.graph_store = GraphRAGStore(username=NEO4J_USERNAME, password=NEO4J_PASSWORD, url=NEO4J_URL, llm=self.llm) + self.initialized = True + logger.info(f"Time taken to initialize: {time.time() - starttime}") - content = document_loader(path) # single doc string - document = Document(text=content) + def check_health(self) -> bool: + """Checks the health of the Neo4j service.""" + if self.graph_store is None: + logger.error("Neo4j graph store is not initialized.") + return False - structured_types = [".xlsx", ".csv", ".json", "jsonl"] - _, ext = os.path.splitext(path) + return True - # create llama-index nodes (chunks) - if ext in structured_types: - nodes = [document] - else: - parser = LangchainNodeParser(text_splitter) # wrap text splitting from langchain w node parser - nodes = parser.get_nodes_from_documents([document]) + def invoke(self, *args, **kwargs): + pass - if doc_path.process_table and path.endswith(".pdf"): - table_chunks = get_tables_result(path, doc_path.table_strategy) # list of text - if table_chunks: - table_docs = [Document(text=chunk) for chunk in table_chunks] - nodes = nodes + table_docs - if logflag: - logger.info(f"extract tables nodes: len of table_docs {len(table_docs)}") + def ingest_data_to_neo4j(self, doc_path: DocPath): + """Ingest document to Neo4J.""" + path = doc_path.path + if logflag: + logger.info(f"Parsing document {path}.") + + if path.endswith(".html"): + headers_to_split_on = [ + ("h1", "Header 1"), + ("h2", "Header 2"), + ("h3", "Header 3"), + ] + text_splitter = HTMLHeaderTextSplitter(headers_to_split_on=headers_to_split_on) + else: + text_splitter = RecursiveCharacterTextSplitter( + chunk_size=doc_path.chunk_size, + chunk_overlap=doc_path.chunk_overlap, + add_start_index=True, + separators=get_separators(), + ) - if logflag: - logger.info(f"Done preprocessing. Created {len(nodes)} chunks of the original file.") + content = document_loader(path) # single doc string + document = Document(text=content) - if OPENAI_API_KEY: - logger.info("OpenAI API Key is set. Verifying its validity...") - openai.api_key = OPENAI_API_KEY - try: - llm = OpenAI(temperature=0, model=OPENAI_LLM_MODEL) - embed_model = OpenAIEmbedding(model=OPENAI_EMBEDDING_MODEL, embed_batch_size=100) - logger.info("OpenAI API Key is valid.") - except openai.AuthenticationError: - logger.info("OpenAI API Key is invalid.") - except Exception as e: - logger.info(f"An error occurred while verifying the API Key: {e}") - else: - logger.info("NO OpenAI API Key. 
TGI/TEI endpoints will be used.") - llm_name = get_attribute_from_tgi_endpoint(TGI_LLM_ENDPOINT, "model_id") - llm = TextGenerationInference( - model_url=TGI_LLM_ENDPOINT, - model_name=llm_name, - temperature=0.7, - max_tokens=1512, - timeout=600, # timeout in seconds - ) - emb_name = get_attribute_from_tgi_endpoint(TEI_EMBEDDING_ENDPOINT, "model_id") - embed_model = TextEmbeddingsInference( - base_url=TEI_EMBEDDING_ENDPOINT, - model_name=emb_name, - timeout=600, # timeout in seconds - embed_batch_size=10, # batch size for embedding - ) - Settings.embed_model = embed_model - Settings.llm = llm - kg_extractor = GraphRAGExtractor( - llm=llm, - extract_prompt=KG_TRIPLET_EXTRACT_TMPL, - max_paths_per_chunk=2, - parse_fn=parse_fn, - ) - graph_store = GraphRAGStore(username=NEO4J_USERNAME, password=NEO4J_PASSWORD, url=NEO4J_URL, llm=llm) - - # nodes are the chunked docs to insert - index = PropertyGraphIndex( - nodes=nodes, - llm=llm, - kg_extractors=[kg_extractor], - property_graph_store=graph_store, - embed_model=embed_model or Settings.embed_model, - show_progress=True, - ) - if logflag: - logger.info("The graph is built.") - logger.info(f"Total number of triplets {len(index.property_graph_store.get_triplets())}") + structured_types = [".xlsx", ".csv", ".json", "jsonl"] + _, ext = os.path.splitext(path) - if logflag: - logger.info("Done building communities.") + # create llama-index nodes (chunks) + if ext in structured_types: + nodes = [document] + else: + parser = LangchainNodeParser(text_splitter) # wrap text splitting from langchain w node parser + nodes = parser.get_nodes_from_documents([document]) - return index + if doc_path.process_table and path.endswith(".pdf"): + table_chunks = get_tables_result(path, doc_path.table_strategy) # list of text + if table_chunks: + table_docs = [Document(text=chunk) for chunk in table_chunks] + nodes = nodes + table_docs + if logflag: + logger.info(f"extract tables nodes: len of table_docs {len(table_docs)}") + if logflag: + logger.info(f"Done preprocessing. 
Created {len(nodes)} chunks of the original file.") + + start = time.time() + # nodes are the chunked docs to insert + index = PropertyGraphIndex( + nodes=nodes, + llm=self.llm, + kg_extractors=[self.kg_extractor], + property_graph_store=self.graph_store, + embed_model=self.embed_model or Settings.embed_model, + show_progress=True, + ) + if logflag: + logger.info("The graph is built.") + logger.info(f"Time taken to update PropertyGraphIndex: {time.time() - start}") -def build_communities(index: PropertyGraphIndex): - try: - index.property_graph_store.build_communities() if logflag: logger.info("Done building communities.") - except Exception as e: - logger.error(f"Error building communities: {e}") - return True - - -@register_microservice( - name="opea_service@extract_graph_neo4j", - endpoint="/v1/dataprep", - host="0.0.0.0", - port=6004, - input_datatype=DocPath, - output_datatype=None, -) -async def ingest_documents( - files: Optional[Union[UploadFile, List[UploadFile]]] = File(None), - link_list: Optional[str] = Form(None), - chunk_size: int = Form(1500), - chunk_overlap: int = Form(100), - process_table: bool = Form(False), - table_strategy: str = Form("fast"), -): - if logflag: - logger.info(f"files:{files}") - logger.info(f"link_list:{link_list}") - - if files: - if not isinstance(files, list): - files = [files] - uploaded_files = [] - for file in files: - encode_file = encode_filename(file.filename) - save_path = upload_folder + encode_file - await save_content_to_local_disk(save_path, file) - index = ingest_data_to_neo4j( - DocPath( - path=save_path, - chunk_size=chunk_size, - chunk_overlap=chunk_overlap, - process_table=process_table, - table_strategy=table_strategy, - ) - ) - uploaded_files.append(save_path) - if logflag: - logger.info(f"Successfully saved file {save_path}") - - if link_list: - link_list = json.loads(link_list) # Parse JSON string to list - if not isinstance(link_list, list): - raise HTTPException(status_code=400, detail="link_list should be a list.") - for link in link_list: - encoded_link = encode_filename(link) - save_path = upload_folder + encoded_link + ".txt" - content = parse_html_new([link], chunk_size=chunk_size, chunk_overlap=chunk_overlap) - try: - await save_content_to_local_disk(save_path, content) - index = ingest_data_to_neo4j( - DocPath( - path=save_path, - chunk_size=chunk_size, - chunk_overlap=chunk_overlap, - process_table=process_table, - table_strategy=table_strategy, - ) - ) - except json.JSONDecodeError: - raise HTTPException(status_code=500, detail="Fail to ingest data") + return index + + async def build_communities(self, index: PropertyGraphIndex): + try: + await index.property_graph_store.build_communities() if logflag: - logger.info(f"Successfully saved link {link}") + logger.info("Done building communities.") + except Exception as e: + logger.error(f"Error building communities: {e}") + error_trace = traceback.format_exc() + logger.error(f"Error building communities: {e}\n{error_trace}") + return True - if files or link_list: - build_communities(index) - result = {"status": 200, "message": "Data preparation succeeded"} + async def ingest_files( + self, + files: Optional[Union[UploadFile, List[UploadFile]]] = File(None), + link_list: Optional[str] = Form(None), + chunk_size: int = Form(1500), + chunk_overlap: int = Form(100), + process_table: bool = Form(False), + table_strategy: str = Form("fast"), + skip_ingestion: bool = Form(False), + ): + """Ingest files/links content into Neo4j database. + + Save in the format of vector[768]. 
+ Returns '{"status": 200, "message": "Data preparation succeeded"}' if successful. + Args: + files (Union[UploadFile, List[UploadFile]], optional): A file or a list of files to be ingested. Defaults to File(None). + link_list (str, optional): A list of links to be ingested. Defaults to Form(None). + chunk_size (int, optional): The size of the chunks to be split. Defaults to Form(1500). + chunk_overlap (int, optional): The overlap between chunks. Defaults to Form(100). + process_table (bool, optional): Whether to process tables in PDFs. Defaults to Form(False). + table_strategy (str, optional): The strategy to process tables in PDFs. Defaults to Form("fast"). + """ if logflag: - logger.info(result) - return result - else: - raise HTTPException(status_code=400, detail="Must provide either a file or a string list.") - - -if __name__ == "__main__": - opea_microservices["opea_service@extract_graph_neo4j"].start() + logger.info(f"files:{files}") + logger.info(f"link_list:{link_list}") + logger.info(f"skip_ingestion:{skip_ingestion}") + + if skip_ingestion: + self.initialize_graph_store_and_models() + index = PropertyGraphIndex.from_existing( + property_graph_store=self.graph_store, + embed_model=self.embed_model or Settings.embed_model, + embed_kg_nodes=True, + ) + else: + if files: + if not isinstance(files, list): + files = [files] + uploaded_files = [] + for file in files: + encode_file = encode_filename(file.filename) + save_path = self.upload_folder + encode_file + await save_content_to_local_disk(save_path, file) + starttime = time.time() + index = self.ingest_data_to_neo4j( + DocPath( + path=save_path, + chunk_size=chunk_size, + chunk_overlap=chunk_overlap, + process_table=process_table, + table_strategy=table_strategy, + ) + ) + logger.info(f"Time taken to ingest file:{encode_file} {time.time() - starttime}") + uploaded_files.append(save_path) + if logflag: + logger.info(f"Successfully saved file {save_path}") + + if link_list: + link_list = json.loads(link_list) # Parse JSON string to list + if not isinstance(link_list, list): + raise HTTPException(status_code=400, detail="link_list should be a list.") + for link in link_list: + encoded_link = encode_filename(link) + save_path = self.upload_folder + encoded_link + ".txt" + content = parse_html_new([link], chunk_size=chunk_size, chunk_overlap=chunk_overlap) + try: + await save_content_to_local_disk(save_path, content) + index = self.ingest_data_to_neo4j( + DocPath( + path=save_path, + chunk_size=chunk_size, + chunk_overlap=chunk_overlap, + process_table=process_table, + table_strategy=table_strategy, + ) + ) + except json.JSONDecodeError: + raise HTTPException(status_code=500, detail="Fail to ingest data") + + if logflag: + logger.info(f"Successfully saved link {link}") + + if files or link_list or skip_ingestion: + await self.build_communities(index) + result = {"status": 200, "message": "Data preparation succeeded"} + if logflag: + logger.info(result) + return result + else: + raise HTTPException(status_code=400, detail="Must provide either a file or a string list.") diff --git a/comps/dataprep/src/integrations/opensearch.py b/comps/dataprep/src/integrations/opensearch.py new file mode 100644 index 0000000000..6f4b10bbd1 --- /dev/null +++ b/comps/dataprep/src/integrations/opensearch.py @@ -0,0 +1,512 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import json +import os +from typing import List, Optional, Union + +from fastapi import Body, File, Form, HTTPException, UploadFile +from 
langchain.text_splitter import RecursiveCharacterTextSplitter +from langchain_community.embeddings import HuggingFaceBgeEmbeddings +from langchain_community.vectorstores import OpenSearchVectorSearch +from langchain_huggingface import HuggingFaceEndpointEmbeddings +from langchain_text_splitters import HTMLHeaderTextSplitter +from opensearchpy import OpenSearch + +from comps import CustomLogger, DocPath, OpeaComponent, OpeaComponentRegistry, ServiceType +from comps.dataprep.src.utils import ( + create_upload_folder, + document_loader, + encode_filename, + get_separators, + get_tables_result, + parse_html, + remove_folder_with_ignore, + save_content_to_local_disk, +) + +logger = CustomLogger("opea_dataprep_opensearch") +logflag = os.getenv("LOGFLAG", False) + + +class Config: + """Configuration class to store environment variables and default settings.""" + + EMBED_MODEL = os.getenv("EMBED_MODEL", "BAAI/bge-base-en-v1.5") + OPENSEARCH_HOST = os.getenv("OPENSEARCH_HOST", "localhost") + OPENSEARCH_PORT = int(os.getenv("OPENSEARCH_PORT", 9200)) + OPENSEARCH_INITIAL_ADMIN_PASSWORD = os.getenv("OPENSEARCH_INITIAL_ADMIN_PASSWORD", "") + OPENSEARCH_SSL = os.getenv("OPENSEARCH_SSL", "false").lower() == "true" + OPENSEARCH_URL = os.getenv("OPENSEARCH_URL", None) + INDEX_NAME = os.getenv("INDEX_NAME", "rag-opensearch") + KEY_INDEX_NAME = os.getenv("KEY_INDEX_NAME", "file-keys") + TIMEOUT_SECONDS = int(os.getenv("TIMEOUT_SECONDS", 600)) + SEARCH_BATCH_SIZE = int(os.getenv("SEARCH_BATCH_SIZE", 10)) + + @staticmethod + def get_boolean_env_var(var_name, default_value=False): + """Retrieve the boolean value of an environment variable.""" + true_values = {"true", "1", "t", "y", "yes"} + false_values = {"false", "0", "f", "n", "no"} + value = os.getenv(var_name, "").lower() + if value in true_values: + return True + elif value in false_values: + return False + else: + return default_value + + @staticmethod + def format_opensearch_conn_from_env(): + """Format the OpenSearch connection URL based on environment variables.""" + opensearch_url = Config.OPENSEARCH_URL + if opensearch_url: + return opensearch_url + else: + start = "https://" if Config.OPENSEARCH_SSL else "http://" + return f"{start}{Config.OPENSEARCH_HOST}:{Config.OPENSEARCH_PORT}" + + +# Initialize the OpenSearch URL based on configuration +OPENSEARCH_URL = Config.format_opensearch_conn_from_env() + + +@OpeaComponentRegistry.register("OPEA_DATAPREP_OPENSEARCH") +class OpeaOpenSearchDataprep(OpeaComponent): + """Dataprep component for OpenSearch ingestion and search services.""" + + def __init__(self, name: str, description: str, config: dict = None): + self.upload_folder = "./uploaded_files/" + super().__init__(name, ServiceType.DATAPREP.name.lower(), description, config) + # Initialize embeddings + tei_embedding_endpoint = os.getenv("TEI_ENDPOINT") + if tei_embedding_endpoint: + self.embeddings = HuggingFaceEndpointEmbeddings(model=tei_embedding_endpoint) + else: + self.embeddings = HuggingFaceBgeEmbeddings(model_name=Config.EMBED_MODEL) + + # OpenSearch client setup + self.auth = ("admin", Config.OPENSEARCH_INITIAL_ADMIN_PASSWORD) + self.opensearch_client = OpenSearchVectorSearch( + opensearch_url=OPENSEARCH_URL, + index_name=Config.INDEX_NAME, + embedding_function=self.embeddings, + http_auth=self.auth, + use_ssl=True, + verify_certs=False, + ssl_assert_hostname=False, + ssl_show_warn=False, + ) + + # Perform health check + health_status = self.check_health() + if not health_status: + logger.error("OpeaOpenSearchDataprep health check failed.") 
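A minimal sketch of how the connection URL used by this OpenSearch integration is resolved, assuming illustrative values for the host and SSL flag — only the environment-variable names (OPENSEARCH_URL, OPENSEARCH_SSL, OPENSEARCH_HOST, OPENSEARCH_PORT) come from the Config class above. The logic is recomputed outside the class because Config reads the environment once at import time:

import os

os.environ.setdefault("OPENSEARCH_HOST", "opensearch-node")  # hypothetical host
os.environ.setdefault("OPENSEARCH_PORT", "9200")
os.environ.setdefault("OPENSEARCH_SSL", "true")

explicit_url = os.getenv("OPENSEARCH_URL")  # an explicit URL takes priority when set
if explicit_url:
    url = explicit_url
else:
    ssl_enabled = os.getenv("OPENSEARCH_SSL", "false").lower() == "true"
    scheme = "https://" if ssl_enabled else "http://"
    url = f"{scheme}{os.getenv('OPENSEARCH_HOST')}:{os.getenv('OPENSEARCH_PORT')}"

print(url)  # e.g. https://opensearch-node:9200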
+ + def check_health(self) -> bool: + """Checks the health of the OpenSearch service.""" + try: + client = OpenSearch( + hosts=[{"host": Config.OPENSEARCH_HOST, "port": Config.OPENSEARCH_PORT}], + http_auth=self.auth, + use_ssl=True, + verify_certs=False, + ) + info = client.info() + logger.info(f"[ health check ] OpenSearch info: {info}") + return True + except Exception as e: + logger.error(f"[ health check ] Failed to connect to OpenSearch: {e}") + return False + + def invoke(self, *args, **kwargs): + pass + + def check_index_existence(self, client, index_name): + """Check if an index exists in OpenSearch.""" + try: + exists = client.index_exists(index_name) or False + if exists: + logger.info(f"[ check index existence ] Index {index_name} exists.") + else: + logger.info(f"[ check index existence ] Index {index_name} does not exist.") + return exists + except Exception as e: + logger.error(f"[ check index existence ] Error checking index {index_name}: {e}") + return False + + def create_index(self, client, index_name: str = Config.KEY_INDEX_NAME): + """Create a new index in OpenSearch.""" + try: + index_body = { + "mappings": { + "properties": { + "file_name": {"type": "text"}, + "key_ids": {"type": "text"}, + } + } + } + client.client.indices.create(index_name, body=index_body) + logger.info(f"[ create index ] Index {index_name} created successfully.") + return True + except Exception as e: + logger.error(f"[ create index ] Failed to create index {index_name}: {e}") + return False + + def store_by_id(self, client, key, value): + if logflag: + logger.info(f"[ store by id ] storing ids of {key}") + try: + client.client.index( + index=Config.KEY_INDEX_NAME, + body={"file_name": f"file:{key}", "key_ids": value}, + id="file:" + key, + refresh=True, + ) + if logflag: + logger.info(f"[ store by id ] store document success. 
id: file:{key}") + except Exception as e: + if logflag: + logger.info(f"[ store by id ] fail to store document file:{key}: {e}") + return False + return True + + def search_by_id(self, client, doc_id): + if logflag: + logger.info(f"[ search by id ] searching docs of {doc_id}") + try: + result = client.client.get(index=Config.KEY_INDEX_NAME, id=doc_id) + if result["found"]: + if logflag: + logger.info(f"[ search by id ] search success of {doc_id}: {result}") + return result + return None + except Exception as e: + if logflag: + logger.info(f"[ search by id ] fail to search docs of {doc_id}: {e}") + return None + + def drop_index(self, client, index_name): + if logflag: + logger.info(f"[ drop index ] dropping index {index_name}") + try: + client.client.indices.delete(index=index_name) + if logflag: + logger.info(f"[ drop index ] index {index_name} deleted") + except Exception as e: + if logflag: + logger.info(f"[ drop index ] index {index_name} delete failed: {e}") + return False + return True + + def delete_by_id(self, client, doc_id): + try: + response = client.client.delete(index=Config.KEY_INDEX_NAME, id=doc_id) + if response["result"] == "deleted": + if logflag: + logger.info(f"[ delete by id ] delete id success: {doc_id}") + return True + else: + if logflag: + logger.info(f"[ delete by id ] delete id failed: {doc_id}") + return False + except Exception as e: + if logflag: + logger.info(f"[ delete by id ] fail to delete ids {doc_id}: {e}") + return False + + def ingest_chunks_to_opensearch(self, file_name: str, chunks: List): + """Ingest chunks of text data to OpenSearch.""" + batch_size = 32 + num_chunks = len(chunks) + file_ids = [] + + for i in range(0, num_chunks, batch_size): + batch_chunks = chunks[i : i + batch_size] + keys = self.opensearch_client.add_texts( + texts=batch_chunks, metadatas=[{"source": file_name}] * len(batch_chunks) + ) + file_ids.extend(keys) + logger.info(f"[ ingest chunks ] Processed batch {i//batch_size + 1}/{(num_chunks-1)//batch_size + 1}") + + if not self.check_index_existence(self.opensearch_client, Config.KEY_INDEX_NAME): + self.create_index(self.opensearch_client) + + try: + self.store_by_id(self.opensearch_client, key=file_name, value="#".join(file_ids)) + except Exception as e: + logger.error(f"[ ingest chunks ] Failed to store chunks of file {file_name}: {e}") + raise HTTPException(status_code=500, detail=f"Failed to store chunks of file {file_name}.") + return True + + def ingest_data_to_opensearch(self, doc_path: DocPath): + """Ingest document to OpenSearch.""" + path = doc_path.path + if logflag: + logger.info(f"[ ingest data ] Parsing document {path}.") + + if path.endswith(".html"): + headers_to_split_on = [ + ("h1", "Header 1"), + ("h2", "Header 2"), + ("h3", "Header 3"), + ] + text_splitter = HTMLHeaderTextSplitter(headers_to_split_on=headers_to_split_on) + else: + text_splitter = RecursiveCharacterTextSplitter( + chunk_size=doc_path.chunk_size, + chunk_overlap=doc_path.chunk_overlap, + add_start_index=True, + separators=get_separators(), + ) + + content = document_loader(path) + if logflag: + logger.info("[ ingest data ] file content loaded") + + structured_types = [".xlsx", ".csv", ".json", "jsonl"] + _, ext = os.path.splitext(path) + + if ext in structured_types: + chunks = content + else: + chunks = text_splitter.split_text(content) + + ### Specially processing for the table content in PDFs + if doc_path.process_table and path.endswith(".pdf"): + table_chunks = get_tables_result(path, doc_path.table_strategy) + chunks = chunks + 
table_chunks + if logflag: + logger.info(f"[ ingest data ] Done preprocessing. Created {len(chunks)} chunks of the given file.") + + file_name = doc_path.path.split("/")[-1] + return self.ingest_chunks_to_opensearch(file_name, chunks) + + def search_all_documents(self, index_name, offset, search_batch_size): + try: + response = self.opensearch_client.client.search( + index=index_name, + body={ + "query": {"match_all": {}}, + "from": offset, # Starting position + "size": search_batch_size, # Number of results to return + }, + ) + # Get total number of matching documents + total_hits = response["hits"]["total"]["value"] + # Get the documents from the current batch + documents = response["hits"]["hits"] + + return {"total_hits": total_hits, "documents": documents} + + except Exception as e: + print(f"Error performing search: {e}") + return None + + async def ingest_files( + self, + files: Optional[Union[UploadFile, List[UploadFile]]] = File(None), + link_list: Optional[str] = Form(None), + chunk_size: int = Form(1500), + chunk_overlap: int = Form(100), + process_table: bool = Form(False), + table_strategy: str = Form("fast"), + ): + """Ingest files/links content into opensearch database. + + Save in the format of vector[768]. + Returns '{"status": 200, "message": "Data preparation succeeded"}' if successful. + Args: + files (Union[UploadFile, List[UploadFile]], optional): A file or a list of files to be ingested. Defaults to File(None). + link_list (str, optional): A list of links to be ingested. Defaults to Form(None). + chunk_size (int, optional): The size of the chunks to be split. Defaults to Form(1500). + chunk_overlap (int, optional): The overlap between chunks. Defaults to Form(100). + process_table (bool, optional): Whether to process tables in PDFs. Defaults to Form(False). + table_strategy (str, optional): The strategy to process tables in PDFs. Defaults to Form("fast"). + """ + if logflag: + logger.info(f"[ upload ] files:{files}") + logger.info(f"[ upload ] link_list:{link_list}") + + if files: + if not isinstance(files, list): + files = [files] + uploaded_files = [] + + for file in files: + encode_file = encode_filename(file.filename) + doc_id = "file:" + encode_file + if logflag: + logger.info(f"[ upload ] processing file {doc_id}") + + # check whether the file already exists + key_ids = None + try: + document = self.search_by_id(self.opensearch_client, doc_id) + if document: + if logflag: + logger.info(f"[ upload ] File {file.filename} already exists.") + key_ids = document["_id"] + except Exception as e: + logger.info(f"[ upload ] File {file.filename} does not exist.") + if key_ids: + raise HTTPException( + status_code=400, + detail=f"Uploaded file {file.filename} already exists. 
Please change file name.", + ) + + save_path = self.upload_folder + encode_file + await save_content_to_local_disk(save_path, file) + self.ingest_data_to_opensearch( + DocPath( + path=save_path, + chunk_size=chunk_size, + chunk_overlap=chunk_overlap, + process_table=process_table, + table_strategy=table_strategy, + ) + ) + uploaded_files.append(save_path) + if logflag: + logger.info(f"[ upload ] Successfully saved file {save_path}") + + result = {"status": 200, "message": "Data preparation succeeded"} + if logflag: + logger.info(result) + return result + + if link_list: + link_list = json.loads(link_list) # Parse JSON string to list + if not isinstance(link_list, list): + raise HTTPException(status_code=400, detail=f"Link_list {link_list} should be a list.") + for link in link_list: + encoded_link = encode_filename(link) + doc_id = "file:" + encoded_link + ".txt" + if logflag: + logger.info(f"[ upload ] processing link {doc_id}") + + # check whether the link file already exists + key_ids = None + try: + document = self.search_by_id(self.opensearch_client, doc_id) + if document: + if logflag: + logger.info(f"[ upload ] Link {link} already exists.") + key_ids = document["_id"] + except Exception as e: + logger.info(f"[ upload ] Link {link} does not exist. Keep storing.") + if key_ids: + raise HTTPException( + status_code=400, detail=f"Uploaded link {link} already exists. Please change another link." + ) + + save_path = self.upload_folder + encoded_link + ".txt" + content = parse_html([link])[0][0] + await save_content_to_local_disk(save_path, content) + self.ingest_data_to_opensearch( + DocPath( + path=save_path, + chunk_size=chunk_size, + chunk_overlap=chunk_overlap, + process_table=process_table, + table_strategy=table_strategy, + ) + ) + if logflag: + logger.info(f"[ upload ] Successfully saved link list {link_list}") + return {"status": 200, "message": "Data preparation succeeded"} + + raise HTTPException(status_code=400, detail="Must provide either a file or a string list.") + + async def get_files(self): + """Get file structure from opensearch database in the format of + { + "name": "File Name", + "id": "File Name", + "type": "File", + "parent": "", + }""" + if logflag: + logger.info("[ get ] start to get file structure") + + offset = 0 + file_list = [] + + # check index existence + res = self.check_index_existence(self.opensearch_client, Config.KEY_INDEX_NAME) + if not res: + if logflag: + logger.info(f"[ get ] index {Config.KEY_INDEX_NAME} does not exist") + return file_list + + while True: + response = self.search_all_documents(Config.KEY_INDEX_NAME, offset, Config.SEARCH_BATCH_SIZE) + # no doc retrieved + if len(response) < 2: + break + + def format_opensearch_results(response, file_list): + for document in response["documents"]: + file_id = document["_id"] + file_list.append({"name": file_id, "id": file_id, "type": "File", "parent": ""}) + + file_list = format_opensearch_results(response, file_list) + offset += Config.SEARCH_BATCH_SIZE + # last batch + if (len(response) - 1) // 2 < Config.SEARCH_BATCH_SIZE: + break + if logflag: + logger.info(f"[get] final file_list: {file_list}") + return file_list + + async def delete_files(self, file_path: str = Body(..., embed=True)): + """Delete file according to `file_path`. + + `file_path`: + - specific file path (e.g. 
/path/to/file.txt) + - "all": delete all files uploaded + """ + # delete all uploaded files + if file_path == "all": + if logflag: + logger.info("[ delete ] delete all files") + + # drop index KEY_INDEX_NAME + if self.check_index_existence(self.opensearch_client, Config.KEY_INDEX_NAME): + try: + assert self.drop_index(index_name=Config.KEY_INDEX_NAME) + except Exception as e: + if logflag: + logger.info(f"[ delete ] {e}. Fail to drop index {Config.KEY_INDEX_NAME}.") + raise HTTPException(status_code=500, detail=f"Fail to drop index {Config.KEY_INDEX_NAME}.") + else: + logger.info(f"[ delete ] Index {Config.KEY_INDEX_NAME} does not exits.") + + # drop index INDEX_NAME + if self.check_index_existence(self.opensearch_client, Config.INDEX_NAME): + try: + assert self.drop_index(index_name=Config.INDEX_NAME) + except Exception as e: + if logflag: + logger.info(f"[ delete ] {e}. Fail to drop index {Config.INDEX_NAME}.") + raise HTTPException(status_code=500, detail=f"Fail to drop index {Config.INDEX_NAME}.") + else: + if logflag: + logger.info(f"[ delete ] Index {Config.INDEX_NAME} does not exits.") + + # delete files on local disk + try: + remove_folder_with_ignore(self.upload_folder) + except Exception as e: + if logflag: + logger.info(f"[ delete ] {e}. Fail to delete {self.upload_folder}.") + raise HTTPException(status_code=500, detail=f"Fail to delete {self.upload_folder}.") + + if logflag: + logger.info("[ delete ] successfully delete all files.") + create_upload_folder(self.upload_folder) + if logflag: + logger.info({"status": True}) + return {"status": True} + else: + raise HTTPException(status_code=404, detail="Single file deletion is not implemented yet") diff --git a/comps/dataprep/src/integrations/pgvect.py b/comps/dataprep/src/integrations/pgvect.py new file mode 100644 index 0000000000..43b38e5d6d --- /dev/null +++ b/comps/dataprep/src/integrations/pgvect.py @@ -0,0 +1,356 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import json +import os +from pathlib import Path +from typing import List, Optional, Union +from urllib.parse import urlparse + +import psycopg2 +from fastapi import Body, File, Form, HTTPException, UploadFile +from langchain.text_splitter import RecursiveCharacterTextSplitter +from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceHubEmbeddings +from langchain_community.vectorstores import PGVector + +from comps import CustomLogger, DocPath, OpeaComponent, OpeaComponentRegistry, ServiceType +from comps.dataprep.src.utils import ( + create_upload_folder, + document_loader, + encode_filename, + get_file_structure, + get_separators, + parse_html_new, + remove_folder_with_ignore, + save_content_to_local_disk, +) + +logger = CustomLogger("opea_dataprep_pgvector") +logflag = os.getenv("LOGFLAG", False) + +# Embedding model +EMBED_MODEL = os.getenv("EMBED_MODEL", "BAAI/bge-base-en-v1.5") + +PG_CONNECTION_STRING = os.getenv("PG_CONNECTION_STRING", "localhost") + +# Vector Index Configuration +INDEX_NAME = os.getenv("INDEX_NAME", "rag-pgvector") + +# chunk parameters +CHUNK_SIZE = os.getenv("CHUNK_SIZE", 1500) +CHUNK_OVERLAP = os.getenv("CHUNK_OVERLAP", 100) + + +@OpeaComponentRegistry.register("OPEA_DATAPREP_PGVECTOR") +class OpeaPgvectorDataprep(OpeaComponent): + """Dataprep component for PgVector ingestion and search services.""" + + def __init__(self, name: str, description: str, config: dict = None): + super().__init__(name, ServiceType.DATAPREP.name.lower(), description, config) + self.tei_embedding_endpoint 
= os.getenv("TEI_ENDPOINT") + self.upload_folder = "./uploaded_files/" + # Create vectorstore + if self.tei_embedding_endpoint: + # create embeddings using TEI endpoint service + self.embedder = HuggingFaceHubEmbeddings(model=self.tei_embedding_endpoint) + else: + # create embeddings using local embedding model + self.embedder = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) + + # Perform health check + health_status = self.check_health() + if not health_status: + logger.error("OpeaPgvectorDataprep health check failed.") + + def check_health(self) -> bool: + """Checks the health of the PgVector service.""" + try: + result = urlparse(PG_CONNECTION_STRING) + username = result.username + password = result.password + database = result.path[1:] + hostname = result.hostname + port = result.port + + psycopg2.connect(database=database, user=username, password=password, host=hostname, port=port) + return True + except psycopg2.Error as e: + if logflag: + logger.info(f"Error connect to PG vectorstore: {e}") + return False + + except Exception as e: + if logflag: + logger.info(f"An unexpected error occurred: {e}") + return False + + def invoke(self, *args, **kwargs): + pass + + async def save_file_to_local_disk(self, save_path: str, file): + save_path = Path(save_path) + with save_path.open("wb") as fout: + try: + content = await file.read() + fout.write(content) + except Exception as e: + if logflag: + logger.info(f"Write file failed. Exception: {e}") + raise HTTPException(status_code=500, detail=f"Write file {save_path} failed. Exception: {e}") + + def delete_embeddings(self, doc_name): + """Get all ids from a vectorstore.""" + try: + result = urlparse(PG_CONNECTION_STRING) + username = result.username + password = result.password + database = result.path[1:] + hostname = result.hostname + port = result.port + + connection = psycopg2.connect(database=database, user=username, password=password, host=hostname, port=port) + + # Create a cursor object to execute SQL queries + + if logflag: + logger.info(f"Deleting {doc_name} from vectorstore") + + cur = connection.cursor() + if doc_name == "all": + cur.execute( + "DELETE FROM langchain_pg_collection lpe WHERE lpe.name = %(index_name)s", + {"index_name": INDEX_NAME}, + ) + else: + cur.execute( + "DELETE FROM langchain_pg_embedding lpe WHERE lpe.uuid in (SELECT lpc.uuid\ + FROM langchain_pg_embedding lpc where lpc.cmetadata ->> 'doc_name' = %(doc_name)s)", + {"doc_name": doc_name}, + ) + + connection.commit() # commit the transaction + cur.close() + + return True + + except psycopg2.Error as e: + if logflag: + logger.info(f"Error deleting document from vectorstore: {e}") + return False + + except Exception as e: + if logflag: + logger.info(f"An unexpected error occurred: {e}") + return False + + def ingest_doc_to_pgvector(self, doc_path: DocPath): + """Ingest document to PGVector.""" + doc_path = doc_path.path + if logflag: + logger.info(f"Parsing document {doc_path}.") + + text_splitter = RecursiveCharacterTextSplitter( + chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP, add_start_index=True, separators=get_separators() + ) + + content = document_loader(doc_path) + + structured_types = [".xlsx", ".csv", ".json", "jsonl"] + _, ext = os.path.splitext(doc_path) + + if ext in structured_types: + chunks = content + else: + chunks = text_splitter.split_text(content) + + if logflag: + logger.info("Done preprocessing. 
Created ", len(chunks), " chunks of the original file.") + logger.info("PG Connection", PG_CONNECTION_STRING) + metadata = [dict({"doc_name": str(doc_path)})] + + # Batch size + batch_size = 32 + num_chunks = len(chunks) + for i in range(0, num_chunks, batch_size): + batch_chunks = chunks[i : i + batch_size] + batch_texts = batch_chunks + + _ = PGVector.from_texts( + texts=batch_texts, + embedding=self.embedder, + metadatas=metadata, + collection_name=INDEX_NAME, + connection_string=PG_CONNECTION_STRING, + ) + if logflag: + logger.info(f"Processed batch {i//batch_size + 1}/{(num_chunks-1)//batch_size + 1}") + return True + + async def ingest_link_to_pgvector(self, link_list: List[str]): + text_splitter = RecursiveCharacterTextSplitter( + chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP, add_start_index=True, separators=get_separators() + ) + + for link in link_list: + texts = [] + content = parse_html_new([link], chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP) + if logflag: + logger.info(f"[ ingest link ] link: {link} content: {content}") + encoded_link = encode_filename(link) + save_path = self.upload_folder + encoded_link + ".txt" + doc_path = self.upload_folder + link + ".txt" + if logflag: + logger.info(f"[ ingest link ] save_path: {save_path}") + await save_content_to_local_disk(save_path, content) + metadata = [dict({"doc_name": str(doc_path)})] + + chunks = text_splitter.split_text(content) + + batch_size = 32 + num_chunks = len(chunks) + for i in range(0, num_chunks, batch_size): + batch_chunks = chunks[i : i + batch_size] + batch_texts = batch_chunks + + _ = PGVector.from_texts( + texts=batch_texts, + embedding=self.embedder, + metadatas=metadata, + collection_name=INDEX_NAME, + connection_string=PG_CONNECTION_STRING, + ) + if logflag: + logger.info(f"Processed batch {i//batch_size + 1}/{(num_chunks-1)//batch_size + 1}") + + return True + + async def ingest_files( + self, + files: Optional[Union[UploadFile, List[UploadFile]]] = File(None), + link_list: Optional[str] = Form(None), + chunk_size: int = Form(1500), + chunk_overlap: int = Form(100), + process_table: bool = Form(False), + table_strategy: str = Form("fast"), + ): + """Ingest files/links content into pgvector database. + + Save in the format of vector[768]. + Returns '{"status": 200, "message": "Data preparation succeeded"}' if successful. + Args: + files (Union[UploadFile, List[UploadFile]], optional): A file or a list of files to be ingested. Defaults to File(None). + link_list (str, optional): A list of links to be ingested. Defaults to Form(None). + chunk_size (int, optional): The size of the chunks to be split. Defaults to Form(1500). + chunk_overlap (int, optional): The overlap between chunks. Defaults to Form(100). + process_table (bool, optional): Whether to process tables in PDFs. Defaults to Form(False). + table_strategy (str, optional): The strategy to process tables in PDFs. Defaults to Form("fast"). 
+ """ + if logflag: + logger.info(f"files:{files}") + logger.info(f"link_list:{link_list}") + if files and link_list: + raise HTTPException(status_code=400, detail="Provide either a file or a string list, not both.") + + if files: + if not isinstance(files, list): + files = [files] + + if not os.path.exists(self.upload_folder): + Path(self.upload_folder).mkdir(parents=True, exist_ok=True) + for file in files: + save_path = self.upload_folder + file.filename + await self.save_file_to_local_disk(save_path, file) + + self.ingest_doc_to_pgvector(DocPath(path=save_path)) + if logflag: + logger.info(f"Successfully saved file {save_path}") + result = {"status": 200, "message": "Data preparation succeeded"} + if logflag: + logger.info(result) + return result + + if link_list: + try: + link_list = json.loads(link_list) # Parse JSON string to list + if not isinstance(link_list, list): + raise HTTPException(status_code=400, detail="link_list should be a list.") + await self.ingest_link_to_pgvector(link_list) + if logflag: + logger.info(f"Successfully saved link list {link_list}") + result = {"status": 200, "message": "Data preparation succeeded"} + if logflag: + logger.info(result) + return result + except json.JSONDecodeError: + raise HTTPException(status_code=400, detail="Invalid JSON format for link_list.") + + raise HTTPException(status_code=400, detail="Must provide either a file or a string list.") + + async def get_files(self): + """Get file structure from pgvector database in the format of + { + "name": "File Name", + "id": "File Name", + "type": "File", + "parent": "", + }""" + if logflag: + logger.info("[ dataprep - get file ] start to get file structure") + + if not Path(self.upload_folder).exists(): + if logflag: + logger.info("No file uploaded, return empty list.") + return [] + + file_content = get_file_structure(self.upload_folder) + if logflag: + logger.info(file_content) + return file_content + + async def delete_files(self, file_path: str = Body(..., embed=True)): + """Delete file according to `file_path`. + + `file_path`: + - specific file path (e.g. /path/to/file.txt) + - "all": delete all files uploaded + """ + if file_path == "all": + if logflag: + logger.info("[dataprep - del] delete all files") + remove_folder_with_ignore(self.upload_folder) + assert self.delete_embeddings(file_path) + if logflag: + logger.info("[dataprep - del] successfully delete all files.") + create_upload_folder(self.upload_folder) + if logflag: + logger.info({"status": True}) + return {"status": True} + + delete_path = Path(self.upload_folder + "/" + encode_filename(file_path)) + doc_path = self.upload_folder + file_path + if logflag: + logger.info(f"[dataprep - del] delete_path: {delete_path}") + + # partially delete files/folders + if delete_path.exists(): + # delete file + if delete_path.is_file(): + try: + assert self.delete_embeddings(doc_path) + delete_path.unlink() + except Exception as e: + if logflag: + logger.info(f"[dataprep - del] fail to delete file {delete_path}: {e}") + logger.info({"status": False}) + return {"status": False} + # delete folder + else: + if logflag: + logger.info("[dataprep - del] delete folder is not supported for now.") + logger.info({"status": False}) + return {"status": False} + if logflag: + logger.info({"status": True}) + return {"status": True} + else: + raise HTTPException(status_code=404, detail="File/folder not found. 
Please check del_path.") diff --git a/comps/dataprep/src/integrations/pipecone.py b/comps/dataprep/src/integrations/pipecone.py new file mode 100644 index 0000000000..33ffeea4b6 --- /dev/null +++ b/comps/dataprep/src/integrations/pipecone.py @@ -0,0 +1,322 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import json +import os +from pathlib import Path +from typing import List, Optional, Union + +from fastapi import Body, File, Form, HTTPException, UploadFile +from langchain.text_splitter import RecursiveCharacterTextSplitter +from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceHubEmbeddings +from langchain_pinecone import PineconeVectorStore +from langchain_text_splitters import HTMLHeaderTextSplitter +from pinecone import Pinecone, ServerlessSpec + +from comps import CustomLogger, DocPath, OpeaComponent, OpeaComponentRegistry, ServiceType +from comps.dataprep.src.utils import ( + create_upload_folder, + document_loader, + encode_filename, + get_file_structure, + get_separators, + get_tables_result, + parse_html_new, + remove_folder_with_ignore, + save_content_to_local_disk, +) + +logger = CustomLogger("opea_dataprep_pinecone") +logflag = os.getenv("LOGFLAG", False) + +# Embedding model +EMBED_MODEL = os.getenv("EMBED_MODEL", "BAAI/bge-base-en-v1.5") + +# Pinecone configuration +PINECONE_API_KEY = os.getenv("PINECONE_API_KEY", "") +PINECONE_INDEX_NAME = os.getenv("PINECONE_INDEX_NAME", "langchain-test") + +# LLM/Embedding endpoints +TGI_LLM_ENDPOINT = os.getenv("TGI_LLM_ENDPOINT", "http://localhost:8080") +TGI_LLM_ENDPOINT_NO_RAG = os.getenv("TGI_LLM_ENDPOINT_NO_RAG", "http://localhost:8081") +TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_EMBEDDING_ENDPOINT") + + +@OpeaComponentRegistry.register("OPEA_DATAPREP_PINECONE") +class OpeaPineConeDataprep(OpeaComponent): + """Dataprep component for Pinecone ingestion and search services.""" + + def __init__(self, name: str, description: str, config: dict = None): + super().__init__(name, ServiceType.DATAPREP.name.lower(), description, config) + self.tei_embedding_endpoint = os.getenv("TEI_EMBEDDING_ENDPOINT") + self.upload_folder = "./uploaded_files/" + # Create vectorstore + if self.tei_embedding_endpoint: + # create embeddings using TEI endpoint service + self.embedder = HuggingFaceHubEmbeddings(model=self.tei_embedding_endpoint) + else: + # create embeddings using local embedding model + self.embedder = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) + self.pc = Pinecone(api_key=PINECONE_API_KEY) + + # Perform health check + health_status = self.check_health() + if not health_status: + logger.error("OpeaPineConeDataprep health check failed.") + + def check_health(self) -> bool: + """Checks the health of the Pinecone service.""" + if self.pc is None: + logger.error("Pinecone client is not initialized.") + return False + + try: + # Perform a simple health check via listing indexes + self.pc.list_indexes() + return True + except Exception as e: + logger.error(f"Pinecone health check failed: {e}") + return False + + def invoke(self, *args, **kwargs): + pass + + def check_index_existance(self): + if logflag: + logger.info(f"[ check index existence ] checking {PINECONE_INDEX_NAME}") + + existing_indexes = [index_info["name"] for index_info in self.pc.list_indexes()] + if PINECONE_INDEX_NAME not in existing_indexes: + if logflag: + logger.info("[ check index existence ] index does not exist") + return None + else: + return True + + def create_index(self, client): + if logflag: + 
logger.info(f"[ create index ] creating index {PINECONE_INDEX_NAME}") + try: + client.create_index( + name=PINECONE_INDEX_NAME, + dimension=768, + metric="cosine", + spec=ServerlessSpec(cloud="aws", region="us-east-1"), + ) + if logflag: + logger.info(f"[ create index ] index {PINECONE_INDEX_NAME} successfully created") + except Exception as e: + if logflag: + logger.info(f"[ create index ] fail to create index {PINECONE_INDEX_NAME}: {e}") + return False + return True + + def drop_index(self, index_name): + if logflag: + logger.info(f"[ drop index ] dropping index {index_name}") + try: + self.pc.delete_index(index_name) + if logflag: + logger.info(f"[ drop index ] index {index_name} deleted") + except Exception as e: + if logflag: + logger.info(f"[ drop index ] index {index_name} delete failed: {e}") + return False + return True + + def ingest_data_to_pinecone(self, doc_path: DocPath): + """Ingest document to Pinecone.""" + path = doc_path.path + if logflag: + logger.info(f"Parsing document {path}.") + + if path.endswith(".html"): + headers_to_split_on = [ + ("h1", "Header 1"), + ("h2", "Header 2"), + ("h3", "Header 3"), + ] + text_splitter = HTMLHeaderTextSplitter(headers_to_split_on=headers_to_split_on) + else: + text_splitter = RecursiveCharacterTextSplitter( + chunk_size=doc_path.chunk_size, + chunk_overlap=doc_path.chunk_overlap, + add_start_index=True, + separators=get_separators(), + ) + + content = document_loader(path) + + structured_types = [".xlsx", ".csv", ".json", "jsonl"] + _, ext = os.path.splitext(path) + + if ext in structured_types: + chunks = content + else: + chunks = text_splitter.split_text(content) + + if doc_path.process_table and path.endswith(".pdf"): + table_chunks = get_tables_result(path, doc_path.table_strategy) + chunks = chunks + table_chunks + if logflag: + logger.info(f"Done preprocessing. 
Created {len(chunks)} chunks of the original file.") + + # Checking Index existence + if not self.check_index_existance(): + # Creating the index + self.create_index(self.pc) + if logflag: + logger.info(f"Successfully created the index {PINECONE_INDEX_NAME}") + + # Batch size + batch_size = 32 + num_chunks = len(chunks) + file_ids = [] + + for i in range(0, num_chunks, batch_size): + batch_chunks = chunks[i : i + batch_size] + batch_texts = batch_chunks + + vectorstore = PineconeVectorStore.from_texts( + texts=batch_texts, + embedding=self.embedder, + index_name=PINECONE_INDEX_NAME, + ) + if logflag: + logger.info(f"Processed batch {i//batch_size + 1}/{(num_chunks-1)//batch_size + 1}") + + async def ingest_link_to_pinecone(self, link_list: List[str], chunk_size, chunk_overlap): + # Checking Index existence + if not self.check_index_existance(): + # Creating the index + self.create_index(self.pc) + if logflag: + logger.info(f"Successfully created the index {PINECONE_INDEX_NAME}") + + # save link contents and doc_ids one by one + for link in link_list: + content = parse_html_new([link], chunk_size=chunk_size, chunk_overlap=chunk_overlap) + if logflag: + logger.info(f"[ ingest link ] link: {link} content: {content}") + encoded_link = encode_filename(link) + save_path = self.upload_folder + encoded_link + ".txt" + if logflag: + logger.info(f"[ ingest link ] save_path: {save_path}") + await save_content_to_local_disk(save_path, content) + + vectorstore = PineconeVectorStore.from_texts( + texts=content, + embedding=self.embedder, + index_name=PINECONE_INDEX_NAME, + ) + + return True + + async def ingest_files( + self, + files: Optional[Union[UploadFile, List[UploadFile]]] = File(None), + link_list: Optional[str] = Form(None), + chunk_size: int = Form(1500), + chunk_overlap: int = Form(100), + process_table: bool = Form(False), + table_strategy: str = Form("fast"), + ): + """Ingest files/links content into pipecone database. + + Save in the format of vector[768]. + Returns '{"status": 200, "message": "Data preparation succeeded"}' if successful. + Args: + files (Union[UploadFile, List[UploadFile]], optional): A file or a list of files to be ingested. Defaults to File(None). + link_list (str, optional): A list of links to be ingested. Defaults to Form(None). + chunk_size (int, optional): The size of the chunks to be split. Defaults to Form(1500). + chunk_overlap (int, optional): The overlap between chunks. Defaults to Form(100). + process_table (bool, optional): Whether to process tables in PDFs. Defaults to Form(False). + table_strategy (str, optional): The strategy to process tables in PDFs. Defaults to Form("fast"). 
+ """ + if logflag: + logger.info(f"files:{files}") + logger.info(f"link_list:{link_list}") + + if files: + if not isinstance(files, list): + files = [files] + uploaded_files = [] + for file in files: + encode_file = encode_filename(file.filename) + save_path = self.upload_folder + encode_file + await save_content_to_local_disk(save_path, file) + self.ingest_data_to_pinecone( + DocPath( + path=save_path, + chunk_size=chunk_size, + chunk_overlap=chunk_overlap, + process_table=process_table, + table_strategy=table_strategy, + ) + ) + uploaded_files.append(save_path) + if logflag: + logger.info(f"Successfully saved file {save_path}") + result = {"status": 200, "message": "Data preparation succeeded"} + if logflag: + logger.info(result) + return result + + if link_list: + try: + link_list = json.loads(link_list) # Parse JSON string to list + if not isinstance(link_list, list): + raise HTTPException(status_code=400, detail="link_list should be a list.") + await self.ingest_link_to_pinecone(link_list, chunk_size, chunk_overlap) + result = {"status": 200, "message": "Data preparation succeeded"} + if logflag: + logger.info(f"Successfully saved link list {link_list}") + logger.info(result) + return result + except json.JSONDecodeError: + raise HTTPException(status_code=400, detail="Invalid JSON format for link_list.") + + raise HTTPException(status_code=400, detail="Must provide either a file or a string list.") + + async def get_files(self): + """Get file structure from pipecone database in the format of + { + "name": "File Name", + "id": "File Name", + "type": "File", + "parent": "", + }""" + if logflag: + logger.info("[ dataprep - get file ] start to get file structure") + + if not Path(self.upload_folder).exists(): + if logflag: + logger.info("No file uploaded, return empty list.") + return [] + + file_content = get_file_structure(self.upload_folder) + if logflag: + logger.info(file_content) + return file_content + + async def delete_files(self, file_path: str = Body(..., embed=True)): + """Delete file according to `file_path`. + + `file_path`: + - specific file path (e.g. 
/path/to/file.txt) + - "all": delete all files uploaded + """ + # delete all uploaded files + if file_path == "all": + if logflag: + logger.info("[dataprep - del] delete all files") + remove_folder_with_ignore(self.upload_folder) + assert self.drop_index(index_name=PINECONE_INDEX_NAME) + if logflag: + logger.info("[dataprep - del] successfully delete all files.") + create_upload_folder(self.upload_folder) + if logflag: + logger.info('{"status": True}') + return {"status": True} + else: + raise HTTPException(status_code=404, detail="Single file deletion is not implemented yet") diff --git a/comps/dataprep/src/integrations/qdrant.py b/comps/dataprep/src/integrations/qdrant.py new file mode 100644 index 0000000000..e54c6c572b --- /dev/null +++ b/comps/dataprep/src/integrations/qdrant.py @@ -0,0 +1,237 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import json +import os +from typing import List, Optional, Union + +from fastapi import Body, File, Form, HTTPException, UploadFile +from langchain.text_splitter import RecursiveCharacterTextSplitter +from langchain_community.embeddings import HuggingFaceBgeEmbeddings +from langchain_community.vectorstores import Qdrant +from langchain_huggingface import HuggingFaceEndpointEmbeddings +from langchain_text_splitters import HTMLHeaderTextSplitter +from qdrant_client import QdrantClient + +from comps import CustomLogger, DocPath, OpeaComponent, OpeaComponentRegistry, ServiceType +from comps.dataprep.src.utils import ( + document_loader, + encode_filename, + get_separators, + get_tables_result, + parse_html_new, + save_content_to_local_disk, +) + +logger = CustomLogger("opea_dataprep_qdrant") +logflag = os.getenv("LOGFLAG", False) + + +# Embedding model +EMBED_MODEL = os.getenv("EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2") + +# Qdrant configuration +QDRANT_HOST = os.getenv("QDRANT_HOST", "localhost") +QDRANT_PORT = int(os.getenv("QDRANT_PORT", 6333)) +COLLECTION_NAME = os.getenv("COLLECTION_NAME", "rag-qdrant") + +# LLM/Embedding endpoints +TGI_LLM_ENDPOINT = os.getenv("TGI_LLM_ENDPOINT", "http://localhost:8080") +TGI_LLM_ENDPOINT_NO_RAG = os.getenv("TGI_LLM_ENDPOINT_NO_RAG", "http://localhost:8081") +TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_ENDPOINT") + + +@OpeaComponentRegistry.register("OPEA_DATAPREP_QDRANT") +class OpeaQdrantDataprep(OpeaComponent): + """Dataprep component for Qdrant ingestion and search services.""" + + def __init__(self, name: str, description: str, config: dict = None): + super().__init__(name, ServiceType.DATAPREP.name.lower(), description, config) + self.tei_embedding_endpoint = os.getenv("TEI_EMBEDDING_ENDPOINT") + self.upload_folder = "./uploaded_files/" + # Create vectorstore + if TEI_EMBEDDING_ENDPOINT: + # create embeddings using TEI endpoint service + self.embedder = HuggingFaceEndpointEmbeddings(model=TEI_EMBEDDING_ENDPOINT) + else: + # create embeddings using local embedding model + self.embedder = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) + + # Perform health check + health_status = self.check_health() + if not health_status: + logger.error("OpeaQdrantDataprep health check failed.") + + def check_health(self) -> bool: + """Checks the health of the Qdrant service.""" + if self.embedder is None: + logger.error("Qdrant embedder is not initialized.") + return False + + try: + client = QdrantClient(host=QDRANT_HOST, port=QDRANT_PORT) + logger.info(client.info()) + return True + except Exception as e: + logger.error(f"Qdrant health check failed: {e}") + return False + + 
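A hedged client-side sketch of how the ingest_files() handlers defined by these dataprep components are typically exercised over HTTP, assuming a requests-based client and a service exposed on the /v1/dataprep route used by the earlier microservice registration; the host and port below are placeholders, while the multipart field names mirror the ingest_files() signature:

import json
import requests  # assumed available in the client environment

DATAPREP_URL = "http://localhost:6007/v1/dataprep"  # hypothetical host/port; route per the earlier registration

# Ingest a local file with the same form fields exposed by ingest_files()
with open("example.pdf", "rb") as f:
    resp = requests.post(
        DATAPREP_URL,
        files={"files": ("example.pdf", f)},
        data={"chunk_size": 1500, "chunk_overlap": 100, "process_table": "false", "table_strategy": "fast"},
        timeout=600,
    )
print(resp.json())  # expected: {"status": 200, "message": "Data preparation succeeded"}

# Or ingest a list of links; link_list is sent as a JSON-encoded string
resp = requests.post(DATAPREP_URL, data={"link_list": json.dumps(["https://example.com/page"])}, timeout=600)
print(resp.json())

The link_list field is passed as a JSON-encoded string because each backend parses it with json.loads before iterating over the links.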
def invoke(self, *args, **kwargs):
+        pass
+
+    def ingest_data_to_qdrant(self, doc_path: DocPath):
+        """Ingest document to Qdrant."""
+        path = doc_path.path
+        if logflag:
+            logger.info(f"Parsing document {path}.")
+
+        if path.endswith(".html"):
+            headers_to_split_on = [
+                ("h1", "Header 1"),
+                ("h2", "Header 2"),
+                ("h3", "Header 3"),
+            ]
+            text_splitter = HTMLHeaderTextSplitter(headers_to_split_on=headers_to_split_on)
+        else:
+            text_splitter = RecursiveCharacterTextSplitter(
+                chunk_size=doc_path.chunk_size,
+                chunk_overlap=doc_path.chunk_overlap,
+                add_start_index=True,
+                separators=get_separators(),
+            )
+
+        content = document_loader(path)
+
+        # Structured files are ingested as-is; other formats are chunked by the splitter above.
+        structured_types = [".xlsx", ".csv", ".json", ".jsonl"]
+        _, ext = os.path.splitext(path)
+
+        if ext in structured_types:
+            chunks = content
+        else:
+            chunks = text_splitter.split_text(content)
+
+        if doc_path.process_table and path.endswith(".pdf"):
+            table_chunks = get_tables_result(path, doc_path.table_strategy)
+            chunks = chunks + table_chunks
+        if logflag:
+            logger.info(f"Done preprocessing. Created {len(chunks)} chunks of the original file.")
+
+        # Batch size
+        batch_size = 32
+        num_chunks = len(chunks)
+        for i in range(0, num_chunks, batch_size):
+            batch_chunks = chunks[i : i + batch_size]
+            batch_texts = batch_chunks
+
+            _ = Qdrant.from_texts(
+                texts=batch_texts,
+                embedding=self.embedder,
+                collection_name=COLLECTION_NAME,
+                host=QDRANT_HOST,
+                port=QDRANT_PORT,
+            )
+            if logflag:
+                logger.info(f"Processed batch {i//batch_size + 1}/{(num_chunks-1)//batch_size + 1}")
+
+        return True
+
+    async def ingest_files(
+        self,
+        files: Optional[Union[UploadFile, List[UploadFile]]] = File(None),
+        link_list: Optional[str] = Form(None),
+        chunk_size: int = Form(1500),
+        chunk_overlap: int = Form(100),
+        process_table: bool = Form(False),
+        table_strategy: str = Form("fast"),
+    ):
+        """Ingest files/links content into the Qdrant database.
+
+        Save in the format of vector[768].
+        Returns '{"status": 200, "message": "Data preparation succeeded"}' if successful.
+        Args:
+            files (Union[UploadFile, List[UploadFile]], optional): A file or a list of files to be ingested. Defaults to File(None).
+            link_list (str, optional): A list of links to be ingested. Defaults to Form(None).
+            chunk_size (int, optional): The size of the chunks to be split. Defaults to Form(1500).
+            chunk_overlap (int, optional): The overlap between chunks. Defaults to Form(100).
+            process_table (bool, optional): Whether to process tables in PDFs. Defaults to Form(False).
+            table_strategy (str, optional): The strategy to process tables in PDFs. Defaults to Form("fast").
+ """ + if logflag: + logger.info(f"files:{files}") + logger.info(f"link_list:{link_list}") + + if files: + if not isinstance(files, list): + files = [files] + uploaded_files = [] + for file in files: + encode_file = encode_filename(file.filename) + save_path = self.upload_folder + encode_file + await save_content_to_local_disk(save_path, file) + self.ingest_data_to_qdrant( + DocPath( + path=save_path, + chunk_size=chunk_size, + chunk_overlap=chunk_overlap, + process_table=process_table, + table_strategy=table_strategy, + ) + ) + uploaded_files.append(save_path) + if logflag: + logger.info(f"Successfully saved file {save_path}") + result = {"status": 200, "message": "Data preparation succeeded"} + if logflag: + logger.info(result) + return result + + if link_list: + link_list = json.loads(link_list) # Parse JSON string to list + if not isinstance(link_list, list): + raise HTTPException(status_code=400, detail="link_list should be a list.") + for link in link_list: + encoded_link = encode_filename(link) + save_path = self.upload_folder + encoded_link + ".txt" + content = parse_html_new([link], chunk_size=chunk_size, chunk_overlap=chunk_overlap) + try: + await save_content_to_local_disk(save_path, content) + self.ingest_data_to_qdrant( + DocPath( + path=save_path, + chunk_size=chunk_size, + chunk_overlap=chunk_overlap, + process_table=process_table, + table_strategy=table_strategy, + ) + ) + except json.JSONDecodeError: + raise HTTPException(status_code=500, detail="Fail to ingest data into qdrant.") + + if logflag: + logger.info(f"Successfully saved link {link}") + + result = {"status": 200, "message": "Data preparation succeeded"} + if logflag: + logger.info(result) + return result + + raise HTTPException(status_code=400, detail="Must provide either a file or a string list.") + + async def get_files(self): + """Get file structure from pipecone database in the format of + { + "name": "File Name", + "id": "File Name", + "type": "File", + "parent": "", + }""" + pass + + async def delete_files(self, file_path: str = Body(..., embed=True)): + """Delete file according to `file_path`. + + `file_path`: + - specific file path (e.g. /path/to/file.txt) + - "all": delete all files uploaded + """ + pass diff --git a/comps/dataprep/src/integrations/redis.py b/comps/dataprep/src/integrations/redis.py index 1d084d1b7a..06cb0d7f27 100644 --- a/comps/dataprep/src/integrations/redis.py +++ b/comps/dataprep/src/integrations/redis.py @@ -31,11 +31,71 @@ save_content_to_local_disk, ) -from .config import EMBED_MODEL, INDEX_NAME, KEY_INDEX_NAME, REDIS_URL, SEARCH_BATCH_SIZE, TEI_EMBEDDING_ENDPOINT - logger = CustomLogger("redis_dataprep") logflag = os.getenv("LOGFLAG", False) upload_folder = "./uploaded_files/" + + +# Embedding model +EMBED_MODEL = os.getenv("EMBED_MODEL", "BAAI/bge-base-en-v1.5") +# TEI Embedding endpoints +TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_EMBEDDING_ENDPOINT", "") + +# Vector Index Configuration +INDEX_NAME = os.getenv("INDEX_NAME", "rag_redis") +KEY_INDEX_NAME = os.getenv("KEY_INDEX_NAME", "file-keys") +TIMEOUT_SECONDS = int(os.getenv("TIMEOUT_SECONDS", 600)) +SEARCH_BATCH_SIZE = int(os.getenv("SEARCH_BATCH_SIZE", 10)) + +# Redis Connection Information +REDIS_HOST = os.getenv("REDIS_HOST", "localhost") +REDIS_PORT = int(os.getenv("REDIS_PORT", 6379)) + + +def get_boolean_env_var(var_name, default_value=False): + """Retrieve the boolean value of an environment variable. + + Args: + var_name (str): The name of the environment variable to retrieve. 
+ default_value (bool): The default value to return if the variable + is not found. + + Returns: + bool: The value of the environment variable, interpreted as a boolean. + """ + true_values = {"true", "1", "t", "y", "yes"} + false_values = {"false", "0", "f", "n", "no"} + + # Retrieve the environment variable's value + value = os.getenv(var_name, "").lower() + + # Decide the boolean value based on the content of the string + if value in true_values: + return True + elif value in false_values: + return False + else: + return default_value + + +def format_redis_conn_from_env(): + redis_url = os.getenv("REDIS_URL", None) + if redis_url: + return redis_url + else: + using_ssl = get_boolean_env_var("REDIS_SSL", False) + start = "rediss://" if using_ssl else "redis://" + + # if using RBAC + password = os.getenv("REDIS_PASSWORD", None) + username = os.getenv("REDIS_USERNAME", "default") + if password is not None: + start += f"{username}:{password}@" + + return start + f"{REDIS_HOST}:{REDIS_PORT}" + + +REDIS_URL = format_redis_conn_from_env() redis_pool = redis.ConnectionPool.from_url(REDIS_URL) diff --git a/comps/dataprep/src/integrations/redis_multimodal.py b/comps/dataprep/src/integrations/redis_multimodal.py new file mode 100644 index 0000000000..6ae4d185bc --- /dev/null +++ b/comps/dataprep/src/integrations/redis_multimodal.py @@ -0,0 +1,821 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import base64 +import json +import os +import shutil +import time +import uuid +from pathlib import Path +from typing import Any, Dict, Iterable, List, Optional, Type, Union + +import pymupdf +from fastapi import File, HTTPException, UploadFile +from langchain_community.utilities.redis import _array_to_buffer +from langchain_community.vectorstores import Redis +from langchain_community.vectorstores.redis.base import _generate_field_schema, _prepare_metadata +from langchain_core.embeddings import Embeddings +from langchain_core.utils import get_from_dict_or_env +from PIL import Image + +from comps import CustomLogger, OpeaComponent, OpeaComponentRegistry, ServiceType +from comps.third_parties.bridgetower.src.bridgetower_embedding import BridgeTowerEmbedding + +from .utils.multimodal import ( + clear_upload_folder, + convert_video_to_audio, + delete_audio_file, + extract_frames_and_annotations_from_transcripts, + extract_frames_and_generate_captions, + extract_transcript_from_audio, + generate_annotations_from_transcript, + generate_id, + load_json_file, + load_whisper_model, + write_vtt, +) + +# Models +EMBED_MODEL = os.getenv("EMBEDDING_MODEL_ID", "BridgeTower/bridgetower-large-itm-mlm-itc") +WHISPER_MODEL = os.getenv("WHISPER_MODEL", "small") + +# Redis Connection Information +REDIS_HOST = os.getenv("REDIS_HOST", "localhost") +REDIS_PORT = int(os.getenv("REDIS_PORT", 6379)) + +# Lvm Microservice Information +LVM_ENDPOINT = os.getenv("LVM_ENDPOINT", "http://localhost:9399/v1/lvm") + + +def get_boolean_env_var(var_name, default_value=False): + """Retrieve the boolean value of an environment variable. + + Args: + var_name (str): The name of the environment variable to retrieve. + default_value (bool): The default value to return if the variable + is not found. + Returns: + bool: The value of the environment variable, interpreted as a boolean. 
+ """ + true_values = {"true", "1", "t", "y", "yes"} + false_values = {"false", "0", "f", "n", "no"} + + # Retrieve the environment variable's value + value = os.getenv(var_name, "").lower() + + # Decide the boolean value based on the content of the string + if value in true_values: + return True + elif value in false_values: + return False + else: + return default_value + + +def format_redis_conn_from_env(): + redis_url = os.getenv("REDIS_URL", None) + if redis_url: + return redis_url + else: + using_ssl = get_boolean_env_var("REDIS_SSL", False) + start = "rediss://" if using_ssl else "redis://" + + # if using RBAC + password = os.getenv("REDIS_PASSWORD", None) + username = os.getenv("REDIS_USERNAME", "default") + if password is not None: + start += f"{username}:{password}@" + + return start + f"{REDIS_HOST}:{REDIS_PORT}" + + +REDIS_URL = format_redis_conn_from_env() + +# Vector Index Configuration +INDEX_NAME = os.getenv("INDEX_NAME", "mm-rag-redis") + +current_file_path = os.path.abspath(__file__) +parent_dir = os.path.dirname(current_file_path) +REDIS_SCHEMA = os.getenv("REDIS_SCHEMA", "./config/schema.yml") +TIMEOUT_SECONDS = int(os.getenv("TIMEOUT_SECONDS", 600)) +schema_path = os.path.join(parent_dir, REDIS_SCHEMA) +INDEX_SCHEMA = schema_path + +logger = CustomLogger("opea_dataprep_redis_multimodal") +logflag = os.getenv("LOGFLAG", False) + + +class MultimodalRedis(Redis): + """Redis vector database to process multimodal data.""" + + @classmethod + def from_text_image_pairs_return_keys( + cls: Type[Redis], + texts: List[str], + images: List[str] = None, + embedding: Embeddings = BridgeTowerEmbedding, + metadatas: Optional[List[dict]] = None, + index_name: Optional[str] = None, + index_schema: Optional[Union[Dict[str, str], str, os.PathLike]] = None, + vector_schema: Optional[Dict[str, Union[str, int]]] = None, + **kwargs: Any, + ): + """ + Args: + texts (List[str]): List of texts to add to the vectorstore. + images (List[str]): Optional list of path-to-images to add to the vectorstore. If provided, the length of + the list of images must match the length of the list of text strings. + embedding (Embeddings): Embeddings to use for the vectorstore. + metadatas (Optional[List[dict]], optional): Optional list of metadata + dicts to add to the vectorstore. Defaults to None. + index_name (Optional[str], optional): Optional name of the index to + create or add to. Defaults to None. + index_schema (Optional[Union[Dict[str, str], str, os.PathLike]], optional): + Optional fields to index within the metadata. Overrides generated + schema. Defaults to None. + vector_schema (Optional[Dict[str, Union[str, int]]], optional): Optional + vector schema to use. Defaults to None. + **kwargs (Any): Additional keyword arguments to pass to the Redis client. + Returns: + Tuple[Redis, List[str]]: Tuple of the Redis instance and the keys of + the newly created documents. + Raises: + ValueError: If the number of texts does not equal the number of images. + ValueError: If the number of metadatas does not match the number of texts. 
+ """ + # If images are provided, the length of texts must be equal to the length of images + if images and len(texts) != len(images): + raise ValueError(f"the len of captions {len(texts)} does not equal the len of images {len(images)}") + + redis_url = get_from_dict_or_env(kwargs, "redis_url", "REDIS_URL") + + if "redis_url" in kwargs: + kwargs.pop("redis_url") + + # flag to use generated schema + if "generate" in kwargs: + kwargs.pop("generate") + + # see if the user specified keys + keys = None + if "keys" in kwargs: + keys = kwargs.pop("keys") + + # Name of the search index if not given + if not index_name: + index_name = uuid.uuid4().hex + + # type check for metadata + if metadatas: + if isinstance(metadatas, list) and len(metadatas) != len(texts): # type: ignore # noqa: E501 + raise ValueError("Number of metadatas must match number of texts") + if not (isinstance(metadatas, list) and isinstance(metadatas[0], dict)): + raise ValueError("Metadatas must be a list of dicts") + generated_schema = _generate_field_schema(metadatas[0]) + + if not index_schema: + index_schema = generated_schema + + # Create instance + instance = cls( + redis_url, + index_name, + embedding, + index_schema=index_schema, + vector_schema=vector_schema, + **kwargs, + ) + # Add data to Redis + keys = ( + instance.add_text_image_pairs(texts, images, metadatas, keys=keys) + if images + else instance.add_text(texts, metadatas, keys=keys) + ) + return instance, keys + + def add_text_image_pairs( + self, + texts: Iterable[str], + images: Iterable[str], + metadatas: Optional[List[dict]] = None, + embeddings: Optional[List[List[float]]] = None, + batch_size: int = 2, + clean_metadata: bool = True, + **kwargs: Any, + ) -> List[str]: + """Add more embeddings of text-image pairs to the vectorstore. + + Args: + texts (Iterable[str]): Iterable of strings/text to add to the vectorstore. + images: Iterable[str]: Iterable of strings/text of path-to-image to add to the vectorstore. + metadatas (Optional[List[dict]], optional): Optional list of metadatas. + Defaults to None. + embeddings (Optional[List[List[float]]], optional): Optional pre-generated + embeddings. Defaults to None. + keys (List[str]) or ids (List[str]): Identifiers of entries. + Defaults to None. + batch_size (int, optional): Batch size to use for writes. Defaults to 1000. 
+ Returns: + List[str]: List of ids added to the vectorstore + """ + ids = [] + # Get keys or ids from kwargs + # Other vectorstores use ids + keys_or_ids = kwargs.get("keys", kwargs.get("ids")) + + # type check for metadata + if metadatas: + if isinstance(metadatas, list) and len(metadatas) != len(texts): # type: ignore # noqa: E501 + raise ValueError("Number of metadatas must match number of texts") + if not (isinstance(metadatas, list) and isinstance(metadatas[0], dict)): + raise ValueError("Metadatas must be a list of dicts") + pil_imgs = [Image.open(img) for img in images] + if not embeddings: + embeddings = self._embeddings.embed_image_text_pairs(list(texts), pil_imgs, batch_size=batch_size) + self._create_index_if_not_exist(dim=len(embeddings[0])) + + # Write data to redis + pipeline = self.client.pipeline(transaction=False) + for i, text in enumerate(texts): + # Use provided values by default or fallback + key = keys_or_ids[i] if keys_or_ids else str(uuid.uuid4().hex) + if not key.startswith(self.key_prefix + ":"): + key = self.key_prefix + ":" + key + metadata = metadatas[i] if metadatas else {} + metadata = _prepare_metadata(metadata) if clean_metadata else metadata + pipeline.hset( + key, + mapping={ + self._schema.content_key: text, + self._schema.content_vector_key: _array_to_buffer(embeddings[i], self._schema.vector_dtype), + **metadata, + }, + ) + ids.append(key) + + # Write batch + if i % batch_size == 0: + pipeline.execute() + + # Cleanup final batch + pipeline.execute() + return ids + + def add_text( + self, + texts: Iterable[str], + metadatas: Optional[List[dict]] = None, + embeddings: Optional[List[List[float]]] = None, + clean_metadata: bool = True, + **kwargs: Any, + ) -> List[str]: + """Add more embeddings of text to the vectorstore. + + Args: + texts (Iterable[str]): Iterable of strings/text to add to the vectorstore. + metadatas (Optional[List[dict]], optional): Optional list of metadatas. + Defaults to None. + embeddings (Optional[List[List[float]]], optional): Optional pre-generated + embeddings. Defaults to None. + keys (List[str]) or ids (List[str]): Identifiers of entries. + Defaults to None. 
+ Returns: + List[str]: List of ids added to the vectorstore + """ + ids = [] + # Get keys or ids from kwargs + # Other vectorstores use ids + keys_or_ids = kwargs.get("keys", kwargs.get("ids")) + + # type check for metadata + if metadatas: + if isinstance(metadatas, list) and len(metadatas) != len(texts): # type: ignore # noqa: E501 + raise ValueError("Number of metadatas must match number of texts") + if not (isinstance(metadatas, list) and isinstance(metadatas[0], dict)): + raise ValueError("Metadatas must be a list of dicts") + + if not embeddings: + embeddings = self._embeddings.embed_documents(list(texts)) + self._create_index_if_not_exist(dim=len(embeddings[0])) + + # Write data to redis + pipeline = self.client.pipeline(transaction=False) + for i, text in enumerate(texts): + # Use provided values by default or fallback + key = keys_or_ids[i] if keys_or_ids else str(uuid.uuid4().hex) + if not key.startswith(self.key_prefix + ":"): + key = self.key_prefix + ":" + key + metadata = metadatas[i] if metadatas else {} + metadata = _prepare_metadata(metadata) if clean_metadata else metadata + pipeline.hset( + key, + mapping={ + self._schema.content_key: text, + self._schema.content_vector_key: _array_to_buffer(embeddings[i], self._schema.vector_dtype), + **metadata, + }, + ) + ids.append(key) + + # Cleanup final batch + pipeline.execute() + return ids + + +@OpeaComponentRegistry.register("OPEA_DATAPREP_MULTIMODALREDIS") +class OpeaMultimodalRedisDataprep(OpeaComponent): + """Dataprep component for Multimodal Redis ingestion and search services.""" + + def __init__(self, name: str, description: str, config: dict = None): + super().__init__(name, ServiceType.DATAPREP.name.lower(), description, config) + self.device = "cpu" + self.upload_folder = "./uploaded_files/" + # Load embeddings model + logger.info("Initializing BridgeTower model as embedder...") + self.embeddings = BridgeTowerEmbedding(model_name=EMBED_MODEL, device=self.device) + logger.info("Done initialization of embedder!") + + # Perform health check + health_status = self.check_health() + if not health_status: + logger.error("OpeaMultimodalRedisDataprep health check failed.") + + def check_health(self) -> bool: + """Checks the health of the Multimodal Redis service.""" + if self.embeddings is None: + logger.error("Multimodal Redis is not initialized.") + return False + + return True + + def invoke(self, *args, **kwargs): + pass + + def prepare_data_and_metadata_from_annotation( + self, + annotation, + path_to_frames, + title, + num_transcript_concat_for_ingesting=2, + num_transcript_concat_for_inference=7, + ): + text_list = [] + image_list = [] + metadatas = [] + for i, frame in enumerate(annotation): + frame_index = frame["sub_video_id"] + path_to_frame = os.path.join(path_to_frames, f"frame_{frame_index}.png") + # augment this frame's transcript with a reasonable number of neighboring frames' transcripts helps semantic retrieval + lb_ingesting = max(0, i - num_transcript_concat_for_ingesting) + ub_ingesting = min(len(annotation), i + num_transcript_concat_for_ingesting + 1) + caption_for_ingesting = " ".join([annotation[j]["caption"] for j in range(lb_ingesting, ub_ingesting)]) + + # augment this frame's transcript with more neighboring frames' transcript to provide more context to LVM for question answering + lb_inference = max(0, i - num_transcript_concat_for_inference) + ub_inference = min(len(annotation), i + num_transcript_concat_for_inference + 1) + caption_for_inference = " ".join([annotation[j]["caption"] for j in 
range(lb_inference, ub_inference)]) + + video_id = frame["video_id"] + b64_img_str = frame["b64_img_str"] + time_of_frame = frame["time"] + embedding_type = "pair" if b64_img_str else "text" + source_video = frame["video_name"] + + text_list.append(caption_for_ingesting) + + if b64_img_str: + image_list.append(path_to_frame) + + metadatas.append( + { + "content": caption_for_ingesting, + "b64_img_str": b64_img_str, + "video_id": video_id, + "source_video": source_video, + "time_of_frame_ms": float(time_of_frame), + "embedding_type": embedding_type, + "title": title, + "transcript_for_inference": caption_for_inference, + } + ) + + return text_list, image_list, metadatas + + def prepare_pdf_data_from_annotation(self, annotation, path_to_files, title): + """PDF data processing has some key differences from videos and images. + + 1. Neighboring transcripts are not currently considered relevant. + We are only taking the text located on the same page as the image. + 2. The images within PDFs are indexed by page and image-within-page + indices, as opposed to a single frame index. + 3. Instead of time of frame in ms, we return the PDF page index through + the pre-existing time_of_frame_ms metadata key to maintain compatibility. + """ + text_list = [] + image_list = [] + metadatas = [] + for item in annotation: + page_index = item["frame_no"] + image_index = item["sub_video_id"] + path_to_image = os.path.join(path_to_files, f"page{page_index}_image{image_index}.png") + caption_for_ingesting = item["caption"] + caption_for_inference = item["caption"] + + pdf_id = item["video_id"] + b64_img_str = item["b64_img_str"] + embedding_type = "pair" if b64_img_str else "text" + source = item["video_name"] + + text_list.append(caption_for_ingesting) + + if b64_img_str: + image_list.append(path_to_image) + + metadatas.append( + { + "content": caption_for_ingesting, + "b64_img_str": b64_img_str, + "video_id": pdf_id, + "source_video": source, + "time_of_frame_ms": page_index, # For PDFs save the page number + "embedding_type": embedding_type, + "title": title, + "transcript_for_inference": caption_for_inference, + } + ) + + return text_list, image_list, metadatas + + def ingest_multimodal(self, filename, data_folder, embeddings, is_pdf=False): + """Ingest text image pairs to Redis from the data/ directory that consists of frames and annotations.""" + data_folder = os.path.abspath(data_folder) + annotation_file_path = os.path.join(data_folder, "annotations.json") + path_to_frames = os.path.join(data_folder, "frames") + + annotation = load_json_file(annotation_file_path) + + # prepare data to ingest + if is_pdf: + text_list, image_list, metadatas = self.prepare_pdf_data_from_annotation( + annotation, path_to_frames, filename + ) + else: + text_list, image_list, metadatas = self.prepare_data_and_metadata_from_annotation( + annotation, path_to_frames, filename + ) + + MultimodalRedis.from_text_image_pairs_return_keys( + texts=[f"From {filename}. 
" + text for text in text_list], + images=image_list, + embedding=embeddings, + metadatas=metadatas, + index_name=INDEX_NAME, + index_schema=INDEX_SCHEMA, + redis_url=REDIS_URL, + ) + + def drop_index(self, index_name, redis_url=REDIS_URL): + logger.info(f"dropping index {index_name}") + try: + assert Redis.drop_index(index_name=index_name, delete_documents=True, redis_url=redis_url) + logger.info(f"index {index_name} deleted") + except Exception as e: + logger.info(f"index {index_name} delete failed: {e}") + return False + return True + + async def ingest_generate_transcripts(self, files: List[UploadFile] = File(None)): + """Upload videos or audio files with speech, generate transcripts using whisper and ingest into redis.""" + + if files: + files_to_ingest = [] + uploaded_files_map = {} + for file in files: + if os.path.splitext(file.filename)[1] in [".mp4", ".wav"]: + files_to_ingest.append(file) + else: + raise HTTPException( + status_code=400, + detail=f"File {file.filename} is not an mp4 file. Please upload mp4 files only.", + ) + + for file_to_ingest in files_to_ingest: + st = time.time() + file_extension = os.path.splitext(file_to_ingest.filename)[1] + is_video = file_extension == ".mp4" + file_type_str = "video" if is_video else "audio file" + logger.info(f"Processing {file_type_str} {file_to_ingest.filename}") + + # Assign unique identifier to video + file_id = generate_id() + + # Create video file name by appending identifier + base_file_name = os.path.splitext(file_to_ingest.filename)[0] + file_name_with_id = f"{base_file_name}_{file_id}{file_extension}" + dir_name = os.path.splitext(file_name_with_id)[0] + + # Save file in upload_directory + with open(os.path.join(self.upload_folder, file_name_with_id), "wb") as f: + shutil.copyfileobj(file_to_ingest.file, f) + + uploaded_files_map[base_file_name] = file_name_with_id + + if is_video: + # Extract temporary audio wav file from video mp4 + audio_file = dir_name + ".wav" + logger.info(f"Extracting {audio_file}") + convert_video_to_audio( + os.path.join(self.upload_folder, file_name_with_id), + os.path.join(self.upload_folder, audio_file), + ) + logger.info(f"Done extracting {audio_file}") + else: + # We already have an audio file + audio_file = file_name_with_id + + # Load whisper model + logger.info("Loading whisper model....") + whisper_model = load_whisper_model(model_name=WHISPER_MODEL) + logger.info("Done loading whisper!") + + # Extract transcript from audio + logger.info("Extracting transcript from audio") + transcripts = extract_transcript_from_audio(whisper_model, os.path.join(self.upload_folder, audio_file)) + + # Save transcript as vtt file and delete audio file + vtt_file = dir_name + ".vtt" + write_vtt(transcripts, os.path.join(self.upload_folder, vtt_file)) + if is_video: + delete_audio_file(os.path.join(self.upload_folder, audio_file)) + logger.info("Done extracting transcript.") + + if is_video: + # Store frames and caption annotations in a new directory + logger.info("Extracting frames and generating annotation") + extract_frames_and_annotations_from_transcripts( + file_id, + os.path.join(self.upload_folder, file_name_with_id), + os.path.join(self.upload_folder, vtt_file), + os.path.join(self.upload_folder, dir_name), + ) + else: + # Generate annotations based on the transcript + logger.info("Generating annotations for the transcription") + generate_annotations_from_transcript( + file_id, + os.path.join(self.upload_folder, file_name_with_id), + os.path.join(self.upload_folder, vtt_file), + 
os.path.join(self.upload_folder, dir_name), + ) + + logger.info("Done extracting frames and generating annotation") + # Delete temporary vtt file + os.remove(os.path.join(self.upload_folder, vtt_file)) + + # Ingest multimodal data into redis + logger.info("Ingesting data to redis vector store") + self.ingest_multimodal(base_file_name, os.path.join(self.upload_folder, dir_name), self.embeddings) + + # Delete temporary video directory containing frames and annotations + shutil.rmtree(os.path.join(self.upload_folder, dir_name)) + + logger.info(f"Processed file {file_to_ingest.filename}") + end = time.time() + logger.info(str(end - st)) + + return { + "status": 200, + "message": "Data preparation succeeded", + "file_id_maps": uploaded_files_map, + } + + raise HTTPException(status_code=400, detail="Must provide at least one video (.mp4) or audio (.wav) file.") + + async def ingest_generate_captions(self, files: List[UploadFile] = File(None)): + """Upload images and videos without speech (only background music or no audio), generate captions using lvm microservice and ingest into redis.""" + + if files: + file_paths = [] + uploaded_files_saved_files_map = {} + for file in files: + if os.path.splitext(file.filename)[1] in [".mp4", ".png", ".jpg", ".jpeg", ".gif"]: + file_paths.append(file) + else: + raise HTTPException( + status_code=400, + detail=f"File {file.filename} is not a supported file type. Please upload mp4, png, jpg, jpeg, and gif files only.", + ) + + for file in file_paths: + logger.info(f"Processing file {file.filename}") + + # Assign unique identifier to file + id = generate_id() + + # Create file name by appending identifier + name, ext = os.path.splitext(file.filename) + file_name = f"{name}_{id}{ext}" + dir_name = os.path.splitext(file_name)[0] + + # Save file in upload_directory + with open(os.path.join(self.upload_folder, file_name), "wb") as f: + shutil.copyfileobj(file.file, f) + uploaded_files_saved_files_map[name] = file_name + + # Store frames and caption annotations in a new directory + extract_frames_and_generate_captions( + id, + os.path.join(self.upload_folder, file_name), + LVM_ENDPOINT, + os.path.join(self.upload_folder, dir_name), + ) + + # Ingest multimodal data into redis + self.ingest_multimodal(name, os.path.join(self.upload_folder, dir_name), self.embeddings) + + # Delete temporary directory containing frames and annotations + # shutil.rmtree(os.path.join(upload_folder, dir_name)) + + logger.info(f"Processed file {file.filename}") + + return { + "status": 200, + "message": "Data preparation succeeded", + "file_id_maps": uploaded_files_saved_files_map, + } + + raise HTTPException(status_code=400, detail="Must provide at least one file.") + + async def ingest_files(self, files: Optional[Union[UploadFile, List[UploadFile]]] = File(None)): + if files: + accepted_media_formats = [".mp4", ".png", ".jpg", ".jpeg", ".gif", ".pdf"] + # Create a lookup dictionary containing all media files + matched_files = { + f.filename: [f] for f in files if os.path.splitext(f.filename)[1] in accepted_media_formats + } + uploaded_files_map = {} + + # Go through files again and match caption files to media files + for file in files: + file_base, file_extension = os.path.splitext(file.filename) + if file_extension == ".vtt": + if "{}.mp4".format(file_base) in matched_files: + matched_files["{}.mp4".format(file_base)].append(file) + else: + logger.info(f"No video was found for caption file {file.filename}.") + elif file_extension == ".txt": + if "{}.png".format(file_base) in 
matched_files: + matched_files["{}.png".format(file_base)].append(file) + elif "{}.jpg".format(file_base) in matched_files: + matched_files["{}.jpg".format(file_base)].append(file) + elif "{}.jpeg".format(file_base) in matched_files: + matched_files["{}.jpeg".format(file_base)].append(file) + elif "{}.gif".format(file_base) in matched_files: + matched_files["{}.gif".format(file_base)].append(file) + else: + logger.info(f"No image was found for caption file {file.filename}.") + elif file_extension not in accepted_media_formats: + logger.info(f"Skipping file {file.filename} because of unsupported format.") + + # Check that every media file that is not a pdf has a caption file + for media_file_name, file_list in matched_files.items(): + if len(file_list) != 2 and os.path.splitext(media_file_name)[1] != ".pdf": + raise HTTPException(status_code=400, detail=f"No caption file found for {media_file_name}") + + if len(matched_files.keys()) == 0: + return HTTPException( + status_code=400, + detail="The uploaded files have unsupported formats. Please upload at least one video file (.mp4) with captions (.vtt) or one image (.png, .jpg, .jpeg, or .gif) with caption (.txt) or one .pdf file", + ) + + for media_file in matched_files: + logger.info(f"Processing file {media_file}") + file_name, file_extension = os.path.splitext(media_file) + + # Assign unique identifier to file + file_id = generate_id() + + # Create file name by appending identifier + media_file_name = f"{file_name}_{file_id}{file_extension}" + media_dir_name = os.path.splitext(media_file_name)[0] + + # Save file in upload_directory + with open(os.path.join(self.upload_folder, media_file_name), "wb") as f: + shutil.copyfileobj(matched_files[media_file][0].file, f) + uploaded_files_map[file_name] = media_file_name + + if file_extension == ".pdf": + # Set up location to store pdf images and text, reusing "frames" and "annotations" from video + output_dir = os.path.join(self.upload_folder, media_dir_name) + os.makedirs(output_dir, exist_ok=True) + os.makedirs(os.path.join(output_dir, "frames"), exist_ok=True) + doc = pymupdf.open(os.path.join(self.upload_folder, media_file_name)) + annotations = [] + for page_idx, page in enumerate(doc, start=1): + text = page.get_text() + images = page.get_images() + for image_idx, image in enumerate(images, start=1): + # Write image and caption file for each image found in pdf + img_fname = f"page{page_idx}_image{image_idx}" + img_fpath = os.path.join(output_dir, "frames", img_fname + ".png") + pix = pymupdf.Pixmap(doc, image[0]) # create pixmap + + if pix.n - pix.alpha > 3: # if CMYK, convert to RGB first + pix = pymupdf.Pixmap(pymupdf.csRGB, pix) + + pix.save(img_fpath) # pixmap to png + pix = None + + # Convert image to base64 encoded string + with open(img_fpath, "rb") as image2str: + encoded_string = base64.b64encode(image2str.read()) # png to bytes + + decoded_string = encoded_string.decode() # bytes to string + + # Create annotations file, reusing metadata keys from video + annotations.append( + { + "video_id": file_id, + "video_name": os.path.basename(os.path.join(self.upload_folder, media_file_name)), + "b64_img_str": decoded_string, + "caption": text, + "time": 0.0, + "frame_no": page_idx, + "sub_video_id": image_idx, + } + ) + + with open(os.path.join(output_dir, "annotations.json"), "w") as f: + json.dump(annotations, f) + + # Ingest multimodal data into redis + self.ingest_multimodal( + file_name, os.path.join(self.upload_folder, media_dir_name), self.embeddings, is_pdf=True + ) + else: + # Save 
caption file in upload directory + caption_file_extension = os.path.splitext(matched_files[media_file][1].filename)[1] + caption_file = f"{media_dir_name}{caption_file_extension}" + with open(os.path.join(self.upload_folder, caption_file), "wb") as f: + shutil.copyfileobj(matched_files[media_file][1].file, f) + + # Store frames and caption annotations in a new directory + extract_frames_and_annotations_from_transcripts( + file_id, + os.path.join(self.upload_folder, media_file_name), + os.path.join(self.upload_folder, caption_file), + os.path.join(self.upload_folder, media_dir_name), + ) + + # Delete temporary caption file + os.remove(os.path.join(self.upload_folder, caption_file)) + + # Ingest multimodal data into redis + self.ingest_multimodal(file_name, os.path.join(self.upload_folder, media_dir_name), self.embeddings) + + # Delete temporary media directory containing frames and annotations + shutil.rmtree(os.path.join(self.upload_folder, media_dir_name)) + + logger.info(f"Processed file {media_file}") + + return { + "status": 200, + "message": "Data preparation succeeded", + "file_id_maps": uploaded_files_map, + } + + raise HTTPException( + status_code=400, + detail="Must provide at least one pair consisting of video (.mp4) and captions (.vtt) or image (.png, .jpg, .jpeg, .gif) with caption (.txt)", + ) + + async def get_files(self): + """Returns list of names of uploaded videos saved on the server.""" + + if not Path(self.upload_folder).exists(): + logger.info("No file uploaded, return empty list.") + return [] + + uploaded_videos = os.listdir(self.upload_folder) + return uploaded_videos + + async def delete_files(self, file_path): + """Delete all uploaded files along with redis index.""" + index_deleted = self.drop_index(index_name=INDEX_NAME) + + if not index_deleted: + raise HTTPException(status_code=409, detail="Uploaded files could not be deleted. 
Index does not exist") + + clear_upload_folder(self.upload_folder) + logger.info("Successfully deleted all uploaded files.") + return {"status": True} + + async def ingest_videos(self, files: List[UploadFile] = File(None)): + pass + + async def get_videos(self): + pass + + async def get_one_file(self, filename: str): + pass diff --git a/comps/dataprep/src/integrations/utils/__init__.py b/comps/dataprep/src/integrations/utils/__init__.py new file mode 100644 index 0000000000..4057dc0163 --- /dev/null +++ b/comps/dataprep/src/integrations/utils/__init__.py @@ -0,0 +1,2 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 diff --git a/comps/dataprep/multimodal/redis/langchain/multimodal_utils.py b/comps/dataprep/src/integrations/utils/multimodal.py similarity index 100% rename from comps/dataprep/multimodal/redis/langchain/multimodal_utils.py rename to comps/dataprep/src/integrations/utils/multimodal.py diff --git a/comps/dataprep/vdms/multimodal_langchain/utils/store_embeddings.py b/comps/dataprep/src/integrations/utils/store_embeddings.py similarity index 100% rename from comps/dataprep/vdms/multimodal_langchain/utils/store_embeddings.py rename to comps/dataprep/src/integrations/utils/store_embeddings.py diff --git a/comps/dataprep/vdms/multimodal_langchain/utils/utils.py b/comps/dataprep/src/integrations/utils/utils.py similarity index 100% rename from comps/dataprep/vdms/multimodal_langchain/utils/utils.py rename to comps/dataprep/src/integrations/utils/utils.py diff --git a/comps/dataprep/vdms/multimodal_langchain/utils/vclip.py b/comps/dataprep/src/integrations/utils/vclip.py similarity index 100% rename from comps/dataprep/vdms/multimodal_langchain/utils/vclip.py rename to comps/dataprep/src/integrations/utils/vclip.py diff --git a/comps/dataprep/src/integrations/vdms.py b/comps/dataprep/src/integrations/vdms.py new file mode 100644 index 0000000000..998b23a5c7 --- /dev/null +++ b/comps/dataprep/src/integrations/vdms.py @@ -0,0 +1,234 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import json +import os +from typing import List, Optional, Union + +from fastapi import Body, File, Form, HTTPException, UploadFile +from langchain.text_splitter import RecursiveCharacterTextSplitter +from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceHubEmbeddings +from langchain_community.vectorstores.vdms import VDMS, VDMS_Client +from langchain_text_splitters import HTMLHeaderTextSplitter + +from comps import CustomLogger, DocPath, OpeaComponent, OpeaComponentRegistry, ServiceType +from comps.dataprep.src.utils import ( + create_upload_folder, + document_loader, + encode_filename, + get_separators, + get_tables_result, + parse_html_new, + save_content_to_local_disk, +) + +logger = CustomLogger("opea_dataprep_vdms") +logflag = os.getenv("LOGFLAG", False) + + +def getEnv(key, default_value=None): + env_value = os.getenv(key, default=default_value) + print(f"{key}: {env_value}") + return env_value + + +# Embedding model +EMBED_MODEL = getEnv("EMBED_MODEL", "BAAI/bge-base-en-v1.5") + +# VDMS configuration +VDMS_HOST = getEnv("VDMS_HOST", "localhost") +VDMS_PORT = int(getEnv("VDMS_PORT", 55555)) +COLLECTION_NAME = getEnv("COLLECTION_NAME", "rag-vdms") +SEARCH_ENGINE = getEnv("SEARCH_ENGINE", "FaissFlat") +DISTANCE_STRATEGY = getEnv("DISTANCE_STRATEGY", "L2") + +# LLM/Embedding endpoints +TGI_LLM_ENDPOINT = getEnv("TGI_LLM_ENDPOINT", "http://localhost:8080") +TGI_LLM_ENDPOINT_NO_RAG = 
getEnv("TGI_LLM_ENDPOINT_NO_RAG", "http://localhost:8081") +TEI_EMBEDDING_ENDPOINT = getEnv("TEI_ENDPOINT") + +# chunk parameters +CHUNK_SIZE = getEnv("CHUNK_SIZE", 1500) +CHUNK_OVERLAP = getEnv("CHUNK_OVERLAP", 100) + + +@OpeaComponentRegistry.register("OPEA_DATAPREP_VDMS") +class OpeaVdmsDataprep(OpeaComponent): + """Dataprep component for VDMS ingestion and search services.""" + + def __init__(self, name: str, description: str, config: dict = None): + super().__init__(name, ServiceType.DATAPREP.name.lower(), description, config) + self.tei_embedding_endpoint = os.getenv("TEI_ENDPOINT") + self.upload_folder = "./uploaded_files/" + create_upload_folder(self.upload_folder) + self.client = VDMS_Client(VDMS_HOST, int(VDMS_PORT)) + # Create vectorstore + if self.tei_embedding_endpoint: + # create embeddings using TEI endpoint service + self.embedder = HuggingFaceHubEmbeddings(model=self.tei_embedding_endpoint) + else: + # create embeddings using local embedding model + self.embedder = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) + + # Perform health check + health_status = self.check_health() + if not health_status: + logger.error("OpeaVdmsDataprep health check failed.") + + def check_health(self) -> bool: + """Checks the health of the VDMS service.""" + if self.client is None: + logger.error("VDMS client is not initialized.") + return False + + return True + + def invoke(self, *args, **kwargs): + pass + + def ingest_data_to_vdms(self, doc_path: DocPath): + """Ingest document to VDMS.""" + path = doc_path.path + print(f"Parsing document {doc_path}.") + + if path.endswith(".html"): + headers_to_split_on = [ + ("h1", "Header 1"), + ("h2", "Header 2"), + ("h3", "Header 3"), + ] + text_splitter = HTMLHeaderTextSplitter(headers_to_split_on=headers_to_split_on) + else: + text_splitter = RecursiveCharacterTextSplitter( + chunk_size=doc_path.chunk_size, chunk_overlap=100, add_start_index=True, separators=get_separators() + ) + + content = document_loader(path) + chunks = text_splitter.split_text(content) + if doc_path.process_table and path.endswith(".pdf"): + table_chunks = get_tables_result(path, doc_path.table_strategy) + chunks = chunks + table_chunks + + logger.info(f"Done preprocessing. Created {len(chunks)} chunks of the original pdf") + + # Batch size + batch_size = 32 + num_chunks = len(chunks) + for i in range(0, num_chunks, batch_size): + batch_chunks = chunks[i : i + batch_size] + batch_texts = batch_chunks + + _ = VDMS.from_texts( + client=self.client, + embedding=self.embedder, + collection_name=COLLECTION_NAME, + distance_strategy=DISTANCE_STRATEGY, + engine=SEARCH_ENGINE, + texts=batch_texts, + ) + logger.info(f"Processed batch {i//batch_size + 1}/{(num_chunks-1)//batch_size + 1}") + + async def ingest_files( + self, + files: Optional[Union[UploadFile, List[UploadFile]]] = File(None), + link_list: Optional[str] = Form(None), + chunk_size: int = Form(1500), + chunk_overlap: int = Form(100), + process_table: bool = Form(False), + table_strategy: str = Form("fast"), + ): + """Ingest files/links content into VDMS database. + + Save in the format of vector[768]. + Returns '{"status": 200, "message": "Data preparation succeeded"}' if successful. + Args: + files (Union[UploadFile, List[UploadFile]], optional): A file or a list of files to be ingested. Defaults to File(None). + link_list (str, optional): A list of links to be ingested. Defaults to Form(None). + chunk_size (int, optional): The size of the chunks to be split. Defaults to Form(1500). 
+ chunk_overlap (int, optional): The overlap between chunks. Defaults to Form(100). + process_table (bool, optional): Whether to process tables in PDFs. Defaults to Form(False). + table_strategy (str, optional): The strategy to process tables in PDFs. Defaults to Form("fast"). + """ + if logflag: + logger.info(f"[ upload ] files:{files}") + logger.info(f"[ upload ] link_list:{link_list}") + + if files: + if not isinstance(files, list): + files = [files] + uploaded_files = [] + + for file in files: + encode_file = encode_filename(file.filename) + doc_id = "file:" + encode_file + if logflag: + logger.info(f"[ upload ] processing file {doc_id}") + + save_path = self.upload_folder + encode_file + await save_content_to_local_disk(save_path, file) + self.ingest_data_to_vdms( + DocPath( + path=save_path, + chunk_size=chunk_size, + chunk_overlap=chunk_overlap, + process_table=process_table, + table_strategy=table_strategy, + ) + ) + uploaded_files.append(save_path) + if logflag: + logger.info(f"[ upload ] Successfully saved file {save_path}") + + result = {"status": 200, "message": "Data preparation succeeded"} + if logflag: + logger.info(result) + return result + + if link_list: + link_list = json.loads(link_list) # Parse JSON string to list + if not isinstance(link_list, list): + raise HTTPException(status_code=400, detail=f"Link_list {link_list} should be a list.") + for link in link_list: + encoded_link = encode_filename(link) + doc_id = "file:" + encoded_link + ".txt" + if logflag: + logger.info(f"[ upload ] processing link {doc_id}") + + # check whether the link file already exists + + save_path = self.upload_folder + encoded_link + ".txt" + content = parse_html_new([link], chunk_size=chunk_size, chunk_overlap=chunk_overlap) + await save_content_to_local_disk(save_path, content) + self.ingest_data_to_vdms( + DocPath( + path=save_path, + chunk_size=chunk_size, + chunk_overlap=chunk_overlap, + process_table=process_table, + table_strategy=table_strategy, + ) + ) + if logflag: + logger.info(f"[ upload ] Successfully saved link list {link_list}") + return {"status": 200, "message": "Data preparation succeeded"} + + raise HTTPException(status_code=400, detail="Must provide either a file or a string list.") + + async def get_files(self): + """Get file structure from pipecone database in the format of + { + "name": "File Name", + "id": "File Name", + "type": "File", + "parent": "", + }""" + pass + + async def delete_files(self, file_path: str = Body(..., embed=True)): + """Delete file according to `file_path`. + + `file_path`: + - specific file path (e.g. 
/path/to/file.txt) + - "all": delete all files uploaded + """ + pass diff --git a/comps/dataprep/src/integrations/vdms_multimodal.py b/comps/dataprep/src/integrations/vdms_multimodal.py new file mode 100644 index 0000000000..4438ac3b39 --- /dev/null +++ b/comps/dataprep/src/integrations/vdms_multimodal.py @@ -0,0 +1,193 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import json +import os +import shutil +import time +import uuid +from pathlib import Path +from typing import List, Optional, Union + +from fastapi import File, HTTPException, UploadFile +from fastapi.responses import FileResponse +from tqdm import tqdm + +from comps import CustomLogger, OpeaComponent, OpeaComponentRegistry, ServiceType + +from .utils import store_embeddings +from .utils.utils import process_all_videos, read_config +from .utils.vclip import vCLIP + +VECTORDB_SERVICE_HOST_IP = os.getenv("VDMS_HOST", "0.0.0.0") +VECTORDB_SERVICE_PORT = os.getenv("VDMS_PORT", 55555) +collection_name = os.getenv("INDEX_NAME", "rag-vdms") + +logger = CustomLogger("opea_dataprep_vdms_multimodal") +logflag = os.getenv("LOGFLAG", False) + + +@OpeaComponentRegistry.register("OPEA_DATAPREP_MULTIMODALVDMS") +class OpeaMultimodalVdmsDataprep(OpeaComponent): + """Dataprep component for Multimodal Redis ingestion and search services.""" + + def __init__(self, name: str, description: str, config: dict = None): + super().__init__(name, ServiceType.DATAPREP.name.lower(), description, config) + self.device = "cpu" + self.upload_folder = "./uploaded_files/" + + # Perform health check + health_status = self.check_health() + if not health_status: + logger.error("OpeaMultimodalVdmsDataprep health check failed.") + + def check_health(self) -> bool: + """Checks the health of the Multimodal Redis service.""" + return True + + def invoke(self, *args, **kwargs): + pass + + def setup_vclip_model(self, config, device="cpu"): + model = vCLIP(config) + return model + + def read_json(self, path): + with open(path) as f: + x = json.load(f) + return x + + def store_into_vectordb(self, vs, metadata_file_path, dimensions): + GMetadata = self.read_json(metadata_file_path) + + total_videos = len(GMetadata.keys()) + + for idx, (video, data) in enumerate(tqdm(GMetadata.items())): + metadata_list = [] + ids = [] + + data["video"] = video + video_name_list = [data["video_path"]] + metadata_list = [data] + if vs.selected_db == "vdms": + vs.video_db.add_videos( + paths=video_name_list, + metadatas=metadata_list, + start_time=[data["timestamp"]], + clip_duration=[data["clip_duration"]], + ) + else: + logger.info(f"ERROR: selected_db {vs.selected_db} not supported. 
Supported:[vdms]") + + # clean up tmp_ folders containing frames (jpeg) + for i in os.listdir(): + if i.startswith("tmp_"): + logger.info("removing tmp_*") + os.system("rm -r tmp_*") + break + + def generate_video_id(self): + """Generates a unique identifier for a video file.""" + return str(uuid.uuid4()) + + def generate_embeddings(self, config, dimensions, vs): + process_all_videos(config) + global_metadata_file_path = os.path.join(config["meta_output_dir"], "metadata.json") + logger.info(f"global metadata file available at {global_metadata_file_path}") + self.store_into_vectordb(vs, global_metadata_file_path, dimensions) + + async def ingest_videos(self, files: List[UploadFile] = File(None)): + """Ingest videos to VDMS.""" + current_dir = os.path.dirname(os.path.abspath(__file__)) + config = read_config(os.path.join(current_dir, "./config/config.yaml")) + meanclip_cfg = { + "model_name": config["embeddings"]["vclip_model_name"], + "num_frm": config["embeddings"]["vclip_num_frm"], + } + generate_frames = config["generate_frames"] + path = config["videos"] + meta_output_dir = config["meta_output_dir"] + emb_path = config["embeddings"]["path"] + host = VECTORDB_SERVICE_HOST_IP + port = int(VECTORDB_SERVICE_PORT) + selected_db = config["vector_db"]["choice_of_db"] + vector_dimensions = config["embeddings"]["vector_dimensions"] + logger.info(f"Parsing videos {path}.") + + # Saving videos + if files: + video_files = [] + for file in files: + if os.path.splitext(file.filename)[1] == ".mp4": + video_files.append(file) + else: + raise HTTPException( + status_code=400, + detail=f"File {file.filename} is not an mp4 file. Please upload mp4 files only.", + ) + + for video_file in video_files: + video_id = self.generate_video_id() + video_name = os.path.splitext(video_file.filename)[0] + video_file_name = f"{video_name}_{video_id}.mp4" + video_dir_name = os.path.splitext(video_file_name)[0] + # Save video file in upload_directory + with open(os.path.join(path, video_file_name), "wb") as f: + shutil.copyfileobj(video_file.file, f) + + # Creating DB + logger.info( + "Creating DB with video embedding and metadata support, \nIt may take few minutes to download and load all required models if you are running for first time." 
+ ) + logger.info("Connecting to {} at {}:{}".format(selected_db, host, port)) + + # init meanclip model + model = self.setup_vclip_model(meanclip_cfg, device="cpu") + vs = store_embeddings.VideoVS( + host, port, selected_db, model, collection_name, embedding_dimensions=vector_dimensions + ) + logger.info("done creating DB, sleep 5s") + time.sleep(5) + + self.generate_embeddings(config, vector_dimensions, vs) + + return {"message": "Videos ingested successfully"} + + async def get_videos(self): + """Returns list of names of uploaded videos saved on the server.""" + current_dir = os.path.dirname(os.path.abspath(__file__)) + config = read_config(os.path.join(current_dir, "./config/config.yaml")) + if not Path(config["videos"]).exists(): + logger.info("No file uploaded, return empty list.") + return [] + + uploaded_videos = os.listdir(config["videos"]) + mp4_files = [file for file in uploaded_videos if file.endswith(".mp4")] + return mp4_files + + async def get_one_file(self, filename: str): + """Download the file from remote.""" + + current_dir = os.path.dirname(os.path.abspath(__file__)) + config = read_config(os.path.join(current_dir, "./config/config.yaml")) + UPLOAD_DIR = config["videos"] + file_path = os.path.join(UPLOAD_DIR, filename) + if os.path.exists(file_path): + return FileResponse(path=file_path, filename=filename) + else: + return {"error": "File not found"} + + async def ingest_generate_transcripts(self, files: List[UploadFile] = File(None)): + pass + + async def ingest_generate_caption(self, files: List[UploadFile] = File(None)): + pass + + async def ingest_files(self, files: Optional[Union[UploadFile, List[UploadFile]]] = File(None)): + pass + + async def get_files(self): + pass + + async def delete_files(self, file_path): + pass diff --git a/comps/dataprep/src/opea_dataprep_loader.py b/comps/dataprep/src/opea_dataprep_loader.py index cd0ffb5aba..8ec1042f8d 100644 --- a/comps/dataprep/src/opea_dataprep_loader.py +++ b/comps/dataprep/src/opea_dataprep_loader.py @@ -31,3 +31,51 @@ async def delete_files(self, *args, **kwargs): if logflag: logger.info("[ dataprep loader ] delete files") return await self.component.delete_files(*args, **kwargs) + + +class OpeaDataprepMultiModalLoader(OpeaComponentLoader): + def __init__(self, component_name, **kwargs): + super().__init__(component_name=component_name, **kwargs) + + def invoke(self, *args, **kwargs): + pass + + async def ingest_files(self, *args, **kwargs): + if logflag: + logger.info("[ dataprep loader ] ingest files") + return await self.component.ingest_files(*args, **kwargs) + + async def ingest_videos(self, *args, **kwargs): + if logflag: + logger.info("[ dataprep loader ] ingest files") + return await self.component.ingest_videos(*args, **kwargs) + + async def ingest_generate_transcripts(self, *args, **kwargs): + if logflag: + logger.info("[ dataprep loader ] ingest generate transcripts") + return await self.component.ingest_generate_transcripts(*args, **kwargs) + + async def ingest_generate_captions(self, *args, **kwargs): + if logflag: + logger.info("[ dataprep loader ] ingest generate captions") + return await self.component.ingest_generate_captions(*args, **kwargs) + + async def get_files(self, *args, **kwargs): + if logflag: + logger.info("[ dataprep loader ] get files") + return await self.component.get_files(*args, **kwargs) + + async def get_one_file(self, *args, **kwargs): + if logflag: + logger.info("[ dataprep loader ] get one file") + return await self.component.get_one_file(*args, **kwargs) + + async def 
get_videos(self, *args, **kwargs): + if logflag: + logger.info("[ dataprep loader ] get videos") + return await self.component.get_videos(*args, **kwargs) + + async def delete_files(self, *args, **kwargs): + if logflag: + logger.info("[ dataprep loader ] delete files") + return await self.component.delete_files(*args, **kwargs) diff --git a/comps/dataprep/src/opea_dataprep_microservice.py b/comps/dataprep/src/opea_dataprep_microservice.py index cb38163969..7dda2879d4 100644 --- a/comps/dataprep/src/opea_dataprep_microservice.py +++ b/comps/dataprep/src/opea_dataprep_microservice.py @@ -7,8 +7,15 @@ from typing import List, Optional, Union from fastapi import Body, File, Form, UploadFile +from integrations.elasticsearch import OpeaElasticSearchDataprep from integrations.milvus import OpeaMilvusDataprep +from integrations.neo4j_llamaindex import OpeaNeo4jLlamaIndexDataprep +from integrations.opensearch import OpeaOpenSearchDataprep +from integrations.pgvect import OpeaPgvectorDataprep +from integrations.pipecone import OpeaPineConeDataprep +from integrations.qdrant import OpeaQdrantDataprep from integrations.redis import OpeaRedisDataprep +from integrations.vdms import OpeaVdmsDataprep from opea_dataprep_loader import OpeaDataprepLoader from comps import ( diff --git a/comps/dataprep/src/opea_dataprep_multimodal_microservice.py b/comps/dataprep/src/opea_dataprep_multimodal_microservice.py new file mode 100644 index 0000000000..9fbb562a17 --- /dev/null +++ b/comps/dataprep/src/opea_dataprep_multimodal_microservice.py @@ -0,0 +1,264 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + + +import os +import time +from typing import List, Optional, Union + +from fastapi import Body, File, UploadFile +from integrations.redis_multimodal import OpeaMultimodalRedisDataprep +from integrations.vdms_multimodal import OpeaMultimodalVdmsDataprep +from opea_dataprep_loader import OpeaDataprepMultiModalLoader + +from comps import ( + CustomLogger, + ServiceType, + opea_microservices, + register_microservice, + register_statistics, + statistics_dict, +) +from comps.dataprep.src.utils import create_upload_folder + +logger = CustomLogger("opea_dataprep_multimodal_microservice") +logflag = os.getenv("LOGFLAG", False) +upload_folder = "./uploaded_files/" + +dataprep_component_name = os.getenv("DATAPREP_COMPONENT_NAME", "OPEA_DATAPREP_MULTIMODALVDMS") +# Initialize OpeaComponentLoader +loader = OpeaDataprepMultiModalLoader( + dataprep_component_name, + description=f"OPEA DATAPREP Multimodal Component: {dataprep_component_name}", +) + + +@register_microservice( + name="opea_service@dataprep_multimodal", + service_type=ServiceType.DATAPREP, + endpoint="/v1/dataprep/ingest", + host="0.0.0.0", + port=5000, +) +@register_statistics(names=["opea_service@dataprep_multimodal"]) +async def ingest_files(files: Optional[Union[UploadFile, List[UploadFile]]] = File(None)): + start = time.time() + + if logflag: + logger.info(f"[ ingest ] files:{files}") + + try: + # Use the loader to invoke the component + response = await loader.ingest_files(files) + # Log the result if logging is enabled + if logflag: + logger.info(f"[ ingest ] Output generated: {response}") + # Record statistics + statistics_dict["opea_service@dataprep_multimodal"].append_latency(time.time() - start, None) + return response + except Exception as e: + logger.error(f"Error during dataprep ingest files invocation: {e}") + raise + + +@register_microservice( + name="opea_service@dataprep_multimodal", + 
service_type=ServiceType.DATAPREP, + endpoint="/v1/dataprep/ingest_videos", + host="0.0.0.0", + port=5000, +) +@register_statistics(names=["opea_service@dataprep_multimodal"]) +async def ingest_videos(files: Optional[Union[UploadFile, List[UploadFile]]] = File(None)): + start = time.time() + + if logflag: + logger.info(f"[ ingest ] files:{files}") + + try: + # Use the loader to invoke the component + response = await loader.ingest_videos(files) + # Log the result if logging is enabled + if logflag: + logger.info(f"[ ingest ] Output generated: {response}") + # Record statistics + statistics_dict["opea_service@dataprep_multimodal"].append_latency(time.time() - start, None) + return response + except Exception as e: + logger.error(f"Error during dataprep ingest videos invocation: {e}") + raise + + +@register_microservice( + name="opea_service@dataprep_multimodal", + service_type=ServiceType.DATAPREP, + endpoint="/v1/dataprep/generate_transcripts", + host="0.0.0.0", + port=5000, +) +@register_statistics(names=["opea_service@dataprep_multimodal"]) +async def ingest_generate_transcripts(files: Optional[Union[UploadFile, List[UploadFile]]] = File(None)): + start = time.time() + + if logflag: + logger.info(f"[ ingest ] files:{files}") + try: + # Use the loader to invoke the component + response = await loader.ingest_generate_transcripts(files) + # Log the result if logging is enabled + if logflag: + logger.info(f"[ ingest ] Output generated: {response}") + # Record statistics + statistics_dict["opea_service@dataprep_multimodal"].append_latency(time.time() - start, None) + return response + except Exception as e: + logger.error(f"Error during dataprep generate_transcripts invocation: {e}") + raise + + +@register_microservice( + name="opea_service@dataprep_multimodal", + service_type=ServiceType.DATAPREP, + endpoint="/v1/dataprep/generate_captions", + host="0.0.0.0", + port=5000, +) +@register_statistics(names=["opea_service@dataprep_multimodal"]) +async def ingest_generate_captions(files: Optional[Union[UploadFile, List[UploadFile]]] = File(None)): + start = time.time() + + if logflag: + logger.info(f"[ ingest ] files:{files}") + + try: + # Use the loader to invoke the component + response = await loader.ingest_generate_captions(files) + # Log the result if logging is enabled + if logflag: + logger.info(f"[ ingest ] Output generated: {response}") + # Record statistics + statistics_dict["opea_service@dataprep_multimodal"].append_latency(time.time() - start, None) + return response + except Exception as e: + logger.error(f"Error during dataprep generate_captions invocation: {e}") + raise + + +@register_microservice( + name="opea_service@dataprep_multimodal", + service_type=ServiceType.DATAPREP, + endpoint="/v1/dataprep/get", + host="0.0.0.0", + port=5000, +) +@register_statistics(names=["opea_service@dataprep_multimodal"]) +async def get_files(): + start = time.time() + + if logflag: + logger.info("[ get ] start to get ingested files") + + try: + # Use the loader to invoke the component + response = await loader.get_files() + # Log the result if logging is enabled + if logflag: + logger.info(f"[ get ] ingested files: {response}") + # Record statistics + statistics_dict["opea_service@dataprep_multimodal"].append_latency(time.time() - start, None) + return response + except Exception as e: + logger.error(f"Error during dataprep get files invocation: {e}") + raise + + +@register_microservice( + name="opea_service@dataprep_multimodal", + service_type=ServiceType.DATAPREP, + 
endpoint="/v1/dataprep/get/{filename}", + host="0.0.0.0", + port=5000, + methods=["GET"], +) +@register_statistics(names=["opea_service@dataprep_multimodal"]) +async def get_one_file(filename: str): + start = time.time() + + if logflag: + logger.info("[ get ] start to get ingested files") + + try: + # Use the loader to invoke the component + response = await loader.get_one_file(filename) + # Log the result if logging is enabled + if logflag: + logger.info(f"[ get ] ingested files: {response}") + # Record statistics + statistics_dict["opea_service@dataprep_multimodal"].append_latency(time.time() - start, None) + return response + except Exception as e: + logger.error(f"Error during dataprep get one file invocation: {e}") + raise + + +@register_microservice( + name="opea_service@dataprep_multimodal", + service_type=ServiceType.DATAPREP, + endpoint="/v1/dataprep/get_videos", + host="0.0.0.0", + port=5000, + methods=["GET"], +) +@register_statistics(names=["opea_service@dataprep_multimodal"]) +async def get_videos(): + start = time.time() + + if logflag: + logger.info("[ get ] start to get ingested files") + + try: + # Use the loader to invoke the component + response = await loader.get_videos() + # Log the result if logging is enabled + if logflag: + logger.info(f"[ get ] ingested files: {response}") + # Record statistics + statistics_dict["opea_service@dataprep_multimodal"].append_latency(time.time() - start, None) + return response + except Exception as e: + logger.error(f"Error during dataprep get videos invocation: {e}") + raise + + +@register_microservice( + name="opea_service@dataprep_multimodal", + service_type=ServiceType.DATAPREP, + endpoint="/v1/dataprep/delete", + host="0.0.0.0", + port=5000, +) +@register_statistics(names=["opea_service@dataprep_multimodal"]) +async def delete_files(file_path: str = Body(..., embed=True)): + start = time.time() + + if logflag: + logger.info("[ delete ] start to delete ingested files") + + try: + # Use the loader to invoke the component + response = await loader.delete_files(file_path) + # Log the result if logging is enabled + if logflag: + logger.info(f"[ delete ] deleted result: {response}") + # Record statistics + statistics_dict["opea_service@dataprep_multimodal"].append_latency(time.time() - start, None) + return response + except Exception as e: + logger.error(f"Error during dataprep delete invocation: {e}") + raise + + +if __name__ == "__main__": + logger.info("OPEA Dataprep Multimodal Microservice is starting...") + create_upload_folder(upload_folder) + opea_microservices["opea_service@dataprep_multimodal"].start() diff --git a/comps/dataprep/src/requirements.txt b/comps/dataprep/src/requirements.txt index fed3242969..b2c7f02fbb 100644 --- a/comps/dataprep/src/requirements.txt +++ b/comps/dataprep/src/requirements.txt @@ -1,33 +1,62 @@ beautifulsoup4 cairosvg +decord docarray[full] docx2txt easyocr +einops +elasticsearch fastapi +future +graspologic html2text huggingface_hub -langchain --extra-index-url https://download.pytorch.org/whl/cpu -langchain-community --extra-index-url https://download.pytorch.org/whl/cpu -langchain-text-splitters --extra-index-url https://download.pytorch.org/whl/cpu -langchain_huggingface --extra-index-url https://download.pytorch.org/whl/cpu -langchain_milvus --extra-index-url https://download.pytorch.org/whl/cpu +ipython +langchain +langchain-community +langchain-elasticsearch +langchain-experimental +langchain-openai +langchain-pinecone +langchain-text-splitters +langchain_huggingface +langchain_milvus 
+llama-index +llama-index-core +llama-index-embeddings-text-embeddings-inference +llama-index-graph-stores-neo4j +llama-index-llms-openai +llama-index-llms-openai-like markdown +moviepy +neo4j numpy openai -opentelemetry-api -opentelemetry-exporter-otlp -opentelemetry-sdk +openai-whisper +opencv-python +opensearch-py pandas +pgvector==0.2.5 Pillow +pinecone-client prometheus-fastapi-instrumentator +psycopg2 pymupdf pyspark pytesseract python-bidi python-docx python-pptx +qdrant-client redis +scipy sentence_transformers shortuuid +tiktoken +tqdm +typing +tzlocal unstructured[all-docs] uvicorn +vdms +webvtt-py diff --git a/comps/dataprep/vdms/langchain/Dockerfile b/comps/dataprep/vdms/langchain/Dockerfile deleted file mode 100644 index fbaf9f0f68..0000000000 --- a/comps/dataprep/vdms/langchain/Dockerfile +++ /dev/null @@ -1,38 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -FROM python:3.11-slim - -ENV LANG=C.UTF-8 - -ARG ARCH="cpu" - -RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ - build-essential \ - libcairo2-dev \ - libgl1-mesa-glx \ - libjemalloc-dev - -RUN useradd -m -s /bin/bash user && \ - mkdir -p /home/user && \ - chown -R user /home/user/ - -USER user - -COPY comps /home/user/comps - -RUN pip install --no-cache-dir --upgrade pip setuptools && \ - if [ ${ARCH} = "cpu" ]; then pip install --no-cache-dir torch torchvision --index-url https://download.pytorch.org/whl/cpu; fi && \ - pip install --no-cache-dir -r /home/user/comps/dataprep/vdms/langchain/requirements.txt - -ENV PYTHONPATH=/home/user - -USER root - -RUN mkdir -p /home/user/comps/dataprep/vdms/langchain/uploaded_files && chown -R user /home/user/comps/dataprep/vdms/langchain - -USER user - -WORKDIR /home/user/comps/dataprep/vdms/langchain - -ENTRYPOINT ["python", "prepare_doc_vdms.py"] diff --git a/comps/dataprep/vdms/langchain/config.py b/comps/dataprep/vdms/langchain/config.py deleted file mode 100644 index e12ba15029..0000000000 --- a/comps/dataprep/vdms/langchain/config.py +++ /dev/null @@ -1,33 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import os - - -def getEnv(key, default_value=None): - env_value = os.getenv(key, default=default_value) - print(f"{key}: {env_value}") - return env_value - - -# Embedding model -EMBED_MODEL = getEnv("EMBED_MODEL", "BAAI/bge-base-en-v1.5") - -# VDMS configuration -VDMS_HOST = getEnv("VDMS_HOST", "localhost") -VDMS_PORT = int(getEnv("VDMS_PORT", 55555)) -COLLECTION_NAME = getEnv("COLLECTION_NAME", "rag-vdms") -SEARCH_ENGINE = getEnv("SEARCH_ENGINE", "FaissFlat") -DISTANCE_STRATEGY = getEnv("DISTANCE_STRATEGY", "L2") - -# LLM/Embedding endpoints -TGI_LLM_ENDPOINT = getEnv("TGI_LLM_ENDPOINT", "http://localhost:8080") -TGI_LLM_ENDPOINT_NO_RAG = getEnv("TGI_LLM_ENDPOINT_NO_RAG", "http://localhost:8081") -TEI_EMBEDDING_ENDPOINT = getEnv("TEI_ENDPOINT") - -# chunk parameters -CHUNK_SIZE = getEnv("CHUNK_SIZE", 1500) -CHUNK_OVERLAP = getEnv("CHUNK_OVERLAP", 100) - -current_file_path = os.path.abspath(__file__) -parent_dir = os.path.dirname(current_file_path) diff --git a/comps/dataprep/vdms/langchain/prepare_doc_vdms.py b/comps/dataprep/vdms/langchain/prepare_doc_vdms.py deleted file mode 100644 index a6d1958c1f..0000000000 --- a/comps/dataprep/vdms/langchain/prepare_doc_vdms.py +++ /dev/null @@ -1,166 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import json -import os -from typing import List, Optional, Union - -from config 
import COLLECTION_NAME, DISTANCE_STRATEGY, EMBED_MODEL, SEARCH_ENGINE, VDMS_HOST, VDMS_PORT -from fastapi import File, Form, HTTPException, UploadFile -from langchain.text_splitter import RecursiveCharacterTextSplitter -from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceHubEmbeddings -from langchain_community.vectorstores.vdms import VDMS, VDMS_Client -from langchain_text_splitters import HTMLHeaderTextSplitter - -from comps import CustomLogger, DocPath, opea_microservices, register_microservice -from comps.dataprep.src.utils import ( - create_upload_folder, - document_loader, - encode_filename, - get_separators, - get_tables_result, - parse_html_new, - save_content_to_local_disk, -) - -tei_embedding_endpoint = os.getenv("TEI_ENDPOINT") -client = VDMS_Client(VDMS_HOST, int(VDMS_PORT)) -logger = CustomLogger("prepare_doc_redis") -logflag = os.getenv("LOGFLAG", False) -upload_folder = "./uploaded_files/" - - -def ingest_data_to_vdms(doc_path: DocPath): - """Ingest document to VDMS.""" - path = doc_path.path - print(f"Parsing document {doc_path}.") - - if path.endswith(".html"): - headers_to_split_on = [ - ("h1", "Header 1"), - ("h2", "Header 2"), - ("h3", "Header 3"), - ] - text_splitter = HTMLHeaderTextSplitter(headers_to_split_on=headers_to_split_on) - else: - text_splitter = RecursiveCharacterTextSplitter( - chunk_size=doc_path.chunk_size, chunk_overlap=100, add_start_index=True, separators=get_separators() - ) - - content = document_loader(path) - chunks = text_splitter.split_text(content) - if doc_path.process_table and path.endswith(".pdf"): - table_chunks = get_tables_result(path, doc_path.table_strategy) - chunks = chunks + table_chunks - - print("Done preprocessing. Created ", len(chunks), " chunks of the original pdf") - - # Create vectorstore - if tei_embedding_endpoint: - # create embeddings using TEI endpoint service - embedder = HuggingFaceHubEmbeddings(model=tei_embedding_endpoint) - else: - # create embeddings using local embedding model - embedder = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) - - # Batch size - batch_size = 32 - num_chunks = len(chunks) - for i in range(0, num_chunks, batch_size): - batch_chunks = chunks[i : i + batch_size] - batch_texts = batch_chunks - - _ = VDMS.from_texts( - client=client, - embedding=embedder, - collection_name=COLLECTION_NAME, - distance_strategy=DISTANCE_STRATEGY, - engine=SEARCH_ENGINE, - texts=batch_texts, - ) - print(f"Processed batch {i//batch_size + 1}/{(num_chunks-1)//batch_size + 1}") - - -@register_microservice( - name="opea_service@prepare_doc_vdms", - endpoint="/v1/dataprep", - host="0.0.0.0", - port=6007, -) -async def ingest_documents( - files: Optional[Union[UploadFile, List[UploadFile]]] = File(None), - link_list: Optional[str] = Form(None), - chunk_size: int = Form(1500), - chunk_overlap: int = Form(100), - process_table: bool = Form(False), - table_strategy: str = Form("fast"), -): - if logflag: - logger.info(f"[ upload ] files:{files}") - logger.info(f"[ upload ] link_list:{link_list}") - - if files: - if not isinstance(files, list): - files = [files] - uploaded_files = [] - - for file in files: - encode_file = encode_filename(file.filename) - doc_id = "file:" + encode_file - if logflag: - logger.info(f"[ upload ] processing file {doc_id}") - - save_path = upload_folder + encode_file - await save_content_to_local_disk(save_path, file) - ingest_data_to_vdms( - DocPath( - path=save_path, - chunk_size=chunk_size, - chunk_overlap=chunk_overlap, - process_table=process_table, - 
table_strategy=table_strategy, - ) - ) - uploaded_files.append(save_path) - if logflag: - logger.info(f"[ upload ] Successfully saved file {save_path}") - - result = {"status": 200, "message": "Data preparation succeeded"} - if logflag: - logger.info(result) - return result - - if link_list: - link_list = json.loads(link_list) # Parse JSON string to list - if not isinstance(link_list, list): - raise HTTPException(status_code=400, detail=f"Link_list {link_list} should be a list.") - for link in link_list: - encoded_link = encode_filename(link) - doc_id = "file:" + encoded_link + ".txt" - if logflag: - logger.info(f"[ upload ] processing link {doc_id}") - - # check whether the link file already exists - - save_path = upload_folder + encoded_link + ".txt" - content = parse_html_new([link], chunk_size=chunk_size, chunk_overlap=chunk_overlap) - await save_content_to_local_disk(save_path, content) - ingest_data_to_vdms( - DocPath( - path=save_path, - chunk_size=chunk_size, - chunk_overlap=chunk_overlap, - process_table=process_table, - table_strategy=table_strategy, - ) - ) - if logflag: - logger.info(f"[ upload ] Successfully saved link list {link_list}") - return {"status": 200, "message": "Data preparation succeeded"} - - raise HTTPException(status_code=400, detail="Must provide either a file or a string list.") - - -if __name__ == "__main__": - create_upload_folder(upload_folder) - opea_microservices["opea_service@prepare_doc_vdms"].start() diff --git a/comps/dataprep/vdms/langchain/requirements.txt b/comps/dataprep/vdms/langchain/requirements.txt deleted file mode 100644 index 96fac92158..0000000000 --- a/comps/dataprep/vdms/langchain/requirements.txt +++ /dev/null @@ -1,41 +0,0 @@ -beautifulsoup4 -cairosvg -decord -docarray[full] -docx2txt -easyocr -einops -fastapi -html2text -huggingface_hub -langchain -langchain-community -langchain-core -langchain-text-splitters -langsmith -markdown -numpy -opencv-python -opentelemetry-api -opentelemetry-exporter-otlp -opentelemetry-proto==1.23.0 -opentelemetry-sdk -pandas -Pillow -prometheus-fastapi-instrumentator -protobuf==4.24.2 -pymupdf -pyspark -pytesseract -python-bidi==0.4.2 -python-docx -python-pptx -PyYAML -sentence_transformers -shortuuid -tqdm -typing -tzlocal -unstructured[all-docs]==0.11.5 -uvicorn -vdms>=0.0.20 diff --git a/comps/dataprep/vdms/langchain/vdms_langchain.yaml b/comps/dataprep/vdms/langchain/vdms_langchain.yaml deleted file mode 100644 index 46880119e5..0000000000 --- a/comps/dataprep/vdms/langchain/vdms_langchain.yaml +++ /dev/null @@ -1,28 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -version: "3" -services: - vdms-vector-db: - image: intellabs/vdms:latest - container_name: vdms-vector-db - ports: - - "55555:55555" - dataprep-vdms: - image: opea/dataprep-vdms:latest - container_name: dataprep-vdms-server - ports: - - "6007:6007" - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - VDMS_HOST: ${VDMS_HOST} - VDMS_PORT: ${VDMS_PORT} - COLLECTION_NAME: ${COLLECTION_NAME} - restart: unless-stopped - -networks: - default: - driver: bridge diff --git a/comps/dataprep/vdms/multimodal_langchain/Dockerfile b/comps/dataprep/vdms/multimodal_langchain/Dockerfile deleted file mode 100644 index 4794e855ea..0000000000 --- a/comps/dataprep/vdms/multimodal_langchain/Dockerfile +++ /dev/null @@ -1,38 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -FROM python:3.11-slim - -ENV LANG=C.UTF-8 - -ARG 
ARCH="cpu" - -RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ - build-essential \ - libcairo2-dev \ - libgl1-mesa-glx \ - libjemalloc-dev - -RUN useradd -m -s /bin/bash user && \ - mkdir -p /home/user && \ - chown -R user /home/user/ - -USER user - -COPY comps /home/user/comps - -RUN pip install --no-cache-dir --upgrade pip setuptools && \ - if [ ${ARCH} = "cpu" ]; then pip install --no-cache-dir torch torchvision --index-url https://download.pytorch.org/whl/cpu; fi && \ - pip install --no-cache-dir -r /home/user/comps/dataprep/vdms/multimodal_langchain/requirements.txt - -ENV PYTHONPATH=/home/user - -USER root - -RUN mkdir -p /home/user/comps/dataprep/vdms/multimodal_langchain/uploaded_files && chown -R user /home/user/comps/dataprep/vdms/multimodal_langchain - -USER user - -WORKDIR /home/user/comps/dataprep/vdms/multimodal_langchain - -ENTRYPOINT ["python", "ingest_videos.py"] diff --git a/comps/dataprep/vdms/multimodal_langchain/README.md b/comps/dataprep/vdms/multimodal_langchain/README.md deleted file mode 100644 index 2d86c28b13..0000000000 --- a/comps/dataprep/vdms/multimodal_langchain/README.md +++ /dev/null @@ -1,124 +0,0 @@ -# Multimodal Dataprep Microservice with VDMS - -For dataprep microservice, we currently provide one framework: `Langchain`. - -## 🚀1. Start Microservice with Python (Option 1) - -### 1.1 Install Requirements - -- option 1: Install Single-process version (for 1-10 files processing) - - ```bash - apt-get update - apt-get install -y default-jre tesseract-ocr libtesseract-dev poppler-utils - pip install -r requirements.txt - ``` - -### 1.2 Start VDMS Server - -```bash -docker run -d --name="vdms-vector-db" -p 55555:55555 intellabs/vdms:latest -``` - -### 1.3 Setup Environment Variables - -```bash -export http_proxy=${your_http_proxy} -export https_proxy=${your_http_proxy} -export host_ip=$(hostname -I | awk '{print $1}') -export VDMS_HOST=${host_ip} -export VDMS_PORT=55555 -export INDEX_NAME="rag-vdms" -export your_hf_api_token="{your_hf_token}" -export PYTHONPATH=${path_to_comps} -``` - -### 1.4 Start Data Preparation Microservice for VDMS with Python Script - -Start document preparation microservice for VDMS with below command. - -```bash -python ingest_videos.py -``` - -## 🚀2. Start Microservice with Docker (Option 2) - -### 2.1 Start VDMS Server - -```bash -docker run -d --name="vdms-vector-db" -p 55555:55555 intellabs/vdms:latest -``` - -### 2.1 Setup Environment Variables - -```bash -export http_proxy=${your_http_proxy} -export https_proxy=${your_http_proxy} -export host_ip=$(hostname -I | awk '{print $1}') -export VDMS_HOST=${host_ip} -export VDMS_PORT=55555 -export INDEX_NAME="rag-vdms" -export your_hf_api_token="{your_hf_token}" -``` - -### 2.3 Build Docker Image - -- Build docker image - - ```bash - cd ../../../ - docker build -t opea/dataprep-vdms:latest --network host --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/vdms/multimodal_langchain/Dockerfile . - - ``` - -### 2.4 Run Docker Compose - -```bash -docker compose -f comps/dataprep/vdms/multimodal_langchain/docker-compose-dataprep-vdms.yaml up -d -``` - -## 🚀3. Status Microservice - -```bash -docker container logs -f dataprep-vdms-server -``` - -## 🚀4. Consume Microservice - -Once data preparation microservice for VDMS is started, user can use below command to invoke the microservice to convert the videos to embedding and save to the database. - -Make sure the file path after `files=@` is correct. 
- -- Single file upload - - ```bash - curl -X POST \ - -H "Content-Type: multipart/form-data" \ - -F "files=@./file1.mp4" \ - http://localhost:6007/v1/dataprep - ``` - -- Multiple file upload - - ```bash - curl -X POST \ - -H "Content-Type: multipart/form-data" \ - -F "files=@./file1.mp4" \ - -F "files=@./file2.mp4" \ - -F "files=@./file3.mp4" \ - http://localhost:6007/v1/dataprep - ``` - -- List of uploaded files - - ```bash - curl -X GET http://localhost:6007/v1/dataprep/get_videos - ``` - -- Download uploaded files - - Use the file name from the list - - ```bash - curl -X GET http://localhost:6007/v1/dataprep/get_file/${filename} - ``` diff --git a/comps/dataprep/vdms/multimodal_langchain/ingest_videos.py b/comps/dataprep/vdms/multimodal_langchain/ingest_videos.py deleted file mode 100644 index 7ce4ccf81b..0000000000 --- a/comps/dataprep/vdms/multimodal_langchain/ingest_videos.py +++ /dev/null @@ -1,177 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import json -import os -import shutil -import time -import uuid -from pathlib import Path -from typing import List - -from fastapi import File, HTTPException, UploadFile -from fastapi.responses import FileResponse -from tqdm import tqdm -from utils import store_embeddings -from utils.utils import process_all_videos, read_config -from utils.vclip import vCLIP - -from comps import opea_microservices, register_microservice - -VECTORDB_SERVICE_HOST_IP = os.getenv("VDMS_HOST", "0.0.0.0") -VECTORDB_SERVICE_PORT = os.getenv("VDMS_PORT", 55555) -collection_name = os.getenv("INDEX_NAME", "rag-vdms") - - -def setup_vclip_model(config, device="cpu"): - model = vCLIP(config) - return model - - -def read_json(path): - with open(path) as f: - x = json.load(f) - return x - - -def store_into_vectordb(vs, metadata_file_path, dimensions): - GMetadata = read_json(metadata_file_path) - - total_videos = len(GMetadata.keys()) - - for idx, (video, data) in enumerate(tqdm(GMetadata.items())): - metadata_list = [] - ids = [] - - data["video"] = video - video_name_list = [data["video_path"]] - metadata_list = [data] - if vs.selected_db == "vdms": - vs.video_db.add_videos( - paths=video_name_list, - metadatas=metadata_list, - start_time=[data["timestamp"]], - clip_duration=[data["clip_duration"]], - ) - else: - print(f"ERROR: selected_db {vs.selected_db} not supported. 
Supported:[vdms]") - - # clean up tmp_ folders containing frames (jpeg) - for i in os.listdir(): - if i.startswith("tmp_"): - print("removing tmp_*") - os.system("rm -r tmp_*") - break - - -def generate_video_id(): - """Generates a unique identifier for a video file.""" - return str(uuid.uuid4()) - - -def generate_embeddings(config, dimensions, vs): - process_all_videos(config) - global_metadata_file_path = os.path.join(config["meta_output_dir"], "metadata.json") - print(f"global metadata file available at {global_metadata_file_path}") - store_into_vectordb(vs, global_metadata_file_path, dimensions) - - -@register_microservice(name="opea_service@prepare_videodoc_vdms", endpoint="/v1/dataprep", host="0.0.0.0", port=6007) -async def process_videos(files: List[UploadFile] = File(None)): - """Ingest videos to VDMS.""" - - config = read_config("./config.yaml") - meanclip_cfg = { - "model_name": config["embeddings"]["vclip_model_name"], - "num_frm": config["embeddings"]["vclip_num_frm"], - } - generate_frames = config["generate_frames"] - path = config["videos"] - meta_output_dir = config["meta_output_dir"] - emb_path = config["embeddings"]["path"] - host = VECTORDB_SERVICE_HOST_IP - port = int(VECTORDB_SERVICE_PORT) - selected_db = config["vector_db"]["choice_of_db"] - vector_dimensions = config["embeddings"]["vector_dimensions"] - print(f"Parsing videos {path}.") - - # Saving videos - if files: - video_files = [] - for file in files: - if os.path.splitext(file.filename)[1] == ".mp4": - video_files.append(file) - else: - raise HTTPException( - status_code=400, detail=f"File {file.filename} is not an mp4 file. Please upload mp4 files only." - ) - - for video_file in video_files: - video_id = generate_video_id() - video_name = os.path.splitext(video_file.filename)[0] - video_file_name = f"{video_name}_{video_id}.mp4" - video_dir_name = os.path.splitext(video_file_name)[0] - # Save video file in upload_directory - with open(os.path.join(path, video_file_name), "wb") as f: - shutil.copyfileobj(video_file.file, f) - - # Creating DB - print( - "Creating DB with video embedding and metadata support, \nIt may take few minutes to download and load all required models if you are running for first time.", - flush=True, - ) - print("Connecting to {} at {}:{}".format(selected_db, host, port), flush=True) - - # init meanclip model - model = setup_vclip_model(meanclip_cfg, device="cpu") - vs = store_embeddings.VideoVS( - host, port, selected_db, model, collection_name, embedding_dimensions=vector_dimensions - ) - print("done creating DB, sleep 5s", flush=True) - time.sleep(5) - - generate_embeddings(config, vector_dimensions, vs) - - return {"message": "Videos ingested successfully"} - - -@register_microservice( - name="opea_service@prepare_videodoc_vdms", - endpoint="/v1/dataprep/get_videos", - host="0.0.0.0", - port=6007, - methods=["GET"], -) -async def rag_get_file_structure(): - """Returns list of names of uploaded videos saved on the server.""" - config = read_config("./config.yaml") - if not Path(config["videos"]).exists(): - print("No file uploaded, return empty list.") - return [] - - uploaded_videos = os.listdir(config["videos"]) - mp4_files = [file for file in uploaded_videos if file.endswith(".mp4")] - return mp4_files - - -@register_microservice( - name="opea_service@prepare_videodoc_vdms", - endpoint="/v1/dataprep/get_file/{filename}", - host="0.0.0.0", - port=6007, - methods=["GET"], -) -async def rag_get_file(filename: str): - """Download the file from remote.""" - - config = 
read_config("./config.yaml") - UPLOAD_DIR = config["videos"] - file_path = os.path.join(UPLOAD_DIR, filename) - if os.path.exists(file_path): - return FileResponse(path=file_path, filename=filename) - else: - return {"error": "File not found"} - - -if __name__ == "__main__": - opea_microservices["opea_service@prepare_videodoc_vdms"].start() diff --git a/comps/dataprep/vdms/multimodal_langchain/requirements.txt b/comps/dataprep/vdms/multimodal_langchain/requirements.txt deleted file mode 100644 index 773f912897..0000000000 --- a/comps/dataprep/vdms/multimodal_langchain/requirements.txt +++ /dev/null @@ -1,39 +0,0 @@ -beautifulsoup4 -cairosvg -decord -docarray[full] -docx2txt -easyocr -einops -fastapi -huggingface_hub -langchain -langchain-community -langchain-core -langchain-text-splitters -langsmith -markdown -numpy -opencv-python -opentelemetry-api -opentelemetry-exporter-otlp -opentelemetry-proto==1.23.0 -opentelemetry-sdk -pandas -Pillow -prometheus-fastapi-instrumentator -protobuf==4.24.2 -pymupdf -pyspark -python-bidi==0.4.2 -python-docx -python-pptx -PyYAML -sentence_transformers -shortuuid -tqdm -typing -tzlocal -unstructured[all-docs]==0.11.5 -uvicorn -vdms>=0.0.20 diff --git a/comps/dataprep/vdms/multimodal_langchain/vdms_multimodal_langchain.yaml b/comps/dataprep/vdms/multimodal_langchain/vdms_multimodal_langchain.yaml deleted file mode 100644 index 785dc64085..0000000000 --- a/comps/dataprep/vdms/multimodal_langchain/vdms_multimodal_langchain.yaml +++ /dev/null @@ -1,28 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -version: "3" -services: - vdms-vector-db: - image: intellabs/vdms:latest - container_name: vdms-vector-db - ports: - - "55555:55555" - dataprep-vdms: - image: opea/dataprep-vdms:latest - container_name: dataprep-vdms-server - ports: - - "6007:6007" - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - VDMS_HOST: ${VDMS_HOST} - VDMS_PORT: ${VDMS_PORT} - INDEX_NAME: ${INDEX_NAME} - restart: unless-stopped - -networks: - default: - driver: bridge diff --git a/comps/embeddings/deployment/docker_compose/compose.yaml b/comps/embeddings/deployment/docker_compose/compose.yaml new file mode 100644 index 0000000000..2a347f4f20 --- /dev/null +++ b/comps/embeddings/deployment/docker_compose/compose.yaml @@ -0,0 +1,72 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +include: + - ../../../third_parties/tei/deployment/docker_compose/compose.yaml + - ../../../third_parties/bridgetower/deployment/docker_compose/compose.yaml + +x-multimodal-bridgetower-embedding-config: &multimodal-bridgetower-embedding-config + image: ${REGISTRY:-opea}/embedding:${TAG:-latest} + container_name: multimodal-bridgetower-embedding-server + ports: + - ${MM_EMBEDDING_PORT_MICROSERVICE:-10200}:${MM_EMBEDDING_PORT_MICROSERVICE:-10200} + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + MULTIMODAL_EMBEDDING: true + EMBEDDING_COMPONENT_NAME: "OPEA_MULTIMODAL_EMBEDDING_BRIDGETOWER" + MMEI_EMBEDDING_ENDPOINT: ${MMEI_EMBEDDING_ENDPOINT} + MM_EMBEDDING_PORT_MICROSERVICE: ${MM_EMBEDDING_PORT_MICROSERVICE} + restart: unless-stopped + +services: + tei-embedding-server: + image: ${REGISTRY:-opea}/embedding:${TAG:-latest} + container_name: tei-embedding-server + ports: + - "${EMBEDDER_PORT:-10200}:6000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + 
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} + EMBEDDING_COMPONENT_NAME: "OPEA_TEI_EMBEDDING" + depends_on: + tei-embedding-serving: + condition: service_healthy + restart: unless-stopped + + pg-embedding-server: + image: ${REGISTRY:-opea}/embedding:${TAG:-latest} + container_name: pg-embedding-server + ports: + - ${EMBEDDER_PORT:-10200}:6000 + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + PG_EMBEDDING_MODEL_NAME: ${PG_EMBEDDING_MODEL_NAME} + PREDICTIONGUARD_API_KEY: ${PREDICTIONGUARD_API_KEY} + EMBEDDING_COMPONENT_NAME: "OPEA_PREDICTIONGUARD_EMBEDDING" + restart: unless-stopped + + multimodal-bridgetower-embedding-server: + <<: *multimodal-bridgetower-embedding-config + depends_on: + multimodal-bridgetower-embedding-serving: + condition: service_healthy + + multimodal-bridgetower-embedding-gaudi-server: + <<: *multimodal-bridgetower-embedding-config + container_name: embedding-multimodal-bridgetower-gaudi-server + depends_on: + multimodal-bridgetower-embedding-gaudi-serving: + condition: service_healthy + +networks: + default: + driver: bridge diff --git a/comps/embeddings/deployment/docker_compose/compose_multimodal_bridgetower.yaml b/comps/embeddings/deployment/docker_compose/compose_multimodal_bridgetower.yaml deleted file mode 100644 index 40b1008568..0000000000 --- a/comps/embeddings/deployment/docker_compose/compose_multimodal_bridgetower.yaml +++ /dev/null @@ -1,44 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -services: - embedding-multimodal-bridgetower: - image: opea/embedding-multimodal-bridgetower:latest - container_name: embedding-multimodal-bridgetower - ports: - - ${EMBEDDER_PORT}:${EMBEDDER_PORT} - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - PORT: ${EMBEDDER_PORT} - restart: unless-stopped - healthcheck: - test: ["CMD-SHELL", "http_proxy='' curl -f http://localhost:${EMBEDDER_PORT}/v1/health_check"] - interval: 10s - timeout: 6s - retries: 18 - start_period: 30s - embedding: - image: opea/embedding:latest - container_name: embedding-multimodal-bridgetower-server - ports: - - ${MM_EMBEDDING_PORT_MICROSERVICE}:${MM_EMBEDDING_PORT_MICROSERVICE} - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - MULTIMODAL_EMBEDDING: true - EMBEDDING_COMPONENT_NAME: "OPEA_MULTIMODAL_EMBEDDING_BRIDGETOWER" - MMEI_EMBEDDING_ENDPOINT: ${MMEI_EMBEDDING_ENDPOINT} - MM_EMBEDDING_PORT_MICROSERVICE: ${MM_EMBEDDING_PORT_MICROSERVICE} - restart: unless-stopped - depends_on: - embedding-multimodal-bridgetower: - condition: service_healthy - -networks: - default: - driver: bridge diff --git a/comps/embeddings/deployment/docker_compose/compose_predictionguard.yaml b/comps/embeddings/deployment/docker_compose/compose_predictionguard.yaml deleted file mode 100644 index b55c6f88f7..0000000000 --- a/comps/embeddings/deployment/docker_compose/compose_predictionguard.yaml +++ /dev/null @@ -1,22 +0,0 @@ -# Copyright (C) 2024 Prediction Guard, Inc -# SPDX-License-Identifier: Apache-2.0 - -services: - embedding: - image: opea/embedding:latest - container_name: embedding - ports: - - "6000:6000" - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - PG_EMBEDDING_MODEL_NAME: ${PG_EMBEDDING_MODEL_NAME} - PREDICTIONGUARD_API_KEY: ${PREDICTIONGUARD_API_KEY} - EMBEDDING_COMPONENT_NAME: "OPEA_PREDICTIONGUARD_EMBEDDING" - restart: 
unless-stopped - -networks: - default: - driver: bridge diff --git a/comps/embeddings/deployment/kubernetes/README.md b/comps/embeddings/deployment/kubernetes/README.md index e69de29bb2..567987a983 100644 --- a/comps/embeddings/deployment/kubernetes/README.md +++ b/comps/embeddings/deployment/kubernetes/README.md @@ -0,0 +1,11 @@ +# Deploy Embedding microservice on Kubernetes cluster + +- You should have Helm (version >= 3.15) installed. Refer to the [Helm Installation Guide](https://helm.sh/docs/intro/install/) for more information. +- For more deployment options, refer to [helm charts README](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts#readme). + +## Deploy on Kubernetes + +``` +export HFTOKEN="insert-your-huggingface-token-here" +helm install embedding-usvc oci://ghcr.io/opea-project/charts/embedding-usvc --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f cpu-values.yaml +``` diff --git a/comps/dataprep/redis/llama_index/__init__.py b/comps/embeddings/deployment/kubernetes/cpu-values.yaml similarity index 77% rename from comps/dataprep/redis/llama_index/__init__.py rename to comps/embeddings/deployment/kubernetes/cpu-values.yaml index 916f3a44b2..e2d62ff26f 100644 --- a/comps/dataprep/redis/llama_index/__init__.py +++ b/comps/embeddings/deployment/kubernetes/cpu-values.yaml @@ -1,2 +1,5 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 + +tei: + enabled: true diff --git a/comps/embeddings/src/README.md b/comps/embeddings/src/README.md index a22f81e498..be7c89a4f2 100644 --- a/comps/embeddings/src/README.md +++ b/comps/embeddings/src/README.md @@ -13,3 +13,15 @@ Key Features: **Customizable**: Supports configuration and customization to meet specific use case requirements, including different embedding models and preprocessing techniques. Users are albe to configure and build embedding-related services according to their actual needs. + +## Embeddings Microservice with TEI + +For details, please refer to [readme](./README_tei.md). + +## Embeddings Microservice with Prediction Guard + +For details, please refer to this [readme](./README_predictionguard.md). + +## Embeddings Microservice with Multimodal + +For details, please refer to this [readme](./README_bridgetower.md). diff --git a/comps/embeddings/src/README_bridgetower.md b/comps/embeddings/src/README_bridgetower.md new file mode 100644 index 0000000000..608470e659 --- /dev/null +++ b/comps/embeddings/src/README_bridgetower.md @@ -0,0 +1,106 @@ +# Multimodal Embeddings Microservice + +The Multimodal Embedding Microservice is designed to efficiently convert pairs of textual string and image into vectorized embeddings, facilitating seamless integration into various machine learning and data processing workflows. This service utilizes advanced algorithms to generate high-quality embeddings that capture the joint semantic essence of the input text-and-image pairs, making it ideal for applications in multi-modal data processing, information retrieval, and similar fields. + +Key Features: + +**High Performance**: Optimized for quick and reliable conversion of textual data and image inputs into vector embeddings. + +**Scalability**: Built to handle high volumes of requests simultaneously, ensuring robust performance even under heavy loads. + +**Ease of Integration**: Provides a simple and intuitive API, allowing for straightforward integration into existing systems and workflows. 
+
+**Customizable**: Supports configuration and customization to meet specific use case requirements, including different embedding models and preprocessing techniques.
+
+Users are able to configure and build embedding-related services according to their actual needs.
+
+## 📦 1. Start Microservice
+
+### 🔹 1.1 Build Docker Image
+
+#### Build bridgetower multimodal embedding service
+
+- For Gaudi HPU:
+
+```bash
+cd ../../../
+docker build -t opea/embedding-multimodal-bridgetower-hpu:latest --build-arg EMBEDDER_PORT=$EMBEDDER_PORT --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/third_parties/bridgetower/src/Dockerfile.intel_hpu .
+```
+
+- For Xeon CPU:
+
+```bash
+cd ../../../
+docker build -t opea/embedding-multimodal-bridgetower:latest --build-arg EMBEDDER_PORT=$EMBEDDER_PORT --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/third_parties/bridgetower/src/Dockerfile .
+```
+
+#### Build Embedding Microservice Docker
+
+```bash
+cd ../../../
+docker build -t opea/embedding:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/src/Dockerfile .
+```
+
+### 🔹 1.2 Run Docker with Docker Compose
+
+```bash
+export your_mmei_port=8080
+export EMBEDDER_PORT=$your_mmei_port
+export MMEI_EMBEDDING_ENDPOINT="http://$ip_address:$your_mmei_port"
+export your_embedding_port_microservice=6600
+export MM_EMBEDDING_PORT_MICROSERVICE=$your_embedding_port_microservice
+cd comps/embeddings/deployment/docker_compose/
+```
+
+- For Gaudi HPU:
+
+```bash
+docker compose up multimodal-bridgetower-embedding-gaudi-serving multimodal-bridgetower-embedding-gaudi-server -d
+```
+
+- For Xeon CPU:
+
+```bash
+docker compose up multimodal-bridgetower-embedding-serving multimodal-bridgetower-embedding-server -d
+```
+
+## 📦 2. Consume Embedding Service
+
+Once the service is running, you can start using the API to generate embeddings for text and image pairs.
+
+### 🔹 2.1 Check Service Status
+
+Verify that the embedding service is running properly by checking its health status with this command:
+
+```bash
+curl http://localhost:6000/v1/health_check \
+-X GET \
+-H 'Content-Type: application/json'
+```
+
+### 🔹 2.2 Use the Embedding Service API
+
+You can now make API requests to generate embeddings. The service supports both single text embeddings and joint text-image embeddings.
+
+**Compute a Joint Embedding of an Image-Text Pair**
+To compute an embedding for a text and image pair, use the following API request:
+
+```bash
+curl -X POST http://0.0.0.0:6600/v1/embeddings \
+  -H "Content-Type: application/json" \
+  -d '{"text": {"text" : "This is some sample text."}, "image" : {"url": "https://github.com/docarray/docarray/blob/main/tests/toydata/image-data/apple.png?raw=true"}}'
+```
+
+In this example, the input is a text and an image URL. The service will return a vectorized embedding that represents both the text and image.
+
+**Compute an embedding of a text**
+
+To generate an embedding for just a text input, use this request:
+
+```bash
+curl -X POST http://0.0.0.0:6600/v1/embeddings \
+  -H "Content-Type: application/json" \
+  -d '{"text" : "This is some sample text."}'
+```
+
+This request will return an embedding representing the semantic meaning of the input text.
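As a quick aside to the curl examples above (not part of this PR's diff), here is a minimal Python sketch of the same two requests, for cases where the embedding service is called from application code rather than the shell. It assumes the `0.0.0.0:6600` address used in the README and that the response JSON exposes an `embedding` field, as the `EmbedMultimodalDoc` returned by the BridgeTower integration elsewhere in this diff suggests; helper names such as `embed_text` are illustrative only.

```python
# Minimal client sketch for the multimodal BridgeTower embedding microservice.
# Endpoint, port, and payload shapes mirror the curl examples in the README;
# adjust BASE_URL to your MM_EMBEDDING_PORT_MICROSERVICE setting.
import requests

BASE_URL = "http://0.0.0.0:6600/v1/embeddings"


def embed_text(text: str) -> list:
    """Embed a plain text string."""
    resp = requests.post(BASE_URL, json={"text": text}, timeout=60)
    resp.raise_for_status()
    return resp.json()["embedding"]


def embed_text_image(text: str, image_url: str) -> list:
    """Embed a text and image pair jointly."""
    payload = {"text": {"text": text}, "image": {"url": image_url}}
    resp = requests.post(BASE_URL, json=payload, timeout=120)
    resp.raise_for_status()
    return resp.json()["embedding"]


if __name__ == "__main__":
    vec = embed_text("This is some sample text.")
    print(f"text-only embedding length: {len(vec)}")

    vec = embed_text_image(
        "This is some sample text.",
        "https://github.com/docarray/docarray/blob/main/tests/toydata/image-data/apple.png?raw=true",
    )
    print(f"joint text-image embedding length: {len(vec)}")
```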
diff --git a/comps/embeddings/src/README_predictionguard.md b/comps/embeddings/src/README_predictionguard.md new file mode 100644 index 0000000000..b6f6f6c91f --- /dev/null +++ b/comps/embeddings/src/README_predictionguard.md @@ -0,0 +1,100 @@ +# Embedding Microservice with Prediction Guard + +[Prediction Guard](https://docs.predictionguard.com) allows you to utilize hosted open access LLMs, LVMs, and embedding functionality with seamlessly integrated safeguards. In addition to providing a scalable access to open models, Prediction Guard allows you to configure factual consistency checks, toxicity filters, PII filters, and prompt injection blocking. Join the [Prediction Guard Discord channel](https://discord.gg/TFHgnhAFKd) and request an API key to get started. + +This embedding microservice is designed to efficiently convert text into vectorized embeddings using the [BridgeTower model](https://huggingface.co/BridgeTower/bridgetower-large-itm-mlm-itc). Thus, it is ideal for both RAG or semantic search applications. + +**Note** - The BridgeTower model implemented in Prediction Guard can actually embed text, images, or text + images (jointly). For now this service only embeds text, but a follow on contribution will enable the multimodal functionality. + +## 📦 1. Start Microservice with `docker run` + +### 🔹 1.1 Start Embedding Service with TEI + +Before starting the service, ensure the following environment variable is set: + +```bash +export PREDICTIONGUARD_API_KEY=${your_predictionguard_api_key} +``` + +### 🔹 1.2 Build Docker Image + +To build the Docker image for the embedding service, run the following command: + +```bash +cd ../../../ +docker build -t opea/embedding:latest -f comps/embeddings/src/Dockerfile . +``` + +### 🔹 1.3 Start Service + +Run the Docker container in detached mode with the following command: + +```bash +docker run -d --name="embedding-predictionguard" -p 6000:6000 -e PREDICTIONGUARD_API_KEY=$PREDICTIONGUARD_API_KEY opea/embedding:latest +``` + +## 📦 2. Start Microservice with docker compose + +You can also deploy the Prediction Guard embedding service using Docker Compose for easier management of multi-container setups. + +🔹 Steps: + +1. Set environment variables: + + ```bash + export PG_EMBEDDING_MODEL_NAME="bridgetower-large-itm-mlm-itc" + export EMBEDDER_PORT=6000 + export PREDICTIONGUARD_API_KEY=${your_predictionguard_api_key} + ``` + +2. Navigate to the Docker Compose directory: + + ```bash + cd comps/embeddings/deployment/docker_compose/ + ``` + +3. Start the services: + + ```bash + docker compose up pg-embedding-server -d + ``` + +## 📦 3. Consume Embedding Service + +### 🔹 3.1 Check Service Status + +Verify the embedding service is running: + +```bash +curl http://localhost:6000/v1/health_check \ +-X GET \ +-H 'Content-Type: application/json' +``` + +### 🔹 3.2 Use the Embedding Service API + +The API is compatible with the [OpenAI API](https://platform.openai.com/docs/api-reference/embeddings). + +1. Single Text Input + + ```bash + curl http://localhost:6000/v1/embeddings \ + -X POST \ + -d '{"input":"Hello, world!"}' \ + -H 'Content-Type: application/json' + ``` + +2. 
Multiple Text Inputs with Parameters + + ```bash + curl http://localhost:6000/v1/embeddings \ + -X POST \ + -d '{"input":["Hello, world!","How are you?"], "dimensions":100}' \ + -H 'Content-Type: application/json' + ``` + +## ✨ Additional Notes + +- Prediction Guard Features: Prediction Guard comes with built-in safeguards such as factual consistency checks, toxicity filters, PII detection, and prompt injection protection, ensuring safe use of the service. +- Multimodal Support: While the service currently only supports text embeddings, we plan to extend this functionality to support images and joint text-image embeddings in future releases. +- Scalability: The microservice can easily scale to handle large volumes of requests for embedding generation, making it suitable for large-scale semantic search and RAG applications. diff --git a/comps/embeddings/src/README_tei.md b/comps/embeddings/src/README_tei.md new file mode 100644 index 0000000000..ba1417b589 --- /dev/null +++ b/comps/embeddings/src/README_tei.md @@ -0,0 +1,129 @@ +# 🌟 Embedding Microservice with TEI + +This guide walks you through starting, deploying, and consuming the **TEI-based Embeddings Microservice**. 🚀 + +--- + +## 📦 1. Start Microservice with `docker run` + +### 🔹 1.1 Start Embedding Service with TEI + +1. **Start the TEI service**: + Replace `your_port` and `model` with desired values to start the service. + + ```bash + your_port=8090 + model="BAAI/bge-large-en-v1.5" + docker run -p $your_port:80 -v ./data:/data --name tei-embedding-serving \ + -e http_proxy=$http_proxy -e https_proxy=$https_proxy --pull always \ + ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 --model-id $model + ``` + +2. **Test the TEI service**: + Run the following command to check if the service is up and running. + + ```bash + curl localhost:$your_port/v1/embeddings \ + -X POST \ + -d '{"input":"What is Deep Learning?"}' \ + -H 'Content-Type: application/json' + ``` + +### 🔹 1.2 Build Docker Image and Run Docker with CLI + +1. Build the Docker image for the embedding microservice: + + ```bash + cd ../../../ + docker build -t opea/embedding:latest \ + --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy \ + -f comps/embeddings/src/Dockerfile . + ``` + +2. Run the embedding microservice and connect it to the TEI service: + + ```bash + docker run -d --name="embedding-tei-server" \ + -p 6000:5000 \ + -e http_proxy=$http_proxy -e https_proxy=$https_proxy \ + --ipc=host \ + -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT \ + -e EMBEDDING_COMPONENT_NAME="OPEA_TEI_EMBEDDING" \ + opea/embedding:latest + ``` + +## 📦 2. Start Microservice with docker compose + +Deploy both the TEI Embedding Service and the Embedding Microservice using Docker Compose. + +🔹 Steps: + +1. Set environment variables: + + ```bash + export host_ip=${your_ip_address} + export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" + export TEI_EMBEDDER_PORT=8090 + export EMBEDDER_PORT=6000 + export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:${TEI_EMBEDDER_PORT}" + ``` + +2. Navigate to the Docker Compose directory: + + ```bash + cd comps/embeddings/deployment/docker_compose/ + ``` + +3. Start the services: + + ```bash + docker compose up tei-embedding-serving tei-embedding-server -d + ``` + +## 📦 3. 
Consume Embedding Service + +### 🔹 3.1 Check Service Status + +Verify the embedding service is running: + +```bash +curl http://localhost:6000/v1/health_check \ +-X GET \ +-H 'Content-Type: application/json' +``` + +### 🔹 3.2 Use the Embedding Service API + +The API is compatible with the [OpenAI API](https://platform.openai.com/docs/api-reference/embeddings). + +1. Single Text Input + + ```bash + curl http://localhost:6000/v1/embeddings \ + -X POST \ + -d '{"input":"Hello, world!"}' \ + -H 'Content-Type: application/json' + ``` + +2. Multiple Text Inputs with Parameters + + ```bash + curl http://localhost:6000/v1/embeddings \ + -X POST \ + -d '{"input":["Hello, world!","How are you?"], "dimensions":100}' \ + -H 'Content-Type: application/json' + ``` + +## ✨ Tips for Better Understanding: + +1. Port Mapping: + Ensure the ports are correctly mapped to avoid conflicts with other services. + +2. Model Selection: + Choose a model appropriate for your use case, like "BAAI/bge-large-en-v1.5" or "BAAI/bge-base-en-v1.5". + +3. Environment Variables: + Use http_proxy and https_proxy for proxy setup if necessary. + +4. Data Volume: + The `-v ./data:/data` flag ensures the data directory is correctly mounted. diff --git a/comps/embeddings/src/integrations/multimodal_bridgetower.py b/comps/embeddings/src/integrations/multimodal_bridgetower.py index c9152136ca..d1b0110d74 100644 --- a/comps/embeddings/src/integrations/multimodal_bridgetower.py +++ b/comps/embeddings/src/integrations/multimodal_bridgetower.py @@ -51,9 +51,13 @@ async def invoke(self, input: MultimodalDoc) -> EmbedMultimodalDoc: json["text"] = input.text elif isinstance(input, TextImageDoc): json["text"] = input.text.text - img_bytes = input.image.url.load_bytes() - base64_img = base64.b64encode(img_bytes).decode("utf-8") - json["img_b64_str"] = base64_img + if input.image.url: + img_bytes = input.image.url.load_bytes() + base64_img = base64.b64encode(img_bytes).decode("utf-8") + elif input.image.base64_image: + base64_img = input.image.base64_image + if base64_img: + json["img_b64_str"] = base64_img else: raise TypeError( f"Unsupported input type: {type(input)}. 
" @@ -71,6 +75,9 @@ async def invoke(self, input: MultimodalDoc) -> EmbedMultimodalDoc: elif isinstance(input, TextImageDoc): res = EmbedMultimodalDoc(text=input.text.text, url=input.image.url, embedding=embed_vector) + if base64_img: + res.base64_image = base64_img + return res def check_health(self) -> bool: diff --git a/comps/embeddings/src/opea_embedding_microservice.py b/comps/embeddings/src/opea_embedding_microservice.py index 3351731278..ac6788510c 100644 --- a/comps/embeddings/src/opea_embedding_microservice.py +++ b/comps/embeddings/src/opea_embedding_microservice.py @@ -17,6 +17,7 @@ statistics_dict, ) from comps.cores.proto.api_protocol import EmbeddingRequest, EmbeddingResponse +from comps.cores.telemetry.opea_telemetry import opea_telemetry logger = CustomLogger("opea_embedding_microservice") logflag = os.getenv("LOGFLAG", False) @@ -36,6 +37,7 @@ host="0.0.0.0", port=6000, ) +@opea_telemetry @register_statistics(names=["opea_service@embedding"]) async def embedding(input: EmbeddingRequest) -> EmbeddingResponse: start = time.time() diff --git a/comps/feedback_management/deployment/docker_compose/compose_feedback_mongo.yaml b/comps/feedback_management/deployment/docker_compose/compose.yaml similarity index 86% rename from comps/feedback_management/deployment/docker_compose/compose_feedback_mongo.yaml rename to comps/feedback_management/deployment/docker_compose/compose.yaml index 858e59d446..173eee5c3d 100644 --- a/comps/feedback_management/deployment/docker_compose/compose_feedback_mongo.yaml +++ b/comps/feedback_management/deployment/docker_compose/compose.yaml @@ -15,10 +15,10 @@ services: command: mongod --quiet --logpath /dev/null feedbackmanagement-mongo: - image: opea/feedbackmanagement-mongo:latest + image: ${REGISTRY:-opea}/feedbackmanagement-mongo:${TAG:-latest} container_name: feedbackmanagement-mongo-server ports: - - "6016:6016" + - "${FEEDBACK_MANAGEMENT_PORT:-6016}:6016" ipc: host environment: http_proxy: ${http_proxy} diff --git a/comps/feedback_management/src/Dockerfile b/comps/feedback_management/src/Dockerfile index cc5641e2bc..a3c5242514 100644 --- a/comps/feedback_management/src/Dockerfile +++ b/comps/feedback_management/src/Dockerfile @@ -14,8 +14,6 @@ RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ chown -R user /home/user/ -USER user - COPY comps /home/user/comps COPY requirements.txt /home/user/ @@ -25,6 +23,8 @@ RUN pip install --no-cache-dir --upgrade pip setuptools && \ ENV PYTHONPATH=$PYTHONPATH:/home/user +USER user + WORKDIR /home/user/comps/feedback_management/src ENTRYPOINT ["python", "opea_feedback_microservice.py"] diff --git a/comps/feedback_management/src/README.md b/comps/feedback_management/src/README.md index 349a6098c3..0634a39bcb 100644 --- a/comps/feedback_management/src/README.md +++ b/comps/feedback_management/src/README.md @@ -17,13 +17,13 @@ export COLLECTION_NAME=${COLLECTION_NAME} --- -## 🚀Start Microservice with Docker +## 🚀 Start Microservice with Docker (Option 1) ### Build Docker Image ```bash cd ~/GenAIComps -docker build -t opea/feedbackmanagement:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/feedback_management/src/Dockerfile . +docker build -t opea/feedbackmanagement-mongo:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/feedback_management/src/Dockerfile . 
``` ### Run Docker with CLI @@ -37,11 +37,19 @@ docker build -t opea/feedbackmanagement:latest --build-arg https_proxy=$https_pr - Run Feedback Management microservice ```bash - docker run -d --name="feedbackmanagement-mongo-server" -p 6016:6016 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e MONGO_HOST=${MONGO_HOST} -e MONGO_PORT=${MONGO_PORT} -e DB_NAME=${DB_NAME} -e COLLECTION_NAME=${COLLECTION_NAME} opea/feedbackmanagement:latest + docker run -d --name="feedbackmanagement-mongo-server" -p 6016:6016 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e MONGO_HOST=${MONGO_HOST} -e MONGO_PORT=${MONGO_PORT} -e DB_NAME=${DB_NAME} -e COLLECTION_NAME=${COLLECTION_NAME} opea/feedbackmanagement-mongo:latest ``` --- +## 🚀 Start Microservice with Docker Compose (Option 2) + +```bash +docker compose -f ../deployment/docker_compose/compose.yaml up -d +``` + +--- + ### ✅ Invoke Microservice The Feedback Management microservice exposes the following API endpoints: diff --git a/comps/finetuning/deployment/docker_compose/compose.yaml b/comps/finetuning/deployment/docker_compose/compose.yaml new file mode 100644 index 0000000000..376e536aa4 --- /dev/null +++ b/comps/finetuning/deployment/docker_compose/compose.yaml @@ -0,0 +1,31 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +services: + finetuning: + image: ${REGISTRY:-opea}/finetuning:${TAG:-latest} + container_name: finetuning + ports: + - "8015:8015" + - "8265:8265" + environment: + - no_proxy=${no_proxy} + - https_proxy=${https_proxy} + - http_proxy=${http_proxy} + - HF_TOKEN=${HF_TOKEN} + ipc: host + restart: always + finetuning-gaudi: + extends: finetuning + image: ${REGISTRY:-opea}/finetuning-gaudi:${TAG:-latest} + container_name: finetuning-gaudi + environment: + - HABANA_VISIBLE_DEVICES=all + - OMPI_MCA_btl_vader_single_copy_mechanism=none + runtime: habana + cap_add: + - SYS_NICE + +networks: + default: + driver: bridge diff --git a/comps/finetuning/src/Dockerfile b/comps/finetuning/src/Dockerfile index 1edc8c3f5c..0f55a441be 100644 --- a/comps/finetuning/src/Dockerfile +++ b/comps/finetuning/src/Dockerfile @@ -16,8 +16,6 @@ COPY comps /home/user/comps RUN chown -R user /home/user/comps/finetuning -USER user - ENV PATH=$PATH:/home/user/.local/bin RUN python -m pip install --no-cache-dir --upgrade pip && \ @@ -28,6 +26,8 @@ RUN python -m pip install --no-cache-dir --upgrade pip && \ ENV PYTHONPATH=$PYTHONPATH:/home/user +USER user + WORKDIR /home/user/comps/finetuning/src RUN echo PKGPATH=$(python3 -c "import pkg_resources; print(pkg_resources.get_distribution('oneccl-bind-pt').location)") >> run.sh && \ diff --git a/comps/finetuning/src/Dockerfile.intel_hpu b/comps/finetuning/src/Dockerfile.intel_hpu index ab40f9c48e..4324de9999 100644 --- a/comps/finetuning/src/Dockerfile.intel_hpu +++ b/comps/finetuning/src/Dockerfile.intel_hpu @@ -14,8 +14,6 @@ COPY comps /home/user/comps RUN chown -R user /home/user/comps/finetuning -USER user - ENV PATH=$PATH:/home/user/.local/bin RUN python -m pip install --no-cache-dir --upgrade pip && \ @@ -24,6 +22,8 @@ RUN python -m pip install --no-cache-dir --upgrade pip && \ ENV PYTHONPATH=$PYTHONPATH:/home/user +USER user + WORKDIR /home/user/comps/finetuning/src ENTRYPOINT ["/bin/bash", "launch.sh"] diff --git a/comps/finetuning/src/README.md b/comps/finetuning/src/README.md index ac6bb8cad7..4bc39ee078 100644 --- a/comps/finetuning/src/README.md +++ b/comps/finetuning/src/README.md @@ -64,6 +64,13 @@ Start docker 
container with below command: docker run -d --name="finetuning-server" -p 8015:8015 --runtime=runc --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy opea/finetuning:latest ``` +Or use docker compose with below command: + +```bash +cd ../deployment/docker_compose +docker compose -f compose.yaml up finetuning -d +``` + ### 2.2 Setup on Gaudi2 #### 2.2.1 Build Docker Image @@ -84,6 +91,14 @@ export HF_TOKEN=${your_huggingface_token} docker run --runtime=habana -e HABANA_VISIBLE_DEVICES=all -p 8015:8015 -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --net=host --ipc=host -e https_proxy=$https_proxy -e http_proxy=$http_proxy -e no_proxy=$no_proxy -e HF_TOKEN=$HF_TOKEN opea/finetuning-gaudi:latest ``` +Or use docker compose with below command: + +```bash +export HF_TOKEN=${your_huggingface_token} +cd ../deployment/docker_compose +docker compose -f compose.yaml up finetuning-gaudi -d +``` + ## 🚀3. Consume Finetuning Service ### 3.1 Upload a training file @@ -244,7 +259,7 @@ curl http://${your_ip}:8015/v1/finetune/list_checkpoints -X POST -H "Content-Typ ### 3.4 Leverage fine-tuned model -After fine-tuning job is done, fine-tuned model can be chosen from listed checkpoints, then the fine-tuned model can be used in other microservices. For example, fine-tuned reranking model can be used in [reranks](../../rerankings/src/README.md) microservice by assign its path to the environment variable `RERANK_MODEL_ID`, fine-tuned embedding model can be used in [embeddings](../../embeddings/src/README.md) microservice by assign its path to the environment variable `model`, LLMs after instruction tuning can be used in [llms](../../llms/text-generation/README.md) microservice by assign its path to the environment variable `your_hf_llm_model`. +After fine-tuning job is done, fine-tuned model can be chosen from listed checkpoints, then the fine-tuned model can be used in other microservices. For example, fine-tuned reranking model can be used in [reranks](../../rerankings/src/README.md) microservice by assign its path to the environment variable `RERANK_MODEL_ID`, fine-tuned embedding model can be used in [embeddings](../../embeddings/src/README.md) microservice by assign its path to the environment variable `model`, LLMs after instruction tuning can be used in [llms](../../llms/src/text-generation/README.md) microservice by assign its path to the environment variable `your_hf_llm_model`. ## 🚀4. 
Descriptions for Finetuning parameters diff --git a/comps/guardrails/deployment/docker_compose/compose.yaml b/comps/guardrails/deployment/docker_compose/compose.yaml new file mode 100644 index 0000000000..122004c0bf --- /dev/null +++ b/comps/guardrails/deployment/docker_compose/compose.yaml @@ -0,0 +1,137 @@ +# Copyright (C) 2024 Prediction Guard, Inc +# SPDX-License-Identifier: Apache-2.0 + +include: + - ../../../third_parties/tgi/deployment/docker_compose/compose.yaml + - ../../../third_parties/vllm/deployment/docker_compose/compose.yaml + +services: + # bias detection service + guardrails-bias-detection-server: + image: ${REGISTRY:-opea}/guardrails-bias-detection:${TAG:-latest} + container_name: guardrails-bias-detection-server + ports: + - "${BIAS_DETECTION_PORT:-9092}:9092" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + restart: unless-stopped + + # factuality alignment service + guardrails-factuality-predictionguard-server: + image: ${REGISTRY:-opea}/guardrails-factuality-predictionguard:${TAG:-latest} + container_name: guardrails-factuality-predictionguard-server + ports: + - "${FACTUALITY_ALIGNMENT_PORT:-9075}:9075" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + PREDICTIONGUARD_API_KEY: ${PREDICTIONGUARD_API_KEY} + restart: unless-stopped + + # guardrails service + guardrails-server: + image: ${REGISTRY:-opea}/guardrails:${TAG:-latest} + container_name: guardrails-server + ports: + - "${GUARDRAILS_PORT:-9090}:9090" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + SAFETY_GUARD_ENDPOINT: ${SAFETY_GUARD_ENDPOINT} + SAFETY_GUARD_MODEL_ID: ${SAFETY_GUARD_MODEL_ID} + GUARDRAILS_COMPONENT_NAME: "OPEA_LLAMA_GUARD" + restart: unless-stopped + + llamaguard-guardrails-server: + extends: guardrails-server + container_name: llamaguard-guardrails-server + environment: + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + HF_TOKEN: ${HF_TOKEN} + depends_on: + tgi-gaudi-server: + condition: service_healthy + + wildguard-guardrails-server: + extends: guardrails-server + container_name: wildguard-guardrails-server + environment: + GUARDRAILS_COMPONENT_NAME: "OPEA_WILD_GUARD" + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + HF_TOKEN: ${HF_TOKEN} + depends_on: + tgi-gaudi-server: + condition: service_healthy + + # hallucination detection service + hallucination-detection-server: + image: ${REGISTRY:-opea}/hallucination-detection:${TAG:-latest} + container_name: hallucination-detection-server + ports: + - "${HALLUCINATION_DETECTION_PORT:-9090}:9000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + LLM_MODEL: $LLM_MODEL_ID + vLLM_ENDPOINT: $vLLM_ENDPOINT + HUGGINGFACEHUB_API_TOKEN: $HF_TOKEN + restart: unless-stopped + depends_on: + vllm-gaudi-server: + condition: service_healthy + + # predictionguard PII detection service + pii-predictionguard-server: + image: ${REGISTRY:-opea}/pii-detection-predictionguard:${TAG:-latest} + container_name: pii-predictionguard-server + ports: + - "${PII_PREDICTIONGUARD_PORT:-9080}:9080" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + PREDICTIONGUARD_API_KEY: ${PREDICTIONGUARD_API_KEY} + restart: unless-stopped + + # predictionguard injection service + injection-predictionguard-server: + image: 
${REGISTRY:-opea}/injection-predictionguard:${TAG:-latest} + container_name: injection-predictionguard-server + ports: + - "${INJECTION_PREDICTIONGUARD_PORT:-9085}:9085" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + PREDICTIONGUARD_API_KEY: ${PREDICTIONGUARD_API_KEY} + restart: unless-stopped + + # predictionguard toxicity service + toxicity-predictionguard-server: + image: ${REGISTRY:-opea}/toxicity-predictionguard:${TAG:-latest} + container_name: toxicity-predictionguard-server + ports: + - "${TOXICITY_PREDICTIONGUARD_PORT:-9090}:9090" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + PREDICTIONGUARD_API_KEY: ${PREDICTIONGUARD_API_KEY} + restart: unless-stopped + +networks: + default: + driver: bridge diff --git a/comps/guardrails/deployment/docker_compose/compose_factuality.yaml b/comps/guardrails/deployment/docker_compose/compose_factuality.yaml deleted file mode 100644 index 7fe32fc2a4..0000000000 --- a/comps/guardrails/deployment/docker_compose/compose_factuality.yaml +++ /dev/null @@ -1,20 +0,0 @@ -# Copyright (C) 2024 Prediction Guard, Inc -# SPDX-License-Identifier: Apache-2.0 - -services: - factuality: - image: opea/guardrails-factuality-predictionguard:latest - container_name: guardrails-factuality-predictionguard - ports: - - "9075:9075" - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - PREDICTIONGUARD_API_KEY: ${PREDICTIONGUARD_API_KEY} - restart: unless-stopped - -networks: - default: - driver: bridge diff --git a/comps/guardrails/deployment/docker_compose/compose_injection.yaml b/comps/guardrails/deployment/docker_compose/compose_injection.yaml deleted file mode 100644 index a877e5c4bd..0000000000 --- a/comps/guardrails/deployment/docker_compose/compose_injection.yaml +++ /dev/null @@ -1,20 +0,0 @@ -# Copyright (C) 2024 Prediction Guard, Inc -# SPDX-License-Identifier: Apache-2.0 - -services: - injection: - image: opea/guardrails-injection-predictionguard:latest - container_name: guardrails-injection-predictionguard - ports: - - "9085:9085" - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - PREDICTIONGUARD_API_KEY: ${PREDICTIONGUARD_API_KEY} - restart: unless-stopped - -networks: - default: - driver: bridge diff --git a/comps/guardrails/deployment/docker_compose/compose_llamaguard.yaml b/comps/guardrails/deployment/docker_compose/compose_llamaguard.yaml deleted file mode 100644 index a0ff166762..0000000000 --- a/comps/guardrails/deployment/docker_compose/compose_llamaguard.yaml +++ /dev/null @@ -1,35 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -version: "3.8" - -services: - tgi_gaudi_service: - image: ghcr.io/huggingface/tgi-gaudi:2.0.1 - container_name: tgi-service - ports: - - "8088:80" - volumes: - - "./data:/data" - environment: - HF_TOKEN: ${HF_TOKEN} - shm_size: 1g - command: --model-id ${LLM_MODEL_ID} --max-input-tokens 1024 --max-total-tokens 2048 - guardrails: - image: opea/guardrails:latest - container_name: guardrails-llamaguard-gaudi-server - ports: - - "9090:9090" - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - SAFETY_GUARD_ENDPOINT: ${SAFETY_GUARD_ENDPOINT} - GUARDRAILS_COMPONENT_NAME: "OPEA_LLAMA_GUARD" - HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} - restart: unless-stopped - -networks: - default: - driver: bridge 
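With the per-service compose files above replaced by the consolidated `compose.yaml`, a typical workflow is to bring up only the guardrail variant you need and then smoke-test it. The sketch below is illustrative only: it assumes the consolidated file shown above, a Gaudi host for the LlamaGuard path, the default host port `9090` from that file, and the `/v1/guardrails` route exposed by the guardrails microservice; the `SAFETY_GUARD_*` values are placeholders to adjust for your setup.

```bash
# Start only the LlamaGuard-backed guardrails service; depends_on brings up the TGI Gaudi backend
export HF_TOKEN=<your_huggingface_token>
export SAFETY_GUARD_MODEL_ID="meta-llama/Meta-Llama-Guard-2-8B"   # assumption: the guard model you serve
export SAFETY_GUARD_ENDPOINT="http://tgi-gaudi-server:80"         # assumption: match your TGI service address/port
cd comps/guardrails/deployment/docker_compose
docker compose -f compose.yaml up llamaguard-guardrails-server -d

# Once the backend reports healthy, send a quick request to the guardrails endpoint
curl http://localhost:9090/v1/guardrails \
  -X POST \
  -H 'Content-Type: application/json' \
  -d '{"text":"How do you buy a tiger in the US?"}'
```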
diff --git a/comps/guardrails/deployment/docker_compose/compose_pii.yaml b/comps/guardrails/deployment/docker_compose/compose_pii.yaml deleted file mode 100644 index ba646d5bed..0000000000 --- a/comps/guardrails/deployment/docker_compose/compose_pii.yaml +++ /dev/null @@ -1,20 +0,0 @@ -# Copyright (C) 2024 Prediction Guard, Inc -# SPDX-License-Identifier: Apache-2.0 - -services: - pii: - image: opea/guardrails-pii-predictionguard:latest - container_name: pii-predictionguard - ports: - - "9080:9080" - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - PREDICTIONGUARD_API_KEY: ${PREDICTIONGUARD_API_KEY} - restart: unless-stopped - -networks: - default: - driver: bridge diff --git a/comps/guardrails/deployment/docker_compose/compose_toxicity.yaml b/comps/guardrails/deployment/docker_compose/compose_toxicity.yaml deleted file mode 100644 index fb8818cecc..0000000000 --- a/comps/guardrails/deployment/docker_compose/compose_toxicity.yaml +++ /dev/null @@ -1,20 +0,0 @@ -# Copyright (C) 2024 Prediction Guard, Inc -# SPDX-License-Identifier: Apache-2.0 - -services: - toxicity: - image: opea/guardrails-toxicity-predictionguard:latest - container_name: guardrails-toxicity-predictionguard - ports: - - "9090:9090" - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - PREDICTIONGUARD_API_KEY: ${PREDICTIONGUARD_API_KEY} - restart: unless-stopped - -networks: - default: - driver: bridge diff --git a/comps/guardrails/deployment/docker_compose/compose_wildguard.yaml b/comps/guardrails/deployment/docker_compose/compose_wildguard.yaml deleted file mode 100644 index e5ceb10d9b..0000000000 --- a/comps/guardrails/deployment/docker_compose/compose_wildguard.yaml +++ /dev/null @@ -1,35 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -version: "3.8" - -services: - tgi_gaudi_service: - image: ghcr.io/huggingface/tgi-gaudi:2.0.1 - container_name: tgi-service - ports: - - "8088:80" - volumes: - - "./data:/data" - environment: - HF_TOKEN: ${HF_TOKEN} - shm_size: 1g - command: --model-id ${LLM_MODEL_ID} --max-input-tokens 1024 --max-total-tokens 2048 - guardrails: - image: opea/guardrails:latest - container_name: guardrails-wildguard-gaudi-server - ports: - - "9090:9090" - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - SAFETY_GUARD_ENDPOINT: ${SAFETY_GUARD_ENDPOINT} - HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} - GUARDRAILS_COMPONENT_NAME: "OPEA_WILD_GUARD" - restart: unless-stopped - -networks: - default: - driver: bridge diff --git a/comps/guardrails/deployment/kubernetes/README.md b/comps/guardrails/deployment/kubernetes/README.md index e69de29bb2..b309900a07 100644 --- a/comps/guardrails/deployment/kubernetes/README.md +++ b/comps/guardrails/deployment/kubernetes/README.md @@ -0,0 +1,11 @@ +# Deploy guardrails microservice on Kubernetes cluster + +- You should have Helm (version >= 3.15) installed. Refer to the [Helm Installation Guide](https://helm.sh/docs/intro/install/) for more information. +- For more deployment options, refer to [helm charts README](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts#readme). 
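A quick way to confirm these prerequisites before installing the chart (a sketch, assuming `helm` and `kubectl` are already on your PATH and pointed at the target cluster):

```bash
# Helm client must be version 3.15 or newer
helm version --short

# The target Kubernetes cluster should be reachable
kubectl get nodes
```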
+ +## Deploy on Kubernetes + +``` +export HFTOKEN="insert-your-huggingface-token-here" +helm install guardrails oci://ghcr.io/opea-project/charts/guardrails --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f cpu-values.yaml +``` diff --git a/comps/guardrails/deployment/kubernetes/cpu-values.yaml b/comps/guardrails/deployment/kubernetes/cpu-values.yaml new file mode 100644 index 0000000000..346a39496e --- /dev/null +++ b/comps/guardrails/deployment/kubernetes/cpu-values.yaml @@ -0,0 +1,5 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +tgi-guardrails: + enabled: true diff --git a/comps/guardrails/src/bias_detection/Dockerfile b/comps/guardrails/src/bias_detection/Dockerfile index 0ed299b6b7..6eb97a7b8c 100644 --- a/comps/guardrails/src/bias_detection/Dockerfile +++ b/comps/guardrails/src/bias_detection/Dockerfile @@ -16,8 +16,6 @@ RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ chown -R user /home/user/ -USER user - COPY comps /home/user/comps RUN pip install --no-cache-dir --upgrade pip && \ @@ -26,6 +24,8 @@ RUN pip install --no-cache-dir --upgrade pip && \ ENV PYTHONPATH=$PYTHONPATH:/home/user +USER user + WORKDIR /home/user/comps/guardrails/src/bias_detection/ ENTRYPOINT ["python", "opea_bias_detection_microservice.py"] diff --git a/comps/guardrails/src/guardrails/Dockerfile b/comps/guardrails/src/guardrails/Dockerfile index c89fbb5cb8..890dd23790 100644 --- a/comps/guardrails/src/guardrails/Dockerfile +++ b/comps/guardrails/src/guardrails/Dockerfile @@ -15,8 +15,6 @@ RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ chown -R user /home/user/ -USER user - COPY comps /home/user/comps RUN pip install --no-cache-dir --upgrade pip setuptools && \ @@ -25,6 +23,8 @@ RUN pip install --no-cache-dir --upgrade pip setuptools && \ ENV PYTHONPATH=$PYTHONPATH:/home/user +USER user + WORKDIR /home/user/comps/guardrails/src/guardrails/ ENTRYPOINT ["python", "opea_guardrails_microservice.py"] diff --git a/comps/guardrails/src/hallucination_detection/Dockerfile b/comps/guardrails/src/hallucination_detection/Dockerfile new file mode 100644 index 0000000000..73d075f4bc --- /dev/null +++ b/comps/guardrails/src/hallucination_detection/Dockerfile @@ -0,0 +1,31 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +FROM python:3.11-slim + +ARG ARCH="cpu" # Set this to "cpu" or "gpu" + +RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ + libgl1-mesa-glx \ + libjemalloc-dev + +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ + +COPY comps /home/user/comps + +RUN pip install --no-cache-dir --upgrade pip && \ + if [ ${ARCH} = "cpu" ]; then \ + pip install --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cpu -r /home/user/comps/guardrails/src/hallucination_detection/requirements.txt; \ + else \ + pip install --no-cache-dir -r /home/user/comps/guardrails/src/hallucination_detection/requirements.txt; \ + fi + +ENV PYTHONPATH=$PYTHONPATH:/home/user + +USER user + +WORKDIR /home/user/comps/guardrails/src/hallucination_detection + +ENTRYPOINT ["bash", "entrypoint.sh"] diff --git a/comps/guardrails/src/hallucination_detection/README.md b/comps/guardrails/src/hallucination_detection/README.md new file mode 100644 index 0000000000..7d79289f68 --- /dev/null +++ b/comps/guardrails/src/hallucination_detection/README.md @@ -0,0 +1,162 @@ +# Hallucination Detection Microservice + +## Introduction + +Hallucination in AI, particularly in 
large language models (LLMs), spans a wide range of issues that can impact the reliability, trustworthiness, and utility of AI-generated content. The content could be plausible-sounding but factually incorrect, irrelevant, or entirely fabricated. This phenomenon occurs when the model generates outputs that are not grounded in the input context, training data, or real-world knowledge. While LLMs excel at generating coherent responses, hallucinations pose a critical challenge for applications that demand accuracy, reliability, and trustworthiness. + +### Forms of Hallucination + +- **Factual Errors**: The AI generates responses containing incorrect or fabricated facts. _Example_: Claiming a historical event occurred when it did not. + +- **Logical Inconsistencies**: Outputs that fail to follow logical reasoning or contradict themselves. _Example_: Stating that a person is alive in one sentence and deceased in another. + +- **Context Misalignment**: Responses that diverge from the input prompt or fail to address the intended context. _Example_: Providing irrelevant information or deviating from the topic. + +- **Fabricated References**: Creating citations, statistics, or other details that appear authentic but lack real-world grounding. _Example_: Inventing a study or paper that doesn't exist. + +### Importance of Hallucination Detection + +The importance of hallucination detection cannot be overstated. Ensuring the factual correctness and contextual fidelity of AI-generated content is essential for: + +- **Building Trust**: Reducing hallucinations fosters user confidence in AI systems. +- **Ensuring Compliance**: Meeting legal and ethical standards in regulated industries. +- **Enhancing Reliability**: Improving the overall robustness and performance of AI applications. + +### Define the Scope of Our Hallucination Detection + +Tackling hallucination in its entirety is beyond our immediate scope. Training datasets inherently lag behind question-and-answer needs due to their static nature. Also, Retrieval-Augmented Generation (RAG) is emerging as a preferred approach for LLMs, where model outputs are grounded in retrieved context to enhance accuracy and relevance, relying on the integration of Document-Question-Answer triplets. + +Therefore, we focus on detecting contextualized hallucinations with the following strategies: + +- Using LLM-as-a-judge to evaluate hallucinations. +- Detecting whether a Context-Question-Answer triplet contains hallucinations. + +## 🚀1. Start Microservice based on vLLM endpoint on Intel Gaudi Accelerator + +### 1.1 Define Environment Variables + +```bash +export your_ip= +export port_number=9008 +export HUGGINGFACEHUB_API_TOKEN= +export vLLM_ENDPOINT="http://${your_ip}:${port_number}" +export LLM_MODEL="PatronusAI/Llama-3-Patronus-Lynx-8B-Instruct" +``` + +For gated models such as `LLAMA-2`, you will have to pass the environment variable `HUGGINGFACEHUB_API_TOKEN`. Please follow this link [huggingface token](https://huggingface.co/docs/hub/security-tokens) to get an access token, and export the `HUGGINGFACEHUB_API_TOKEN` environment variable with the token. + +### 1.2 Launch vLLM Service on Gaudi Accelerator + +#### Launch vLLM service on a single node + +```bash +bash ./launch_vllm_service.sh ${port_number} ${LLM_MODEL} hpu 1 +``` + +## 🚀2. Set up Hallucination Microservice + +Then we wrap the vLLM service into the Hallucination Detection Microservice.
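Before wrapping it, it is worth confirming that the vLLM endpoint launched in 1.2 is actually serving the model. A minimal check (a sketch, assuming the `vLLM_ENDPOINT` and `LLM_MODEL` variables from 1.1 and vLLM's OpenAI-compatible API):

```bash
# The served model ID should appear in the model list
curl ${vLLM_ENDPOINT}/v1/models

# A trivial chat completion confirms the server responds end to end
curl ${vLLM_ENDPOINT}/v1/chat/completions \
  -H 'Content-Type: application/json' \
  -d '{"model": "'"${LLM_MODEL}"'", "messages": [{"role": "user", "content": "Reply with OK."}], "max_tokens": 8}'
```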
+ +### 2.1 Build Docker + +```bash +bash build_docker_hallucination_microservice.sh +``` + +### 2.2 Launch Hallucination Microservice + +```bash +bash launch_hallucination_microservice.sh +``` + +## 🚀3. Get Status of Hallucination Microservice + +```bash +docker container logs -f hallucination-detection +``` + +## 🚀4. Consume Guardrail Microservice Post-LLM + +Once the microservice starts, users can use the examples below (bash or Python) to apply hallucination detection to an LLM's response (post-LLM). + +**Bash:** + +_Case without Hallucination (Valid Output)_ + +```bash +DOCUMENT=".......An important part of CDC’s role during a public health emergency is to develop a test for the pathogen and equip state and local public health labs with testing capacity. CDC developed an rRT-PCR test to diagnose COVID-19. As of the evening of March 17, 89 state and local public health labs in 50 states......" + +QUESTION="What kind of test can diagnose COVID-19?" + +ANSWER=" rRT-PCR test" + +DATA='{"messages":[{"role": "user", "content": "Given the following QUESTION, DOCUMENT and ANSWER you must analyze the provided answer and determine whether it is faithful to the contents of the DOCUMENT. The ANSWER must not offer new information beyond the context provided in the DOCUMENT. The ANSWER also must not contradict information provided in the DOCUMENT. Output your final verdict by strictly following this format: \"PASS\" is the answer is faithful to the DOCUMENT and \"FAIL\" if the answer is not faithful to the DOCUMENT. Show your reasoning.\n\n--\nQUESTION (THIS DOES NOT COUNT AS BACKGROUND INFORMATION):\n{question}\n\n--\nDOCUMENT:\n{document}\n\n--\nANSWER:\n{answer}\n\n--\n\n Your output should be in JSON FORMAT with the keys \"REASONING\" and \"SCORE\":\n{{\"REASONING\": , \"SCORE\": }}"}], "max_tokens":600,"model": "PatronusAI/Llama-3-Patronus-Lynx-8B-Instruct" }' + +DATA=$(echo $DATA | sed "s/{question}/$QUESTION/g; s/{document}/$DOCUMENT/g; s/{answer}/$ANSWER/g") + +printf "$DATA" + +curl http://localhost:9080/v1/hallucination_detection \ + -H 'Content-Type: application/json' \ + -d "$DATA" +``` + +Example Output: + +```bash +{"REASONING": ['The CONTEXT mentions that the CDC developed an rRT-PCR test to diagnose COVID-19.', 'The CONTEXT does not describe what rRT-PCR stands for or how the test works.', 'The ANSWER simply states that the test is an rRT-PCR test.', 'The ANSWER does not provide additional information about the test, such as its full form or methodology.', 'Given the QUESTION about what kind of test can diagnose COVID-19, the ANSWER is faithful to the CONTEXT because it correctly identifies the type of test developed by the CDC, even though it lacks detailed explanation.'], "SCORE": PASS} +``` + +_Case with Hallucination (Invalid or Inconsistent Output)_ + +```bash +DOCUMENT="750 Seventh Avenue is a 615 ft (187m) tall Class-A office skyscraper in New York City. 101 Park Avenue is a 629 ft tall skyscraper in New York City, New York." + +QUESTION=" 750 7th Avenue and 101 Park Avenue, are located in which city?" + +ANSWER="750 7th Avenue and 101 Park Avenue are located in Albany, New York" + +DATA='{"messages":[{"role": "user", "content": "Given the following QUESTION, DOCUMENT and ANSWER you must analyze the provided answer and determine whether it is faithful to the contents of the DOCUMENT. The ANSWER must not offer new information beyond the context provided in the DOCUMENT. The ANSWER also must not contradict information provided in the DOCUMENT.
Output your final verdict by strictly following this format: \"PASS\" is the answer is faithful to the DOCUMENT and \"FAIL\" if the answer is not faithful to the DOCUMENT. Show your reasoning.\n\n--\nQUESTION (THIS DOES NOT COUNT AS BACKGROUND INFORMATION):\n{question}\n\n--\nDOCUMENT:\n{document}\n\n--\nANSWER:\n{answer}\n\n--\n\n Your output should be in JSON FORMAT with the keys \"REASONING\" and \"SCORE\":\n{{\"REASONING\": , \"SCORE\": }}"}], "max_tokens":600,"model": "PatronusAI/Llama-3-Patronus-Lynx-8B-Instruct" }' + +DATA=$(echo $DATA | sed "s/{question}/$QUESTION/g; s/{document}/$DOCUMENT/g; s/{answer}/$ANSWER/g") + +printf "$DATA" + +curl http://localhost:9080/v1/hallucination_detection \ + -H 'Content-Type: application/json' \ + -d "$DATA" + +``` + +Example Output: + +```bash +{"REASONING": ['The CONTEXT specifies that 750 Seventh Avenue and 101 Park Avenue are located in New York City.', 'The ANSWER incorrectly states that these locations are in Albany, New York.', 'The QUESTION asks for the city where these addresses are located.', 'The correct answer should be New York City, not Albany.'], "SCORE": FAIL} +``` + +**Python Script:** + +```python +import requests +import json + +proxies = {"http": ""} +url = "http://localhost:9080/v1/hallucination_detection" +data = { + "messages": [ + { + "role": "user", + "content": 'Given the following QUESTION, DOCUMENT and ANSWER you must analyze the provided answer and determine whether it is faithful to the contents of the DOCUMENT. The ANSWER must not offer new information beyond the context provided in the DOCUMENT. The ANSWER also must not contradict information provided in the DOCUMENT. Output your final verdict by strictly following this format: "PASS" is the answer is faithful to the DOCUMENT and "FAIL" if the answer is not faithful to the DOCUMENT. Show your reasoning.\n\n--\nQUESTION (THIS DOES NOT COUNT AS BACKGROUND INFORMATION):\n 750 7th Avenue and 101 Park Avenue, are located in which city?\n\n--\nDOCUMENT:\n750 Seventh Avenue is a 615 ft (187m) tall Class-A office skyscraper in New York City. 
101 Park Avenue is a 629 ft tall skyscraper in New York City, New York.\n\n--\nANSWER:\n750 7th Avenue and 101 Park Avenue are located in Albany, New York\n\n--\n\n Your output should be in JSON FORMAT with the keys "REASONING" and "SCORE":\n{{"REASONING": , "SCORE": }}', + } + ], + "max_tokens": 600, + "model": "PatronusAI/Llama-3-Patronus-Lynx-8B-Instruct", +} + +try: + resp = requests.post(url=url, data=data, proxies=proxies) + print(resp.json()) + print("Request successful!") +except requests.exceptions.RequestException as e: + print("An error occurred:", e) +``` diff --git a/comps/guardrails/src/hallucination_detection/build_docker_hallucination_microservice.sh b/comps/guardrails/src/hallucination_detection/build_docker_hallucination_microservice.sh new file mode 100644 index 0000000000..7b3ef942cc --- /dev/null +++ b/comps/guardrails/src/hallucination_detection/build_docker_hallucination_microservice.sh @@ -0,0 +1,27 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Folder name you're looking for +target_folder="GenAIComps" +proj_folder=$(pwd) + +# Start from the current directory +current_dir=$(pwd) + +# Loop until the folder is found or we reach the root +while [[ "$current_dir" != "/" ]]; do + # Check if the folder exists in the current directory + if [ -d "$current_dir/$target_folder" ]; then + # If found, change to that directory and exit + cd "$current_dir/$target_folder" || exit + echo "Found and changed to $current_dir/$target_folder" + fi + # Move up one level + current_dir=$(dirname "$current_dir") +done + +docker build --no-cache \ + -t opea/guardrails-hallucination-detection:latest \ + --build-arg https_proxy=$https_proxy \ + --build-arg http_proxy=$http_proxy \ + -f $proj_folder/Dockerfile . diff --git a/comps/llms/text-generation/vllm/langchain/entrypoint.sh b/comps/guardrails/src/hallucination_detection/entrypoint.sh similarity index 56% rename from comps/llms/text-generation/vllm/langchain/entrypoint.sh rename to comps/guardrails/src/hallucination_detection/entrypoint.sh index d60eddd36b..a74dea21cd 100644 --- a/comps/llms/text-generation/vllm/langchain/entrypoint.sh +++ b/comps/guardrails/src/hallucination_detection/entrypoint.sh @@ -1,8 +1,8 @@ #!/usr/bin/env bash -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2025 Intel Corporation # SPDX-License-Identifier: Apache-2.0 pip --no-cache-dir install -r requirements-runtime.txt -python llm.py +python opea_hallucination_detection_microservice.py diff --git a/comps/guardrails/src/hallucination_detection/integrations/__init__.py b/comps/guardrails/src/hallucination_detection/integrations/__init__.py new file mode 100644 index 0000000000..4057dc0163 --- /dev/null +++ b/comps/guardrails/src/hallucination_detection/integrations/__init__.py @@ -0,0 +1,2 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 diff --git a/comps/guardrails/src/hallucination_detection/integrations/hallucination_guard.py b/comps/guardrails/src/hallucination_detection/integrations/hallucination_guard.py new file mode 100644 index 0000000000..5da0983fbb --- /dev/null +++ b/comps/guardrails/src/hallucination_detection/integrations/hallucination_guard.py @@ -0,0 +1,85 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os +from typing import Union + +import requests +from fastapi.responses import StreamingResponse +from langchain.schema import HumanMessage, SystemMessage +from langchain_community.llms import VLLMOpenAI +from 
langchain_core.prompts import ChatPromptTemplate, PromptTemplate + +from comps import ( + CustomLogger, + GeneratedDoc, + LLMParamsDoc, + OpeaComponent, + OpeaComponentRegistry, + SearchedDoc, + ServiceType, +) +from comps.cores.proto.api_protocol import ChatCompletionRequest +from comps.guardrails.src.hallucination_detection.integrations.template import ChatTemplate + +logger = CustomLogger("opea_hallucination_guard") +logflag = os.getenv("LOGFLAG", False) + +llm_endpoint = os.getenv("vLLM_ENDPOINT", "http://localhost:8008") +model_name = os.getenv("LLM_MODEL", "meta-llama/Meta-Llama-3-8B-Instruct") +llm = VLLMOpenAI(openai_api_key="EMPTY", openai_api_base=llm_endpoint + "/v1", model_name=model_name) + + +@OpeaComponentRegistry.register("OPEA_HALLUCINATION_GUARD") +class OpeaHallucinationGuard(OpeaComponent): + """A specialized hallucination detection component derived from OpeaComponent.""" + + def __init__(self, name: str, description: str, config: dict = None): + super().__init__(name, ServiceType.LLM.name.lower(), description, config) + self.model = os.getenv("LLM_MODEL", "meta-llama/Meta-Llama-3-8B-Instruct") + health_status = self.check_health() + if not health_status: + logger.error("OpenAIHallucinationGuard health check failed.") + + async def invoke(self, input: Union[LLMParamsDoc, ChatCompletionRequest, SearchedDoc]): + """Invokes the hallucination detection for the input. + + Args: + input (Union[LLMParamsDoc, ChatCompletionRequest, SearchedDoc]) + """ + if logflag: + logger.info(f"Input received: {input}") + + if isinstance(input, ChatCompletionRequest): + if logflag: + logger.info("[ ChatCompletionRequest ] input from user") + + headers = {"Content-Type": "application/json"} + payload = {} + payload["messages"] = input.messages + payload["max_tokens"] = input.max_tokens + payload["model"] = input.model + response = requests.post(llm_endpoint + "/v1/chat/completions", json=payload, headers=headers) + + if logflag: + logger.info(response.text) + + return GeneratedDoc(text=response.json()["choices"][0]["message"]["content"], prompt="") + else: + logger.info("[ UNKNOWN ] input from user") + + def check_health(self) -> bool: + """Checks the health of the hallucination detection service. + + Returns: + bool: True if the service is reachable and healthy, False otherwise. 
+ """ + try: + response = requests.get(llm_endpoint + "health") + if response.status_code == 200: + return True + else: + return False + except Exception as e: + logger.error(f"Health check failed due to an exception: {e}") + return False diff --git a/comps/llms/text-generation/native/langchain/template.py b/comps/guardrails/src/hallucination_detection/integrations/template.py similarity index 100% rename from comps/llms/text-generation/native/langchain/template.py rename to comps/guardrails/src/hallucination_detection/integrations/template.py diff --git a/comps/llms/text-generation/vllm/langchain/launch_microservice.sh b/comps/guardrails/src/hallucination_detection/launch_hallucination_microservice.sh similarity index 66% rename from comps/llms/text-generation/vllm/langchain/launch_microservice.sh rename to comps/guardrails/src/hallucination_detection/launch_hallucination_microservice.sh index 70822ed841..17c1e54113 100644 --- a/comps/llms/text-generation/vllm/langchain/launch_microservice.sh +++ b/comps/guardrails/src/hallucination_detection/launch_hallucination_microservice.sh @@ -1,9 +1,9 @@ -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2025 Intel Corporation # SPDX-License-Identifier: Apache-2.0 docker run -d --rm \ - --name="llm-vllm-server" \ - -p 9000:9000 \ + --name="hallucination-detection" \ + -p 9080:9000 \ --ipc=host \ -e http_proxy=$http_proxy \ -e https_proxy=$https_proxy \ @@ -11,4 +11,4 @@ docker run -d --rm \ -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN \ -e LLM_MODEL=$LLM_MODEL \ -e LOGFLAG=$LOGFLAG \ - opea/llm-textgen:latest + opea/guardrails-hallucination-detection:latest diff --git a/comps/llms/text-generation/vllm/langchain/dependency/launch_vllm_service.sh b/comps/guardrails/src/hallucination_detection/launch_vllm_service.sh similarity index 73% rename from comps/llms/text-generation/vllm/langchain/dependency/launch_vllm_service.sh rename to comps/guardrails/src/hallucination_detection/launch_vllm_service.sh index 83ecd67530..fa9d04c8a4 100644 --- a/comps/llms/text-generation/vllm/langchain/dependency/launch_vllm_service.sh +++ b/comps/guardrails/src/hallucination_detection/launch_vllm_service.sh @@ -2,6 +2,9 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +# Enable debug mode +set -x + # Set default values default_port=8008 default_model=$LLM_MODEL @@ -38,7 +41,7 @@ volume=$PWD/data # Build the Docker run command based on hardware mode if [ "$hw_mode" = "hpu" ]; then - docker run -d --rm --runtime=habana --name="vllm-service" -p $port_number:80 -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy -e HF_TOKEN=${HF_TOKEN} opea/vllm-gaudi:latest --model $model_name --tensor-parallel-size $parallel_number --host 0.0.0.0 --port 80 --block-size $block_size --max-num-seqs $max_num_seqs --max-seq_len-to-capture $max_seq_len_to_capture + docker run -d --rm --runtime=habana --name="vllm-service" -v $volume:/data -p $port_number:80 -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy -e HF_TOKEN=${HF_TOKEN} opea/vllm-gaudi:latest --model $model_name --tensor-parallel-size $parallel_number --host 0.0.0.0 --port 80 --block-size $block_size --max-num-seqs $max_num_seqs --max-seq-len-to-capture $max_seq_len_to_capture --trust-remote-code else - docker run -d --rm --name="vllm-service" -p $port_number:80 
--network=host -v $volume:/data -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy -e HF_TOKEN=${HF_TOKEN} -e VLLM_CPU_KVCACHE_SPACE=40 opea/vllm-cpu:latest --model $model_name --host 0.0.0.0 --port 80 + docker run -d --rm --name="vllm-service" -p $port_number:80 --network=host -v $volume:/data -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy -e HF_TOKEN=${HF_TOKEN} -e VLLM_CPU_KVCACHE_SPACE=40 opea/vllm:cpu --model $model_name --host 0.0.0.0 --port 80 fi diff --git a/comps/guardrails/src/hallucination_detection/opea_hallucination_detection_microservice.py b/comps/guardrails/src/hallucination_detection/opea_hallucination_detection_microservice.py new file mode 100644 index 0000000000..12d62b2f54 --- /dev/null +++ b/comps/guardrails/src/hallucination_detection/opea_hallucination_detection_microservice.py @@ -0,0 +1,64 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os +import time +from typing import Union + +from integrations.hallucination_guard import OpeaHallucinationGuard + +from comps import ( + CustomLogger, + GeneratedDoc, + LLMParamsDoc, + OpeaComponentLoader, + SearchedDoc, + ServiceType, + opea_microservices, + register_microservice, + register_statistics, + statistics_dict, +) +from comps.cores.proto.api_protocol import ChatCompletionRequest + +logger = CustomLogger("opea_hallucination_detection_microservice") +logflag = os.getenv("LOGFLAG", False) + +hallucination_detection_component_name = os.getenv("HALLUCINATION_DETECTION_COMPONENT_NAME", "OPEA_HALLUCINATION_GUARD") +# Initialize OpeaComponentLoader +loader = OpeaComponentLoader( + hallucination_detection_component_name, + name=hallucination_detection_component_name, + description=f"OPEA Hallucination Detection Component: {hallucination_detection_component_name}", +) + + +@register_microservice( + name="opea_service@hallucination_detection", + service_type=ServiceType.GUARDRAIL, + endpoint="/v1/hallucination_detection", + host="0.0.0.0", + port=9000, +) +@register_statistics(names=["opea_service@hallucination_detection"]) +async def hallucination_guard(input: Union[LLMParamsDoc, ChatCompletionRequest, SearchedDoc]): + start = time.time() + if logflag: + logger.info(input) + + try: + hallucination_response = await loader.invoke(input) + + if logflag: + logger.info(hallucination_response) + + statistics_dict["opea_service@hallucination_detection"].append_latency(time.time() - start, None) + return hallucination_response + except Exception as e: + logger.error(f"Error during hallucination detection invocation: {e}") + raise + + +if __name__ == "__main__": + opea_microservices["opea_service@hallucination_detection"].start() + logger.info("Hallucination Detection Microservice is up and running successfully...") diff --git a/comps/llms/summarization/tgi/langchain/requirements-runtime.txt b/comps/guardrails/src/hallucination_detection/requirements-runtime.txt similarity index 100% rename from comps/llms/summarization/tgi/langchain/requirements-runtime.txt rename to comps/guardrails/src/hallucination_detection/requirements-runtime.txt diff --git a/comps/llms/text-generation/vllm/langchain/requirements.txt b/comps/guardrails/src/hallucination_detection/requirements.txt similarity index 100% rename from comps/llms/text-generation/vllm/langchain/requirements.txt rename to comps/guardrails/src/hallucination_detection/requirements.txt diff --git a/comps/guardrails/src/toxicity_detection/Dockerfile b/comps/guardrails/src/toxicity_detection/Dockerfile index 5f8bf60c82..fd397c9f61 100644 
--- a/comps/guardrails/src/toxicity_detection/Dockerfile +++ b/comps/guardrails/src/toxicity_detection/Dockerfile @@ -16,8 +16,6 @@ RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ chown -R user /home/user/ -USER user - COPY comps /home/user/comps RUN pip install --no-cache-dir --upgrade pip setuptools && \ @@ -26,6 +24,8 @@ RUN pip install --no-cache-dir --upgrade pip setuptools && \ ENV PYTHONPATH=$PYTHONPATH:/home/user +USER user + WORKDIR /home/user/comps/guardrails/src/toxicity_detection/ ENTRYPOINT ["python", "opea_toxicity_detection_microservice.py"] diff --git a/comps/llms/text-generation/vllm/langchain/docker_compose_llm.yaml b/comps/image2image/deployment/docker_compose/compose.yaml similarity index 56% rename from comps/llms/text-generation/vllm/langchain/docker_compose_llm.yaml rename to comps/image2image/deployment/docker_compose/compose.yaml index 7737048ba4..5077a6af5d 100644 --- a/comps/llms/text-generation/vllm/langchain/docker_compose_llm.yaml +++ b/comps/image2image/deployment/docker_compose/compose.yaml @@ -1,44 +1,38 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -version: "3.8" - services: - vllm-service: - image: opea/vllm-gaudi:latest - container_name: vllm-gaudi-server + image2image: + image: ${REGISTRY:-opea}/image2image:${TAG:-latest} + container_name: image2image-server ports: - - "8008:80" - volumes: - - "./data:/data" + - ${IMAGE2IMAGE_PORT:-9389}:9389 + ipc: host environment: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} HF_TOKEN: ${HF_TOKEN} - HABANA_VISIBLE_DEVICES: all - OMPI_MCA_btl_vader_single_copy_mechanism: none - LLM_MODEL: ${LLM_MODEL} - runtime: habana - cap_add: - - SYS_NICE - ipc: host - command: --model $LLM_MODEL --tensor-parallel-size 1 --host 0.0.0.0 --port 80 - llm: - image: opea/llm-textgen:latest - container_name: llm-vllm-gaudi-server - depends_on: - - vllm-service + MODEL: ${MODEL} + restart: unless-stopped + + image2image-gaudi: + image: ${REGISTRY:-opea}/image2image-gaudi:${TAG:-latest} + container_name: image2image-gaudi-server ports: - - "9000:9000" + - ${IMAGE2IMAGE_PORT:-9389}:9389 ipc: host environment: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - vLLM_ENDPOINT: ${vLLM_ENDPOINT} HF_TOKEN: ${HF_TOKEN} - LLM_MODEL: ${LLM_MODEL} + MODEL: ${MODEL} + HABANA_VISIBLE_DEVICES: all + OMPI_MCA_btl_vader_single_copy_mechanism: none + runtime: habana + cap_add: + - SYS_NICE restart: unless-stopped networks: diff --git a/comps/image2image/src/Dockerfile.intel_hpu b/comps/image2image/src/Dockerfile.intel_hpu index dd0d29f523..f9090fa191 100644 --- a/comps/image2image/src/Dockerfile.intel_hpu +++ b/comps/image2image/src/Dockerfile.intel_hpu @@ -12,7 +12,7 @@ COPY comps /home/user/comps RUN chown -R user /home/user/comps/image2image RUN rm -rf /etc/ssh/ssh_host* -USER user + # Set environment variables ENV LANG=en_US.UTF-8 ENV PYTHONPATH=/home/user:/usr/lib/habanalabs/:/home/user/optimum-habana @@ -21,7 +21,7 @@ ENV PYTHONPATH=/home/user:/usr/lib/habanalabs/:/home/user/optimum-habana RUN pip install --no-cache-dir --upgrade pip && \ pip install --no-cache-dir -r /home/user/comps/image2image/src/requirements.txt && \ pip install --no-cache-dir optimum[habana] - +USER user WORKDIR /home/user/comps/image2image/src RUN echo python opea_image2image_microservice.py --device hpu --use_hpu_graphs --bf16 >> run.sh diff --git a/comps/image2image/src/README.md b/comps/image2image/src/README.md index 4d71161758..95b2f1e735 100644 --- 
a/comps/image2image/src/README.md +++ b/comps/image2image/src/README.md @@ -60,7 +60,7 @@ cd ../.. docker build -t opea/image2image-gaudi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/image2image/src/Dockerfile.intel_hpu . ``` -## 2.2 Start Image-to-Image Service +## 2.2 Start Image-to-Image Service with Docker ### 2.2.1 Start Image-to-Image Service on Xeon @@ -78,7 +78,25 @@ Start image-to-image service on Gaudi with below command: docker run -p 9389:9389 --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e HF_TOKEN=$HF_TOKEN -e MODEL=$MODEL opea/image2image-gaudi:latest ``` -# 3 Test Image-to-Image Service +# 🚀3. Start Image-to-Image with Docker Compose + +Alternatively, you can also start the Image-to-Image microservice with Docker Compose. + +- Xeon CPU + +```bash +cd comps/image2image/deployment/docker_compose +docker compose -f compose.yaml up image2image -d +``` + +- Gaudi2 HPU + +```bash +cd comps/image2image/deployment/docker_compose +docker compose -f compose.yaml up image2image-gaudi -d +``` + +# 4 Test Image-to-Image Service ```bash http_proxy="" curl http://localhost:9389/v1/image2image -XPOST -d '{"image": "https://huggingface.co/datasets/patrickvonplaten/images/resolve/main/aa_xl/000000009.png", "prompt":"a photo of an astronaut riding a horse on mars", "num_images_per_prompt":1}' -H 'Content-Type: application/json' diff --git a/comps/image2video/deployment/docker_compose/compose.yaml b/comps/image2video/deployment/docker_compose/compose.yaml new file mode 100644 index 0000000000..abb3433612 --- /dev/null +++ b/comps/image2video/deployment/docker_compose/compose.yaml @@ -0,0 +1,30 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +services: + image2video: + image: ${REGISTRY:-opea}/image2video:${TAG:-latest} + container_name: image2video + ports: + - "9369:9369" + environment: + - no_proxy=${no_proxy} + - https_proxy=${https_proxy} + - http_proxy=${http_proxy} + - HF_TOKEN=${HF_TOKEN} + ipc: host + restart: always + image2video-gaudi: + extends: image2video + image: ${REGISTRY:-opea}/image2video-gaudi:${TAG:-latest} + container_name: image2video-gaudi + environment: + - HABANA_VISIBLE_DEVICES=all + - OMPI_MCA_btl_vader_single_copy_mechanism=none + runtime: habana + cap_add: + - SYS_NICE + +networks: + default: + driver: bridge diff --git a/comps/image2video/src/Dockerfile.intel_hpu b/comps/image2video/src/Dockerfile.intel_hpu index 67be7913ca..9b8f7f8362 100644 --- a/comps/image2video/src/Dockerfile.intel_hpu +++ b/comps/image2video/src/Dockerfile.intel_hpu @@ -25,7 +25,7 @@ COPY comps /home/user/comps RUN chown -R user /home/user/comps/image2video RUN rm -rf /etc/ssh/ssh_host* -USER user + # Set environment variables ENV LANG=en_US.UTF-8 ENV PYTHONPATH=/home/user:/usr/lib/habanalabs/:/optimum-habana @@ -36,7 +36,7 @@ RUN pip install --no-cache-dir --upgrade pip && \ pip install --no-cache-dir optimum[habana] ENV PYTHONPATH=$PYTHONPATH:/home/user - +USER user WORKDIR /home/user/comps/image2video/src ENTRYPOINT ["python", "opea_image2video_microservice.py", "--device", "hpu"] diff --git a/comps/image2video/src/README.md b/comps/image2video/src/README.md index aec8161f28..4d29683f47 100644 --- a/comps/image2video/src/README.md +++ b/comps/image2video/src/README.md @@ -44,12 +44,26 @@ Start SVD server on Xeon with below command: docker run --ipc=host -p 9369:9369 -e 
http_proxy=$http_proxy -e https_proxy=$https_proxy opea/image2video:latest ``` +Or use docker compose with below command: + +```bash +cd ../deployment/docker_compose +docker compose -f compose.yaml up image2video -d +``` + Start SVD server on Gaudi with below command: ```bash docker run -p 9369:9369 --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy opea/image2video-gaudi:latest ``` +Or use docker compose with below command: + +```bash +cd ../deployment/docker_compose +docker compose -f compose.yaml up image2video-gaudi -d +``` + ## 2.3 Test ```bash diff --git a/comps/intent_detection/langchain/Dockerfile b/comps/intent_detection/langchain/Dockerfile deleted file mode 100644 index e80aba6216..0000000000 --- a/comps/intent_detection/langchain/Dockerfile +++ /dev/null @@ -1,24 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -FROM python:3.11-slim - -RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ - libgl1-mesa-glx \ - libjemalloc-dev - -RUN useradd -m -s /bin/bash user && \ - mkdir -p /home/user && \ - chown -R user /home/user/ - -USER user - -COPY comps /home/user/comps - -RUN pip install --no-cache-dir --upgrade pip setuptools && \ - pip install --no-cache-dir -r /home/user/comps/intent_detection/langchain/requirements.txt - -ENV PYTHONPATH=$PYTHONPATH:/home/user - -WORKDIR /home/user/comps/intent_detection/langchain -ENTRYPOINT ["python", "intent_detection.py"] diff --git a/comps/intent_detection/langchain/README.md b/comps/intent_detection/langchain/README.md deleted file mode 100644 index b86b82364c..0000000000 --- a/comps/intent_detection/langchain/README.md +++ /dev/null @@ -1,88 +0,0 @@ -# Intent Detection Microservice by TGI - -## 🚀1. Start Microservice with Python(Option 1) - -### 1.1 Install Requirements - -```bash -pip install -r requirements.txt -``` - -### 1.2 Start TGI Service - -```bash -export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} -docker run -p 8008:80 -v ./data:/data --name tgi_service --shm-size 1g ghcr.io/huggingface/text-generation-inference:1.4 --model-id ${your_hf_llm_model} -``` - -### 1.3 Verify the TGI Service - -```bash -curl http://${your_ip}:8008/generate \ - -X POST \ - -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \ - -H 'Content-Type: application/json' -``` - -### 1.4 Setup Environment Variables - -```bash -export TGI_LLM_ENDPOINT="http://${your_ip}:8008" -``` - -### 1.5 Start Intent Detection Microservice with Python Script - -Start intent detection microservice with below command. - -```bash -cd ../../../ -cp comps/intent_detection/langchain/intent_detection.py . -python intent_detection.py -``` - -## 🚀2. Start Microservice with Docker (Option 2) - -### 2.1 Start TGI Service - -Please refer to 1.2. - -### 2.2 Setup Environment Variables - -```bash -export TGI_LLM_ENDPOINT="http://${your_ip}:8008" -``` - -### 2.3 Build Docker Image - -```bash -cd ../../../ -docker build --no-cache -t opea/intent-detection-tgi:latest -f comps/intent_detection/langchain/Dockerfile . 
-``` - -### 2.4 Run Docker with CLI (Option A) - -```bash -docker run -it --name="intent-tgi-server" --net=host --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e TGI_LLM_ENDPOINT=$TGI_LLM_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN opea/intent-detection-tgi:latest -``` - -### 2.5 Run with Docker Compose (Option B) - -```bash -export LLM_MODEL_ID=${your_hf_llm_model} -export http_proxy=${your_http_proxy} -export https_proxy=${your_http_proxy} -export TGI_LLM_ENDPOINT="http://tgi-service:80" -export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} -docker compose -f docker_compose_intent.yaml up -d -``` - -## 🚀3. Consume Microservice - -Once intent detection microservice is started, user can use below command to invoke the microservice. - -```bash -curl http://${your_ip}:9000/v1/chat/intent\ - -X POST \ - -d '{"query":"What is Deep Learning?","max_new_tokens":10,"top_k":1,"temperature":0.001,"stream":false}' \ - -H 'Content-Type: application/json' -``` diff --git a/comps/intent_detection/langchain/docker_compose_intent.yaml b/comps/intent_detection/langchain/docker_compose_intent.yaml deleted file mode 100644 index 84dbcabe2e..0000000000 --- a/comps/intent_detection/langchain/docker_compose_intent.yaml +++ /dev/null @@ -1,31 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -version: "3.8" - -services: - tgi_service: - image: ghcr.io/huggingface/text-generation-inference:1.4 - container_name: tgi-service - ports: - - "8008:80" - volumes: - - "./data:/data" - shm_size: 1g - command: --model-id ${LLM_MODEL_ID} - llm: - image: opea/intent-detection-tgi:latest - container_name: intent-tgi-server - ports: - - "9000:9000" - ipc: host - environment: - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} - restart: unless-stopped - -networks: - default: - driver: bridge diff --git a/comps/intent_detection/langchain/intent_detection.py b/comps/intent_detection/langchain/intent_detection.py deleted file mode 100644 index 8da7ca22e7..0000000000 --- a/comps/intent_detection/langchain/intent_detection.py +++ /dev/null @@ -1,45 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import os - -from langchain import LLMChain, PromptTemplate -from langchain_community.llms import HuggingFaceEndpoint - -from comps import GeneratedDoc, LLMParamsDoc, ServiceType, opea_microservices, register_microservice - - -@register_microservice( - name="opea_service@llm_intent", - service_type=ServiceType.LLM, - endpoint="/v1/chat/intent", - host="0.0.0.0", - port=9000, -) -async def llm_generate(input: LLMParamsDoc): - llm_endpoint = os.getenv("TGI_LLM_ENDPOINT", "http://localhost:8080") - llm = HuggingFaceEndpoint( - endpoint_url=llm_endpoint, - max_new_tokens=input.max_new_tokens, - top_k=input.top_k, - top_p=input.top_p, - typical_p=input.typical_p, - temperature=input.temperature, - repetition_penalty=input.repetition_penalty, - streaming=input.stream, - timeout=600, - ) - - prompt_template = 'Please identify the intent of the user query. 
You may only respond with "chitchat" or \QA" without explanations or engaging in conversation.### User Query: {query}, ### Response: ' - prompt = PromptTemplate(template=prompt_template, input_variables=["query"]) - - llm_chain = LLMChain(prompt=prompt, llm=llm) - - response = await llm_chain.ainvoke(input.query) - response = response["text"] - print("response", response) - return GeneratedDoc(text=response, prompt=input.query) - - -if __name__ == "__main__": - opea_microservices["opea_service@llm_intent"].start() diff --git a/comps/intent_detection/langchain/requirements.txt b/comps/intent_detection/langchain/requirements.txt deleted file mode 100644 index d979c95d4d..0000000000 --- a/comps/intent_detection/langchain/requirements.txt +++ /dev/null @@ -1,11 +0,0 @@ -docarray[full] -fastapi -huggingface_hub -langchain -langchain_community -opentelemetry-api -opentelemetry-exporter-otlp -opentelemetry-sdk -prometheus-fastapi-instrumentator -shortuuid -uvicorn diff --git a/comps/llms/deployment/docker_compose/compose_doc-summarization.yaml b/comps/llms/deployment/docker_compose/compose_doc-summarization.yaml new file mode 100644 index 0000000000..8eb0044247 --- /dev/null +++ b/comps/llms/deployment/docker_compose/compose_doc-summarization.yaml @@ -0,0 +1,65 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +include: + - ../../../third_parties/tgi/deployment/docker_compose/compose.yaml + - ../../../third_parties/vllm/deployment/docker_compose/compose.yaml + +services: + docsum: + image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest} + container_name: llm-docsum-server + ports: + - ${DOCSUM_PORT:-9000}:9000 + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + LLM_ENDPOINT: ${LLM_ENDPOINT} + LLM_MODEL_ID: ${LLM_MODEL_ID} + HF_TOKEN: ${HF_TOKEN} + LOGFLAG: ${LOGFLAG:-False} + MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS} + MAX_TOTAL_TOKENS: ${MAX_TOTAL_TOKENS} + restart: unless-stopped + + docsum-tgi: + extends: docsum + container_name: docsum-tgi + environment: + DocSum_COMPONENT_NAME: ${DocSum_COMPONENT_NAME:-OpeaDocSumTgi} + depends_on: + tgi-server: + condition: service_healthy + + docsum-tgi-gaudi: + extends: docsum + container_name: docsum-tgi-gaudi + environment: + DocSum_COMPONENT_NAME: ${DocSum_COMPONENT_NAME:-OpeaDocSumTgi} + depends_on: + tgi-gaudi-server: + condition: service_healthy + + docsum-vllm: + extends: docsum + container_name: docsum-vllm + environment: + DocSum_COMPONENT_NAME: ${DocSum_COMPONENT_NAME:-OpeaDocSumvLLM} + depends_on: + vllm-server: + condition: service_healthy + + docsum-vllm-gaudi: + extends: docsum + container_name: docsum-vllm-gaudi + environment: + DocSum_COMPONENT_NAME: ${DocSum_COMPONENT_NAME:-OpeaDocSumvLLM} + depends_on: + vllm-gaudi-server: + condition: service_healthy + +networks: + default: + driver: bridge diff --git a/comps/llms/deployment/docker_compose/compose_faq-generation.yaml b/comps/llms/deployment/docker_compose/compose_faq-generation.yaml new file mode 100644 index 0000000000..c232fa7427 --- /dev/null +++ b/comps/llms/deployment/docker_compose/compose_faq-generation.yaml @@ -0,0 +1,63 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +include: + - ../../../third_parties/tgi/deployment/docker_compose/compose.yaml + - ../../../third_parties/vllm/deployment/docker_compose/compose.yaml + +services: + faqgen: + image: ${REGISTRY:-opea}/llm-faqgen:${TAG:-latest} + container_name: llm-faqgen-server + ports: + - ${FAQ_PORT:-9000}:9000 
+ ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + LLM_ENDPOINT: ${LLM_ENDPOINT} + LLM_MODEL_ID: ${LLM_MODEL_ID} + HF_TOKEN: ${HF_TOKEN} + LOGFLAG: ${LOGFLAG:-False} + restart: unless-stopped + + faqgen-tgi: + extends: faqgen + container_name: faqgen-tgi + environment: + FAQGen_COMPONENT_NAME: ${FAQGen_COMPONENT_NAME:-OpeaFaqGenTgi} + depends_on: + tgi-server: + condition: service_healthy + + faqgen-tgi-gaudi: + extends: faqgen + container_name: faqgen-tgi-gaudi + environment: + FAQGen_COMPONENT_NAME: ${FAQGen_COMPONENT_NAME:-OpeaFaqGenTgi} + depends_on: + tgi-gaudi-server: + condition: service_healthy + + faqgen-vllm: + extends: faqgen + container_name: faqgen-vllm + environment: + FAQGen_COMPONENT_NAME: ${FAQGen_COMPONENT_NAME:-OpeaFaqGenvLLM} + depends_on: + vllm-server: + condition: service_healthy + + faqgen-vllm-gaudi: + extends: faqgen + container_name: faqgen-vllm-gaudi + environment: + FAQGen_COMPONENT_NAME: ${FAQGen_COMPONENT_NAME:-OpeaFaqGenvLLM} + depends_on: + vllm-gaudi-server: + condition: service_healthy + +networks: + default: + driver: bridge diff --git a/comps/llms/deployment/docker_compose/compose_text-generation.yaml b/comps/llms/deployment/docker_compose/compose_text-generation.yaml new file mode 100644 index 0000000000..fbf503ed62 --- /dev/null +++ b/comps/llms/deployment/docker_compose/compose_text-generation.yaml @@ -0,0 +1,105 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +include: + - ../../../third_parties/tgi/deployment/docker_compose/compose.yaml + - ../../../third_parties/vllm/deployment/docker_compose/compose.yaml + - ../../../third_parties/ollama/deployment/docker_compose/compose.yaml + +services: + textgen: + image: ${REGISTRY:-opea}/llm-textgen:${TAG:-latest} + container_name: llm-textgen-server + ports: + - ${TEXTGEN_PORT:-9000}:9000 + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + LLM_ENDPOINT: ${LLM_ENDPOINT} + LLM_MODEL_ID: ${LLM_MODEL_ID} + HF_TOKEN: ${HF_TOKEN} + LOGFLAG: ${LOGFLAG:-False} + restart: unless-stopped + + textgen-gaudi: + image: ${REGISTRY:-opea}/llm-textgen-gaudi:${TAG:-latest} + container_name: llm-textgen-gaudi-server + ports: + - ${TEXTGEN_PORT:-9000}:9000 + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + LLM_ENDPOINT: ${LLM_ENDPOINT} + LLM_MODEL_ID: ${LLM_MODEL_ID} + HF_TOKEN: ${HF_TOKEN} + HABANA_VISIBLE_DEVICES: all + OMPI_MCA_btl_vader_single_copy_mechanism: none + TOKENIZERS_PARALLELISM: False + LOGFLAG: ${LOGFLAG:-False} + runtime: habana + cap_add: + - SYS_NICE + restart: unless-stopped + + textgen-service-tgi: + extends: textgen + container_name: textgen-service-tgi + environment: + LLM_COMPONENT_NAME: ${LLM_COMPONENT_NAME:-OpeaTextGenService} + depends_on: + tgi-server: + condition: service_healthy + + textgen-service-tgi-gaudi: + extends: textgen + container_name: textgen-service-tgi-gaudi + environment: + LLM_COMPONENT_NAME: ${LLM_COMPONENT_NAME:-OpeaTextGenService} + depends_on: + tgi-gaudi-server: + condition: service_healthy + + textgen-service-vllm: + extends: textgen + container_name: textgen-service-vllm + environment: + LLM_COMPONENT_NAME: ${LLM_COMPONENT_NAME:-OpeaTextGenService} + depends_on: + vllm-server: + condition: service_healthy + + textgen-service-vllm-gaudi: + extends: textgen + container_name: textgen-service-vllm-gaudi + environment: + LLM_COMPONENT_NAME: 
${LLM_COMPONENT_NAME:-OpeaTextGenService} + depends_on: + vllm-gaudi-server: + condition: service_healthy + + textgen-service-ollama: + extends: textgen + container_name: textgen-service-ollama + environment: + LLM_COMPONENT_NAME: ${LLM_COMPONENT_NAME:-OpeaTextGenService} + + textgen-predictionguard: + extends: textgen + container_name: textgen-predictionguard + environment: + LLM_COMPONENT_NAME: ${LLM_COMPONENT_NAME:-OpeaTextGenPredictionguard} + PREDICTIONGUARD_API_KEY: ${PREDICTIONGUARD_API_KEY} + + textgen-native-gaudi: + extends: textgen-gaudi + container_name: textgen-native-gaudi + environment: + LLM_COMPONENT_NAME: ${LLM_COMPONENT_NAME:-OpeaTextGenNative} + +networks: + default: + driver: bridge diff --git a/comps/llms/deployment/docker_compose/faq-generation_tgi.yaml b/comps/llms/deployment/docker_compose/faq-generation_tgi.yaml deleted file mode 100644 index 8b56031dfb..0000000000 --- a/comps/llms/deployment/docker_compose/faq-generation_tgi.yaml +++ /dev/null @@ -1,50 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -version: "3.8" - -services: - tgi-service: - image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu - container_name: tgi-server - ports: - - ${LLM_ENDPOINT_PORT:-8008}:80 - volumes: - - "./data:/data" - shm_size: 1g - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} - host_ip: ${host_ip} - LLM_ENDPOINT_PORT: ${LLM_ENDPOINT_PORT} - healthcheck: - test: ["CMD-SHELL", "curl -f http://${host_ip}:${LLM_ENDPOINT_PORT}/health || exit 1"] - interval: 10s - timeout: 10s - retries: 100 - command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0 - llm: - image: opea/llm-faqgen:latest - container_name: llm-faqgen-server - depends_on: - tgi-service: - condition: service_healthy - ports: - - ${FAQ_PORT:-9000}:9000 - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - LLM_ENDPOINT: ${LLM_ENDPOINT} - LLM_MODEL_ID: ${LLM_MODEL_ID} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} - FAQGen_COMPONENT_NAME: ${FAQGen_COMPONENT_NAME} - LOGFLAG: ${LOGFLAG:-False} - restart: unless-stopped - -networks: - default: - driver: bridge diff --git a/comps/llms/deployment/docker_compose/faq-generation_vllm.yaml b/comps/llms/deployment/docker_compose/faq-generation_vllm.yaml deleted file mode 100644 index 7ae89c0fb8..0000000000 --- a/comps/llms/deployment/docker_compose/faq-generation_vllm.yaml +++ /dev/null @@ -1,53 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -version: "3.8" - -services: - vllm-service: - image: opea/vllm:latest - container_name: vllm-server - ports: - - ${LLM_ENDPOINT_PORT:-8008}:80 - volumes: - - "./data:/data" - shm_size: 128g - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} - LLM_MODEL_ID: ${LLM_MODEL_ID} - VLLM_TORCH_PROFILER_DIR: "/mnt" - host_ip: ${host_ip} - LLM_ENDPOINT_PORT: ${LLM_ENDPOINT_PORT} - VLLM_SKIP_WARMUP: ${VLLM_SKIP_WARMUP:-false} - healthcheck: - test: ["CMD-SHELL", "curl -f http://${host_ip}:${LLM_ENDPOINT_PORT}/health || exit 1"] - interval: 10s - timeout: 10s - retries: 100 - command: --model $LLM_MODEL_ID --host 0.0.0.0 --port 80 - llm: - image: opea/llm-faqgen:latest - container_name: llm-faqgen-server - depends_on: - vllm-service: - condition: service_healthy - ports: - - ${FAQ_PORT:-9000}:9000 - ipc: host - environment: - 
no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - LLM_ENDPOINT: ${LLM_ENDPOINT} - LLM_MODEL_ID: ${LLM_MODEL_ID} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} - FAQGen_COMPONENT_NAME: ${FAQGen_COMPONENT_NAME} - LOGFLAG: ${LOGFLAG:-False} - restart: unless-stopped - -networks: - default: - driver: bridge diff --git a/comps/llms/deployment/docker_compose/faq-generation_vllm_on_intel_hpu.yaml b/comps/llms/deployment/docker_compose/faq-generation_vllm_on_intel_hpu.yaml deleted file mode 100644 index fc5b6c9d89..0000000000 --- a/comps/llms/deployment/docker_compose/faq-generation_vllm_on_intel_hpu.yaml +++ /dev/null @@ -1,58 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -version: "3.8" - -services: - vllm-service: - image: opea/vllm-gaudi:latest - container_name: vllm-gaudi-server - ports: - - ${LLM_ENDPOINT_PORT:-8008}:80 - volumes: - - "./data:/data" - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} - HABANA_VISIBLE_DEVICES: all - OMPI_MCA_btl_vader_single_copy_mechanism: none - LLM_MODEL_ID: ${LLM_MODEL_ID} - VLLM_TORCH_PROFILER_DIR: "/mnt" - host_ip: ${host_ip} - LLM_ENDPOINT_PORT: ${LLM_ENDPOINT_PORT} - VLLM_SKIP_WARMUP: ${VLLM_SKIP_WARMUP:-false} - runtime: habana - cap_add: - - SYS_NICE - ipc: host - healthcheck: - test: ["CMD-SHELL", "curl -f http://${host_ip}:${LLM_ENDPOINT_PORT}/health || exit 1"] - interval: 10s - timeout: 10s - retries: 100 - command: --model $LLM_MODEL_ID --tensor-parallel-size 1 --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048 - llm: - image: opea/llm-faqgen:latest - container_name: llm-faqgen-server - depends_on: - vllm-service: - condition: service_healthy - ports: - - ${FAQ_PORT:-9000}:9000 - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - LLM_ENDPOINT: ${LLM_ENDPOINT} - LLM_MODEL_ID: ${LLM_MODEL_ID} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} - FAQGen_COMPONENT_NAME: ${FAQGen_COMPONENT_NAME} - LOGFLAG: ${LOGFLAG:-False} - restart: unless-stopped - -networks: - default: - driver: bridge diff --git a/comps/llms/deployment/docker_compose/text-generation_native_langchain.yaml b/comps/llms/deployment/docker_compose/text-generation_native_langchain.yaml deleted file mode 100644 index 241853efc7..0000000000 --- a/comps/llms/deployment/docker_compose/text-generation_native_langchain.yaml +++ /dev/null @@ -1,29 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -version: "3.8" - -services: - llm: - image: opea/llm-native:latest - container_name: llm-native-server - ports: - - "9000:9000" - runtime: habana - cap_add: - - SYS_NICE - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - LLM_NATIVE_MODEL: ${LLM_NATIVE_MODEL} - HABANA_VISIBLE_DEVICES: all - OMPI_MCA_btl_vader_single_copy_mechanism: none - TOKENIZERS_PARALLELISM: false - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} - restart: unless-stopped - -networks: - default: - driver: bridge diff --git a/comps/llms/deployment/docker_compose/text-generation_native_llama_index.yaml b/comps/llms/deployment/docker_compose/text-generation_native_llama_index.yaml deleted file mode 100644 index f3a36e5bb8..0000000000 --- a/comps/llms/deployment/docker_compose/text-generation_native_llama_index.yaml +++ /dev/null @@ -1,28 +0,0 @@ -# Copyright (C) 
2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -version: "3.8" - -services: - llm: - image: opea/llm-native:latest - container_name: llm-native-server - ports: - - "9000:9000" - runtime: habana - cap_add: - - SYS_NICE - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - LLM_NATIVE_MODEL: ${LLM_NATIVE_MODEL} - HABANA_VISIBLE_DEVICES: all - OMPI_MCA_btl_vader_single_copy_mechanism: none - TOKENIZERS_PARALLELISM: false - restart: unless-stopped - -networks: - default: - driver: bridge diff --git a/comps/llms/deployment/docker_compose/text-generation_predictionguard.yaml b/comps/llms/deployment/docker_compose/text-generation_predictionguard.yaml deleted file mode 100644 index bde9fa10a9..0000000000 --- a/comps/llms/deployment/docker_compose/text-generation_predictionguard.yaml +++ /dev/null @@ -1,20 +0,0 @@ -# Copyright (C) 2024 Prediction Guard, Inc -# SPDX-License-Identifier: Apache-2.0 - -services: - llm: - image: opea/llm-textgen-predictionguard:latest - container_name: llm-textgen-predictionguard - ports: - - "9000:9000" - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - PREDICTIONGUARD_API_KEY: ${PREDICTIONGUARD_API_KEY} - restart: unless-stopped - -networks: - default: - driver: bridge diff --git a/comps/llms/deployment/docker_compose/text-generation_tgi.yaml b/comps/llms/deployment/docker_compose/text-generation_tgi.yaml deleted file mode 100644 index 6aabf2ede0..0000000000 --- a/comps/llms/deployment/docker_compose/text-generation_tgi.yaml +++ /dev/null @@ -1,49 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -version: "3.8" - -services: - tgi_service: - image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu - container_name: tgi-service - ports: - - "8008:80" - volumes: - - "./data:/data" - shm_size: 1g - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - HF_TOKEN: ${HF_TOKEN} - HF_HUB_DISABLE_PROGRESS_BARS: 1 - HF_HUB_ENABLE_HF_TRANSFER: 0 - healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:8008/health"] - interval: 10s - timeout: 10s - retries: 30 - command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0 - llm: - image: opea/llm:latest - container_name: llm-server - ports: - - "9000:9000" - ipc: host - depends_on: - tgi_service: - condition: service_healthy - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - LLM_ENDPOINT: ${LLM_ENDPOINT} - HF_TOKEN: ${HF_TOKEN} - LLM_MODEL_ID: ${LLM_MODEL_ID} - LLM_COMPONENT_NAME: "OPEA_LLM" - restart: unless-stopped - -networks: - default: - driver: bridge diff --git a/comps/llms/deployment/docker_compose/text-generation_vllm_langchain.yaml b/comps/llms/deployment/docker_compose/text-generation_vllm_langchain.yaml deleted file mode 100644 index 5871367ceb..0000000000 --- a/comps/llms/deployment/docker_compose/text-generation_vllm_langchain.yaml +++ /dev/null @@ -1,47 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -version: "3.8" - -services: - vllm-service: - image: opea/vllm-gaudi:latest - container_name: vllm-gaudi-server - ports: - - "8008:80" - volumes: - - "./data:/data" - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - HF_TOKEN: ${HF_TOKEN} - HABANA_VISIBLE_DEVICES: all - OMPI_MCA_btl_vader_single_copy_mechanism: none - LLM_MODEL: ${LLM_MODEL} - runtime: habana - 
cap_add: - - SYS_NICE - ipc: host - command: --model $LLM_MODEL --tensor-parallel-size 1 --host 0.0.0.0 --port 80 - llm: - image: opea/llm-textgen:latest - container_name: llm-vllm-gaudi-server - depends_on: - - vllm-service - ports: - - "9000:9000" - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - vLLM_ENDPOINT: ${vLLM_ENDPOINT} - HF_TOKEN: ${HF_TOKEN} - LLM_MODEL: ${LLM_MODEL} - LLM_COMPONENT_NAME: "OPEA_LLM" - restart: unless-stopped - -networks: - default: - driver: bridge diff --git a/comps/llms/deployment/docker_compose/text-generation_vllm_llama_index.yaml b/comps/llms/deployment/docker_compose/text-generation_vllm_llama_index.yaml deleted file mode 100644 index 6bfc0d500f..0000000000 --- a/comps/llms/deployment/docker_compose/text-generation_vllm_llama_index.yaml +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -version: "3.8" - -services: - vllm-service: - image: opea/vllm-gaudi:latest - container_name: vllm-gaudi-server - ports: - - "8008:80" - volumes: - - "./data:/data" - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} - HABANA_VISIBLE_DEVICES: all - OMPI_MCA_btl_vader_single_copy_mechanism: none - LLM_MODEL: ${LLM_MODEL} - runtime: habana - cap_add: - - SYS_NICE - ipc: host - command: --model $LLM_MODEL --tensor-parallel-size 1 --host 0.0.0.0 --port 80 - llm: - image: opea/llm-vllm-llamaindex:latest - container_name: llm-vllm-gaudi-server - depends_on: - - vllm-service - ports: - - "9000:9000" - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - vLLM_ENDPOINT: ${vLLM_ENDPOINT} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} - LLM_MODEL: ${LLM_MODEL} - restart: unless-stopped - -networks: - default: - driver: bridge diff --git a/comps/llms/deployment/kubernetes/README.md b/comps/llms/deployment/kubernetes/README.md index e69de29bb2..3c2ee474ba 100644 --- a/comps/llms/deployment/kubernetes/README.md +++ b/comps/llms/deployment/kubernetes/README.md @@ -0,0 +1,11 @@ +# Deploy LLM microservice on Kubernetes cluster + +- You should have Helm (version >= 3.15) installed. Refer to the [Helm Installation Guide](https://helm.sh/docs/intro/install/) for more information. +- For more deployment options, refer to [helm charts README](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts#readme). 
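After running the `helm install` command in the next section, you can optionally smoke-test the deployment from Python. This is only a sketch under stated assumptions: it presumes you have port-forwarded the installed `llm-uservice` service to local port 9000 (for example with `kubectl port-forward`) and that the chart exposes the usual OPEA `/v1/health_check` route; adjust the names to your release.

```python
# Hypothetical smoke test, run after something like:
#   kubectl port-forward svc/llm-uservice 9000:9000
# The service name, port, and health route are assumptions; adjust to your deployment.
import requests

resp = requests.get("http://localhost:9000/v1/health_check", timeout=10)
print(resp.status_code, resp.text)
```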
+ +## Deploy on Kubernetes + +``` +export HFTOKEN="insert-your-huggingface-token-here" +helm install llm oci://ghcr.io/opea-project/charts/llm-uservice --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f cpu-values.yaml +``` diff --git a/comps/dataprep/vdms/langchain/__init__.py b/comps/llms/deployment/kubernetes/cpu-values.yaml similarity index 77% rename from comps/dataprep/vdms/langchain/__init__.py rename to comps/llms/deployment/kubernetes/cpu-values.yaml index 916f3a44b2..3de5b26fce 100644 --- a/comps/dataprep/vdms/langchain/__init__.py +++ b/comps/llms/deployment/kubernetes/cpu-values.yaml @@ -1,2 +1,5 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 + +tgi: + enabled: true diff --git a/comps/retrievers/pathway/langchain/Dockerfile b/comps/llms/src/doc-summarization/Dockerfile similarity index 81% rename from comps/retrievers/pathway/langchain/Dockerfile rename to comps/llms/src/doc-summarization/Dockerfile index 3aba6ab457..c1e0686b30 100644 --- a/comps/retrievers/pathway/langchain/Dockerfile +++ b/comps/llms/src/doc-summarization/Dockerfile @@ -15,14 +15,14 @@ RUN useradd -m -s /bin/bash user && \ COPY comps /home/user/comps -USER user - RUN pip install --no-cache-dir --upgrade pip setuptools && \ if [ ${ARCH} = "cpu" ]; then pip install --no-cache-dir torch torchvision --index-url https://download.pytorch.org/whl/cpu; fi && \ - pip install --no-cache-dir -r /home/user/comps/retrievers/pathway/langchain/requirements.txt + pip install --no-cache-dir -r /home/user/comps/llms/src/doc-summarization/requirements.txt ENV PYTHONPATH=$PYTHONPATH:/home/user -WORKDIR /home/user/comps/retrievers/pathway/langchain +USER user + +WORKDIR /home/user/comps/llms/src/doc-summarization ENTRYPOINT ["bash", "entrypoint.sh"] diff --git a/comps/llms/src/doc-summarization/README.md b/comps/llms/src/doc-summarization/README.md new file mode 100644 index 0000000000..77e0969d2a --- /dev/null +++ b/comps/llms/src/doc-summarization/README.md @@ -0,0 +1,175 @@ +# Document Summary LLM Microservice + +This microservice leverages LangChain to implement summarization strategies and facilitate LLM inference using Text Generation Inference on Intel Xeon and Gaudi2 processors. You can set backend service either [TGI](../../../third_parties/tgi) or [vLLM](../../../third_parties/vllm). + +## 🚀1. Start Microservice with Docker 🐳 + +### 1.1 Setup Environment Variables + +In order to start DocSum services, you need to setup the following environment variables first. + +```bash +export host_ip=${your_host_ip} +export LLM_ENDPOINT_PORT=8008 +export DOCSUM_PORT=9000 +export HF_TOKEN=${your_hf_api_token} +export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}" +export LLM_MODEL_ID=${your_hf_llm_model} +export MAX_INPUT_TOKENS=2048 +export MAX_TOTAL_TOKENS=4096 +``` + +Please make sure MAX_TOTAL_TOKENS should be larger than (MAX_INPUT_TOKENS + max_new_tokens + 50), 50 is reserved prompt length. + +### 1.2 Build Docker Image + +Step 1: Prepare backend LLM docker image. + +If you want to use vLLM backend, refer to [vLLM](../../../third_parties/vllm/) to build vLLM docker images first. + +No need for TGI. + +Step 2: Build DocSum docker image. + +```bash +cd ../../../../ +docker build -t opea/llm-docsum:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/src/doc-summarization/Dockerfile . +``` + +### 1.3 Run Docker + +To start a docker container, you have two options: + +- A. Run Docker with CLI +- B. 
Run Docker with Docker Compose + +You can choose one as needed. + +### 1.3.1 Run Docker with CLI (Option A) + +Step 1: Start the backend LLM service +Please refer to [TGI](../../../third_parties/tgi) or [vLLM](../../../third_parties/vllm) guideline to start a backend LLM service. + +Step 2: Start the DocSum microservices + +```bash +export DocSum_COMPONENT_NAME="OpeaDocSumTgi" # or "OpeaDocSumvLLM" +docker run -d \ + --name="llm-docsum-server" \ + -p 9000:9000 \ + --ipc=host \ + -e http_proxy=$http_proxy \ + -e https_proxy=$https_proxy \ + -e LLM_MODEL_ID=$LLM_MODEL_ID \ + -e LLM_ENDPOINT=$LLM_ENDPOINT \ + -e HF_TOKEN=$HF_TOKEN \ + -e DocSum_COMPONENT_NAME=$DocSum_COMPONENT_NAME \ + -e MAX_INPUT_TOKENS=${MAX_INPUT_TOKENS} \ + -e MAX_TOTAL_TOKENS=${MAX_TOTAL_TOKENS} \ + opea/llm-docsum:latest +``` + +### 1.3.2 Run Docker with Docker Compose (Option B) + +Set `service_name` to match backend service. + +```bash +export service_name="docsum-tgi" +# export service_name="docsum-tgi-gaudi" +# export service_name="docsum-vllm" +# export service_name="docsum-vllm-gaudi" + +cd ../../deployment/docker_compose/ +docker compose -f compose_doc-summarization.yaml up ${service_name} -d +``` + +## 🚀3. Consume LLM Service + +### 3.1 Check Service Status + +```bash +curl http://${your_ip}:9000/v1/health_check\ + -X GET \ + -H 'Content-Type: application/json' +``` + +### 3.2 Consume LLM Service + +In DocSum microservice, except for basic LLM parameters, we also support several optimization parameters setting. + +- "language": specify the language, can be "auto", "en", "zh", default is "auto" + +If you want to deal with long context, can select suitable summary type, details in section 3.2.2. + +- "summary_type": can be "auto", "stuff", "truncate", "map_reduce", "refine", default is "auto" +- "chunk_size": max token length for each chunk. Set to be different default value according to "summary_type". +- "chunk_overlap": overlap token length between each chunk, default is 0.1\*chunk_size + +#### 3.2.1 Basic usage + +```bash +# Enable stream to receive a stream response. By default, this is set to True. +curl http://${your_ip}:9000/v1/docsum \ + -X POST \ + -d '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.", "max_tokens":32, "language":"en"}' \ + -H 'Content-Type: application/json' + +# Disable stream to receive a non-stream response. +curl http://${your_ip}:9000/v1/docsum \ + -X POST \ + -d '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. 
TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.", "max_tokens":32, "language":"en", "stream":false}' \ + -H 'Content-Type: application/json' + +# Use Chinese mode +curl http://${your_ip}:9000/v1/docsum \ + -X POST \ + -d '{"messages":"2024年9月26日,北京——今日,英特尔正式发布英特尔® 至强® 6性能核处理器(代号Granite Rapids),为AI、数据分析、科学计算等计算密集型业务提供卓越性能。", "max_tokens":32, "language":"zh", "stream":false}' \ + -H 'Content-Type: application/json' +``` + +#### 3.2.2 Long context summarization with "summary_type" + +**summary_type=auto** + +"summary_type" is set to be "auto" by default, in this mode we will check input token length, if it exceed `MAX_INPUT_TOKENS`, `summary_type` will automatically be set to `refine` mode, otherwise will be set to `stuff` mode. + +**summary_type=stuff** + +In this mode LLM generate summary based on complete input text. In this case please carefully set `MAX_INPUT_TOKENS` and `MAX_TOTAL_TOKENS` according to your model and device memory, otherwise it may exceed LLM context limit and raise error when meet long context. + +**summary_type=truncate** + +Truncate mode will truncate the input text and keep only the first chunk, whose length is equal to `min(MAX_TOTAL_TOKENS - input.max_tokens - 50, MAX_INPUT_TOKENS)` + +```bash +curl http://${your_ip}:9000/v1/docsum \ + -X POST \ + -d '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.", "max_tokens":32, "language":"en", "summary_type": "truncate", "chunk_size": 2000}' \ + -H 'Content-Type: application/json' +``` + +**summary_type=map_reduce** + +Map_reduce mode will split the inputs into multiple chunks, map each document to an individual summary, then consolidate those summaries into a single global summary. `stream=True` is not allowed here. + +In this mode, default `chunk_size` is set to be `min(MAX_TOTAL_TOKENS - input.max_tokens - 50, MAX_INPUT_TOKENS)` + +```bash +curl http://${your_ip}:9000/v1/docsum \ + -X POST \ + -d '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.", "max_tokens":32, "language":"en", "summary_type": "map_reduce", "chunk_size": 2000, "stream":false}' \ + -H 'Content-Type: application/json' +``` + +**summary_type=refine** + +Refin mode will split the inputs into multiple chunks, generate summary for the first one, then combine with the second, loops over every remaining chunks to get the final summary. + +In this mode, default `chunk_size` is set to be `min(MAX_TOTAL_TOKENS - 2 * input.max_tokens - 128, MAX_INPUT_TOKENS)`. + +```bash +curl http://${your_ip}:9000/v1/docsum \ + -X POST \ + -d '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. 
TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.", "max_tokens":32, "language":"en", "summary_type": "refine", "chunk_size": 2000}' \ + -H 'Content-Type: application/json' +``` diff --git a/comps/llms/summarization/tgi/langchain/entrypoint.sh b/comps/llms/src/doc-summarization/entrypoint.sh similarity index 81% rename from comps/llms/summarization/tgi/langchain/entrypoint.sh rename to comps/llms/src/doc-summarization/entrypoint.sh index d60eddd36b..64c8df3b4d 100644 --- a/comps/llms/summarization/tgi/langchain/entrypoint.sh +++ b/comps/llms/src/doc-summarization/entrypoint.sh @@ -5,4 +5,4 @@ pip --no-cache-dir install -r requirements-runtime.txt -python llm.py +python opea_docsum_microservice.py diff --git a/comps/dataprep/elasticsearch/langchain/__init__.py b/comps/llms/src/doc-summarization/integrations/__init__.py similarity index 100% rename from comps/dataprep/elasticsearch/langchain/__init__.py rename to comps/llms/src/doc-summarization/integrations/__init__.py diff --git a/comps/llms/src/doc-summarization/integrations/common.py b/comps/llms/src/doc-summarization/integrations/common.py new file mode 100644 index 0000000000..a866ef6be8 --- /dev/null +++ b/comps/llms/src/doc-summarization/integrations/common.py @@ -0,0 +1,219 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os + +import requests +from fastapi.responses import StreamingResponse +from langchain.chains.summarize import load_summarize_chain +from langchain.docstore.document import Document +from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter +from langchain_core.prompts import PromptTemplate +from transformers import AutoTokenizer + +from comps import CustomLogger, GeneratedDoc, OpeaComponent, ServiceType +from comps.cores.mega.utils import ConfigError, get_access_token, load_model_configs +from comps.cores.proto.api_protocol import DocSumChatCompletionRequest + +from .template import templ_en, templ_refine_en, templ_refine_zh, templ_zh + +logger = CustomLogger("llm_docsum") +logflag = os.getenv("LOGFLAG", False) + +# Environment variables +MODEL_NAME = os.getenv("LLM_MODEL_ID") +MODEL_CONFIGS = os.getenv("MODEL_CONFIGS") +TOKEN_URL = os.getenv("TOKEN_URL") +CLIENTID = os.getenv("CLIENTID") +CLIENT_SECRET = os.getenv("CLIENT_SECRET") +MAX_INPUT_TOKENS = int(os.getenv("MAX_INPUT_TOKENS", 2048)) +MAX_TOTAL_TOKENS = int(os.getenv("MAX_TOTAL_TOKENS", 4096)) + +if os.getenv("LLM_ENDPOINT") is not None: + DEFAULT_ENDPOINT = os.getenv("LLM_ENDPOINT") +elif os.getenv("TGI_LLM_ENDPOINT") is not None: + DEFAULT_ENDPOINT = os.getenv("TGI_LLM_ENDPOINT") +elif os.getenv("vLLM_ENDPOINT") is not None: + DEFAULT_ENDPOINT = os.getenv("vLLM_ENDPOINT") +else: + DEFAULT_ENDPOINT = "http://localhost:8080" + + +def get_llm_endpoint(): + if not MODEL_CONFIGS: + return DEFAULT_ENDPOINT + else: + # Validate and Load the models config if MODEL_CONFIGS is not null + configs_map = {} + try: + configs_map = load_model_configs(MODEL_CONFIGS) + except ConfigError as e: + logger.error(f"Failed to load model configurations: {e}") + raise ConfigError(f"Failed to load model configurations: {e}") + try: + return configs_map.get(MODEL_NAME).get("endpoint") + except ConfigError as e: + logger.error(f"Input model {MODEL_NAME} not present in model_configs. 
Error {e}") + raise ConfigError(f"Input model {MODEL_NAME} not present in model_configs") + + +class OpeaDocSum(OpeaComponent): + """A specialized OPEA DocSum component derived from OpeaComponent. + + Attributes: + client (TGI/vLLM): An instance of the TGI/vLLM client for text generation. + """ + + def __init__(self, name: str, description: str, config: dict = None): + super().__init__(name, ServiceType.LLM.name.lower(), description, config) + self.access_token = ( + get_access_token(TOKEN_URL, CLIENTID, CLIENT_SECRET) if TOKEN_URL and CLIENTID and CLIENT_SECRET else None + ) + self.llm_endpoint = get_llm_endpoint() + self.tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME) + health_status = self.check_health() + if not health_status: + logger.error("OpeaDocSum health check failed.") + + async def generate(self, input: DocSumChatCompletionRequest, client): + """Invokes the TGI/vLLM LLM service to generate summarization for the provided input. + + Args: + input (DocSumChatCompletionRequest): The input text(s). + client: TGI/vLLM based client + """ + ### get input text + message = None + if isinstance(input.messages, str): + message = input.messages + else: # List[Dict] + for input_data in input.messages: + if "role" in input_data and input_data["role"] == "user" and "content" in input_data: + message = input_data["content"] + if logflag: + logger.info(f"Get input text:\n {message}") + if message is None: + logger.error("Don't receive any input text, exit!") + return GeneratedDoc(text=None, prompt=None) + + ### check summary type + summary_types = ["auto", "stuff", "truncate", "map_reduce", "refine"] + if input.summary_type not in summary_types: + raise NotImplementedError(f"Please specify the summary_type in {summary_types}") + if input.summary_type == "auto": ### Check input token length in auto mode + token_len = len(self.tokenizer.encode(message)) + if token_len > MAX_INPUT_TOKENS + 50: + input.summary_type = "refine" + if logflag: + logger.info( + f"Input token length {token_len} exceed MAX_INPUT_TOKENS + 50 {MAX_INPUT_TOKENS+50}, auto switch to 'refine' mode." + ) + else: + input.summary_type = "stuff" + if logflag: + logger.info( + f"Input token length {token_len} not exceed MAX_INPUT_TOKENS + 50 {MAX_INPUT_TOKENS+50}, auto switch to 'stuff' mode." 
+ ) + + ### Check input language + if input.language in ["en", "auto"]: + templ = templ_en + templ_refine = templ_refine_en + elif input.language in ["zh"]: + templ = templ_zh + templ_refine = templ_refine_zh + else: + raise NotImplementedError('Please specify the input language in "en", "zh", "auto"') + + ## Prompt + PROMPT = PromptTemplate.from_template(templ) + if input.summary_type == "refine": + PROMPT_REFINE = PromptTemplate.from_template(templ_refine) + if logflag: + logger.info("After prompting:") + logger.info(PROMPT) + if input.summary_type == "refine": + logger.info(PROMPT_REFINE) + + ## Split text + if input.summary_type == "stuff": + text_splitter = CharacterTextSplitter() + else: + if input.summary_type == "refine": + if MAX_TOTAL_TOKENS <= 2 * input.max_tokens + 128: ## 128 is reserved prompt length + raise RuntimeError("In Refine mode, Please set MAX_TOTAL_TOKENS larger than (max_tokens * 2 + 128)") + max_input_tokens = min(MAX_TOTAL_TOKENS - 2 * input.max_tokens - 128, MAX_INPUT_TOKENS) + else: + if MAX_TOTAL_TOKENS <= input.max_tokens + 50: # 50 is reserved token length for prompt + raise RuntimeError("Please set MAX_TOTAL_TOKENS larger than max_tokens + 50)") + max_input_tokens = min(MAX_TOTAL_TOKENS - input.max_tokens - 50, MAX_INPUT_TOKENS) + chunk_size = min(input.chunk_size, max_input_tokens) if input.chunk_size > 0 else max_input_tokens + chunk_overlap = input.chunk_overlap if input.chunk_overlap > 0 else int(0.1 * chunk_size) + text_splitter = RecursiveCharacterTextSplitter.from_huggingface_tokenizer( + tokenizer=self.tokenizer, chunk_size=chunk_size, chunk_overlap=chunk_overlap + ) + if logflag: + logger.info(f"set chunk size to: {chunk_size}") + logger.info(f"set chunk overlap to: {chunk_overlap}") + + texts = text_splitter.split_text(message) + docs = [Document(page_content=t) for t in texts] + if logflag: + logger.info(f"Split input query into {len(docs)} chunks") + logger.info(f"The character length of the first chunk is {len(texts[0])}") + + ## LLM chain + summary_type = input.summary_type + if summary_type == "stuff": + llm_chain = load_summarize_chain(llm=client, prompt=PROMPT) + elif summary_type == "truncate": + docs = [docs[0]] + llm_chain = load_summarize_chain(llm=client, prompt=PROMPT) + elif summary_type == "map_reduce": + llm_chain = load_summarize_chain( + llm=client, + map_prompt=PROMPT, + combine_prompt=PROMPT, + chain_type="map_reduce", + return_intermediate_steps=True, + ) + elif summary_type == "refine": + llm_chain = load_summarize_chain( + llm=client, + question_prompt=PROMPT, + refine_prompt=PROMPT_REFINE, + chain_type="refine", + return_intermediate_steps=True, + ) + else: + raise NotImplementedError(f"Please specify the summary_type in {summary_types}") + + if input.stream: + + async def stream_generator(): + from langserve.serialization import WellKnownLCSerializer + + _serializer = WellKnownLCSerializer() + async for chunk in llm_chain.astream_log(docs): + data = _serializer.dumps({"ops": chunk.ops}).decode("utf-8") + if logflag: + logger.info(data) + yield f"data: {data}\n\n" + yield "data: [DONE]\n\n" + + return StreamingResponse(stream_generator(), media_type="text/event-stream") + else: + response = await llm_chain.ainvoke(docs) + + if input.summary_type in ["map_reduce", "refine"]: + intermediate_steps = response["intermediate_steps"] + if logflag: + logger.info("intermediate_steps:") + logger.info(intermediate_steps) + + output_text = response["output_text"] + if logflag: + logger.info("\n\noutput_text:") + 
logger.info(output_text) + + return GeneratedDoc(text=output_text, prompt=message) diff --git a/comps/llms/src/doc-summarization/integrations/template.py b/comps/llms/src/doc-summarization/integrations/template.py new file mode 100644 index 0000000000..20ef59454c --- /dev/null +++ b/comps/llms/src/doc-summarization/integrations/template.py @@ -0,0 +1,58 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +templ_en = """Write a concise summary of the following: + + +"{text}" + + +CONCISE SUMMARY:""" + +templ_zh = """请简要概括以下内容: + + +"{text}" + + +概况:""" + + +templ_refine_en = """Your job is to produce a final summary. +We have provided an existing summary up to a certain point, then we will provide more context. +You need to refine the existing summary (only if needed) with new context and generate a final summary. + + +Existing Summary: +"{existing_answer}" + + + +New Context: +"{text}" + + + +Final Summary: + +""" + +templ_refine_zh = """\ +你的任务是生成一个最终摘要。 +我们已经处理好部分文本并生成初始摘要, 并提供了新的未处理文本 +你需要根据新提供的文本,结合初始摘要,生成一个最终摘要。 + + +初始摘要: +"{existing_answer}" + + + +新的文本: +"{text}" + + + +最终摘要: + +""" diff --git a/comps/llms/src/doc-summarization/integrations/tgi.py b/comps/llms/src/doc-summarization/integrations/tgi.py new file mode 100644 index 0000000000..002f8de19f --- /dev/null +++ b/comps/llms/src/doc-summarization/integrations/tgi.py @@ -0,0 +1,77 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os + +import requests +from langchain_community.llms import HuggingFaceEndpoint + +from comps import CustomLogger, GeneratedDoc, OpeaComponent, OpeaComponentRegistry, ServiceType +from comps.cores.proto.api_protocol import DocSumChatCompletionRequest + +from .common import * + +logger = CustomLogger("llm_docsum_tgi") +logflag = os.getenv("LOGFLAG", False) + + +@OpeaComponentRegistry.register("OpeaDocSumTgi") +class OpeaDocSumTgi(OpeaDocSum): + """A specialized OPEA DocSum TGI component derived from OpeaDocSum for interacting with TGI services based on Lanchain HuggingFaceEndpoint API. + + Attributes: + client (TGI): An instance of the TGI client for text generation. + """ + + def check_health(self) -> bool: + """Checks the health of the TGI LLM service. + + Returns: + bool: True if the service is reachable and healthy, False otherwise. + """ + + try: + # response = requests.get(f"{self.llm_endpoint}/health") + + # Will remove after TGI gaudi fix health bug + url = f"{self.llm_endpoint}/generate" + data = {"inputs": "What is Deep Learning?", "parameters": {"max_new_tokens": 17}} + headers = {"Content-Type": "application/json"} + response = requests.post(url=url, json=data, headers=headers) + + if response.status_code == 200: + return True + else: + return False + except Exception as e: + logger.error(e) + logger.error("Health check failed") + return False + + async def invoke(self, input: DocSumChatCompletionRequest): + """Invokes the TGI LLM service to generate summarization output for the provided input. + + Args: + input (DocSumChatCompletionRequest): The input text(s). 
+ """ + server_kwargs = {} + if self.access_token: + server_kwargs["headers"] = {"Authorization": f"Bearer {self.access_token}"} + + if input.stream and input.summary_type == "map_reduce": + logger.info("Map Reduce mode don't support stream=True, set to stream=False") + input.stream = False + self.client = HuggingFaceEndpoint( + endpoint_url=self.llm_endpoint, + max_new_tokens=input.max_tokens if input.max_tokens else 1024, + top_k=input.top_k if input.top_k else 10, + top_p=input.top_p if input.top_p else 0.95, + typical_p=input.typical_p if input.typical_p else 0.95, + temperature=input.temperature if input.temperature else 0.01, + repetition_penalty=input.repetition_penalty if input.repetition_penalty else 1.03, + streaming=input.stream, + server_kwargs=server_kwargs, + ) + result = await self.generate(input, self.client) + + return result diff --git a/comps/llms/src/doc-summarization/integrations/vllm.py b/comps/llms/src/doc-summarization/integrations/vllm.py new file mode 100644 index 0000000000..c292f850e5 --- /dev/null +++ b/comps/llms/src/doc-summarization/integrations/vllm.py @@ -0,0 +1,69 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os + +import requests +from langchain_community.llms import VLLMOpenAI + +from comps import CustomLogger, GeneratedDoc, OpeaComponent, OpeaComponentRegistry, ServiceType +from comps.cores.proto.api_protocol import DocSumChatCompletionRequest + +from .common import * + +logger = CustomLogger("llm_docsum_vllm") +logflag = os.getenv("LOGFLAG", False) + + +@OpeaComponentRegistry.register("OpeaDocSumvLLM") +class OpeaDocSumvLLM(OpeaDocSum): + """A specialized OPEA DocSum vLLM component derived from OpeaDocSum for interacting with vLLM services based on Lanchain VLLMOpenAI API. + + Attributes: + client (vLLM): An instance of the vLLM client for text generation. + """ + + def check_health(self) -> bool: + """Checks the health of the vLLM LLM service. + + Returns: + bool: True if the service is reachable and healthy, False otherwise. + """ + + try: + response = requests.get(f"{self.llm_endpoint}/health") + if response.status_code == 200: + return True + else: + return False + except Exception as e: + logger.error(e) + logger.error("Health check failed") + return False + + async def invoke(self, input: DocSumChatCompletionRequest): + """Invokes the vLLM LLM service to generate summarization output for the provided input. + + Args: + input (DocSumChatCompletionRequest): The input text(s). 
+ """ + headers = {} + if self.access_token: + headers = {"Authorization": f"Bearer {self.access_token}"} + + if input.stream and input.summary_type == "map_reduce": + logger.info("Map Reduce mode don't support stream=True, set to stream=False") + input.stream = False + self.client = VLLMOpenAI( + openai_api_key="EMPTY", + openai_api_base=self.llm_endpoint + "/v1", + model_name=MODEL_NAME, + default_headers=headers, + max_tokens=input.max_tokens if input.max_tokens else 1024, + top_p=input.top_p if input.top_p else 0.95, + streaming=input.stream, + temperature=input.temperature if input.temperature else 0.01, + ) + result = await self.generate(input, self.client) + + return result diff --git a/comps/llms/src/doc-summarization/opea_docsum_microservice.py b/comps/llms/src/doc-summarization/opea_docsum_microservice.py new file mode 100644 index 0000000000..3ac9c8b132 --- /dev/null +++ b/comps/llms/src/doc-summarization/opea_docsum_microservice.py @@ -0,0 +1,58 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os +import time + +from integrations.tgi import OpeaDocSumTgi +from integrations.vllm import OpeaDocSumvLLM + +from comps import ( + CustomLogger, + OpeaComponentLoader, + ServiceType, + opea_microservices, + register_microservice, + register_statistics, + statistics_dict, +) +from comps.cores.proto.api_protocol import DocSumChatCompletionRequest + +logger = CustomLogger("llm_docsum") +logflag = os.getenv("LOGFLAG", False) + +llm_component_name = os.getenv("DocSum_COMPONENT_NAME", "OpeaDocSumTgi") +# Initialize OpeaComponentLoader +loader = OpeaComponentLoader(llm_component_name, description=f"OPEA LLM DocSum Component: {llm_component_name}") + + +@register_microservice( + name="opea_service@llm_docsum", + service_type=ServiceType.LLM, + endpoint="/v1/docsum", + host="0.0.0.0", + port=9000, +) +@register_statistics(names=["opea_service@llm_docsum"]) +async def llm_generate(input: DocSumChatCompletionRequest): + start = time.time() + + # Log the input if logging is enabled + if logflag: + logger.info(input) + + try: + # Use the controller to invoke the active component + response = await loader.invoke(input) + # Record statistics + statistics_dict["opea_service@llm_docsum"].append_latency(time.time() - start, None) + return response + + except Exception as e: + logger.error(f"Error during DocSum invocation: {e}") + raise + + +if __name__ == "__main__": + logger.info("OPEA DocSum Microservice is starting...") + opea_microservices["opea_service@llm_docsum"].start() diff --git a/comps/llms/summarization/vllm/langchain/requirements-runtime.txt b/comps/llms/src/doc-summarization/requirements-runtime.txt similarity index 100% rename from comps/llms/summarization/vllm/langchain/requirements-runtime.txt rename to comps/llms/src/doc-summarization/requirements-runtime.txt diff --git a/comps/llms/summarization/tgi/langchain/requirements.txt b/comps/llms/src/doc-summarization/requirements.txt similarity index 100% rename from comps/llms/summarization/tgi/langchain/requirements.txt rename to comps/llms/src/doc-summarization/requirements.txt diff --git a/comps/llms/src/faq-generation/Dockerfile b/comps/llms/src/faq-generation/Dockerfile index 90439a6542..73ac91aa96 100644 --- a/comps/llms/src/faq-generation/Dockerfile +++ b/comps/llms/src/faq-generation/Dockerfile @@ -11,8 +11,6 @@ RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ chown -R user /home/user/ -USER user - COPY comps /home/user/comps RUN pip install --no-cache-dir --upgrade pip 
setuptools && \ @@ -20,6 +18,8 @@ RUN pip install --no-cache-dir --upgrade pip setuptools && \ ENV PYTHONPATH=$PYTHONPATH:/home/user +USER user + WORKDIR /home/user/comps/llms/src/faq-generation ENTRYPOINT ["bash", "entrypoint.sh"] diff --git a/comps/llms/src/faq-generation/README.md b/comps/llms/src/faq-generation/README.md index 1a57c90d77..32cc3e87b5 100644 --- a/comps/llms/src/faq-generation/README.md +++ b/comps/llms/src/faq-generation/README.md @@ -12,17 +12,16 @@ In order to start FaqGen microservices, you need to setup the following environm export host_ip=${your_host_ip} export LLM_ENDPOINT_PORT=8008 export FAQ_PORT=9000 -export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} +export HF_TOKEN=${your_hf_api_token} export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}" export LLM_MODEL_ID=${your_hf_llm_model} -export FAQGen_COMPONENT_NAME="OPEAFAQGen_TGI" # or "vllm" ``` ### 1.2 Build Docker Image Step 1: Prepare backend LLM docker image. -If you want to use vLLM backend, refer to [vLLM](../../../third_parties/vllm/src) to build vLLM docker images first. +If you want to use vLLM backend, refer to [vLLM](../../../third_parties/vllm) to build vLLM docker images first. No need for TGI. @@ -45,11 +44,12 @@ You can choose one as needed. #### 1.3.1 Run Docker with CLI (Option A) Step 1: Start the backend LLM service -Please refer to [TGI](../../../third_parties/tgi/deployment/docker_compose/) or [vLLM](../../../third_parties/vllm/deployment/docker_compose/) guideline to start a backend LLM service. +Please refer to [TGI](../../../third_parties/tgi) or [vLLM](../../../third_parties/vllm) guideline to start a backend LLM service. Step 2: Start the FaqGen microservices ```bash +export FAQGen_COMPONENT_NAME="OpeaFaqGenTgi" # or "OpeaFaqGenvLLM" docker run -d \ --name="llm-faqgen-server" \ -p 9000:9000 \ @@ -58,27 +58,23 @@ docker run -d \ -e https_proxy=$https_proxy \ -e LLM_MODEL_ID=$LLM_MODEL_ID \ -e LLM_ENDPOINT=$LLM_ENDPOINT \ - -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN \ + -e HF_TOKEN=$HF_TOKEN \ -e FAQGen_COMPONENT_NAME=$FAQGen_COMPONENT_NAME \ opea/llm-faqgen:latest ``` #### 1.3.2 Run Docker with Docker Compose (Option B) -```bash -cd ../../deployment/docker_compose/ - -# Backend is TGI on xeon -docker compose -f faq-generation_tgi.yaml up -d +Set `service_name` to match backend service. -# Backend is TGI on gaudi -# docker compose -f faq-generation_tgi_on_intel_hpu.yaml up -d - -# Backend is vLLM on xeon -# docker compose -f faq-generation_vllm.yaml up -d +```bash +export service_name="faqgen-tgi" +# export service_name="faqgen-tgi-gaudi" +# export service_name="faqgen-vllm" +# export service_name="faqgen-vllm-gaudi" -# Backend is vLLM on gaudi -# docker compose -f faq-generation_vllm_on_intel_hpu.yaml up -d +cd ../../deployment/docker_compose/ +docker compose -f compose_faq-generation.yaml up ${service_name} -d ``` ## 🚀2. Consume LLM Service @@ -98,13 +94,13 @@ curl http://${host_ip}:${FAQ_PORT}/v1/health_check\ # Set stream to True. Default will be True. curl http://${host_ip}:${FAQ_PORT}/v1/faqgen \ -X POST \ - -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.","max_tokens": 128}' \ + -d '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. 
TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.","max_tokens": 128}' \ -H 'Content-Type: application/json' # Non-Streaming Response # Set stream to False. curl http://${host_ip}:${FAQ_PORT}/v1/faqgen \ -X POST \ - -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.","max_tokens": 128, "stream":false}' \ + -d '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.","max_tokens": 128, "stream":false}' \ -H 'Content-Type: application/json' ``` diff --git a/comps/llms/src/faq-generation/integrations/common.py b/comps/llms/src/faq-generation/integrations/common.py index 6d756a3ab7..fcd9e4c709 100644 --- a/comps/llms/src/faq-generation/integrations/common.py +++ b/comps/llms/src/faq-generation/integrations/common.py @@ -10,8 +10,9 @@ from langchain.text_splitter import CharacterTextSplitter from langchain_core.prompts import PromptTemplate -from comps import CustomLogger, GeneratedDoc, LLMParamsDoc, OpeaComponent, ServiceType +from comps import CustomLogger, GeneratedDoc, OpeaComponent, ServiceType from comps.cores.mega.utils import ConfigError, get_access_token, load_model_configs +from comps.cores.proto.api_protocol import ChatCompletionRequest logger = CustomLogger("opea_faqgen") logflag = os.getenv("LOGFLAG", False) @@ -56,7 +57,7 @@ def get_llm_endpoint(): raise ConfigError(f"Input model {MODEL_NAME} not present in model_configs") -class OPEAFAQGen(OpeaComponent): +class OpeaFaqGen(OpeaComponent): """A specialized OPEA FAQGen component derived from OpeaComponent. Attributes: @@ -72,18 +73,31 @@ def __init__(self, name: str, description: str, config: dict = None): self.llm_endpoint = get_llm_endpoint() health_status = self.check_health() if not health_status: - logger.error("OPEAFAQGen health check failed.") + logger.error("OpeaFaqGen health check failed.") - async def generate(self, input: LLMParamsDoc, client): + async def generate(self, input: ChatCompletionRequest, client): """Invokes the TGI/vLLM LLM service to generate FAQ output for the provided input. Args: - input (LLMParamsDoc): The input text(s). + input (ChatCompletionRequest): The input text(s). 
client: TGI/vLLM based client """ + message = None + if isinstance(input.messages, str): + message = input.messages + else: # List[Dict] + for input_data in input.messages: + if "role" in input_data and input_data["role"] == "user" and "content" in input_data: + message = input_data["content"] + if logflag: + logger.info(f"Get input text:\n {message}") + if message is None: + logger.error("Don't receive any input text, exit!") + return GeneratedDoc(text=None, prompt=None) + PROMPT = PromptTemplate.from_template(templ) llm_chain = load_summarize_chain(llm=client, prompt=PROMPT) - texts = self.text_splitter.split_text(input.query) + texts = self.text_splitter.split_text(message) # Create multiple documents docs = [Document(page_content=t) for t in texts] @@ -107,4 +121,4 @@ async def stream_generator(): response = response["output_text"] if logflag: logger.info(response) - return GeneratedDoc(text=response, prompt=input.query) + return GeneratedDoc(text=response, prompt=message) diff --git a/comps/llms/src/faq-generation/integrations/tgi.py b/comps/llms/src/faq-generation/integrations/tgi.py index 41fa7b58b5..edfa64bcb2 100644 --- a/comps/llms/src/faq-generation/integrations/tgi.py +++ b/comps/llms/src/faq-generation/integrations/tgi.py @@ -6,7 +6,8 @@ import requests from langchain_community.llms import HuggingFaceEndpoint -from comps import CustomLogger, GeneratedDoc, LLMParamsDoc, OpeaComponent, OpeaComponentRegistry, ServiceType +from comps import CustomLogger, GeneratedDoc, OpeaComponent, OpeaComponentRegistry, ServiceType +from comps.cores.proto.api_protocol import ChatCompletionRequest from .common import * @@ -14,9 +15,9 @@ logflag = os.getenv("LOGFLAG", False) -@OpeaComponentRegistry.register("OPEAFAQGen_TGI") -class OPEAFAQGen_TGI(OPEAFAQGen): - """A specialized OPEA FAQGen TGI component derived from OPEAFAQGen for interacting with TGI services based on Lanchain HuggingFaceEndpoint API. +@OpeaComponentRegistry.register("OpeaFaqGenTgi") +class OpeaFaqGenTgi(OpeaFaqGen): + """A specialized OPEA FAQGen TGI component derived from OpeaFaqGen for interacting with TGI services based on Lanchain HuggingFaceEndpoint API. Attributes: client (TGI): An instance of the TGI client for text generation. @@ -47,11 +48,11 @@ def check_health(self) -> bool: logger.error("Health check failed") return False - async def invoke(self, input: LLMParamsDoc): + async def invoke(self, input: ChatCompletionRequest): """Invokes the TGI LLM service to generate FAQ output for the provided input. Args: - input (LLMParamsDoc): The input text(s). + input (ChatCompletionRequest): The input text(s). 
""" server_kwargs = {} if self.access_token: @@ -59,12 +60,12 @@ async def invoke(self, input: LLMParamsDoc): self.client = HuggingFaceEndpoint( endpoint_url=self.llm_endpoint, - max_new_tokens=input.max_tokens, - top_k=input.top_k, - top_p=input.top_p, - typical_p=input.typical_p, - temperature=input.temperature, - repetition_penalty=input.repetition_penalty, + max_new_tokens=input.max_tokens if input.max_tokens else 1024, + top_k=input.top_k if input.top_k else 10, + top_p=input.top_p if input.top_p else 0.95, + typical_p=input.typical_p if input.typical_p else 0.95, + temperature=input.temperature if input.temperature else 0.01, + repetition_penalty=input.repetition_penalty if input.repetition_penalty else 1.03, streaming=input.stream, server_kwargs=server_kwargs, ) diff --git a/comps/llms/src/faq-generation/integrations/vllm.py b/comps/llms/src/faq-generation/integrations/vllm.py index 6e8b696ea6..bf891ea7ff 100644 --- a/comps/llms/src/faq-generation/integrations/vllm.py +++ b/comps/llms/src/faq-generation/integrations/vllm.py @@ -6,7 +6,8 @@ import requests from langchain_community.llms import VLLMOpenAI -from comps import CustomLogger, GeneratedDoc, LLMParamsDoc, OpeaComponent, OpeaComponentRegistry, ServiceType +from comps import CustomLogger, GeneratedDoc, OpeaComponent, OpeaComponentRegistry, ServiceType +from comps.cores.proto.api_protocol import ChatCompletionRequest from .common import * @@ -14,9 +15,9 @@ logflag = os.getenv("LOGFLAG", False) -@OpeaComponentRegistry.register("OPEAFAQGen_vLLM") -class OPEAFAQGen_vLLM(OPEAFAQGen): - """A specialized OPEA FAQGen vLLM component derived from OPEAFAQGen for interacting with vLLM services based on Lanchain VLLMOpenAI API. +@OpeaComponentRegistry.register("OpeaFaqGenvLLM") +class OpeaFaqGenvLLM(OpeaFaqGen): + """A specialized OPEA FAQGen vLLM component derived from OpeaFaqGen for interacting with vLLM services based on Lanchain VLLMOpenAI API. Attributes: client (vLLM): An instance of the vLLM client for text generation. @@ -40,11 +41,11 @@ def check_health(self) -> bool: logger.error("Health check failed") return False - async def invoke(self, input: LLMParamsDoc): + async def invoke(self, input: ChatCompletionRequest): """Invokes the vLLM LLM service to generate FAQ output for the provided input. Args: - input (LLMParamsDoc): The input text(s). + input (ChatCompletionRequest): The input text(s). 
""" headers = {} if self.access_token: @@ -55,10 +56,10 @@ async def invoke(self, input: LLMParamsDoc): openai_api_base=self.llm_endpoint + "/v1", model_name=MODEL_NAME, default_headers=headers, - max_tokens=input.max_tokens, - top_p=input.top_p, + max_tokens=input.max_tokens if input.max_tokens else 1024, + top_p=input.top_p if input.top_p else 0.95, streaming=input.stream, - temperature=input.temperature, + temperature=input.temperature if input.temperature else 0.01, ) result = await self.generate(input, self.client) diff --git a/comps/llms/src/faq-generation/opea_faqgen_microservice.py b/comps/llms/src/faq-generation/opea_faqgen_microservice.py index e98ca7eb61..9f0353b492 100644 --- a/comps/llms/src/faq-generation/opea_faqgen_microservice.py +++ b/comps/llms/src/faq-generation/opea_faqgen_microservice.py @@ -4,12 +4,11 @@ import os import time -from integrations.tgi import OPEAFAQGen_TGI -from integrations.vllm import OPEAFAQGen_vLLM +from integrations.tgi import OpeaFaqGenTgi +from integrations.vllm import OpeaFaqGenvLLM from comps import ( CustomLogger, - LLMParamsDoc, OpeaComponentLoader, ServiceType, opea_microservices, @@ -17,11 +16,12 @@ register_statistics, statistics_dict, ) +from comps.cores.proto.api_protocol import ChatCompletionRequest logger = CustomLogger("llm_faqgen") logflag = os.getenv("LOGFLAG", False) -llm_component_name = os.getenv("FAQGen_COMPONENT_NAME", "OPEAFAQGen_TGI") +llm_component_name = os.getenv("FAQGen_COMPONENT_NAME", "OpeaFaqGenTgi") # Initialize OpeaComponentLoader loader = OpeaComponentLoader(llm_component_name, description=f"OPEA LLM FAQGen Component: {llm_component_name}") @@ -34,7 +34,7 @@ port=9000, ) @register_statistics(names=["opea_service@llm_faqgen"]) -async def llm_generate(input: LLMParamsDoc): +async def llm_generate(input: ChatCompletionRequest): start = time.time() # Log the input if logging is enabled diff --git a/comps/llms/src/text-generation/Dockerfile b/comps/llms/src/text-generation/Dockerfile index 3d080be3b7..463c1f7428 100644 --- a/comps/llms/src/text-generation/Dockerfile +++ b/comps/llms/src/text-generation/Dockerfile @@ -11,8 +11,6 @@ RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ chown -R user /home/user/ -USER user - COPY comps /home/user/comps RUN pip install --no-cache-dir --upgrade pip setuptools && \ @@ -20,7 +18,8 @@ RUN pip install --no-cache-dir --upgrade pip setuptools && \ ENV PYTHONPATH=$PYTHONPATH:/home/user +USER user + WORKDIR /home/user/comps/llms/src/text-generation ENTRYPOINT ["bash", "entrypoint.sh"] - diff --git a/comps/llms/text-generation/native/langchain/Dockerfile b/comps/llms/src/text-generation/Dockerfile.intel_hpu similarity index 64% rename from comps/llms/text-generation/native/langchain/Dockerfile rename to comps/llms/src/text-generation/Dockerfile.intel_hpu index a3f5d9b240..3705475211 100644 --- a/comps/llms/text-generation/native/langchain/Dockerfile +++ b/comps/llms/src/text-generation/Dockerfile.intel_hpu @@ -2,11 +2,11 @@ # SPDX-License-Identifier: Apache-2.0 # HABANA environment -FROM vault.habana.ai/gaudi-docker/1.18.0/ubuntu22.04/habanalabs/pytorch-installer-2.4.0 AS hpu +FROM vault.habana.ai/gaudi-docker/1.19.0/ubuntu22.04/habanalabs/pytorch-installer-2.5.1 AS hpu ENV LANG=en_US.UTF-8 ARG REPO=https://github.com/huggingface/optimum-habana.git -ARG REPO_VER=v1.12.1 +ARG REPO_VER=v1.15.0 RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ git-lfs \ @@ -17,24 +17,24 @@ RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ chown -R user 
/home/user/ -USER user - RUN git lfs install COPY comps /home/user/comps -RUN pip install --no-cache-dir --upgrade-strategy eager optimum[habana] && \ - pip install --no-cache-dir git+https://github.com/HabanaAI/DeepSpeed.git@1.17.0 +RUN pip install --no-cache-dir --upgrade pip setuptools && \ + pip install --no-cache-dir --upgrade-strategy eager optimum[habana] && \ + pip install --no-cache-dir git+https://github.com/HabanaAI/DeepSpeed.git@1.19.0 RUN git clone ${REPO} /home/user/optimum-habana && \ cd /home/user/optimum-habana && git checkout ${REPO_VER} && \ cd examples/text-generation && pip install --no-cache-dir -r requirements.txt && \ - cd /home/user/comps/llms/text-generation/native/langchain && \ - pip install --no-cache-dir -r requirements.txt && \ - pip install --no-cache-dir --upgrade --force-reinstall pydantic + cd /home/user/comps/llms/src/text-generation/ && pip install --no-cache-dir -r requirements.txt && \ + pip install --no-cache-dir --upgrade --force-reinstall pydantic numpy==1.23.5 ENV PYTHONPATH=/root:/home/user -WORKDIR /home/user/comps/llms/text-generation/native/langchain +USER user + +WORKDIR /home/user/comps/llms/src/text-generation/ -ENTRYPOINT ["python", "llm.py"] +ENTRYPOINT ["bash", "entrypoint.sh"] diff --git a/comps/llms/src/text-generation/README.md b/comps/llms/src/text-generation/README.md index c3be5362a4..360c459dc1 100644 --- a/comps/llms/src/text-generation/README.md +++ b/comps/llms/src/text-generation/README.md @@ -1,67 +1,131 @@ -# TGI LLM Microservice +# LLM text generation Microservice -[Text Generation Inference](https://github.com/huggingface/text-generation-inference) (TGI) is a toolkit for deploying and serving Large Language Models (LLMs). TGI enables high-performance text generation for the most popular open-source LLMs, including Llama, Falcon, StarCoder, BLOOM, GPT-NeoX, and more. +This microservice, designed for Language Model Inference (LLM), processes input consisting of a query string and associated reranked documents. It constructs a prompt based on the query and documents, which is then used to perform inference with a large language model. The service delivers the inference results as output. -## 🚀1. Start Microservice with Python (Option 1) +A prerequisite for using this microservice is that users must have a LLM text generation service (etc., TGI, vLLM) already running. Users need to set the LLM service's endpoint into an environment variable. The microservice utilizes this endpoint to create an LLM object, enabling it to communicate with the LLM service for executing language model operations. -To start the LLM microservice, you need to install python packages first. +Overall, this microservice offers a streamlined way to integrate large language model inference into applications, requiring minimal setup from the user beyond initiating a TGI/vLLM service and configuring the necessary environment variables. This allows for the seamless processing of queries and documents to generate intelligent, context-aware responses. 
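Once a backend LLM endpoint and this microservice are running (see the setup sections below), clients reach it through an OpenAI-style `/v1/chat/completions` route, as the `curl` examples later in this document show. The following is a minimal Python sketch of the same call; the host, port, and model id are assumptions and must match your `TEXTGEN_PORT` and `LLM_MODEL_ID` settings.

```python
# Minimal client sketch for the text generation microservice.
# Assumes the service listens on http://localhost:9000 (TEXTGEN_PORT)
# and that the model id matches what the TGI/vLLM backend is serving.
import requests

url = "http://localhost:9000/v1/chat/completions"
payload = {
    "model": "Intel/neural-chat-7b-v3-3",  # replace with your LLM_MODEL_ID
    "messages": [{"role": "user", "content": "What is Deep Learning?"}],
    "max_tokens": 32,
    "stream": False,  # non-streaming; set True to receive a streamed response
}

response = requests.post(url, json=payload, timeout=120)
response.raise_for_status()
print(response.json())
```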
-### 1.1 Install Requirements +## Validated LLM Models -```bash -pip install -r requirements.txt -``` +| Model | TGI-Gaudi | vLLM-CPU | vLLM-Gaudi | +| --------------------------- | --------- | -------- | ---------- | +| [Intel/neural-chat-7b-v3-3] | ✓ | ✓ | ✓ | +| [Llama-2-7b-chat-hf] | ✓ | ✓ | ✓ | +| [Llama-2-70b-chat-hf] | ✓ | - | ✓ | +| [Meta-Llama-3-8B-Instruct] | ✓ | ✓ | ✓ | +| [Meta-Llama-3-70B-Instruct] | ✓ | - | ✓ | +| [Phi-3] | x | Limit 4K | Limit 4K | + +## Support integrations -### 1.2 Start 3rd-party TGI Service +In this microservices, we have supported following backend LLM service as integrations, we will include TGI/vLLM/Ollama in this readme, for others, please refer to corresponding readmes. -Please refer to [3rd-party TGI](../../../third_parties/tgi/deployment/docker_compose/) to start a LLM endpoint and verify. +- TGI +- VLLM +- Ollama +- [Bedrock](./README_bedrock.md) +- [Native](./README_native.md), based on optimum habana +- [Predictionguard](./README_predictionguard.md) -### 1.3 Start LLM Service with Python Script +## Clone OPEA GenAIComps + +Clone this repository at your desired location and set an environment variable for easy setup and usage throughout the instructions. ```bash -export TGI_LLM_ENDPOINT="http://${your_ip}:8008" -python llm.py +git clone https://github.com/opea-project/GenAIComps.git + +export OPEA_GENAICOMPS_ROOT=$(pwd)/GenAIComps ``` -## 🚀2. Start Microservice with Docker (Option 2) +## Prerequisites -If you start an LLM microservice with docker, the `docker_compose_llm.yaml` file will automatically start a TGI/vLLM service with docker. +For TGI/vLLM, You must create a user account with [HuggingFace] and obtain permission to use the gated LLM models by adhering to the guidelines provided on the respective model's webpage. The environment variables `LLM_MODEL` would be the HuggingFace model id and the `HF_TOKEN` is your HuggugFace account's "User Access Token". -### 2.1 Setup Environment Variables +## 🚀Start Microservice with Docker -In order to start TGI and LLM services, you need to setup the following environment variables first. +In order to start the microservices with docker, you need to build the docker images first for the microservice. -```bash -export HF_TOKEN=${your_hf_api_token} -export TGI_LLM_ENDPOINT="http://${your_ip}:8008" -export LLM_MODEL_ID=${your_hf_llm_model} -``` +### 1. Build Docker Image + +#### 1.1 Prepare backend LLM docker image. + +If you want to use vLLM backend, refer to [vLLM](../../../third_parties/vllm/) to build vLLM docker images first. -### 2.2 Build Docker Image +No need for TGI or Ollama. + +#### 1.2 Prepare TextGen docker image. ```bash -cd ../../../../ -docker build -t opea/llm-textgen:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/src/text-generation/Dockerfile . +# Build the microservice docker +cd ${OPEA_GENAICOMPS_ROOT} + +docker build \ + --build-arg https_proxy=$https_proxy \ + --build-arg http_proxy=$http_proxy \ + -t opea/llm-textgen:latest \ + -f comps/llms/src/text-generation/Dockerfile . ``` +### 2. Start LLM Service with the built image + To start a docker container, you have two options: - A. Run Docker with CLI - B. Run Docker with Docker Compose -You can choose one as needed. +You can choose one as needed. If you start an LLM microservice with docker compose, the `compose_text-generation.yaml` file will automatically start both endpoint and the microservice docker. 
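Whichever option you choose, the backend endpoint and the microservice may take some time to become ready after startup. A minimal sketch that polls the `/v1/health_check` route (the same check shown with `curl` in section 3.1) before any requests are sent; the port is an assumption and should match `TEXTGEN_PORT`.

```python
# Wait until the microservice's health check responds, then proceed.
# Assumes TEXTGEN_PORT=9000 on the local host; adjust as needed.
import time

import requests

health_url = "http://localhost:9000/v1/health_check"
for _ in range(60):  # retry for up to ~5 minutes
    try:
        if requests.get(health_url, timeout=5).status_code == 200:
            print("llm-textgen microservice is ready")
            break
    except requests.RequestException:
        pass
    time.sleep(5)
else:
    raise RuntimeError(f"Service at {health_url} did not become healthy in time")
```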
-### 2.3 Run Docker with CLI (Option A)
+#### 2.1 Setup Environment Variables
+
+In order to start the services, you need to set up the following environment variables first.
+
+```bash
+export LLM_ENDPOINT_PORT=8008
+export TEXTGEN_PORT=9000
+export host_ip=${host_ip}
+export HF_TOKEN=${HF_TOKEN}
+export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}"
+export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
+```
+
+#### 2.2 Run Docker with CLI (Option A)
+
+Step 1: Start the backend LLM service
+
+Please refer to the [TGI](../../../third_parties/tgi/), [vLLM](../../../third_parties/vllm/), or [Ollama](../../../third_parties/ollama/) guidelines to start a backend LLM service.
+
+Step 2: Start the TextGen microservice

```bash
-docker run -d --name="llm-tgi-server" -p 9000:9000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e TGI_LLM_ENDPOINT=$TGI_LLM_ENDPOINT -e HF_TOKEN=$HF_TOKEN opea/llm-textgen:latest
+export LLM_COMPONENT_NAME="OpeaTextGenService"
+docker run \
+  --name="llm-textgen-server" \
+  -p $TEXTGEN_PORT:9000 \
+  --ipc=host \
+  -e http_proxy=$http_proxy \
+  -e https_proxy=$https_proxy \
+  -e no_proxy=${no_proxy} \
+  -e LLM_ENDPOINT=$LLM_ENDPOINT \
+  -e HF_TOKEN=$HF_TOKEN \
+  -e LLM_MODEL_ID=$LLM_MODEL_ID \
+  -e LLM_COMPONENT_NAME=$LLM_COMPONENT_NAME \
+  opea/llm-textgen:latest
```

-### 2.4 Run Docker with Docker Compose (Option B)
+#### 2.3 Run Docker with Docker Compose (Option B)
+
+Set `service_name` to match the backend service.

```bash
-cd comps/llms/deployment/docker_compose/
-docker compose -f text-generation_tgi.yaml up -d
+export service_name="textgen-service-tgi"
+# export service_name="textgen-service-tgi-gaudi"
+# export service_name="textgen-service-vllm"
+# export service_name="textgen-service-vllm-gaudi"
+# export service_name="textgen-service-ollama"
+
+cd ../../deployment/docker_compose/
+docker compose -f compose_text-generation.yaml up ${service_name} -d
```

## 🚀3. Consume LLM Service

@@ -69,12 +133,12 @@ docker compose -f text-generation_tgi.yaml up -d
### 3.1 Check Service Status

```bash
-curl http://${your_ip}:9000/v1/health_check\
+curl http://${host_ip}:${TEXTGEN_PORT}/v1/health_check\
  -X GET \
  -H 'Content-Type: application/json'
```

-### 3.2 Consume LLM Service
+### 3.2 Verify the Microservice

You can set the following model parameters according to your actual needs, such as `max_tokens`, `stream`.

@@ -82,32 +146,29 @@ The `stream` parameter determines the format of the data returned by the API. It
```bash
# stream mode
-curl http://${your_ip}:9000/v1/chat/completions \
+curl http://${host_ip}:${TEXTGEN_PORT}/v1/chat/completions \
  -X POST \
  -d '{"model": "${LLM_MODEL_ID}", "messages": "What is Deep Learning?", "max_tokens":17}' \
  -H 'Content-Type: application/json'

-curl http://${your_ip}:9000/v1/chat/completions \
+curl http://${host_ip}:${TEXTGEN_PORT}/v1/chat/completions \
  -X POST \
  -d '{"model": "${LLM_MODEL_ID}", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens":17}' \
  -H 'Content-Type: application/json'

#Non-stream mode
-curl http://${your_ip}:9000/v1/chat/completions \
+curl http://${host_ip}:${TEXTGEN_PORT}/v1/chat/completions \
  -X POST \
  -d '{"model": "${LLM_MODEL_ID}", "messages": "What is Deep Learning?", "max_tokens":17, "stream":false}' \
  -H 'Content-Type: application/json'
```

-For parameters in Chat mode, please refer to [OpenAI API](https://platform.openai.com/docs/api-reference/chat/create)
-
-### 4. Validated Model
+
-| Model                     | TGI |
-| ------------------------- | --- |
-| Intel/neural-chat-7b-v3-3 | ✓   |
-| Llama-2-7b-chat-hf        | ✓   |
-| Llama-2-70b-chat-hf       | ✓   |
-| Meta-Llama-3-8B-Instruct  | ✓   |
-| Meta-Llama-3-70B-Instruct | ✓   |
-| Phi-3                     | ✓   |
+[Intel/neural-chat-7b-v3-3]: https://huggingface.co/Intel/neural-chat-7b-v3-3
+[Llama-2-7b-chat-hf]: https://huggingface.co/meta-llama/Llama-2-7b-chat-hf
+[Llama-2-70b-chat-hf]: https://huggingface.co/meta-llama/Llama-2-70b-chat-hf
+[Meta-Llama-3-8B-Instruct]: https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct
+[Meta-Llama-3-70B-Instruct]: https://huggingface.co/meta-llama/Meta-Llama-3-70B-Instruct
+[Phi-3]: https://huggingface.co/collections/microsoft/phi-3-6626e15e9585a200d2d761e3
+[HuggingFace]: https://huggingface.co/
diff --git a/comps/llms/src/text-generation/README_bedrock.md b/comps/llms/src/text-generation/README_bedrock.md
new file mode 100644
index 0000000000..fda24916ef
--- /dev/null
+++ b/comps/llms/src/text-generation/README_bedrock.md
@@ -0,0 +1,49 @@
+# Introduction
+
+[Amazon Bedrock](https://aws.amazon.com/bedrock) is a fully managed service that offers a choice of high-performing foundation models (FMs) from leading AI companies like AI21 Labs, Anthropic, Cohere, Meta, Mistral AI, Stability AI, and Amazon through a single API, along with a broad set of capabilities you need to build generative AI applications with security, privacy, and responsible AI.
+
+## Get Started
+
+## Setup Environment Variables
+
+In order to start the Bedrock service, you need to set up the following environment variables first.
+
+```bash
+export AWS_ACCESS_KEY_ID=${aws_access_key_id}
+export AWS_SECRET_ACCESS_KEY=${aws_secret_access_key}
+```
+
+If you're using an IAM Role, you also need to set the following environment variable.
+
+```bash
+export AWS_SESSION_TOKEN=${aws_session_token}
+```
+
+## Build Docker Image
+
+```bash
+cd GenAIComps/
+docker build --no-cache -t opea/bedrock:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/src/text-generation/Dockerfile .
+``` + +## Run the Bedrock Microservice + +```bash +docker run -d --name bedrock -p 9009:9000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e LLM_COMPONENT_NAME="OpeaTextGenBedrock" -e AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID -e AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY -e AWS_SESSION_TOKEN=$AWS_SESSION_TOKEN opea/bedrock:latest +``` + +(You can remove `-e AWS_SESSION_TOKEN=$AWS_SESSION_TOKEN` if you are not using an IAM Role) + +## Consume the Bedrock Microservice + +```bash +curl http://${host_ip}:9009/v1/chat/completions \ + -X POST \ + -d '{"model": "us.anthropic.claude-3-5-haiku-20241022-v1:0", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens":17}' \ + -H 'Content-Type: application/json' + +curl http://${host_ip}:9009/v1/chat/completions \ + -X POST \ + -d '{"model": "us.anthropic.claude-3-5-haiku-20241022-v1:0", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens":17, "stream": "true"}' \ + -H 'Content-Type: application/json' +``` diff --git a/comps/llms/text-generation/native/langchain/README.md b/comps/llms/src/text-generation/README_native.md similarity index 70% rename from comps/llms/text-generation/native/langchain/README.md rename to comps/llms/src/text-generation/README_native.md index 130d3e66b3..ba574be027 100644 --- a/comps/llms/text-generation/native/langchain/README.md +++ b/comps/llms/src/text-generation/README_native.md @@ -4,24 +4,24 @@ LLM Native microservice uses [optimum-habana](https://github.com/huggingface/opt ## 🚀1. Start Microservice -If you start an LLM microservice with docker, the `docker_compose_llm.yaml` file will automatically start a Native LLM service with docker. - ### 1.1 Setup Environment Variables In order to start Native LLM service, you need to setup the following environment variables first. -For LLM model, both `Qwen` and `Falcon3` models are supported. Users can set different models by changing the `LLM_NATIVE_MODEL` below. +For LLM model, both `Qwen` and `Falcon3` models are supported. Users can set different models by changing the `LLM_MODEL_ID` below. ```bash -export LLM_NATIVE_MODEL="Qwen/Qwen2-7B-Instruct" -export HUGGINGFACEHUB_API_TOKEN="your_huggingface_token" +export LLM_MODEL_ID="Qwen/Qwen2-7B-Instruct" +export HF_TOKEN="your_huggingface_token" +export TEXTGEN_PORT=10512 +export host_ip=${host_ip} ``` ### 1.2 Build Docker Image ```bash cd ../../../../../ -docker build -t opea/llm-native:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/native/langchain/Dockerfile . +docker build -t opea/llm-textgen-gaudi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/src/text-generation/Dockerfile.intel_hpu . ``` To start a docker container, you have two options: @@ -34,13 +34,15 @@ You can choose one as needed. 
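Whichever option you pick below, the container runs the same `OpeaTextGenNative` component. As a rough sketch of what it does with a request (simplified from `integrations/native.py` and `integrations/template.py`; the template wording here is illustrative, not the exact one shipped), it folds the question and any retrieved `documents` into a single prompt before running the warmed-up Habana `generate` loop:

```python
# Simplified illustration of the prompt assembly; not the literal implementation.
def build_prompt(question, documents=None):
    if not documents:
        return question
    context = "\n".join(documents)
    # Illustrative RAG template: the real ChatTemplate selects a model-specific template.
    return (
        "### Please answer the question using the search results below.\n"
        f"### Search results:\n{context}\n"
        f"### Question: {question}\n"
        "### Answer:"
    )

prompt = build_prompt(
    "What is Deep Learning?",
    ["Deep learning is a subset of machine learning based on neural networks."],
)
# The component then calls generate([prompt]) on the pre-initialized HPU model.
print(prompt)
```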
### 1.3 Run Docker with CLI (Option A) ```bash -docker run -d --runtime=habana --name="llm-native-server" -p 9000:9000 -e https_proxy=$https_proxy -e http_proxy=$http_proxy -e TOKENIZERS_PARALLELISM=false -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e LLM_NATIVE_MODEL=${LLM_NATIVE_MODEL} opea/llm-native:latest +docker run -d --runtime=habana --name="llm-native-server" -p 9000:9000 -e https_proxy=$https_proxy -e http_proxy=$http_proxy -e TOKENIZERS_PARALLELISM=false -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e LLM_MODEL_ID=${LLM_MODEL_ID} opea/llm-textgen-gaudi:latest ``` ### 1.4 Run Docker with Docker Compose (Option B) ```bash -docker compose -f docker_compose_llm.yaml up -d +export service_name="textgen-native-gaudi" +cd comps/llms/deployment/docker_compose +docker compose -f compose_text-generation.yaml up ${service_name} -d ``` ## 🚀2. Consume LLM Service @@ -58,6 +60,6 @@ curl http://${your_ip}:9000/v1/health_check\ ```bash curl http://${your_ip}:9000/v1/chat/completions\ -X POST \ - -d '{"query":"What is Deep Learning?"}' \ + -d '{"messages":"What is Deep Learning?"}' \ -H 'Content-Type: application/json' ``` diff --git a/comps/llms/text-generation/predictionguard/README.md b/comps/llms/src/text-generation/README_predictionguard.md similarity index 77% rename from comps/llms/text-generation/predictionguard/README.md rename to comps/llms/src/text-generation/README_predictionguard.md index 643434f2ee..06680a98f0 100644 --- a/comps/llms/text-generation/predictionguard/README.md +++ b/comps/llms/src/text-generation/README_predictionguard.md @@ -4,17 +4,13 @@ ## Get Started -### Build Docker Image - -```bash -cd ../../.. -docker build -t opea/llm-textgen-predictionguard:latest -f comps/llms/text-generation/predictionguard/Dockerfile . -``` - ### Run the Predictionguard Microservice ```bash -docker run -d -p 9000:9000 -e PREDICTIONGUARD_API_KEY=$PREDICTIONGUARD_API_KEY --name llm-textgen-predictionguard opea/llm-textgen-predictionguard:latest +export service_name="textgen-predictionguard" + +cd comps/llms/deployment/docker_compose/ +docker compose -f compose_text-generation.yaml up ${service_name} -d ``` ## Consume the Prediction Guard Microservice @@ -28,7 +24,7 @@ curl -X POST http://localhost:9000/v1/chat/completions \ -H "Content-Type: application/json" \ -d '{ "model": "Hermes-2-Pro-Llama-3-8B", - "query": "Tell me a joke.", + "messages": "Tell me a joke.", "max_tokens": 100, "temperature": 0.7, "top_p": 0.9, @@ -44,7 +40,7 @@ curl -N -X POST http://localhost:9000/v1/chat/completions \ -H "Content-Type: application/json" \ -d '{ "model": "Hermes-2-Pro-Llama-3-8B", - "query": "Tell me a joke.", + "messages": "Tell me a joke.", "max_tokens": 100, "temperature": 0.7, "top_p": 0.9, diff --git a/comps/llms/src/text-generation/integrations/bedrock.py b/comps/llms/src/text-generation/integrations/bedrock.py new file mode 100644 index 0000000000..2f6b973526 --- /dev/null +++ b/comps/llms/src/text-generation/integrations/bedrock.py @@ -0,0 +1,142 @@ +# Copyright (C) 2024 Prediction Guard, Inc. 
+# SPDX-License-Identified: Apache-2.0 + +import json +import os +import time + +import boto3 +from botocore.exceptions import ClientError +from fastapi.responses import StreamingResponse + +from comps import CustomLogger, GeneratedDoc, OpeaComponent, OpeaComponentRegistry, ServiceType +from comps.cores.proto.api_protocol import ChatCompletionRequest + +logger = CustomLogger("opea_textgen_bedrock") +logflag = os.getenv("LOGFLAG", False) + + +@OpeaComponentRegistry.register("OpeaTextGenBedrock") +class OpeaTextGenBedrock(OpeaComponent): + """A specialized OPEA TextGen component derived from OpeaComponent for + interacting with AWS Bedrock.""" + + def __init__(self, name: str, description: str, config: dict = None): + super().__init__(name, ServiceType.LLM.name.lower(), description, config) + + self.region = os.getenv("BEDROCK_REGION", "us-west-2") + self.bedrock_runtime = boto3.client("bedrock-runtime", region_name=self.region) + self.sts_client = boto3.client("sts", region_name=self.region) + + self.sse_headers = {"x-accel-buffering": "no", "cache-control": "no-cache", "content-type": "text/event-stream"} + + self.default_model = os.getenv("MODEL_ID", "us.anthropic.claude-3-haiku-20240307-v1:0") + + health_status = self.check_health() + if not health_status: + logger.error("OpeaTextGenBedrock health check failed.") + + def check_health(self): + """Checks health by validating ability to check caller identity with + AWS. + + Returns: + bool: True if AWS is reachable, False otherwise + """ + try: + response = self.sts_client.get_caller_identity() + return response is not None + except ClientError as e: + logger.error(e) + logger.error("OpeaTextGenBedrock health check failed") + return False + + async def invoke(self, input: ChatCompletionRequest): + """Invokes the AWS Bedrock service to generate a response based on the + previous chats. + + Args: + input (ChatCompletionRequest): The chat input. 
+ """ + if logflag: + logger.info(input) + + # Parse out arguments for Bedrock converse API + model_id = input.model if input.model else self.default_model + if logflag: + logger.info(f"[llm - chat] Using model {model_id}") + + bedrock_args = {"modelId": model_id} + + inference_config = {} + if input.max_tokens: + inference_config["maxTokens"] = input.max_tokens + + if input.stop: + inference_config["stopSequences"] = input.stop + + if input.temperature: + inference_config["temperature"] = input.temperature + + if input.top_p: + inference_config["topP"] = input.top_p + + if len(inference_config) > 0: + bedrock_args["inferenceConfig"] = inference_config + + if logflag and len(inference_config) > 0: + logger.info(f"[llm - chat] inference_config: {inference_config}") + + # Parse messages from HuggingFace TGI format to bedrock messages format + # tgi: [{role: "system" | "user", content: "text"}] + # bedrock: [role: "assistant" | "user", content: {text: "content"}] + messages = [ + {"role": "assistant" if i.get("role") == "system" else "user", "content": [{"text": i.get("content", "")}]} + for i in input.messages + ] + + # Bedrock requires that conversations start with a user prompt + # TGI allows the first message to be an assistant prompt, defining assistant behavior + # If the message list starts with an assistant prompt, move that message to the bedrock system prompt + if len(messages) > 0 and messages[0]["role"] == "assistant": + system_prompt = messages[0]["content"][0]["text"] + bedrock_args["system"] = [{"text": system_prompt}] + messages.pop(0) + + bedrock_args["messages"] = messages + + if logflag: + logger.info(f"[llm - chat] Bedrock args: {bedrock_args}") + + if input.stream: + response = self.bedrock_runtime.converse_stream(**bedrock_args) + + def stream_generator(): + chat_response = "" + for chunk in response["stream"]: + if "contentBlockDelta" in chunk: + text = chunk.get("contentBlockDelta", {}).get("delta", {}).get("text", "") + if logflag: + logger.info(f"[llm - chat_stream] chunk:{text}") + + tgi_format_out = { + "object": "chat.completion.chunk", + "model": model_id, + "created": int(time.time()), + "choices": [ + {"index": 0, "delta": {"role": "assistant", "content": text}, "finish_reason": None} + ], + } + yield f"data: {json.dumps(tgi_format_out)}\n\n" + if logflag: + logger.info(f"[llm - chat_stream] stream response: {chat_response}") + yield "data: [DONE]\n\n" + + return StreamingResponse(stream_generator(), headers=self.sse_headers) + + response = self.bedrock_runtime.converse(**bedrock_args) + output_content = response.get("output", {}).get("message", {}).get("content", []) + output_text = output_content[0].get("text", "") if len(output_content) > 0 else "" + prompt = messages[-1].get("content", [{"text": ""}])[0].get("text", "") + + return GeneratedDoc(text=output_text, prompt=prompt) diff --git a/comps/llms/src/text-generation/integrations/native.py b/comps/llms/src/text-generation/integrations/native.py new file mode 100644 index 0000000000..5a0ad4a44c --- /dev/null +++ b/comps/llms/src/text-generation/integrations/native.py @@ -0,0 +1,269 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import sys + +sys.path.append("/test/GenAIComps/") + +import os +import threading +import time + +import torch +from langchain_core.prompts import PromptTemplate + +from comps import CustomLogger, GeneratedDoc, OpeaComponent, OpeaComponentRegistry, ServiceType +from comps.cores.proto.api_protocol import ChatCompletionRequest + +from .template import ChatTemplate +from .utils import initialize_model + +logger = CustomLogger("opea_textgen_native") +logflag = os.getenv("LOGFLAG", False) + +MODEL_NAME = os.getenv("LLM_MODEL_ID", "Qwen/Qwen2-7B-Instruct") + +input_sentences = [ + "DeepSpeed is a machine learning framework", + "He is working on", + "He has a", + "He got all", + "Everyone is happy and I can", + "The new movie that got Oscar this year", + "In the far far distance from our galaxy,", + "Peace is the only way", +] + +args_dict = { + "device": "hpu", + "model_name_or_path": MODEL_NAME, + "bf16": True, + "max_new_tokens": 100, + "max_input_tokens": 0, + "batch_size": 1, + "warmup": 3, + "n_iterations": 5, + "local_rank": 0, + "use_kv_cache": True, + "use_hpu_graphs": True, + "dataset_name": None, + "column_name": None, + "do_sample": False, + "num_beams": 1, + "trim_logits": False, + "seed": 27, + "profiling_warmup_steps": 0, + "profiling_steps": 0, + "profiling_record_shapes": False, + "prompt": None, + "bad_words": None, + "force_words": None, + "assistant_model": None, + "peft_model": None, + "num_return_sequences": 1, + "token": None, + "model_revision": "main", + "attn_softmax_bf16": False, + "output_dir": None, + "bucket_size": -1, + "bucket_internal": False, + "dataset_max_samples": -1, + "limit_hpu_graphs": False, + "reuse_cache": False, + "verbose_workers": False, + "simulate_dyn_prompt": None, + "reduce_recompile": False, + "use_flash_attention": False, + "flash_attention_recompute": False, + "flash_attention_causal_mask": False, + "flash_attention_fast_softmax": False, + "book_source": False, + "torch_compile": False, + "ignore_eos": True, + "temperature": 1.0, + "top_p": 1.0, + "top_k": None, + "const_serialization_path": None, + "disk_offload": False, + "trust_remote_code": False, + "quant_config": "", + "world_size": 0, + "show_graphs_count": False, + "load_quantized_model_with_inc": False, + "local_quantized_inc_model_path": None, + "load_quantized_model_with_autogptq": False, + "penalty_alpha": None, +} + + +class Args: + def __init__(self, **entries): + self.__dict__.update(entries) + + +model = None +assistant_model = None +tokenizer = None +generation_config = None +args = Args(**args_dict) +initialization_lock = threading.Lock() +initialized = False + + +def generate( + input_query: list, + device="hpu", + use_lazy_mode=True, + use_hpu_graphs=True, + profiling_steps=0, + profiling_warmup_steps=0, + ignore_eos=True, + profiling_record_shapes=False, +): + """Generates sequences from the input sentences and returns them.""" + logger.info(f"[llm - generate] starting to inference with prompt {input_query}") + encode_t0 = time.perf_counter() + + # Tokenization + input_tokens = tokenizer.batch_encode_plus( + input_query, + return_tensors="pt", + padding=True, + 
return_token_type_ids=False, # token_type_ids is not needed for falcon-three model + ) + encode_duration = time.perf_counter() - encode_t0 + logger.info(f"[llm - generate] input tokenized: {input_tokens}") + + # Move inputs to target device(s) + for t in input_tokens: + logger.info(f"[llm - generate] t: {t}") + if torch.is_tensor(input_tokens[t]): + logger.info("[llm - generate] input[t] is tensor") + logger.info(f"[llm - generate] device: {model.device}") + input_tokens[t] = input_tokens[t].to(model.device) + + logger.info("[llm - generate] inputs transferred.") + + iteration_times = [] + outputs = model.generate( + **input_tokens, + generation_config=generation_config, + assistant_model=assistant_model, + lazy_mode=use_lazy_mode, + hpu_graphs=use_hpu_graphs, + profiling_steps=profiling_steps, + profiling_warmup_steps=profiling_warmup_steps, + ignore_eos=ignore_eos, + iteration_times=iteration_times, + profiling_record_shapes=profiling_record_shapes, + ).cpu() + logger.info("[llm - generate] result generated") + first_token_time = iteration_times[0] + encode_duration + result = tokenizer.batch_decode(outputs, skip_special_tokens=True) + logger.info(f"[llm - generate] result: {result}") + logger.info(f"[llm - generate] Time to first token = {first_token_time*1000}ms") + return result + + +def initialize(): + global model, assistant_model, tokenizer, generation_config, initialized + with initialization_lock: + if not initialized: + # initialize model and tokenizer + import habana_frameworks.torch.hpu as torch_hpu + from optimum.habana.utils import HabanaProfile + + model, assistant_model, tokenizer, generation_config = initialize_model(args, logger) + logger.info("[llm] model and tokenizer initialized.") + + # compilation and model warmup + HabanaProfile.disable() + logger.info("[llm - native] Graph compilation...") + for _ in range(args.warmup): + generate(input_sentences) + logger.info("[llm - native] model warm up finished.") + torch_hpu.synchronize() + HabanaProfile.enable() + logger.info("[llm - native] Ready to inference") + res = generate(["What is Deep Learning?"]) + logger.info(f"[llm - native] test result: {res}") + initialized = True + + +@OpeaComponentRegistry.register("OpeaTextGenNative") +class OpeaTextGenNative(OpeaComponent): + """A specialized OPEA TextGen component derived from OpeaComponent for interacting with LLM services based on native optimum habana.""" + + def __init__(self, name: str, description: str, config: dict = None): + super().__init__(name, ServiceType.LLM.name.lower(), description, config) + initialize() + health_status = self.check_health() + if not health_status: + logger.error("OpeaTextGenNative health check failed.") + else: + logger.info("OpeaTextGenNative health check success.") + + def check_health(self) -> bool: + """Checks the health of the LLM service. + + Returns: + bool: True if the service is reachable and healthy, False otherwise. + """ + + try: + return initialized + except Exception as e: + logger.error(e) + logger.error("Health check failed") + return False + + async def invoke(self, input: ChatCompletionRequest): + """Invokes the LLM service to generate output for the provided input. + + Args: + input (ChatCompletionRequest): The input text(s). 
+ """ + + message = None + if isinstance(input.messages, str): + message = input.messages + else: # List[Dict] + for input_data in input.messages: + if "role" in input_data and input_data["role"] == "user" and "content" in input_data: + message = input_data["content"] + if logflag: + logger.info(f"Get input text:\n {message}") + if message is None: + logger.error("Don't receive any input text, exit!") + return GeneratedDoc(text=None, prompt=None) + + prompt = message + prompt_template = None + if input.chat_template: + prompt_template = PromptTemplate.from_template(input.chat_template) + input_variables = prompt_template.input_variables + if prompt_template: + if sorted(input_variables) == ["context", "question"]: + prompt = prompt_template.format(question=message, context="\n".join(input.documents)) + elif input_variables == ["question"]: + prompt = prompt_template.format(question=message) + else: + logger.info(f"{prompt_template} not used, we only support 2 input variables ['question', 'context']") + else: + if input.documents: + prompt = ChatTemplate.generate_rag_prompt(message, input.documents) + res = generate([prompt]) + + if logflag: + logger.info(f"[llm - native] inference result: {res}") + return GeneratedDoc(text=res[0], prompt=message) diff --git a/comps/llms/src/text-generation/integrations/predictionguard.py b/comps/llms/src/text-generation/integrations/predictionguard.py new file mode 100644 index 0000000000..866dd69d74 --- /dev/null +++ b/comps/llms/src/text-generation/integrations/predictionguard.py @@ -0,0 +1,101 @@ +# Copyright (C) 2024 Prediction Guard, Inc. +# SPDX-License-Identified: Apache-2.0 + +import os +import time + +from fastapi import HTTPException +from fastapi.responses import StreamingResponse +from predictionguard import PredictionGuard + +from comps import CustomLogger, OpeaComponent, OpeaComponentRegistry, ServiceType +from comps.cores.proto.api_protocol import ChatCompletionRequest + +logger = CustomLogger("opea_textgen_predictionguard") +logflag = os.getenv("LOGFLAG", False) + + +@OpeaComponentRegistry.register("OpeaTextGenPredictionguard") +class OpeaTextGenPredictionguard(OpeaComponent): + """A specialized OPEA TextGen component derived from OpeaComponent for interacting with Predictionguard services. + + Attributes: + client (Predictionguard): An instance of the Predictionguard client for text generation. + """ + + def __init__(self, name: str, description: str, config: dict = None): + super().__init__(name, ServiceType.LLM.name.lower(), description, config) + self.client = PredictionGuard() + health_status = self.check_health() + if not health_status: + logger.error("OpeaTextGenPredictionguard health check failed.") + else: + logger.info("OpeaTextGenPredictionguard health check success.") + + def check_health(self) -> bool: + """Checks the health of the Predictionguard LLM service. + + Returns: + bool: True if the service is reachable and healthy, False otherwise. + """ + + try: + response = self.client.models.list() + return response is not None + except Exception as e: + logger.error(e) + logger.error("Health check failed") + return False + + async def invoke(self, input: ChatCompletionRequest): + """Invokes the Predictionguard LLM service to generate output for the provided input. + + Args: + input (ChatCompletionRequest): The input text(s). + """ + if isinstance(input.messages, str): + messages = [ + { + "role": "system", + "content": "You are a helpful assistant. 
Your goal is to provide accurate, detailed, and safe responses to the user's queries.", + }, + {"role": "user", "content": input.messages}, + ] + else: + messages = input.messages + + if input.stream: + + async def stream_generator(): + chat_response = "" + for res in self.client.chat.completions.create( + model=input.model, + messages=messages, + max_tokens=input.max_tokens, + temperature=input.temperature, + top_p=input.top_p, + top_k=input.top_k, + stream=True, + ): + if "choices" in res["data"] and "delta" in res["data"]["choices"][0]: + delta_content = res["data"]["choices"][0]["delta"]["content"] + chat_response += delta_content + yield f"data: {delta_content}\n\n" + else: + yield "data: [DONE]\n\n" + + return StreamingResponse(stream_generator(), media_type="text/event-stream") + else: + try: + response = self.client.chat.completions.create( + model=input.model, + messages=messages, + max_tokens=input.max_tokens, + temperature=input.temperature, + top_p=input.top_p, + top_k=input.top_k, + ) + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + return response diff --git a/comps/llms/src/text-generation/integrations/opea.py b/comps/llms/src/text-generation/integrations/service.py similarity index 97% rename from comps/llms/src/text-generation/integrations/opea.py rename to comps/llms/src/text-generation/integrations/service.py index 718415a64c..485571dc07 100644 --- a/comps/llms/src/text-generation/integrations/opea.py +++ b/comps/llms/src/text-generation/integrations/service.py @@ -21,7 +21,7 @@ # Environment variables MODEL_NAME = os.getenv("LLM_MODEL_ID") MODEL_CONFIGS = os.getenv("MODEL_CONFIGS") -DEFAULT_ENDPOINT = os.getenv("LLM_ENDPOINT") +DEFAULT_ENDPOINT = os.getenv("LLM_ENDPOINT", "http://localhost:8080") TOKEN_URL = os.getenv("TOKEN_URL") CLIENTID = os.getenv("CLIENTID") CLIENT_SECRET = os.getenv("CLIENT_SECRET") @@ -47,8 +47,8 @@ def get_llm_endpoint(): raise ConfigError(f"Input model {MODEL_NAME} not present in model_configs") -@OpeaComponentRegistry.register("OPEA_LLM") -class OPEALLM(OpeaComponent): +@OpeaComponentRegistry.register("OpeaTextGenService") +class OpeaTextGenService(OpeaComponent): """A specialized OPEA LLM component derived from OpeaComponent for interacting with TGI/vLLM services based on OpenAI API. Attributes: @@ -60,7 +60,7 @@ def __init__(self, name: str, description: str, config: dict = None): self.client = self._initialize_client() health_status = self.check_health() if not health_status: - logger.error("OPEALLM health check failed.") + logger.error("OpeaTextGenService health check failed.") def _initialize_client(self) -> AsyncOpenAI: """Initializes the AsyncOpenAI.""" diff --git a/comps/llms/src/text-generation/integrations/template.py b/comps/llms/src/text-generation/integrations/template.py index 6d976106ac..36af028be0 100644 --- a/comps/llms/src/text-generation/integrations/template.py +++ b/comps/llms/src/text-generation/integrations/template.py @@ -6,9 +6,9 @@ class ChatTemplate: @staticmethod - def generate_rag_prompt(question, documents, model): + def generate_rag_prompt(question, documents, model=None): context_str = "\n".join(documents) - if model == "meta-llama/Meta-Llama-3.1-70B-Instruct" or model == "meta-llama/Meta-Llama-3.1-8B-Instruct": + if model in ["meta-llama/Meta-Llama-3.1-70B-Instruct", "meta-llama/Meta-Llama-3.1-8B-Instruct"]: template = """ <|begin_of_text|><|start_header_id|>system<|end_header_id|> You are an assistant for question-answering tasks. 
Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise <|eot_id|><|start_header_id|>user<|end_header_id|> Question: {question} diff --git a/comps/llms/text-generation/native/langchain/utils.py b/comps/llms/src/text-generation/integrations/utils.py similarity index 66% rename from comps/llms/text-generation/native/langchain/utils.py rename to comps/llms/src/text-generation/integrations/utils.py index 57bd059567..da177df6ec 100644 --- a/comps/llms/text-generation/native/langchain/utils.py +++ b/comps/llms/src/text-generation/integrations/utils.py @@ -26,7 +26,6 @@ from pathlib import Path import torch -from huggingface_hub import login from optimum.habana.checkpoint_utils import ( get_ds_injection_policy, get_repo_root, @@ -43,10 +42,6 @@ from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer from transformers.utils import check_min_version -HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN", "") -if HUGGINGFACEHUB_API_TOKEN != "": - login(token=HUGGINGFACEHUB_API_TOKEN) - def adjust_batch(batch, size): curr_size = batch["input_ids"].shape[1] @@ -139,7 +134,7 @@ def setup_env(args): # TODO: SW-167588 - WA for memory issue in hqt prep_model os.environ.setdefault("EXPERIMENTAL_WEIGHT_SHARING", "FALSE") - if args.global_rank == 0 and not args.torch_compile: + if args.global_rank == 0 and not args.torch_compile and args.show_graphs_count: os.environ.setdefault("GRAPH_VISUALIZATION", "true") shutil.rmtree(".graph_dumps", ignore_errors=True) @@ -179,11 +174,53 @@ def patch_scoped_linear_all_reduce(model): patch_scoped_linear_all_reduce(module) -def get_torch_compiled_model(model): - model.model = torch.compile(model.model, backend="hpu_backend", options={"keep_input_mutations": True}) +def get_torch_compiled_model(model, logger): + # for gpt_bigcode, mpt, bloom, gpt2 model_type + if hasattr(model, "transformer"): + model.transformer = torch.compile( + model.transformer, backend="hpu_backend", options={"keep_input_mutations": True} + ) + # for gpt_neox + elif hasattr(model, "gpt_neox"): + model.gpt_neox = torch.compile(model.gpt_neox, backend="hpu_backend", options={"keep_input_mutations": True}) + # for llama, mistral, mixtral, qwen2 + elif hasattr(model, "model"): + model.model = torch.compile(model.model, backend="hpu_backend", options={"keep_input_mutations": True}) + else: + logger.warning( + "In low performance case, please explicitly specify a module you want to wrap with `torch.compile`" + ) + model = torch.compile(model, backend="hpu_backend", options={"keep_input_mutations": True}) + return model + + +def setup_quantization(model, args): + try: + from neural_compressor.torch.quantization import FP8Config, convert, prepare + except ImportError: + raise ImportError( + "Module neural_compressor is missing. Please use a newer Synapse version to use quantization." + ) + + config = FP8Config.from_json_file(args.quant_config) + if config.measure: + model = prepare(model, config) + if config.quantize: + model = convert(model, config) + return model +def finalize_quantization(model): + try: + from neural_compressor.torch.quantization import finalize_calibration + except ImportError: + raise ImportError( + "Module neural_compressor is missing. Please use a newer Synapse version to use quantization." 
+ ) + finalize_calibration(model) + + def setup_model(args, model_dtype, model_kwargs, logger): logger.info("Single-device run.") if args.assistant_model is None: @@ -206,6 +243,32 @@ def setup_model(args, model_dtype, model_kwargs, logger): torch_dtype=model_dtype, **model_kwargs, ) + elif args.load_quantized_model_with_autogptq: + from transformers import GPTQConfig + + quantization_config = GPTQConfig(bits=4, use_exllama=False) + model = AutoModelForCausalLM.from_pretrained( + args.model_name_or_path, torch_dtype=model_dtype, quantization_config=quantization_config, **model_kwargs + ) + elif args.load_quantized_model_with_inc: + from neural_compressor.torch.quantization import load + + model = load(model_name_or_path=args.model_name_or_path, format="huggingface", device="hpu", **model_kwargs) + elif args.local_quantized_inc_model_path: + org_model = AutoModelForCausalLM.from_pretrained( + args.model_name_or_path, + **model_kwargs, + ) + + from neural_compressor.torch.quantization import load + + model = load( + model_name_or_path=args.local_quantized_inc_model_path, + format="default", + device="hpu", + original_model=org_model, + **model_kwargs, + ) else: if args.assistant_model is not None: assistant_model = AutoModelForCausalLM.from_pretrained( @@ -218,11 +281,7 @@ def setup_model(args, model_dtype, model_kwargs, logger): args.model_name_or_path, torch_dtype=model_dtype, **model_kwargs ) if args.quant_config: - import habana_quantization_toolkit - - habana_quantization_toolkit.prep_model(model) - if args.assistant_model is not None: - habana_quantization_toolkit.quantize_model(assistant_model) + model = setup_quantization(model, args) model = model.eval().to(args.device) if args.assistant_model is not None: @@ -240,14 +299,120 @@ def setup_model(args, model_dtype, model_kwargs, logger): assistant_model = wrap_in_hpu_graph(assistant_model) if _is_peft_model(model): model.base_model = wrap_in_hpu_graph(model.base_model) + if model.peft_type == "ADAPTION_PROMPT": + model.base_model.model = wrap_in_hpu_graph(model.base_model.model) - if args.torch_compile and model.config.model_type == "llama": - model = get_torch_compiled_model(model) + if args.torch_compile: + model = get_torch_compiled_model(model, logger) # if args.assistant_model is not None: - # assistant_model = get_torch_compiled_model(assistant_model) + # assistant_model = get_torch_compiled_model(assistant_model, logger) return model, assistant_model +def setup_distributed_model_tp(args, model_dtype, model_kwargs, logger, cache_dir): + from typing import Any, MutableMapping + + from optimum.habana.distributed import serialization + from optimum.habana.distributed.strategy import TensorParallelStrategy + + logger.info("Multi-device run.") + + assert args.quant_config == "", "Fp8 is not enabled, unset QUANT_CONFIG" + assert args.assistant_model is None, "Assistant model must be None" + + from torch import distributed as dist + + if args.device == "hpu": + dist.init_process_group(backend="hccl") + else: + assert False, "Supports TP only on HPU" + + torch._C._distributed_c10d._register_process_group("default", dist.group.WORLD) + logger.info("Creating Model") + config = AutoConfig.from_pretrained(args.model_name_or_path, torch_dtype=model_dtype, **model_kwargs) + model_kwargs = {} + model_kwargs["parallel_strategy"] = TensorParallelStrategy() + model = AutoModelForCausalLM.from_config(config, torch_dtype=model_dtype, **model_kwargs) + + initial_device = torch.device("cpu") + source = "hf" + checkpoint_sharding = None + lazy_sd: 
MutableMapping[str, Any] = {} + logger.info("Loading Checkpoints") + lazy_sd = serialization.load_state_dict( + cache_dir, + source=source, + distributed_strategy=args.parallel_strategy, + checkpoint_sharding=None, + initial_device=initial_device, + rank=args.global_rank, + world_size=args.world_size, + ) + architecture = "llama" + if len(lazy_sd): + serialization.load_state_dict_into_model( + model, + lazy_sd, + architecture, + source, + args.parallel_strategy, + checkpoint_sharding, + initial_device, + args.local_rank, + args.world_size, + ) + + model = model.eval().to(args.device) + + if args.use_hpu_graphs: + from habana_frameworks.torch.hpu import wrap_in_hpu_graph + + model = wrap_in_hpu_graph(model) + + if args.torch_compile: + model = get_torch_compiled_model(model, logger) + + return model, args.assistant_model + + +def setup_distributed_model_ep(args, model_dtype, model_kwargs, logger): + logger.info("Multi-device ep run.") + + assert args.quant_config == "", "Fp8 is not enabled, unset QUANT_CONFIG" + assert args.assistant_model is None, "Assistant model must be None" + + from torch import distributed as dist + + if args.device == "hpu": + dist.init_process_group(backend="hccl") + else: + assert False, "Supports EP only on HPU" + + torch._C._distributed_c10d._register_process_group("default", dist.group.WORLD) + logger.info("Creating Model") + config = AutoConfig.from_pretrained(args.model_name_or_path, torch_dtype=model_dtype, **model_kwargs) + config.update({"ep_size": args.world_size}) + + model = AutoModelForCausalLM.from_pretrained( + args.model_name_or_path, + config=config, + torch_dtype=model_dtype, + **model_kwargs, + ) + + model = model.eval().to(args.device) + + if args.use_hpu_graphs: + from habana_frameworks.torch.hpu import wrap_in_hpu_graph + + model = wrap_in_hpu_graph(model) + + if args.torch_compile: + model = get_torch_compiled_model(model) + + return model, args.assistant_model + + def setup_distributed_model(args, model_dtype, model_kwargs, logger): import deepspeed @@ -264,6 +429,13 @@ def setup_distributed_model(args, model_dtype, model_kwargs, logger): if load_to_meta: # Construct model with fake meta tensors, later will be replaced on devices during ds-inference ckpt load with deepspeed.OnDevice(dtype=model_dtype, device="meta"): + if ( + hasattr(config, "rope_scaling") + and config.rope_scaling + and config.rope_scaling["rope_type"] == "llama3" + and config.max_position_embeddings > 8192 + ): + config.max_position_embeddings = 8192 model = AutoModelForCausalLM.from_config(config, torch_dtype=model_dtype) # Model loaded to meta is managed differently @@ -310,20 +482,16 @@ def setup_distributed_model(args, model_dtype, model_kwargs, logger): model = deepspeed.init_inference(model, **ds_inference_kwargs) model = model.module - if model.config.model_type in ["llama", "falcon", "qwen2"]: + if model.config.model_type in ["llama", "falcon", "qwen2", "starcoder2", "gemma"]: patch_scoped_linear_all_reduce(model) if args.quant_config: - import habana_quantization_toolkit + model = setup_quantization(model, args) - habana_quantization_toolkit.prep_model(model) - if args.assistant_model is not None: - habana_quantization_toolkit.prep_model(assistant_model) - - if args.torch_compile and model.config.model_type == "llama": - model = get_torch_compiled_model(model) + if args.torch_compile: + model = get_torch_compiled_model(model, logger) # if args.assistant_model is not None: - # assistant_model = get_torch_compiled_model(assistant_model) + # assistant_model = 
get_torch_compiled_model(assistant_model, logger) return model, assistant_model @@ -374,10 +542,17 @@ def peft_model(args, model_dtype, logger, **model_kwargs): model.__class__.generate = gaudi_generate model.__class__.prepare_inputs_for_generation = gaudi_prepare_inputs_for_generation + if model.peft_type == "ADAPTION_PROMPT": + from optimum.habana.peft.layer import GaudiAdaptedAttention_getattr, GaudiAdaptedAttentionPreAttnForward + from peft import tuners + + tuners.adaption_prompt.layer.AdaptedAttention.pre_attn_forward = GaudiAdaptedAttentionPreAttnForward + tuners.adaption_prompt.layer.AdaptedAttention.__getattr__ = GaudiAdaptedAttention_getattr + return model -def setup_tokenizer(args, model, assistant_model): +def setup_tokenizer(args, model, assistant_model, logger): tokenizer_kwargs = { "revision": args.model_revision, "token": args.token, @@ -390,16 +565,22 @@ def setup_tokenizer(args, model, assistant_model): tokenizer.padding_side = "left" if model.config.model_type == "llama": - # unwind broken decapoda-research config - model.generation_config.pad_token_id = 0 - model.generation_config.bos_token_id = 1 - model.generation_config.eos_token_id = 2 + if model.generation_config.pad_token_id is None: + if isinstance(model.generation_config.eos_token_id, int): + model.generation_config.pad_token_id = model.generation_config.eos_token_id + elif isinstance(model.generation_config.eos_token_id, list): + model.generation_config.pad_token_id = model.generation_config.eos_token_id[0] if assistant_model is not None: - assistant_model.generation_config.pad_token_id = 0 - assistant_model.generation_config.bos_token_id = 1 - assistant_model.generation_config.eos_token_id = 2 + if assistant_model.generation_config.pad_token_id is None: + if isinstance(assistant_model.generation_config.eos_token_id, int): + assistant_model.generation_config.pad_token_id = assistant_model.generation_config.eos_token_id + elif isinstance(assistant_model.generation_config.eos_token_id, list): + assistant_model.generation_config.pad_token_id = assistant_model.generation_config.eos_token_id[0] tokenizer.bos_token_id = model.generation_config.bos_token_id - tokenizer.eos_token_id = model.generation_config.eos_token_id + if isinstance(model.generation_config.eos_token_id, int): + tokenizer.eos_token_id = model.generation_config.eos_token_id + elif isinstance(model.generation_config.eos_token_id, list): + tokenizer.eos_token_id = model.generation_config.eos_token_id[0] tokenizer.pad_token_id = model.generation_config.pad_token_id tokenizer.pad_token = tokenizer.decode(tokenizer.pad_token_id) tokenizer.eos_token = tokenizer.decode(tokenizer.eos_token_id) @@ -415,6 +596,13 @@ def setup_tokenizer(args, model, assistant_model): tokenizer.eos_token = tokenizer.decode(tokenizer.eos_token_id) tokenizer.bos_token = tokenizer.decode(tokenizer.bos_token_id) + # HACK: MiniCPM3 does not support list EOS token ID generation config. + if model.config.model_type == "minicpm3" and isinstance(model.generation_config.eos_token_id, list): + logger.warning( + f"Model type {model.config.model_type} does not support list style EOS token ID in generation config. Only last eos token id will be used." 
+ ) + model.generation_config.eos_token_id = model.generation_config.eos_token_id[-1] + # Some models like GPT2 do not have a PAD token so we have to set it if necessary if tokenizer.pad_token is None: tokenizer.pad_token = tokenizer.eos_token @@ -444,6 +632,8 @@ def setup_generation_config(args, model, assistant_model, tokenizer): generation_config.bucket_internal = args.bucket_internal generation_config.do_sample = args.do_sample generation_config.num_beams = args.num_beams + generation_config.top_k = args.top_k + generation_config.penalty_alpha = args.penalty_alpha generation_config.bad_words_ids = bad_words_ids generation_config.force_words_ids = force_words_ids generation_config.num_return_sequences = args.num_return_sequences @@ -459,6 +649,7 @@ def setup_generation_config(args, model, assistant_model, tokenizer): generation_config.flash_attention_causal_mask = args.flash_attention_causal_mask generation_config.flash_attention_fast_softmax = args.flash_attention_fast_softmax generation_config.trust_remote_code = args.trust_remote_code + generation_config.valid_sequence_lengths = None return generation_config @@ -489,7 +680,7 @@ def initialize_model(args, logger): setup_env(args) setup_device(args) set_seed(args.seed) - get_repo_root(args.model_name_or_path, local_rank=args.local_rank, token=args.token) + cache_dir = get_repo_root(args.model_name_or_path, local_rank=args.local_rank, token=args.token) if args.assistant_model is not None: get_repo_root(args.assistant_model, local_rank=args.local_rank, token=args.token) use_deepspeed = args.world_size > 0 @@ -504,15 +695,27 @@ def initialize_model(args, logger): "token": args.token, "trust_remote_code": args.trust_remote_code, } + if args.load_quantized_model_with_inc or args.local_quantized_inc_model_path: + model_kwargs["torch_dtype"] = torch.bfloat16 + if args.trust_remote_code: logger.warning("`trust_remote_code` is set, there is no guarantee this model works properly and it may fail") model, assistant_model = ( setup_model(args, model_dtype, model_kwargs, logger) if not use_deepspeed - else setup_distributed_model(args, model_dtype, model_kwargs, logger) + else ( + setup_distributed_model(args, model_dtype, model_kwargs, logger) + if args.parallel_strategy == "none" + else ( + setup_distributed_model_tp(args, model_dtype, model_kwargs, logger, cache_dir) + if args.parallel_strategy == "tp" + else setup_distributed_model_ep(args, model_dtype, model_kwargs, logger) + ) + ) ) - tokenizer, model, assistant_model = setup_tokenizer(args, model, assistant_model) + + tokenizer, model, assistant_model = setup_tokenizer(args, model, assistant_model, logger) generation_config = setup_generation_config(args, model, assistant_model, tokenizer) if args.const_serialization_path: diff --git a/comps/llms/src/text-generation/opea_llm_microservice.py b/comps/llms/src/text-generation/opea_llm_microservice.py index fb24911c41..c59db8e472 100644 --- a/comps/llms/src/text-generation/opea_llm_microservice.py +++ b/comps/llms/src/text-generation/opea_llm_microservice.py @@ -5,8 +5,6 @@ import time from typing import Union -from integrations.opea import OPEALLM - from comps import ( CustomLogger, LLMParamsDoc, @@ -19,12 +17,23 @@ statistics_dict, ) from comps.cores.proto.api_protocol import ChatCompletionRequest +from comps.cores.telemetry.opea_telemetry import opea_telemetry logger = CustomLogger("llm") logflag = os.getenv("LOGFLAG", False) +llm_component_name = os.getenv("LLM_COMPONENT_NAME", "OpeaTextGenService") +if logflag: + logger.info(f"Get 
llm_component_name {llm_component_name}") + +if llm_component_name == "OpeaTextGenNative": + from integrations.native import OpeaTextGenNative +elif llm_component_name == "OpeaTextGenBedrock": + from integrations.bedrock import OpeaTextGenBedrock +else: + from integrations.predictionguard import OpeaTextGenPredictionguard + from integrations.service import OpeaTextGenService -llm_component_name = os.getenv("LLM_COMPONENT_NAME", "OPEA_LLM") # Initialize OpeaComponentLoader loader = OpeaComponentLoader(llm_component_name, description=f"OPEA LLM Component: {llm_component_name}") @@ -36,6 +45,7 @@ host="0.0.0.0", port=9000, ) +@opea_telemetry @register_statistics(names=["opea_service@llm"]) async def llm_generate(input: Union[LLMParamsDoc, ChatCompletionRequest, SearchedDoc]): start = time.time() diff --git a/comps/llms/src/text-generation/requirements.txt b/comps/llms/src/text-generation/requirements.txt index 85b06a876d..2669179cff 100644 --- a/comps/llms/src/text-generation/requirements.txt +++ b/comps/llms/src/text-generation/requirements.txt @@ -1,4 +1,5 @@ aiohttp +boto3 docarray[full] fastapi httpx==0.27.2 @@ -8,6 +9,8 @@ openai==1.57.4 opentelemetry-api opentelemetry-exporter-otlp opentelemetry-sdk +Pillow +predictionguard prometheus-fastapi-instrumentator shortuuid transformers diff --git a/comps/llms/summarization/tgi/langchain/Dockerfile b/comps/llms/summarization/tgi/langchain/Dockerfile deleted file mode 100644 index 3a73120547..0000000000 --- a/comps/llms/summarization/tgi/langchain/Dockerfile +++ /dev/null @@ -1,28 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -FROM python:3.11-slim - -ARG ARCH="cpu" - -RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ - libgl1-mesa-glx \ - libjemalloc-dev - -RUN useradd -m -s /bin/bash user && \ - mkdir -p /home/user && \ - chown -R user /home/user/ - -USER user - -COPY comps /home/user/comps - -RUN pip install --no-cache-dir --upgrade pip setuptools && \ - if [ ${ARCH} = "cpu" ]; then pip install --no-cache-dir torch torchvision --index-url https://download.pytorch.org/whl/cpu; fi && \ - pip install --no-cache-dir -r /home/user/comps/llms/summarization/tgi/langchain/requirements.txt - -ENV PYTHONPATH=$PYTHONPATH:/home/user - -WORKDIR /home/user/comps/llms/summarization/tgi/langchain - -ENTRYPOINT ["bash", "entrypoint.sh"] diff --git a/comps/llms/summarization/tgi/langchain/README.md b/comps/llms/summarization/tgi/langchain/README.md deleted file mode 100644 index 5442fbf4ff..0000000000 --- a/comps/llms/summarization/tgi/langchain/README.md +++ /dev/null @@ -1,172 +0,0 @@ -# Document Summary TGI Microservice - -This microservice leverages LangChain to implement summarization strategies and facilitate LLM inference using Text Generation Inference on Intel Xeon and Gaudi2 processors. -[Text Generation Inference](https://github.com/huggingface/text-generation-inference) (TGI) is a toolkit for deploying and serving Large Language Models (LLMs). TGI enables high-performance text generation for the most popular open-source LLMs, including Llama, Falcon, StarCoder, BLOOM, GPT-NeoX, and more. - -## 🚀1. Start Microservice with Python 🐍 (Option 1) - -To start the LLM microservice, you need to install python packages first. 
- -### 1.1 Install Requirements - -```bash -pip install -r requirements.txt -``` - -### 1.2 Start LLM Service - -```bash -export HF_TOKEN=${your_hf_api_token} -docker run -p 8008:80 -v ./data:/data --name llm-docsum-tgi --shm-size 1g ghcr.io/huggingface/text-generation-inference:2.1.0 --model-id ${your_hf_llm_model} -``` - -### 1.3 Verify the TGI Service - -```bash -curl http://${your_ip}:8008/v1/chat/completions \ - -X POST \ - -d '{"model": ${your_hf_llm_model}, "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens":17}' \ - -H 'Content-Type: application/json' -``` - -### 1.4 Start LLM Service with Python Script - -```bash -export TGI_LLM_ENDPOINT="http://${your_ip}:8008" -python llm.py -``` - -## 🚀2. Start Microservice with Docker 🐳 (Option 2) - -If you start an LLM microservice with docker, the `docker_compose_llm.yaml` file will automatically start a TGI/vLLM service with docker. - -### 2.1 Setup Environment Variables - -In order to start TGI and LLM services, you need to setup the following environment variables first. - -```bash -export HF_TOKEN=${your_hf_api_token} -export TGI_LLM_ENDPOINT="http://${your_ip}:8008" -export LLM_MODEL_ID=${your_hf_llm_model} -export MAX_INPUT_TOKENS=2048 -export MAX_TOTAL_TOKENS=4096 -``` - -Please make sure MAX_TOTAL_TOKENS should be larger than (MAX_INPUT_TOKENS + max_new_tokens + 50), 50 is reserved prompt length. - -### 2.2 Build Docker Image - -```bash -cd ../../../../../ -docker build -t opea/llm-docsum-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/summarization/tgi/langchain/Dockerfile . -``` - -To start a docker container, you have two options: - -- A. Run Docker with CLI -- B. Run Docker with Docker Compose - -You can choose one as needed. - -### 2.3 Run Docker with CLI (Option A) - -```bash -docker run -d --name="llm-docsum-tgi-server" -p 9000:9000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e TGI_LLM_ENDPOINT=$TGI_LLM_ENDPOINT -e HF_TOKEN=$HF_TOKEN -e MAX_INPUT_TOKENS=${MAX_INPUT_TOKENS} -e MAX_TOTAL_TOKENS=${MAX_TOTAL_TOKENS} opea/llm-docsum-tgi:latest -``` - -### 2.4 Run Docker with Docker Compose (Option B) - -```bash -docker compose -f docker_compose_llm.yaml up -d -``` - -## 🚀3. Consume LLM Service - -### 3.1 Check Service Status - -```bash -curl http://${your_ip}:9000/v1/health_check\ - -X GET \ - -H 'Content-Type: application/json' -``` - -### 3.2 Consume LLM Service - -In DocSum microservice, except for basic LLM parameters, we also support several optimization parameters setting. - -- "language": specify the language, can be "auto", "en", "zh", default is "auto" - -If you want to deal with long context, can select suitable summary type, details in section 3.2.2. - -- "summary_type": can be "auto", "stuff", "truncate", "map_reduce", "refine", default is "auto" -- "chunk_size": max token length for each chunk. Set to be different default value according to "summary_type". -- "chunk_overlap": overlap token length between each chunk, default is 0.1\*chunk_size - -#### 3.2.1 Basic usage - -```bash -# Enable stream to receive a stream response. By default, this is set to True. -curl http://${your_ip}:9000/v1/chat/docsum \ - -X POST \ - -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. 
TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.", "max_tokens":32, "language":"en"}' \ - -H 'Content-Type: application/json' - -# Disable stream to receive a non-stream response. -curl http://${your_ip}:9000/v1/chat/docsum \ - -X POST \ - -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.", "max_tokens":32, "language":"en", "stream":false}' \ - -H 'Content-Type: application/json' - -# Use Chinese mode -curl http://${your_ip}:9000/v1/chat/docsum \ - -X POST \ - -d '{"query":"2024年9月26日,北京——今日,英特尔正式发布英特尔® 至强® 6性能核处理器(代号Granite Rapids),为AI、数据分析、科学计算等计算密集型业务提供卓越性能。", "max_tokens":32, "language":"zh", "stream":false}' \ - -H 'Content-Type: application/json' -``` - -#### 3.2.2 Long context summarization with "summary_type" - -**summary_type=auto** - -"summary_type" is set to be "auto" by default, in this mode we will check input token length, if it exceed `MAX_INPUT_TOKENS`, `summary_type` will automatically be set to `refine` mode, otherwise will be set to `stuff` mode. - -**summary_type=stuff** - -In this mode LLM generate summary based on complete input text. In this case please carefully set `MAX_INPUT_TOKENS` and `MAX_TOTAL_TOKENS` according to your model and device memory, otherwise it may exceed LLM context limit and raise error when meet long context. - -**summary_type=truncate** - -Truncate mode will truncate the input text and keep only the first chunk, whose length is equal to `min(MAX_TOTAL_TOKENS - input.max_tokens - 50, MAX_INPUT_TOKENS)` - -```bash -curl http://${your_ip}:9000/v1/chat/docsum \ - -X POST \ - -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.", "max_tokens":32, "language":"en", "summary_type": "truncate", "chunk_size": 2000}' \ - -H 'Content-Type: application/json' -``` - -**summary_type=map_reduce** - -Map_reduce mode will split the inputs into multiple chunks, map each document to an individual summary, then consolidate those summaries into a single global summary. `stream=True` is not allowed here. - -In this mode, default `chunk_size` is set to be `min(MAX_TOTAL_TOKENS - input.max_tokens - 50, MAX_INPUT_TOKENS)` - -```bash -curl http://${your_ip}:9000/v1/chat/docsum \ - -X POST \ - -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.", "max_tokens":32, "language":"en", "summary_type": "map_reduce", "chunk_size": 2000, "stream":false}' \ - -H 'Content-Type: application/json' -``` - -**summary_type=refine** - -Refin mode will split the inputs into multiple chunks, generate summary for the first one, then combine with the second, loops over every remaining chunks to get the final summary. - -In this mode, default `chunk_size` is set to be `min(MAX_TOTAL_TOKENS - 2 * input.max_tokens - 128, MAX_INPUT_TOKENS)`. 
- -```bash -curl http://${your_ip}:9000/v1/chat/docsum \ - -X POST \ - -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.", "max_tokens":32, "language":"en", "summary_type": "refine", "chunk_size": 2000}' \ - -H 'Content-Type: application/json' -``` diff --git a/comps/llms/summarization/tgi/langchain/docker_compose_llm.yaml b/comps/llms/summarization/tgi/langchain/docker_compose_llm.yaml deleted file mode 100644 index 93579a5712..0000000000 --- a/comps/llms/summarization/tgi/langchain/docker_compose_llm.yaml +++ /dev/null @@ -1,37 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -version: "3.8" - -services: - tgi_service: - image: ghcr.io/huggingface/text-generation-inference:2.1.0 - container_name: tgi-service - ports: - - "8008:80" - volumes: - - "./data:/data" - environment: - HF_TOKEN: ${HF_TOKEN} - shm_size: 1g - command: --model-id ${LLM_MODEL_ID} --max-input-length ${MAX_INPUT_TOKENS} --max-total-tokens ${MAX_TOTAL_TOKENS} - llm: - image: opea/llm-docsum-tgi:latest - container_name: llm-docsum-tgi-server - ports: - - "9000:9000" - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} - HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} - MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS} - MAX_TOTAL_TOKENS: ${MAX_TOTAL_TOKENS} - LLM_MODEL_ID: ${LLM_MODEL_ID} - restart: unless-stopped - -networks: - default: - driver: bridge diff --git a/comps/llms/summarization/tgi/langchain/llm.py b/comps/llms/summarization/tgi/langchain/llm.py deleted file mode 100644 index addb090071..0000000000 --- a/comps/llms/summarization/tgi/langchain/llm.py +++ /dev/null @@ -1,245 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import os - -from fastapi.responses import StreamingResponse -from langchain.chains.summarize import load_summarize_chain -from langchain.docstore.document import Document -from langchain.prompts import PromptTemplate -from langchain_community.llms import HuggingFaceEndpoint -from langchain_text_splitters import CharacterTextSplitter, RecursiveCharacterTextSplitter -from transformers import AutoTokenizer - -from comps import CustomLogger, DocSumLLMParams, GeneratedDoc, ServiceType, opea_microservices, register_microservice -from comps.cores.mega.utils import get_access_token - -logger = CustomLogger("llm_docsum") -logflag = os.getenv("LOGFLAG", False) - -# Environment variables -TOKEN_URL = os.getenv("TOKEN_URL") -CLIENTID = os.getenv("CLIENTID") -CLIENT_SECRET = os.getenv("CLIENT_SECRET") -MAX_INPUT_TOKENS = int(os.getenv("MAX_INPUT_TOKENS", 2048)) -MAX_TOTAL_TOKENS = int(os.getenv("MAX_TOTAL_TOKENS", 4096)) -LLM_MODEL_ID = os.getenv("LLM_MODEL_ID", "Intel/neural-chat-7b-v3-3") - -templ_en = """Write a concise summary of the following: - - -"{text}" - - -CONCISE SUMMARY:""" - -templ_zh = """请简要概括以下内容: - - -"{text}" - - -概况:""" - - -templ_refine_en = """Your job is to produce a final summary. -We have provided an existing summary up to a certain point, then we will provide more context. -You need to refine the existing summary (only if needed) with new context and generate a final summary. 
- - -Existing Summary: -"{existing_answer}" - - - -New Context: -"{text}" - - - -Final Summary: - -""" - -templ_refine_zh = """\ -你的任务是生成一个最终摘要。 -我们已经处理好部分文本并生成初始摘要, 并提供了新的未处理文本 -你需要根据新提供的文本,结合初始摘要,生成一个最终摘要。 - - -初始摘要: -"{existing_answer}" - - - -新的文本: -"{text}" - - - -最终摘要: - -""" - - -@register_microservice( - name="opea_service@llm_docsum", - service_type=ServiceType.LLM, - endpoint="/v1/chat/docsum", - host="0.0.0.0", - port=9000, -) -async def llm_generate(input: DocSumLLMParams): - if logflag: - logger.info(input) - - ### check summary type - summary_types = ["auto", "stuff", "truncate", "map_reduce", "refine"] - if input.summary_type not in summary_types: - raise NotImplementedError(f"Please specify the summary_type in {summary_types}") - if input.summary_type == "auto": ### Check input token length in auto mode - token_len = len(tokenizer.encode(input.query)) - if token_len > MAX_INPUT_TOKENS + 50: - input.summary_type = "refine" - if logflag: - logger.info( - f"Input token length {token_len} exceed MAX_INPUT_TOKENS + 50 {MAX_INPUT_TOKENS+50}, auto switch to 'refine' mode." - ) - else: - input.summary_type = "stuff" - if logflag: - logger.info( - f"Input token length {token_len} not exceed MAX_INPUT_TOKENS + 50 {MAX_INPUT_TOKENS+50}, auto switch to 'stuff' mode." - ) - - if input.language in ["en", "auto"]: - templ = templ_en - templ_refine = templ_refine_en - elif input.language in ["zh"]: - templ = templ_zh - templ_refine = templ_refine_zh - else: - raise NotImplementedError('Please specify the input language in "en", "zh", "auto"') - - ## Prompt - PROMPT = PromptTemplate.from_template(templ) - if input.summary_type == "refine": - PROMPT_REFINE = PromptTemplate.from_template(templ_refine) - if logflag: - logger.info("After prompting:") - logger.info(PROMPT) - if input.summary_type == "refine": - logger.info(PROMPT_REFINE) - - ## Split text - if input.summary_type == "stuff": - text_splitter = CharacterTextSplitter() - else: - if input.summary_type == "refine": - if MAX_TOTAL_TOKENS <= 2 * input.max_tokens + 128: - raise RuntimeError("In Refine mode, Please set MAX_TOTAL_TOKENS larger than (max_tokens * 2 + 128)") - max_input_tokens = min( - MAX_TOTAL_TOKENS - 2 * input.max_tokens - 128, MAX_INPUT_TOKENS - ) # 128 is reserved token length for prompt - else: - if MAX_TOTAL_TOKENS <= input.max_tokens + 50: - raise RuntimeError("Please set MAX_TOTAL_TOKENS larger than max_tokens + 50)") - max_input_tokens = min( - MAX_TOTAL_TOKENS - input.max_tokens - 50, MAX_INPUT_TOKENS - ) # 50 is reserved token length for prompt - chunk_size = min(input.chunk_size, max_input_tokens) if input.chunk_size > 0 else max_input_tokens - chunk_overlap = input.chunk_overlap if input.chunk_overlap > 0 else int(0.1 * chunk_size) - text_splitter = RecursiveCharacterTextSplitter.from_huggingface_tokenizer( - tokenizer=tokenizer, chunk_size=chunk_size, chunk_overlap=chunk_overlap - ) - if logflag: - logger.info(f"set chunk size to: {chunk_size}") - logger.info(f"set chunk overlap to: {chunk_overlap}") - - texts = text_splitter.split_text(input.query) - docs = [Document(page_content=t) for t in texts] - if logflag: - logger.info(f"Split input query into {len(docs)} chunks") - logger.info(f"The character length of the first chunk is {len(texts[0])}") - - ## Access auth - access_token = ( - get_access_token(TOKEN_URL, CLIENTID, CLIENT_SECRET) if TOKEN_URL and CLIENTID and CLIENT_SECRET else None - ) - server_kwargs = {} - if access_token: - server_kwargs["headers"] = {"Authorization": f"Bearer {access_token}"} - 
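# (annotation) The optional TOKEN_URL / CLIENTID / CLIENT_SECRET environment variables above enable
# bearer-token authentication: when all three are set, the fetched access token is forwarded to the
# TGI endpoint through server_kwargs["headers"]; otherwise requests are sent without an Authorization header.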
- ## LLM - if input.stream and input.summary_type == "map_reduce": - logger.info("Map Reduce mode don't support stream=True, set to stream=False") - input.stream = False - llm_endpoint = os.getenv("TGI_LLM_ENDPOINT", "http://localhost:8080") - llm = HuggingFaceEndpoint( - endpoint_url=llm_endpoint, - max_new_tokens=input.max_tokens, - top_k=input.top_k, - top_p=input.top_p, - typical_p=input.typical_p, - temperature=input.temperature, - repetition_penalty=input.repetition_penalty, - streaming=input.stream, - server_kwargs=server_kwargs, - ) - - ## LLM chain - summary_type = input.summary_type - if summary_type == "stuff": - llm_chain = load_summarize_chain(llm=llm, prompt=PROMPT) - elif summary_type == "truncate": - docs = [docs[0]] - llm_chain = load_summarize_chain(llm=llm, prompt=PROMPT) - elif summary_type == "map_reduce": - llm_chain = load_summarize_chain( - llm=llm, map_prompt=PROMPT, combine_prompt=PROMPT, chain_type="map_reduce", return_intermediate_steps=True - ) - elif summary_type == "refine": - llm_chain = load_summarize_chain( - llm=llm, - question_prompt=PROMPT, - refine_prompt=PROMPT_REFINE, - chain_type="refine", - return_intermediate_steps=True, - ) - else: - raise NotImplementedError('Please specify the summary_type in "stuff", "truncate", "map_reduce", "refine"') - - if input.stream: - - async def stream_generator(): - from langserve.serialization import WellKnownLCSerializer - - _serializer = WellKnownLCSerializer() - async for chunk in llm_chain.astream_log(docs): - data = _serializer.dumps({"ops": chunk.ops}).decode("utf-8") - if logflag: - logger.info(data) - yield f"data: {data}\n\n" - yield "data: [DONE]\n\n" - - return StreamingResponse(stream_generator(), media_type="text/event-stream") - else: - response = await llm_chain.ainvoke(docs) - - if input.summary_type in ["map_reduce", "refine"]: - intermediate_steps = response["intermediate_steps"] - if logflag: - logger.info("intermediate_steps:") - logger.info(intermediate_steps) - - output_text = response["output_text"] - if logflag: - logger.info("\n\noutput_text:") - logger.info(output_text) - - return GeneratedDoc(text=output_text, prompt=input.query) - - -if __name__ == "__main__": - tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL_ID) - opea_microservices["opea_service@llm_docsum"].start() diff --git a/comps/llms/summarization/vllm/langchain/Dockerfile b/comps/llms/summarization/vllm/langchain/Dockerfile deleted file mode 100644 index 3a1cd5a8f7..0000000000 --- a/comps/llms/summarization/vllm/langchain/Dockerfile +++ /dev/null @@ -1,28 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -FROM python:3.11-slim - -ARG ARCH="cpu" - -RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ - libgl1-mesa-glx \ - libjemalloc-dev - -RUN useradd -m -s /bin/bash user && \ - mkdir -p /home/user && \ - chown -R user /home/user/ - -USER user - -COPY comps /home/user/comps - -RUN pip install --no-cache-dir --upgrade pip setuptools && \ - if [ ${ARCH} = "cpu" ]; then pip install --no-cache-dir torch torchvision --index-url https://download.pytorch.org/whl/cpu; fi && \ - pip install --no-cache-dir -r /home/user/comps/llms/summarization/vllm/langchain/requirements.txt - -ENV PYTHONPATH=$PYTHONPATH:/home/user - -WORKDIR /home/user/comps/llms/summarization/vllm/langchain - -ENTRYPOINT ["bash", "entrypoint.sh"] diff --git a/comps/llms/summarization/vllm/langchain/README.md b/comps/llms/summarization/vllm/langchain/README.md deleted file mode 100644 index 
061a526d68..0000000000
--- a/comps/llms/summarization/vllm/langchain/README.md
+++ /dev/null
@@ -1,171 +0,0 @@
-# Document Summary vLLM Microservice
-
-This microservice leverages LangChain to implement summarization strategies and facilitate LLM inference using vLLM.
-[vLLM](https://github.com/vllm-project/vllm) is a fast and easy-to-use library for LLM inference and serving. It delivers state-of-the-art serving throughput with advanced features such as PagedAttention and continuous batching. Besides GPUs, vLLM already supports [Intel CPUs](https://www.intel.com/content/www/us/en/products/overview.html) and [Gaudi accelerators](https://habana.ai/products).
-
-## 🚀1. Start Microservice with Python 🐍 (Option 1)
-
-To start the LLM microservice, install the required Python packages first.
-
-### 1.1 Install Requirements
-
-```bash
-pip install -r requirements.txt
-```
-
-### 1.2 Start LLM Service
-
-```bash
-export HF_TOKEN=${your_hf_api_token}
-export LLM_MODEL_ID=${your_hf_llm_model}
-docker run -p 8008:80 -v ./data:/data --name llm-docsum-vllm --shm-size 1g opea/vllm-gaudi:latest --model-id ${LLM_MODEL_ID}
-```
-
-### 1.3 Verify the vLLM Service
-
-```bash
-curl http://${your_ip}:8008/v1/chat/completions \
-  -X POST \
-  -H "Content-Type: application/json" \
-  -d '{"model": "meta-llama/Meta-Llama-3-8B-Instruct", "messages": [{"role": "user", "content": "What is Deep Learning? "}]}'
-```
-
-### 1.4 Start LLM Service with Python Script
-
-```bash
-export vLLM_ENDPOINT="http://${your_ip}:8008"
-python llm.py
-```
-
-## 🚀2. Start Microservice with Docker 🐳 (Option 2)
-
-If you start the LLM microservice with docker, the `docker_compose_llm.yaml` file will automatically start a vLLM service with docker.
-
-To set up or build the vLLM image, follow the instructions provided in [vLLM Gaudi](https://github.com/opea-project/GenAIComps/tree/main/comps/llms/text-generation/vllm/langchain#22-vllm-on-gaudi).
-
-### 2.1 Setup Environment Variables
-
-To start the vLLM and LLM services, set up the following environment variables first.
-
-```bash
-export HF_TOKEN=${your_hf_api_token}
-export vLLM_ENDPOINT="http://${your_ip}:8008"
-export LLM_MODEL_ID=${your_hf_llm_model}
-```
-
-### 2.2 Build Docker Image
-
-```bash
-cd ../../../../../
-docker build -t opea/llm-docsum-vllm:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/summarization/vllm/langchain/Dockerfile .
-```
-
-To start a docker container, you have two options:
-
-- A. Run Docker with CLI
-- B. Run Docker with Docker Compose
-
-You can choose one as needed.
-
-### 2.3 Run Docker with CLI (Option A)
-
-```bash
-docker run -d --name="llm-docsum-vllm-server" -p 9000:9000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e vLLM_ENDPOINT=$vLLM_ENDPOINT -e HF_TOKEN=$HF_TOKEN opea/llm-docsum-vllm:latest
-```
-
-### 2.4 Run Docker with Docker Compose (Option B)
-
-```bash
-docker compose -f docker_compose_llm.yaml up -d
-```
-
-## 🚀3. Consume LLM Service
-
-### 3.1 Check Service Status
-
-```bash
-curl http://${your_ip}:9000/v1/health_check\
-  -X GET \
-  -H 'Content-Type: application/json'
-```
-
-### 3.2 Consume LLM Service
-
-In the DocSum microservice, in addition to the basic LLM parameters, we also support several optimization parameters:
-
-- "language": specify the language, can be "auto", "en", "zh", default is "auto"
-
-If you need to handle long context, select a suitable summary type; details are in section 3.2.2.
- -- "summary_type": can be "auto", "stuff", "truncate", "map_reduce", "refine", default is "auto" -- "chunk_size": max token length for each chunk. Set to be different default value according to "summary_type". -- "chunk_overlap": overlap token length between each chunk, default is 0.1\*chunk_size - -#### 3.2.1 Basic usage - -```bash -# Enable stream to receive a stream response. By default, this is set to True. -curl http://${your_ip}:9000/v1/chat/docsum \ - -X POST \ - -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.", "max_tokens":32, "language":"en"}' \ - -H 'Content-Type: application/json' - -# Disable stream to receive a non-stream response. -curl http://${your_ip}:9000/v1/chat/docsum \ - -X POST \ - -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.", "max_tokens":32, "language":"en", "stream":false}' \ - -H 'Content-Type: application/json' - -# Use Chinese mode -curl http://${your_ip}:9000/v1/chat/docsum \ - -X POST \ - -d '{"query":"2024年9月26日,北京——今日,英特尔正式发布英特尔® 至强® 6性能核处理器(代号Granite Rapids),为AI、数据分析、科学计算等计算密集型业务提供卓越性能。", "max_tokens":32, "language":"zh", "stream":false}' \ - -H 'Content-Type: application/json' -``` - -#### 3.2.2 Long context summarization with "summary_type" - -**summary_type=auto** - -"summary_type" is set to be "auto" by default, in this mode we will check input token length, if it exceed `MAX_INPUT_TOKENS`, `summary_type` will automatically be set to `refine` mode, otherwise will be set to `stuff` mode. - -**summary_type=stuff** - -In this mode LLM generate summary based on complete input text. In this case please carefully set `MAX_INPUT_TOKENS` and `MAX_TOTAL_TOKENS` according to your model and device memory, otherwise it may exceed LLM context limit and raise error when meet long context. - -**summary_type=truncate** - -Truncate mode will truncate the input text and keep only the first chunk, whose length is equal to `min(MAX_TOTAL_TOKENS - input.max_tokens - 50, MAX_INPUT_TOKENS)` - -```bash -curl http://${your_ip}:9000/v1/chat/docsum \ - -X POST \ - -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.", "max_tokens":32, "language":"en", "summary_type": "truncate", "chunk_size": 2000}' \ - -H 'Content-Type: application/json' -``` - -**summary_type=map_reduce** - -Map_reduce mode will split the inputs into multiple chunks, map each document to an individual summary, then consolidate those summaries into a single global summary. `stream=True` is not allowed here. - -In this mode, default `chunk_size` is set to be `min(MAX_TOTAL_TOKENS - input.max_tokens - 50, MAX_INPUT_TOKENS)` - -```bash -curl http://${your_ip}:9000/v1/chat/docsum \ - -X POST \ - -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. 
TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.", "max_tokens":32, "language":"en", "summary_type": "map_reduce", "chunk_size": 2000, "stream":false}' \ - -H 'Content-Type: application/json' -``` - -**summary_type=refine** - -Refin mode will split the inputs into multiple chunks, generate summary for the first one, then combine with the second, loops over every remaining chunks to get the final summary. - -In this mode, default `chunk_size` is set to be `min(MAX_TOTAL_TOKENS - 2 * input.max_tokens - 128, MAX_INPUT_TOKENS)`. - -```bash -curl http://${your_ip}:9000/v1/chat/docsum \ - -X POST \ - -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.", "max_tokens":32, "language":"en", "summary_type": "refine", "chunk_size": 2000}' \ - -H 'Content-Type: application/json' -``` diff --git a/comps/llms/summarization/vllm/langchain/__init__.py b/comps/llms/summarization/vllm/langchain/__init__.py deleted file mode 100644 index 916f3a44b2..0000000000 --- a/comps/llms/summarization/vllm/langchain/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 diff --git a/comps/llms/summarization/vllm/langchain/docker_compose_llm.yaml b/comps/llms/summarization/vllm/langchain/docker_compose_llm.yaml deleted file mode 100644 index 26847387cc..0000000000 --- a/comps/llms/summarization/vllm/langchain/docker_compose_llm.yaml +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -version: "3.8" - -services: - vllm-service: - image: opea/vllm-gaudi:latest - container_name: vllm-gaudi-server - ports: - - "8008:80" - volumes: - - "./data:/data" - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - HF_TOKEN: ${HF_TOKEN} - HABANA_VISIBLE_DEVICES: all - OMPI_MCA_btl_vader_single_copy_mechanism: none - LLM_MODEL_ID: ${LLM_MODEL_ID} - runtime: habana - cap_add: - - SYS_NICE - ipc: host - command: --model $LLM_MODEL_ID --tensor-parallel-size 1 --host 0.0.0.0 --port 80 - llm: - image: opea/llm-docsum-vllm:latest - container_name: llm-docsum-vllm-server - ports: - - "9000:9000" - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - vLLM_ENDPOINT: ${vLLM_ENDPOINT} - HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} - LLM_MODEL_ID: ${LLM_MODEL_ID} - MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS} - MAX_TOTAL_TOKENS: ${MAX_TOTAL_TOKENS} - restart: unless-stopped - -networks: - default: - driver: bridge diff --git a/comps/llms/summarization/vllm/langchain/entrypoint.sh b/comps/llms/summarization/vllm/langchain/entrypoint.sh deleted file mode 100644 index d60eddd36b..0000000000 --- a/comps/llms/summarization/vllm/langchain/entrypoint.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/usr/bin/env bash - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -pip --no-cache-dir install -r requirements-runtime.txt - -python llm.py diff --git a/comps/llms/summarization/vllm/langchain/llm.py b/comps/llms/summarization/vllm/langchain/llm.py deleted file mode 100644 index 5371e7e560..0000000000 --- a/comps/llms/summarization/vllm/langchain/llm.py +++ /dev/null @@ -1,247 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - 
-import os -from pathlib import Path as p - -from fastapi.responses import StreamingResponse -from langchain.chains.summarize import load_summarize_chain -from langchain.docstore.document import Document -from langchain.prompts import PromptTemplate -from langchain_community.llms import VLLMOpenAI -from langchain_text_splitters import CharacterTextSplitter, RecursiveCharacterTextSplitter -from transformers import AutoTokenizer - -from comps import CustomLogger, DocSumLLMParams, GeneratedDoc, ServiceType, opea_microservices, register_microservice -from comps.cores.mega.utils import get_access_token - -logger = CustomLogger("llm_docsum") -logflag = os.getenv("LOGFLAG", False) - -# Environment variables -TOKEN_URL = os.getenv("TOKEN_URL") -CLIENTID = os.getenv("CLIENTID") -CLIENT_SECRET = os.getenv("CLIENT_SECRET") -MAX_INPUT_TOKENS = int(os.getenv("MAX_INPUT_TOKENS")) -MAX_TOTAL_TOKENS = int(os.getenv("MAX_TOTAL_TOKENS")) -LLM_MODEL_ID = os.getenv("LLM_MODEL_ID", None) - -templ_en = """Write a concise summary of the following: - - -"{text}" - - -CONCISE SUMMARY:""" - -templ_zh = """请简要概括以下内容: - - -"{text}" - - -概况:""" - - -templ_refine_en = """Your job is to produce a final summary. -We have provided an existing summary up to a certain point, then we will provide more context. -You need to refine the existing summary (only if needed) with new context and generate a final summary. - - -Existing Summary: -"{existing_answer}" - - - -New Context: -"{text}" - - - -Final Summary: - -""" - -templ_refine_zh = """\ -你的任务是生成一个最终摘要。 -我们已经处理好部分文本并生成初始摘要, 并提供了新的未处理文本 -你需要根据新提供的文本,结合初始摘要,生成一个最终摘要。 - - -初始摘要: -"{existing_answer}" - - - -新的文本: -"{text}" - - - -最终摘要: - -""" - - -@register_microservice( - name="opea_service@llm_docsum", - service_type=ServiceType.LLM, - endpoint="/v1/chat/docsum", - host="0.0.0.0", - port=9000, -) -async def llm_generate(input: DocSumLLMParams): - if logflag: - logger.info(input) - - ### check summary type - summary_types = ["auto", "stuff", "truncate", "map_reduce", "refine"] - if input.summary_type not in summary_types: - raise NotImplementedError(f"Please specify the summary_type in {summary_types}") - if input.summary_type == "auto": ### Check input token length in auto mode - token_len = len(tokenizer.encode(input.query)) - if token_len > MAX_INPUT_TOKENS + 50: - input.summary_type = "refine" - if logflag: - logger.info( - f"Input token length {token_len} exceed MAX_INPUT_TOKENS + 50 {MAX_INPUT_TOKENS+50}, auto switch to 'refine' mode." - ) - else: - input.summary_type = "stuff" - if logflag: - logger.info( - f"Input token length {token_len} not exceed MAX_INPUT_TOKENS + 50 {MAX_INPUT_TOKENS+50}, auto switch to 'stuff' mode." 
- ) - - if input.language in ["en", "auto"]: - templ = templ_en - templ_refine = templ_refine_en - elif input.language in ["zh"]: - templ = templ_zh - templ_refine = templ_refine_zh - else: - raise NotImplementedError('Please specify the input language in "en", "zh", "auto"') - - ## Prompt - PROMPT = PromptTemplate.from_template(templ) - if input.summary_type == "refine": - PROMPT_REFINE = PromptTemplate.from_template(templ_refine) - if logflag: - logger.info("After prompting:") - logger.info(PROMPT) - if input.summary_type == "refine": - logger.info(PROMPT_REFINE) - - ## Split text - if input.summary_type == "stuff": - text_splitter = CharacterTextSplitter() - else: - if input.summary_type == "refine": - if MAX_TOTAL_TOKENS <= 2 * input.max_tokens + 128: - raise RuntimeError("In Refine mode, Please set MAX_TOTAL_TOKENS larger than (max_tokens * 2 + 128)") - max_input_tokens = min( - MAX_TOTAL_TOKENS - 2 * input.max_tokens - 128, MAX_INPUT_TOKENS - ) # 128 is reserved token length for prompt - else: - if MAX_TOTAL_TOKENS <= input.max_tokens + 50: - raise RuntimeError("Please set MAX_TOTAL_TOKENS larger than max_tokens + 50)") - max_input_tokens = min( - MAX_TOTAL_TOKENS - input.max_tokens - 50, MAX_INPUT_TOKENS - ) # 50 is reserved token length for prompt - chunk_size = min(input.chunk_size, max_input_tokens) if input.chunk_size > 0 else max_input_tokens - chunk_overlap = input.chunk_overlap if input.chunk_overlap > 0 else int(0.1 * chunk_size) - text_splitter = RecursiveCharacterTextSplitter.from_huggingface_tokenizer( - tokenizer=tokenizer, chunk_size=chunk_size, chunk_overlap=chunk_overlap - ) - if logflag: - logger.info(f"set chunk size to: {chunk_size}") - logger.info(f"set chunk overlap to: {chunk_overlap}") - - texts = text_splitter.split_text(input.query) - docs = [Document(page_content=t) for t in texts] - if logflag: - logger.info(f"Split input query into {len(docs)} chunks") - logger.info(f"The character length of the first chunk is {len(texts[0])}") - - ## Access auth - access_token = ( - get_access_token(TOKEN_URL, CLIENTID, CLIENT_SECRET) if TOKEN_URL and CLIENTID and CLIENT_SECRET else None - ) - headers = {} - if access_token: - headers = {"Authorization": f"Bearer {access_token}"} - - ## LLM - if input.stream and input.summary_type == "map_reduce": - logger.info("Map Reduce mode don't support stream=True, set to stream=False") - input.stream = False - llm_endpoint = os.getenv("vLLM_ENDPOINT", "http://localhost:8080") - model = input.model if input.model else os.getenv("LLM_MODEL_ID") - llm = VLLMOpenAI( - openai_api_key="EMPTY", - openai_api_base=llm_endpoint + "/v1", - model_name=model, - default_headers=headers, - max_tokens=input.max_tokens, - top_p=input.top_p, - streaming=input.stream, - temperature=input.temperature, - presence_penalty=input.repetition_penalty, - ) - - ## LLM chain - summary_type = input.summary_type - if summary_type == "stuff": - llm_chain = load_summarize_chain(llm=llm, prompt=PROMPT) - elif summary_type == "truncate": - docs = [docs[0]] - llm_chain = load_summarize_chain(llm=llm, prompt=PROMPT) - elif summary_type == "map_reduce": - llm_chain = load_summarize_chain( - llm=llm, map_prompt=PROMPT, combine_prompt=PROMPT, chain_type="map_reduce", return_intermediate_steps=True - ) - elif summary_type == "refine": - llm_chain = load_summarize_chain( - llm=llm, - question_prompt=PROMPT, - refine_prompt=PROMPT_REFINE, - chain_type="refine", - return_intermediate_steps=True, - ) - else: - raise NotImplementedError('Please specify the summary_type in 
"stuff", "truncate", "map_reduce", "refine"') - - if input.stream: - - async def stream_generator(): - from langserve.serialization import WellKnownLCSerializer - - _serializer = WellKnownLCSerializer() - async for chunk in llm_chain.astream_log(docs): - data = _serializer.dumps({"ops": chunk.ops}).decode("utf-8") - if logflag: - logger.info(data) - yield f"data: {data}\n\n" - yield "data: [DONE]\n\n" - - return StreamingResponse(stream_generator(), media_type="text/event-stream") - else: - response = await llm_chain.ainvoke(docs) - - if input.summary_type in ["map_reduce", "refine"]: - intermediate_steps = response["intermediate_steps"] - if logflag: - logger.info("intermediate_steps:") - logger.info(intermediate_steps) - - output_text = response["output_text"] - if logflag: - logger.info("\n\noutput_text:") - logger.info(output_text) - - return GeneratedDoc(text=output_text, prompt=input.query) - - -if __name__ == "__main__": - tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL_ID) - opea_microservices["opea_service@llm_docsum"].start() diff --git a/comps/llms/summarization/vllm/langchain/requirements.txt b/comps/llms/summarization/vllm/langchain/requirements.txt deleted file mode 100644 index 1694618637..0000000000 --- a/comps/llms/summarization/vllm/langchain/requirements.txt +++ /dev/null @@ -1,16 +0,0 @@ -docarray[full] -fastapi -httpx==0.27.2 -huggingface_hub -langchain #==0.1.12 -langchain-huggingface -langchain-openai -langchain_community -langchainhub -opentelemetry-api -opentelemetry-exporter-otlp -opentelemetry-sdk -prometheus-fastapi-instrumentator -shortuuid -transformers -uvicorn diff --git a/comps/llms/text-generation/README.md b/comps/llms/text-generation/README.md deleted file mode 100644 index 2c12e1cfc4..0000000000 --- a/comps/llms/text-generation/README.md +++ /dev/null @@ -1,318 +0,0 @@ -# LLM Microservice - -This microservice, designed for Language Model Inference (LLM), processes input consisting of a query string and associated reranked documents. It constructs a prompt based on the query and documents, which is then used to perform inference with a large language model. The service delivers the inference results as output. - -A prerequisite for using this microservice is that users must have a LLM text generation service (etc., TGI, vLLM) already running. Users need to set the LLM service's endpoint into an environment variable. The microservice utilizes this endpoint to create an LLM object, enabling it to communicate with the LLM service for executing language model operations. - -Overall, this microservice offers a streamlined way to integrate large language model inference into applications, requiring minimal setup from the user beyond initiating a TGI/vLLM service and configuring the necessary environment variables. This allows for the seamless processing of queries and documents to generate intelligent, context-aware responses. - -## Validated LLM Models - -| Model | TGI-Gaudi | vLLM-CPU | vLLM-Gaudi | -| --------------------------- | --------- | -------- | ---------- | -| [Intel/neural-chat-7b-v3-3] | ✓ | ✓ | ✓ | -| [Llama-2-7b-chat-hf] | ✓ | ✓ | ✓ | -| [Llama-2-70b-chat-hf] | ✓ | - | ✓ | -| [Meta-Llama-3-8B-Instruct] | ✓ | ✓ | ✓ | -| [Meta-Llama-3-70B-Instruct] | ✓ | - | ✓ | -| [Phi-3] | x | Limit 4K | Limit 4K | - -## Clone OPEA GenAIComps - -Clone this repository at your desired location and set an environment variable for easy setup and usage throughout the instructions. 
- -```bash -git clone https://github.com/opea-project/GenAIComps.git - -export OPEA_GENAICOMPS_ROOT=$(pwd)/GenAIComps -``` - -## Prerequisites - -You must create a user account with [HuggingFace] and obtain permission to use the gated LLM models by adhering to the guidelines provided on the respective model's webpage. The environment variables `LLM_MODEL` would be the HuggingFace model id and the `HF_TOKEN` is your HuggugFace account's "User Access Token". - -## 🚀1. Start Microservice with Python (Option 1) - -To start the LLM microservice, you need to install python packages first. - -### 1.1 Install Requirements - -```bash -# Install opea-comps -pip install opea-comps - -# Install requirements from comps/llms -cd ${OPEA_GENAICOMPS_ROOT}/comps/llms - -pip install -r requirements.txt -``` - -### 1.2 Start LLM Service with Python Script - -#### 1.2.1 Start the TGI Service - -Install the requirements for TGI Service - -```bash -cd ${OPEA_GENAICOMPS_ROOT}/comps/llms/text-generation/tgi - -pip install -r requirements.txt -``` - -Execute the docker run command to initiate the backend, along with the Python script that launches the microservice. - -```bash -export TGI_HOST_IP=$(hostname -I | awk '{print $1}') # This sets IP of the current machine -export LLM_MODEL=${your_hf_llm_model} -export DATA_DIR=$HOME/data # Location to download the model -export HF_TOKEN=${your_hf_api_token} - -# Initiate the backend -docker run -d \ - -p 8008:80 \ - -e HF_TOKEN=${HF_TOKEN} \ - -v ${DATA_DIR}:/data \ - --name tgi_service \ - --shm-size 1g \ - ghcr.io/huggingface/text-generation-inference:1.4 \ - --model-id ${LLM_MODEL} - -# Start the microservice with an endpoint as the above docker run command -export TGI_LLM_ENDPOINT="http://${TGI_HOST_IP}:8008" - -python llm.py -``` - -#### 1.2.2 Start the vLLM Service - -Install the requirements for vLLM Service - -```bash -cd ${OPEA_GENAICOMPS_ROOT}/comps/llms/text-generation/vllm/langchain - -pip install -r requirements.txt -``` - -Execute the docker run command to initiate the backend, along with the Python script that launches the microservice. - -```bash -export vLLM_HOST_IP=$(hostname -I | awk '{print $1}') # This sets IP of the current machine -export LLM_MODEL=${your_hf_llm_model} -export DATA_DIR=$HOME/data # Location to download the model -export HF_TOKEN=${your_hf_api_token} - -# Build the image first as opea/vllm-cpu -bash ${OPEA_GENAICOMPS_ROOT}/comps/llms/text-generation/vllm/langchain/dependency/build_docker_vllm.sh cpu - -# Initiate the backend -docker run -d -it \ - --name vllm_service \ - -p 8008:80 \ - -e HF_TOKEN=${HF_TOKEN} \ - -e VLLM_CPU_KVCACHE_SPACE=40 \ - -v ${DATA_DIR}:/data \ - opea/vllm-cpu:latest \ - --model ${LLM_MODEL} \ - --port 80 - -# Start the microservice with an endpoint as the above docker run command -export vLLM_ENDPOINT="http://${vLLM_HOST_IP}:8008" - -python llm.py -``` - -## 🚀2. Start Microservice with Docker (Option 2) - -In order to start the microservices with docker, you need to build the docker images first for the microservice. - -### 2.1 Build Docker Image - -```bash -# Build the microservice docker -cd ${OPEA_GENAICOMPS_ROOT} - -docker build \ - --build-arg https_proxy=$https_proxy \ - --build-arg http_proxy=$http_proxy \ - -t opea/llm:latest \ - -f comps/llms/src/text-generation/Dockerfile . -``` - -### 2.2 Start LLM Service with the built image - -To start a docker container, you have two options: - -- A. Run Docker with CLI -- B. Run Docker with Docker Compose - -You can choose one as needed. 
If you start an LLM microservice with docker compose, the `docker_compose_llm.yaml` file will automatically start both endpoint and the microservice docker. - -#### 2.2.1 Setup Environment Variables - -In order to start TGI and LLM services, you need to setup the following environment variables first. - -```bash -export HF_TOKEN=${your_hf_api_token} -export TGI_LLM_ENDPOINT="http://${your_ip}:8008" -export LLM_MODEL=${your_hf_llm_model} -export DATA_DIR=$HOME/data -``` - -In order to start vLLM and LLM services, you need to setup the following environment variables first. - -```bash -export HF_TOKEN=${your_hf_api_token} -export vLLM_LLM_ENDPOINT="http://${your_ip}:8008" -export LLM_MODEL=${your_hf_llm_model} -``` - -### 2.3 Run Docker with CLI (Option A) - -#### 2.3.1 TGI - -Start TGI endpoint. - -```bash -docker run -d \ - -p 8008:80 \ - -e HF_TOKEN=${HF_TOKEN} \ - -v ${DATA_DIR}:/data \ - --name tgi_service \ - --shm-size 1g \ - ghcr.io/huggingface/text-generation-inference:1.4 \ - --model-id ${LLM_MODEL} -``` - -Start TGI microservice - -```bash -docker run -d \ - --name="llm-tgi-server" \ - -p 9000:9000 \ - --ipc=host \ - -e http_proxy=$http_proxy \ - -e https_proxy=$https_proxy \ - -e TGI_LLM_ENDPOINT=$TGI_LLM_ENDPOINT \ - -e HF_TOKEN=$HF_TOKEN \ - opea/llm-textgen:latest -``` - -#### 2.3.2 vLLM - -Start vllm endpoint. - -```bash -bash ${OPEA_GENAICOMPS_ROOT}/comps/llms/text-generation/vllm/langchain/dependency/launch_vllm_service.sh -``` - -Start vllm microservice. - -```bash -docker run \ - --name="llm-vllm-server" \ - -p 9000:9000 \ - --ipc=host \ - -e http_proxy=$http_proxy \ - -e https_proxy=$https_proxy \ - -e no_proxy=${no_proxy} \ - -e vLLM_LLM_ENDPOINT=$vLLM_LLM_ENDPOINT \ - -e HF_TOKEN=$HF_TOKEN \ - -e LLM_MODEL=$LLM_MODEL \ - opea/llm-textgen:latest -``` - -### 2.4 Run Docker with Docker Compose (Option B) - -#### 2.4.1 TGI - -```bash -cd ${OPEA_GENAICOMPS_ROOT}/comps/llms/text-generation/tgi -docker compose -f docker_compose_llm.yaml up -d -``` - -#### 2.4.2 vLLM - -```bash -cd ${OPEA_GENAICOMPS_ROOT}/comps/llms/text-generation/vllm/langchain -docker compose -f docker_compose_llm.yaml up -d -``` - -## 🚀3. Consume LLM Service - -### 3.1 Check Service Status - -```bash -curl http://${your_ip}:9000/v1/health_check\ - -X GET \ - -H 'Content-Type: application/json' -``` - -### 3.2 Verify the LLM Service - -#### 3.2.1 Verify the TGI Service - -```bash -curl http://${your_ip}:8008/v1/chat/completions \ - -X POST \ - -d '{"model": ${your_hf_llm_model}, "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens":17}' \ - -H 'Content-Type: application/json' -``` - -#### 3.2.2 Verify the vLLM Service - -```bash -curl http://${host_ip}:8008/v1/chat/completions \ - -X POST \ - -H "Content-Type: application/json" \ - -d '{"model": ${your_hf_llm_model}, "messages": [{"role": "user", "content": "What is Deep Learning?"}]}' -``` - -### 3.3 Consume LLM Service - -You can set the following model parameters according to your actual needs, such as `max_tokens`, `stream`. - -The `stream` parameter determines the format of the data returned by the API. It will return text string with `stream=false`, return text stream flow with `stream=true`. 
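For reference, here is a minimal Python client sketch equivalent to the curl commands shown below. It assumes the microservice is reachable at `http://localhost:9000` (adjust the host for your deployment); the streaming branch simply prints whatever lines the server sends, without assuming a particular event format.

```python
# Minimal client sketch for the LLM microservice; mirrors the curl examples below.
# Assumes the service listens on localhost:9000; adjust for your deployment.
import requests

LLM_URL = "http://localhost:9000/v1/chat/completions"

payload = {
    "query": "What is Deep Learning?",
    "max_tokens": 17,
    "top_k": 10,
    "top_p": 0.95,
    "typical_p": 0.95,
    "temperature": 0.01,
    "repetition_penalty": 1.03,
    "stream": False,
}

# Non-stream mode: the whole answer arrives in a single response body.
resp = requests.post(LLM_URL, json=payload, timeout=120)
resp.raise_for_status()
print(resp.text)

# Stream mode: print response lines as they arrive.
payload["stream"] = True
with requests.post(LLM_URL, json=payload, stream=True, timeout=120) as resp:
    resp.raise_for_status()
    for line in resp.iter_lines(decode_unicode=True):
        if line:
            print(line)
```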
- -```bash -# non-stream mode -curl http://${your_ip}:9000/v1/chat/completions \ - -X POST \ - -H 'Content-Type: application/json' \ - -d '{ - "query":"What is Deep Learning?", - "max_tokens":17, - "top_k":10, - "top_p":0.95, - "typical_p":0.95, - "temperature":0.01, - "repetition_penalty":1.03, - "stream":false - }' - - -# stream mode -curl http://${your_ip}:9000/v1/chat/completions \ - -X POST \ - -H 'Content-Type: application/json' \ - -d '{ - "query":"What is Deep Learning?", - "max_tokens":17, - "top_k":10, - "top_p":0.95, - "typical_p":0.95, - "temperature":0.01, - "repetition_penalty":1.03, - "stream":true - }' - -``` - - - -[Intel/neural-chat-7b-v3-3]: https://huggingface.co/Intel/neural-chat-7b-v3-3 -[Llama-2-7b-chat-hf]: https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -[Llama-2-70b-chat-hf]: https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -[Meta-Llama-3-8B-Instruct]: https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct -[Meta-Llama-3-70B-Instruct]: https://huggingface.co/meta-llama/Meta-Llama-3-70B-Instruct -[Phi-3]: https://huggingface.co/collections/microsoft/phi-3-6626e15e9585a200d2d761e3 -[HuggingFace]: https://huggingface.co/ diff --git a/comps/llms/text-generation/native/langchain/docker_compose_llm.yaml b/comps/llms/text-generation/native/langchain/docker_compose_llm.yaml deleted file mode 100644 index 241853efc7..0000000000 --- a/comps/llms/text-generation/native/langchain/docker_compose_llm.yaml +++ /dev/null @@ -1,29 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -version: "3.8" - -services: - llm: - image: opea/llm-native:latest - container_name: llm-native-server - ports: - - "9000:9000" - runtime: habana - cap_add: - - SYS_NICE - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - LLM_NATIVE_MODEL: ${LLM_NATIVE_MODEL} - HABANA_VISIBLE_DEVICES: all - OMPI_MCA_btl_vader_single_copy_mechanism: none - TOKENIZERS_PARALLELISM: false - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} - restart: unless-stopped - -networks: - default: - driver: bridge diff --git a/comps/llms/text-generation/native/langchain/llm.py b/comps/llms/text-generation/native/langchain/llm.py deleted file mode 100644 index f04b1db9d1..0000000000 --- a/comps/llms/text-generation/native/langchain/llm.py +++ /dev/null @@ -1,176 +0,0 @@ -# Copyright (c) 2024 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import sys - -sys.path.append("/test/GenAIComps/") - -import logging -import os -import threading -import time - -import torch -from langchain_core.prompts import PromptTemplate -from template import ChatTemplate, args_dict, input_sentences -from utils import initialize_model - -from comps import ( - GeneratedDoc, - LLMParamsDoc, - ServiceType, - opea_microservices, - register_microservice, - register_statistics, -) - -logflag = os.getenv("LOGFLAG", False) - -logging.basicConfig( - format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", - datefmt="%m/%d/%Y %H:%M:%S", - level=logging.INFO, -) -logger = logging.getLogger(__name__) - - -class Args: - def __init__(self, **entries): - self.__dict__.update(entries) - - -model = None -assistant_model = None -tokenizer = None -generation_config = None -args = Args(**args_dict) -initialization_lock = threading.Lock() -initialized = False - - -def generate( - input_query: list, - device="hpu", - use_lazy_mode=True, - use_hpu_graphs=True, - profiling_steps=0, - profiling_warmup_steps=0, - ignore_eos=True, - profiling_record_shapes=False, -): - """Generates sequences from the input sentences and returns them.""" - logger.info(f"[llm - generate] starting to inference with prompt {input_query}") - encode_t0 = time.perf_counter() - - # Tokenization - input_tokens = tokenizer.batch_encode_plus( - input_query, - return_tensors="pt", - padding=True, - return_token_type_ids=False, # token_type_ids is not needed for falcon-three model - ) - encode_duration = time.perf_counter() - encode_t0 - logger.info(f"[llm - generate] input tokenized: {input_tokens}") - - # Move inputs to target device(s) - for t in input_tokens: - logger.info(f"[llm - generate] t: {t}") - if torch.is_tensor(input_tokens[t]): - logger.info("[llm - generate] input[t] is tensor") - logger.info(f"[llm - generate] device: {model.device}") - input_tokens[t] = input_tokens[t].to(model.device) - - logger.info("[llm - generate] inputs transferred.") - - iteration_times = [] - outputs = model.generate( - **input_tokens, - generation_config=generation_config, - assistant_model=assistant_model, - lazy_mode=use_lazy_mode, - hpu_graphs=use_hpu_graphs, - profiling_steps=profiling_steps, - profiling_warmup_steps=profiling_warmup_steps, - ignore_eos=ignore_eos, - iteration_times=iteration_times, - profiling_record_shapes=profiling_record_shapes, - ).cpu() - logger.info("[llm - generate] result generated") - first_token_time = iteration_times[0] + encode_duration - result = tokenizer.batch_decode(outputs, skip_special_tokens=True) - logger.info(f"[llm - generate] result: {result}") - logger.info(f"[llm - generate] Time to first token = {first_token_time*1000}ms") - return result - - -def initialize(): - global model, assistant_model, tokenizer, generation_config, initialized - with initialization_lock: - if not initialized: - # initialize model and tokenizer - import habana_frameworks.torch.hpu as torch_hpu - from optimum.habana.utils import HabanaProfile - - model, assistant_model, tokenizer, generation_config = initialize_model(args, logger) - logger.info("[llm] model and tokenizer initialized.") - - # compilation and model warmup - HabanaProfile.disable() - logger.info("[llm - native] Graph compilation...") - for _ in range(args.warmup): - generate(input_sentences) - logger.info("[llm - native] model warm up finished.") - torch_hpu.synchronize() - HabanaProfile.enable() - logger.info("[llm - native] Ready to inference") - res = generate(["What is Deep Learning?"]) - logger.info(f"[llm - native] 
test result: {res}") - initialized = True - - -@register_microservice( - name="opea_service@llm_native", - service_type=ServiceType.LLM, - endpoint="/v1/chat/completions", - host="0.0.0.0", - port=9000, -) -@register_statistics(names=["opea_service@llm_native"]) -def llm_generate(input: LLMParamsDoc): - initialize() - if logflag: - logger.info(input) - prompt = input.query - prompt_template = None - if input.chat_template: - prompt_template = PromptTemplate.from_template(input.chat_template) - input_variables = prompt_template.input_variables - if prompt_template: - if sorted(input_variables) == ["context", "question"]: - prompt = prompt_template.format(question=input.query, context="\n".join(input.documents)) - elif input_variables == ["question"]: - prompt = prompt_template.format(question=input.query) - else: - logger.info(f"{prompt_template} not used, we only support 2 input variables ['question', 'context']") - else: - if input.documents: - prompt = ChatTemplate.generate_rag_prompt(input.query, input.documents) - res = generate([prompt]) - - if logflag: - logger.info(f"[llm - native] inference result: {res}") - return GeneratedDoc(text=res[0], prompt=input.query) - - -if __name__ == "__main__": - opea_microservices["opea_service@llm_native"].start() diff --git a/comps/llms/text-generation/native/langchain/requirements.txt b/comps/llms/text-generation/native/langchain/requirements.txt deleted file mode 100644 index 806f2d29fa..0000000000 --- a/comps/llms/text-generation/native/langchain/requirements.txt +++ /dev/null @@ -1,10 +0,0 @@ -docarray -fastapi -httpx -langchain_core -opentelemetry-api -opentelemetry-exporter-otlp -opentelemetry-sdk -prometheus-fastapi-instrumentator -shortuuid -uvicorn diff --git a/comps/llms/text-generation/native/llama_index/Dockerfile b/comps/llms/text-generation/native/llama_index/Dockerfile deleted file mode 100644 index e9f4e37f25..0000000000 --- a/comps/llms/text-generation/native/llama_index/Dockerfile +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -# HABANA environment -FROM vault.habana.ai/gaudi-docker/1.18.0/ubuntu22.04/habanalabs/pytorch-installer-2.4.0 AS hpu - -ENV LANG=en_US.UTF-8 -ARG REPO=https://github.com/huggingface/optimum-habana.git -ARG REPO_VER=v1.12.1 - -RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ - git-lfs \ - libgl1-mesa-glx \ - libjemalloc-dev - -RUN useradd -m -s /bin/bash user && \ - mkdir -p /home/user && \ - chown -R user /home/user/ - -USER user - -RUN git lfs install - -COPY comps /home/user/comps - -RUN pip install --no-cache-dir --upgrade-strategy eager optimum[habana] && \ - pip install --no-cache-dir git+https://github.com/HabanaAI/DeepSpeed.git@1.17.0 - -RUN git clone ${REPO} /home/user/optimum-habana && \ - cd /home/user/optimum-habana && git checkout ${REPO_VER} && \ - cd examples/text-generation && pip install --no-cache-dir -r requirements.txt && \ - cd /home/user/comps/llms/text-generation/native/llama_index && pip install --no-cache-dir -r requirements.txt && \ - pip install --no-cache-dir --upgrade --force-reinstall pydantic - -ENV PYTHONPATH=/root:/home/user - -WORKDIR /home/user/comps/llms/text-generation/native/llama_index - -ENTRYPOINT ["python", "llm.py"] diff --git a/comps/llms/text-generation/native/llama_index/README.md b/comps/llms/text-generation/native/llama_index/README.md deleted file mode 100644 index edf62fa100..0000000000 --- a/comps/llms/text-generation/native/llama_index/README.md +++ /dev/null @@ 
-1,60 +0,0 @@ -# LLM Native Microservice - -LLM Native microservice uses [optimum-habana](https://github.com/huggingface/optimum-habana) for model initialization and warm-up, focusing solely on large language models (LLMs). It operates without frameworks like TGI/VLLM, using PyTorch directly for inference, and supports only non-streaming formats. This streamlined approach optimizes performance on Habana hardware. - -## 🚀1. Start Microservice - -If you start an LLM microservice with docker, the `docker_compose_llm.yaml` file will automatically start a Native LLM service with docker. - -### 1.1 Setup Environment Variables - -In order to start Native LLM service, you need to setup the following environment variables first. - -```bash -export LLM_NATIVE_MODEL="Qwen/Qwen2-7B-Instruct" -``` - -### 1.2 Build Docker Image - -```bash -cd ../../../../../ -docker build -t opea/llm-native:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/native/llama_index/Dockerfile . -``` - -To start a docker container, you have two options: - -- A. Run Docker with CLI -- B. Run Docker with Docker Compose - -You can choose one as needed. - -### 1.3 Run Docker with CLI (Option A) - -```bash -docker run -d --runtime=habana --name="llm-native-server" -p 9000:9000 -e https_proxy=$https_proxy -e http_proxy=$http_proxy -e TOKENIZERS_PARALLELISM=false -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e LLM_NATIVE_MODEL=${LLM_NATIVE_MODEL} opea/llm-native:latest -``` - -### 1.4 Run Docker with Docker Compose (Option B) - -```bash -docker compose -f docker_compose_llm.yaml up -d -``` - -## 🚀2. Consume LLM Service - -### 2.1 Check Service Status - -```bash -curl http://${your_ip}:9000/v1/health_check\ - -X GET \ - -H 'Content-Type: application/json' -``` - -### 2.2 Consume LLM Service - -```bash -curl http://${your_ip}:9000/v1/chat/completions\ - -X POST \ - -d '{"query":"What is Deep Learning?"}' \ - -H 'Content-Type: application/json' -``` diff --git a/comps/llms/text-generation/native/llama_index/docker_compose_llm.yaml b/comps/llms/text-generation/native/llama_index/docker_compose_llm.yaml deleted file mode 100644 index f3a36e5bb8..0000000000 --- a/comps/llms/text-generation/native/llama_index/docker_compose_llm.yaml +++ /dev/null @@ -1,28 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -version: "3.8" - -services: - llm: - image: opea/llm-native:latest - container_name: llm-native-server - ports: - - "9000:9000" - runtime: habana - cap_add: - - SYS_NICE - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - LLM_NATIVE_MODEL: ${LLM_NATIVE_MODEL} - HABANA_VISIBLE_DEVICES: all - OMPI_MCA_btl_vader_single_copy_mechanism: none - TOKENIZERS_PARALLELISM: false - restart: unless-stopped - -networks: - default: - driver: bridge diff --git a/comps/llms/text-generation/native/llama_index/llm.py b/comps/llms/text-generation/native/llama_index/llm.py deleted file mode 100644 index cc17b1151d..0000000000 --- a/comps/llms/text-generation/native/llama_index/llm.py +++ /dev/null @@ -1,171 +0,0 @@ -# Copyright (c) 2024 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import sys - -sys.path.append("/test/GenAIComps/") - -import logging -import os -import threading -import time - -import torch -from llama_index.core import PromptTemplate -from template import ChatTemplate, args_dict, input_sentences -from utils import initialize_model - -from comps import ( - GeneratedDoc, - LLMParamsDoc, - ServiceType, - opea_microservices, - register_microservice, - register_statistics, -) - -logflag = os.getenv("LOGFLAG", False) - -logging.basicConfig( - format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", - datefmt="%m/%d/%Y %H:%M:%S", - level=logging.INFO, -) -logger = logging.getLogger(__name__) - - -class Args: - def __init__(self, **entries): - self.__dict__.update(entries) - - -model = None -assistant_model = None -tokenizer = None -generation_config = None -args = Args(**args_dict) -initialization_lock = threading.Lock() -initialized = False - - -def generate( - input_query: list, - device="hpu", - use_lazy_mode=True, - use_hpu_graphs=True, - profiling_steps=0, - profiling_warmup_steps=0, - ignore_eos=True, - profiling_record_shapes=False, -): - """Generates sequences from the input sentences and returns them.""" - logger.info(f"[llm - generate] starting to inference with prompt {input_query}") - encode_t0 = time.perf_counter() - - # Tokenization - input_tokens = tokenizer.batch_encode_plus(input_query, return_tensors="pt", padding=True) - encode_duration = time.perf_counter() - encode_t0 - logger.info(f"[llm - generate] input tokenized: {input_tokens}") - - # Move inputs to target device(s) - for t in input_tokens: - logger.info(f"[llm - generate] t: {t}") - if torch.is_tensor(input_tokens[t]): - logger.info("[llm - generate] input[t] is tensor") - logger.info(f"[llm - generate] device: {model.device}") - input_tokens[t] = input_tokens[t].to(model.device) - - logger.info("[llm - generate] inputs transferred.") - - iteration_times = [] - outputs = model.generate( - **input_tokens, - generation_config=generation_config, - assistant_model=assistant_model, - lazy_mode=use_lazy_mode, - hpu_graphs=use_hpu_graphs, - profiling_steps=profiling_steps, - profiling_warmup_steps=profiling_warmup_steps, - ignore_eos=ignore_eos, - iteration_times=iteration_times, - profiling_record_shapes=profiling_record_shapes, - ).cpu() - logger.info("[llm - generate] result generated") - first_token_time = iteration_times[0] + encode_duration - result = tokenizer.batch_decode(outputs, skip_special_tokens=True) - logger.info(f"[llm - generate] result: {result}") - logger.info(f"[llm - generate] Time to first token = {first_token_time*1000}ms") - return result - - -def initialize(): - global model, assistant_model, tokenizer, generation_config, initialized - with initialization_lock: - if not initialized: - # initialize model and tokenizer - import habana_frameworks.torch.hpu as torch_hpu - from optimum.habana.utils import HabanaProfile - - model, assistant_model, tokenizer, generation_config = initialize_model(args, logger) - logger.info("[llm] model and tokenizer initialized.") - - # compilation and model warmup - HabanaProfile.disable() - logger.info("[llm - native] 
Graph compilation...") - for _ in range(args.warmup): - generate(input_sentences) - logger.info("[llm - native] model warm up finished.") - torch_hpu.synchronize() - HabanaProfile.enable() - logger.info("[llm - native] Ready to inference") - res = generate(["What is Deep Learning?"]) - logger.info(f"[llm - native] test result: {res}") - initialized = True - - -@register_microservice( - name="opea_service@llm_native_llamaindex", - service_type=ServiceType.LLM, - endpoint="/v1/chat/completions", - host="0.0.0.0", - port=9000, -) -@register_statistics(names=["opea_service@llm_native_llamaindex"]) -def llm_generate(input: LLMParamsDoc): - initialize() - if logflag: - logger.info(input) - prompt = input.query - prompt_template = None - if input.chat_template: - prompt_template = PromptTemplate(input.chat_template) - input_variables = prompt_template.template_vars - if prompt_template: - if sorted(input_variables) == ["context", "question"]: - prompt = prompt_template.format(question=input.query, context="\n".join(input.documents)) - elif input_variables == ["question"]: - prompt = prompt_template.format(question=input.query) - else: - logger.info(f"{prompt_template} not used, we only support 2 input variables ['question', 'context']") - else: - if input.documents: - prompt = ChatTemplate.generate_rag_prompt(input.query, input.documents) - res = generate([prompt]) - - if logflag: - logger.info(f"[llm - native] inference result: {res}") - return GeneratedDoc(text=res[0], prompt=input.query) - - -if __name__ == "__main__": - opea_microservices["opea_service@llm_native_llamaindex"].start() diff --git a/comps/llms/text-generation/native/llama_index/requirements.txt b/comps/llms/text-generation/native/llama_index/requirements.txt deleted file mode 100644 index 5d9c5ca1f1..0000000000 --- a/comps/llms/text-generation/native/llama_index/requirements.txt +++ /dev/null @@ -1,10 +0,0 @@ -docarray -fastapi -httpx -llama_index -opentelemetry-api -opentelemetry-exporter-otlp -opentelemetry-sdk -prometheus-fastapi-instrumentator -shortuuid -uvicorn diff --git a/comps/llms/text-generation/native/llama_index/template.py b/comps/llms/text-generation/native/llama_index/template.py deleted file mode 100644 index c43205a0ae..0000000000 --- a/comps/llms/text-generation/native/llama_index/template.py +++ /dev/null @@ -1,99 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import os -import re - - -class ChatTemplate: - @staticmethod - def generate_rag_prompt(question, documents): - context_str = "\n".join(documents) - if context_str and len(re.findall("[\u4E00-\u9FFF]", context_str)) / len(context_str) >= 0.3: - # chinese context - template = """ -### 你将扮演一个乐于助人、尊重他人并诚实的助手,你的目标是帮助用户解答问题。有效地利用来自本地知识库的搜索结果。确保你的回答中只包含相关信息。如果你不确定问题的答案,请避免分享不准确的信息。 -### 搜索结果:{context} -### 问题:{question} -### 回答: -""" - else: - template = """ -### You are a helpful, respectful and honest assistant to help the user with questions. \ -Please refer to the search results obtained from the local knowledge base. \ -But be careful to not incorporate the information that you think is not relevant to the question. \ -If you don't know the answer to a question, please don't share false information. 
\n -### Search results: {context} \n -### Question: {question} \n -### Answer: -""" - return template.format(context=context_str, question=question) - - -input_sentences = [ - "DeepSpeed is a machine learning framework", - "He is working on", - "He has a", - "He got all", - "Everyone is happy and I can", - "The new movie that got Oscar this year", - "In the far far distance from our galaxy,", - "Peace is the only way", -] - - -llm_model = os.getenv("LLM_NATIVE_MODEL", "Qwen/Qwen2-7B-Instruct") -args_dict = { - "device": "hpu", - "model_name_or_path": llm_model, - "bf16": True, - "max_new_tokens": 100, - "max_input_tokens": 0, - "batch_size": 1, - "warmup": 3, - "n_iterations": 5, - "local_rank": 0, - "use_kv_cache": True, - "use_hpu_graphs": True, - "dataset_name": None, - "column_name": None, - "do_sample": False, - "num_beams": 1, - "trim_logits": False, - "seed": 27, - "profiling_warmup_steps": 0, - "profiling_steps": 0, - "profiling_record_shapes": False, - "prompt": None, - "bad_words": None, - "force_words": None, - "assistant_model": None, - "peft_model": None, - "num_return_sequences": 1, - "token": None, - "model_revision": "main", - "attn_softmax_bf16": False, - "output_dir": None, - "bucket_size": -1, - "bucket_internal": False, - "dataset_max_samples": -1, - "limit_hpu_graphs": False, - "reuse_cache": False, - "verbose_workers": False, - "simulate_dyn_prompt": None, - "reduce_recompile": False, - "use_flash_attention": False, - "flash_attention_recompute": False, - "flash_attention_causal_mask": False, - "flash_attention_fast_softmax": False, - "book_source": False, - "torch_compile": False, - "ignore_eos": True, - "temperature": 1.0, - "top_p": 1.0, - "const_serialization_path": None, - "disk_offload": False, - "trust_remote_code": False, - "quant_config": "", - "world_size": 0, -} diff --git a/comps/llms/text-generation/native/llama_index/utils.py b/comps/llms/text-generation/native/llama_index/utils.py deleted file mode 100644 index 04cebfbd49..0000000000 --- a/comps/llms/text-generation/native/llama_index/utils.py +++ /dev/null @@ -1,521 +0,0 @@ -# coding=utf-8 -# Copyright 2022 The HuggingFace Team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -############################################################################### -# Copyright (C) 2020-2021 Habana Labs, Ltd. 
an Intel Company -############################################################################### - -import copy -import glob -import os -import shutil -import tempfile -import time -from pathlib import Path - -import torch -from optimum.habana.checkpoint_utils import ( - get_ds_injection_policy, - get_repo_root, - model_is_optimized, - model_on_meta, - write_checkpoints_json, -) -from optimum.habana.utils import ( - check_habana_frameworks_version, - check_optimum_habana_min_version, - get_habana_frameworks_version, - set_seed, -) -from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer -from transformers.utils import check_min_version - - -def adjust_batch(batch, size): - curr_size = batch["input_ids"].shape[1] - if curr_size >= size: - adjusted_batch = { - "input_ids": batch["input_ids"][:, :size], - "attention_mask": batch["attention_mask"][:, :size], - } - else: - adjusted_batch = {} - for k in batch.keys(): - last_colm = batch[k][:, -1] - expanded = last_colm.tile((size - curr_size, 1)).T - adjusted_batch[k] = torch.concat([batch[k], expanded], 1) - assert adjusted_batch["input_ids"].shape[1] == size - assert adjusted_batch["attention_mask"].shape[1] == size - return adjusted_batch - - -def override_print(enable): - import builtins as __builtin__ - - builtin_print = __builtin__.print - - def print(*args, **kwargs): - force = kwargs.pop("force", False) - if force or enable: - builtin_print(*args, **kwargs) - - __builtin__.print = print - - -def override_logger(logger, enable): - logger_info = logger.info - - def info(*args, **kwargs): - force = kwargs.pop("force", False) - if force or enable: - logger_info(*args, **kwargs) - - logger.info = info - - -def count_hpu_graphs(): - return len(glob.glob(".graph_dumps/*PreGraph*")) - - -def override_prints(enable, logger): - override_print(enable) - override_logger(logger, enable) - - -def setup_distributed(args): - args.local_rank = int(os.getenv("LOCAL_RANK", "0")) - args.world_size = int(os.getenv("WORLD_SIZE", "0")) - args.global_rank = int(os.getenv("RANK", "0")) - - -def setup_inference(args, model): - import habana_frameworks.torch.core as htcore - - habana_version = get_habana_frameworks_version() - - print("Initializing inference mode") - # Keeping the if-else here for back compat. TODO remove later - if habana_version.major >= 1 and habana_version.minor >= 16: - htcore.hpu_initialize(model, mark_only_scales_as_const=True) - else: - const_marking = os.getenv("ENABLE_CONST_MARKING", "True") - if const_marking == "True": - htcore.hpu_initialize(model) - return model - - -def setup_const_serialization(const_serialization_path): - import uuid - - const_serialization_path = os.path.join(const_serialization_path + uuid.uuid4().hex) - os.makedirs(const_serialization_path) - from habana_frameworks.torch.hpu import enable_const_section_serialization - - print("Serializing const params to {}".format(const_serialization_path)) - enable_const_section_serialization(const_serialization_path, True) - - -def setup_env(args): - # Will error if the minimal version of Transformers is not installed. Remove at your own risks. 
- check_min_version("4.34.0") - check_optimum_habana_min_version("1.9.0.dev0") - # TODO: SW-167588 - WA for memory issue in hqt prep_model - os.environ.setdefault("EXPERIMENTAL_WEIGHT_SHARING", "FALSE") - - if args.global_rank == 0 and not args.torch_compile: - os.environ.setdefault("GRAPH_VISUALIZATION", "true") - shutil.rmtree(".graph_dumps", ignore_errors=True) - - if args.world_size > 0: - os.environ.setdefault("PT_HPU_LAZY_ACC_PAR_MODE", "0") - os.environ.setdefault("PT_HPU_ENABLE_LAZY_COLLECTIVES", "true") - - if args.use_hpu_graphs and args.limit_hpu_graphs and not args.reuse_cache and args.bucket_internal: - # Based upon above conditions and below env variable, - # we can call HPU graphs clear_inputs(). - os.environ.setdefault("PT_HPUGRAPH_DISABLE_TENSOR_CACHE", "1") - - # Tweak generation so that it runs faster on Gaudi - from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi - - adapt_transformers_to_gaudi() - - -def setup_device(args): - if args.device == "hpu": - import habana_frameworks.torch.core as htcore - - if args.quant_config: - htcore.hpu_set_env() - return torch.device(args.device) - - -# patching LinearAllreduce to use ScopedLinearAllReduce -def patch_scoped_linear_all_reduce(model): - from deepspeed.module_inject.layers import LinearAllreduce - from optimum.habana.transformers.models.modeling_all_models import ScopedLinearAllReduce - - for name, module in model.named_children(): - if type(module) is LinearAllreduce: - SL = ScopedLinearAllReduce(mod=module) - setattr(model, name, SL) - patch_scoped_linear_all_reduce(module) - - -def get_torch_compiled_model(model): - model.model = torch.compile(model.model, backend="hpu_backend", options={"keep_input_mutations": True}) - return model - - -def setup_model(args, model_dtype, model_kwargs, logger): - logger.info("Single-device run.") - if args.assistant_model is None: - assistant_model = None - else: - logger.info(f"Using asssitant model {args.assistant_model}.") - if args.disk_offload: - from accelerate import infer_auto_device_map, init_empty_weights - - config = AutoConfig.from_pretrained(args.model_name_or_path) - with init_empty_weights(): - model = AutoModelForCausalLM.from_config(config) - max_memory = {"cpu": "10GiB"} - device_map = infer_auto_device_map(model, max_memory=max_memory, dtype=model_dtype) - model = AutoModelForCausalLM.from_pretrained( - args.model_name_or_path, - device_map=device_map, - offload_folder="/tmp/offload_folder/", - offload_state_dict=True, - torch_dtype=model_dtype, - **model_kwargs, - ) - else: - if args.assistant_model is not None: - assistant_model = AutoModelForCausalLM.from_pretrained( - args.assistant_model, torch_dtype=model_dtype, **model_kwargs - ) - if args.peft_model is not None: - model = peft_model(args, model_dtype, logger, **model_kwargs) - else: - model = AutoModelForCausalLM.from_pretrained( - args.model_name_or_path, torch_dtype=model_dtype, **model_kwargs - ) - if args.quant_config: - import habana_quantization_toolkit - - habana_quantization_toolkit.prep_model(model) - if args.assistant_model is not None: - habana_quantization_toolkit.quantize_model(assistant_model) - - model = model.eval().to(args.device) - if args.assistant_model is not None: - assistant_model = assistant_model.eval().to(args.device) - - if args.use_hpu_graphs: - from habana_frameworks.torch.hpu import wrap_in_hpu_graph - from optimum.habana.transformers.trainer import _is_peft_model - - if check_habana_frameworks_version("1.13.0") and model.config.model_type == 
"falcon": - model = wrap_in_hpu_graph(model, hash_with_views=False) - else: - model = wrap_in_hpu_graph(model) - if args.assistant_model is not None: - assistant_model = wrap_in_hpu_graph(assistant_model) - if _is_peft_model(model): - model.base_model = wrap_in_hpu_graph(model.base_model) - - if args.torch_compile and model.config.model_type == "llama": - model = get_torch_compiled_model(model) - # if args.assistant_model is not None: - # assistant_model = get_torch_compiled_model(assistant_model) - return model, assistant_model - - -def setup_distributed_model(args, model_dtype, model_kwargs, logger): - import deepspeed - - logger.info("DeepSpeed is enabled.") - deepspeed.init_distributed(dist_backend="hccl") - config = AutoConfig.from_pretrained(args.model_name_or_path, torch_dtype=model_dtype, **model_kwargs) - load_to_meta = model_on_meta(config) - - if args.assistant_model is None: - assistant_model = None - else: - logger.info(f"Using asssitant model {args.assistant_model}.") - - if load_to_meta: - # Construct model with fake meta tensors, later will be replaced on devices during ds-inference ckpt load - with deepspeed.OnDevice(dtype=model_dtype, device="meta"): - model = AutoModelForCausalLM.from_config(config, torch_dtype=model_dtype) - - # Model loaded to meta is managed differently - checkpoints_json = tempfile.NamedTemporaryFile(suffix=".json", mode="+w") - - # For PEFT models, write the merged model on disk to be able to load it on the meta device - if args.peft_model is not None: - merged_model_dir = "/tmp/text_generation_merged_peft_model" - if args.local_rank == 0: - if Path(merged_model_dir).is_dir(): - shutil.rmtree(merged_model_dir) - peft_model(args, model_dtype, logger, **model_kwargs).save_pretrained(merged_model_dir) - torch.distributed.barrier() - - write_checkpoints_json( - merged_model_dir if args.peft_model is not None else args.model_name_or_path, - args.local_rank, - checkpoints_json, - token=args.token, - ) - else: - # TODO: revisit placement on CPU when auto-injection is possible - with deepspeed.OnDevice(dtype=model_dtype, device="cpu"): - if args.peft_model is not None: - model = peft_model(args, model_dtype, logger, **model_kwargs) - else: - model = AutoModelForCausalLM.from_pretrained( - args.model_name_or_path, torch_dtype=model_dtype, **model_kwargs - ) - model.eval() - - if args.assistant_model is not None: - assistant_model = AutoModelForCausalLM.from_pretrained( - args.assistant_model, torch_dtype=model_dtype, **model_kwargs - ).eval() - - # Initialize the model - ds_inference_kwargs = {"dtype": model_dtype} - ds_inference_kwargs["tensor_parallel"] = {"tp_size": args.world_size} - ds_inference_kwargs["enable_cuda_graph"] = args.use_hpu_graphs - ds_inference_kwargs["injection_policy"] = get_ds_injection_policy(config) - if load_to_meta: - ds_inference_kwargs["checkpoint"] = checkpoints_json.name - - model = deepspeed.init_inference(model, **ds_inference_kwargs) - model = model.module - if model.config.model_type in ["llama", "falcon", "qwen2"]: - patch_scoped_linear_all_reduce(model) - - if args.quant_config: - import habana_quantization_toolkit - - habana_quantization_toolkit.prep_model(model) - if args.assistant_model is not None: - habana_quantization_toolkit.prep_model(assistant_model) - - if args.torch_compile and model.config.model_type == "llama": - model = get_torch_compiled_model(model) - # if args.assistant_model is not None: - # assistant_model = get_torch_compiled_model(assistant_model) - return model, assistant_model - - -def 
peft_model(args, model_dtype, logger, **model_kwargs): - import importlib.util - - if importlib.util.find_spec("peft") is None: - raise ImportError("The `peft` package is not installed, please run: `pip install peft`.") - from peft import AutoPeftModelForCausalLM - from peft.config import PeftConfigMixin - - base_model_name = PeftConfigMixin.from_pretrained( - args.peft_model, - token=model_kwargs["token"] if "token" in model_kwargs else None, - ).base_model_name_or_path - - base_model_is_local = Path(base_model_name).is_dir() - if not base_model_is_local: - # Check if the base model path to a remote repository on the HF Hub exists - from huggingface_hub import list_repo_files - - try: - list_repo_files(base_model_name) - base_model_is_remote = True - except Exception: - base_model_is_remote = False - - if base_model_is_local or base_model_is_remote: - model = AutoPeftModelForCausalLM.from_pretrained(args.peft_model, torch_dtype=model_dtype, **model_kwargs) - else: - # Since the base model doesn't exist locally nor remotely, use `args.model_name_or_path` as the base model - logger.warning( - f"The base model `{base_model_name}` of the LoRA configuration associated" - f" to `{args.peft_model}` does not exist locally or remotely. Using " - f"`--model_name_or_path {args.model_name_or_path}` as a fall back for the base model." - ) - from peft import PeftModel - - model = AutoModelForCausalLM.from_pretrained(args.model_name_or_path, torch_dtype=model_dtype, **model_kwargs) - model = PeftModel.from_pretrained(model, args.peft_model, torch_dtype=model_dtype, **model_kwargs) - if hasattr(model, "merge_and_unload"): - model = model.merge_and_unload() - if model_dtype == torch.bfloat16: - model = model.to(torch.bfloat16) - return model - else: - from optimum.habana.peft.peft_model import gaudi_generate, gaudi_prepare_inputs_for_generation - - model.__class__.generate = gaudi_generate - model.__class__.prepare_inputs_for_generation = gaudi_prepare_inputs_for_generation - return model - - -def setup_tokenizer(args, model, assistant_model): - tokenizer_kwargs = { - "revision": args.model_revision, - "token": args.token, - "trust_remote_code": args.trust_remote_code, - } - if args.bad_words is not None or args.force_words is not None: - tokenizer_kwargs["add_prefix_space"] = True - tokenizer = AutoTokenizer.from_pretrained(args.model_name_or_path, **tokenizer_kwargs) - if not model.config.is_encoder_decoder: - tokenizer.padding_side = "left" - - if model.config.model_type == "llama": - # unwind broken decapoda-research config - model.generation_config.pad_token_id = 0 - model.generation_config.bos_token_id = 1 - model.generation_config.eos_token_id = 2 - if assistant_model is not None: - assistant_model.generation_config.pad_token_id = 0 - assistant_model.generation_config.bos_token_id = 1 - assistant_model.generation_config.eos_token_id = 2 - tokenizer.bos_token_id = model.generation_config.bos_token_id - tokenizer.eos_token_id = model.generation_config.eos_token_id - tokenizer.pad_token_id = model.generation_config.pad_token_id - tokenizer.pad_token = tokenizer.decode(tokenizer.pad_token_id) - tokenizer.eos_token = tokenizer.decode(tokenizer.eos_token_id) - tokenizer.bos_token = tokenizer.decode(tokenizer.bos_token_id) - if model.config.model_type == "persimmon": - model.generation_config.pad_token_id = model.generation_config.eos_token_id - if assistant_model is not None: - assistant_model.generation_config.pad_token_id = assistant_model.generation_config.eos_token_id - tokenizer.bos_token_id = 
model.generation_config.bos_token_id - tokenizer.eos_token_id = model.generation_config.eos_token_id - tokenizer.pad_token_id = model.generation_config.pad_token_id - tokenizer.pad_token = tokenizer.decode(tokenizer.pad_token_id) - tokenizer.eos_token = tokenizer.decode(tokenizer.eos_token_id) - tokenizer.bos_token = tokenizer.decode(tokenizer.bos_token_id) - - # Some models like GPT2 do not have a PAD token so we have to set it if necessary - if tokenizer.pad_token is None: - tokenizer.pad_token = tokenizer.eos_token - model.generation_config.pad_token_id = model.generation_config.eos_token_id - if assistant_model is not None: - assistant_model.generation_config.pad_token_id = assistant_model.generation_config.eos_token_id - - return tokenizer, model, assistant_model - - -def setup_generation_config(args, model, assistant_model, tokenizer): - bad_words_ids = None - force_words_ids = None - if args.bad_words is not None: - bad_words_ids = [tokenizer.encode(bad_word, add_special_tokens=False) for bad_word in args.bad_words] - if args.force_words is not None: - force_words_ids = [tokenizer.encode(force_word, add_special_tokens=False) for force_word in args.force_words] - - is_optimized = model_is_optimized(model.config) - - # Generation configuration - generation_config = copy.deepcopy(model.generation_config) - generation_config.max_new_tokens = args.max_new_tokens - generation_config.use_cache = args.use_kv_cache - generation_config.static_shapes = is_optimized and assistant_model is None - generation_config.bucket_size = args.bucket_size if is_optimized else -1 - generation_config.bucket_internal = args.bucket_internal - generation_config.do_sample = args.do_sample - generation_config.num_beams = args.num_beams - generation_config.bad_words_ids = bad_words_ids - generation_config.force_words_ids = force_words_ids - generation_config.num_return_sequences = args.num_return_sequences - generation_config.trim_logits = args.trim_logits - generation_config.attn_softmax_bf16 = args.attn_softmax_bf16 - generation_config.limit_hpu_graphs = args.limit_hpu_graphs - generation_config.reuse_cache = args.reuse_cache - generation_config.reduce_recompile = args.reduce_recompile - if generation_config.reduce_recompile: - assert generation_config.bucket_size > 0 - generation_config.use_flash_attention = args.use_flash_attention - generation_config.flash_attention_recompute = args.flash_attention_recompute - generation_config.flash_attention_causal_mask = args.flash_attention_causal_mask - generation_config.flash_attention_fast_softmax = args.flash_attention_fast_softmax - generation_config.trust_remote_code = args.trust_remote_code - - return generation_config - - -def exclude_hpu_graph_configs(args): - # Excluded configs for batch size 1 for hpu graph - if args.batch_size == 1 and args.limit_hpu_graphs: - if "falcon-180B" in args.model_name_or_path or "falcon-180b" in args.model_name_or_path: - return False - if args.world_size == 2 or args.world_size == 4 or args.world_size == 8: - if args.quant_config: - if args.max_input_tokens >= 8192 and args.max_new_tokens >= 128: - return False - else: - if args.max_input_tokens >= 4096 and args.max_new_tokens >= 128: - return False - return True - else: - return False - - -def initialize_model(args, logger): - init_start = time.perf_counter() - setup_distributed(args) - if exclude_hpu_graph_configs(args): - args.limit_hpu_graphs = False - override_prints(args.global_rank == 0 or args.verbose_workers, logger) - setup_env(args) - setup_device(args) - 
set_seed(args.seed) - get_repo_root(args.model_name_or_path, local_rank=args.local_rank, token=args.token) - if args.assistant_model is not None: - get_repo_root(args.assistant_model, local_rank=args.local_rank, token=args.token) - use_deepspeed = args.world_size > 0 - if use_deepspeed or args.bf16: - model_dtype = torch.bfloat16 - else: - model_dtype = torch.float - args.attn_softmax_bf16 = False - - model_kwargs = { - "revision": args.model_revision, - "token": args.token, - "trust_remote_code": args.trust_remote_code, - } - if args.trust_remote_code: - logger.warning("`trust_remote_code` is set, there is no guarantee this model works properly and it may fail") - - model, assistant_model = ( - setup_model(args, model_dtype, model_kwargs, logger) - if not use_deepspeed - else setup_distributed_model(args, model_dtype, model_kwargs, logger) - ) - tokenizer, model, assistant_model = setup_tokenizer(args, model, assistant_model) - generation_config = setup_generation_config(args, model, assistant_model, tokenizer) - - if args.const_serialization_path: - setup_const_serialization(args.const_serialization_path) - if args.quant_config: - model = setup_inference(args, model) - init_end = time.perf_counter() - logger.info(f"Args: {args}") - logger.info(f"device: {args.device}, n_hpu: {args.world_size}, bf16: {model_dtype == torch.bfloat16}") - logger.info(f"Model initialization took {(init_end - init_start):.3f}s") - return model, assistant_model, tokenizer, generation_config diff --git a/comps/llms/text-generation/ollama/langchain/Dockerfile b/comps/llms/text-generation/ollama/langchain/Dockerfile deleted file mode 100644 index 41e3720cd3..0000000000 --- a/comps/llms/text-generation/ollama/langchain/Dockerfile +++ /dev/null @@ -1,26 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -FROM python:3.11-slim - -RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ - curl \ - libgl1-mesa-glx \ - libjemalloc-dev - -RUN useradd -m -s /bin/bash user && \ - mkdir -p /home/user && \ - chown -R user /home/user/ - -USER user - -COPY comps /home/user/comps - -RUN pip install --no-cache-dir --upgrade pip setuptools && \ - pip install --no-cache-dir -r /home/user/comps/llms/text-generation/ollama/langchain/requirements.txt - -ENV PYTHONPATH=$PYTHONPATH:/home/user - -WORKDIR /home/user/comps/llms/text-generation/ollama/langchain - -ENTRYPOINT ["bash", "entrypoint.sh"] diff --git a/comps/llms/text-generation/ollama/langchain/__init__.py b/comps/llms/text-generation/ollama/langchain/__init__.py deleted file mode 100644 index 916f3a44b2..0000000000 --- a/comps/llms/text-generation/ollama/langchain/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 diff --git a/comps/llms/text-generation/ollama/langchain/entrypoint.sh b/comps/llms/text-generation/ollama/langchain/entrypoint.sh deleted file mode 100644 index d60eddd36b..0000000000 --- a/comps/llms/text-generation/ollama/langchain/entrypoint.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/usr/bin/env bash - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -pip --no-cache-dir install -r requirements-runtime.txt - -python llm.py diff --git a/comps/llms/text-generation/ollama/langchain/llm.py b/comps/llms/text-generation/ollama/langchain/llm.py deleted file mode 100644 index a17fa93084..0000000000 --- a/comps/llms/text-generation/ollama/langchain/llm.py +++ /dev/null @@ -1,60 +0,0 @@ -# Copyright (C) 2024 Intel 
Corporation -# SPDX-License-Identifier: Apache-2.0 - -import os - -from fastapi.responses import StreamingResponse -from langchain_community.llms import Ollama - -from comps import CustomLogger, GeneratedDoc, LLMParamsDoc, ServiceType, opea_microservices, register_microservice - -logger = CustomLogger("llm_ollama") -logflag = os.getenv("LOGFLAG", False) - - -@register_microservice( - name="opea_service@llm_ollama", - service_type=ServiceType.LLM, - endpoint="/v1/chat/completions", - host="0.0.0.0", - port=9000, -) -async def llm_generate(input: LLMParamsDoc): - if logflag: - logger.info(input) - ollama = Ollama( - base_url=ollama_endpoint, - model=input.model if input.model else model_name, - num_predict=input.max_tokens, - top_k=input.top_k, - top_p=input.top_p, - temperature=input.temperature, - repeat_penalty=input.repetition_penalty, - ) - # assuming you have Ollama installed and have llama3 model pulled with `ollama pull llama3` - if input.stream: - - async def stream_generator(): - chat_response = "" - async for text in ollama.astream(input.query): - chat_response += text - chunk_repr = repr(text.encode("utf-8")) - if logflag: - logger.info(f"[llm - chat_stream] chunk:{chunk_repr}") - yield f"data: {chunk_repr}\n\n" - if logflag: - logger.info(f"[llm - chat_stream] stream response: {chat_response}") - yield "data: [DONE]\n\n" - - return StreamingResponse(stream_generator(), media_type="text/event-stream") - else: - response = await ollama.ainvoke(input.query) - if logflag: - logger.info(response) - return GeneratedDoc(text=response, prompt=input.query) - - -if __name__ == "__main__": - ollama_endpoint = os.getenv("OLLAMA_ENDPOINT", "http://localhost:11434") - model_name = os.getenv("OLLAMA_MODEL", "meta-llama/Meta-Llama-3-8B-Instruct") - opea_microservices["opea_service@llm_ollama"].start() diff --git a/comps/llms/text-generation/ollama/langchain/requirements-runtime.txt b/comps/llms/text-generation/ollama/langchain/requirements-runtime.txt deleted file mode 100644 index 225adde271..0000000000 --- a/comps/llms/text-generation/ollama/langchain/requirements-runtime.txt +++ /dev/null @@ -1 +0,0 @@ -langserve diff --git a/comps/llms/text-generation/ollama/langchain/requirements.txt b/comps/llms/text-generation/ollama/langchain/requirements.txt deleted file mode 100644 index c936696b5d..0000000000 --- a/comps/llms/text-generation/ollama/langchain/requirements.txt +++ /dev/null @@ -1,12 +0,0 @@ -docarray[full] -fastapi -huggingface_hub -langchain -langchain-community -opentelemetry-api -opentelemetry-exporter-otlp -opentelemetry-sdk -prometheus-fastapi-instrumentator -shortuuid -transformers -uvicorn diff --git a/comps/llms/text-generation/predictionguard/Dockerfile b/comps/llms/text-generation/predictionguard/Dockerfile deleted file mode 100644 index 1c4077ac91..0000000000 --- a/comps/llms/text-generation/predictionguard/Dockerfile +++ /dev/null @@ -1,15 +0,0 @@ -# Copyright (C) 2024 Prediction Guard, Inc. 
-# SPDX-License-Identifier: Apache-2.0 - -FROM python:3.11-slim - -COPY comps /home/comps - -RUN pip install --no-cache-dir --upgrade pip setuptools && \ - pip install --no-cache-dir -r /home/comps/llms/text-generation/predictionguard/requirements.txt - -ENV PYTHONPATH=$PYTHONPATH:/home - -WORKDIR /home/comps/llms/text-generation/predictionguard - -ENTRYPOINT ["bash", "entrypoint.sh"] diff --git a/comps/llms/text-generation/predictionguard/__init__.py b/comps/llms/text-generation/predictionguard/__init__.py deleted file mode 100644 index a246c95e79..0000000000 --- a/comps/llms/text-generation/predictionguard/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -# Copyright (C) 2024 Prediction Guard, Inc. -# SPDX-License-Identifier: Apache-2.0 diff --git a/comps/llms/text-generation/predictionguard/docker_compose_llm.yaml b/comps/llms/text-generation/predictionguard/docker_compose_llm.yaml deleted file mode 100644 index bde9fa10a9..0000000000 --- a/comps/llms/text-generation/predictionguard/docker_compose_llm.yaml +++ /dev/null @@ -1,20 +0,0 @@ -# Copyright (C) 2024 Prediction Guard, Inc -# SPDX-License-Identifier: Apache-2.0 - -services: - llm: - image: opea/llm-textgen-predictionguard:latest - container_name: llm-textgen-predictionguard - ports: - - "9000:9000" - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - PREDICTIONGUARD_API_KEY: ${PREDICTIONGUARD_API_KEY} - restart: unless-stopped - -networks: - default: - driver: bridge diff --git a/comps/llms/text-generation/predictionguard/entrypoint.sh b/comps/llms/text-generation/predictionguard/entrypoint.sh deleted file mode 100644 index 8220ff6399..0000000000 --- a/comps/llms/text-generation/predictionguard/entrypoint.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/usr/bin/env bash - -# Copyright (C) 2024 Prediction Guard, Inc. -# SPDX-License-Identifier: Apache-2.0 - -#pip --no-cache-dir install -r requirements-runtime.txt - -python llm_predictionguard.py diff --git a/comps/llms/text-generation/predictionguard/llm_predictionguard.py b/comps/llms/text-generation/predictionguard/llm_predictionguard.py deleted file mode 100644 index 475b3f69be..0000000000 --- a/comps/llms/text-generation/predictionguard/llm_predictionguard.py +++ /dev/null @@ -1,85 +0,0 @@ -# Copyright (C) 2024 Prediction Guard, Inc. -# SPDX-License-Identified: Apache-2.0 - - -import time - -from fastapi import FastAPI, HTTPException -from fastapi.responses import StreamingResponse -from predictionguard import PredictionGuard - -from comps import ( - GeneratedDoc, - LLMParamsDoc, - ServiceType, - opea_microservices, - register_microservice, - register_statistics, - statistics_dict, -) - -client = PredictionGuard() -app = FastAPI() - - -@register_microservice( - name="opea_service@llm_predictionguard", - service_type=ServiceType.LLM, - endpoint="/v1/chat/completions", - host="0.0.0.0", - port=9000, -) -@register_statistics(names=["opea_service@llm_predictionguard"]) -def llm_generate(input: LLMParamsDoc): - start = time.time() - - messages = [ - { - "role": "system", - "content": "You are a helpful assistant. 
Your goal is to provide accurate, detailed, and safe responses to the user's queries.", - }, - {"role": "user", "content": input.query}, - ] - - if input.stream: - - async def stream_generator(): - chat_response = "" - for res in client.chat.completions.create( - model=input.model, - messages=messages, - max_tokens=input.max_tokens, - temperature=input.temperature, - top_p=input.top_p, - top_k=input.top_k, - stream=True, - ): - if "choices" in res["data"] and "delta" in res["data"]["choices"][0]: - delta_content = res["data"]["choices"][0]["delta"]["content"] - chat_response += delta_content - yield f"data: {delta_content}\n\n" - else: - yield "data: [DONE]\n\n" - - statistics_dict["opea_service@llm_predictionguard"].append_latency(time.time() - start, None) - return StreamingResponse(stream_generator(), media_type="text/event-stream") - else: - try: - response = client.chat.completions.create( - model=input.model, - messages=messages, - max_tokens=input.max_tokens, - temperature=input.temperature, - top_p=input.top_p, - top_k=input.top_k, - ) - response_text = response["choices"][0]["message"]["content"] - except Exception as e: - raise HTTPException(status_code=500, detail=str(e)) - - statistics_dict["opea_service@llm_predictionguard"].append_latency(time.time() - start, None) - return GeneratedDoc(text=response_text, prompt=input.query) - - -if __name__ == "__main__": - opea_microservices["opea_service@llm_predictionguard"].start() diff --git a/comps/llms/text-generation/predictionguard/requirements.txt b/comps/llms/text-generation/predictionguard/requirements.txt deleted file mode 100644 index 6c9f8340fd..0000000000 --- a/comps/llms/text-generation/predictionguard/requirements.txt +++ /dev/null @@ -1,12 +0,0 @@ -aiohttp -docarray -fastapi -opentelemetry-api -opentelemetry-exporter-otlp -opentelemetry-sdk -Pillow -predictionguard -prometheus-fastapi-instrumentator -shortuuid -transformers -uvicorn diff --git a/comps/llms/text-generation/vllm/langchain/dependency/build_docker_vllm.sh b/comps/llms/text-generation/vllm/langchain/dependency/build_docker_vllm.sh deleted file mode 100644 index bcbf20c4a3..0000000000 --- a/comps/llms/text-generation/vllm/langchain/dependency/build_docker_vllm.sh +++ /dev/null @@ -1,50 +0,0 @@ -#!/bin/bash - -# Copyright (c) 2024 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Get script directory -SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" -echo "Script directory: $SCRIPT_DIR" -cd $SCRIPT_DIR - -# Set default values -default_hw_mode="cpu" - -# Assign arguments to variable -hw_mode=${1:-$default_hw_mode} - -# Check if all required arguments are provided -if [ "$#" -lt 0 ] || [ "$#" -gt 1 ]; then - echo "Usage: $0 [hw_mode]" - echo "Please customize the arguments you want to use. - - hw_mode: The hardware mode for the Ray Gaudi endpoint, with the default being 'cpu', and the optional selection can be 'cpu' and 'hpu'." 
- exit 1 -fi - -# Build the docker image for vLLM based on the hardware mode -if [ "$hw_mode" = "hpu" ]; then - git clone https://github.com/HabanaAI/vllm-fork.git - cd ./vllm-fork/ - git checkout 3c39626 - docker build -f Dockerfile.hpu -t opea/vllm-gaudi:latest --shm-size=128g . --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy - cd .. - rm -rf vllm-fork -else - git clone https://github.com/vllm-project/vllm.git - cd ./vllm/ - docker build -f Dockerfile.cpu -t opea/vllm-cpu:latest --shm-size=128g . --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy - cd .. - rm -rf vllm -fi diff --git a/comps/llms/text-generation/vllm/langchain/dependency/build_docker_vllm_openvino.sh b/comps/llms/text-generation/vllm/langchain/dependency/build_docker_vllm_openvino.sh deleted file mode 100644 index c7ca87cacc..0000000000 --- a/comps/llms/text-generation/vllm/langchain/dependency/build_docker_vllm_openvino.sh +++ /dev/null @@ -1,29 +0,0 @@ -#!/bin/bash - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -# Set default values -default_hw_mode="cpu" - -# Assign arguments to variable -hw_mode=${1:-$default_hw_mode} - -# Check if all required arguments are provided -if [ "$#" -lt 0 ] || [ "$#" -gt 1 ]; then - echo "Usage: $0 [hw_mode]" - echo "Please customize the arguments you want to use. - - hw_mode: The hardware mode for the vLLM endpoint, with the default being 'cpu', and the optional selection can be 'cpu' and 'gpu'." - exit 1 -fi - -# Build the docker image for vLLM based on the hardware mode -if [ "$hw_mode" = "gpu" ]; then - docker build -f Dockerfile.intel_gpu -t opea/vllm-arc:latest . --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -else - BASEDIR="$( cd "$( dirname "$0" )" && pwd )" - git clone https://github.com/vllm-project/vllm.git vllm - cd ./vllm/ && git checkout v0.6.1 - docker build -t vllm-openvino:latest -f Dockerfile.openvino . --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy - cd $BASEDIR && rm -rf vllm -fi diff --git a/comps/llms/text-generation/vllm/langchain/dependency/launch_vllm_service_openvino.sh b/comps/llms/text-generation/vllm/langchain/dependency/launch_vllm_service_openvino.sh deleted file mode 100644 index 140df6a0f8..0000000000 --- a/comps/llms/text-generation/vllm/langchain/dependency/launch_vllm_service_openvino.sh +++ /dev/null @@ -1,76 +0,0 @@ -#!/bin/bash - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - - -# Set default values - - -default_port=8008 -default_model="meta-llama/Llama-2-7b-hf" -default_device="cpu" -swap_space=50 -image="vllm:openvino" - -while getopts ":hm:p:d:" opt; do - case $opt in - h) - echo "Usage: $0 [-h] [-m model] [-p port] [-d device]" - echo "Options:" - echo " -h Display this help message" - echo " -m model Model (default: meta-llama/Llama-2-7b-hf for cpu" - echo " meta-llama/Llama-3.2-3B-Instruct for gpu)" - echo " -p port Port (default: 8000)" - echo " -d device Target Device (Default: cpu, optional selection can be 'cpu' and 'gpu')" - exit 0 - ;; - m) - model=$OPTARG - ;; - p) - port=$OPTARG - ;; - d) - device=$OPTARG - ;; - \?) 
- echo "Invalid option: -$OPTARG" >&2 - exit 1 - ;; - esac -done - -# Assign arguments to variables -model_name=${model:-$default_model} -port_number=${port:-$default_port} -device=${device:-$default_device} - - -# Set the Huggingface cache directory variable -HF_CACHE_DIR=$HOME/.cache/huggingface -if [ "$device" = "gpu" ]; then - docker_args="-e VLLM_OPENVINO_DEVICE=GPU --device /dev/dri -v /dev/dri/by-path:/dev/dri/by-path" - vllm_args="--max_model_len=1024" - model_name="meta-llama/Llama-3.2-3B-Instruct" - image="opea/vllm-arc:latest" -fi -# Start the model server using Openvino as the backend inference engine. -# Provide the container name that is unique and meaningful, typically one that includes the model name. - -docker run -d --rm --name="vllm-openvino-server" \ - -p $port_number:80 \ - --ipc=host \ - $docker_args \ - -e HTTPS_PROXY=$https_proxy \ - -e HTTP_PROXY=$https_proxy \ - -e HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN} \ - -v $HOME/.cache/huggingface:/root/.cache/huggingface \ - $image /bin/bash -c "\ - cd / && \ - export VLLM_CPU_KVCACHE_SPACE=50 && \ - python3 -m vllm.entrypoints.openai.api_server \ - --model \"$model_name\" \ - $vllm_args \ - --host 0.0.0.0 \ - --port 80" diff --git a/comps/llms/text-generation/vllm/langchain/llm.py b/comps/llms/text-generation/vllm/langchain/llm.py deleted file mode 100644 index 17e3606e17..0000000000 --- a/comps/llms/text-generation/vllm/langchain/llm.py +++ /dev/null @@ -1,293 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import os -from typing import Union - -from fastapi.responses import StreamingResponse -from langchain_community.llms import VLLMOpenAI -from langchain_core.prompts import PromptTemplate -from openai import OpenAI -from template import ChatTemplate - -from comps import ( - CustomLogger, - GeneratedDoc, - LLMParamsDoc, - SearchedDoc, - ServiceType, - opea_microservices, - opea_telemetry, - register_microservice, -) -from comps.cores.mega.utils import ConfigError, get_access_token, load_model_configs -from comps.cores.proto.api_protocol import ChatCompletionRequest - -logger = CustomLogger("llm_vllm") -logflag = os.getenv("LOGFLAG", False) - -# Environment variables -TOKEN_URL = os.getenv("TOKEN_URL") -CLIENTID = os.getenv("CLIENTID") -CLIENT_SECRET = os.getenv("CLIENT_SECRET") -MODEL_CONFIGS = os.getenv("MODEL_CONFIGS") -DEFAULT_ENDPOINT = os.getenv("vLLM_ENDPOINT", "http://localhost:8080") - -# Validate and Load the models config if MODEL_CONFIGS is not null -configs_map = {} -if MODEL_CONFIGS: - try: - configs_map = load_model_configs(MODEL_CONFIGS) - except ConfigError as e: - logger.error(f"Failed to load model configurations: {e}") - raise ConfigError(f"Failed to load model configurations: {e}") - - -def get_llm_endpoint(model): - if not MODEL_CONFIGS: - return DEFAULT_ENDPOINT - try: - return configs_map.get(model).get("endpoint") - except ConfigError as e: - logger.error(f"Input model {model} not present in model_configs. Error {e}") - raise ConfigError(f"Input model {model} not present in model_configs") - - -@opea_telemetry -def post_process_text(text: str): - if text == " ": - return "data: @#$\n\n" - if text == "\n": - return "data:
\n\n" - if text.isspace(): - return None - new_text = text.replace(" ", "@#$") - return f"data: {new_text}\n\n" - - -@register_microservice( - name="opea_service@llm_vllm", - service_type=ServiceType.LLM, - endpoint="/v1/chat/completions", - host="0.0.0.0", - port=9000, -) -async def llm_generate(input: Union[LLMParamsDoc, ChatCompletionRequest, SearchedDoc]): - if logflag: - logger.info(input) - - prompt_template = None - access_token = ( - get_access_token(TOKEN_URL, CLIENTID, CLIENT_SECRET) if TOKEN_URL and CLIENTID and CLIENT_SECRET else None - ) - headers = {} - if access_token: - headers = {"Authorization": f"Bearer {access_token}"} - model_name = input.model if input.model else os.getenv("LLM_MODEL", "meta-llama/Meta-Llama-3-8B-Instruct") - llm_endpoint = get_llm_endpoint(model_name) - llm = VLLMOpenAI( - openai_api_key="EMPTY", openai_api_base=llm_endpoint + "/v1", model_name=model_name, default_headers=headers - ) - - if not isinstance(input, SearchedDoc) and input.chat_template: - prompt_template = PromptTemplate.from_template(input.chat_template) - input_variables = prompt_template.input_variables - - if isinstance(input, SearchedDoc): - if logflag: - logger.info("[ SearchedDoc ] input from retriever microservice") - - prompt = input.initial_query - - if input.retrieved_docs: - docs = [doc.text for doc in input.retrieved_docs] - if logflag: - logger.info(f"[ SearchedDoc ] combined retrieved docs: {docs}") - - prompt = ChatTemplate.generate_rag_prompt(input.initial_query, docs, model_name) - - # use default llm parameter for inference - new_input = LLMParamsDoc(query=prompt) - - parameters = { - "max_tokens": new_input.max_tokens, - "top_p": new_input.top_p, - "temperature": new_input.temperature, - "frequency_penalty": new_input.frequency_penalty, - "presence_penalty": new_input.presence_penalty, - } - - if logflag: - logger.info(f"[ SearchedDoc ] final input: {new_input}") - - if new_input.stream: - - async def stream_generator(): - chat_response = "" - async for text in llm.astream(new_input.query, **parameters): - if text not in ["<|im_end|>", "<|endoftext|>"]: - chat_response += text - chunk_repr = repr(text.encode("utf-8")) - if logflag: - logger.info(f"[ SearchedDoc ] chunk: {chunk_repr}") - yield f"data: {chunk_repr}\n\n" - if logflag: - logger.info(f"[ SearchedDoc ] stream response: {chat_response}") - yield "data: [DONE]\n\n" - - return StreamingResponse(stream_generator(), media_type="text/event-stream") - - else: - response = await llm.ainvoke(new_input.query, **parameters) - if logflag: - logger.info(response) - - return GeneratedDoc(text=response, prompt=new_input.query) - - elif isinstance(input, LLMParamsDoc): - if logflag: - logger.info("[ LLMParamsDoc ] input from rerank microservice") - - prompt = input.query - - parameters = { - "max_tokens": input.max_tokens, - "top_p": input.top_p, - "temperature": input.temperature, - "frequency_penalty": input.frequency_penalty, - "presence_penalty": input.presence_penalty, - } - - if prompt_template: - if sorted(input_variables) == ["context", "question"]: - prompt = prompt_template.format(question=input.query, context="\n".join(input.documents)) - elif input_variables == ["question"]: - prompt = prompt_template.format(question=input.query) - else: - logger.info( - f"[ LLMParamsDoc ] {prompt_template} not used, we only support 2 input variables ['question', 'context']" - ) - else: - if input.documents: - # use rag default template - prompt = ChatTemplate.generate_rag_prompt(input.query, input.documents, model_name) - - if 
input.stream: - - async def stream_generator(): - chat_response = "" - async for text in llm.astream(prompt, **parameters): - if text not in ["<|im_end|>", "<|endoftext|>"]: - chat_response += text - chunk_repr = repr(text.encode("utf-8")) - if logflag: - logger.info(f"[ LLMParamsDoc ] chunk: {chunk_repr}") - yield f"data: {chunk_repr}\n\n" - if logflag: - logger.info(f"[ LLMParamsDoc ] stream response: {chat_response}") - yield "data: [DONE]\n\n" - - return StreamingResponse(stream_generator(), media_type="text/event-stream") - - else: - response = await llm.ainvoke(prompt, **parameters) - if logflag: - logger.info(response) - - return GeneratedDoc(text=response, prompt=input.query) - else: - if logflag: - logger.info("[ ChatCompletionRequest ] input in opea format") - client = OpenAI( - api_key="EMPTY", - base_url=llm_endpoint + "/v1", - ) - - if isinstance(input.messages, str): - prompt = input.messages - if prompt_template: - if sorted(input_variables) == ["context", "question"]: - prompt = prompt_template.format(question=input.messages, context="\n".join(input.documents)) - elif input_variables == ["question"]: - prompt = prompt_template.format(question=input.messages) - else: - logger.info( - f"[ ChatCompletionRequest ] {prompt_template} not used, we only support 2 input variables ['question', 'context']" - ) - else: - if input.documents: - # use rag default template - prompt = ChatTemplate.generate_rag_prompt(input.messages, input.documents, input.model) - - chat_completion = client.completions.create( - model=model_name, - prompt=prompt, - echo=input.echo, - frequency_penalty=input.frequency_penalty, - max_tokens=input.max_tokens, - n=input.n, - presence_penalty=input.presence_penalty, - seed=input.seed, - stop=input.stop, - stream=input.stream, - suffix=input.suffix, - temperature=input.temperature, - top_p=input.top_p, - user=input.user, - ) - else: - if input.messages[0]["role"] == "system": - if "{context}" in input.messages[0]["content"]: - if input.documents is None or input.documents == []: - input.messages[0]["content"].format(context="") - else: - input.messages[0]["content"].format(context="\n".join(input.documents)) - else: - if prompt_template: - system_prompt = prompt_template - if input_variables == ["context"]: - system_prompt = prompt_template.format(context="\n".join(input.documents)) - else: - logger.info( - f"[ ChatCompletionRequest ] {prompt_template} not used, only support 1 input variables ['context']" - ) - - input.messages.insert(0, {"role": "system", "content": system_prompt}) - - chat_completion = client.chat.completions.create( - model=model_name, - messages=input.messages, - frequency_penalty=input.frequency_penalty, - max_tokens=input.max_tokens, - n=input.n, - presence_penalty=input.presence_penalty, - response_format=input.response_format, - seed=input.seed, - stop=input.stop, - stream=input.stream, - stream_options=input.stream_options, - temperature=input.temperature, - top_p=input.top_p, - user=input.user, - ) - - if input.stream: - - def stream_generator(): - for c in chat_completion: - if logflag: - logger.info(c) - chunk = c.model_dump_json() - if chunk not in ["<|im_end|>", "<|endoftext|>"]: - yield f"data: {chunk}\n\n" - yield "data: [DONE]\n\n" - - return StreamingResponse(stream_generator(), media_type="text/event-stream") - else: - if logflag: - logger.info(chat_completion) - return chat_completion - - -if __name__ == "__main__": - opea_microservices["opea_service@llm_vllm"].start() diff --git 
a/comps/llms/text-generation/vllm/langchain/requirements-runtime.txt b/comps/llms/text-generation/vllm/langchain/requirements-runtime.txt deleted file mode 100644 index 225adde271..0000000000 --- a/comps/llms/text-generation/vllm/langchain/requirements-runtime.txt +++ /dev/null @@ -1 +0,0 @@ -langserve diff --git a/comps/llms/text-generation/vllm/langchain/template.py b/comps/llms/text-generation/vllm/langchain/template.py deleted file mode 100644 index 6d976106ac..0000000000 --- a/comps/llms/text-generation/vllm/langchain/template.py +++ /dev/null @@ -1,36 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import re - - -class ChatTemplate: - @staticmethod - def generate_rag_prompt(question, documents, model): - context_str = "\n".join(documents) - if model == "meta-llama/Meta-Llama-3.1-70B-Instruct" or model == "meta-llama/Meta-Llama-3.1-8B-Instruct": - template = """ - <|begin_of_text|><|start_header_id|>system<|end_header_id|> You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise <|eot_id|><|start_header_id|>user<|end_header_id|> - Question: {question} - Context: {context} - Answer: <|eot_id|><|start_header_id|>assistant<|end_header_id|>""" - else: - if context_str and len(re.findall("[\u4E00-\u9FFF]", context_str)) / len(context_str) >= 0.3: - # chinese context - template = """ - ### 你将扮演一个乐于助人、尊重他人并诚实的助手,你的目标是帮助用户解答问题。有效地利用来自本地知识库的搜索结果。确保你的回答中只包含相关信息。如果你不确定问题的答案,请避免分享不准确的信息。 - ### 搜索结果:{context} - ### 问题:{question} - ### 回答: - """ - else: - template = """ - ### You are a helpful, respectful and honest assistant to help the user with questions. \ - Please refer to the search results obtained from the local knowledge base. \ - But be careful to not incorporate the information that you think is not relevant to the question. \ - If you don't know the answer to a question, please don't share false information. 
\n - ### Search results: {context} \n - ### Question: {question} \n - ### Answer: - """ - return template.format(context=context_str, question=question) diff --git a/comps/llms/utils/lm-eval/Dockerfile b/comps/llms/utils/lm-eval/Dockerfile index 57a4270df0..9d535af1e7 100644 --- a/comps/llms/utils/lm-eval/Dockerfile +++ b/comps/llms/utils/lm-eval/Dockerfile @@ -21,8 +21,6 @@ RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ python3-pip \ wget -USER user - COPY comps /home/user/comps RUN pip install --no-cache-dir --upgrade pip && \ @@ -30,6 +28,8 @@ RUN pip install --no-cache-dir --upgrade pip && \ ENV PYTHONPATH=$PYTHONPATH:/home/user +USER user + WORKDIR /home/user/comps/llms/utils/lm-eval diff --git a/comps/lvms/deployment/docker_compose/compose.yaml b/comps/lvms/deployment/docker_compose/compose.yaml new file mode 100644 index 0000000000..01782dcd1b --- /dev/null +++ b/comps/lvms/deployment/docker_compose/compose.yaml @@ -0,0 +1,146 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +services: + llava-service: + image: ${REGISTRY:-opea}/lvm-llava:${TAG:-latest} + container_name: llava-service + ports: + - ${LLAVA_PORT:-5028}:8399 + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + restart: unless-stopped + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8399/health"] + interval: 30s + timeout: 6s + retries: 20 + llava-tgi-service: + image: ghcr.io/huggingface/tgi-gaudi:2.3.1 + container_name: llava-tgi-service + ports: + - ${LLAVA_TGI_PORT:-5027}:80 + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + HABANA_VISIBLE_DEVICES: all + OMPI_MCA_btl_vader_single_copy_mechanism: none + runtime: habana + cap_add: + - SYS_NICE + ipc: host + restart: unless-stopped + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:80/health"] + interval: 30s + timeout: 6s + retries: 20 + command: --model-id ${LLM_MODEL_ID} --max-input-length 4096 --max-total-tokens 8192 + llama-vision-service: + image: opea/lvm-llama-vision:latest + container_name: lvm-llama-vision-service + ports: + - ${LLAMA_VISION_PORT:-9399}:9399 + runtime: habana + cap_add: + - SYS_NICE + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + LLAMA_VISION_MODEL_ID: ${LLAMA_VISION_MODEL_ID} + HABANA_VISIBLE_DEVICES: all + OMPI_MCA_btl_vader_single_copy_mechanism: none + restart: unless-stopped + predictionguard-service: + image: ${REGISTRY:-opea}/lvm-pg:${TAG:-latest} + container_name: predictionguard-service + ports: + - ${PREDICTIONGUARD_PORT:-5028}:9399 + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + PREDICTIONGUARD_API_KEY: ${PREDICTIONGUARD_API_KEY} + restart: unless-stopped + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:9399/health"] + interval: 30s + timeout: 6s + retries: 20 + video-llama-service: + image: ${REGISTRY:-opea}/lvm-video-llama:${TAG:-latest} + container_name: video-llama-service + ports: + - ${VIDEO_LLAMA_PORT:-9009}:9009 + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + llm_download: "True" + restart: unless-stopped + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:9009/health"] + interval: 30s + timeout: 6s + retries: 20 + lvm: + image: ${REGISTRY:-opea}/lvm:${TAG:-latest} + container_name: lvm-service + ports: + - 
${LVM_PORT:-5051}:9399 + ipc: host + environment: + LVM_ENDPOINT: ${LVM_ENDPOINT} + LVM_COMPONENT_NAME: ${LVM_COMPONENT_NAME:-OPEA_LLAVA_LVM} + lvm-llava: + extends: lvm + container_name: lvm-llava-service + environment: + LVM_COMPONENT_NAME: ${LVM_COMPONENT_NAME:-OPEA_LLAVA_LVM} + depends_on: + llava-service: + condition: service_healthy + lvm-llava-tgi: + extends: lvm + container_name: lvm-llava-tgi-service + environment: + LVM_COMPONENT_NAME: ${LVM_COMPONENT_NAME:-OPEA_TGI_LLAVA_LVM} + depends_on: + llava-tgi-service: + condition: service_healthy + lvm-llama-vision: + extends: lvm + container_name: lvm-llama-vision-service + environment: + LVM_COMPONENT_NAME: ${LVM_COMPONENT_NAME:-OPEA_LLAMA_VISION_LVM} + depends_on: + llama-vision-service: + condition: service_healthy + lvm-predictionguard: + extends: lvm + container_name: lvm-predictionguard-service + environment: + LVM_COMPONENT_NAME: ${LVM_COMPONENT_NAME:-OPEA_PREDICTION_GUARD_LVM} + depends_on: + predictionguard-service: + condition: service_healthy + lvm-video-llama: + extends: lvm + container_name: lvm-video-llama-service + environment: + LVM_COMPONENT_NAME: ${LVM_COMPONENT_NAME:-OPEA_VIDEO_LLAMA_LVM} + depends_on: + video-llama-service: + condition: service_healthy + +networks: + default: + driver: bridge diff --git a/comps/lvms/deployment/kubernetes/README.md b/comps/lvms/deployment/kubernetes/README.md new file mode 100644 index 0000000000..f8c26af8d5 --- /dev/null +++ b/comps/lvms/deployment/kubernetes/README.md @@ -0,0 +1,11 @@ +# Deploy LVM microservice on Kubernetes cluster + +- You should have Helm (version >= 3.15) installed. Refer to the [Helm Installation Guide](https://helm.sh/docs/intro/install/) for more information. +- For more deployment options, refer to [helm charts README](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts#readme). 
+ +## Deploy on Kubernetes + +``` +export HFTOKEN="insert-your-huggingface-token-here" +helm install lvm oci://ghcr.io/opea-project/charts/lvm-uservice --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f cpu-values.yaml +``` diff --git a/comps/dataprep/vdms/multimodal_langchain/__init__.py b/comps/lvms/deployment/kubernetes/cpu-values.yaml similarity index 77% rename from comps/dataprep/vdms/multimodal_langchain/__init__.py rename to comps/lvms/deployment/kubernetes/cpu-values.yaml index 916f3a44b2..3de5b26fce 100644 --- a/comps/dataprep/vdms/multimodal_langchain/__init__.py +++ b/comps/lvms/deployment/kubernetes/cpu-values.yaml @@ -1,2 +1,5 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 + +tgi: + enabled: true diff --git a/comps/lvms/llava/Dockerfile b/comps/lvms/llava/Dockerfile deleted file mode 100644 index d4de4f1843..0000000000 --- a/comps/lvms/llava/Dockerfile +++ /dev/null @@ -1,29 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -FROM python:3.11-slim - -ARG ARCH="cpu" # Set this to "cpu" or "gpu" - -# Set environment variables -ENV LANG=en_US.UTF-8 - -RUN useradd -m -s /bin/bash user && \ - mkdir -p /home/user && \ - chown -R user /home/user/ -USER user - -COPY comps /home/user/comps - -RUN pip install --no-cache-dir --upgrade pip setuptools && \ - if [ ${ARCH} = "cpu" ]; then \ - pip install --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cpu -r /home/user/comps/lvms/llava/requirements.txt; \ - else \ - pip install --no-cache-dir -r /home/user/comps/lvms/llava/requirements.txt; \ - fi - -ENV PYTHONPATH=$PYTHONPATH:/home/user - -WORKDIR /home/user/comps/lvms/llava - -ENTRYPOINT ["python", "lvm.py"] diff --git a/comps/lvms/llava/__init__.py b/comps/lvms/llava/__init__.py deleted file mode 100644 index 916f3a44b2..0000000000 --- a/comps/lvms/llava/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 diff --git a/comps/lvms/llava/check_lvm.py b/comps/lvms/llava/check_lvm.py deleted file mode 100644 index 3f2ec34f6d..0000000000 --- a/comps/lvms/llava/check_lvm.py +++ /dev/null @@ -1,22 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - - -import base64 -import json -from io import BytesIO - -import PIL.Image -import requests - -image_path = "https://avatars.githubusercontent.com/u/39623753?v=4" - -image = PIL.Image.open(requests.get(image_path, stream=True, timeout=3000).raw) -buffered = BytesIO() -image.save(buffered, format="PNG") -img_b64_str = base64.b64encode(buffered.getvalue()).decode() - -endpoint = "http://localhost:9399/v1/lvm" -inputs = {"image": img_b64_str, "prompt": "What is this?", "max_new_tokens": 32} -response = requests.post(url=endpoint, data=json.dumps(inputs), proxies={"http": None}) -print(response.json()) diff --git a/comps/lvms/llava/lvm.py b/comps/lvms/llava/lvm.py deleted file mode 100644 index 897f7cbbe4..0000000000 --- a/comps/lvms/llava/lvm.py +++ /dev/null @@ -1,103 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - - -import json -import os -import time -from typing import Union - -import requests -from fastapi import HTTPException -from langchain_core.prompts import PromptTemplate -from template import ChatTemplate - -from comps import ( - CustomLogger, - LVMDoc, - LVMSearchedMultimodalDoc, - MetadataTextDoc, - ServiceType, - TextDoc, - opea_microservices, - register_microservice, - register_statistics, - 
statistics_dict, -) - -logger = CustomLogger("lvm") -logflag = os.getenv("LOGFLAG", False) - - -@register_microservice( - name="opea_service@lvm", - service_type=ServiceType.LVM, - endpoint="/v1/lvm", - host="0.0.0.0", - port=9399, -) -@register_statistics(names=["opea_service@lvm"]) -async def lvm(request: Union[LVMDoc, LVMSearchedMultimodalDoc]) -> Union[TextDoc, MetadataTextDoc]: - if logflag: - logger.info(request) - start = time.time() - if isinstance(request, LVMSearchedMultimodalDoc): - if logflag: - logger.info("[LVMSearchedMultimodalDoc ] input from retriever microservice") - retrieved_metadatas = request.metadata - if retrieved_metadatas is None or len(retrieved_metadatas) == 0: - # there is no video segments retrieved. - # Raise HTTPException status_code 204 - # due to llava-tgi-gaudi should receive image as input; Otherwise, the generated text is bad. - raise HTTPException(status_code=500, detail="There is no video segments retrieved given the query!") - - img_b64_str = retrieved_metadatas[0]["b64_img_str"] - has_image = img_b64_str != "" - initial_query = request.initial_query - context = retrieved_metadatas[0]["transcript_for_inference"] - prompt = initial_query - if request.chat_template is None: - prompt = ChatTemplate.generate_multimodal_rag_on_videos_prompt(initial_query, context, has_image) - else: - prompt_template = PromptTemplate.from_template(request.chat_template) - input_variables = prompt_template.input_variables - if sorted(input_variables) == ["context", "question"]: - prompt = prompt_template.format(question=initial_query, context=context) - else: - logger.info( - f"[ LVMSearchedMultimodalDoc ] {prompt_template} not used, we only support 2 input variables ['question', 'context']" - ) - max_new_tokens = request.max_new_tokens - if logflag: - logger.info(f"prompt generated for [LVMSearchedMultimodalDoc ] input from retriever microservice: {prompt}") - - else: - img_b64_str = request.image - prompt = request.prompt - max_new_tokens = request.max_new_tokens - - inputs = {"img_b64_str": img_b64_str, "prompt": prompt, "max_new_tokens": max_new_tokens} - # forward to the LLaVA server - response = requests.post(url=f"{lvm_endpoint}/generate", data=json.dumps(inputs), proxies={"http": None}) - - statistics_dict["opea_service@lvm"].append_latency(time.time() - start, None) - result = response.json()["text"] - if logflag: - logger.info(result) - if isinstance(request, LVMSearchedMultimodalDoc): - retrieved_metadata = request.metadata[0] - return_metadata = {} # this metadata will be used to construct proof for generated text - return_metadata["video_id"] = retrieved_metadata["video_id"] - return_metadata["source_video"] = retrieved_metadata["source_video"] - return_metadata["time_of_frame_ms"] = retrieved_metadata["time_of_frame_ms"] - return_metadata["transcript_for_inference"] = retrieved_metadata["transcript_for_inference"] - return MetadataTextDoc(text=result, metadata=return_metadata) - else: - return TextDoc(text=result) - - -if __name__ == "__main__": - lvm_endpoint = os.getenv("LVM_ENDPOINT", "http://localhost:8399") - - logger.info("[LVM] LVM initialized.") - opea_microservices["opea_service@lvm"].start() diff --git a/comps/lvms/llava/template.py b/comps/lvms/llava/template.py deleted file mode 100644 index 01992d2f85..0000000000 --- a/comps/lvms/llava/template.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - - -class ChatTemplate: - - @staticmethod - def 
generate_multimodal_rag_on_videos_prompt(question: str, context: str, has_image: bool = False):
-
-        if has_image:
-            template = """The transcript associated with the image is '{context}'. {question}"""
-        else:
-            template = (
-                """Refer to the following results obtained from the local knowledge base: '{context}'. {question}"""
-            )
-
-        return template.format(context=context, question=question)
diff --git a/comps/ragas/src/tgi/langchain/Dockerfile b/comps/lvms/src/Dockerfile
similarity index 53%
rename from comps/ragas/src/tgi/langchain/Dockerfile
rename to comps/lvms/src/Dockerfile
index b280cb049f..9cf9d03410 100644
--- a/comps/ragas/src/tgi/langchain/Dockerfile
+++ b/comps/lvms/src/Dockerfile
@@ -3,23 +3,21 @@ FROM python:3.11-slim
 
-RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
-    libgl1-mesa-glx \
-    libjemalloc-dev
-
 RUN useradd -m -s /bin/bash user && \
     mkdir -p /home/user && \
     chown -R user /home/user/
-USER user
+ENV LANG=C.UTF-8
 
 COPY comps /home/user/comps
 
 RUN pip install --no-cache-dir --upgrade pip setuptools && \
-    pip install --no-cache-dir -r /home/user/comps/ragas/tgi/langchain/requirements.txt
+    pip install --no-cache-dir -r /home/user/comps/lvms/src/requirements.txt
 
 ENV PYTHONPATH=$PYTHONPATH:/home/user
 
-WORKDIR /home/user/comps/ragas/tgi/langchain/
+USER user
+
+WORKDIR /home/user/comps/lvms/src
 
-ENTRYPOINT ["bash", "entrypoint.sh"]
+ENTRYPOINT ["python", "opea_lvm_microservice.py"]
\ No newline at end of file
diff --git a/comps/lvms/src/README.md b/comps/lvms/src/README.md
new file mode 100644
index 0000000000..e18ee4f94e
--- /dev/null
+++ b/comps/lvms/src/README.md
@@ -0,0 +1,85 @@
+# LVM Microservice
+
+Visual Question and Answering is one of the multimodal tasks empowered by LVMs (Large Visual Models). This microservice supports visual Q&A by using LLaVA as the base large visual model. It accepts two inputs: a prompt and an image. It outputs the answer to the prompt about the image.
+
+## 🚀1. Start Microservice with Docker (Option 1)
+
+You need to build and start the [dependency](./integrations/dependency/) service for the backend you plan to use.
+
+```bash
+docker build --no-cache -t opea/lvm:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/lvms/src/Dockerfile .
+# Change LVM_ENDPOINT to your dependency service endpoint
+docker run -d --name="test-comps-lvm" -e LVM_ENDPOINT=http://localhost:8399 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 9399:9399 --ipc=host opea/lvm:latest
+```
+
+## 🚀2. Start Microservice with Docker Compose (Option 2)
+
+Alternatively, you can start the LVM microservice with Docker Compose.
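+The lvm wrapper services are configured to start only after their dependency service passes its health check (`depends_on` with `condition: service_healthy` in the compose file), so the first start can take a while when a large model has to be downloaded. A quick way to watch progress, sketched here for the LLaVA pairing from the list below (swap in whichever dependency service you start):
+
+```bash
+docker compose -f comps/lvms/deployment/docker_compose/compose.yaml ps
+docker compose -f comps/lvms/deployment/docker_compose/compose.yaml logs -f llava-service
+```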
+ +- LLaVA + +```bash +export ip_address=$(hostname -I | awk '{print $1}') +export LVM_PORT=9399 +export LLAVA_PORT=11500 +export LVM_ENDPOINT=http://$ip_address:$LLAVA_PORT +docker compose -f comps/lvms/deployment/docker_compose/compose.yaml up llava-service lvm-llava -d +``` + +- LLaVA TGI on HPU Gaudi + +```bash +export ip_address=$(hostname -I | awk '{print $1}') +export LVM_PORT=9399 +export LLAVA_TGI_PORT=11502 +export LVM_ENDPOINT=http://$ip_address:$LLAVA_TGI_PORT +docker compose -f comps/lvms/deployment/docker_compose/compose.yaml up llava-tgi-service lvm-llava-tgi -d +``` + +- LLaMA Vision + +```bash +export ip_address=$(hostname -I | awk '{print $1}') +export LVM_PORT=9399 +export LLAMA_VISION_PORT=11510 +export LVM_ENDPOINT=http://$ip_address:$LLAMA_VISION_PORT + +docker compose -f comps/lvms/deployment/docker_compose/compose.yaml up lvm-llama-vision llama-vision-service -d +``` + +- PredictionGuard + +```bash +export ip_address=$(hostname -I | awk '{print $1}') +export PREDICTIONGUARD_PORT=9399 + +docker compose -f comps/lvms/deployment/docker_compose/compose.yaml up predictionguard-service -d +``` + +- Video LLaMA + +```bash +export ip_address=$(hostname -I | awk '{print $1}') +export LVM_PORT=9399 +export VIDEO_LLAMA_PORT=11506 +export LVM_ENDPOINT=http://$ip_address:$VIDEO_LLAMA_PORT +docker compose -f comps/lvms/deployment/docker_compose/compose.yaml up video-llama-service lvm-video-llama -d +``` + +## Test + +- LLaVA & llama-vision & PredictionGuard & TGI LLaVA + +```bash +# curl with an image and a prompt +http_proxy="" curl http://localhost:9399/v1/lvm -XPOST -d '{"image": "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8/5+hnoEIwDiqkL4KAcT9GO0U4BxoAAAAAElFTkSuQmCC", "prompt":"What is this?"}' -H 'Content-Type: application/json' + +# curl with only the prompt +http_proxy="" curl http://localhost:9399/v1/lvm --silent --write-out "HTTPSTATUS:%{http_code}" -XPOST -d '{"image": "", "prompt":"What is deep learning?"}' -H 'Content-Type: application/json' +``` + +- video-llama + +```bash +http_proxy="" curl -X POST http://localhost:9399/v1/lvm -d '{"video_url":"https://github.com/DAMO-NLP-SG/Video-LLaMA/raw/main/examples/silence_girl.mp4","chunk_start": 0,"chunk_duration": 9,"prompt":"What is the person doing?","max_new_tokens": 150}' -H 'Content-Type: application/json' +``` diff --git a/comps/lvms/llama-vision/Dockerfile b/comps/lvms/src/integrations/dependency/llama-vision/Dockerfile similarity index 78% rename from comps/lvms/llama-vision/Dockerfile rename to comps/lvms/src/integrations/dependency/llama-vision/Dockerfile index b68a796e3b..293fe94ef7 100644 --- a/comps/lvms/llama-vision/Dockerfile +++ b/comps/lvms/src/integrations/dependency/llama-vision/Dockerfile @@ -9,24 +9,25 @@ ENV LANG=en_US.UTF-8 RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ git-lfs \ libgl1-mesa-glx \ - libjemalloc-dev + libjemalloc-dev \ + curl RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ chown -R user /home/user/ -USER user - RUN git lfs install COPY comps /home/user/comps -RUN cd /home/user/comps/lvms/llama-vision/ && \ +RUN cd /home/user/comps/lvms/src/integrations/dependency/llama-vision/ && \ pip install --no-cache-dir -r requirements.txt && \ pip install --no-cache-dir --upgrade Pillow ENV PYTHONPATH=/root:/home/user -WORKDIR /home/user/comps/lvms/llama-vision/ +USER user + +WORKDIR /home/user/comps/lvms/src/integrations/dependency/llama-vision/ ENTRYPOINT ["python", "lvm.py"] diff --git a/comps/lvms/llama-vision/Dockerfile_guard 
b/comps/lvms/src/integrations/dependency/llama-vision/Dockerfile.guard similarity index 78% rename from comps/lvms/llama-vision/Dockerfile_guard rename to comps/lvms/src/integrations/dependency/llama-vision/Dockerfile.guard index 61a613ed58..507b58d4ba 100644 --- a/comps/lvms/llama-vision/Dockerfile_guard +++ b/comps/lvms/src/integrations/dependency/llama-vision/Dockerfile.guard @@ -9,24 +9,25 @@ ENV LANG=en_US.UTF-8 RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ git-lfs \ libgl1-mesa-glx \ - libjemalloc-dev + libjemalloc-dev \ + curl RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ chown -R user /home/user/ -USER user - RUN git lfs install COPY comps /home/user/comps -RUN cd /home/user/comps/lvms/llama-vision/ && \ +RUN cd /home/user/comps/lvms/src/integrations/dependency/llama-vision/ && \ pip install --no-cache-dir -r requirements.txt && \ pip install --no-cache-dir --upgrade Pillow ENV PYTHONPATH=/root:/home/user -WORKDIR /home/user/comps/lvms/llama-vision/ +USER user + +WORKDIR /home/user/comps/lvms/src/integrations/dependency/llama-vision/ ENTRYPOINT ["python", "lvm_guard.py"] diff --git a/comps/lvms/llama-vision/Dockerfile_tp b/comps/lvms/src/integrations/dependency/llama-vision/Dockerfile.tp similarity index 73% rename from comps/lvms/llama-vision/Dockerfile_tp rename to comps/lvms/src/integrations/dependency/llama-vision/Dockerfile.tp index 6c046b770c..c4bc97b165 100644 --- a/comps/lvms/llama-vision/Dockerfile_tp +++ b/comps/lvms/src/integrations/dependency/llama-vision/Dockerfile.tp @@ -9,7 +9,8 @@ ENV LANG=en_US.UTF-8 RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ git-lfs \ libgl1-mesa-glx \ - libjemalloc-dev + libjemalloc-dev \ + curl RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ @@ -22,13 +23,13 @@ COPY comps /home/user/comps RUN pip install --no-cache-dir git+https://github.com/HabanaAI/DeepSpeed.git@1.17.1 RUN pip install --no-cache-dir git+https://github.com/huggingface/optimum-habana@v1.13.2 -RUN cd /home/user/comps/lvms/llama-vision/ \ +RUN cd /home/user/comps/lvms/src/integrations/dependency/llama-vision/ \ pip install --no-cache-dir --upgrade pip && \ bash update && \ - pip install --no-cache-dir -r /home/user/comps/lvms/llama-vision/requirements_tp.txt + pip install --no-cache-dir -r /home/user/comps/lvms/src/integrations/dependency/llama-vision/requirements_tp.txt ENV PYTHONPATH=/root:/home/user -WORKDIR /home/user/comps/lvms/llama-vision/ +WORKDIR /home/user/comps/lvms/src/integrations/dependency/llama-vision/ ENTRYPOINT ["bash", "run_tp.sh"] diff --git a/comps/lvms/llama-vision/README.md b/comps/lvms/src/integrations/dependency/llama-vision/README.md similarity index 94% rename from comps/lvms/llama-vision/README.md rename to comps/lvms/src/integrations/dependency/llama-vision/README.md index 961b333c9a..5b2f290ce8 100644 --- a/comps/lvms/llama-vision/README.md +++ b/comps/lvms/src/integrations/dependency/llama-vision/README.md @@ -10,7 +10,7 @@ Visual Question and Answering is one of the multimodal tasks empowered by LVMs ( ```bash cd ../../../ -docker build -t opea/lvm-llama-vision:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/lvms/llama-vision/Dockerfile . +docker build -t opea/lvm-llama-vision:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/lvms/src/integrations/dependency/llama-vision/Dockerfile . 
``` #### Build Llama Vision Model with deepspeed @@ -18,14 +18,14 @@ docker build -t opea/lvm-llama-vision:latest --build-arg https_proxy=$https_prox If you need to build the image for 90B models, use the following command: ```bash -docker build -t opea/lvm-llama-vision-tp:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/lvms/llama-vision/Dockerfile_tp . +docker build -t opea/lvm-llama-vision-tp:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/lvms/src/integrations/dependency/llama-vision/Dockerfile.tp . ``` #### Build Llama Vision Guard Model ```bash cd ../../../ -docker build -t opea/lvm-llama-vision-guard:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/lvms/llama-vision/Dockerfile_guard . +docker build -t opea/lvm-llama-vision-guard:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/lvms/src/integrations/dependency/llama-vision/Dockerfile.guard . ``` ### Start Llama LVM Service diff --git a/comps/lvms/llama-vision/auto_tp.py b/comps/lvms/src/integrations/dependency/llama-vision/auto_tp.py similarity index 100% rename from comps/lvms/llama-vision/auto_tp.py rename to comps/lvms/src/integrations/dependency/llama-vision/auto_tp.py diff --git a/comps/lvms/llama-vision/checkpoint_utils.py b/comps/lvms/src/integrations/dependency/llama-vision/checkpoint_utils.py similarity index 100% rename from comps/lvms/llama-vision/checkpoint_utils.py rename to comps/lvms/src/integrations/dependency/llama-vision/checkpoint_utils.py diff --git a/comps/lvms/llama-vision/docker_compose_llm.yaml b/comps/lvms/src/integrations/dependency/llama-vision/docker_compose_llm.yaml similarity index 100% rename from comps/lvms/llama-vision/docker_compose_llm.yaml rename to comps/lvms/src/integrations/dependency/llama-vision/docker_compose_llm.yaml diff --git a/comps/lvms/llama-vision/gaudi_spawn.py b/comps/lvms/src/integrations/dependency/llama-vision/gaudi_spawn.py similarity index 100% rename from comps/lvms/llama-vision/gaudi_spawn.py rename to comps/lvms/src/integrations/dependency/llama-vision/gaudi_spawn.py diff --git a/comps/lvms/llama-vision/lvm.py b/comps/lvms/src/integrations/dependency/llama-vision/lvm.py similarity index 100% rename from comps/lvms/llama-vision/lvm.py rename to comps/lvms/src/integrations/dependency/llama-vision/lvm.py diff --git a/comps/lvms/llama-vision/lvm_guard.py b/comps/lvms/src/integrations/dependency/llama-vision/lvm_guard.py similarity index 100% rename from comps/lvms/llama-vision/lvm_guard.py rename to comps/lvms/src/integrations/dependency/llama-vision/lvm_guard.py diff --git a/comps/lvms/llama-vision/lvm_tp.py b/comps/lvms/src/integrations/dependency/llama-vision/lvm_tp.py similarity index 100% rename from comps/lvms/llama-vision/lvm_tp.py rename to comps/lvms/src/integrations/dependency/llama-vision/lvm_tp.py diff --git a/comps/lvms/llama-vision/lvm_tp_serve.py b/comps/lvms/src/integrations/dependency/llama-vision/lvm_tp_serve.py similarity index 100% rename from comps/lvms/llama-vision/lvm_tp_serve.py rename to comps/lvms/src/integrations/dependency/llama-vision/lvm_tp_serve.py diff --git a/comps/lvms/llama-vision/prompt_format_utils.py b/comps/lvms/src/integrations/dependency/llama-vision/prompt_format_utils.py similarity index 100% rename from comps/lvms/llama-vision/prompt_format_utils.py rename to comps/lvms/src/integrations/dependency/llama-vision/prompt_format_utils.py diff --git 
a/comps/lvms/llama-vision/replace_module.py b/comps/lvms/src/integrations/dependency/llama-vision/replace_module.py similarity index 100% rename from comps/lvms/llama-vision/replace_module.py rename to comps/lvms/src/integrations/dependency/llama-vision/replace_module.py diff --git a/comps/lvms/llama-vision/requirements.txt b/comps/lvms/src/integrations/dependency/llama-vision/requirements.txt similarity index 100% rename from comps/lvms/llama-vision/requirements.txt rename to comps/lvms/src/integrations/dependency/llama-vision/requirements.txt diff --git a/comps/lvms/llama-vision/requirements_tp.txt b/comps/lvms/src/integrations/dependency/llama-vision/requirements_tp.txt similarity index 100% rename from comps/lvms/llama-vision/requirements_tp.txt rename to comps/lvms/src/integrations/dependency/llama-vision/requirements_tp.txt diff --git a/comps/lvms/llama-vision/run_tp.sh b/comps/lvms/src/integrations/dependency/llama-vision/run_tp.sh similarity index 100% rename from comps/lvms/llama-vision/run_tp.sh rename to comps/lvms/src/integrations/dependency/llama-vision/run_tp.sh diff --git a/comps/lvms/llama-vision/transformers_generation_utils.py b/comps/lvms/src/integrations/dependency/llama-vision/transformers_generation_utils.py similarity index 100% rename from comps/lvms/llama-vision/transformers_generation_utils.py rename to comps/lvms/src/integrations/dependency/llama-vision/transformers_generation_utils.py diff --git a/comps/lvms/llama-vision/update b/comps/lvms/src/integrations/dependency/llama-vision/update similarity index 100% rename from comps/lvms/llama-vision/update rename to comps/lvms/src/integrations/dependency/llama-vision/update diff --git a/comps/lvms/llava/dependency/Dockerfile b/comps/lvms/src/integrations/dependency/llava/Dockerfile similarity index 66% rename from comps/lvms/llava/dependency/Dockerfile rename to comps/lvms/src/integrations/dependency/llava/Dockerfile index d937b05671..4f337ded6b 100644 --- a/comps/lvms/llava/dependency/Dockerfile +++ b/comps/lvms/src/integrations/dependency/llava/Dockerfile @@ -5,7 +5,11 @@ FROM python:3.11-slim RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ chown -R user /home/user/ -USER user + +# Install system dependencies +RUN apt-get update \ + && apt-get install -y curl + # Set environment variables ENV LANG=en_US.UTF-8 ENV PYTHONPATH=/home/user:/usr/lib/habanalabs/:/optimum-habana @@ -13,10 +17,11 @@ ENV PYTHONPATH=/home/user:/usr/lib/habanalabs/:/optimum-habana COPY comps /home/user/comps RUN pip install --no-cache-dir --upgrade pip setuptools && \ - pip install --no-cache-dir -r /home/user/comps/lvms/llava/requirements.txt + pip install --no-cache-dir -r /home/user/comps/lvms/src/integrations/dependency/llava/requirements.txt ENV PYTHONPATH=$PYTHONPATH:/home/user -WORKDIR /home/user/comps/lvms/llava/dependency +USER user +WORKDIR /home/user/comps/lvms/src/integrations/dependency/llava ENTRYPOINT ["python", "llava_server.py", "--device", "cpu"] diff --git a/comps/lvms/llava/dependency/Dockerfile.intel_hpu b/comps/lvms/src/integrations/dependency/llava/Dockerfile.intel_hpu similarity index 72% rename from comps/lvms/llava/dependency/Dockerfile.intel_hpu rename to comps/lvms/src/integrations/dependency/llava/Dockerfile.intel_hpu index e4da185f41..dda62f78f2 100644 --- a/comps/lvms/llava/dependency/Dockerfile.intel_hpu +++ b/comps/lvms/src/integrations/dependency/llava/Dockerfile.intel_hpu @@ -7,8 +7,12 @@ RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ chown -R user /home/user/ +# Install system 
dependencies +RUN apt-get update \ + && apt-get install -y curl + RUN rm -rf /etc/ssh/ssh_host* -USER user + # Set environment variables ENV LANG=en_US.UTF-8 ENV PYTHONPATH=/home/user:/usr/lib/habanalabs/:/optimum-habana @@ -17,11 +21,11 @@ COPY comps /home/user/comps # Install requirements and optimum habana RUN pip install --no-cache-dir --upgrade pip && \ - pip install --no-cache-dir -r /home/user/comps/lvms/llava/requirements.txt && \ + pip install --no-cache-dir -r /home/user/comps/lvms/src/integrations/dependency/llava/requirements.txt && \ pip install --no-cache-dir optimum[habana] ENV PYTHONPATH=$PYTHONPATH:/home/user - -WORKDIR /home/user/comps/lvms/llava/dependency +USER user +WORKDIR /home/user/comps/lvms/src/integrations/dependency/llava/ ENTRYPOINT ["python", "llava_server.py"] diff --git a/comps/lvms/llava/README.md b/comps/lvms/src/integrations/dependency/llava/README.md similarity index 57% rename from comps/lvms/llava/README.md rename to comps/lvms/src/integrations/dependency/llava/README.md index 998eb4b664..1678b53028 100644 --- a/comps/lvms/llava/README.md +++ b/comps/lvms/src/integrations/dependency/llava/README.md @@ -1,6 +1,6 @@ # LVM Microservice -Visual Question and Answering is one of the multimodal tasks empowered by LVMs (Large Visual Models). This microservice supports visual Q&A by using LLaVA as the base large visual model. It accepts two inputs: a prompt and an image. It outputs the answer to the prompt about the image. +Visual Question and Answering is one of the multimodal tasks empowered by LVMs (Large Visual Models). This microservice supports visual Q&A by using LLaVA as the base large visual model. It accepts two inputs: a prompt and images. It outputs the answer to the prompt about the images. ## 🚀1. Start Microservice with Python (Option 1) @@ -16,7 +16,6 @@ pip install -r requirements.txt ```bash # Start LLaVA service -cd dependency/ nohup python llava_server.py --device=cpu & # Wait until the server is up # Test @@ -30,23 +29,12 @@ pip install optimum[habana] ``` ```bash -cd dependency/ # Start LLaVA service nohup python llava_server.py & # Test python check_llava_server.py ``` -### 1.3 Start Image To Text Service/Test - -```bash -cd .. -# Start the OPEA Microservice -python lvm.py -# Test -python check_lvm.py -``` - ## 🚀2. Start Microservice with Docker (Option 2) ### 2.1 Build Images @@ -57,21 +45,14 @@ python check_lvm.py ```bash cd ../../../ -docker build -t opea/lvm-llava:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/lvms/llava/dependency/Dockerfile . +docker build -t opea/lvm-llava:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/lvms/src/integrations/dependency/llava/Dockerfile . ``` - Gaudi2 HPU ```bash cd ../../../ -docker build -t opea/lvm-llava:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/lvms/llava/dependency/Dockerfile.intel_hpu . -``` - -#### 2.1.2 LVM Service Image - -```bash -cd ../../../ -docker build -t opea/lvm-llava-svc:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/lvms/llava/Dockerfile . +docker build -t opea/lvm-llava:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/lvms/src/integrations/dependency/llava/Dockerfile.intel_hpu . 
``` ### 2.2 Start LLaVA and LVM Service @@ -90,15 +71,12 @@ docker run -p 8399:8399 -e http_proxy=$http_proxy --ipc=host -e https_proxy=$htt docker run -p 8399:8399 --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy opea/lvm-llava:latest ``` -#### 2.2.2 Start LVM service - -```bash -ip_address=$(hostname -I | awk '{print $1}') - -docker run -p 9399:9399 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e LVM_ENDPOINT=http://$ip_address:8399 opea/lvm-llava-svc:latest -``` +#### 2.2.2 Test -#### 2.2.3 Test +> Note: The `MAX_IMAGES` environment variable is used to specify the maximum number of images that will be sent from the LVM service to the LLaVA server. +> If an image list longer than `MAX_IMAGES` is sent to the LVM server, a shortened image list will be sent to the LLaVA service. If the image list +> needs to be shortened, the most recent images (the ones at the end of the list) are prioritized to send to the LLaVA service. Some LLaVA models have not +> been trained with multiple images and may lead to inaccurate results. If `MAX_IMAGES` is not set, it will default to `1`. ```bash # Use curl/python @@ -106,9 +84,12 @@ docker run -p 9399:9399 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$htt # curl with an image and a prompt http_proxy="" curl http://localhost:9399/v1/lvm -XPOST -d '{"image": "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8/5+hnoEIwDiqkL4KAcT9GO0U4BxoAAAAAElFTkSuQmCC", "prompt":"What is this?"}' -H 'Content-Type: application/json' +# curl with multiple images and a prompt (Note that depending on your MAX_IMAGES value, both images may not be sent to the LLaVA model) +http_proxy="" curl http://localhost:9399/v1/lvm -XPOST -d '{"image": ["iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mNkYPhfz0AEYBxVSF+FAP5FDvcfRYWgAAAAAElFTkSuQmCC", "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mNk+M9Qz0AEYBxVSF+FAAhKDveksOjmAAAAAElFTkSuQmCC"], "prompt":"What is in these images?"}' -H 'Content-Type: application/json' + # curl with a prompt only (no image) http_proxy="" curl http://localhost:9399/v1/lvm -XPOST -d '{"image": "", "prompt":"What is deep learning?"}' -H 'Content-Type: application/json' -# python -python check_lvm.py +# Test +python check_llava_server.py ``` diff --git a/comps/lvms/llava/dependency/check_llava_server.py b/comps/lvms/src/integrations/dependency/llava/check_llava_server.py similarity index 100% rename from comps/lvms/llava/dependency/check_llava_server.py rename to comps/lvms/src/integrations/dependency/llava/check_llava_server.py diff --git a/comps/lvms/llava/dependency/llava_server.py b/comps/lvms/src/integrations/dependency/llava/llava_server.py similarity index 69% rename from comps/lvms/llava/dependency/llava_server.py rename to comps/lvms/src/integrations/dependency/llava/llava_server.py index 644e15a82e..5540a02b62 100644 --- a/comps/lvms/llava/dependency/llava_server.py +++ b/comps/lvms/src/integrations/dependency/llava/llava_server.py @@ -4,6 +4,7 @@ import argparse import base64 +import os import time from io import BytesIO @@ -13,7 +14,7 @@ import uvicorn from fastapi import FastAPI, Request from fastapi.responses import JSONResponse, Response -from transformers import pipeline +from transformers import AutoProcessor, pipeline from transformers.image_utils import load_image model_name_or_path = None @@ -33,9 +34,16 @@ def pipeline_preprocess(self, image, prompt=None, 
timeout=None): The original transformers image-to-text pipeline preprocess function requires that an image is passed in, and will fail if the image parameter is null/empty. In order to support multimodal use cases with the same pipeline, this preprocess function handles the case where there is no image with the prompt. + Also, the image-to-text pipeline typically treats multiple images passed in as a list as a batch (where it iterates + over the image inputs for generation). For that reason, the original pipeline_preprocess code would only get a + single image at a time. To support multiple images, the pipeline call is updated to send a list of lists for the + images (so that when iterated, we still get multiple images) and this pipeline_preprocess function has been updated + to handle a list of images in addition to single images. """ - if image: + if isinstance(image, list): + image = [load_image(i, timeout=timeout) for i in image] + elif image: image = load_image(image, timeout=timeout) if prompt is not None: @@ -114,23 +122,52 @@ async def health() -> Response: @app.post("/generate") -async def generate(request: Request) -> Response: # FIXME batch_size=1 for now, only accept single image +async def generate(request: Request) -> Response: # FIXME batch_size=1 for now print("LLaVA generation begin.") request_dict = await request.json() prompt = request_dict.pop("prompt") - img_b64_str = request_dict.pop("img_b64_str") + img_b64_str = request_dict.pop("img_b64_str") # String or list of strings max_new_tokens = request_dict.pop("max_new_tokens", 100) + # Determine the format of the role labels based on the model name + model_name = generator.model.name_or_path + user_label = "USER:" + assistant_label = "ASSISTANT:" + image_tag = "\n" + + # This is the role label that we see in the results from the pipeline. This is used to split the output. 
+ output_assistant_label = "ASSISTANT: " + + if "llava-interleave" in model_name: + user_label = "<|im_start|>user" + assistant_label = "<|im_end|><|im_start|>assistant" + output_assistant_label = "assistant " + elif "llava-v1.6-mistral" in model_name: + user_label = "[INST]" + assistant_label = " [/INST]" + output_assistant_label = "[/INST] " + if img_b64_str: - # Decode and Resize the image - image = PIL.Image.open(BytesIO(base64.b64decode(img_b64_str))) - image = process_image(image) - # format the prompt with an image - prompt = f"\nUSER: {prompt}\nASSISTANT:" + if isinstance(img_b64_str, str): + img_b64_str = [img_b64_str] + + # Decode and Resize the images + images = [] + for img_b64 in img_b64_str: + if img_b64: + image = PIL.Image.open(BytesIO(base64.b64decode(img_b64))) + image = process_image(image) + images.append(image) + + # If the prompt provided does not have all the image tags, format the prompt with images + num_images = len(images) + num_image_tags = prompt.count(image_tag) + image_tags = image_tag * (num_images - num_image_tags) if num_images > num_image_tags else "" + prompt = f"{user_label}{image_tags} {prompt}{assistant_label}" else: - image = None + images = None # format the prompt with text only - prompt = f"USER: {prompt}\nASSISTANT:" + prompt = f"{user_label} {prompt}\n{assistant_label}" if args.device == "hpu": generate_kwargs = { @@ -149,12 +186,13 @@ async def generate(request: Request) -> Response: # FIXME batch_size=1 for now, # Override the pipeline preprocessing generator.preprocess = pipeline_preprocess.__get__(generator, type(generator)) - result = generator(image, prompt=prompt, batch_size=1, generate_kwargs=generate_kwargs) + result = generator([images], prompt=prompt, batch_size=1, generate_kwargs=generate_kwargs) end = time.time() - result = result[0]["generated_text"].split("ASSISTANT: ")[-1] + result = result[0][0]["generated_text"].split(output_assistant_label.strip())[-1].strip() print(f"LLaVA result = {result}, time = {(end-start) * 1000 }ms") - if image: - image.close() + if images: + for i in images: + i.close() ret = {"text": result} return JSONResponse(ret) @@ -163,7 +201,7 @@ async def generate(request: Request) -> Response: # FIXME batch_size=1 for now, if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--host", type=str, default="0.0.0.0") - parser.add_argument("--port", type=int, default=8399) + parser.add_argument("--port", type=int, default=os.getenv("LLAVA_SERVER_PORT", 8399)) parser.add_argument("--model_name_or_path", type=str, default="llava-hf/llava-1.5-7b-hf") parser.add_argument("--use_hpu_graphs", default=False, action="store_true") parser.add_argument("--warmup", type=int, default=1, help="Number of warmup iterations for benchmarking.") @@ -191,6 +229,8 @@ async def generate(request: Request) -> Response: # FIXME batch_size=1 for now, device=args.device, ) + processor = AutoProcessor.from_pretrained(model_name_or_path) + # warmup print("LLaVA warmup...") if args.device == "hpu": @@ -214,10 +254,23 @@ async def generate(request: Request) -> Response: # FIXME batch_size=1 for now, images = [] for image_path in image_paths: images.append(PIL.Image.open(requests.get(image_path, stream=True, timeout=3000).raw)) + + # Generate a text prompt to use for warm up + conversation = [ + { + "role": "user", + "content": [ + {"type": "image"}, + {"type": "text", "text": "What's the content of the image?"}, + ], + }, + ] + text_prompt = processor.apply_chat_template(conversation) + for i in range(args.warmup): 
generator( images, - prompt="\nUSER: What's the content of the image?\nASSISTANT:", + prompt=text_prompt, batch_size=1, generate_kwargs=generate_kwargs, ) diff --git a/comps/lvms/llava/requirements.txt b/comps/lvms/src/integrations/dependency/llava/requirements.txt similarity index 100% rename from comps/lvms/llava/requirements.txt rename to comps/lvms/src/integrations/dependency/llava/requirements.txt diff --git a/comps/lvms/predictionguard/Dockerfile b/comps/lvms/src/integrations/dependency/predictionguard/Dockerfile similarity index 53% rename from comps/lvms/predictionguard/Dockerfile rename to comps/lvms/src/integrations/dependency/predictionguard/Dockerfile index 64b858eb80..6d44590723 100644 --- a/comps/lvms/predictionguard/Dockerfile +++ b/comps/lvms/src/integrations/dependency/predictionguard/Dockerfile @@ -8,11 +8,15 @@ ENV LANG=en_US.UTF-8 COPY comps /home/comps +# Install system dependencies +RUN apt-get update \ + && apt-get install -y curl + RUN pip install --no-cache-dir --upgrade pip setuptools && \ - pip install --no-cache-dir -r /home/comps/lvms/predictionguard/requirements.txt + pip install --no-cache-dir -r /home/comps/lvms/src/integrations/dependency/predictionguard/requirements.txt ENV PYTHONPATH=$PYTHONPATH:/home -WORKDIR /home/comps/lvms/predictionguard +WORKDIR /home/comps/lvms/src/integrations/dependency/predictionguard ENTRYPOINT ["python", "lvm.py"] diff --git a/comps/lvms/predictionguard/README.md b/comps/lvms/src/integrations/dependency/predictionguard/README.md similarity index 93% rename from comps/lvms/predictionguard/README.md rename to comps/lvms/src/integrations/dependency/predictionguard/README.md index 1220566661..88097ef116 100644 --- a/comps/lvms/predictionguard/README.md +++ b/comps/lvms/src/integrations/dependency/predictionguard/README.md @@ -32,7 +32,7 @@ export PREDICTIONGUARD_API_KEY=${your_predictionguard_api_key} ```bash cd ../../.. -docker build -t opea/lvm-predictionguard:latest -f comps/lvms/predictionguard/Dockerfile . +docker build -t opea/lvm-predictionguard:latest -f comps/lvms/src/integrations/dependency/predictionguard/Dockerfile . 
``` ### 2.2 Start Service diff --git a/comps/lvms/predictionguard/__init__.py b/comps/lvms/src/integrations/dependency/predictionguard/__init__.py similarity index 100% rename from comps/lvms/predictionguard/__init__.py rename to comps/lvms/src/integrations/dependency/predictionguard/__init__.py diff --git a/comps/lvms/predictionguard/lvm.py b/comps/lvms/src/integrations/dependency/predictionguard/lvm.py similarity index 100% rename from comps/lvms/predictionguard/lvm.py rename to comps/lvms/src/integrations/dependency/predictionguard/lvm.py diff --git a/comps/lvms/predictionguard/requirements.txt b/comps/lvms/src/integrations/dependency/predictionguard/requirements.txt similarity index 100% rename from comps/lvms/predictionguard/requirements.txt rename to comps/lvms/src/integrations/dependency/predictionguard/requirements.txt diff --git a/comps/lvms/video-llama/dependency/Dockerfile b/comps/lvms/src/integrations/dependency/video-llama/Dockerfile similarity index 81% rename from comps/lvms/video-llama/dependency/Dockerfile rename to comps/lvms/src/integrations/dependency/video-llama/Dockerfile index 5c86e7b2f2..94e9213202 100644 --- a/comps/lvms/video-llama/dependency/Dockerfile +++ b/comps/lvms/src/integrations/dependency/video-llama/Dockerfile @@ -6,7 +6,7 @@ FROM python:3.9-slim ENV LANG=C.UTF-8 RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ - git git-lfs && \ + git git-lfs curl && \ git lfs install RUN useradd -m -s /bin/bash user && \ @@ -14,13 +14,11 @@ RUN useradd -m -s /bin/bash user && \ chown -R user:user /home/user/ RUN mkdir /home/user/model && chown user:user -R /home/user/model -USER user - COPY --chown=user:user comps /home/user/comps -WORKDIR /home/user/comps/lvms/video-llama/dependency +WORKDIR /home/user/comps/lvms/src/integrations/dependency/video-llama/ RUN pip install --no-cache-dir --upgrade pip && \ - pip install --no-cache-dir -r /home/user/comps/lvms/video-llama/dependency/requirements.txt + pip install --no-cache-dir -r /home/user/comps/lvms/src/integrations/dependency/video-llama/requirements.txt ARG VIDEO_LLAMA_REPO=https://github.com/DAMO-NLP-SG/Video-LLaMA.git ARG VIDEO_LLAMA_COMMIT=0adb19e @@ -31,6 +29,7 @@ RUN tar -xvf video-llama.patch.tar && \ mv video_llama ../ && \ cd ../ && rm -rf Video-LLaMA +USER user ENV PYTHONPATH=/home/user diff --git a/comps/lvms/video-llama/README.md b/comps/lvms/src/integrations/dependency/video-llama/README.md similarity index 68% rename from comps/lvms/video-llama/README.md rename to comps/lvms/src/integrations/dependency/video-llama/README.md index 6b7006d840..5f8ed9a587 100644 --- a/comps/lvms/video-llama/README.md +++ b/comps/lvms/src/integrations/dependency/video-llama/README.md @@ -9,9 +9,9 @@ This is a Docker-based microservice that runs Video-Llama as a Large Vision Mode ```bash cd GenAIComps # Video-Llama Server Image -docker build --no-cache -t opea/video-llama-lvm-server:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/lvms/video-llama/dependency/Dockerfile . +docker build --no-cache -t opea/lvm-video-llama:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/lvms/src/integrations/dependency/video-llama/Dockerfile . # LVM Service Image -docker build --no-cache -t opea/lvm-video-llama:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/lvms/video-llama/Dockerfile . 
+docker build --no-cache -t opea/lvm:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/lvms/src/Dockerfile .
 ```
 
 ### 1.2 Start Video-Llama and LVM Services
@@ -24,9 +24,9 @@ export ip_address=$(hostname -I | awk '{print $1}')
 export no_proxy=$no_proxy,${ip_address}
 export LVM_ENDPOINT=http://${ip_address}:9009
 # Start service
-docker compose -f comps/lvms/video-llama/docker_compose.yaml up -d
+docker compose -f comps/lvms/src/integrations/dependency/video-llama/docker_compose.yaml up -d
 # it should take about 1.5 hours for the model to download in the video-llama server, assuming a maximum download speed of 100 Mbps
-until docker logs video-llama-lvm-server 2>&1 | grep -q "Uvicorn running on"; do
+until docker logs lvm-video-llama 2>&1 | grep -q "Uvicorn running on"; do
   sleep 5m
 done
 ```
@@ -34,7 +34,7 @@ done
 If you've run the microservice before, it's recommended to keep the downloaded model so it won't be redownloaded each time you run it. To achieve this, you need to modify the following configuration:
 
 ```yaml
-# comps/lvms/video-llama/docker_compose.yaml
+# comps/lvms/src/integrations/dependency/video-llama/docker_compose.yaml
 services:
   lvm-video-llama:
     ...
@@ -49,13 +49,6 @@ services:
 export ip_address=$(hostname -I | awk '{print $1}')
 ## check video-llama
 http_proxy="" curl -X POST "http://${ip_address}:9009/generate?video_url=https%3A%2F%2Fgithub.com%2FDAMO-NLP-SG%2FVideo-LLaMA%2Fraw%2Fmain%2Fexamples%2Fsilence_girl.mp4&start=0.0&duration=9&prompt=What%20is%20the%20person%20doing%3F&max_new_tokens=150" -H "accept: */*" -d ''
-
-## check lvm
-http_proxy="" curl -X POST http://${ip_address}:9000/v1/lvm -d '{"video_url":"https://github.com/DAMO-NLP-SG/Video-LLaMA/raw/main/examples/silence_girl.mp4","chunk_start": 0,"chunk_duration": 9,"prompt":"What is the person doing?","max_new_tokens": 150}' -H 'Content-Type: application/json'
-
-# or use python
-export ip_address=$(hostname -I | awk '{print $1}')
-python comps/lvms/video-llama/check_lvm.py
 ```
 
 ## ♻️ 3.
Clean diff --git a/comps/lvms/video-llama/dependency/data/silence_girl.mp4 b/comps/lvms/src/integrations/dependency/video-llama/data/silence_girl.mp4 similarity index 100% rename from comps/lvms/video-llama/dependency/data/silence_girl.mp4 rename to comps/lvms/src/integrations/dependency/video-llama/data/silence_girl.mp4 diff --git a/comps/lvms/video-llama/dependency/docker_compose_vllama.yaml b/comps/lvms/src/integrations/dependency/video-llama/docker_compose_vllama.yaml similarity index 86% rename from comps/lvms/video-llama/dependency/docker_compose_vllama.yaml rename to comps/lvms/src/integrations/dependency/video-llama/docker_compose_vllama.yaml index 17d38e076c..9b880e8960 100644 --- a/comps/lvms/video-llama/dependency/docker_compose_vllama.yaml +++ b/comps/lvms/src/integrations/dependency/video-llama/docker_compose_vllama.yaml @@ -4,8 +4,8 @@ version: "3" services: lvm-video-llama: - image: opea/video-llama-lvm-server:latest - container_name: video-llama-lvm-server + image: opea/lvm-video-llama:latest + container_name: lvm-video-llama ports: - "9009:9009" ipc: host diff --git a/comps/lvms/video-llama/dependency/extract_vl_embedding.py b/comps/lvms/src/integrations/dependency/video-llama/extract_vl_embedding.py similarity index 100% rename from comps/lvms/video-llama/dependency/extract_vl_embedding.py rename to comps/lvms/src/integrations/dependency/video-llama/extract_vl_embedding.py diff --git a/comps/lvms/video-llama/dependency/requirements.txt b/comps/lvms/src/integrations/dependency/video-llama/requirements.txt similarity index 96% rename from comps/lvms/video-llama/dependency/requirements.txt rename to comps/lvms/src/integrations/dependency/video-llama/requirements.txt index 71c94c901d..cb2cde464f 100644 --- a/comps/lvms/video-llama/dependency/requirements.txt +++ b/comps/lvms/src/integrations/dependency/video-llama/requirements.txt @@ -29,7 +29,7 @@ timm torch==1.13.1 --index-url https://download.pytorch.org/whl/cpu torchaudio==0.13.1 --index-url https://download.pytorch.org/whl/cpu torchvision==0.14.1 --index-url https://download.pytorch.org/whl/cpu -transformers +transformers==4.47.1 uvicorn validators webdataset diff --git a/comps/lvms/video-llama/dependency/server.py b/comps/lvms/src/integrations/dependency/video-llama/server.py similarity index 99% rename from comps/lvms/video-llama/dependency/server.py rename to comps/lvms/src/integrations/dependency/video-llama/server.py index 24280c02b8..5c34b275b6 100644 --- a/comps/lvms/video-llama/dependency/server.py +++ b/comps/lvms/src/integrations/dependency/video-llama/server.py @@ -37,7 +37,7 @@ streamer = None chat = None -VIDEO_DIR = "/home/user/comps/lvms/video-llama/dependency/data" +VIDEO_DIR = "/home/user/comps/lvms/src/integrations/dependency/video-llama/data" CFG_PATH = "video_llama_config/video_llama_eval_only_vl.yaml" MODEL_TYPE = "llama_v2" diff --git a/comps/lvms/video-llama/dependency/start.sh b/comps/lvms/src/integrations/dependency/video-llama/start.sh similarity index 100% rename from comps/lvms/video-llama/dependency/start.sh rename to comps/lvms/src/integrations/dependency/video-llama/start.sh diff --git a/comps/lvms/video-llama/dependency/video-llama.patch.tar b/comps/lvms/src/integrations/dependency/video-llama/video-llama.patch.tar similarity index 100% rename from comps/lvms/video-llama/dependency/video-llama.patch.tar rename to comps/lvms/src/integrations/dependency/video-llama/video-llama.patch.tar diff --git a/comps/lvms/video-llama/dependency/video_llama_config/video_llama_eval_only_vl.yaml 
b/comps/lvms/src/integrations/dependency/video-llama/video_llama_config/video_llama_eval_only_vl.yaml similarity index 100% rename from comps/lvms/video-llama/dependency/video_llama_config/video_llama_eval_only_vl.yaml rename to comps/lvms/src/integrations/dependency/video-llama/video_llama_config/video_llama_eval_only_vl.yaml diff --git a/comps/lvms/src/integrations/llama_vision.py b/comps/lvms/src/integrations/llama_vision.py new file mode 100644 index 0000000000..9b2abd999c --- /dev/null +++ b/comps/lvms/src/integrations/llama_vision.py @@ -0,0 +1,60 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import json +import os +from typing import Union + +import requests + +from comps import CustomLogger, LVMDoc, OpeaComponent, OpeaComponentRegistry, ServiceType, TextDoc + +logger = CustomLogger("opea_llama_vision") +logflag = os.getenv("LOGFLAG", False) + + +@OpeaComponentRegistry.register("OPEA_LLAMA_VISION_LVM") +class OpeaLlamaVisionLvm(OpeaComponent): + """A specialized LVM component derived from OpeaComponent for LLaMA-Vision services.""" + + def __init__(self, name: str, description: str, config: dict = None): + super().__init__(name, ServiceType.LVM.name.lower(), description, config) + self.base_url = os.getenv("LVM_ENDPOINT", "http://localhost:9399") + health_status = self.check_health() + if not health_status: + logger.error("specialized health check failed.") + + async def invoke( + self, + request: Union[LVMDoc], + ) -> Union[TextDoc]: + """Involve the LVM service to generate answer for the provided input.""" + if logflag: + logger.info(request) + + inputs = {"image": request.image, "prompt": request.prompt, "max_new_tokens": request.max_new_tokens} + # forward to the LLaMA Vision server + response = requests.post(url=f"{self.base_url}/v1/lvm", data=json.dumps(inputs), proxies={"http": None}) + + result = response.json()["text"] + if logflag: + logger.info(result) + + return TextDoc(text=result) + + def check_health(self) -> bool: + """Checks the health of the embedding service. + + Returns: + bool: True if the service is reachable and healthy, False otherwise. + """ + try: + response = requests.get(f"{self.base_url}/health") + if response.status_code == 200: + return True + else: + return False + except Exception as e: + # Handle connection errors, timeouts, etc. + logger.error(f"Health check failed: {e}") + return False diff --git a/comps/lvms/src/integrations/llava.py b/comps/lvms/src/integrations/llava.py new file mode 100644 index 0000000000..95713dffeb --- /dev/null +++ b/comps/lvms/src/integrations/llava.py @@ -0,0 +1,146 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import json +import os +from typing import Union + +import requests +from fastapi import HTTPException +from langchain_core.prompts import PromptTemplate + +from comps import ( + CustomLogger, + LVMDoc, + LVMSearchedMultimodalDoc, + MetadataTextDoc, + OpeaComponent, + OpeaComponentRegistry, + ServiceType, + TextDoc, +) + +logger = CustomLogger("opea_llava") +logflag = os.getenv("LOGFLAG", False) + +# The maximum number of images that should be sent to the LVM +max_images = int(os.getenv("MAX_IMAGES", 1)) + + +class ChatTemplate: + + @staticmethod + def generate_multimodal_rag_on_videos_prompt(question: str, context: str, has_image: bool = False): + + if has_image: + template = """The transcript associated with the image is '{context}'. 
{question}""" + else: + template = ( + """Refer to the following results obtained from the local knowledge base: '{context}'. {question}""" + ) + + return template.format(context=context, question=question) + + +@OpeaComponentRegistry.register("OPEA_LLAVA_LVM") +class OpeaLlavaLvm(OpeaComponent): + """A specialized LVM component derived from OpeaComponent for LLaVA LVM services.""" + + def __init__(self, name: str, description: str, config: dict = None): + super().__init__(name, ServiceType.LVM.name.lower(), description, config) + self.base_url = os.getenv("LVM_ENDPOINT", "http://localhost:8399") + if logflag: + logger.info(f"MAX_IMAGES: {max_images}") + health_status = self.check_health() + if not health_status: + logger.error("OpeaLlavaLvm health check failed.") + + async def invoke( + self, + request: Union[LVMDoc, LVMSearchedMultimodalDoc], + ) -> Union[TextDoc, MetadataTextDoc]: + """Involve the LVM service to generate answer for the provided input.""" + if logflag: + logger.info(request) + if isinstance(request, LVMSearchedMultimodalDoc): + if logflag: + logger.info("[LVMSearchedMultimodalDoc ] input from retriever microservice") + retrieved_metadatas = request.metadata + if retrieved_metadatas is None or len(retrieved_metadatas) == 0: + # there is no video segments retrieved. + # Raise HTTPException status_code 204 + # due to llava-tgi-gaudi should receive image as input; Otherwise, the generated text is bad. + raise HTTPException(status_code=500, detail="There is no video segments retrieved given the query!") + + img_b64_str = retrieved_metadatas[0]["b64_img_str"] + has_image = img_b64_str != "" + initial_query = request.initial_query + context = retrieved_metadatas[0]["transcript_for_inference"] + prompt = initial_query + if request.chat_template is None: + prompt = ChatTemplate.generate_multimodal_rag_on_videos_prompt(initial_query, context, has_image) + else: + prompt_template = PromptTemplate.from_template(request.chat_template) + input_variables = prompt_template.input_variables + if sorted(input_variables) == ["context", "question"]: + prompt = prompt_template.format(question=initial_query, context=context) + else: + logger.info( + f"[ LVMSearchedMultimodalDoc ] {prompt_template} not used, we only support 2 input variables ['question', 'context']" + ) + max_new_tokens = request.max_new_tokens + if logflag: + logger.info( + f"prompt generated for [LVMSearchedMultimodalDoc ] input from retriever microservice: {prompt}" + ) + + else: + img_b64_str = request.image + prompt = request.prompt + max_new_tokens = request.max_new_tokens + + # Limit the number of images being sent to the LVM + if isinstance(img_b64_str, list) and len(img_b64_str) > max_images: + img_b64_str = img_b64_str[-max_images:] + + # Adjust the number of images tags in the prompt + image_tag = "\n" + num_tags_in_prompt = prompt.count(image_tag) + + if len(img_b64_str) < num_tags_in_prompt: + prompt = prompt.replace(image_tag, "", num_tags_in_prompt - len(img_b64_str)) + + inputs = {"img_b64_str": img_b64_str, "prompt": prompt, "max_new_tokens": max_new_tokens} + # forward to the LLaVA server + response = requests.post(url=f"{self.base_url}/generate", data=json.dumps(inputs), proxies={"http": None}) + + result = response.json()["text"] + if logflag: + logger.info(result) + if isinstance(request, LVMSearchedMultimodalDoc): + retrieved_metadata = request.metadata[0] + return_metadata = {} # this metadata will be used to construct proof for generated text + return_metadata["video_id"] = retrieved_metadata["video_id"] 
+ return_metadata["source_video"] = retrieved_metadata["source_video"] + return_metadata["time_of_frame_ms"] = retrieved_metadata["time_of_frame_ms"] + return_metadata["transcript_for_inference"] = retrieved_metadata["transcript_for_inference"] + return MetadataTextDoc(text=result, metadata=return_metadata) + else: + return TextDoc(text=result) + + def check_health(self) -> bool: + """Checks the health of the embedding service. + + Returns: + bool: True if the service is reachable and healthy, False otherwise. + """ + try: + response = requests.get(f"{self.base_url}/health") + if response.status_code == 200: + return True + else: + return False + except Exception as e: + # Handle connection errors, timeouts, etc. + logger.error(f"Health check failed: {e}") + return False diff --git a/comps/lvms/src/integrations/predictionguard.py b/comps/lvms/src/integrations/predictionguard.py new file mode 100644 index 0000000000..d377cda24d --- /dev/null +++ b/comps/lvms/src/integrations/predictionguard.py @@ -0,0 +1,60 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import json +import os +from typing import Union + +import requests + +from comps import CustomLogger, LVMDoc, OpeaComponent, OpeaComponentRegistry, ServiceType, TextDoc + +logger = CustomLogger("opea_predictionguard") +logflag = os.getenv("LOGFLAG", False) + + +@OpeaComponentRegistry.register("OPEA_PREDICTION_GUARD_LVM") +class OpeaPredictionguardLvm(OpeaComponent): + """A specialized LVM component derived from OpeaComponent for Predictionguard services.""" + + def __init__(self, name: str, description: str, config: dict = None): + super().__init__(name, ServiceType.LVM.name.lower(), description, config) + self.base_url = os.getenv("LVM_ENDPOINT", "http://localhost:9399") + health_status = self.check_health() + if not health_status: + logger.error("OpeaPredictionguardLvm health check failed.") + + async def invoke( + self, + request: Union[LVMDoc], + ) -> Union[TextDoc]: + """Involve the LVM service to generate answer for the provided input.""" + if logflag: + logger.info(request) + + inputs = {"image": request.image, "prompt": request.prompt, "max_new_tokens": request.max_new_tokens} + # forward to the PredictionGuard server + response = requests.post(url=f"{self.base_url}/v1/lvm", data=json.dumps(inputs), proxies={"http": None}) + + result = response.json()["text"] + if logflag: + logger.info(result) + + return TextDoc(text=result) + + def check_health(self) -> bool: + """Checks the health of the embedding service. + + Returns: + bool: True if the service is reachable and healthy, False otherwise. + """ + try: + response = requests.get(f"{self.base_url}/health") + if response.status_code == 200: + return True + else: + return False + except Exception as e: + # Handle connection errors, timeouts, etc. 
+ logger.error(f"Health check failed: {e}") + return False diff --git a/comps/lvms/src/integrations/tgi_llava.py b/comps/lvms/src/integrations/tgi_llava.py new file mode 100644 index 0000000000..0de6b7ede6 --- /dev/null +++ b/comps/lvms/src/integrations/tgi_llava.py @@ -0,0 +1,220 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os +import time +from typing import Union + +import requests +from fastapi import HTTPException +from fastapi.responses import StreamingResponse +from huggingface_hub import AsyncInferenceClient +from langchain_core.prompts import PromptTemplate + +from comps import ( + CustomLogger, + LVMDoc, + LVMSearchedMultimodalDoc, + MetadataTextDoc, + OpeaComponent, + OpeaComponentRegistry, + ServiceType, + TextDoc, + statistics_dict, +) + +logger = CustomLogger("opea_tgi_llava") +logflag = os.getenv("LOGFLAG", False) + +# The maximum number of images that should be sent to the LVM +max_images = int(os.getenv("MAX_IMAGES", 1)) + + +class ChatTemplate: + + @staticmethod + def generate_multimodal_rag_on_videos_prompt(question: str, context: str, has_image: bool = False): + + if has_image: + template = """The transcript associated with the image is '{context}'. {question}""" + else: + template = ( + """Refer to the following results obtained from the local knowledge base: '{context}'. {question}""" + ) + + return template.format(context=context, question=question) + + +@OpeaComponentRegistry.register("OPEA_TGI_LLAVA_LVM") +class OpeaTgiLlavaLvm(OpeaComponent): + """A specialized TGI LVM component derived from OpeaComponent for LLaVA LVM services.""" + + def __init__(self, name: str, description: str, config: dict = None): + super().__init__(name, ServiceType.LVM.name.lower(), description, config) + self.base_url = os.getenv("LVM_ENDPOINT", "http://localhost:8399") + self.lvm_client = AsyncInferenceClient(self.base_url) + health_status = self.check_health() + if logflag: + logger.info(f"MAX_IMAGES: {max_images}") + if not health_status: + logger.error("OpeaTgiLlavaLvm health check failed.") + + async def invoke( + self, + request: Union[LVMDoc, LVMSearchedMultimodalDoc], + ) -> Union[TextDoc, MetadataTextDoc]: + """Involve the LVM service to generate answer for the provided input.""" + if logflag: + logger.info(request) + if isinstance(request, LVMSearchedMultimodalDoc): + if logflag: + logger.info("[LVMSearchedMultimodalDoc ] input from retriever microservice") + retrieved_metadatas = request.metadata + if retrieved_metadatas is None or len(retrieved_metadatas) == 0: + # there is no video segments retrieved. + # Raise HTTPException status_code 204 + # due to llava-tgi-gaudi should receive image as input; Otherwise, the generated text is bad. 
+ raise HTTPException(status_code=500, detail="There is no video segments retrieved given the query!") + + img_b64_str = retrieved_metadatas[0]["b64_img_str"] + has_image = img_b64_str != "" + initial_query = request.initial_query + context = retrieved_metadatas[0]["transcript_for_inference"] + prompt = initial_query + if request.chat_template is None: + prompt = ChatTemplate.generate_multimodal_rag_on_videos_prompt(initial_query, context, has_image) + else: + prompt_template = PromptTemplate.from_template(request.chat_template) + input_variables = prompt_template.input_variables + if sorted(input_variables) == ["context", "question"]: + prompt = prompt_template.format(question=initial_query, context=context) + else: + logger.info( + f"[ LVMSearchedMultimodalDoc ] {prompt_template} not used, we only support 2 input variables ['question', 'context']" + ) + max_new_tokens = request.max_new_tokens + stream = request.stream + repetition_penalty = request.repetition_penalty + temperature = request.temperature + top_k = request.top_k + top_p = request.top_p + if logflag: + logger.info( + f"prompt generated for [LVMSearchedMultimodalDoc ] input from retriever microservice: {prompt}" + ) + + else: + img_b64_str = request.image + prompt = request.prompt + max_new_tokens = request.max_new_tokens + stream = request.stream + repetition_penalty = request.repetition_penalty + temperature = request.temperature + top_k = request.top_k + top_p = request.top_p + + # Make img_b64_str into a list of strings (if it's not already a list) + if not isinstance(img_b64_str, list): + if img_b64_str: + img_b64_str = [img_b64_str] + else: + # If img_b64_str was an empty string, which means we have just have a text prompt. + # Work around an issue where LLaVA-NeXT is not providing good responses when prompted without an image. + # Provide an image and then instruct the model to ignore the image. The base64 string below is the encoded png: + # https://raw.githubusercontent.com/opea-project/GenAIExamples/refs/tags/v1.0/AudioQnA/ui/svelte/src/lib/assets/icons/png/audio1.png + img_b64_str = [ + "iVBORw0KGgoAAAANSUhEUgAAADUAAAAlCAYAAADiMKHrAAAACXBIWXMAAAsTAAALEwEAmpwYAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAKPSURBVHgB7Zl/btowFMefnUTqf+MAHYMTjN4gvcGOABpM+8E0doLSE4xpsE3rKuAG3KC5Ad0J6MYOkP07YnvvhR9y0lVzupTIVT5SwDjB9fd97WfsMkCef1rUXM8dY9HHK4hWUevzi/oVWAqnF8fzLmAtiPA3Aq0lFsVA1fRKxlgNLIbDPaQUZQuu6YO98aIipHOiFGtIqaYfn1UnUCDds6WPyeANlTFbv9WztbFTK+HNUVAPiz7nbPzq7HsPCoKWIBREGfsJXZit5xT07X0jp6iRdIbEHOnjyyD97OvzH00lVS2K5OS2ax11cBXxJgYxlEIE6XZclzdTX6n8XjkkcEIfbj2nMO0/SNd1vy4vsCNjYPyEovfyy88GZIQCSKOCMf6ORgStoboLJuSWKDYCfK2q4jjrMZ+GOh7Pib/gek5DHxVUJtcgA7mJ4kwZRbN7viQXFzQn0Nl52gXG4Fo7DKAYp0yI3VHQ16oaWV0wYa+iGE8nG+wAdx5DzpS/KGyhFGULpShbKEXZQinqLlBK/IKc2asoh4sZvoXJWhlAzuxV1KBVD3HrfYTFAK8ZHgu0hu36DHLG+Izinw250WUkXHJht02QUnxLP7fZxR7f1I6S7Ir2GgmYvIQM5OYUuYBdainATq2ZjTqPBlnbGXYeBrg9Od18DKmc1U0jpw4OIIwEJFxQSl2b4MN2lf74fw8nFNbHt/5N9xWKTZvJ2S6YZk6RC3j2cKpVhSIShZ0mea6caCOCAjyNHd5gPPxGncMBTvI6hunYdaJ6kf8VoSCP2odxX6RkR6NOtanfj13EswKVqEQrPzzFL1lK+YvCFraiEqs8TrwQLGYraqpX4kr/Hixml+63Z+CoM9DTo438AUmP+KyMWT+tAAAAAElFTkSuQmCC" + ] + prompt = f"Please disregard the image and answer the question. 
{prompt}" + + # Truncate the list of images if we have too many, only sending the most recent ones at the end of the list + if len(img_b64_str) > max_images: + img_b64_str = img_b64_str[-max_images:] + + # Check the number of image tags in the prompt and adjust them to match the number of images that we have + image_tag = "\n" + num_tags_in_prompt = prompt.count(image_tag) + + # We have too many image tags in the prompt replace the first x instance of the tag with an empty string + if len(img_b64_str) < num_tags_in_prompt: + prompt = prompt.replace(image_tag, "", num_tags_in_prompt - len(img_b64_str)) + + # We don't have enough image tags in the prompt, add them + if len(img_b64_str) > num_tags_in_prompt: + num_tags_to_add = len(img_b64_str) - num_tags_in_prompt + tags_to_add = image_tag * num_tags_to_add + prompt = f"{tags_to_add}{prompt}" + + # Replace image tags with the data + for i in img_b64_str: + formatted_image_str = f"![](data:image/png;base64,{i})\n" + prompt = prompt.replace(image_tag, formatted_image_str, 1) + image_prompt = f"{prompt}\nASSISTANT:" + + if stream: + t_start = time.time() + + async def stream_generator(time_start): + first_token_latency = None + chat_response = "" + text_generation = await self.lvm_client.text_generation( + prompt=image_prompt, + stream=stream, + max_new_tokens=max_new_tokens, + repetition_penalty=repetition_penalty, + temperature=temperature, + top_k=top_k, + top_p=top_p, + ) + async for text in text_generation: + if first_token_latency is None: + first_token_latency = time.time() - time_start + chat_response += text + chunk_repr = repr(text.encode("utf-8")) + if logflag: + logger.info(f"[llm - chat_stream] chunk:{chunk_repr}") + yield f"data: {chunk_repr}\n\n" + if logflag: + logger.info(f"[llm - chat_stream] stream response: {chat_response}") + statistics_dict["opea_service@lvm"].append_latency(time.time() - time_start, first_token_latency) + yield "data: [DONE]\n\n" + + return StreamingResponse(stream_generator(t_start), media_type="text/event-stream") + else: + generated_str = await self.lvm_client.text_generation( + image_prompt, + max_new_tokens=max_new_tokens, + repetition_penalty=repetition_penalty, + temperature=temperature, + top_k=top_k, + top_p=top_p, + ) + if logflag: + logger.info(generated_str) + if isinstance(request, LVMSearchedMultimodalDoc): + retrieved_metadata = request.metadata[0] + return_metadata = {} # this metadata will be used to construct proof for generated text + return_metadata["video_id"] = retrieved_metadata["video_id"] + return_metadata["source_video"] = retrieved_metadata["source_video"] + return_metadata["time_of_frame_ms"] = retrieved_metadata["time_of_frame_ms"] + return_metadata["transcript_for_inference"] = retrieved_metadata["transcript_for_inference"] + return MetadataTextDoc(text=generated_str, metadata=return_metadata) + else: + return TextDoc(text=generated_str) + + def check_health(self) -> bool: + """Checks the health of the embedding service. + + Returns: + bool: True if the service is reachable and healthy, False otherwise. + """ + try: + response = requests.get(f"{self.base_url}/health") + if response.status_code == 200: + return True + else: + return False + except Exception as e: + # Handle connection errors, timeouts, etc. 
+ logger.error(f"Health check failed: {e}") + return False diff --git a/comps/lvms/src/integrations/video_llama.py b/comps/lvms/src/integrations/video_llama.py new file mode 100644 index 0000000000..a7677d67a5 --- /dev/null +++ b/comps/lvms/src/integrations/video_llama.py @@ -0,0 +1,100 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os +import time +from typing import Union + +import requests +from fastapi import HTTPException +from fastapi.responses import StreamingResponse + +from comps import CustomLogger, LVMVideoDoc, OpeaComponent, OpeaComponentRegistry, ServiceType, statistics_dict + +logger = CustomLogger("opea_video_llama") +logflag = os.getenv("LOGFLAG", False) + + +@OpeaComponentRegistry.register("OPEA_VIDEO_LLAMA_LVM") +class OpeaVideoLlamaLvm(OpeaComponent): + """A specialized LVM component derived from OpeaComponent for Video-LLaMA services.""" + + def __init__(self, name: str, description: str, config: dict = None): + super().__init__(name, ServiceType.LVM.name.lower(), description, config) + self.base_url = os.getenv("LVM_ENDPOINT", "http://localhost:9099") + health_status = self.check_health() + if not health_status: + logger.error("OpeaVideoLlamaLvm health check failed.") + + async def invoke( + self, + request: Union[LVMVideoDoc], + ) -> Union[StreamingResponse]: + """Involve the LVM service to generate answer for the provided request. + + Parameters: + request (LVMVideoDoc): The request containing the video URL, start time, duration, prompt, and maximum new tokens. + + Returns: + StreamingResponse: A streaming response containing the generated text in text/event-stream format, or a JSON error response if the upstream API responds with an error. + """ + if logflag: + logger.info("[lvm] Received input") + logger.info(request) + + video_url = request.video_url + chunk_start = request.chunk_start + chunk_duration = request.chunk_duration + prompt = request.prompt + max_new_tokens = request.max_new_tokens + + params = { + "video_url": video_url, + "start": chunk_start, + "duration": chunk_duration, + "prompt": prompt, + "max_new_tokens": max_new_tokens, + } + logger.info(f"[lvm] Params: {params}") + + t_start = time.time() + + response = requests.post(url=f"{self.base_url}/generate", params=params, proxies={"http": None}, stream=True) + logger.info(f"[lvm] Response status code: {response.status_code}") + if response.status_code == 200: + + def streamer(time_start): + first_token_latency = None + yield f"{{'video_url': '{video_url}', 'chunk_start': {chunk_start}, 'chunk_duration': {chunk_duration}}}\n".encode( + "utf-8" + ) + for chunk in response.iter_content(chunk_size=8192): + if chunk: + if first_token_latency is None: + first_token_latency = time.time() - time_start + yield chunk + logger.info(f"[lvm - chat_stream] Streaming chunk of size {len(chunk)}") + logger.info("[lvm - chat_stream] stream response finished") + statistics_dict["opea_service@lvm"].append_latency(time.time() - time_start, first_token_latency) + + return StreamingResponse(streamer(t_start), media_type="text/event-stream") + else: + logger.error(f"[lvm] Error: {response.text}") + raise HTTPException(status_code=500, detail="The upstream API responded with an error.") + + def check_health(self) -> bool: + """Checks the health of the embedding service. + + Returns: + bool: True if the service is reachable and healthy, False otherwise. 
+ """ + try: + response = requests.get(f"{self.base_url}/health") + if response.status_code == 200: + return True + else: + return False + except Exception as e: + # Handle connection errors, timeouts, etc. + logger.error(f"Health check failed: {e}") + return False diff --git a/comps/lvms/src/opea_lvm_microservice.py b/comps/lvms/src/opea_lvm_microservice.py new file mode 100644 index 0000000000..b1deb6e651 --- /dev/null +++ b/comps/lvms/src/opea_lvm_microservice.py @@ -0,0 +1,72 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os +import time +from typing import Union + +from integrations.llama_vision import OpeaLlamaVisionLvm +from integrations.llava import OpeaLlavaLvm +from integrations.predictionguard import OpeaPredictionguardLvm +from integrations.tgi_llava import OpeaTgiLlavaLvm +from integrations.video_llama import OpeaVideoLlamaLvm + +from comps import ( + CustomLogger, + LVMDoc, + LVMSearchedMultimodalDoc, + LVMVideoDoc, + MetadataTextDoc, + OpeaComponentLoader, + ServiceType, + TextDoc, + opea_microservices, + register_microservice, + register_statistics, + statistics_dict, +) + +logger = CustomLogger("opea_lvm_microservice") +logflag = os.getenv("LOGFLAG", False) + +lvm_component_name = os.getenv("LVM_COMPONENT_NAME", "OPEA_LLAVA_LVM") +# Initialize OpeaComponentController +loader = OpeaComponentLoader(lvm_component_name, description=f"OPEA LVM Component: {lvm_component_name}") + + +@register_microservice( + name="opea_service@lvm", + service_type=ServiceType.LVM, + endpoint="/v1/lvm", + host="0.0.0.0", + port=9399, +) +@register_statistics(names=["opea_service@lvm"]) +async def lvm( + request: Union[LVMDoc, LVMSearchedMultimodalDoc, LVMVideoDoc] +) -> Union[TextDoc, MetadataTextDoc]: # can also return a StreamingResponse but omit it in annotation for FastAPI + start = time.time() + + try: + # Use the controller to invoke the active component + lvm_response = await loader.invoke(request) + if logflag: + logger.info(lvm_response) + + if loader.component.name in ["OpeaVideoLlamaLvm"] or ( + loader.component.name in ["OpeaTgiLlavaLvm"] and request.streaming + ): + # statistics for StreamingResponse are handled inside the integrations + # here directly return the response + return lvm_response + statistics_dict["opea_service@lvm"].append_latency(time.time() - start, None) + return lvm_response + + except Exception as e: + logger.error(f"Error during lvm invocation: {e}") + raise + + +if __name__ == "__main__": + logger.info("OPEA LVM Microservice is starting....") + opea_microservices["opea_service@lvm"].start() diff --git a/comps/lvms/tgi-llava/requirements.txt b/comps/lvms/src/requirements.txt similarity index 100% rename from comps/lvms/tgi-llava/requirements.txt rename to comps/lvms/src/requirements.txt diff --git a/comps/lvms/tgi-llava/Dockerfile b/comps/lvms/tgi-llava/Dockerfile deleted file mode 100644 index 84e672264b..0000000000 --- a/comps/lvms/tgi-llava/Dockerfile +++ /dev/null @@ -1,25 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -FROM python:3.11-slim - -# Set environment variables -ENV LANG=en_US.UTF-8 - -ARG ARCH="cpu" - -COPY comps /home/comps - -RUN pip install --no-cache-dir --upgrade pip setuptools && \ - if [ ${ARCH} = "cpu" ]; then \ - pip install --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cpu -r /home/comps/lvms/tgi-llava/requirements.txt; \ - else \ - pip install --no-cache-dir -r /home/comps/lvms/tgi-llava/requirements.txt; \ - fi; - -ENV 
PYTHONPATH=$PYTHONPATH:/home - -WORKDIR /home/comps/lvms/tgi-llava - -ENTRYPOINT ["python", "lvm_tgi.py"] - diff --git a/comps/lvms/tgi-llava/__init__.py b/comps/lvms/tgi-llava/__init__.py deleted file mode 100644 index 916f3a44b2..0000000000 --- a/comps/lvms/tgi-llava/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 diff --git a/comps/lvms/tgi-llava/lvm_tgi.py b/comps/lvms/tgi-llava/lvm_tgi.py deleted file mode 100644 index b0f05bab48..0000000000 --- a/comps/lvms/tgi-llava/lvm_tgi.py +++ /dev/null @@ -1,155 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import os -import time -from typing import Union - -from fastapi import HTTPException -from fastapi.responses import StreamingResponse -from huggingface_hub import AsyncInferenceClient -from langchain_core.prompts import PromptTemplate -from template import ChatTemplate - -from comps import ( - CustomLogger, - LVMDoc, - LVMSearchedMultimodalDoc, - MetadataTextDoc, - ServiceType, - TextDoc, - opea_microservices, - register_microservice, - register_statistics, - statistics_dict, -) - -logger = CustomLogger("lvm_tgi") -logflag = os.getenv("LOGFLAG", False) - - -@register_microservice( - name="opea_service@lvm_tgi", - service_type=ServiceType.LVM, - endpoint="/v1/lvm", - host="0.0.0.0", - port=9399, - input_datatype=LVMDoc, - output_datatype=TextDoc, -) -@register_statistics(names=["opea_service@lvm_tgi"]) -async def lvm(request: Union[LVMDoc, LVMSearchedMultimodalDoc]) -> Union[TextDoc, MetadataTextDoc]: - if logflag: - logger.info(request) - start = time.time() - stream_gen_time = [] - - if isinstance(request, LVMSearchedMultimodalDoc): - if logflag: - logger.info("[LVMSearchedMultimodalDoc ] input from retriever microservice") - retrieved_metadatas = request.metadata - if not retrieved_metadatas or len(retrieved_metadatas) == 0: - # there is no video segments retrieved. - # Raise HTTPException status_code 204 - # due to llava-tgi-gaudi should receive image as input; Otherwise, the generated text is bad. 
- raise HTTPException(status_code=500, detail="There is no video segments retrieved given the query!") - img_b64_str = retrieved_metadatas[0]["b64_img_str"] - has_image = img_b64_str != "" - initial_query = request.initial_query - context = retrieved_metadatas[0]["transcript_for_inference"] - prompt = initial_query - if request.chat_template is None: - prompt = ChatTemplate.generate_multimodal_rag_on_videos_prompt(initial_query, context, has_image) - else: - prompt_template = PromptTemplate.from_template(request.chat_template) - input_variables = prompt_template.input_variables - if sorted(input_variables) == ["context", "question"]: - prompt = prompt_template.format(question=initial_query, context=context) - else: - logger.info( - f"[ LVMSearchedMultimodalDoc ] {prompt_template} not used, we only support 2 input variables ['question', 'context']" - ) - max_new_tokens = request.max_new_tokens - stream = request.stream - repetition_penalty = request.repetition_penalty - temperature = request.temperature - top_k = request.top_k - top_p = request.top_p - if logflag: - logger.info(f"prompt generated for [LVMSearchedMultimodalDoc ] input from retriever microservice: {prompt}") - - else: - img_b64_str = request.image - prompt = request.prompt - max_new_tokens = request.max_new_tokens - stream = request.stream - repetition_penalty = request.repetition_penalty - temperature = request.temperature - top_k = request.top_k - top_p = request.top_p - - if not img_b64_str: - # Work around an issue where LLaVA-NeXT is not providing good responses when prompted without an image. - # Provide an image and then instruct the model to ignore the image. The base64 string below is the encoded png: - # https://raw.githubusercontent.com/opea-project/GenAIExamples/refs/tags/v1.0/AudioQnA/ui/svelte/src/lib/assets/icons/png/audio1.png - img_b64_str = "iVBORw0KGgoAAAANSUhEUgAAADUAAAAlCAYAAADiMKHrAAAACXBIWXMAAAsTAAALEwEAmpwYAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAKPSURBVHgB7Zl/btowFMefnUTqf+MAHYMTjN4gvcGOABpM+8E0doLSE4xpsE3rKuAG3KC5Ad0J6MYOkP07YnvvhR9y0lVzupTIVT5SwDjB9fd97WfsMkCef1rUXM8dY9HHK4hWUevzi/oVWAqnF8fzLmAtiPA3Aq0lFsVA1fRKxlgNLIbDPaQUZQuu6YO98aIipHOiFGtIqaYfn1UnUCDds6WPyeANlTFbv9WztbFTK+HNUVAPiz7nbPzq7HsPCoKWIBREGfsJXZit5xT07X0jp6iRdIbEHOnjyyD97OvzH00lVS2K5OS2ax11cBXxJgYxlEIE6XZclzdTX6n8XjkkcEIfbj2nMO0/SNd1vy4vsCNjYPyEovfyy88GZIQCSKOCMf6ORgStoboLJuSWKDYCfK2q4jjrMZ+GOh7Pib/gek5DHxVUJtcgA7mJ4kwZRbN7viQXFzQn0Nl52gXG4Fo7DKAYp0yI3VHQ16oaWV0wYa+iGE8nG+wAdx5DzpS/KGyhFGULpShbKEXZQinqLlBK/IKc2asoh4sZvoXJWhlAzuxV1KBVD3HrfYTFAK8ZHgu0hu36DHLG+Izinw250WUkXHJht02QUnxLP7fZxR7f1I6S7Ir2GgmYvIQM5OYUuYBdainATq2ZjTqPBlnbGXYeBrg9Od18DKmc1U0jpw4OIIwEJFxQSl2b4MN2lf74fw8nFNbHt/5N9xWKTZvJ2S6YZk6RC3j2cKpVhSIShZ0mea6caCOCAjyNHd5gPPxGncMBTvI6hunYdaJ6kf8VoSCP2odxX6RkR6NOtanfj13EswKVqEQrPzzFL1lK+YvCFraiEqs8TrwQLGYraqpX4kr/Hixml+63Z+CoM9DTo438AUmP+KyMWT+tAAAAAElFTkSuQmCC" - prompt = f"Please disregard the image and answer the question. 
{prompt}" - - image = f"data:image/png;base64,{img_b64_str}" - image_prompt = f"![]({image})\n{prompt}\nASSISTANT:" - - if stream: - - async def stream_generator(): - chat_response = "" - text_generation = await lvm_client.text_generation( - prompt=image_prompt, - stream=stream, - max_new_tokens=max_new_tokens, - repetition_penalty=repetition_penalty, - temperature=temperature, - top_k=top_k, - top_p=top_p, - ) - async for text in text_generation: - stream_gen_time.append(time.time() - start) - chat_response += text - chunk_repr = repr(text.encode("utf-8")) - if logflag: - logger.info(f"[llm - chat_stream] chunk:{chunk_repr}") - yield f"data: {chunk_repr}\n\n" - if logflag: - logger.info(f"[llm - chat_stream] stream response: {chat_response}") - statistics_dict["opea_service@lvm_tgi"].append_latency(stream_gen_time[-1], stream_gen_time[0]) - yield "data: [DONE]\n\n" - - return StreamingResponse(stream_generator(), media_type="text/event-stream") - else: - generated_str = await lvm_client.text_generation( - image_prompt, - max_new_tokens=max_new_tokens, - repetition_penalty=repetition_penalty, - temperature=temperature, - top_k=top_k, - top_p=top_p, - ) - statistics_dict["opea_service@lvm_tgi"].append_latency(time.time() - start, None) - if logflag: - logger.info(generated_str) - if isinstance(request, LVMSearchedMultimodalDoc): - retrieved_metadata = request.metadata[0] - return_metadata = {} # this metadata will be used to construct proof for generated text - return_metadata["video_id"] = retrieved_metadata["video_id"] - return_metadata["source_video"] = retrieved_metadata["source_video"] - return_metadata["time_of_frame_ms"] = retrieved_metadata["time_of_frame_ms"] - return_metadata["transcript_for_inference"] = retrieved_metadata["transcript_for_inference"] - return MetadataTextDoc(text=generated_str, metadata=return_metadata) - else: - return TextDoc(text=generated_str) - - -if __name__ == "__main__": - lvm_endpoint = os.getenv("LVM_ENDPOINT", "http://localhost:8399") - lvm_client = AsyncInferenceClient(lvm_endpoint) - logger.info("[LVM] LVM initialized.") - opea_microservices["opea_service@lvm_tgi"].start() diff --git a/comps/lvms/tgi-llava/template.py b/comps/lvms/tgi-llava/template.py deleted file mode 100644 index 01992d2f85..0000000000 --- a/comps/lvms/tgi-llava/template.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - - -class ChatTemplate: - - @staticmethod - def generate_multimodal_rag_on_videos_prompt(question: str, context: str, has_image: bool = False): - - if has_image: - template = """The transcript associated with the image is '{context}'. {question}""" - else: - template = ( - """Refer to the following results obtained from the local knowledge base: '{context}'. 
{question}""" - ) - - return template.format(context=context, question=question) diff --git a/comps/lvms/video-llama/Dockerfile b/comps/lvms/video-llama/Dockerfile deleted file mode 100644 index 265970021b..0000000000 --- a/comps/lvms/video-llama/Dockerfile +++ /dev/null @@ -1,18 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -FROM python:3.11-slim - -# Set environment variables -ENV LANG=en_US.UTF-8 - -COPY comps /home/comps - -RUN pip install --no-cache-dir --upgrade pip setuptools && \ - pip install --no-cache-dir -r /home/comps/lvms/video-llama/requirements.txt - -ENV PYTHONPATH=$PYTHONPATH:/home - -WORKDIR /home/comps/lvms/video-llama - -ENTRYPOINT ["python", "lvm.py"] diff --git a/comps/lvms/video-llama/check_lvm.py b/comps/lvms/video-llama/check_lvm.py deleted file mode 100644 index fcf6f6aeea..0000000000 --- a/comps/lvms/video-llama/check_lvm.py +++ /dev/null @@ -1,50 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import datetime -import json -import os - -import requests - -ip_address = os.getenv("ip_address") -####### video-llama request ######## -print("video-llama request") -api_url = f"http://${ip_address}:9009/generate" -content = { - "video_url": "https://github.com/DAMO-NLP-SG/Video-LLaMA/raw/main/examples/silence_girl.mp4", - "start": 0.0, - "duration": 9, - "prompt": "What is the person doing?", - "max_new_tokens": 150, -} - -start = datetime.datetime.now() -with requests.post(api_url, params=content, stream=True) as response: - for chunk in response.iter_content(chunk_size=8192): - if chunk: - print(chunk.decode("utf-8"), end="", flush=True) # Flush to ensure immediate output - -end = datetime.datetime.now() -print(f"\nTotal time: {end - start}") - -####### lvm request ######## -print("lvm request") -api_url = f"http://${ip_address}:9000/v1/lvm" -headers = {"Content-Type": "application/json"} -data = { - "video_url": "https://github.com/DAMO-NLP-SG/Video-LLaMA/raw/main/examples/silence_girl.mp4", - "chunk_start": 0, - "chunk_duration": 9, - "prompt": "what is the person doing", - "max_new_tokens": 150, -} - -start = datetime.datetime.now() -with requests.post(api_url, headers=headers, data=json.dumps(data), stream=True) as response: - for chunk in response.iter_content(chunk_size=8192): - if chunk: - print(chunk.decode("utf-8"), end="", flush=True) # Flush to ensure immediate output - -end = datetime.datetime.now() -print(f"\nTotal time: {end - start}") diff --git a/comps/lvms/video-llama/docker_compose.yaml b/comps/lvms/video-llama/docker_compose.yaml deleted file mode 100644 index 54aace84e7..0000000000 --- a/comps/lvms/video-llama/docker_compose.yaml +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -version: "3" -services: - lvm-video-llama: - image: opea/video-llama-lvm-server:latest - container_name: video-llama-lvm-server - ports: - - "9009:9009" - ipc: host - environment: - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - no_proxy: ${no_proxy} - llm_download: "True" - volumes: - - "/home/$USER/.cache:/home/user/.cache" # RECOMMENDED: use local cache to avoid download - - video-llama-model:/home/user/model - restart: unless-stopped - - lvm: - image: opea/lvm-video-llama:latest - container_name: lvm-video-llama - ports: - - "9000:9000" - ipc: host - environment: - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - no_proxy: ${no_proxy} - LVM_ENDPOINT: ${LVM_ENDPOINT} - restart: unless-stopped - 
depends_on: - - lvm-video-llama -networks: - default: - driver: bridge -volumes: - video-llama-model: diff --git a/comps/lvms/video-llama/lvm.py b/comps/lvms/video-llama/lvm.py deleted file mode 100644 index 1cbfcd5e1b..0000000000 --- a/comps/lvms/video-llama/lvm.py +++ /dev/null @@ -1,80 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - - -# import json -import logging -import os - -import requests -from fastapi import HTTPException -from fastapi.responses import StreamingResponse - -from comps import LVMVideoDoc, ServiceType, opea_microservices, register_microservice, register_statistics - -# import time - - -logging.basicConfig(level=logging.INFO) - - -@register_microservice( - name="opea_service@lvm", - service_type=ServiceType.LVM, - endpoint="/v1/lvm", - host="0.0.0.0", - port=9000, - input_datatype=LVMVideoDoc, - output_datatype=StreamingResponse, -) -@register_statistics(names=["opea_service@lvm"]) -async def lvm(input: LVMVideoDoc): - """This function handles the LVM microservice, which generates text based on a video URL, start time, duration, prompt, and maximum new tokens. - - Parameters: - input (LVMVideoDoc): The input containing the video URL, start time, duration, prompt, and maximum new tokens. - - Returns: - StreamingResponse: A streaming response containing the generated text in text/event-stream format, or a JSON error response if the upstream API responds with an error. - """ - logging.info("[lvm] Received input") - - video_url = input.video_url - chunk_start = input.chunk_start - chunk_duration = input.chunk_duration - prompt = input.prompt - max_new_tokens = input.max_new_tokens - - params = { - "video_url": video_url, - "start": chunk_start, - "duration": chunk_duration, - "prompt": prompt, - "max_new_tokens": max_new_tokens, - } - logging.info(f"[lvm] Params: {params}") - - response = requests.post(url=f"{lvm_endpoint}/generate", params=params, proxies={"http": None}, stream=True) - logging.info(f"[lvm] Response status code: {response.status_code}") - if response.status_code == 200: - - def streamer(): - yield f"{{'video_url': '{video_url}', 'chunk_start': {chunk_start}, 'chunk_duration': {chunk_duration}}}\n".encode( - "utf-8" - ) - for chunk in response.iter_content(chunk_size=8192): - if chunk: - yield chunk - logging.info(f"[llm - chat_stream] Streaming: {chunk}") - logging.info("[llm - chat_stream] stream response finished") - - return StreamingResponse(streamer(), media_type="text/event-stream") - else: - logging.error(f"[lvm] Error: {response.text}") - raise HTTPException(status_code=500, detail="The upstream API responded with an error.") - - -if __name__ == "__main__": - lvm_endpoint = os.getenv("LVM_ENDPOINT") - - opea_microservices["opea_service@lvm"].start() diff --git a/comps/lvms/video-llama/requirements.txt b/comps/lvms/video-llama/requirements.txt deleted file mode 100644 index c7cc250eba..0000000000 --- a/comps/lvms/video-llama/requirements.txt +++ /dev/null @@ -1,11 +0,0 @@ -datasets -docarray -fastapi -opentelemetry-api -opentelemetry-exporter-otlp -opentelemetry-sdk -Pillow -prometheus-fastapi-instrumentator -pydub -shortuuid -uvicorn diff --git a/comps/prompt_registry/deployment/docker_compose/compose_prompt_registry_mongo.yaml b/comps/prompt_registry/deployment/docker_compose/compose.yaml similarity index 81% rename from comps/prompt_registry/deployment/docker_compose/compose_prompt_registry_mongo.yaml rename to comps/prompt_registry/deployment/docker_compose/compose.yaml index e956320bdc..7720a5de62 
100644 --- a/comps/prompt_registry/deployment/docker_compose/compose_prompt_registry_mongo.yaml +++ b/comps/prompt_registry/deployment/docker_compose/compose.yaml @@ -15,10 +15,10 @@ services: command: mongod --quiet --logpath /dev/null promptregistry-mongo: - image: opea/promptregistry-mongo:latest - container_name: promptregistry-server + image: ${REGISTRY:-opea}/promptregistry-mongo:${TAG:-latest} + container_name: promptregistry-mongo-server ports: - - "6018:6018" + - "${PROMPT_REGISTRY_PORT:-6018}:6018" ipc: host environment: http_proxy: ${http_proxy} diff --git a/comps/prompt_registry/deployment/kubernetes/README.md b/comps/prompt_registry/deployment/kubernetes/README.md index e69de29bb2..387197ea76 100644 --- a/comps/prompt_registry/deployment/kubernetes/README.md +++ b/comps/prompt_registry/deployment/kubernetes/README.md @@ -0,0 +1,11 @@ +# Deploy prompt microservice on Kubernetes cluster + +- You should have Helm (version >= 3.15) installed. Refer to the [Helm Installation Guide](https://helm.sh/docs/intro/install/) for more information. +- For more deployment options, refer to [helm charts README](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts#readme). + +## Deploy on Kubernetes + +``` +export HFTOKEN="insert-your-huggingface-token-here" +helm install prompt-usvc oci://ghcr.io/opea-project/charts/prompt-usvc --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f cpu-values.yaml +``` diff --git a/comps/prompt_registry/deployment/kubernetes/cpu-values.yaml b/comps/prompt_registry/deployment/kubernetes/cpu-values.yaml new file mode 100644 index 0000000000..7850c0ee9d --- /dev/null +++ b/comps/prompt_registry/deployment/kubernetes/cpu-values.yaml @@ -0,0 +1,5 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +mongodb: + enabled: true diff --git a/comps/prompt_registry/src/Dockerfile b/comps/prompt_registry/src/Dockerfile index b00a8d56c7..9c9d0a79c5 100644 --- a/comps/prompt_registry/src/Dockerfile +++ b/comps/prompt_registry/src/Dockerfile @@ -14,8 +14,6 @@ RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ chown -R user /home/user/ -USER user - COPY comps /home/user/comps COPY requirements.txt /home/user/ @@ -25,6 +23,8 @@ RUN pip install --no-cache-dir --upgrade pip setuptools && \ ENV PYTHONPATH=$PYTHONPATH:/home/user +USER user + WORKDIR /home/user/comps/prompt_registry/src/ ENTRYPOINT ["python", "opea_prompt_microservice.py"] diff --git a/comps/prompt_registry/src/README.md b/comps/prompt_registry/src/README.md index 8142e23bd0..a012956eaa 100644 --- a/comps/prompt_registry/src/README.md +++ b/comps/prompt_registry/src/README.md @@ -17,13 +17,13 @@ export COLLECTION_NAME=${COLLECTION_NAME} --- -## 🚀Start Microservice with Docker +## 🚀 Start Microservice with Docker (Option 1) ### Build Docker Image ```bash cd ~/GenAIComps -docker build -t opea/promptregistry-server:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/prompt_registry/src/Dockerfile . +docker build -t opea/promptregistry-mongo:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/prompt_registry/src/Dockerfile . 
``` ### Run Docker with CLI @@ -37,11 +37,19 @@ docker build -t opea/promptregistry-server:latest --build-arg https_proxy=$https - Run Prompt Registry microservice ```bash - docker run -d --name="promptregistry-server" -p 6018:6018 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e MONGO_HOST=${MONGO_HOST} -e MONGO_PORT=${MONGO_PORT} -e DB_NAME=${DB_NAME} -e COLLECTION_NAME=${COLLECTION_NAME} opea/promptregistry-server:latest + docker run -d --name="promptregistry-mongo-server" -p 6018:6018 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e MONGO_HOST=${MONGO_HOST} -e MONGO_PORT=${MONGO_PORT} -e DB_NAME=${DB_NAME} -e COLLECTION_NAME=${COLLECTION_NAME} opea/promptregistry-mongo:latest ``` --- +## 🚀 Start Microservice with Docker Compose (Option 2) + +```bash +docker compose -f ../deployment/docker_compose/compose.yaml up -d +``` + +--- + ### ✅ Invoke Microservice The Prompt Registry microservice exposes the following API endpoints: diff --git a/comps/ragas/deployment/docker_compose/README.md b/comps/ragas/deployment/docker_compose/README.md deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/comps/ragas/deployment/docker_compose/tgi_langchain.yaml b/comps/ragas/deployment/docker_compose/tgi_langchain.yaml deleted file mode 100644 index e75dab9c3d..0000000000 --- a/comps/ragas/deployment/docker_compose/tgi_langchain.yaml +++ /dev/null @@ -1,33 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -version: "3.8" - -services: - tgi_service: - image: ghcr.io/huggingface/text-generation-inference:1.4 - container_name: tgi-service - ports: - - "8008:80" - volumes: - - "./data:/data" - shm_size: 1g - command: --model-id ${LLM_MODEL_ID} - llm: - image: opea/gen-ai-comps:llm-tgi-server:latest - container_name: llm-tgi-server - ports: - - "9000:9000" - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} - TEI_ENDPOINT: ${TEI_ENDPOINT} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} - restart: unless-stopped - -networks: - default: - driver: bridge diff --git a/comps/ragas/deployment/kubernetes/README.md b/comps/ragas/deployment/kubernetes/README.md deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/comps/ragas/src/tgi/langchain/__init__.py b/comps/ragas/src/tgi/langchain/__init__.py deleted file mode 100644 index 916f3a44b2..0000000000 --- a/comps/ragas/src/tgi/langchain/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 diff --git a/comps/ragas/src/tgi/langchain/build_docker.sh b/comps/ragas/src/tgi/langchain/build_docker.sh deleted file mode 100644 index 05ef3666d5..0000000000 --- a/comps/ragas/src/tgi/langchain/build_docker.sh +++ /dev/null @@ -1,8 +0,0 @@ - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -git clone https://github.com/huggingface/tgi-gaudi.git -cd ./tgi-gaudi/ -docker build -t ghcr.io/huggingface/tgi-gaudi:latest . 
--build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy diff --git a/comps/ragas/src/tgi/langchain/entrypoint.sh b/comps/ragas/src/tgi/langchain/entrypoint.sh deleted file mode 100644 index d60eddd36b..0000000000 --- a/comps/ragas/src/tgi/langchain/entrypoint.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/usr/bin/env bash - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -pip --no-cache-dir install -r requirements-runtime.txt - -python llm.py diff --git a/comps/ragas/src/tgi/langchain/llm.py b/comps/ragas/src/tgi/langchain/llm.py deleted file mode 100644 index 7421dc8363..0000000000 --- a/comps/ragas/src/tgi/langchain/llm.py +++ /dev/null @@ -1,86 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import os - -from datasets import Dataset -from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceHubEmbeddings -from langchain_community.llms import HuggingFaceEndpoint -from ragas import evaluate -from ragas.metrics import answer_relevancy, context_precision, context_recall, faithfulness - -from comps import CustomLogger, RAGASParams, RAGASScores, ServiceType, opea_microservices, register_microservice - -logger = CustomLogger("ragas_tgi_llm") -logflag = os.getenv("LOGFLAG", False) - -tei_embedding_endpoint = os.getenv("TEI_ENDPOINT") -EMBED_MODEL = os.getenv("EMBED_MODEL", "BAAI/bge-base-en-v1.5") - - -@register_microservice( - name="opea_service@ragas_tgi_llm", - service_type=ServiceType.RAGAS, - endpoint="/v1/ragas", - host="0.0.0.0", - port=9050, - input_datatype=RAGASParams, - output_datatype=RAGASScores, -) -def llm_generate(input: RAGASParams): - if logflag: - logger.info(input) - llm_endpoint = os.getenv("TGI_LLM_ENDPOINT", "http://localhost:8080") - - # Create vectorstore - if tei_embedding_endpoint: - # create embeddings using TEI endpoint service - embedder = HuggingFaceHubEmbeddings(model=tei_embedding_endpoint) - else: - # create embeddings using local embedding model - embedder = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) - - llm = HuggingFaceEndpoint( - endpoint_url=llm_endpoint, - max_new_tokens=input.max_new_tokens, - top_k=input.top_k, - top_p=input.top_p, - typical_p=input.typical_p, - temperature=input.temperature, - repetition_penalty=input.repetition_penalty, - streaming=input.stream, - timeout=600, - ) - - data_collections = { - "question": input.questions, - "answer": input.answers, - "docs": input.docs, - "ground_truth": input.groundtruths, - } - dataset = Dataset.from_dict(data_collections) - - score = evaluate( - dataset, - metrics=[answer_relevancy, faithfulness, context_recall, context_precision], - llm=llm, - embeddings=embedder, - ) - df = score.to_pandas() - answer_relevancy_average = df["answer_relevancy"][:].mean() - faithfulness_average = df["faithfulness"][:].mean() - context_recall_average = df["context_recall"][:].mean() - context_precision_average = df["context_precision"][:].mean() - result = RAGASScores( - answer_relevancy=answer_relevancy_average, - faithfulness=faithfulness_average, - context_recallL=context_recall_average, - context_precision=context_precision_average, - ) - if logflag: - logger.info(result) - return result - - -if __name__ == "__main__": - opea_microservices["opea_service@llm_tgi"].start() diff --git a/comps/ragas/src/tgi/langchain/requirements-runtime.txt b/comps/ragas/src/tgi/langchain/requirements-runtime.txt deleted file mode 100644 index 225adde271..0000000000 --- a/comps/ragas/src/tgi/langchain/requirements-runtime.txt 
+++ /dev/null @@ -1 +0,0 @@ -langserve diff --git a/comps/ragas/src/tgi/langchain/requirements.txt b/comps/ragas/src/tgi/langchain/requirements.txt deleted file mode 100644 index 0186cab400..0000000000 --- a/comps/ragas/src/tgi/langchain/requirements.txt +++ /dev/null @@ -1,14 +0,0 @@ -datasets -docarray[full] -fastapi -huggingface_hub -langchain -langchain_community -opentelemetry-api -opentelemetry-exporter-otlp -opentelemetry-sdk -prometheus-fastapi-instrumentator -ragas -shortuuid -transformers -uvicorn diff --git a/comps/rerankings/deployment/docker_compose/README.md b/comps/rerankings/deployment/docker_compose/README.md deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/comps/rerankings/deployment/docker_compose/compose.yaml b/comps/rerankings/deployment/docker_compose/compose.yaml new file mode 100644 index 0000000000..487ab35a48 --- /dev/null +++ b/comps/rerankings/deployment/docker_compose/compose.yaml @@ -0,0 +1,52 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +include: + - ../../../third_parties/tei/deployment/docker_compose/compose.yaml + +services: + reranking: + image: ${REGISTRY:-opea}/reranking:${TAG:-latest} + container_name: reranking-server + ports: + - ${RERANK_PORT:-10700}:8000 + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT} + restart: unless-stopped + + reranking-tei: + extends: reranking + container_name: reranking-tei + environment: + HF_TOKEN: ${HF_TOKEN} + RERANK_COMPONENT_NAME: "OPEA_TEI_RERANKING" + depends_on: + tei-reranking-serving: + condition: service_healthy + + reranking-tei-gaudi: + extends: reranking + container_name: reranking-tei-gaudi + environment: + HF_TOKEN: ${HF_TOKEN} + RERANK_COMPONENT_NAME: "OPEA_TEI_RERANKING" + depends_on: + tei-reranking-gaudi-serving: + condition: service_healthy + + reranking-videoqna: + extends: reranking + container_name: reranking-videoqna + environment: + CHUNK_DURATION: ${CHUNK_DURATION} + FILE_SERVER_ENDPOINT: ${FILE_SERVER_ENDPOINT} + RERANK_COMPONENT_NAME: "OPEA_VIDEO_RERANKING" + + +networks: + default: + driver: bridge diff --git a/comps/rerankings/deployment/docker_compose/rerank_tei.yaml b/comps/rerankings/deployment/docker_compose/rerank_tei.yaml deleted file mode 100644 index 485a95210d..0000000000 --- a/comps/rerankings/deployment/docker_compose/rerank_tei.yaml +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -version: "3.8" - -services: - tei_reranking_service: - image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.2 - container_name: tei-reranking-server - ports: - - "8808:80" - volumes: - - "./data:/data" - shm_size: 1g - healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:8808/health"] - interval: 10s - timeout: 6s - retries: 18 - command: --model-id ${RERANK_MODEL_ID} --hf-api-token ${HF_TOKEN} - reranking: - image: opea/reranking:latest - container_name: reranking-server - ports: - - "8000:8000" - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT} - RERANK_COMPONENT_NAME: "OPEA_TEI_RERANKING" - HF_TOKEN: ${HF_TOKEN} - depends_on: - tei_reranking_service: - condition: service_healthy - restart: unless-stopped - -networks: - default: - driver: bridge diff --git a/comps/rerankings/deployment/docker_compose/rerank_videoqna.yaml 
b/comps/rerankings/deployment/docker_compose/rerank_videoqna.yaml deleted file mode 100644 index 189afdcbc7..0000000000 --- a/comps/rerankings/deployment/docker_compose/rerank_videoqna.yaml +++ /dev/null @@ -1,22 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -services: - reranking: - image: opea/reranking:latest - container_name: reranking-videoqna-server - ports: - - "8000:8000" - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - CHUNK_DURATION: ${CHUNK_DURATION} - FILE_SERVER_ENDPOINT: ${FILE_SERVER_ENDPOINT} - RERANK_COMPONENT_NAME: "OPEA_VIDEO_RERANKING" - restart: unless-stopped - -networks: - default: - driver: bridge diff --git a/comps/rerankings/deployment/kubernetes/README.md b/comps/rerankings/deployment/kubernetes/README.md index e69de29bb2..23bf0ef425 100644 --- a/comps/rerankings/deployment/kubernetes/README.md +++ b/comps/rerankings/deployment/kubernetes/README.md @@ -0,0 +1,11 @@ +# Deploy reranking microservice on Kubernetes cluster + +- You should have Helm (version >= 3.15) installed. Refer to the [Helm Installation Guide](https://helm.sh/docs/intro/install/) for more information. +- For more deployment options, refer to [helm charts README](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts#readme). + +## Deploy on Kubernetes + +``` +export HFTOKEN="insert-your-huggingface-token-here" +helm install reranking-usvc oci://ghcr.io/opea-project/charts/reranking-usvc --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f cpu-values.yaml +``` diff --git a/comps/rerankings/deployment/kubernetes/cpu-values.yaml b/comps/rerankings/deployment/kubernetes/cpu-values.yaml new file mode 100644 index 0000000000..f16bb56416 --- /dev/null +++ b/comps/rerankings/deployment/kubernetes/cpu-values.yaml @@ -0,0 +1,5 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +teirerank: + enabled: true diff --git a/comps/rerankings/src/Dockerfile b/comps/rerankings/src/Dockerfile index c70dee8106..8fbe8c920d 100644 --- a/comps/rerankings/src/Dockerfile +++ b/comps/rerankings/src/Dockerfile @@ -17,8 +17,6 @@ RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ chown -R user /home/user/ -USER user - COPY comps /home/user/comps RUN if [ ${ARCH} = "cpu" ]; then \ @@ -38,9 +36,10 @@ fi && \ pip install --no-cache-dir --upgrade pip setuptools && \ pip install --no-cache-dir -r /home/user/comps/rerankings/src/requirements.txt; - ENV PYTHONPATH=$PYTHONPATH:/home/user +USER user + WORKDIR /home/user/comps/rerankings/src ENTRYPOINT ["python", "opea_reranking_microservice.py"] diff --git a/comps/rerankings/src/README.md b/comps/rerankings/src/README.md index 28c3324e8b..a5a6fd6200 100644 --- a/comps/rerankings/src/README.md +++ b/comps/rerankings/src/README.md @@ -1,5 +1,15 @@ # Reranking Microservice +The Reranking Microservice, fueled by reranking models, stands as a straightforward yet immensely potent tool for semantic search. + +--- + +The Reranking Microservice support two features: **text reranking** and **video result reranking** for VideoQnA use case. + +## 🛠️ Features + +- **rerank on retrieved documents**: Perform reranking on the given documents using reranking models together with query. + The Reranking Microservice, fueled by reranking models, stands as a straightforward yet immensely potent tool for semantic search. 
When provided with a query and a collection of documents, reranking swiftly indexes the documents based on their semantic relevance to the query, arranging them from most to least pertinent. This microservice significantly enhances overall accuracy. In a text retrieval system, @@ -7,3 +17,26 @@ either a dense embedding model or a sparse lexical search index is often employe However, a reranking model can further refine this process by rearranging potential candidates into a final, optimized order. ![Flow Chart](./assets/img/reranking_flow.png) + +--- + +- **video result reranking**: Perform reranking on the given vidoe list, it sorts the video as a descending list of names, ranked by their degree of match with the query. + +This microservice is designed that do result rerank for VideoQnA use case. Local rerank is used rather than rerank model. + +For the `VideoQnA` usecase, during the data preparation phase, frames are extracted from videos and stored in a vector database. +To identify the most relevant video, we count the occurrences of each video source among the retrieved data with rerank function `get_top_doc`. +This sorts the video as a descending list of names, ranked by their degree of match with the query. +Then we could send the `top_n` videos to the downstream LVM. + +--- + +## ⚙️ Implementation + +### Utilizing Reranking with TEI + +For additional information, please refer to this [README](./README_tei.md) + +### Utilizing Reranking with VideoQnA + +For additional information, please refer to this [README](./README_videoqna.md) diff --git a/comps/rerankings/src/README_tei.md b/comps/rerankings/src/README_tei.md new file mode 100644 index 0000000000..9bf77da487 --- /dev/null +++ b/comps/rerankings/src/README_tei.md @@ -0,0 +1,155 @@ +# 🌟 Reranking Microservice with TEI + +`Text Embeddings Inference (TEI)` is a comprehensive toolkit designed for efficient deployment and serving of open source text embeddings models. +It enables us to host our own reranker endpoint seamlessly. + +This README provides set-up instructions and comprehensive details regarding the reranking microservice via TEI. + +--- + +## 📦 1. Start Microservice with Docker + +### 🔹 1.1 Start Reranking Service with TEI + +1. **Start the TEI service**: + +- For Gaudi HPU: + + ```bash + export HF_TOKEN=${your_hf_api_token} + export RERANK_MODEL_ID="BAAI/bge-reranker-base" + export volume=$PWD/data + + docker run -p 12005:80 -v $volume:/data --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e MAX_WARMUP_SEQUENCE_LENGTH=512 --cap-add=sys_nice --ipc=host ghcr.io/huggingface/tei-gaudi:latest --model-id $RERANK_MODEL_ID --hf-api-token $HF_TOKEN --auto-truncate + + ``` + +- For Xeon CPU: + + ```bash + export HF_TOKEN=${your_hf_api_token} + export RERANK_MODEL_ID="BAAI/bge-reranker-base" + export volume=$PWD/data + + docker run -d -p 12005:80 -v $volume:/data -e http_proxy=$http_proxy -e https_proxy=$https_proxy --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 --model-id $RERANK_MODEL_ID --hf-api-token $HF_TOKEN --auto-truncate + ``` + +2. **Verify the TEI Service**: + + Run the following command to check if the service is up and running. + + ```bash + export host_ip=$(hostname -I | awk '{print $1}') + curl $host_ip:12005/rerank \ + -X POST \ + -d '{"query":"What is Deep Learning?", "texts": ["Deep Learning is not...", "Deep learning is..."]}' \ + -H 'Content-Type: application/json' + ``` + +### 🔹 1.2 Build Docker Image and Run Docker with CLI + +1. 
Build the Docker image for the reranking microservice: + + ```bash + docker build --no-cache \ + -t opea/reranking:comps \ + --build-arg https_proxy=$https_proxy \ + --build-arg http_proxy=$http_proxy \ + --build-arg SERVICE=tei \ + -f comps/rerankings/src/Dockerfile . + ``` + +2. Run the reranking microservice and connect it to the TEI service: + + ```bash + export TEI_RERANKING_PORT=12005 + export HF_TOKEN=${your_hf_api_token} + export host_ip=$(hostname -I | awk '{print $1}') + export TEI_RERANKING_ENDPOINT="http://${host_ip}:${TEI_RERANKING_PORT}" + + docker run -d --name="reranking-tei-server" -e LOGFLAG=True -p 10700:8000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e TEI_RERANKING_ENDPOINT=$TEI_RERANKING_ENDPOINT -e HF_TOKEN=$HF_TOKEN -e RERANK_COMPONENT_NAME="OPEA_TEI_RERANKING" opea/reranking:comps + ``` + +## 📦 2. Start Microservice with docker compose + +Deploy both the TEI Reranking Service and the Reranking Microservice using Docker Compose. + +🔹 Steps: + +1. Set environment variables: + + ```bash + export RERANK_MODEL_ID="BAAI/bge-reranker-base" + export TEI_RERANKING_PORT=12005 + export RERANK_PORT=10700 + export host_ip=$(hostname -I | awk '{print $1}') + export TEI_RERANKING_ENDPOINT="http://${host_ip}:${TEI_RERANKING_PORT}" + export TAG=comps + + ``` + +2. Navigate to the Docker Compose directory: + + ```bash + cd comps/rerankings/deployment/docker_compose/ + ``` + +3. Start the services: + +- For Gaudi HPU: + + ```bash + docker compose up reranking-tei-gaudi -d + ``` + +- For Xeon CPU: + + ```bash + docker compose up reranking-tei -d + ``` + +## 📦 3. Consume Reranking Service + +### 🔹 3.1 Check Service Status + +- Verify the reranking service is running: + + ```bash + curl http://localhost:10700/v1/health_check \ + -X GET \ + -H 'Content-Type: application/json' + ``` + +### 🔹 3.2 Use the Reranking Service API + +- Execute reranking process by providing query and documents + + ```bash + curl http://localhost:10700/v1/reranking \ + -X POST \ + -d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' \ + -H 'Content-Type: application/json' + ``` + + - You can add the parameter `top_n` to specify the return number of the reranker model, default value is 1. + + ```bash + curl http://localhost:10700/v1/reranking \ + -X POST \ + -d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}], "top_n":2}' \ + -H 'Content-Type: application/json' + ``` + +## ✨ Tips for Better Understanding: + +1. Port Mapping: + Ensure the ports are correctly mapped to avoid conflicts with other services. + +2. Model Selection: + Choose a model appropriate for your use case, like "BAAI/bge-reranker-base". + +3. Environment Variables: + Use http_proxy and https_proxy for proxy setup if necessary. + +4. Data Volume: + The `-v ./data:/data` flag ensures the data directory is correctly mounted. diff --git a/comps/rerankings/src/README_videoqna.md b/comps/rerankings/src/README_videoqna.md new file mode 100644 index 0000000000..d24ebc3b28 --- /dev/null +++ b/comps/rerankings/src/README_videoqna.md @@ -0,0 +1,114 @@ +# 🌟 Reranking Microservice with VideoQnA + +This README provides set-up instructions and comprehensive details regarding the reranking microservice with VideoQnA. +This microservice is designed that do result rerank for VideoQnA use case. Local rerank is used rather than rerank model. 
+ +For the `VideoQnA` usecase, during the data preparation phase, frames are extracted from videos and stored in a vector database. +To identify the most relevant video, we count the occurrences of each video source among the retrieved data with rerank function `get_top_doc`. +This sorts the video as a descending list of names, ranked by their degree of match with the query. +Then we could send the `top_n` videos to the downstream LVM. + +--- + +## 📦 1. Start Microservice with Docker + +### 🔹 1.1 Build Docker Image and Run Docker with CLI + +1. Build the Docker image for the reranking microservice: + + ```bash + docker build --no-cache \ + -t opea/reranking:comps \ + --build-arg https_proxy=$https_proxy \ + --build-arg http_proxy=$http_proxy \ + --build-arg SERVICE=svideoqna \ + -f comps/rerankings/src/Dockerfile . + ``` + +2. Run the reranking microservice and connect it to the VideoQnA service: + + ```bash + docker run -d --name "reranking-videoqna-server" \ + -p 10703:8000 \ + --ipc=host \ + -e no_proxy=${no_proxy} \ + -e http_proxy=${http_proxy} \ + -e https_proxy=${https_proxy} \ + -e CHUNK_DURATION=${CHUNK_DURATION} \ + -e RERANK_COMPONENT_NAME="OPEA_VIDEO_RERANKING" \ + -e FILE_SERVER_ENDPOINT=${FILE_SERVER_ENDPOINT} \ + opea/reranking:comps + ``` + +## 📦 2. Start Microservice with docker compose + +Deploy both the Videoqna Reranking Service and the Reranking Microservice using Docker Compose. + +🔹 Steps: + +1. Set environment variables: + + ```bash + export TEI_RERANKING_PORT=12006 + export RERANK_PORT=10703 + export host_ip=$(hostname -I | awk '{print $1}') + export TEI_RERANKING_ENDPOINT="http://${host_ip}:${TEI_RERANKING_PORT}" + export TAG=comps + ``` + +2. Navigate to the Docker Compose directory: + + ```bash + cd comps/rerankings/deployment/docker_compose + ``` + +3. Start the services: + + ```bash + docker compose up reranking-videoqna -d + ``` + +## 📦 3. Consume Reranking Service + +### 🔹 3.1 Check Service Status + +- Verify the reranking service is running: + + ```bash + curl http://localhost:10703/v1/health_check \ + -X GET \ + -H 'Content-Type: application/json' + ``` + +### 🔹 3.2 Use the Reranking Service API + +- Execute reranking process by providing query and documents + + ```bash + curl http://localhost:10703/v1/reranking \ + -X POST \ + -d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' \ + -H 'Content-Type: application/json' + ``` + +- You can add the parameter `top_n` to specify the return number of the reranker model, default value is 1. + + ```bash + curl http://localhost:10703/v1/reranking \ + -X POST \ + -d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}], "top_n":2}' \ + -H 'Content-Type: application/json' + ``` + +## ✨ Tips for Better Understanding: + +1. Port Mapping: + Ensure the ports are correctly mapped to avoid conflicts with other services. + +2. Environment Variables: + + - Use http_proxy and https_proxy for proxy setup if necessary. + - CHUNK_DURATION: target chunk duration, should be aligned with VideoQnA dataprep. Default 10s. + +3. Data Volume: + The `-v ./data:/data` flag ensures the data directory is correctly mounted. 
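The occurrence-counting rerank that this README describes (`get_top_doc`) is not spelled out in code in this diff, so here is a minimal sketch of that logic. The function name comes from the README, but the retrieved-document structure (a `video` key per frame) and the body are assumptions for illustration, not the exact implementation shipped under `comps/rerankings/src`.

```python
# Minimal sketch of the occurrence-counting rerank described in README_videoqna.md.
# The `video` key and the exact signature are assumptions for illustration only.
from collections import Counter
from typing import Dict, List


def get_top_doc(retrieved_docs: List[Dict], top_n: int = 1) -> List[str]:
    """Rank video sources by how often their frames appear in the retrieved data.

    Each retrieved item is assumed to carry the name of the video it was
    extracted from; the most frequently retrieved videos are treated as the
    best match for the query and returned in descending order of occurrence.
    """
    counts = Counter(doc["video"] for doc in retrieved_docs)
    # most_common() already yields (video, count) pairs sorted by count, descending
    return [video for video, _ in counts.most_common(top_n)]


if __name__ == "__main__":
    # Frames retrieved from the vector DB for one query (illustrative data)
    retrieved = [
        {"video": "factory_tour.mp4"},
        {"video": "silence_girl.mp4"},
        {"video": "silence_girl.mp4"},
    ]
    # The top-ranked video name(s) would then be sent to the downstream LVM
    print(get_top_doc(retrieved, top_n=2))  # ['silence_girl.mp4', 'factory_tour.mp4']
```

Under these assumptions, `top_n` plays the same role as the `top_n` field in the `/v1/reranking` request shown above: it bounds how many ranked video names are passed on to the LVM.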
diff --git a/comps/rerankings/src/opea_reranking_microservice.py b/comps/rerankings/src/opea_reranking_microservice.py index 4cf94c2cd7..7cf073ef7f 100644 --- a/comps/rerankings/src/opea_reranking_microservice.py +++ b/comps/rerankings/src/opea_reranking_microservice.py @@ -19,6 +19,7 @@ ) from comps.cores.proto.api_protocol import ChatCompletionRequest, RerankingRequest, RerankingResponse from comps.cores.proto.docarray import LLMParamsDoc, LVMVideoDoc, RerankedDoc, SearchedDoc, SearchedMultimodalDoc +from comps.cores.telemetry.opea_telemetry import opea_telemetry logger = CustomLogger("opea_reranking_microservice") logflag = os.getenv("LOGFLAG", False) @@ -35,6 +36,7 @@ host="0.0.0.0", port=8000, ) +@opea_telemetry @register_statistics(names=["opea_service@reranking"]) async def reranking( input: Union[SearchedMultimodalDoc, SearchedDoc, RerankingRequest, ChatCompletionRequest] diff --git a/comps/retrievers/README.md b/comps/retrievers/README.md index a05586d5f1..03ec20c515 100644 --- a/comps/retrievers/README.md +++ b/comps/retrievers/README.md @@ -8,24 +8,36 @@ Overall, this microservice provides robust backend support for applications requ ## Retriever Microservice with Redis -For details, please refer to this [langchain readme](redis/langchain/README.md) or [llama_index readme](redis/llama_index/README.md) +For details, please refer to this [readme](src/README_redis.md) ## Retriever Microservice with Milvus -For details, please refer to this [readme](milvus/langchain/README.md) +For details, please refer to this [readme](src/README_milvus.md) + +## Retriever Microservice with Qdrant + +For details, please refer to this [readme](src/README_qdrant.md) ## Retriever Microservice with PGVector -For details, please refer to this [readme](pgvector/langchain/README.md) +For details, please refer to this [readme](src/README_pgvector.md) -## Retriever Microservice with Pathway +## Retriever Microservice with VDMS -For details, please refer to this [readme](pathway/langchain/README.md) +For details, please refer to this [readme](src/README_vdms.md) -## Retriever Microservice with QDrant +## Retriever Microservice with ElasticSearch -For details, please refer to this [readme](qdrant/haystack/README.md) +For details, please refer to this [readme](src/README_elasticsearch.md) -## Retriever Microservice with VDMS +## Retriever Microservice with OpenSearch + +For details, please refer to this [readme](src/README_opensearch.md) + +## Retriever Microservice with neo4j + +For details, please refer to this [readme](src/README_neo4j.md) + +## Retriever Microservice with Pathway -For details, please refer to this [readme](vdms/langchain/README.md) +For details, please refer to this [readme](src/README_pathway.md) diff --git a/comps/retrievers/deployment/docker_compose/compose.yaml b/comps/retrievers/deployment/docker_compose/compose.yaml new file mode 100644 index 0000000000..708f7819d0 --- /dev/null +++ b/comps/retrievers/deployment/docker_compose/compose.yaml @@ -0,0 +1,177 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +include: + - ../../../third_parties/elasticsearch/deployment/docker_compose/compose.yaml + - ../../../third_parties/opensearch/deployment/docker_compose/compose.yaml + - ../../../third_parties/neo4j/deployment/docker_compose/compose.yaml + - ../../../third_parties/pgvector/deployment/docker_compose/compose.yaml + - ../../../third_parties/pathway/deployment/docker_compose/compose.yaml + - 
../../../third_parties/qdrant/deployment/docker_compose/compose.yaml + - ../../../third_parties/redis/deployment/docker_compose/compose.yaml + - ../../../third_parties/tei/deployment/docker_compose/compose.yaml + - ../../../third_parties/tgi/deployment/docker_compose/compose.yaml + - ../../../third_parties/vdms/deployment/docker_compose/compose.yaml + +services: + retriever: + image: ${REGISTRY:-opea}/retriever:${TAG:-latest} + container_name: retriever + ports: + - "${RETRIEVER_PORT:-7000}:7000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + INDEX_NAME: ${INDEX_NAME} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + LOGFLAG: ${LOGFLAG:-False} + restart: unless-stopped + + retriever-elasticsearch: + extends: retriever + container_name: retriever-elasticsearch + environment: + RETRIEVER_COMPONENT_NAME: ${RETRIEVER_COMPONENT_NAME:-OPEA_RETRIEVER_ELASTICSEARCH} + ES_CONNECTION_STRING: ${ES_CONNECTION_STRING} + depends_on: + elasticsearch-vector-db: + condition: service_healthy + + retriever-milvus: + extends: retriever + container_name: retriever-milvus + environment: + RETRIEVER_COMPONENT_NAME: ${RETRIEVER_COMPONENT_NAME:-OPEA_RETRIEVER_MILVUS} + TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} + MILVUS_HOST: ${host_ip} + depends_on: + tei-embedding-serving: + condition: service_healthy + + retriever-neo4j: + extends: retriever + container_name: retriever-neo4j + environment: + RETRIEVER_COMPONENT_NAME: ${RETRIEVER_COMPONENT_NAME:-OPEA_RETRIEVER_NEO4J} + TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} + TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} + EMBEDDING_MODEL_ID: ${EMBEDDING_MODEL_ID} + LLM_MODEL_ID: ${LLM_MODEL_ID} + NEO4J_URI: ${NEO4J_URI} + NEO4J_USERNAME: ${NEO4J_USERNAME} + NEO4J_PASSWORD: ${NEO4J_PASSWORD} + VDMS_USE_CLIP: 0 + host_ip: ${host_ip} + depends_on: + neo4j-apoc: + condition: service_healthy + tei-embedding-serving: + condition: service_healthy + tgi-gaudi-server: + condition: service_healthy + + retriever-opensearch: + extends: retriever + container_name: retriever-opensearch + environment: + RETRIEVER_COMPONENT_NAME: ${RETRIEVER_COMPONENT_NAME:-OPEA_RETRIEVER_OPENSEARCH} + TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} + OPENSEARCH_INITIAL_ADMIN_PASSWORD: ${OPENSEARCH_INITIAL_ADMIN_PASSWORD} + OPENSEARCH_URL: ${OPENSEARCH_URL} + OPENSEARCH_INDEX_NAME: ${INDEX_NAME} + depends_on: + opensearch-vector-db: + condition: service_healthy + tei-embedding-serving: + condition: service_healthy + + retriever-pinecone: + extends: retriever + container_name: retriever-pinecone + environment: + RETRIEVER_COMPONENT_NAME: ${RETRIEVER_COMPONENT_NAME:-OPEA_RETRIEVER_PINECONE} + PINECONE_API_KEY: ${PINECONE_API_KEY} + PINECONE_INDEX_NAME: ${PINECONE_INDEX_NAME} + INDEX_NAME: ${PINECONE_INDEX_NAME} + + retriever-pgvector: + extends: retriever + container_name: retriever-pgvector + environment: + RETRIEVER_COMPONENT_NAME: ${RETRIEVER_COMPONENT_NAME:-OPEA_RETRIEVER_PGVECTOR} + PG_CONNECTION_STRING: ${PG_CONNECTION_STRING} + depends_on: + pgvector-db: + condition: service_healthy + + retriever-pathway: + extends: retriever + container_name: retriever-pathway + environment: + RETRIEVER_COMPONENT_NAME: ${RETRIEVER_COMPONENT_NAME:-OPEA_RETRIEVER_PATHWAY} + PATHWAY_HOST: ${PATHWAY_HOST} + PATHWAY_PORT: ${PATHWAY_PORT} + depends_on: + pathway-db: + condition: service_healthy + + retriever-qdrant: + extends: retriever + container_name: retriever-qdrant + environment: + RETRIEVER_COMPONENT_NAME: 
${RETRIEVER_COMPONENT_NAME:-OPEA_RETRIEVER_QDRANT} + QDRANT_HOST: ${QDRANT_HOST} + QDRANT_PORT: ${QDRANT_PORT} + INDEX_NAME: ${INDEX_NAME} + depends_on: + qdrant-vector-db: + condition: service_healthy + + retriever-redis: + extends: retriever + container_name: retriever-redis + environment: + RETRIEVER_COMPONENT_NAME: ${RETRIEVER_COMPONENT_NAME:-OPEA_RETRIEVER_REDIS} + TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} + REDIS_URL: ${REDIS_URL} + depends_on: + redis-vector-db: + condition: service_healthy + tei-embedding-serving: + condition: service_healthy + + retriever-redis-multimodal: + extends: retriever + container_name: retriever-redis-multimodal + environment: + RETRIEVER_COMPONENT_NAME: ${RETRIEVER_COMPONENT_NAME:-OPEA_RETRIEVER_REDIS} + RETRIEVER_TYPE: ${RETRIEVER_TYPE:-redis} + TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} + REDIS_URL: ${REDIS_URL} + BRIDGE_TOWER_EMBEDDING: ${BRIDGE_TOWER_EMBEDDING} + depends_on: + redis-vector-db: + condition: service_healthy + + retriever-vdms: + extends: retriever + container_name: retriever-vdms + environment: + RETRIEVER_COMPONENT_NAME: ${RETRIEVER_COMPONENT_NAME:-OPEA_RETRIEVER_VDMS} + TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} + VDMS_INDEX_NAME: ${INDEX_NAME} + VDMS_HOST: ${host_ip} + VDMS_PORT: ${VDMS_PORT} + VDMS_USE_CLIP: ${VDMS_USE_CLIP} + depends_on: + vdms-vector-db: + condition: service_healthy + tei-embedding-serving: + condition: service_healthy + + +networks: + default: + driver: bridge diff --git a/comps/retrievers/deployment/kubernetes/README.md b/comps/retrievers/deployment/kubernetes/README.md new file mode 100644 index 0000000000..141d49f05a --- /dev/null +++ b/comps/retrievers/deployment/kubernetes/README.md @@ -0,0 +1,18 @@ +# Deploy retriever microservice on Kubernetes cluster + +- You should have Helm (version >= 3.15) installed. Refer to the [Helm Installation Guide](https://helm.sh/docs/intro/install/) for more information. +- For more deployment options, refer to [helm charts README](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts#readme). + +## Deploy on Kubernetes with redis vector DB + +``` +export HFTOKEN="insert-your-huggingface-token-here" +helm install retriever-usvc oci://ghcr.io/opea-project/charts/retriever-usvc --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f redis-values.yaml +``` + +## Deploy on Kubernetes with milvus vector DB + +``` +export HFTOKEN="insert-your-huggingface-token-here" +helm install retriever-usvc oci://ghcr.io/opea-project/charts/retriever-usvc --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f milvus-values.yaml +``` diff --git a/comps/retrievers/deployment/kubernetes/milvus-values.yaml b/comps/retrievers/deployment/kubernetes/milvus-values.yaml new file mode 100644 index 0000000000..add7867f8c --- /dev/null +++ b/comps/retrievers/deployment/kubernetes/milvus-values.yaml @@ -0,0 +1,34 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +milvus: + enabled: true + # Milvus config for standalone mode with no PVC which has minimum requirements for the K8s cluster. + # Check https://github.com/zilliztech/milvus-helm/tree/milvus-4.2.12/charts/milvus for more production level configuration. 
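As a usage note for the consolidated `compose.yaml` introduced above: each `retriever-*` service extends the base `retriever` definition and selects a backend via `RETRIEVER_COMPONENT_NAME`. The sketch below is illustrative only — the Redis URL, TEI endpoint port, and index name are assumptions based on the defaults referenced in this PR rather than values guaranteed by the included third-party compose files — and shows how one variant can be brought up and smoke-tested.

```bash
# Illustrative sketch: bring up the Redis-backed retriever from the compose file above.
# Ports and URLs are assumptions (they depend on the included third_parties compose files);
# adjust them to your environment.
export host_ip=$(hostname -I | awk '{print $1}')
export HF_TOKEN="your-huggingface-token"                  # placeholder
export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:6060"    # TEI embedding service (assumed port)
export REDIS_URL="redis://${host_ip}:6379"                # Redis vector DB (assumed port)
export INDEX_NAME="rag-redis"

cd comps/retrievers/deployment/docker_compose
docker compose up -d retriever-redis

# Smoke test with a mock 768-dimensional embedding (same pattern used in the retriever READMEs).
export your_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)")
curl http://${host_ip}:7000/v1/retrieval \
  -X POST \
  -d "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${your_embedding}}" \
  -H 'Content-Type: application/json'
```

The other variants (for example `retriever-milvus` or `retriever-qdrant`) follow the same pattern with their own environment variables, as listed in the compose file.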
+ cluster: + enabled: false + etcd: + replicaCount: 1 + persistence: + enabled: false + pulsar: + enabled: false + minio: + mode: standalone + persistence: + enabled: false + standalone: + persistence: + enabled: false +redis-vector-db: + enabled: false +tei: + enabled: true + +RETRIEVER_BACKEND: "MILVUS" +# text embedding inference service URL, e.g. http://: +#TEI_EMBEDDING_ENDPOINT: "http://retriever-tei:80" +# milvus DB configurations +#MILVUS_HOST: "retriever-milvus" +MILVUS_PORT: 19530 +COLLECTION_NAME: "rag_milvus" diff --git a/comps/retrievers/deployment/kubernetes/redis-values.yaml b/comps/retrievers/deployment/kubernetes/redis-values.yaml new file mode 100644 index 0000000000..cbc29c7eeb --- /dev/null +++ b/comps/retrievers/deployment/kubernetes/redis-values.yaml @@ -0,0 +1,13 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Default values for retriever-usvc. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +tei: + enabled: true +redis-vector-db: + enabled: true +milvus: + enabled: false diff --git a/comps/retrievers/elasticsearch/langchain/Dockerfile b/comps/retrievers/elasticsearch/langchain/Dockerfile deleted file mode 100644 index 6c7bb903fd..0000000000 --- a/comps/retrievers/elasticsearch/langchain/Dockerfile +++ /dev/null @@ -1,28 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -FROM python:3.11-slim - -ARG ARCH="cpu" - -RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ - libgl1-mesa-glx \ - libjemalloc-dev - -RUN useradd -m -s /bin/bash user && \ - mkdir -p /home/user && \ - chown -R user /home/user/ - -COPY comps /home/user/comps - -USER user - -RUN pip install --no-cache-dir --upgrade pip setuptools && \ - if [ ${ARCH} = "cpu" ]; then pip install --no-cache-dir torch torchvision --index-url https://download.pytorch.org/whl/cpu; fi && \ - pip install --no-cache-dir -r /home/user/comps/retrievers/elasticsearch/langchain/requirements.txt - -ENV PYTHONPATH=$PYTHONPATH:/home/user - -WORKDIR /home/user/comps/retrievers/elasticsearch/langchain - -ENTRYPOINT ["python", "retriever_elasticsearch.py"] diff --git a/comps/retrievers/elasticsearch/langchain/config.py b/comps/retrievers/elasticsearch/langchain/config.py deleted file mode 100644 index a6e44747b2..0000000000 --- a/comps/retrievers/elasticsearch/langchain/config.py +++ /dev/null @@ -1,18 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import os - -# Embedding model -EMBED_MODEL = os.getenv("EMBED_MODEL", "BAAI/bge-base-en-v1.5") - -ES_CONNECTION_STRING = os.getenv("ES_CONNECTION_STRING", "http://localhost:9200") - -# TEI Embedding endpoints -TEI_ENDPOINT = os.getenv("TEI_ENDPOINT", "") - -# Vector Index Configuration -INDEX_NAME = os.getenv("INDEX_NAME", "rag-elastic") - -# Logging enabled -LOG_FLAG = os.getenv("LOGFLAG", False) diff --git a/comps/retrievers/elasticsearch/langchain/elasticsearch_langchain.yaml b/comps/retrievers/elasticsearch/langchain/elasticsearch_langchain.yaml deleted file mode 100644 index 7ca8d36fae..0000000000 --- a/comps/retrievers/elasticsearch/langchain/elasticsearch_langchain.yaml +++ /dev/null @@ -1,33 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -version: "3.8" - -services: - tei_xeon_service: - image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.2 - container_name: tei-xeon-server - ports: - - "6060:80" - volumes: - - "./data:/data" - shm_size: 1g - 
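Returning to the new Helm values files above (`redis-values.yaml` and `milvus-values.yaml`): after running the `helm install` command from the Kubernetes README, a quick smoke test is to port-forward the retriever service and hit its health-check endpoint. The release name, service name, and port below are assumptions derived from the chart name used in the README; confirm the actual values with `kubectl get svc`.

```bash
# Hypothetical verification steps after `helm install retriever-usvc ...`;
# the service name and port are assumptions -- confirm with `kubectl get svc`.
kubectl get pods
kubectl port-forward svc/retriever-usvc 7000:7000 &
curl http://localhost:7000/v1/health_check \
  -X GET \
  -H 'Content-Type: application/json'
```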
command: --model-id ${RETRIEVE_MODEL_ID} - retriever: - image: opea/retriever-elasticsearch:latest - container_name: retriever-elasticsearch - ports: - - "7000:7000" - ipc: host - environment: - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - INDEX_NAME: ${INDEX_NAME} - TEI_ENDPOINT: ${TEI_ENDPOINT} - ES_CONNECTION_STRING: ${ES_CONNECTION_STRING} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} - restart: unless-stopped - -networks: - default: - driver: bridge diff --git a/comps/retrievers/elasticsearch/langchain/requirements.txt b/comps/retrievers/elasticsearch/langchain/requirements.txt deleted file mode 100644 index 9ce5afcb6d..0000000000 --- a/comps/retrievers/elasticsearch/langchain/requirements.txt +++ /dev/null @@ -1,14 +0,0 @@ -docarray[full] -easyocr -fastapi -langchain-community -langchain-elasticsearch -langchain-huggingface -opentelemetry-api -opentelemetry-exporter-otlp -opentelemetry-sdk -prometheus-fastapi-instrumentator==7.0.0 -pymupdf -sentence_transformers -shortuuid -uvicorn diff --git a/comps/retrievers/elasticsearch/langchain/retriever_elasticsearch.py b/comps/retrievers/elasticsearch/langchain/retriever_elasticsearch.py deleted file mode 100644 index 9f26d4d311..0000000000 --- a/comps/retrievers/elasticsearch/langchain/retriever_elasticsearch.py +++ /dev/null @@ -1,105 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import time -from typing import Union - -from config import EMBED_MODEL, ES_CONNECTION_STRING, INDEX_NAME, LOG_FLAG, TEI_ENDPOINT -from elasticsearch import Elasticsearch -from langchain_community.embeddings import HuggingFaceBgeEmbeddings -from langchain_elasticsearch import ElasticsearchStore -from langchain_huggingface.embeddings import HuggingFaceEndpointEmbeddings - -from comps import ( - CustomLogger, - EmbedDoc, - SearchedDoc, - ServiceType, - TextDoc, - opea_microservices, - register_microservice, - register_statistics, - statistics_dict, -) - -logger = CustomLogger(__name__) - - -def create_index() -> None: - if not es_client.indices.exists(index=INDEX_NAME): - es_client.indices.create(index=INDEX_NAME) - - -def get_embedder() -> Union[HuggingFaceEndpointEmbeddings, HuggingFaceBgeEmbeddings]: - """Obtain required Embedder.""" - - if TEI_ENDPOINT: - return HuggingFaceEndpointEmbeddings(model=TEI_ENDPOINT) - else: - return HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) - - -def get_elastic_store(embedder: Union[HuggingFaceEndpointEmbeddings, HuggingFaceBgeEmbeddings]) -> ElasticsearchStore: - """Get Elasticsearch vector store.""" - - return ElasticsearchStore(index_name=INDEX_NAME, embedding=embedder, es_connection=es_client) - - -@register_microservice( - name="opea_service@retriever_elasticsearch", - service_type=ServiceType.RETRIEVER, - endpoint="/v1/retrieval", - host="0.0.0.0", - port=7000, -) -@register_statistics(names=["opea_service@retriever_elasticsearch"]) -async def retrieve(input: EmbedDoc) -> SearchedDoc: - """Retrieve documents based on similarity search type.""" - if LOG_FLAG: - logger.info(input) - start = time.time() - - if input.search_type == "similarity": - docs_and_similarities = vector_db.similarity_search_by_vector_with_relevance_scores( - embedding=input.embedding, k=input.k - ) - search_res = [doc for doc, _ in docs_and_similarities] - - elif input.search_type == "similarity_distance_threshold": - if input.distance_threshold is None: - raise ValueError("distance_threshold must be provided for " + "similarity_distance_threshold retriever") - docs_and_similarities = 
vector_db.similarity_search_by_vector_with_relevance_scores( - embedding=input.embedding, k=input.k - ) - search_res = [doc for doc, similarity in docs_and_similarities if similarity > input.distance_threshold] - - elif input.search_type == "similarity_score_threshold": - docs_and_similarities = vector_db.similarity_search_by_vector_with_relevance_scores(query=input.text, k=input.k) - search_res = [doc for doc, similarity in docs_and_similarities if similarity > input.score_threshold] - - elif input.search_type == "mmr": - search_res = vector_db.max_marginal_relevance_search( - query=input.text, k=input.k, fetch_k=input.fetch_k, lambda_mult=input.lambda_mult - ) - - else: - raise ValueError(f"{input.search_type} not valid") - - searched_docs = [] - for r in search_res: - searched_docs.append(TextDoc(text=r.page_content)) - result = SearchedDoc(retrieved_docs=searched_docs, initial_query=input.text) - - statistics_dict["opea_service@retriever_elasticsearch"].append_latency(time.time() - start, None) - - if LOG_FLAG: - logger.info(result) - - return result - - -if __name__ == "__main__": - es_client = Elasticsearch(hosts=ES_CONNECTION_STRING) - vector_db = get_elastic_store(get_embedder()) - create_index() - opea_microservices["opea_service@retriever_elasticsearch"].start() diff --git a/comps/retrievers/milvus/langchain/Dockerfile b/comps/retrievers/milvus/langchain/Dockerfile deleted file mode 100644 index 94a482aaf2..0000000000 --- a/comps/retrievers/milvus/langchain/Dockerfile +++ /dev/null @@ -1,31 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -FROM python:3.11-slim - -ENV LANG=C.UTF-8 - -ARG ARCH="cpu" - -RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ - build-essential \ - libgl1-mesa-glx \ - libjemalloc-dev - -RUN useradd -m -s /bin/bash user && \ - mkdir -p /home/user && \ - chown -R user /home/user/ - -USER user - -COPY comps /home/user/comps - -RUN pip install --no-cache-dir --upgrade pip setuptools && \ - if [ ${ARCH} = "cpu" ]; then pip install --no-cache-dir torch --index-url https://download.pytorch.org/whl/cpu; fi && \ - pip install --no-cache-dir -r /home/user/comps/retrievers/milvus/langchain/requirements.txt - -ENV PYTHONPATH=$PYTHONPATH:/home/user - -WORKDIR /home/user/comps/retrievers/milvus/langchain - -ENTRYPOINT ["python", "retriever_milvus.py"] \ No newline at end of file diff --git a/comps/retrievers/milvus/langchain/__init__.py b/comps/retrievers/milvus/langchain/__init__.py deleted file mode 100644 index 916f3a44b2..0000000000 --- a/comps/retrievers/milvus/langchain/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 diff --git a/comps/retrievers/milvus/langchain/config.py b/comps/retrievers/milvus/langchain/config.py deleted file mode 100644 index 92533eb0c4..0000000000 --- a/comps/retrievers/milvus/langchain/config.py +++ /dev/null @@ -1,18 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import os - -# Local Embedding model -LOCAL_EMBEDDING_MODEL = os.getenv("LOCAL_EMBEDDING_MODEL", "maidalun1020/bce-embedding-base_v1") -# TEI Embedding endpoints -TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_EMBEDDING_ENDPOINT", "") -# MILVUS configuration -MILVUS_HOST = os.getenv("MILVUS_HOST", "localhost") -MILVUS_PORT = int(os.getenv("MILVUS_PORT", 19530)) -COLLECTION_NAME = os.getenv("COLLECTION_NAME", "rag_milvus") -# TEI configuration -TEI_EMBEDDING_MODEL = 
os.environ.get("TEI_EMBEDDING_MODEL", "/home/user/bce-embedding-base_v1") -TEI_EMBEDDING_ENDPOINT = os.environ.get("TEI_EMBEDDING_ENDPOINT", "") -os.environ["OPENAI_API_BASE"] = TEI_EMBEDDING_ENDPOINT -os.environ["OPENAI_API_KEY"] = "Dummy key" diff --git a/comps/retrievers/milvus/langchain/requirements.txt b/comps/retrievers/milvus/langchain/requirements.txt deleted file mode 100644 index fd6e197c87..0000000000 --- a/comps/retrievers/milvus/langchain/requirements.txt +++ /dev/null @@ -1,25 +0,0 @@ -beautifulsoup4 -docarray[full] -easyocr -fastapi -frontend==0.0.3 -huggingface_hub -langchain -langchain-community -langchain_milvus -numpy -openai -opentelemetry-api -opentelemetry-exporter-otlp -opentelemetry-sdk -pandas -Pillow -prometheus-fastapi-instrumentator -pydantic==2.7.3 -pymilvus==2.4.3 -pymupdf==1.24.5 -python-docx==0.8.11 -sentence_transformers -shortuuid -tiktoken -uvicorn diff --git a/comps/retrievers/milvus/langchain/retriever_milvus.py b/comps/retrievers/milvus/langchain/retriever_milvus.py deleted file mode 100644 index b029d819f3..0000000000 --- a/comps/retrievers/milvus/langchain/retriever_milvus.py +++ /dev/null @@ -1,92 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import os -import time -from typing import List, Optional - -from config import ( - COLLECTION_NAME, - LOCAL_EMBEDDING_MODEL, - MILVUS_HOST, - MILVUS_PORT, - TEI_EMBEDDING_ENDPOINT, - TEI_EMBEDDING_MODEL, -) -from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceHubEmbeddings, OpenAIEmbeddings -from langchain_milvus.vectorstores import Milvus - -from comps import ( - CustomLogger, - EmbedDoc, - SearchedDoc, - ServiceType, - TextDoc, - opea_microservices, - register_microservice, - register_statistics, - statistics_dict, -) - -logger = CustomLogger("retriever_milvus") -logflag = os.getenv("LOGFLAG", False) - - -@register_microservice( - name="opea_service@retriever_milvus", - service_type=ServiceType.RETRIEVER, - endpoint="/v1/retrieval", - host="0.0.0.0", - port=7000, -) -@register_statistics(names=["opea_service@retriever_milvus"]) -async def retrieve(input: EmbedDoc) -> SearchedDoc: - if logflag: - logger.info(input) - vector_db = Milvus( - embeddings, - connection_args={"host": MILVUS_HOST, "port": MILVUS_PORT}, - collection_name=COLLECTION_NAME, - ) - start = time.time() - if input.search_type == "similarity": - search_res = await vector_db.asimilarity_search_by_vector(embedding=input.embedding, k=input.k) - elif input.search_type == "similarity_distance_threshold": - if input.distance_threshold is None: - raise ValueError("distance_threshold must be provided for " + "similarity_distance_threshold retriever") - search_res = await vector_db.asimilarity_search_by_vector( - embedding=input.embedding, k=input.k, distance_threshold=input.distance_threshold - ) - elif input.search_type == "similarity_score_threshold": - docs_and_similarities = await vector_db.asimilarity_search_with_relevance_scores( - query=input.text, k=input.k, score_threshold=input.score_threshold - ) - search_res = [doc for doc, _ in docs_and_similarities] - elif input.search_type == "mmr": - search_res = await vector_db.amax_marginal_relevance_search( - query=input.text, k=input.k, fetch_k=input.fetch_k, lambda_mult=input.lambda_mult - ) - searched_docs = [] - for r in search_res: - searched_docs.append(TextDoc(text=r.page_content)) - result = SearchedDoc(retrieved_docs=searched_docs, initial_query=input.text) - 
statistics_dict["opea_service@retriever_milvus"].append_latency(time.time() - start, None) - if logflag: - logger.info(result) - return result - - -if __name__ == "__main__": - # Create vectorstore - if TEI_EMBEDDING_ENDPOINT: - # create embeddings using TEI endpoint service - if logflag: - logger.info(f"[ retriever_milvus ] TEI_EMBEDDING_ENDPOINT:{TEI_EMBEDDING_ENDPOINT}") - embeddings = HuggingFaceHubEmbeddings(model=TEI_EMBEDDING_ENDPOINT) - else: - # create embeddings using local embedding model - if logflag: - logger.info(f"[ retriever_milvus ] LOCAL_EMBEDDING_MODEL:{LOCAL_EMBEDDING_MODEL}") - embeddings = HuggingFaceBgeEmbeddings(model_name=LOCAL_EMBEDDING_MODEL) - - opea_microservices["opea_service@retriever_milvus"].start() diff --git a/comps/retrievers/neo4j/langchain/Dockerfile b/comps/retrievers/neo4j/langchain/Dockerfile deleted file mode 100644 index 3609a34c43..0000000000 --- a/comps/retrievers/neo4j/langchain/Dockerfile +++ /dev/null @@ -1,33 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -FROM python:3.11-slim - -ENV LANG=C.UTF-8 - -ENV HUGGINGFACEHUB_API_TOKEN=dummy - -ARG ARCH="cpu" - -RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ - build-essential \ - libgl1-mesa-glx \ - libjemalloc-dev - -RUN useradd -m -s /bin/bash user && \ - mkdir -p /home/user && \ - chown -R user /home/user/ - -USER user - -COPY comps /home/user/comps - -RUN pip install --no-cache-dir --upgrade pip setuptools && \ - if [ ${ARCH} = "cpu" ]; then pip install --no-cache-dir torch --index-url https://download.pytorch.org/whl/cpu; fi && \ - pip install --no-cache-dir -r /home/user/comps/retrievers/neo4j/langchain/requirements.txt - -ENV PYTHONPATH=$PYTHONPATH:/home/user - -WORKDIR /home/user/comps/retrievers/neo4j/langchain - -ENTRYPOINT ["python", "retriever_neo4j.py"] diff --git a/comps/retrievers/neo4j/langchain/README.md b/comps/retrievers/neo4j/langchain/README.md deleted file mode 100644 index 731abc20f5..0000000000 --- a/comps/retrievers/neo4j/langchain/README.md +++ /dev/null @@ -1,112 +0,0 @@ -# Retriever Microservice with Neo4J - -## 🚀Start Microservice with Python - -### Install Requirements - -```bash -pip install -r requirements.txt -``` - -### Start Neo4J Server - -To launch Neo4j locally, first ensure you have docker installed. Then, you can launch the database with the following docker command. - -```bash -docker run \ - -p 7474:7474 -p 7687:7687 \ - -v $PWD/data:/data -v $PWD/plugins:/plugins \ - --name neo4j-apoc \ - -d \ - -e NEO4J_AUTH=neo4j/password \ - -e NEO4J_PLUGINS=\[\"apoc\"\] \ - neo4j:latest -``` - -### Setup Environment Variables - -```bash -export no_proxy=${your_no_proxy} -export http_proxy=${your_http_proxy} -export https_proxy=${your_http_proxy} -export NEO4J_URI=${your_neo4j_url} -export NEO4J_USERNAME=${your_neo4j_username} -export NEO4J_PASSWORD=${your_neo4j_password} -``` - -### Start Retriever Service - -```bash -python retriever_neo4j.py -``` - -## 🚀Start Microservice with Docker - -### Build Docker Image - -```bash -cd ../../ -docker build -t opea/retriever-neo4j:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/neo4j/langchain/Dockerfile . -``` - -### Run Docker with CLI - -```bash -docker run -d --name="retriever-neo4j-server" -p 7000:7000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e NEO4J_URI=${your_neo4j_host_ip} opea/retriever-neo4j:latest -``` - -## 🚀3. 
Consume Retriever Service - -### 3.1 Check Service Status - -```bash -curl http://${your_ip}:7000/v1/health_check \ - -X GET \ - -H 'Content-Type: application/json' -``` - -### 3.2 Consume Embedding Service - -To consume the Retriever Microservice, you can generate a mock embedding vector of length 768 with Python. - -```bash -export your_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") -curl http://${your_ip}:7000/v1/retrieval \ - -X POST \ - -d "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${your_embedding}}" \ - -H 'Content-Type: application/json' -``` - -You can set the parameters for the retriever. - -```bash -export your_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") -curl http://localhost:7000/v1/retrieval \ - -X POST \ - -d "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${your_embedding},\"search_type\":\"similarity\", \"k\":4}" \ - -H 'Content-Type: application/json' -``` - -```bash -export your_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") -curl http://localhost:7000/v1/retrieval \ - -X POST \ - -d "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${your_embedding},\"search_type\":\"similarity_distance_threshold\", \"k\":4, \"distance_threshold\":1.0}" \ - -H 'Content-Type: application/json' -``` - -```bash -export your_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") -curl http://localhost:7000/v1/retrieval \ - -X POST \ - -d "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${your_embedding},\"search_type\":\"similarity_score_threshold\", \"k\":4, \"score_threshold\":0.2}" \ - -H 'Content-Type: application/json' -``` - -```bash -export your_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") -curl http://localhost:7000/v1/retrieval \ - -X POST \ - -d "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${your_embedding},\"search_type\":\"mmr\", \"k\":4, \"fetch_k\":20, \"lambda_mult\":0.5}" \ - -H 'Content-Type: application/json' -``` diff --git a/comps/retrievers/neo4j/langchain/__init__.py b/comps/retrievers/neo4j/langchain/__init__.py deleted file mode 100644 index 916f3a44b2..0000000000 --- a/comps/retrievers/neo4j/langchain/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 diff --git a/comps/retrievers/neo4j/langchain/config.py b/comps/retrievers/neo4j/langchain/config.py deleted file mode 100644 index 39adf6d89d..0000000000 --- a/comps/retrievers/neo4j/langchain/config.py +++ /dev/null @@ -1,15 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import os - -# Neo4J configuration -NEO4J_URL = os.getenv("NEO4J_URI", "bolt://localhost:7687") -NEO4J_USERNAME = os.getenv("NEO4J_USERNAME", "neo4j") -NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD", "test") - -# Embedding model -EMBED_MODEL = os.getenv("EMBED_MODEL", "BAAI/bge-base-en-v1.5") - -# Embedding endpoints -EMBED_ENDPOINT = os.getenv("TEI_EMBEDDING_ENDPOINT", "") diff --git a/comps/retrievers/neo4j/langchain/requirements.txt b/comps/retrievers/neo4j/langchain/requirements.txt deleted file mode 100644 index a5a1ec31db..0000000000 --- a/comps/retrievers/neo4j/langchain/requirements.txt +++ /dev/null @@ -1,21 +0,0 
@@ -docarray[full] -fastapi -frontend -huggingface_hub -langchain -langchain-community -neo4j -numpy -opentelemetry-api -opentelemetry-exporter-otlp -opentelemetry-sdk -pandas -Pillow -prometheus-fastapi-instrumentator -pydantic -pymupdf -python-docx -sentence_transformers -shortuuid -tiktoken -uvicorn diff --git a/comps/retrievers/neo4j/langchain/retriever_neo4j.py b/comps/retrievers/neo4j/langchain/retriever_neo4j.py deleted file mode 100644 index 9ad00bf7af..0000000000 --- a/comps/retrievers/neo4j/langchain/retriever_neo4j.py +++ /dev/null @@ -1,118 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import os -import time -from typing import Union - -from config import EMBED_ENDPOINT, EMBED_MODEL, NEO4J_PASSWORD, NEO4J_URL, NEO4J_USERNAME -from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceHubEmbeddings -from langchain_community.vectorstores import Neo4jVector - -from comps import ( - CustomLogger, - EmbedDoc, - SearchedDoc, - ServiceType, - TextDoc, - opea_microservices, - register_microservice, - register_statistics, - statistics_dict, -) -from comps.cores.proto.api_protocol import ( - ChatCompletionRequest, - RetrievalRequest, - RetrievalResponse, - RetrievalResponseData, -) - -logger = CustomLogger("retriever_neo4j") -logflag = os.getenv("LOGFLAG", False) - - -@register_microservice( - name="opea_service@retriever_neo4j", - service_type=ServiceType.RETRIEVER, - endpoint="/v1/retrieval", - host="0.0.0.0", - port=7000, -) -@register_statistics(names=["opea_service@retriever_neo4j"]) -async def retrieve( - input: Union[EmbedDoc, RetrievalRequest, ChatCompletionRequest] -) -> Union[SearchedDoc, RetrievalResponse, ChatCompletionRequest]: - if logflag: - logger.info(input) - start = time.time() - - if isinstance(input, EmbedDoc): - query = input.text - else: - # for RetrievalRequest, ChatCompletionRequest - query = input.input - - if input.search_type == "similarity": - search_res = await vector_db.asimilarity_search_by_vector( - embedding=input.embedding, query=input.text, k=input.k - ) - elif input.search_type == "similarity_distance_threshold": - if input.distance_threshold is None: - raise ValueError("distance_threshold must be provided for " + "similarity_distance_threshold retriever") - search_res = await vector_db.asimilarity_search_by_vector( - embedding=input.embedding, query=input.text, k=input.k, distance_threshold=input.distance_threshold - ) - elif input.search_type == "similarity_score_threshold": - docs_and_similarities = await vector_db.asimilarity_search_with_relevance_scores( - query=input.text, k=input.k, score_threshold=input.score_threshold - ) - search_res = [doc for doc, _ in docs_and_similarities] - elif input.search_type == "mmr": - search_res = await vector_db.amax_marginal_relevance_search( - query=input.text, k=input.k, fetch_k=input.fetch_k, lambda_mult=input.lambda_mult - ) - else: - raise ValueError(f"{input.search_type} not valid") - - # return different response format - retrieved_docs = [] - if isinstance(input, EmbedDoc): - for r in search_res: - retrieved_docs.append(TextDoc(text=r.page_content)) - result = SearchedDoc(retrieved_docs=retrieved_docs, initial_query=input.text) - else: - for r in search_res: - retrieved_docs.append(RetrievalResponseData(text=r.page_content, metadata=r.metadata)) - if isinstance(input, RetrievalRequest): - result = RetrievalResponse(retrieved_docs=retrieved_docs) - elif isinstance(input, ChatCompletionRequest): - input.retrieved_docs = retrieved_docs - 
input.documents = [doc.text for doc in retrieved_docs] - result = input - - statistics_dict["opea_service@retriever_neo4j"].append_latency(time.time() - start, None) - if logflag: - logger.info(result) - return result - - -if __name__ == "__main__": - - if EMBED_ENDPOINT: - # create embeddings using TEI endpoint service - hf_token = os.getenv("HUGGINGFACEHUB_API_TOKEN") - embeddings = HuggingFaceHubEmbeddings(model=EMBED_ENDPOINT, huggingfacehub_api_token=hf_token) - else: - # create embeddings using local embedding model - embeddings = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) - - vector_db = Neo4jVector.from_existing_graph( - embedding=embeddings, - url=NEO4J_URL, - username=NEO4J_USERNAME, - password=NEO4J_PASSWORD, - node_label="__Entity__", - text_node_properties=["id", "description"], - embedding_node_property="embedding", - ) - opea_microservices["opea_service@retriever_neo4j"].start() diff --git a/comps/retrievers/neo4j/llama_index/Dockerfile b/comps/retrievers/neo4j/llama_index/Dockerfile deleted file mode 100644 index 1b601805d0..0000000000 --- a/comps/retrievers/neo4j/llama_index/Dockerfile +++ /dev/null @@ -1,35 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -FROM python:3.11-slim - -ENV LANG=C.UTF-8 - -ENV HUGGINGFACEHUB_API_TOKEN=dummy - -ARG ARCH="cpu" - -RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ - build-essential \ - libgl1-mesa-glx \ - libjemalloc-dev \ - libcairo2 \ - libglib2.0-0 - -RUN useradd -m -s /bin/bash user && \ - mkdir -p /home/user && \ - chown -R user /home/user/ - -USER user - -COPY comps /home/user/comps - -RUN pip install --no-cache-dir --upgrade pip && \ - if [ ${ARCH} = "cpu" ]; then pip install --no-cache-dir torch --index-url https://download.pytorch.org/whl/cpu; fi && \ - pip install --no-cache-dir -r /home/user/comps/retrievers/neo4j/llama_index/requirements.txt - -ENV PYTHONPATH=$PYTHONPATH:/home/user - -WORKDIR /home/user/comps/retrievers/neo4j/llama_index - -ENTRYPOINT ["python", "retriever_community_answers_neo4j.py"] diff --git a/comps/retrievers/neo4j/llama_index/__init__.py b/comps/retrievers/neo4j/llama_index/__init__.py deleted file mode 100644 index 916f3a44b2..0000000000 --- a/comps/retrievers/neo4j/llama_index/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 diff --git a/comps/retrievers/neo4j/llama_index/config.py b/comps/retrievers/neo4j/llama_index/config.py deleted file mode 100644 index 3037b8f9fb..0000000000 --- a/comps/retrievers/neo4j/llama_index/config.py +++ /dev/null @@ -1,18 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import os - -host_ip = os.getenv("host_ip") -# Neo4J configuration -NEO4J_URL = os.getenv("NEO4J_URL", f"bolt://{host_ip}:7687") -NEO4J_USERNAME = os.getenv("NEO4J_USERNAME", "neo4j") -NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD", "neo4jtest") - -# LLM/Embedding endpoints -TGI_LLM_ENDPOINT = os.getenv("TGI_LLM_ENDPOINT", f"http://{host_ip}:6005") -TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_EMBEDDING_ENDPOINT ", f"http://{host_ip}:6006") - -OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") -OPENAI_EMBEDDING_MODEL = os.getenv("OPENAI_EMBEDDING_MODEL", "text-embedding-3-small") -OPENAI_LLM_MODEL = os.getenv("OPENAI_LLM_MODEL", "gpt-4o") diff --git a/comps/retrievers/neo4j/llama_index/neo4j_llama_index.yaml b/comps/retrievers/neo4j/llama_index/neo4j_llama_index.yaml deleted file mode 100644 index cc3dfa7f1c..0000000000 
--- a/comps/retrievers/neo4j/llama_index/neo4j_llama_index.yaml +++ /dev/null @@ -1,124 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -version: "3.8" -services: - neo4j-apoc: - image: neo4j:latest - container_name: neo4j-apoc - volumes: - - /$HOME/neo4j/logs:/logs - - /$HOME/neo4j/config:/config - - /$HOME/neo4j/data:/data - - /$HOME/neo4j/plugins:/plugins - ipc: host - environment: - - NEO4J_AUTH=${NEO4J_USERNAME}/${NEO4J_PASSWORD} - - NEO4J_PLUGINS=["apoc"] - - NEO4J_apoc_export_file_enabled=true - - NEO4J_apoc_import_file_enabled=true - - NEO4J_apoc_import_file_use__neo4j__config=true - - NEO4J_dbms_security_procedures_unrestricted=apoc.\* - ports: - - "7474:7474" - - "7687:7687" - restart: always - tei-embedding-service: - image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 - container_name: tei-embedding-server - ports: - - "6006:80" - volumes: - - "./data:/data" - shm_size: 1g - environment: - no_proxy: ${no_proxy} - NO_PROXY: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} - ipc: host - command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate - tgi-gaudi-service: - image: ghcr.io/huggingface/tgi-gaudi:2.0.5 - container_name: tgi-gaudi-server - ports: - - "6005:80" - volumes: - - "./data:/data" - environment: - no_proxy: ${no_proxy} - NO_PROXY: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} - HF_HUB_DISABLE_PROGRESS_BARS: 1 - HF_HUB_ENABLE_HF_TRANSFER: 0 - HABANA_VISIBLE_DEVICES: all - OMPI_MCA_btl_vader_single_copy_mechanism: none - ENABLE_HPU_GRAPH: true - LIMIT_HPU_GRAPH: true - USE_FLASH_ATTENTION: true - FLASH_ATTENTION_RECOMPUTE: true - runtime: habana - cap_add: - - SYS_NICE - ipc: host - command: --model-id ${LLM_MODEL_ID} --max-input-length 2048 --max-total-tokens 4096 - dataprep-neo4j-llamaindex: - image: opea/dataprep-neo4j-llamaindex:latest - container_name: dataprep-neo4j-server - depends_on: - - neo4j-apoc - - tgi-gaudi-service - - tei-embedding-service - ports: - - "6004:6004" - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - host_ip: ${host_ip} - NEO4J_URL: ${NEO4J_URL} - NEO4J_USERNAME: ${NEO4J_USERNAME} - NEO4J_PASSWORD: ${NEO4J_PASSWORD} - TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} - TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} - OPENAI_API_KEY: ${OPENAI_API_KEY} - OPENAI_EMBEDDING_MODEL: ${OPENAI_EMBEDDING_MODEL} - OPENAI_LLM_MODEL: ${OPENAI_LLM_MODEL} - EMBEDDING_MODEL_ID: ${EMBEDDING_MODEL_ID} - LLM_MODEL_ID: ${LLM_MODEL_ID} - LOGFLAG: ${LOGFLAG} - restart: unless-stopped - retriever-neo4j-llamaindex: - image: opea/retriever-neo4j-llamaindex:latest - container_name: retriever-neo4j-server - depends_on: - - neo4j-apoc - - tgi-gaudi-service - - tei-embedding-service - ports: - - "6009:6009" - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - host_ip: ${host_ip} - NEO4J_URL: ${NEO4J_URL} - NEO4J_USERNAME: ${NEO4J_USERNAME} - NEO4J_PASSWORD: ${NEO4J_PASSWORD} - TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} - TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} - OPENAI_API_KEY: ${OPENAI_API_KEY} - OPENAI_EMBEDDING_MODEL: ${OPENAI_EMBEDDING_MODEL} - OPENAI_LLM_MODEL: ${OPENAI_LLM_MODEL} - EMBEDDING_MODEL_ID: ${EMBEDDING_MODEL_ID} - LLM_MODEL_ID: ${LLM_MODEL_ID} - LOGFLAG: ${LOGFLAG} - restart: unless-stopped -networks: - default: - driver: bridge diff --git 
a/comps/retrievers/neo4j/llama_index/requirements.txt b/comps/retrievers/neo4j/llama_index/requirements.txt deleted file mode 100644 index c91f71ba73..0000000000 --- a/comps/retrievers/neo4j/llama_index/requirements.txt +++ /dev/null @@ -1,36 +0,0 @@ -bs4 -cairosvg -docarray[full] -docx2txt -fastapi -frontend -future -graspologic -huggingface_hub -langchain -langchain-community -llama-index-core -llama-index-embeddings-openai -llama-index-embeddings-text-embeddings-inference -llama-index-llms-openai -llama-index-llms-text-generation-inference -llama_index_graph_stores_neo4j==0.3.3 -neo4j -numpy -opencv-python -opentelemetry-api -opentelemetry-exporter-otlp -opentelemetry-sdk -pandas -Pillow -prometheus-fastapi-instrumentator -pydantic -pymupdf -pytesseract -python-docx -python-multipart -python-pptx -sentence_transformers -shortuuid -tiktoken -uvicorn diff --git a/comps/retrievers/neo4j/llama_index/retriever_community_answers_neo4j.py b/comps/retrievers/neo4j/llama_index/retriever_community_answers_neo4j.py deleted file mode 100644 index 830dc27756..0000000000 --- a/comps/retrievers/neo4j/llama_index/retriever_community_answers_neo4j.py +++ /dev/null @@ -1,265 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - - -import os -import re -import time -from typing import List, Union - -import openai -from config import ( - NEO4J_PASSWORD, - NEO4J_URL, - NEO4J_USERNAME, - OPENAI_API_KEY, - OPENAI_EMBEDDING_MODEL, - OPENAI_LLM_MODEL, - TEI_EMBEDDING_ENDPOINT, - TGI_LLM_ENDPOINT, -) -from llama_index.core import PropertyGraphIndex, Settings -from llama_index.core.indices.property_graph.sub_retrievers.vector import VectorContextRetriever -from llama_index.core.llms import LLM, ChatMessage -from llama_index.core.query_engine import CustomQueryEngine -from llama_index.embeddings.openai import OpenAIEmbedding -from llama_index.embeddings.text_embeddings_inference import TextEmbeddingsInference -from llama_index.llms.openai import OpenAI -from llama_index.llms.text_generation_inference import TextGenerationInference -from neo4j import GraphDatabase -from pydantic import BaseModel, PrivateAttr - -from comps import ( - CustomLogger, - EmbedDoc, - SearchedDoc, - ServiceType, - TextDoc, - opea_microservices, - register_microservice, - register_statistics, - statistics_dict, -) -from comps.cores.proto.api_protocol import ( - ChatCompletionRequest, - RetrievalRequest, - RetrievalResponse, - RetrievalResponseData, -) -from comps.dataprep.neo4j.llama_index.extract_graph_neo4j import GraphRAGStore, get_attribute_from_tgi_endpoint - -logger = CustomLogger("retriever_neo4j") -logflag = os.getenv("LOGFLAG", False) - - -class GraphRAGQueryEngine(CustomQueryEngine): - # https://github.com/run-llama/llama_index/blob/main/docs/docs/examples/cookbooks/GraphRAG_v2.ipynb - # private attr because inherits from BaseModel - _graph_store: GraphRAGStore = PrivateAttr() - _index: PropertyGraphIndex = PrivateAttr() - _llm: LLM = PrivateAttr() - _similarity_top_k: int = PrivateAttr() - - def __init__(self, graph_store: GraphRAGStore, llm: LLM, index: PropertyGraphIndex, similarity_top_k: int = 20): - super().__init__() - self._graph_store = graph_store - self._index = index - self._llm = llm - self._similarity_top_k = similarity_top_k - - def custom_query(self, query_str: str) -> RetrievalResponseData: - """Process all community summaries to generate answers to a specific query.""" - - entities = self.get_entities(query_str, self._similarity_top_k) - entity_info = 
self._graph_store.read_entity_info() - community_ids = self.retrieve_entity_communities(entity_info, entities) - community_summaries = self.retrieve_community_summaries_cypher(entities) - community_ids = list(community_summaries.keys()) - if logflag: - logger.info(f"Community ids: {community_ids}") - # community_summaries of relevant communities - community_answers = [ - self.generate_answer_from_summary(community_summary, query_str) - for id, community_summary in community_summaries.items() - ] - # Convert answers to RetrievalResponseData objects - response_data = [RetrievalResponseData(text=answer, metadata={}) for answer in community_answers] - return response_data - - def get_entities(self, query_str, similarity_top_k): - if logflag: - logger.info(f"Retrieving entities for query: {query_str} with top_k: {similarity_top_k}") - # TODO: make retrever configurable [VectorContextRetriever]or [LLMSynonymRetriever] - vecContext_retriever = VectorContextRetriever( - graph_store=self._graph_store, - embed_model=self._index._embed_model, - similarity_top_k=self._similarity_top_k, - # similarity_score=0.6 - ) - # nodes_retrieved = self._index.as_retriever( - # sub_retrievers=[vecContext_retriever], similarity_top_k=self._similarity_top_k - # ).retrieve(query_str) - # if subretriever not specified it will use LLMSynonymRetriever with Settings.llm model - nodes_retrieved = self._index.as_retriever(similarity_top_k=self._similarity_top_k).retrieve(query_str) - entities = set() - pattern = r"(\w+(?:\s+\w+)*)\s*->\s*(\w+(?:\s+\w+)*)\s*->\s*(\w+(?:\s+\w+)*)" - if logflag: - logger.info(f" len of triplets {len(self._index.property_graph_store.get_triplets())}") - logger.info(f"number of nodes retrieved {len(nodes_retrieved), nodes_retrieved}") - for node in nodes_retrieved: - matches = re.findall(pattern, node.text, re.DOTALL) - - for match in matches: - subject = match[0] - obj = match[2] - entities.add(subject) - entities.add(obj) - if logflag: - logger.info(f"entities from query {entities}") - return list(entities) - - def retrieve_entity_communities(self, entity_info, entities): - """Retrieve cluster information for given entities, allowing for multiple clusters per entity. - - Args: - entity_info (dict): Dictionary mapping entities to their cluster IDs (list). - entities (list): List of entity names to retrieve information for. - - Returns: - List of community or cluster IDs to which an entity belongs. - """ - community_ids = [] - - for entity in entities: - if entity in entity_info: - community_ids.extend(entity_info[entity]) - - return list(set(community_ids)) - - def retrieve_community_summaries_cypher(self, entities): - """Retrieve cluster information and summaries for given entities using a Cypher query. - - Args: - entities (list): List of entity names to retrieve information for. - - Returns: - dict: Dictionary where keys are community or cluster IDs and values are summaries. - """ - community_summaries = {} - print(f"driver working? 
{self._graph_store.driver})") - - with self._graph_store.driver.session() as session: - for entity in entities: - result = session.run( - """ - MATCH (e:Entity {id: $entity_id})-[:BELONGS_TO]->(c:Cluster) - RETURN c.id AS cluster_id, c.summary AS summary - """, - entity_id=entity, - ) - for record in result: - community_summaries[record["cluster_id"]] = record["summary"] - - return community_summaries - - def generate_answer_from_summary(self, community_summary, query): - """Generate an answer from a community summary based on a given query using LLM.""" - prompt = ( - f"Given the community summary: {community_summary}, " - f"how would you answer the following query? Query: {query}" - ) - messages = [ - ChatMessage(role="system", content=prompt), - ChatMessage( - role="user", - content="I need an answer based on the above information.", - ), - ] - response = self._llm.chat(messages) - cleaned_response = re.sub(r"^assistant:\s*", "", str(response)).strip() - return cleaned_response - - -@register_microservice( - name="opea_service@retriever_community_answers_neo4j", - service_type=ServiceType.RETRIEVER, - endpoint="/v1/retrieval", - host="0.0.0.0", - port=6009, -) -@register_statistics(names=["opea_service@retriever_community_answers_neo4j"]) -async def retrieve(input: Union[ChatCompletionRequest]) -> Union[ChatCompletionRequest]: - if logflag: - logger.info(input) - start = time.time() - - if isinstance(input.messages, str): - query = input.messages - else: - query = input.messages[0]["content"] - logger.info(f"Query received in retriever: {query}") - - if OPENAI_API_KEY: - logger.info("OpenAI API Key is set. Verifying its validity...") - openai.api_key = OPENAI_API_KEY - try: - llm = OpenAI(temperature=0, model=OPENAI_LLM_MODEL) - embed_model = OpenAIEmbedding(model=OPENAI_EMBEDDING_MODEL, embed_batch_size=100) - logger.info("OpenAI API Key is valid.") - except openai.AuthenticationError: - logger.info("OpenAI API Key is invalid.") - except Exception as e: - logger.info(f"An error occurred while verifying the API Key: {e}") - else: - logger.info("No OpenAI API KEY provided. 
Will use TGI and TEI endpoints") - llm_name = get_attribute_from_tgi_endpoint(TGI_LLM_ENDPOINT, "model_id") - llm = TextGenerationInference( - model_url=TGI_LLM_ENDPOINT, - model_name=llm_name, - temperature=0.7, - max_tokens=1512, # 512otherwise too shor - ) - emb_name = get_attribute_from_tgi_endpoint(TEI_EMBEDDING_ENDPOINT, "model_id") - embed_model = TextEmbeddingsInference( - base_url=TEI_EMBEDDING_ENDPOINT, - model_name=emb_name, - timeout=60, # timeout in seconds - embed_batch_size=10, # batch size for embedding - ) - Settings.embed_model = embed_model - Settings.llm = llm - # pre-existiing graph store (created with data_prep/llama-index/extract_graph_neo4j.py) - graph_store = GraphRAGStore(username=NEO4J_USERNAME, password=NEO4J_PASSWORD, url=NEO4J_URL, llm=llm) - - index = PropertyGraphIndex.from_existing( - property_graph_store=graph_store, - embed_model=embed_model or Settings.embed_model, - embed_kg_nodes=True, - ) - - query_engine = GraphRAGQueryEngine( - graph_store=index.property_graph_store, - llm=llm, - index=index, - similarity_top_k=3, - ) - - # these are the answers from the community summaries - answers_by_community = query_engine.query(query) - input.retrieved_docs = answers_by_community - input.documents = [doc.text for doc in answers_by_community] - result = ChatCompletionRequest( - messages="Retrieval of answers from community summaries successful", - retrieved_docs=input.retrieved_docs, - documents=input.documents, - ) - - statistics_dict["opea_service@retriever_community_answers_neo4j"].append_latency(time.time() - start, None) - - if logflag: - logger.info(result) - return result - - -if __name__ == "__main__": - opea_microservices["opea_service@retriever_community_answers_neo4j"].start() diff --git a/comps/retrievers/neo4j/llama_index/set_env.sh b/comps/retrievers/neo4j/llama_index/set_env.sh deleted file mode 100644 index dcaaad0fbe..0000000000 --- a/comps/retrievers/neo4j/llama_index/set_env.sh +++ /dev/null @@ -1,19 +0,0 @@ -#!/usr/bin/env bash - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - - -# Remember to set your private variables mentioned in README -# host_ip, OPENAI_KEY, HUGGINGFACEHUB_API_TOKEN, proxies... 
- -export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" -export OPENAI_EMBEDDING_MODEL="text-embedding-3-small" -export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct" -export OPENAI_LLM_MODEL="gpt-4o" -export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:6006" -export TGI_LLM_ENDPOINT="http://${host_ip}:6005" -export NEO4J_URL="bolt://${host_ip}:7687" -export NEO4J_USERNAME=neo4j -export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6004/v1/dataprep" -export LOGFLAG=True diff --git a/comps/retrievers/opensearch/langchain/Dockerfile b/comps/retrievers/opensearch/langchain/Dockerfile deleted file mode 100644 index 038b5d6bc1..0000000000 --- a/comps/retrievers/opensearch/langchain/Dockerfile +++ /dev/null @@ -1,28 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -FROM python:3.11-slim - -ARG ARCH="cpu" - -RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ - libgl1-mesa-glx \ - libjemalloc-dev - -RUN useradd -m -s /bin/bash user && \ - mkdir -p /home/user && \ - chown -R user /home/user/ - -COPY comps /home/user/comps - -USER user - -RUN pip install --no-cache-dir --upgrade pip && \ - if [ ${ARCH} = "cpu" ]; then pip install --no-cache-dir torch torchvision --index-url https://download.pytorch.org/whl/cpu; fi && \ - pip install --no-cache-dir -r /home/user/comps/retrievers/opensearch/langchain/requirements.txt - -ENV PYTHONPATH=$PYTHONPATH:/home/user - -WORKDIR /home/user/comps/retrievers/opensearch/langchain - -ENTRYPOINT ["python", "retriever_opensearch.py"] diff --git a/comps/retrievers/opensearch/langchain/__init__.py b/comps/retrievers/opensearch/langchain/__init__.py deleted file mode 100644 index 916f3a44b2..0000000000 --- a/comps/retrievers/opensearch/langchain/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 diff --git a/comps/retrievers/opensearch/langchain/docker_compose_retriever.yaml b/comps/retrievers/opensearch/langchain/docker_compose_retriever.yaml deleted file mode 100644 index 653e413a32..0000000000 --- a/comps/retrievers/opensearch/langchain/docker_compose_retriever.yaml +++ /dev/null @@ -1,36 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -version: "3.8" - -services: - tei_xeon_service: - image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.2 - container_name: tei-xeon_server - ports: - - "6060:80" - volumes: - - "./data:/data" - shm_size: 1g - command: --model-id ${RETRIEVE_MODEL_ID} - retriever: - image: opea/retriever-opensearch-server - container_name: retriever-opensearch-server - ports: - - "7000:7000" - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - OPENSEARCH_URL: ${OPENSEARCH_URL} - INDEX_NAME: ${INDEX_NAME} - TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} - restart: unless-stopped - security_opt: - - no-new-privileges:true - -networks: - default: - driver: bridge diff --git a/comps/retrievers/opensearch/langchain/opensearch_config.py b/comps/retrievers/opensearch/langchain/opensearch_config.py deleted file mode 100644 index fd6b68d357..0000000000 --- a/comps/retrievers/opensearch/langchain/opensearch_config.py +++ /dev/null @@ -1,70 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import os - - -def get_boolean_env_var(var_name, default_value=False): - """Retrieve the boolean value of an environment variable. 
- - Args: - var_name (str): The name of the environment variable to retrieve. - default_value (bool): The default value to return if the variable - is not found. - - Returns: - bool: The value of the environment variable, interpreted as a boolean. - """ - true_values = {"true", "1", "t", "y", "yes"} - false_values = {"false", "0", "f", "n", "no"} - - # Retrieve the environment variable's value - value = os.getenv(var_name, "").lower() - - # Decide the boolean value based on the content of the string - if value in true_values: - return True - elif value in false_values: - return False - else: - return default_value - - -# Whether or not to enable langchain debugging -DEBUG = get_boolean_env_var("DEBUG", False) -# Set DEBUG env var to "true" if you wish to enable LC debugging module -if DEBUG: - import langchain - - langchain.debug = True - - -# Embedding model -EMBED_MODEL = os.getenv("EMBED_MODEL", "BAAI/bge-base-en-v1.5") - - -# OpenSearch Connection Information -OPENSEARCH_HOST = os.getenv("OPENSEARCH_HOST", "localhost") -OPENSEARCH_PORT = int(os.getenv("OPENSEARCH_PORT", 9200)) -OPENSEARCH_INITIAL_ADMIN_PASSWORD = os.getenv("OPENSEARCH_INITIAL_ADMIN_PASSWORD", "") - - -def format_opensearch_conn_from_env(): - opensearch_url = os.getenv("OPENSEARCH_URL", None) - if opensearch_url: - return opensearch_url - else: - using_ssl = get_boolean_env_var("OPENSEARCH_SSL", False) - start = "https://" if using_ssl else "http://" - - return start + f"{OPENSEARCH_HOST}:{OPENSEARCH_PORT}" - - -OPENSEARCH_URL = format_opensearch_conn_from_env() - -# Vector Index Configuration -INDEX_NAME = os.getenv("INDEX_NAME", "rag-opensearch") - - -current_file_path = os.path.abspath(__file__) -parent_dir = os.path.dirname(current_file_path) diff --git a/comps/retrievers/opensearch/langchain/requirements.txt b/comps/retrievers/opensearch/langchain/requirements.txt deleted file mode 100644 index 5690118bbb..0000000000 --- a/comps/retrievers/opensearch/langchain/requirements.txt +++ /dev/null @@ -1,16 +0,0 @@ -docarray[full] -easyocr -fastapi -langchain_community -langchain_huggingface -numpy -opensearch-py -opentelemetry-api -opentelemetry-exporter-otlp -opentelemetry-sdk -prometheus-fastapi-instrumentator -pydantic -pymupdf -sentence_transformers -shortuuid -uvicorn diff --git a/comps/retrievers/opensearch/langchain/retriever_opensearch.py b/comps/retrievers/opensearch/langchain/retriever_opensearch.py deleted file mode 100644 index c570cb6db5..0000000000 --- a/comps/retrievers/opensearch/langchain/retriever_opensearch.py +++ /dev/null @@ -1,162 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import os -import time -from typing import Callable, List, Union - -import numpy as np -from langchain_community.embeddings import HuggingFaceBgeEmbeddings -from langchain_community.vectorstores import OpenSearchVectorSearch -from langchain_huggingface import HuggingFaceEndpointEmbeddings -from opensearch_config import EMBED_MODEL, INDEX_NAME, OPENSEARCH_INITIAL_ADMIN_PASSWORD, OPENSEARCH_URL -from pydantic import conlist - -from comps import ( - CustomLogger, - EmbedDoc, - SearchedDoc, - ServiceType, - TextDoc, - opea_microservices, - register_microservice, - register_statistics, - statistics_dict, -) -from comps.cores.proto.api_protocol import ( - ChatCompletionRequest, - RetrievalRequest, - RetrievalResponse, - RetrievalResponseData, -) - -logger = CustomLogger("retriever_opensearch") -logflag = os.getenv("LOGFLAG", False) - -tei_embedding_endpoint = 
os.getenv("TEI_EMBEDDING_ENDPOINT", None) - - -async def search_all_embeddings_vectors( - embeddings: Union[conlist(float, min_length=0), List[conlist(float, min_length=0)]], func: Callable, *args, **kwargs -): - try: - if not isinstance(embeddings, np.ndarray): - embeddings = np.array(embeddings) - - if not np.issubdtype(embeddings.dtype, np.floating): - raise ValueError("All embeddings values must be floating point numbers") - - if embeddings.ndim == 1: - return await func(embedding=embeddings, *args, **kwargs) - elif embeddings.ndim == 2: - responses = [] - for emb in embeddings: - response = await func(embedding=emb, *args, **kwargs) - responses.extend(response) - return responses - else: - raise ValueError("Embeddings must be one or two dimensional") - except Exception as e: - raise ValueError(f"Embedding data is not valid: {e}") - - -@register_microservice( - name="opea_service@retriever_opensearch", - service_type=ServiceType.RETRIEVER, - endpoint="/v1/retrieval", - host="0.0.0.0", - port=7000, -) -@register_statistics(names=["opea_service@retriever_opensearch"]) -async def retrieve( - input: Union[EmbedDoc, RetrievalRequest, ChatCompletionRequest] -) -> Union[SearchedDoc, RetrievalResponse, ChatCompletionRequest]: - if logflag: - logger.info(input) - start = time.time() - - # Check if the index exists and has documents - doc_count = 0 - - index_exists = vector_db.client.indices.exists(index=INDEX_NAME) - if index_exists: - doc_count = vector_db.client.count(index=INDEX_NAME)["count"] - if (not index_exists) or doc_count == 0: - search_res = [] - else: - if isinstance(input, EmbedDoc): - query = input.text - else: - # for RetrievalRequest, ChatCompletionRequest - query = input.input - # if the OpenSearch index has data, perform the search - if input.search_type == "similarity": - search_res = await search_all_embeddings_vectors( - embeddings=input.embedding, - func=vector_db.asimilarity_search_by_vector, - k=input.k, - ) - elif input.search_type == "similarity_distance_threshold": - if input.distance_threshold is None: - raise ValueError("distance_threshold must be provided for " + "similarity_distance_threshold retriever") - search_res = await search_all_embeddings_vectors( - embeddings=input.embedding, - func=vector_db.asimilarity_search_by_vector, - k=input.k, - distance_threshold=input.distance_threshold, - ) - elif input.search_type == "similarity_score_threshold": - doc_and_similarities = await vector_db.asimilarity_search_with_relevance_scores( - query=input.text, k=input.k, score_threshold=input.score_threshold - ) - search_res = [doc for doc, _ in doc_and_similarities] - elif input.search_type == "mmr": - search_res = await vector_db.amax_marginal_relevance_search( - query=input.text, k=input.k, fetch_k=input.fetch_k, lambda_mult=input.lambda_mult - ) - else: - raise ValueError(f"{input.search_type} not valid") - - # return different response format - retrieved_docs = [] - if isinstance(input, EmbedDoc): - for r in search_res: - retrieved_docs.append(TextDoc(text=r.page_content)) - result = SearchedDoc(retrieved_docs=retrieved_docs, initial_query=input.text) - else: - for r in search_res: - retrieved_docs.append(RetrievalResponseData(text=r.page_content, metadata=r.metadata)) - if isinstance(input, RetrievalRequest): - result = RetrievalResponse(retrieved_docs=retrieved_docs) - elif isinstance(input, ChatCompletionRequest): - input.retrieved_docs = retrieved_docs - input.documents = [doc.text for doc in retrieved_docs] - result = input - - 
statistics_dict["opea_service@retriever_opensearch"].append_latency(time.time() - start, None) - if logflag: - logger.info(result) - return result - - -if __name__ == "__main__": - # Create vectorstore - if tei_embedding_endpoint: - # create embeddings using TEI endpoint service - embeddings = HuggingFaceEndpointEmbeddings(model=tei_embedding_endpoint) - else: - # create embeddings using local embedding model - embeddings = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) - - auth = ("admin", OPENSEARCH_INITIAL_ADMIN_PASSWORD) - vector_db = OpenSearchVectorSearch( - opensearch_url=OPENSEARCH_URL, - index_name=INDEX_NAME, - embedding_function=embeddings, - http_auth=auth, - use_ssl=True, - verify_certs=False, - ssl_assert_hostname=False, - ssl_show_warn=False, - ) - opea_microservices["opea_service@retriever_opensearch"].start() diff --git a/comps/retrievers/pathway/langchain/entrypoint.sh b/comps/retrievers/pathway/langchain/entrypoint.sh deleted file mode 100644 index f5c8fc1511..0000000000 --- a/comps/retrievers/pathway/langchain/entrypoint.sh +++ /dev/null @@ -1,6 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -pip --no-cache-dir install -r requirements-runtime.txt - -python retriever_pathway.py diff --git a/comps/retrievers/pathway/langchain/pathway_langchain.yaml b/comps/retrievers/pathway/langchain/pathway_langchain.yaml deleted file mode 100644 index b2b9383d6b..0000000000 --- a/comps/retrievers/pathway/langchain/pathway_langchain.yaml +++ /dev/null @@ -1,35 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -version: "3.8" - -services: - tei_xeon_service: - image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.2 - container_name: tei-xeon-server - ports: - - "6060:80" - volumes: - - "./data:/data" - shm_size: 1g - command: --model-id ${RETRIEVE_MODEL_ID} - retriever: - image: opea/retriever-pathway:latest - container_name: retriever-pathway-server - ports: - - "7000:7000" - ipc: host - network_mode: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - PATHWAY_HOST: ${PATHWAY_HOST} - PATHWAY_PORT: ${PATHWAY_PORT} - TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} - LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} - restart: unless-stopped - -networks: - default: - driver: bridge diff --git a/comps/retrievers/pathway/langchain/requirements-runtime.txt b/comps/retrievers/pathway/langchain/requirements-runtime.txt deleted file mode 100644 index 53d49066d5..0000000000 --- a/comps/retrievers/pathway/langchain/requirements-runtime.txt +++ /dev/null @@ -1 +0,0 @@ -langsmith diff --git a/comps/retrievers/pathway/langchain/requirements.txt b/comps/retrievers/pathway/langchain/requirements.txt deleted file mode 100644 index 98fe20fd1e..0000000000 --- a/comps/retrievers/pathway/langchain/requirements.txt +++ /dev/null @@ -1,12 +0,0 @@ -docarray[full] -fastapi -frontend==0.0.3 -huggingface_hub -langchain_community == 0.2.0 -opentelemetry-api -opentelemetry-exporter-otlp -opentelemetry-sdk -pathway -prometheus-fastapi-instrumentator -sentence_transformers -shortuuid diff --git a/comps/retrievers/pathway/langchain/retriever_pathway.py b/comps/retrievers/pathway/langchain/retriever_pathway.py deleted file mode 100644 index 72b7babaa0..0000000000 --- a/comps/retrievers/pathway/langchain/retriever_pathway.py +++ /dev/null @@ -1,52 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import os -import time - -from 
langchain_community.vectorstores import PathwayVectorClient -from langsmith import traceable - -from comps import ( - EmbedDoc, - SearchedDoc, - ServiceType, - TextDoc, - opea_microservices, - register_microservice, - register_statistics, - statistics_dict, -) - -host = os.getenv("PATHWAY_HOST", "127.0.0.1") -port = int(os.getenv("PATHWAY_PORT", 8666)) - -EMBED_MODEL = os.getenv("EMBED_MODEL", "BAAI/bge-base-en-v1.5") - -tei_embedding_endpoint = os.getenv("TEI_EMBEDDING_ENDPOINT") - - -@register_microservice( - name="opea_service@retriever_pathway", - service_type=ServiceType.RETRIEVER, - endpoint="/v1/retrieval", - host="0.0.0.0", - port=7000, -) -@traceable(run_type="retriever") -@register_statistics(names=["opea_service@retriever_pathway"]) -def retrieve(input: EmbedDoc) -> SearchedDoc: - start = time.time() - documents = pw_client.similarity_search(input.text, input.fetch_k) - - docs = [TextDoc(text=r.page_content) for r in documents] - - time_spent = time.time() - start - statistics_dict["opea_service@retriever_pathway"].append_latency(time_spent, None) # noqa: E501 - return SearchedDoc(retrieved_docs=docs, initial_query=input.text) - - -if __name__ == "__main__": - # Create the vectorstore client - pw_client = PathwayVectorClient(host=host, port=port) - opea_microservices["opea_service@retriever_pathway"].start() diff --git a/comps/retrievers/pgvector/langchain/Dockerfile b/comps/retrievers/pgvector/langchain/Dockerfile deleted file mode 100644 index 13b241b95d..0000000000 --- a/comps/retrievers/pgvector/langchain/Dockerfile +++ /dev/null @@ -1,28 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -FROM python:3.11-slim - -ARG ARCH="cpu" - -RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ - libgl1-mesa-glx \ - libjemalloc-dev - -RUN useradd -m -s /bin/bash user && \ - mkdir -p /home/user && \ - chown -R user /home/user/ - -COPY comps /home/user/comps - -USER user - -RUN pip install --no-cache-dir --upgrade pip setuptools && \ - if [ ${ARCH} = "cpu" ]; then pip install --no-cache-dir torch torchvision --index-url https://download.pytorch.org/whl/cpu; fi && \ - pip install --no-cache-dir -r /home/user/comps/retrievers/pgvector/langchain/requirements.txt - -ENV PYTHONPATH=$PYTHONPATH:/home/user - -WORKDIR /home/user/comps/retrievers/pgvector/langchain - -ENTRYPOINT ["python", "retriever_pgvector.py"] diff --git a/comps/retrievers/pgvector/langchain/config.py b/comps/retrievers/pgvector/langchain/config.py deleted file mode 100644 index 46d5650b1a..0000000000 --- a/comps/retrievers/pgvector/langchain/config.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import os - -# Embedding model - -EMBED_MODEL = os.getenv("EMBED_MODEL", "BAAI/bge-base-en-v1.5") - -PG_CONNECTION_STRING = os.getenv("PG_CONNECTION_STRING", "localhost") - -# Vector Index Configuration -INDEX_NAME = os.getenv("INDEX_NAME", "rag-pgvector") - -current_file_path = os.path.abspath(__file__) -parent_dir = os.path.dirname(current_file_path) -PORT = os.getenv("RETRIEVER_PORT", 7000) diff --git a/comps/retrievers/pgvector/langchain/pgvector_langchain.yaml b/comps/retrievers/pgvector/langchain/pgvector_langchain.yaml deleted file mode 100644 index e983764c83..0000000000 --- a/comps/retrievers/pgvector/langchain/pgvector_langchain.yaml +++ /dev/null @@ -1,31 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -version: "3.8" - -services: - 
tei_xeon_service: - image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.2 - container_name: tei-xeon-server - ports: - - "6060:80" - volumes: - - "./data:/data" - shm_size: 1g - command: --model-id ${RETRIEVE_MODEL_ID} - retriever: - image: opea/retriever-pgvector:latest - container_name: retriever-pgvector - ports: - - "7000:7000" - ipc: host - environment: - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - PG_CONNECTION_STRING: ${PG_CONNECTION_STRING} - LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} - restart: unless-stopped - -networks: - default: - driver: bridge diff --git a/comps/retrievers/pgvector/langchain/requirements.txt b/comps/retrievers/pgvector/langchain/requirements.txt deleted file mode 100644 index d3d95dee91..0000000000 --- a/comps/retrievers/pgvector/langchain/requirements.txt +++ /dev/null @@ -1,14 +0,0 @@ -docarray[full] -easyocr -fastapi -langchain_community -opentelemetry-api -opentelemetry-exporter-otlp -opentelemetry-sdk -pgvector==0.2.5 -prometheus-fastapi-instrumentator==7.0.0 -psycopg2-binary -pymupdf -sentence_transformers -shortuuid -uvicorn diff --git a/comps/retrievers/pgvector/langchain/retriever_pgvector.py b/comps/retrievers/pgvector/langchain/retriever_pgvector.py deleted file mode 100644 index b9db75c1cf..0000000000 --- a/comps/retrievers/pgvector/langchain/retriever_pgvector.py +++ /dev/null @@ -1,66 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import os -import time - -from config import EMBED_MODEL, INDEX_NAME, PG_CONNECTION_STRING, PORT -from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceHubEmbeddings -from langchain_community.vectorstores import PGVector - -from comps import ( - CustomLogger, - EmbedDoc, - SearchedDoc, - ServiceType, - TextDoc, - opea_microservices, - register_microservice, - register_statistics, - statistics_dict, -) - -logger = CustomLogger("retriever_pgvector") -logflag = os.getenv("LOGFLAG", False) - -tei_embedding_endpoint = os.getenv("TEI_EMBEDDING_ENDPOINT") - - -@register_microservice( - name="opea_service@retriever_pgvector", - service_type=ServiceType.RETRIEVER, - endpoint="/v1/retrieval", - host="0.0.0.0", - port=PORT, -) -@register_statistics(names=["opea_service@retriever_pgvector"]) -async def retrieve(input: EmbedDoc) -> SearchedDoc: - if logflag: - logger.info(input) - start = time.time() - search_res = await vector_db.asimilarity_search_by_vector(embedding=input.embedding) - searched_docs = [] - for r in search_res: - searched_docs.append(TextDoc(text=r.page_content)) - result = SearchedDoc(retrieved_docs=searched_docs, initial_query=input.text) - statistics_dict["opea_service@retriever_pgvector"].append_latency(time.time() - start, None) - if logflag: - logger.info(result) - return result - - -if __name__ == "__main__": - # Create vectorstore - if tei_embedding_endpoint: - # create embeddings using TEI endpoint service - embeddings = HuggingFaceHubEmbeddings(model=tei_embedding_endpoint) - else: - # create embeddings using local embedding model - embeddings = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) - - vector_db = PGVector( - embedding_function=embeddings, - collection_name=INDEX_NAME, - connection_string=PG_CONNECTION_STRING, - ) - opea_microservices["opea_service@retriever_pgvector"].start() diff --git a/comps/retrievers/pinecone/langchain/Dockerfile b/comps/retrievers/pinecone/langchain/Dockerfile deleted file mode 100644 index 5b1fa8709b..0000000000 --- a/comps/retrievers/pinecone/langchain/Dockerfile +++ /dev/null @@ 
-1,28 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -FROM python:3.11-slim - -ARG ARCH="cpu" - -RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ - libgl1-mesa-glx \ - libjemalloc-dev - -RUN useradd -m -s /bin/bash user && \ - mkdir -p /home/user && \ - chown -R user /home/user/ - -COPY comps /home/user/comps - -USER user - -RUN pip install --no-cache-dir --upgrade pip setuptools && \ - if [ ${ARCH} = "cpu" ]; then pip install --no-cache-dir torch torchvision --index-url https://download.pytorch.org/whl/cpu; fi && \ - pip install --no-cache-dir --no-warn-script-location -r /home/user/comps/retrievers/pinecone/langchain/requirements.txt - -ENV PYTHONPATH=$PYTHONPATH:/home/user - -WORKDIR /home/user/comps/retrievers/pinecone/langchain - -ENTRYPOINT ["python", "retriever_pinecone.py"] diff --git a/comps/retrievers/pinecone/langchain/__init__.py b/comps/retrievers/pinecone/langchain/__init__.py deleted file mode 100644 index 916f3a44b2..0000000000 --- a/comps/retrievers/pinecone/langchain/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 diff --git a/comps/retrievers/pinecone/langchain/config.py b/comps/retrievers/pinecone/langchain/config.py deleted file mode 100644 index cd7f9e5088..0000000000 --- a/comps/retrievers/pinecone/langchain/config.py +++ /dev/null @@ -1,16 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import os - -# Embedding model -EMBED_MODEL = os.getenv("EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2") - -# Pinecone configuration -PINECONE_API_KEY = os.getenv("PINECONE_API_KEY", "xxx_xxx") -PINECONE_INDEX_NAME = os.getenv("PINECONE_INDEX_NAME", "langchain-test") - -# LLM/Embedding endpoints -TGI_LLM_ENDPOINT = os.getenv("TGI_LLM_ENDPOINT", "http://localhost:8080") -TGI_LLM_ENDPOINT_NO_RAG = os.getenv("TGI_LLM_ENDPOINT_NO_RAG", "http://localhost:8081") -TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_EMBEDDING_ENDPOINT") diff --git a/comps/retrievers/pinecone/langchain/pinecone_langchain.yaml b/comps/retrievers/pinecone/langchain/pinecone_langchain.yaml deleted file mode 100644 index 3c0f7cef23..0000000000 --- a/comps/retrievers/pinecone/langchain/pinecone_langchain.yaml +++ /dev/null @@ -1,34 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -version: "3.8" - -services: - tei_xeon_service: - image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.2 - container_name: tei-xeon-server - ports: - - "6060:80" - volumes: - - "./data:/data" - shm_size: 1g - command: --model-id ${RETRIEVE_MODEL_ID} - retriever: - image: opea/retriever-pinecone:latest - container_name: retriever-pinecone-server - ports: - - "7000:7000" - ipc: host - environment: - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - PINECONE_API_KEY: ${PINECONE_API_KEY} - INDEX_NAME: ${PINECONE_INDEX_NAME} - PINECONE_INDEX_NAME: ${PINECONE_INDEX_NAME} - LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} - TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} - restart: unless-stopped - -networks: - default: - driver: bridge diff --git a/comps/retrievers/pinecone/langchain/requirements.txt b/comps/retrievers/pinecone/langchain/requirements.txt deleted file mode 100644 index 499f4b9f48..0000000000 --- a/comps/retrievers/pinecone/langchain/requirements.txt +++ /dev/null @@ -1,22 +0,0 @@ -beautifulsoup4 -docarray[full] -easyocr -fastapi -huggingface_hub -langchain -langchain-community 
-langchain-huggingface -langchain-pinecone -numpy -opentelemetry-api -opentelemetry-exporter-otlp -opentelemetry-sdk -pandas -Pillow -pinecone-client -prometheus_fastapi_instrumentator -pymupdf -python-docx -sentence_transformers -shortuuid -uvicorn diff --git a/comps/retrievers/pinecone/langchain/retriever_pinecone.py b/comps/retrievers/pinecone/langchain/retriever_pinecone.py deleted file mode 100644 index 534344e105..0000000000 --- a/comps/retrievers/pinecone/langchain/retriever_pinecone.py +++ /dev/null @@ -1,118 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import os -import time - -from config import EMBED_MODEL, PINECONE_API_KEY, PINECONE_INDEX_NAME -from langchain_community.embeddings import HuggingFaceBgeEmbeddings -from langchain_huggingface import HuggingFaceEndpointEmbeddings -from langchain_pinecone import PineconeVectorStore -from pinecone import Pinecone, ServerlessSpec - -from comps import ( - CustomLogger, - EmbedDoc, - SearchedDoc, - ServiceType, - TextDoc, - opea_microservices, - register_microservice, - register_statistics, - statistics_dict, -) - -logger = CustomLogger("retriever_pinecone") -logflag = os.getenv("LOGFLAG", False) - -tei_embedding_endpoint = os.getenv("TEI_EMBEDDING_ENDPOINT") - - -@register_microservice( - name="opea_service@retriever_pinecone", - service_type=ServiceType.RETRIEVER, - endpoint="/v1/retrieval", - host="0.0.0.0", - port=7000, -) -@register_statistics(names=["opea_service@retriever_pinecone"]) -def retrieve(input: EmbedDoc) -> SearchedDoc: - if logflag: - logger.info(input) - start = time.time() - - pc = Pinecone(api_key=PINECONE_API_KEY) - - index = pc.Index(PINECONE_INDEX_NAME) - if logflag: - logger.info(index.describe_index_stats()["total_vector_count"]) - # check if the Pinecone index has data - if index.describe_index_stats()["total_vector_count"] == 0: - result = SearchedDoc(retrieved_docs=[], initial_query=input.text) - statistics_dict["opea_service@retriever_pinecone"].append_latency(time.time() - start, None) - if logflag: - logger.info(result) - return result - - search_res = vector_db.max_marginal_relevance_search(query=input.text, k=input.k, fetch_k=input.fetch_k) - # if the Pinecone index has data, perform the search - if input.search_type == "similarity": - docs_and_similarities = vector_db.similarity_search_by_vector_with_score(embedding=input.embedding, k=input.k) - search_res = [doc for doc, _ in docs_and_similarities] - elif input.search_type == "similarity_distance_threshold": - if input.distance_threshold is None: - raise ValueError("distance_threshold must be provided for " + "similarity_distance_threshold retriever") - docs_and_similarities = vector_db.similarity_search_by_vector_with_score(embedding=input.embedding, k=input.k) - search_res = [doc for doc, similarity in docs_and_similarities if similarity > input.distance_threshold] - elif input.search_type == "similarity_score_threshold": - docs_and_similarities = vector_db.similarity_search_by_vector_with_score(query=input.text, k=input.k) - search_res = [doc for doc, similarity in docs_and_similarities if similarity > input.score_threshold] - elif input.search_type == "mmr": - search_res = vector_db.max_marginal_relevance_search( - query=input.text, k=input.k, fetch_k=input.fetch_k, lambda_mult=input.lambda_mult - ) - searched_docs = [] - for r in search_res: - searched_docs.append(TextDoc(text=r.page_content)) - result = SearchedDoc(retrieved_docs=searched_docs, initial_query=input.text) - 
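The deleted Pinecone retriever above treats `similarity_distance_threshold` and `similarity_score_threshold` the same way: both filter the (doc, score) pairs returned by the `*_with_score` search, and only the request field supplying the cutoff differs. A toy sketch of that filtering step, with illustrative values only:

```python
# Illustration of the threshold filtering used by both threshold search types.
from typing import List, Tuple


def filter_by_threshold(pairs: List[Tuple[str, float]], cutoff: float) -> List[str]:
    """Keep documents whose similarity score exceeds the cutoff."""
    return [doc for doc, score in pairs if score > cutoff]


pairs = [("doc-a", 0.91), ("doc-b", 0.72), ("doc-c", 0.40)]
print(filter_by_threshold(pairs, cutoff=0.70))  # e.g. input.distance_threshold -> ['doc-a', 'doc-b']
print(filter_by_threshold(pairs, cutoff=0.85))  # e.g. input.score_threshold    -> ['doc-a']
```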
statistics_dict["opea_service@retriever_pinecone"].append_latency(time.time() - start, None) - if logflag: - logger.info(result) - return result - - -if __name__ == "__main__": - # Create vectorstore - if tei_embedding_endpoint: - # create embeddings using TEI endpoint service - embeddings = HuggingFaceEndpointEmbeddings(model=tei_embedding_endpoint) - else: - # create embeddings using local embedding model - embeddings = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) - - pc = Pinecone(api_key=PINECONE_API_KEY) - spec = ServerlessSpec(cloud="aws", region="us-east-1") - - existing_indexes = [index_info["name"] for index_info in pc.list_indexes()] - - # For testing purposes we want to create a fresh index each time. - # In production you would probably keep your index and - # replace this with a check if the index doesn't exist then create it. - if PINECONE_INDEX_NAME in existing_indexes: - pc.configure_index(PINECONE_INDEX_NAME, deletion_protection="disabled") - pc.delete_index(PINECONE_INDEX_NAME) - time.sleep(1) - - pc.create_index( - PINECONE_INDEX_NAME, - dimension=1024, # Based on TEI Embedding service using BAAI/bge-large-en-v1.5 - deletion_protection="disabled", - spec=spec, - ) - while not pc.describe_index(PINECONE_INDEX_NAME).status["ready"]: - time.sleep(1) - - index = pc.Index(PINECONE_INDEX_NAME) - vector_db = PineconeVectorStore(index=index, embedding=embeddings) - - opea_microservices["opea_service@retriever_pinecone"].start() diff --git a/comps/retrievers/qdrant/haystack/Dockerfile b/comps/retrievers/qdrant/haystack/Dockerfile deleted file mode 100644 index eda88428ba..0000000000 --- a/comps/retrievers/qdrant/haystack/Dockerfile +++ /dev/null @@ -1,25 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -FROM python:3.11-slim - -RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ - libgl1-mesa-glx \ - libjemalloc-dev - -RUN useradd -m -s /bin/bash user && \ - mkdir -p /home/user && \ - chown -R user /home/user/ - -USER user - -COPY comps /home/user/comps - -RUN python -m pip install --no-cache-dir --upgrade pip setuptools && \ - python -m pip install --no-cache-dir -r /home/user/comps/retrievers/qdrant/haystack/requirements.txt - -ENV PYTHONPATH=$PYTHONPATH:/home/user - -WORKDIR /home/user/comps/retrievers/qdrant/haystack - -ENTRYPOINT ["python", "retriever_qdrant.py"] diff --git a/comps/retrievers/qdrant/haystack/__init__.py b/comps/retrievers/qdrant/haystack/__init__.py deleted file mode 100644 index 916f3a44b2..0000000000 --- a/comps/retrievers/qdrant/haystack/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 diff --git a/comps/retrievers/qdrant/haystack/qdrant_config.py b/comps/retrievers/qdrant/haystack/qdrant_config.py deleted file mode 100644 index cee448d667..0000000000 --- a/comps/retrievers/qdrant/haystack/qdrant_config.py +++ /dev/null @@ -1,20 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import os - -# Embedding model -EMBED_MODEL = os.getenv("EMBED_MODEL", "BAAI/bge-base-en-v1.5") - -# Embedding dimension -EMBED_DIMENSION = os.getenv("EMBED_DIMENSION", 768) - -# Embedding endpoints -EMBED_ENDPOINT = os.getenv("TEI_EMBEDDING_ENDPOINT", "") - -# Qdrant Connection Information -QDRANT_HOST = os.getenv("QDRANT_HOST", "localhost") -QDRANT_PORT = int(os.getenv("QDRANT_PORT", 6333)) - -# Vector Index Configuration -INDEX_NAME = os.getenv("INDEX_NAME", "rag-qdrant") diff --git 
a/comps/retrievers/qdrant/haystack/requirements.txt b/comps/retrievers/qdrant/haystack/requirements.txt deleted file mode 100644 index dee357a488..0000000000 --- a/comps/retrievers/qdrant/haystack/requirements.txt +++ /dev/null @@ -1,14 +0,0 @@ -docarray[full] -easyocr -fastapi -haystack-ai==2.3.1 -langchain_community -opentelemetry-api -opentelemetry-exporter-otlp -opentelemetry-sdk -prometheus_fastapi_instrumentator -pymupdf -qdrant-haystack -sentence_transformers -shortuuid -uvicorn diff --git a/comps/retrievers/qdrant/haystack/retriever_qdrant.py b/comps/retrievers/qdrant/haystack/retriever_qdrant.py deleted file mode 100644 index b942afb15b..0000000000 --- a/comps/retrievers/qdrant/haystack/retriever_qdrant.py +++ /dev/null @@ -1,56 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import os - -from haystack.components.embedders import HuggingFaceAPITextEmbedder, SentenceTransformersTextEmbedder -from haystack_integrations.components.retrievers.qdrant import QdrantEmbeddingRetriever -from haystack_integrations.document_stores.qdrant import QdrantDocumentStore -from qdrant_config import EMBED_DIMENSION, EMBED_ENDPOINT, EMBED_MODEL, INDEX_NAME, QDRANT_HOST, QDRANT_PORT - -from comps import CustomLogger, EmbedDoc, SearchedDoc, ServiceType, TextDoc, opea_microservices, register_microservice - -logger = CustomLogger("retriever_qdrant") -logflag = os.getenv("LOGFLAG", False) - - -# Create a pipeline for querying a Qdrant document store -def initialize_qdrant_retriever() -> QdrantEmbeddingRetriever: - qdrant_store = QdrantDocumentStore( - host=QDRANT_HOST, port=QDRANT_PORT, embedding_dim=EMBED_DIMENSION, index=INDEX_NAME, recreate_index=False - ) - - retriever = QdrantEmbeddingRetriever(document_store=qdrant_store) - - return retriever - - -@register_microservice( - name="opea_service@retriever_qdrant", - service_type=ServiceType.RETRIEVER, - endpoint="/v1/retrieval", - host="0.0.0.0", - port=7000, -) -def retrieve(input: EmbedDoc) -> SearchedDoc: - if logflag: - logger.info(input) - search_res = retriever.run(query_embedding=input.embedding)["documents"] - searched_docs = [TextDoc(text=r.content) for r in search_res if r.content] - result = SearchedDoc(retrieved_docs=searched_docs, initial_query=input.text) - if logflag: - logger.info(result) - return result - - -if __name__ == "__main__": - if EMBED_ENDPOINT: - # create embeddings using TEI endpoint service - embedder = HuggingFaceAPITextEmbedder(api_type="text_embeddings_inference", api_params={"url": EMBED_ENDPOINT}) - else: - # create embeddings using local embedding model - embedder = SentenceTransformersTextEmbedder(model=EMBED_MODEL) - embedder.warm_up() - - retriever = initialize_qdrant_retriever() - opea_microservices["opea_service@retriever_qdrant"].start() diff --git a/comps/retrievers/redis/langchain/Dockerfile b/comps/retrievers/redis/langchain/Dockerfile deleted file mode 100644 index c2c5168f23..0000000000 --- a/comps/retrievers/redis/langchain/Dockerfile +++ /dev/null @@ -1,28 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -FROM python:3.11-slim - -ARG ARCH="cpu" - -RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ - libgl1-mesa-glx \ - libjemalloc-dev - -RUN useradd -m -s /bin/bash user && \ - mkdir -p /home/user && \ - chown -R user /home/user/ - -COPY comps /home/user/comps - -USER user - -RUN pip install --no-cache-dir --upgrade pip setuptools && \ - if [ ${ARCH} = "cpu" ]; then pip install --no-cache-dir 
torch torchvision --index-url https://download.pytorch.org/whl/cpu; fi && \ - pip install --no-cache-dir -r /home/user/comps/retrievers/redis/langchain/requirements.txt - -ENV PYTHONPATH=$PYTHONPATH:/home/user - -WORKDIR /home/user/comps/retrievers/redis/langchain - -ENTRYPOINT ["python", "retriever_redis.py"] diff --git a/comps/retrievers/redis/langchain/__init__.py b/comps/retrievers/redis/langchain/__init__.py deleted file mode 100644 index 916f3a44b2..0000000000 --- a/comps/retrievers/redis/langchain/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 diff --git a/comps/retrievers/redis/langchain/redis_config.py b/comps/retrievers/redis/langchain/redis_config.py deleted file mode 100644 index ade017e7b8..0000000000 --- a/comps/retrievers/redis/langchain/redis_config.py +++ /dev/null @@ -1,78 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import os - - -def get_boolean_env_var(var_name, default_value=False): - """Retrieve the boolean value of an environment variable. - - Args: - var_name (str): The name of the environment variable to retrieve. - default_value (bool): The default value to return if the variable - is not found. - - Returns: - bool: The value of the environment variable, interpreted as a boolean. - """ - true_values = {"true", "1", "t", "y", "yes"} - false_values = {"false", "0", "f", "n", "no"} - - # Retrieve the environment variable's value - value = os.getenv(var_name, "").lower() - - # Decide the boolean value based on the content of the string - if value in true_values: - return True - elif value in false_values: - return False - else: - return default_value - - -# Whether or not to enable langchain debugging -DEBUG = get_boolean_env_var("DEBUG", False) -# Set DEBUG env var to "true" if you wish to enable LC debugging module -if DEBUG: - import langchain - - langchain.debug = True - - -# Embedding model -EMBED_MODEL = os.getenv("EMBED_MODEL", "BAAI/bge-base-en-v1.5") - - -# Redis Connection Information -REDIS_HOST = os.getenv("REDIS_HOST", "localhost") -REDIS_PORT = int(os.getenv("REDIS_PORT", 6379)) - - -def format_redis_conn_from_env(): - redis_url = os.getenv("REDIS_URL", None) - if redis_url: - return redis_url - else: - using_ssl = get_boolean_env_var("REDIS_SSL", False) - start = "rediss://" if using_ssl else "redis://" - - # if using RBAC - password = os.getenv("REDIS_PASSWORD", None) - username = os.getenv("REDIS_USERNAME", "default") - if password is not None: - start += f"{username}:{password}@" - - return start + f"{REDIS_HOST}:{REDIS_PORT}" - - -REDIS_URL = format_redis_conn_from_env() - -# Vector Index Configuration -INDEX_NAME = os.getenv("INDEX_NAME", "rag-redis") - - -current_file_path = os.path.abspath(__file__) -parent_dir = os.path.dirname(current_file_path) -REDIS_SCHEMA = os.getenv("REDIS_SCHEMA", "redis_schema_multi.yml") -schema_path = os.path.join(parent_dir, REDIS_SCHEMA) -INDEX_SCHEMA = schema_path diff --git a/comps/retrievers/redis/langchain/redis_langchain.yaml b/comps/retrievers/redis/langchain/redis_langchain.yaml deleted file mode 100644 index 5ce13af879..0000000000 --- a/comps/retrievers/redis/langchain/redis_langchain.yaml +++ /dev/null @@ -1,35 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -version: "3.8" - -services: - tei_xeon_service: - image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.2 - container_name: tei-xeon-server - ports: - - "6060:80" - volumes: - - 
"./data:/data" - shm_size: 1g - command: --model-id ${RETRIEVE_MODEL_ID} - retriever: - image: opea/retriever-redis:latest - container_name: retriever-redis-server - ports: - - "7000:7000" - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - REDIS_URL: ${REDIS_URL} - INDEX_NAME: ${INDEX_NAME} - TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} - BRIDGE_TOWER_EMBEDDING: ${BRIDGE_TOWER_EMBEDDING} - restart: unless-stopped - -networks: - default: - driver: bridge diff --git a/comps/retrievers/redis/langchain/redis_schema.yml b/comps/retrievers/redis/langchain/redis_schema.yml deleted file mode 100644 index adacf98656..0000000000 --- a/comps/retrievers/redis/langchain/redis_schema.yml +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -text: - - name: content - - name: source -numeric: - - name: start_index -vector: - - name: content_vector - algorithm: HNSW - datatype: FLOAT32 - dims: 768 - distance_metric: COSINE diff --git a/comps/retrievers/redis/langchain/requirements.txt b/comps/retrievers/redis/langchain/requirements.txt deleted file mode 100644 index 96a303bd7d..0000000000 --- a/comps/retrievers/redis/langchain/requirements.txt +++ /dev/null @@ -1,15 +0,0 @@ -docarray[full] -easyocr -fastapi -langchain_community -langchain_huggingface -opentelemetry-api -opentelemetry-exporter-otlp -opentelemetry-sdk -prometheus-fastapi-instrumentator -pymupdf -redis -sentence_transformers -shortuuid -transformers -uvicorn diff --git a/comps/retrievers/redis/langchain/retriever_redis.py b/comps/retrievers/redis/langchain/retriever_redis.py deleted file mode 100644 index b112dd4676..0000000000 --- a/comps/retrievers/redis/langchain/retriever_redis.py +++ /dev/null @@ -1,133 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import os -import time -from typing import Union - -from langchain_community.embeddings import HuggingFaceBgeEmbeddings -from langchain_community.vectorstores import Redis -from langchain_huggingface import HuggingFaceEndpointEmbeddings -from redis_config import EMBED_MODEL, INDEX_NAME, INDEX_SCHEMA, REDIS_URL - -from comps import ( - CustomLogger, - EmbedDoc, - EmbedMultimodalDoc, - SearchedDoc, - SearchedMultimodalDoc, - ServiceType, - TextDoc, - opea_microservices, - register_microservice, - register_statistics, - statistics_dict, -) -from comps.cores.proto.api_protocol import ( - ChatCompletionRequest, - EmbeddingResponse, - RetrievalRequest, - RetrievalResponse, - RetrievalResponseData, -) -from comps.third_parties.bridgetower.src.bridgetower_embedding import BridgeTowerEmbedding - -logger = CustomLogger("retriever_redis") -logflag = os.getenv("LOGFLAG", False) - -tei_embedding_endpoint = os.getenv("TEI_EMBEDDING_ENDPOINT") -bridge_tower_embedding = os.getenv("BRIDGE_TOWER_EMBEDDING") - - -@register_microservice( - name="opea_service@retriever_redis", - service_type=ServiceType.RETRIEVER, - endpoint="/v1/retrieval", - host="0.0.0.0", - port=7000, -) -@register_statistics(names=["opea_service@retriever_redis"]) -async def retrieve( - input: Union[EmbedDoc, EmbedMultimodalDoc, RetrievalRequest, ChatCompletionRequest] -) -> Union[SearchedDoc, SearchedMultimodalDoc, RetrievalResponse, ChatCompletionRequest]: - if logflag: - logger.info(input) - start = time.time() - # check if the Redis index has data - if vector_db.client.keys() == []: - search_res = [] - else: - if 
isinstance(input, EmbedDoc) or isinstance(input, EmbedMultimodalDoc): - embedding_data_input = input.embedding - else: - # for RetrievalRequest, ChatCompletionRequest - if isinstance(input.embedding, EmbeddingResponse): - embeddings = input.embedding.data - embedding_data_input = [] - for emb in embeddings: - # each emb is EmbeddingResponseData - embedding_data_input.append(emb.embedding) - else: - embedding_data_input = input.embedding - - # if the Redis index has data, perform the search - if input.search_type == "similarity": - search_res = await vector_db.asimilarity_search_by_vector(embedding=embedding_data_input, k=input.k) - elif input.search_type == "similarity_distance_threshold": - if input.distance_threshold is None: - raise ValueError("distance_threshold must be provided for " + "similarity_distance_threshold retriever") - search_res = await vector_db.asimilarity_search_by_vector( - embedding=input.embedding, k=input.k, distance_threshold=input.distance_threshold - ) - elif input.search_type == "similarity_score_threshold": - docs_and_similarities = await vector_db.asimilarity_search_with_relevance_scores( - query=input.text, k=input.k, score_threshold=input.score_threshold - ) - search_res = [doc for doc, _ in docs_and_similarities] - elif input.search_type == "mmr": - search_res = await vector_db.amax_marginal_relevance_search( - query=input.text, k=input.k, fetch_k=input.fetch_k, lambda_mult=input.lambda_mult - ) - else: - raise ValueError(f"{input.search_type} not valid") - - # return different response format - retrieved_docs = [] - if isinstance(input, EmbedDoc) or isinstance(input, EmbedMultimodalDoc): - metadata_list = [] - for r in search_res: - metadata_list.append(r.metadata) - retrieved_docs.append(TextDoc(text=r.page_content)) - result = SearchedMultimodalDoc(retrieved_docs=retrieved_docs, initial_query=input.text, metadata=metadata_list) - else: - for r in search_res: - retrieved_docs.append(RetrievalResponseData(text=r.page_content, metadata=r.metadata)) - if isinstance(input, RetrievalRequest): - result = RetrievalResponse(retrieved_docs=retrieved_docs) - elif isinstance(input, ChatCompletionRequest): - input.retrieved_docs = retrieved_docs - input.documents = [doc.text for doc in retrieved_docs] - result = input - - statistics_dict["opea_service@retriever_redis"].append_latency(time.time() - start, None) - if logflag: - logger.info(result) - return result - - -if __name__ == "__main__": - # Create vectorstore - if tei_embedding_endpoint: - # create embeddings using TEI endpoint service - embeddings = HuggingFaceEndpointEmbeddings(model=tei_embedding_endpoint) - vector_db = Redis(embedding=embeddings, index_name=INDEX_NAME, redis_url=REDIS_URL) - elif bridge_tower_embedding: - # create embeddings using BridgeTower service - embeddings = BridgeTowerEmbedding() - vector_db = Redis(embedding=embeddings, index_name=INDEX_NAME, index_schema=INDEX_SCHEMA, redis_url=REDIS_URL) - else: - # create embeddings using local embedding model - embeddings = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) - vector_db = Redis(embedding=embeddings, index_name=INDEX_NAME, redis_url=REDIS_URL) - - opea_microservices["opea_service@retriever_redis"].start() diff --git a/comps/retrievers/redis/llama_index/Dockerfile b/comps/retrievers/redis/llama_index/Dockerfile deleted file mode 100644 index 2e48e8eff7..0000000000 --- a/comps/retrievers/redis/llama_index/Dockerfile +++ /dev/null @@ -1,35 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -FROM 
python:3.11-slim - -# Set environment variables -ENV LANG=en_US.UTF-8 - -ARG ARCH="cpu" - -RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ - python3-pip \ - libgl1-mesa-glx \ - libjemalloc-dev - -RUN useradd -m -s /bin/bash user && \ - mkdir -p /home/user && \ - chown -R user /home/user/ - -COPY comps /home/user/comps - -USER user - -RUN pip install --no-cache-dir --upgrade pip setuptools && \ - if [ ${ARCH} = "cpu" ]; then \ - pip install --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cpu -r /home/user/comps/retrievers/redis/llama_index/requirements.txt; \ - else \ - pip install --no-cache-dir -r /home/user/comps/retrievers/redis/llama_index/requirements.txt; \ - fi; - -ENV PYTHONPATH=$PYTHONPATH:/home/user - -WORKDIR /home/user/comps/retrievers/redis/llama_index - -ENTRYPOINT ["python", "retriever_redis.py"] diff --git a/comps/retrievers/redis/llama_index/README.md b/comps/retrievers/redis/llama_index/README.md deleted file mode 100644 index 9c3bca027e..0000000000 --- a/comps/retrievers/redis/llama_index/README.md +++ /dev/null @@ -1,93 +0,0 @@ -# Retriever Microservice - -This retriever microservice is a highly efficient search service designed for handling and retrieving embedding vectors. It operates by receiving an embedding vector as input and conducting a similarity search against vectors stored in a VectorDB database. Users must specify the VectorDB's URL and the index name, and the service searches within that index to find documents with the highest similarity to the input vector. - -The service primarily utilizes similarity measures in vector space to rapidly retrieve contentually similar documents. The vector-based retrieval approach is particularly suited for handling large datasets, offering fast and accurate search results that significantly enhance the efficiency and quality of information retrieval. - -Overall, this microservice provides robust backend support for applications requiring efficient similarity searches, playing a vital role in scenarios such as recommendation systems, information retrieval, or any other context where precise measurement of document similarity is crucial. - -## 🚀1. Start Microservice with Python (Option 1) - -To start the retriever microservice, you must first install the required python packages. - -### 1.1 Install Requirements - -```bash -pip install -r requirements.txt -``` - -### 1.2 Setup VectorDB Service - -You need to setup your own VectorDB service (Redis in this example), and ingest your knowledge documents into the vector database. - -As for Redis, you could start a docker container using the following commands. -Remember to ingest data into it manually. - -```bash -docker run -d --name="redis-vector-db" -p 6379:6379 -p 8001:8001 redis/redis-stack:7.2.0-v9 -``` - -And then ingest data into the Redis VectorDB using the methods described in the dataprep microservice. - -### 1.3 Start Retriever Service - -```bash -python retriever_redis.py -``` - -## 🚀2. Start Microservice with Docker (Option 2) - -### 2.1 Setup Environment Variables - -```bash -export REDIS_URL="redis://${your_ip}:6379" -export INDEX_NAME=${your_index_name} -``` - -### 2.2 Build Docker Image - -```bash -cd ../../../.. -docker build -t opea/retriever-redis-llamaindex:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/redis/llama_index/Dockerfile . -``` - -To start a docker container, you have two options: - -- A. Run Docker with CLI -- B. 
Run Docker with Docker Compose - -You can choose one as needed. - -### 2.3 Run Docker with CLI (Option A) - -```bash -docker run -d --name="retriever-redis-server" -p 7000:7000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME opea/retriever-redis:latest -``` - -### 2.4 Run Docker with Docker Compose (Option B) - -```bash -docker compose -f docker_compose_retriever.yaml up -d -``` - -## 🚀3. Consume Retriever Service - -### 3.1 Check Service Status - -```bash -curl http://localhost:7000/v1/health_check \ - -X GET \ - -H 'Content-Type: application/json' -``` - -### 3.2 Consume Retriever Service - -To consume the Retriever Microservice, you can generate a mock embedding vector of length 768 with Python. - -```bash -export your_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") -curl http://${your_ip}:7000/v1/retrieval \ - -X POST \ - -d "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${your_embedding}}" \ - -H 'Content-Type: application/json' -``` diff --git a/comps/retrievers/redis/llama_index/__init__.py b/comps/retrievers/redis/llama_index/__init__.py deleted file mode 100644 index 916f3a44b2..0000000000 --- a/comps/retrievers/redis/llama_index/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 diff --git a/comps/retrievers/redis/llama_index/redis_config.py b/comps/retrievers/redis/llama_index/redis_config.py deleted file mode 100644 index 619b2b8222..0000000000 --- a/comps/retrievers/redis/llama_index/redis_config.py +++ /dev/null @@ -1,71 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import os - - -def get_boolean_env_var(var_name, default_value=False): - """Retrieve the boolean value of an environment variable. - - Args: - var_name (str): The name of the environment variable to retrieve. - default_value (bool): The default value to return if the variable - is not found. - - Returns: - bool: The value of the environment variable, interpreted as a boolean. 
- """ - true_values = {"true", "1", "t", "y", "yes"} - false_values = {"false", "0", "f", "n", "no"} - - # Retrieve the environment variable's value - value = os.getenv(var_name, "").lower() - - # Decide the boolean value based on the content of the string - if value in true_values: - return True - elif value in false_values: - return False - else: - return default_value - - -# Whether or not to enable langchain debugging -DEBUG = get_boolean_env_var("DEBUG", False) -# Set DEBUG env var to "true" if you wish to enable LC debugging module -if DEBUG: - import langchain - - langchain.debug = True - - -# Embedding model -EMBED_MODEL = os.getenv("EMBED_MODEL", "BAAI/bge-base-en-v1.5") - - -# Redis Connection Information -REDIS_HOST = os.getenv("REDIS_HOST", "localhost") -REDIS_PORT = int(os.getenv("REDIS_PORT", 6379)) - - -def format_redis_conn_from_env(): - redis_url = os.getenv("REDIS_URL", None) - if redis_url: - return redis_url - else: - using_ssl = get_boolean_env_var("REDIS_SSL", False) - start = "rediss://" if using_ssl else "redis://" - - # if using RBAC - password = os.getenv("REDIS_PASSWORD", None) - username = os.getenv("REDIS_USERNAME", "default") - if password is not None: - start += f"{username}:{password}@" - - return start + f"{REDIS_HOST}:{REDIS_PORT}" - - -REDIS_URL = format_redis_conn_from_env() - -# Vector Index Configuration -INDEX_NAME = os.getenv("INDEX_NAME", "rag-redis") diff --git a/comps/retrievers/redis/llama_index/redis_llama_index.yaml b/comps/retrievers/redis/llama_index/redis_llama_index.yaml deleted file mode 100644 index 88c09bdcbe..0000000000 --- a/comps/retrievers/redis/llama_index/redis_llama_index.yaml +++ /dev/null @@ -1,28 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -version: "3.8" - -services: - redis-vector-db: - image: redis/redis-stack:7.2.0-v9 - container_name: redis-vector-db - ports: - - "6379:6379" - - "8001:8001" - retriever: - image: opea/retriever-redis:latest - container_name: retriever-redis-server - ports: - - "7000:7000" - ipc: host - environment: - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - REDIS_URL: ${REDIS_URL} - INDEX_NAME: ${INDEX_NAME} - restart: unless-stopped - -networks: - default: - driver: bridge diff --git a/comps/retrievers/redis/llama_index/requirements.txt b/comps/retrievers/redis/llama_index/requirements.txt deleted file mode 100644 index f38ad87b73..0000000000 --- a/comps/retrievers/redis/llama_index/requirements.txt +++ /dev/null @@ -1,16 +0,0 @@ -aiohttp -docarray[full] -easyocr -fastapi -httpx -llama-index-vector-stores-redis -llama_index -opentelemetry-api -opentelemetry-exporter-otlp -opentelemetry-sdk -prometheus-fastapi-instrumentator -pymupdf -redis -sentence_transformers -shortuuid -uvicorn diff --git a/comps/retrievers/redis/llama_index/retriever_redis.py b/comps/retrievers/redis/llama_index/retriever_redis.py deleted file mode 100644 index 1e36e07b62..0000000000 --- a/comps/retrievers/redis/llama_index/retriever_redis.py +++ /dev/null @@ -1,44 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import os - -from llama_index.core.vector_stores.types import VectorStoreQuery -from llama_index.vector_stores.redis import RedisVectorStore -from redis_config import REDIS_URL - -from comps import CustomLogger, EmbedDoc, SearchedDoc, ServiceType, TextDoc, opea_microservices, register_microservice - -logger = CustomLogger("retriever_redis") -logflag = os.getenv("LOGFLAG", False) - -tei_embedding_endpoint = 
os.getenv("TEI_EMBEDDING_ENDPOINT") - - -@register_microservice( - name="opea_service@retriever_redis", - service_type=ServiceType.RETRIEVER, - endpoint="/v1/retrieval", - host="0.0.0.0", - port=7000, -) -async def retrieve(input: EmbedDoc) -> SearchedDoc: - if logflag: - logger.info(input) - vector_store_query = VectorStoreQuery(query_embedding=input.embedding) - search_res = await vector_store.aquery(query=vector_store_query) - searched_docs = [] - for node, id, similarity in zip(search_res.nodes, search_res.ids, search_res.similarities): - searched_docs.append(TextDoc(text=node.get_content())) - result = SearchedDoc(retrieved_docs=searched_docs, initial_query=input.text) - if logflag: - logger.info(result) - return result - - -if __name__ == "__main__": - - vector_store = RedisVectorStore( - redis_url=REDIS_URL, - ) - opea_microservices["opea_service@retriever_redis"].start() diff --git a/comps/retrievers/src/Dockerfile b/comps/retrievers/src/Dockerfile index 3c2d12ab25..3fb6b3650e 100644 --- a/comps/retrievers/src/Dockerfile +++ b/comps/retrievers/src/Dockerfile @@ -7,7 +7,10 @@ ARG ARCH="cpu" RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ libgl1-mesa-glx \ - libjemalloc-dev + libjemalloc-dev \ + libcairo2 \ + libglib2.0-0 \ + vim RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ @@ -15,14 +18,20 @@ RUN useradd -m -s /bin/bash user && \ COPY comps /home/user/comps -USER user - RUN pip install --no-cache-dir --upgrade pip setuptools && \ - if [ ${ARCH} = "cpu" ]; then pip install --no-cache-dir torch torchvision --index-url https://download.pytorch.org/whl/cpu; fi && \ - pip install --no-cache-dir -r /home/user/comps/retrievers/src/requirements.txt + if [ ${ARCH} = "cpu" ]; then \ + PIP_EXTRA_INDEX_URL="--extra-index-url https://download.pytorch.org/whl/cpu"; \ + else \ + PIP_EXTRA_INDEX_URL=""; \ + fi && \ + pip install --no-cache-dir torch torchvision ${PIP_EXTRA_INDEX_URL} && \ + pip install --no-cache-dir ${PIP_EXTRA_INDEX_URL} -r /home/user/comps/retrievers/src/requirements.txt && \ + pip install opentelemetry-api==1.27.0 opentelemetry-exporter-otlp==1.27.0 opentelemetry-sdk==1.27.0 ENV PYTHONPATH=$PYTHONPATH:/home/user +USER user + WORKDIR /home/user/comps/retrievers/src ENTRYPOINT ["python", "opea_retrievers_microservice.py"] diff --git a/comps/retrievers/src/README.md b/comps/retrievers/src/README.md deleted file mode 100644 index 9d31b1afa9..0000000000 --- a/comps/retrievers/src/README.md +++ /dev/null @@ -1,7 +0,0 @@ -# Retriever Microservice - -This retriever microservice is a highly efficient search service designed for handling and retrieving embedding vectors. It operates by receiving an embedding vector as input and conducting a similarity search against vectors stored in a VectorDB database. Users must specify the VectorDB's URL and the index name, and the service searches within that index to find documents with the highest similarity to the input vector. - -The service primarily utilizes similarity measures in vector space to rapidly retrieve contentually similar documents. The vector-based retrieval approach is particularly suited for handling large datasets, offering fast and accurate search results that significantly enhance the efficiency and quality of information retrieval. 
- -Overall, this microservice provides robust backend support for applications requiring efficient similarity searches, playing a vital role in scenarios such as recommendation systems, information retrieval, or any other context where precise measurement of document similarity is crucial. diff --git a/comps/retrievers/elasticsearch/langchain/README.md b/comps/retrievers/src/README_elasticsearch.md similarity index 78% rename from comps/retrievers/elasticsearch/langchain/README.md rename to comps/retrievers/src/README_elasticsearch.md index 3a799de19c..6ff1728995 100644 --- a/comps/retrievers/elasticsearch/langchain/README.md +++ b/comps/retrievers/src/README_elasticsearch.md @@ -44,21 +44,14 @@ curl 127.0.0.1:6060/embed \ ### 1.4 Setup VectorDB Service -You need to setup your own VectorDB service (Elasticsearch in this example), and ingest your knowledge documents into -the vector database. - -As for Elasticsearch, you could start a docker container using the following commands. -Remember to ingest data into it manually. - -```bash -docker run -d --name vectorstore-elasticsearch -e ES_JAVA_OPTS="-Xms1g -Xmx1g" -e "discovery.type=single-node" -e "xpack.security.enabled=false" -p 9200:9200 -p 9300:9300 docker.elastic.co/elasticsearch/elasticsearch:8.16.0 -``` +Please refer to this [readme](../../third_parties/elasticsearch/src/README.md). ### 1.5 Start Retriever Service ```bash export TEI_EMBEDDING_ENDPOINT="http://${your_ip}:6060" -python retriever_elasticsearch.py +export RETRIEVER_COMPONENT_NAME="OPEA_RETRIEVER_ELASTICSEARCH" +python opea_retrievers_microservice.py ``` ## 🚀2. Start Microservice with Docker (Option 2) @@ -70,13 +63,14 @@ export EMBED_MODEL="BAAI/bge-base-en-v1.5" export ES_CONNECTION_STRING="http://localhost:9200" export INDEX_NAME=${your_index_name} export TEI_EMBEDDING_ENDPOINT="http://${your_ip}:6060" +export RETRIEVER_COMPONENT_NAME="OPEA_RETRIEVER_ELASTICSEARCH" ``` ### 2.2 Build Docker Image ```bash -cd ../../../../../ -docker build -t opea/retriever-elasticsearch:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/elasticsearch/langchain/Dockerfile . +cd ../../../ +docker build -t opea/retriever:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/src/Dockerfile . ``` To start a docker container, you have two options: @@ -89,14 +83,15 @@ You can choose one as needed. ### 2.3 Run Docker with CLI (Option A) ```bash -docker run -d --name="retriever-elasticsearch" -p 7000:7000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e ES_CONNECTION_STRING=$ES_CONNECTION_STRING -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT opea/retriever-elasticsearch:latest +docker run -d --name="retriever-elasticsearch" -p 7000:7000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e ES_CONNECTION_STRING=$ES_CONNECTION_STRING -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT opea/retriever:latest ``` ### 2.4 Run Docker with Docker Compose (Option B) ```bash -cd comps/retrievers/elasticsearch/langchain -docker compose -f docker_compose_retriever.yaml up -d +cd ../deployment/docker_compose +export service_name="retriever-elasticsearch" +docker compose -f compose.yaml up ${service_name} -d ``` ## 🚀3. 
Consume Retriever Service diff --git a/comps/retrievers/milvus/langchain/README.md b/comps/retrievers/src/README_milvus.md similarity index 83% rename from comps/retrievers/milvus/langchain/README.md rename to comps/retrievers/src/README_milvus.md index 1edfa2e346..178112c279 100644 --- a/comps/retrievers/milvus/langchain/README.md +++ b/comps/retrievers/src/README_milvus.md @@ -10,7 +10,7 @@ pip install -r requirements.txt ### Start Milvus Server -Please refer to this [readme](../../../vectorstores/milvus/README.md). +Please refer to this [readme](../../third_parties/milvus/src/README.md). ### Setup Environment Variables @@ -28,7 +28,8 @@ export TEI_EMBEDDING_ENDPOINT=${your_emdding_endpoint} ```bash export TEI_EMBEDDING_ENDPOINT="http://${your_ip}:6060" -python retriever_redis.py +export RETRIEVER_COMPONENT_NAME="OPEA_RETRIEVER_MILVUS" +python opea_retrievers_microservice.py ``` ## 🚀Start Microservice with Docker @@ -37,13 +38,21 @@ python retriever_redis.py ```bash cd ../../ -docker build -t opea/retriever-milvus:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/milvus/langchain/Dockerfile . +docker build -t opea/retriever:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/src/Dockerfile . ``` -### Run Docker with CLI +### Run Docker with CLI (Option A) ```bash -docker run -d --name="retriever-milvus-server" -p 7000:7000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e TEI_EMBEDDING_ENDPOINT=${your_emdding_endpoint} -e MILVUS_HOST=${your_milvus_host_ip} opea/retriever-milvus:latest +docker run -d --name="retriever-milvus-server" -p 7000:7000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e TEI_EMBEDDING_ENDPOINT=${your_emdding_endpoint} -e MILVUS_HOST=${your_milvus_host_ip} -e RETRIEVER_COMPONENT_NAME=$RETRIEVER_COMPONENT_NAME opea/retriever:latest +``` + +### Run Docker with Docker Compose (Option B) + +```bash +cd ../deployment/docker_compose +export service_name="retriever-milvus" +docker compose -f compose.yaml up ${service_name} -d ``` ## 🚀3. Consume Retriever Service diff --git a/comps/retrievers/neo4j/llama_index/README.md b/comps/retrievers/src/README_neo4j.md similarity index 69% rename from comps/retrievers/neo4j/llama_index/README.md rename to comps/retrievers/src/README_neo4j.md index eb6f015797..47fd74aeb8 100644 --- a/comps/retrievers/neo4j/llama_index/README.md +++ b/comps/retrievers/src/README_neo4j.md @@ -13,8 +13,27 @@ Retrieval follows these steps: ### 1. Build Docker Image ```bash -cd ../../ -docker build -t opea/retriever-community-answers-neo4j:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/neo4j/llama_index/Dockerfile . +cd ../../../ +docker build -t opea/retriever:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/src/Dockerfile . +``` + +### 2. Install Requirements + +```bash +pip install -r requirements.txt +``` + +### 3. Start Neo4j VectorDB Service + +```bash +docker run \ + -p 7474:7474 -p 7687:7687 \ + -v $PWD/data:/data -v $PWD/plugins:/plugins \ + --name neo4j-apoc \ + -d \ + -e NEO4J_AUTH=neo4j/password \ + -e NEO4J_PLUGINS=\[\"apoc\"\] \ + neo4j:latest ``` ### 2. 
Setup Environment Variables @@ -32,7 +51,16 @@ export PYTHONPATH=${path_to_comps} export OPENAI_KEY=${your_openai_api_key} # optional, when not provided will use smaller models TGI/TEI export HUGGINGFACEHUB_API_TOKEN=${your_hf_token} # set additional environment settings -source ./set_env.sh +export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" +export OPENAI_EMBEDDING_MODEL="text-embedding-3-small" +export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct" +export OPENAI_LLM_MODEL="gpt-4o" +export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:6006" +export TGI_LLM_ENDPOINT="http://${host_ip}:6005" +export NEO4J_URL="bolt://${host_ip}:7687" +export NEO4J_USERNAME=neo4j +export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6004/v1/dataprep" +export LOGFLAG=True ``` ### 3. Run Docker with Docker Compose @@ -40,8 +68,9 @@ source ./set_env.sh Docker compose will start 5 microservices: retriever-neo4j-llamaindex, dataprep-neo4j-llamaindex, neo4j-apoc, tgi-gaudi-service and tei-embedding-service. Neo4j database supports embeddings natively so we do not need a separate vector store. Checkout the blog [Introducing the Property Graph Index: A Powerful New Way to Build Knowledge Graphs with LLMs](https://www.llamaindex.ai/blog/introducing-the-property-graph-index-a-powerful-new-way-to-build-knowledge-graphs-with-llms) for a better understanding of Property Graph Store and Index. ```bash -cd comps/retrievers/neo4j/llama_index -docker compose -f compose.yaml up -d +cd ../deployment/docker_compose +export service_name="retriever-neo4j" +docker compose -f compose.yaml up ${service_name} -d ``` ## Invoke Microservice @@ -49,7 +78,7 @@ docker compose -f compose.yaml up -d ### 3.1 Check Service Status ```bash -curl http://${host_ip}:6009/v1/health_check \ +curl http://${host_ip}:7000/v1/health_check \ -X GET \ -H 'Content-Type: application/json' ``` @@ -59,7 +88,7 @@ curl http://${host_ip}:6009/v1/health_check \ If OPEN_AI_KEY is provided it will use OPENAI endpoints for LLM and Embeddings otherwise will use TGI and TEI endpoints. If a model name not provided in the request it will use the default specified by the set_env.sh script. ```bash -curl -X POST http://${host_ip}:6009/v1/retrieval \ +curl -X POST http://${host_ip}:7000/v1/retrieval \ -H "Content-Type: application/json" \ -d '{"model": "gpt-3.5-turbo","messages": [{"role": "user","content": "Who is John Brady and has he had any confrontations?"}]}' ``` diff --git a/comps/retrievers/opensearch/langchain/README.md b/comps/retrievers/src/README_opensearch.md similarity index 88% rename from comps/retrievers/opensearch/langchain/README.md rename to comps/retrievers/src/README_opensearch.md index 487f8e7d53..0984553a0c 100644 --- a/comps/retrievers/opensearch/langchain/README.md +++ b/comps/retrievers/src/README_opensearch.md @@ -37,15 +37,14 @@ curl 127.0.0.1:6060/embed \ ### 1.4 Setup VectorDB Service -You need to setup your own VectorDB service (OpenSearch in this example), and ingest your knowledge documents into the vector database. - -As for OpenSearch, you could start a docker container referencing the instructions found in the OpenSearch vectorstores [README.md](../../../vectorstores/opensearch/README.md) +Please refer to this [readme](../../third_parties/opensearch/src/README.md). ### 1.5 Start Retriever Service ```bash export TEI_EMBEDDING_ENDPOINT="http://${your_ip}:6060" -python retriever_opensearch.py +export RETRIEVER_COMPONENT_NAME="OPEA_RETRIEVER_OPENSEARCH" +python opea_retrievers_microservice.py ``` ## 🚀2. 
Start Microservice with Docker (Option 2) @@ -59,13 +58,14 @@ export INDEX_NAME=${your_index_name} export TEI_EMBEDDING_ENDPOINT="http://${your_ip}:6060" export HUGGINGFACEHUB_API_TOKEN=${your_hf_token} export OPENSEARCH_INITIAL_ADMIN_PASSWORD=${your_opensearch_initial_admin_password} +export RETRIEVER_COMPONENT_NAME="OPEA_RETRIEVER_OPENSEARCH" ``` ### 2.2 Build Docker Image ```bash -cd ../../../../ -docker build -t opea/retriever-opensearch-server:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/opensearch/langchain/Dockerfile . +cd ../../../ +docker build -t opea/retriever:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/src/Dockerfile . ``` To start a docker container, you have two options: @@ -78,13 +78,15 @@ You can choose one as needed. ### 2.3 Run Docker with CLI (Option A) ```bash -docker run -d --name="retriever-opensearch-server" -p 7000:7000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e OPENSEARCH_URL=$OPENSEARCH_URL -e INDEX_NAME=$INDEX_NAME -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN opea/retriever-opensearch:latest +docker run -d --name="retriever-opensearch-server" -p 7000:7000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e OPENSEARCH_URL=$OPENSEARCH_URL -e INDEX_NAME=$INDEX_NAME -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN -e RETRIEVER_COMPONENT_NAME=$RETRIEVER_COMPONENT_NAME opea/retriever:latest ``` ### 2.4 Run Docker with Docker Compose (Option B) ```bash -docker compose -f docker_compose_retriever.yaml up -d +cd ../deployment/docker_compose +export service_name="retriever-opensearch" +docker compose -f compose.yaml up ${service_name} -d ``` ## 🚀3. Consume Retriever Service diff --git a/comps/retrievers/pathway/langchain/README.md b/comps/retrievers/src/README_pathway.md similarity index 73% rename from comps/retrievers/pathway/langchain/README.md rename to comps/retrievers/src/README_pathway.md index 13b5ffa2a1..b101e4195e 100644 --- a/comps/retrievers/pathway/langchain/README.md +++ b/comps/retrievers/src/README_pathway.md @@ -14,7 +14,7 @@ If you prefer to run them separately, refer to this section. ```bash model=BAAI/bge-base-en-v1.5 -# TEI_EMBEDDING_ENDPOINT="http://${your_ip}:6060" # if you want to use the hosted embedding service, example: "http://127.0.0.1:6060" +export TEI_EMBEDDING_ENDPOINT="http://${your_ip}:6060" # if you want to use the hosted embedding service, example: "http://127.0.0.1:6060" # then run: docker run -p 6060:80 -e http_proxy=$http_proxy -e https_proxy=$https_proxy --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 --model-id $model @@ -38,10 +38,11 @@ curl 127.0.0.1:6060/rerank \ -H 'Content-Type: application/json' ``` -#### Start Retriever Service +#### Start Pathway VectorDB Service + +Please refer to this [readme](../../third_parties/pathway/src/README.md). -Retriever service queries the Pathway vector store on incoming requests. -Make sure that Pathway vector store is already running, [see Pathway vector store here](../../../vectorstores/pathway/README.md). +#### Start Retriever Service Retriever service expects the Pathway host and port variables to connect to the vector DB. Set the Pathway vector store environment variables. 
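Once PATHWAY_HOST and PATHWAY_PORT are set, a quick client-side probe mirroring the deleted retriever_pathway.py above can confirm the Pathway vector store is reachable; this is a sketch, not part of the microservice, and assumes a Pathway server is already running at that address:

```python
# Sketch: connect to the Pathway vector store and run a similarity search,
# following the pattern of the deleted retriever_pathway.py.
import os

from langchain_community.vectorstores import PathwayVectorClient

host = os.getenv("PATHWAY_HOST", "127.0.0.1")
port = int(os.getenv("PATHWAY_PORT", 8666))

client = PathwayVectorClient(host=host, port=port)
docs = client.similarity_search("What is the revenue of Nike in 2023?", k=4)
for doc in docs:
    print(doc.page_content)
```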
@@ -52,9 +53,9 @@ export PATHWAY_PORT=8666 ```bash # make sure you are in the root folder of the repo -docker build -t opea/retriever-pathway:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/pathway/langchain/Dockerfile . +docker build -t opea/retriever:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/src/Dockerfile . -docker run -p 7000:7000 -e PATHWAY_HOST=${PATHWAY_HOST} -e PATHWAY_PORT=${PATHWAY_PORT} -e http_proxy=$http_proxy -e https_proxy=$https_proxy --network="host" opea/retriever-pathway:latest +docker run -p 7000:7000 -e PATHWAY_HOST=${PATHWAY_HOST} -e PATHWAY_PORT=${PATHWAY_PORT} -e http_proxy=$http_proxy -e https_proxy=$https_proxy --network="host" -e RETRIEVER_COMPONENT_NAME="OPEA_RETRIEVER_PATHWAY" opea/retriever:latest ``` ### With the Docker compose @@ -65,7 +66,7 @@ First, set the env variables: export PATHWAY_HOST=0.0.0.0 export PATHWAY_PORT=8666 model=BAAI/bge-base-en-v1.5 -# TEI_EMBEDDING_ENDPOINT="http://${your_ip}:6060" # if you want to use the hosted embedding service, example: "http://127.0.0.1:6060" +export TEI_EMBEDDING_ENDPOINT="http://${your_ip}:6060" # if you want to use the hosted embedding service, example: "http://127.0.0.1:6060" ``` Text embeddings inference service expects the `RETRIEVE_MODEL_ID` variable to be set. @@ -78,13 +79,9 @@ Note that following docker compose sets the `network_mode: host` in retriever im This will start the both the embedding and retriever services: ```bash -cd comps/retrievers/pathway/langchain - -docker compose -f docker_compose_retriever.yaml build -docker compose -f docker_compose_retriever.yaml up - -# shut down the containers -docker compose -f docker_compose_retriever.yaml down +cd ../deployment/docker_compose +export service_name="retriever-pathway" +docker compose -f compose.yaml up ${service_name} -d ``` Make sure the retriever service is working as expected: diff --git a/comps/retrievers/pgvector/langchain/README.md b/comps/retrievers/src/README_pgvector.md similarity index 83% rename from comps/retrievers/pgvector/langchain/README.md rename to comps/retrievers/src/README_pgvector.md index ef11cd1829..00c51b9ffe 100644 --- a/comps/retrievers/pgvector/langchain/README.md +++ b/comps/retrievers/src/README_pgvector.md @@ -42,19 +42,14 @@ You need to setup your own VectorDB service (PGvector in this example), and inge As for PGVector, you could start a docker container using the following commands. Remember to ingest data into it manually. -```bash -export POSTGRES_USER=testuser -export POSTGRES_PASSWORD=testpwd -export POSTGRES_DB=vectordb - -docker run --name vectorstore-postgres -e POSTGRES_USER=${POSTGRES_USER} -e POSTGRES_HOST_AUTH_METHOD=trust -e POSTGRES_DB=${POSTGRES_DB} -e POSTGRES_PASSWORD=${POSTGRES_PASSWORD} -d -v ./init.sql:/docker-entrypoint-initdb.d/init.sql -p 5432:5432 pgvector/pgvector:0.7.0-pg16 -``` +Please refer to this [readme](../../third_parties/pgvector/src/README.md). ### 1.5 Start Retriever Service ```bash export TEI_EMBEDDING_ENDPOINT="http://${your_ip}:6060" -python retriever_pgvector.py +export RETRIEVER_COMPONENT_NAME="OPEA_RETRIEVER_PGVECTOR" +python opea_retrievers_microservice.py ``` ## 🚀2. 
Start Microservice with Docker (Option 2) @@ -66,13 +61,14 @@ export RETRIEVE_MODEL_ID="BAAI/bge-base-en-v1.5" export PG_CONNECTION_STRING=postgresql+psycopg2://testuser:testpwd@${your_ip}:5432/vectordb export INDEX_NAME=${your_index_name} export TEI_EMBEDDING_ENDPOINT="http://${your_ip}:6060" +export RETRIEVER_COMPONENT_NAME="OPEA_RETRIEVER_PGVECTOR" ``` ### 2.2 Build Docker Image ```bash cd ../../../../ -docker build -t opea/retriever-pgvector:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/pgvector/langchain/Dockerfile . +docker build -t opea/retriever:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/src/Dockerfile . ``` To start a docker container, you have two options: @@ -85,14 +81,15 @@ You can choose one as needed. ### 2.3 Run Docker with CLI (Option A) ```bash -docker run -d --name="retriever-pgvector" -p 7000:7000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e PG_CONNECTION_STRING=$PG_CONNECTION_STRING -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT opea/retriever-pgvector:latest +docker run -d --name="retriever-pgvector" -p 7000:7000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e PG_CONNECTION_STRING=$PG_CONNECTION_STRING -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT -e RETRIEVER_COMPONENT_NAME=$RETRIEVER_COMPONENT_NAME opea/retriever:latest ``` ### 2.4 Run Docker with Docker Compose (Option B) ```bash -cd comps/retrievers/pgvector/langchain -docker compose -f docker_compose_retriever.yaml up -d +cd ../deployment/docker_compose +export service_name="retriever-pgvector" +docker compose -f compose.yaml up ${service_name} -d ``` ## 🚀3. Consume Retriever Service diff --git a/comps/retrievers/qdrant/haystack/README.md b/comps/retrievers/src/README_qdrant.md similarity index 68% rename from comps/retrievers/qdrant/haystack/README.md rename to comps/retrievers/src/README_qdrant.md index 017e7dc403..f255abd22f 100644 --- a/comps/retrievers/qdrant/haystack/README.md +++ b/comps/retrievers/src/README_qdrant.md @@ -10,7 +10,7 @@ pip install -r requirements.txt ### 1.2 Start Qdrant Server -Please refer to this [readme](../../../vectorstores/qdrant/README.md). +Please refer to this [readme](../../third_parties/qdrant/src/README.md). ### 1.3 Setup Environment Variables @@ -25,7 +25,8 @@ export INDEX_NAME=${your_index_name} ```bash export TEI_EMBEDDING_ENDPOINT="http://${your_ip}:6060" -python retriever_qdrant.py +export RETRIEVER_COMPONENT_NAME="OPEA_RETRIEVER_QDRANT" +python opea_retrievers_microservice.py ``` ## 2. 🚀Start Microservice with Docker (Option 2) @@ -36,19 +37,28 @@ python retriever_qdrant.py export QDRANT_HOST=${your_qdrant_host_ip} export QDRANT_PORT=6333 export TEI_EMBEDDING_ENDPOINT="http://${your_ip}:6060" +export RETRIEVER_COMPONENT_NAME="OPEA_RETRIEVER_QDRANT" ``` ### 2.2 Build Docker Image ```bash -cd ../../../../ -docker build -t opea/retriever-qdrant:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/qdrant/haystack/Dockerfile . +cd ../../../ +docker build -t opea/retriever:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/src/Dockerfile . 
``` -### 2.3 Run Docker with CLI +### 2.3 Run Docker with CLI (Option A) ```bash -docker run -d --name="retriever-qdrant-server" -p 7000:7000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e QDRANT_HOST=$QDRANT_HOST -e QDRANT_PORT=$QDRANT_PORT opea/retriever-qdrant:latest +docker run -d --name="retriever-qdrant-server" -p 7000:7000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e QDRANT_HOST=$QDRANT_HOST -e QDRANT_PORT=$QDRANT_PORT -e RETRIEVER_COMPONENT_NAME=$RETRIEVER_COMPONENT_NAME opea/retriever:latest +``` + +### 2.4 Run Docker with Docker Compose (Option B) + +```bash +cd ../deployment/docker_compose +export service_name="retriever-qdrant" +docker compose -f compose.yaml up ${service_name} -d ``` ## 🚀3. Consume Retriever Service diff --git a/comps/retrievers/redis/langchain/README.md b/comps/retrievers/src/README_redis.md similarity index 87% rename from comps/retrievers/redis/langchain/README.md rename to comps/retrievers/src/README_redis.md index 2172bcbc16..7020a666de 100644 --- a/comps/retrievers/redis/langchain/README.md +++ b/comps/retrievers/src/README_redis.md @@ -35,22 +35,16 @@ curl 127.0.0.1:6060/embed \ -H 'Content-Type: application/json' ``` -### 1.4 Setup VectorDB Service +### 1.4 Setup Redis VectorDB Service -You need to setup your own VectorDB service (Redis in this example), and ingest your knowledge documents into the vector database. - -As for Redis, you could start a docker container using the following commands. -Remember to ingest data into it manually. - -```bash -docker run -d --name="redis-vector-db" -p 6379:6379 -p 8001:8001 redis/redis-stack:7.2.0-v9 -``` +Please refer to this [readme](../../third_parties/redis/src/README.md). ### 1.5 Start Retriever Service ```bash export TEI_EMBEDDING_ENDPOINT="http://${your_ip}:6060" -python retriever_redis.py +export RETRIEVER_COMPONENT_NAME="OPEA_RETRIEVER_REDIS" +python opea_retrievers_microservice.py ``` ## 🚀2. Start Microservice with Docker (Option 2) @@ -68,6 +62,7 @@ export REDIS_URL="redis://${your_ip}:6379" export INDEX_NAME=${your_index_name} export TEI_EMBEDDING_ENDPOINT="http://${your_ip}:6060" export HUGGINGFACEHUB_API_TOKEN=${your_hf_token} +export RETRIEVER_COMPONENT_NAME="OPEA_RETRIEVER_REDIS" # for multimodal retriever export your_ip=$(hostname -I | awk '{print $1}') @@ -80,8 +75,8 @@ export BRIDGE_TOWER_EMBEDDING=true ### 2.2 Build Docker Image ```bash -cd ../../../../ -docker build -t opea/retriever-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/redis/langchain/Dockerfile . +cd ../../../ +docker build -t opea/retriever:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/src/Dockerfile . ``` To start a docker container, you have two options: @@ -95,15 +90,17 @@ You can choose one as needed. 
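Whichever option you choose below, the started retriever listens on port 7000 and serves `/v1/retrieval`. The following is a minimal Python sketch of a client call; the payload fields mirror the `EmbedDoc` inputs used by the integrations (`text`, `embedding`, `search_type`, `k`), and the 768-dimension dummy embedding is an assumption that matches a `BAAI/bge-base-en-v1.5` TEI model — replace it with a real embedding in practice.

```python
# Sketch only: call a running retriever microservice on port 7000.
# The zero-filled 768-dim embedding is a placeholder; embed the query with
# your TEI service first to get meaningful results.
import requests

payload = {
    "text": "What is the total revenue?",
    "embedding": [0.0] * 768,
    "search_type": "similarity",
    "k": 4,
}
resp = requests.post("http://localhost:7000/v1/retrieval", json=payload, timeout=60)
resp.raise_for_status()
# The response carries the retrieved documents (e.g. a "retrieved_docs" list).
print(resp.json())
```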
```bash # Start a text retriever server -docker run -d --name="retriever-redis-server" -p 7000:7000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN opea/retriever-redis:latest +docker run -d --name="retriever-redis-server" -p 7000:7000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN -e RETRIEVER_COMPONENT_NAME=$RETRIEVER_COMPONENT_NAME opea/retriever:latest # start a multimodal retriever server -docker run -d --name="retriever-multimodal-redis-server" -p 7000:7000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME -e BRIDGE_TOWER_EMBEDDING=${BRIDGE_TOWER_EMBEDDING} opea/retriever-redis:latest +docker run -d --name="retriever-multimodal-redis-server" -p 7000:7000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME -e BRIDGE_TOWER_EMBEDDING=${BRIDGE_TOWER_EMBEDDING} -e RETRIEVER_COMPONENT_NAME=$RETRIEVER_COMPONENT_NAME opea/retriever:latest ``` ### 2.4 Run Docker with Docker Compose (Option B) ```bash -docker compose -f docker_compose_retriever.yaml up -d +cd ../deployment/docker_compose +export service_name="retriever-redis" +docker compose -f compose.yaml up ${service_name} -d ``` ## 🚀3. Consume Retriever Service diff --git a/comps/retrievers/vdms/langchain/README.md b/comps/retrievers/src/README_vdms.md similarity index 91% rename from comps/retrievers/vdms/langchain/README.md rename to comps/retrievers/src/README_vdms.md index 489df71c85..1e699cf291 100644 --- a/comps/retrievers/vdms/langchain/README.md +++ b/comps/retrievers/src/README_vdms.md @@ -59,15 +59,14 @@ You need to setup your own VectorDB service (VDMS in this example), and ingest y As for VDMS, you could start a docker container using the following commands. Remember to ingest data into it manually. -```bash -docker run -d --name="vdms-vector-db" -p 55555:55555 intellabs/vdms:latest -``` +Please refer to this [readme](../../third_parties/vdms/src/README.md). ### 1.5 Start Retriever Service ```bash export TEI_EMBEDDING_ENDPOINT="http://${your_ip}:6060" -python retriever_vdms.py +export RETRIEVER_COMPONENT_NAME="OPEA_RETRIEVER_VDMS" +python opea_retrievers_microservice.py ``` ## 🚀2. Start Microservice with Docker (Option 2) @@ -78,13 +77,14 @@ python retriever_vdms.py export RETRIEVE_MODEL_ID="BAAI/bge-base-en-v1.5" export INDEX_NAME=${your_index_name or collection_name} export TEI_EMBEDDING_ENDPOINT="http://${your_ip}:6060" +export RETRIEVER_COMPONENT_NAME="OPEA_RETRIEVER_VDMS" ``` ### 2.2 Build Docker Image ```bash -cd ../../../../ -docker build -t opea/retriever-vdms:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/vdms/langchain/Dockerfile . +cd ../../../ +docker build -t opea/retriever:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/src/Dockerfile . ``` To start a docker container, you have two options: @@ -97,13 +97,15 @@ You can choose one as needed. 
### 2.3 Run Docker with CLI (Option A) ```bash -docker run -d --name="retriever-vdms-server" -p 7000:7000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e INDEX_NAME=$INDEX_NAME -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT opea/retriever-vdms:latest +docker run -d --name="retriever-vdms-server" -p 7000:7000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e INDEX_NAME=$INDEX_NAME -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e RETRIEVER_COMPONENT_NAME=$RETRIEVER_COMPONENT_NAME opea/retriever:latest ``` ### 2.4 Run Docker with Docker Compose (Option B) ```bash -docker compose -f docker_compose_retriever.yaml up -d +cd ../deployment/docker_compose +export service_name="retriever-vdms" +docker compose -f compose.yaml up ${service_name} -d ``` ## 🚀3. Consume Retriever Service diff --git a/comps/dataprep/milvus/langchain/__init__.py b/comps/retrievers/src/__init__.py similarity index 100% rename from comps/dataprep/milvus/langchain/__init__.py rename to comps/retrievers/src/__init__.py diff --git a/comps/retrievers/src/integrations/config.py b/comps/retrievers/src/integrations/config.py index f728b84efc..3ffced0820 100644 --- a/comps/retrievers/src/integrations/config.py +++ b/comps/retrievers/src/integrations/config.py @@ -52,6 +52,35 @@ def get_boolean_env_var(var_name, default_value=False): parent_dir = os.path.dirname(current_file_path) +####################################################### +# Elasticsearch # +####################################################### +ES_CONNECTION_STRING = os.getenv("ES_CONNECTION_STRING", "http://localhost:9200") +ES_INDEX_NAME = os.getenv("ES_INDEX_NAME", "rag_elasticsearch") + + +####################################################### +# Neo4j # +####################################################### +NEO4J_URL = os.getenv("NEO4J_URI", "bolt://localhost:7687") +NEO4J_USERNAME = os.getenv("NEO4J_USERNAME", "neo4j") +NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD", "test") +host_ip = os.getenv("host_ip") +TGI_LLM_ENDPOINT = os.getenv("TGI_LLM_ENDPOINT", f"http://{host_ip}:6005") +OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") +OPENAI_EMBEDDING_MODEL = os.getenv("OPENAI_EMBEDDING_MODEL", "text-embedding-3-small") +OPENAI_LLM_MODEL = os.getenv("OPENAI_LLM_MODEL", "gpt-4o") +LLM_MODEL_ID = os.getenv("LLM_MODEL_ID", "meta-llama/Meta-Llama-3.1-8B-Instruct") +MAX_OUTPUT_TOKENS = os.getenv("MAX_OUTPUT_TOKENS", "1024") + + +####################################################### +# Pathway # +####################################################### +PATHWAY_HOST = os.getenv("PATHWAY_HOST", "127.0.0.1") +PATHWAY_PORT = int(os.getenv("PATHWAY_PORT", 8666)) + + ####################################################### # Redis # ####################################################### @@ -78,6 +107,9 @@ def format_redis_conn_from_env(): REDIS_URL = format_redis_conn_from_env() +REDIS_SCHEMA = os.getenv("REDIS_SCHEMA", "redis_schema_multi.yml") +schema_path = os.path.join(parent_dir, REDIS_SCHEMA) +INDEX_SCHEMA = schema_path ####################################################### @@ -93,3 +125,63 @@ def format_redis_conn_from_env(): TEI_EMBEDDING_ENDPOINT = os.environ.get("TEI_EMBEDDING_ENDPOINT", "") os.environ["OPENAI_API_BASE"] = TEI_EMBEDDING_ENDPOINT os.environ["OPENAI_API_KEY"] = "Dummy key" + + +####################################################### +# Opensearch # +####################################################### +# OpenSearch Connection Information +OPENSEARCH_HOST = os.getenv("OPENSEARCH_HOST", "localhost") 
+OPENSEARCH_PORT = int(os.getenv("OPENSEARCH_PORT", 9200)) +OPENSEARCH_INITIAL_ADMIN_PASSWORD = os.getenv("OPENSEARCH_INITIAL_ADMIN_PASSWORD", "") + + +def format_opensearch_conn_from_env(): + opensearch_url = os.getenv("OPENSEARCH_URL", None) + if opensearch_url: + return opensearch_url + else: + using_ssl = get_boolean_env_var("OPENSEARCH_SSL", False) + start = "https://" if using_ssl else "http://" + + return start + f"{OPENSEARCH_HOST}:{OPENSEARCH_PORT}" + + +OPENSEARCH_URL = format_opensearch_conn_from_env() +OPENSEARCH_INDEX_NAME = os.getenv("OPENSEARCH_INDEX_NAME", "rag_opensearch") + + +####################################################### +# Pinecone # +####################################################### +# Pinecone configuration +PINECONE_API_KEY = os.getenv("PINECONE_API_KEY", "xxx_xxx") +PINECONE_INDEX_NAME = os.getenv("PINECONE_INDEX_NAME", "rag_pinecone") + + +####################################################### +# PGvector # +####################################################### +PG_CONNECTION_STRING = os.getenv("PG_CONNECTION_STRING", "localhost") +PG_INDEX_NAME = os.getenv("PG_INDEX_NAME", "rag_pgvector") + + +####################################################### +# QDrant # +####################################################### +QDRANT_HOST = os.getenv("QDRANT_HOST", "localhost") +QDRANT_PORT = int(os.getenv("QDRANT_PORT", 6333)) +QDRANT_EMBED_DIMENSION = os.getenv("QDRANT_EMBED_DIMENSION", 768) +QDRANT_INDEX_NAME = os.getenv("QDRANT_INDEX_NAME", "rag_qdrant") + + +####################################################### +# VDMs # +####################################################### +# VDMS Connection Information +VDMS_HOST = os.getenv("VDMS_HOST", "localhost") +VDMS_PORT = int(os.getenv("VDMS_PORT", 55555)) +VDMS_INDEX_NAME = os.getenv("VDMS_INDEX_NAME", "rag_vdms") +VDMS_USE_CLIP = int(os.getenv("VDMS_USE_CLIP", 0)) +SEARCH_ENGINE = "FaissFlat" +DISTANCE_STRATEGY = "IP" diff --git a/comps/retrievers/src/integrations/elasticsearch.py b/comps/retrievers/src/integrations/elasticsearch.py new file mode 100644 index 0000000000..08b358e398 --- /dev/null +++ b/comps/retrievers/src/integrations/elasticsearch.py @@ -0,0 +1,116 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + + +import os + +from elasticsearch import Elasticsearch +from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceHubEmbeddings +from langchain_elasticsearch import ElasticsearchStore + +from comps import CustomLogger, EmbedDoc, OpeaComponent, OpeaComponentRegistry, ServiceType + +from .config import EMBED_MODEL, ES_CONNECTION_STRING, ES_INDEX_NAME, TEI_EMBEDDING_ENDPOINT + +logger = CustomLogger("es_retrievers") +logflag = os.getenv("LOGFLAG", False) + + +@OpeaComponentRegistry.register("OPEA_RETRIEVER_ELASTICSEARCH") +class OpeaElasticsearchRetriever(OpeaComponent): + """A specialized retriever component derived from OpeaComponent for elasticsearch retriever services. + + Attributes: + client (Elasticsearch): An instance of the elasticsearch client for vector database operations. 
+ """ + + def __init__(self, name: str, description: str, config: dict = None): + super().__init__(name, ServiceType.RETRIEVER.name.lower(), description, config) + + self.embedder = self._initialize_embedder() + self.es_connection_string = ES_CONNECTION_STRING + self.es_index_name = ES_INDEX_NAME + self.client, self.store = self._initialize_client() + health_status = self.check_health() + if not health_status: + logger.error("OpeaElasticsearchRetriever health check failed.") + + def _initialize_embedder(self): + if TEI_EMBEDDING_ENDPOINT: + # create embeddings using TEI endpoint service + if logflag: + logger.info(f"[ init embedder ] TEI_EMBEDDING_ENDPOINT:{TEI_EMBEDDING_ENDPOINT}") + embeddings = HuggingFaceHubEmbeddings(model=TEI_EMBEDDING_ENDPOINT) + else: + # create embeddings using local embedding model + if logflag: + logger.info(f"[ init embedder ] LOCAL_EMBEDDING_MODEL:{EMBED_MODEL}") + embeddings = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) + return embeddings + + def _initialize_client(self) -> Elasticsearch: + """Initializes the elasticsearch client.""" + es_client = Elasticsearch(hosts=ES_CONNECTION_STRING) + es_store = ElasticsearchStore(index_name=self.es_index_name, embedding=self.embedder, es_connection=es_client) + return es_client, es_store + + def check_health(self) -> bool: + """Checks the health of the retriever service. + + Returns: + bool: True if the service is reachable and healthy, False otherwise. + """ + if logflag: + logger.info("[ check health ] start to check health of elasticsearch") + try: + if not self.client.indices.exists(index=self.es_index_name): + self.client.indices.create(index=self.es_index_name) + logger.info("[ check health ] Successfully connected to Elasticsearch!") + return True + except Exception as e: + logger.info(f"[ check health ] Failed to connect to Elasticsearch: {e}") + return False + + async def invoke(self, input: EmbedDoc) -> list: + """Search the Elasticsearch index for the most similar documents to the input query. + + Args: + input (EmbedDoc): The input query to search for. + Output: + list: The retrieved documents. 
+ """ + if logflag: + logger.info(input) + + if input.search_type == "similarity": + docs_and_similarities = self.store.similarity_search_by_vector_with_relevance_scores( + embedding=input.embedding, k=input.k + ) + search_res = [doc for doc, _ in docs_and_similarities] + + elif input.search_type == "similarity_distance_threshold": + if input.distance_threshold is None: + raise ValueError("distance_threshold must be provided for " + "similarity_distance_threshold retriever") + docs_and_similarities = self.store.similarity_search_by_vector_with_relevance_scores( + embedding=input.embedding, k=input.k + ) + search_res = [doc for doc, similarity in docs_and_similarities if similarity > input.distance_threshold] + + elif input.search_type == "similarity_score_threshold": + docs_and_similarities = self.store.similarity_search_by_vector_with_relevance_scores( + query=input.text, k=input.k + ) + search_res = [doc for doc, similarity in docs_and_similarities if similarity > input.score_threshold] + + elif input.search_type == "mmr": + search_res = self.store.max_marginal_relevance_search( + query=input.text, k=input.k, fetch_k=input.fetch_k, lambda_mult=input.lambda_mult + ) + + else: + raise ValueError(f"search type {input.search_type} not valid") + + if logflag: + logger.info(f"retrieve result: {search_res}") + + return search_res diff --git a/comps/retrievers/src/integrations/milvus.py b/comps/retrievers/src/integrations/milvus.py index c52d0a6b3c..b78c33c024 100644 --- a/comps/retrievers/src/integrations/milvus.py +++ b/comps/retrievers/src/integrations/milvus.py @@ -5,10 +5,10 @@ import os from typing import List, Optional -from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceHubEmbeddings, OpenAIEmbeddings +from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceHubEmbeddings from langchain_milvus.vectorstores import Milvus -from comps import CustomLogger, EmbedDoc, OpeaComponent, OpeaComponentRegistry, SearchedDoc, ServiceType +from comps import CustomLogger, EmbedDoc, OpeaComponent, OpeaComponentRegistry, ServiceType from .config import COLLECTION_NAME, INDEX_PARAMS, LOCAL_EMBEDDING_MODEL, MILVUS_URI, TEI_EMBEDDING_ENDPOINT @@ -47,7 +47,7 @@ def _initialize_embedder(self): return embeddings def _initialize_client(self) -> Milvus: - """Initializes the redis client.""" + """Initializes the milvus client.""" try: client = Milvus( embedding_function=self.embedder, @@ -78,13 +78,13 @@ def check_health(self) -> bool: logger.info(f"[ check health ] Failed to connect to Milvus: {e}") return False - async def invoke(self, input: EmbedDoc) -> SearchedDoc: + async def invoke(self, input: EmbedDoc) -> list: """Search the Milvus index for the most similar documents to the input query. Args: input (EmbedDoc): The input query to search for. Output: - Union[SearchedDoc, RetrievalResponse, ChatCompletionRequest]: The retrieved documents. + list: The retrieved documents. 
""" if logflag: logger.info(input) diff --git a/comps/retrievers/src/integrations/neo4j.py b/comps/retrievers/src/integrations/neo4j.py new file mode 100644 index 0000000000..7cde18cea5 --- /dev/null +++ b/comps/retrievers/src/integrations/neo4j.py @@ -0,0 +1,320 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + + +import os +import re +import time +from typing import Union + +import openai +from llama_index.core import PropertyGraphIndex, Settings +from llama_index.core.indices.property_graph.sub_retrievers.vector import VectorContextRetriever +from llama_index.core.llms import LLM, ChatMessage +from llama_index.core.query_engine import CustomQueryEngine +from llama_index.embeddings.openai import OpenAIEmbedding +from llama_index.embeddings.text_embeddings_inference import TextEmbeddingsInference +from llama_index.llms.openai import OpenAI +from llama_index.llms.openai_like import OpenAILike +from neo4j import GraphDatabase +from pydantic import PrivateAttr + +from comps import CustomLogger, OpeaComponent, OpeaComponentRegistry, ServiceType +from comps.cores.proto.api_protocol import ChatCompletionRequest, RetrievalResponseData +from comps.dataprep.src.integrations.neo4j_llamaindex import GraphRAGStore, get_attribute_from_tgi_endpoint + +from .config import ( + LLM_MODEL_ID, + MAX_OUTPUT_TOKENS, + NEO4J_PASSWORD, + NEO4J_URL, + NEO4J_USERNAME, + OPENAI_API_KEY, + OPENAI_EMBEDDING_MODEL, + OPENAI_LLM_MODEL, + TEI_EMBEDDING_ENDPOINT, + TGI_LLM_ENDPOINT, +) + +logger = CustomLogger("neo4j_retrievers") +logflag = os.getenv("LOGFLAG", False) + + +class GraphRAGQueryEngine(CustomQueryEngine): + # https://github.com/run-llama/llama_index/blob/main/docs/docs/examples/cookbooks/GraphRAG_v2.ipynb + # private attr because inherits from BaseModel + _graph_store: GraphRAGStore = PrivateAttr() + _index: PropertyGraphIndex = PrivateAttr() + _llm: LLM = PrivateAttr() + _similarity_top_k: int = PrivateAttr() + + def __init__(self, graph_store: GraphRAGStore, llm: LLM, index: PropertyGraphIndex, similarity_top_k: int = 20): + super().__init__() + self._graph_store = graph_store + self._index = index + self._llm = llm + self._similarity_top_k = similarity_top_k + + def custom_query(self, query_str: str, batch_size: int = 16) -> RetrievalResponseData: + """Process all community summaries to generate answers to a specific query.""" + + entities = self.get_entities(query_str, self._similarity_top_k) + community_summaries = self.retrieve_community_summaries_cypher(entities) + community_ids = list(community_summaries.keys()) + if logflag: + logger.info(f"Community ids: {community_ids}") + # Process community summaries in batches + community_answers = [] + for i in range(0, len(community_ids), batch_size): + batch_ids = community_ids[i : i + batch_size] + batch_summaries = {community_id: community_summaries[community_id] for community_id in batch_ids} + batch_answers = self.generate_batch_answers_from_summaries(batch_summaries, query_str) + community_answers.extend(batch_answers) + # Convert answers to RetrievalResponseData objects + # response_data = [RetrievalResponseData(text=answer, metadata={}) for answer in community_answers] + # logger.info(f"custom_query output result type {type(response_data)}") + # return response_data + return community_answers + + def get_entities(self, query_str, similarity_top_k): + if logflag: + logger.info(f"Retrieving entities for query: {query_str} with top_k: {similarity_top_k}") + nodes_retrieved = 
self._index.as_retriever(similarity_top_k=self._similarity_top_k).retrieve(query_str) + entities = set() + pattern = r"(\w+(?:\s+\w+)*)\s*->\s*(\w+(?:\s+\w+)*)\s*->\s*(\w+(?:\s+\w+)*)" + if logflag: + # logger.info(f" len of triplets {len(self._index.property_graph_store.get_triplets())}") + logger.info(f"number of nodes retrieved {len(nodes_retrieved), nodes_retrieved}") + for node in nodes_retrieved: + matches = re.findall(pattern, node.text, re.DOTALL) + + for match in matches: + subject = match[0] + obj = match[2] + entities.add(subject) + entities.add(obj) + if logflag: + logger.info(f"entities from query {entities}") + return list(entities) + + def retrieve_entity_communities(self, entity_info, entities): + """Retrieve cluster information for given entities, allowing for multiple clusters per entity. + + Args: + entity_info (dict): Dictionary mapping entities to their cluster IDs (list). + entities (list): List of entity names to retrieve information for. + + Returns: + List of community or cluster IDs to which an entity belongs. + """ + community_ids = [] + + for entity in entities: + if entity in entity_info: + community_ids.extend(entity_info[entity]) + + return list(set(community_ids)) + + def retrieve_community_summaries_cypher(self, entities): + """Retrieve cluster information and summaries for given entities using a Cypher query. + + Args: + entities (list): List of entity names to retrieve information for. + + Returns: + dict: Dictionary where keys are community or cluster IDs and values are summaries. + """ + community_summaries = {} + print(f"driver working? {self._graph_store.driver})") + + with self._graph_store.driver.session() as session: + for entity in entities: + result = session.run( + """ + MATCH (e:Entity {id: $entity_id})-[:BELONGS_TO]->(c:Cluster) + RETURN c.id AS cluster_id, c.summary AS summary + """, + entity_id=entity, + ) + for record in result: + community_summaries[record["cluster_id"]] = record["summary"] + + return community_summaries + + def generate_answer_from_summary(self, community_summary, query): + """Generate an answer from a community summary based on a given query using LLM.""" + prompt = ( + f"Given the community summary: {community_summary}, " + f"how would you answer the following query? Query: {query}" + ) + messages = [ + ChatMessage(role="system", content=prompt), + ChatMessage( + role="user", + content="I need an answer based on the above information.", + ), + ] + response = self._llm.chat(messages) + cleaned_response = re.sub(r"^assistant:\s*", "", str(response)).strip() + return cleaned_response + + def generate_batch_answers_from_summaries(self, batch_summaries, query): + """Generate answers from a batch of community summaries based on a given query using LLM.""" + batch_prompts = [] + for community_id, summary in batch_summaries.items(): + prompt = ( + f"Given the community summary: {summary}, " f"how would you answer the following query? 
Query: {query}" + ) + messages = [ + ChatMessage(role="system", content=prompt), + ChatMessage( + role="user", + content="I need an answer based on the above information.", + ), + ] + batch_prompts.append((community_id, messages)) + + # Generate answers for the batch + answers = self.generate_batch_responses(batch_prompts) + return answers + + def generate_batch_responses(self, batch_prompts): + """Generate responses for a batch of prompts using LLM.""" + responses = {} + messages = [messages for _, messages in batch_prompts] + + # Generate responses for the batch + if OPENAI_API_KEY: + batch_responses = [OpenAI().chat(message) for message in messages] + else: + batch_responses = [self._llm.chat(message) for message in messages] + + for (community_id, _), response in zip(batch_prompts, batch_responses): + cleaned_response = re.sub(r"^assistant:\s*", "", str(response)).strip() + responses[community_id] = cleaned_response + + return [responses[community_id] for community_id, _ in batch_prompts] + + +# Global variables to store the graph_store and index +graph_store = None +query_engine = None +index = None + + +@OpeaComponentRegistry.register("OPEA_RETRIEVER_NEO4J") +class OpeaNeo4jRetriever(OpeaComponent): + """A specialized retriever component derived from OpeaComponent for neo4j retriever services. + + Attributes: + client (Neo4j): An instance of the neo4j client for vector database operations. + """ + + def __init__(self, name: str, description: str, config: dict = None): + super().__init__(name, ServiceType.RETRIEVER.name.lower(), description, config) + + self.query_engine = self._initialize_client() + health_status = self.check_health() + if not health_status: + logger.error("OpeaNeo4jRetriever health check failed.") + + def _initialize_client(self): + """Initializes the neo4j client.""" + # async def initialize_graph_store_and_index(): + global graph_store, index, initialized, query_engine + if OPENAI_API_KEY: + logger.info("OpenAI API Key is set. Verifying its validity...") + openai.api_key = OPENAI_API_KEY + try: + llm = OpenAI(temperature=0, model=OPENAI_LLM_MODEL) + embed_model = OpenAIEmbedding(model=OPENAI_EMBEDDING_MODEL, embed_batch_size=100) + logger.info("OpenAI API Key is valid.") + except openai.AuthenticationError: + logger.info("OpenAI API Key is invalid.") + except Exception as e: + logger.info(f"An error occurred while verifying the API Key: {e}") + else: + logger.info("No OpenAI API KEY provided. 
Will use TGI/VLLM and TEI endpoints") + # works w VLLM too + llm = OpenAILike( + model=LLM_MODEL_ID, + api_base=TGI_LLM_ENDPOINT + "/v1", + api_key="fake", + timeout=600, + temperature=0.7, + max_tokens=int(MAX_OUTPUT_TOKENS), + ) + emb_name = get_attribute_from_tgi_endpoint(TEI_EMBEDDING_ENDPOINT, "model_id") + embed_model = TextEmbeddingsInference( + base_url=TEI_EMBEDDING_ENDPOINT, + model_name=emb_name, + timeout=600, # timeout in seconds + embed_batch_size=10, # batch size for embedding + ) + Settings.embed_model = embed_model + Settings.llm = llm + + logger.info("Creating graph store from existing...") + start = time.time() + # pre-existiing graph store (created with data_prep/llama-index/extract_graph_neo4j.py) + graph_store = GraphRAGStore(username=NEO4J_USERNAME, password=NEO4J_PASSWORD, url=NEO4J_URL, llm=llm) + logger.info(f"Time to create graph store: {time.time() - start:.2f} seconds") + + logger.info("Creating index from existing...") + start = time.time() + index = PropertyGraphIndex.from_existing( + property_graph_store=graph_store, + embed_model=embed_model or Settings.embed_model, + embed_kg_nodes=True, + ) + logger.info(f"Time to create index: {time.time() - start:.2f} seconds") + + query_engine = GraphRAGQueryEngine( + graph_store=index.property_graph_store, + llm=llm, + index=index, + similarity_top_k=3, + ) + return query_engine + + def check_health(self) -> bool: + """Checks the health of the retriever service. + + Returns: + bool: True if the service is reachable and healthy, False otherwise. + """ + if logflag: + logger.info("[ check health ] start to check health of neo4j") + try: + result = self.query_engine.query("health check") + logger.info(f"[ check health ] result: {result}") + logger.info("[ check health ] Successfully connected to Neo4j!") + return True + except Exception as e: + logger.info(f"[ check health ] Failed to connect to Neo4j: {e}") + return False + + async def invoke(self, input: Union[ChatCompletionRequest]) -> list: + """Search the Neo4j index for the most similar documents to the input query. + + Args: + input (ChatCompletionRequest): The input query to search for. + Output: + list: The retrieved documents. 
+ """ + if logflag: + logger.info(input) + + if isinstance(input.messages, str): + query = input.messages + else: + query = input.messages[0]["content"] + logger.info(f"Query received in retriever: {query}") + + # The answers from the community summaries + search_res = self.query_engine.query(query) + + if logflag: + logger.info(f"retrieve result: {search_res}") + + return search_res diff --git a/comps/retrievers/src/integrations/opensearch.py b/comps/retrievers/src/integrations/opensearch.py new file mode 100644 index 0000000000..30514d7107 --- /dev/null +++ b/comps/retrievers/src/integrations/opensearch.py @@ -0,0 +1,173 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + + +import os +from typing import Callable, List, Union + +import numpy as np +from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceHubEmbeddings +from langchain_community.vectorstores import OpenSearchVectorSearch +from pydantic import conlist + +from comps import CustomLogger, EmbedDoc, OpeaComponent, OpeaComponentRegistry, ServiceType +from comps.cores.proto.api_protocol import ChatCompletionRequest, RetrievalRequest + +from .config import ( + EMBED_MODEL, + OPENSEARCH_INDEX_NAME, + OPENSEARCH_INITIAL_ADMIN_PASSWORD, + OPENSEARCH_URL, + TEI_EMBEDDING_ENDPOINT, +) + +logger = CustomLogger("opensearch_retrievers") +logflag = os.getenv("LOGFLAG", False) + + +@OpeaComponentRegistry.register("OPEA_RETRIEVER_OPENSEARCH") +class OpeaOpensearchRetriever(OpeaComponent): + """A specialized retriever component derived from OpeaComponent for opensearch retriever services. + + Attributes: + client (Opensearch): An instance of the opensearch client for vector database operations. + """ + + def __init__(self, name: str, description: str, config: dict = None): + super().__init__(name, ServiceType.RETRIEVER.name.lower(), description, config) + + self.embedder = self._initialize_embedder() + self.opensearch_url = OPENSEARCH_URL + self.opensearch_index_name = OPENSEARCH_INDEX_NAME + self.vector_db = self._initialize_client() + health_status = self.check_health() + if not health_status: + logger.error("OpeaOpensearchRetriever health check failed.") + + def _initialize_embedder(self): + if TEI_EMBEDDING_ENDPOINT: + # create embeddings using TEI endpoint service + if logflag: + logger.info(f"[ init embedder ] TEI_EMBEDDING_ENDPOINT:{TEI_EMBEDDING_ENDPOINT}") + embeddings = HuggingFaceHubEmbeddings(model=TEI_EMBEDDING_ENDPOINT) + else: + # create embeddings using local embedding model + if logflag: + logger.info(f"[ init embedder ] LOCAL_EMBEDDING_MODEL:{EMBED_MODEL}") + embeddings = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) + return embeddings + + def _initialize_client(self): + """Initializes the opensearch client.""" + auth = ("admin", OPENSEARCH_INITIAL_ADMIN_PASSWORD) + vector_db = OpenSearchVectorSearch( + opensearch_url=self.opensearch_url, + index_name=self.opensearch_index_name, + embedding_function=self.embedder, + http_auth=auth, + use_ssl=True, + verify_certs=False, + ssl_assert_hostname=False, + ssl_show_warn=False, + ) + return vector_db + + def check_health(self) -> bool: + """Checks the health of the retriever service. + + Returns: + bool: True if the service is reachable and healthy, False otherwise. 
+ """ + if logflag: + logger.info("[ check health ] start to check health of opensearch") + try: + _ = self.vector_db.client.indices.exists(index=self.opensearch_index_name) + logger.info("[ check health ] Successfully connected to Opensearch!") + return True + except Exception as e: + logger.info(f"[ check health ] Failed to connect to Opensearch: {e}") + return False + + async def invoke(self, input: Union[EmbedDoc, RetrievalRequest, ChatCompletionRequest]) -> list: + """Search the Opensearch index for the most similar documents to the input query. + + Args: + input (EmbedDoc): The input query to search for. + Output: + list: The retrieved documents. + """ + if logflag: + logger.info(input) + + index_exists = self.vector_db.client.indices.exists(index=self.opensearch_index_name) + if index_exists: + doc_count = self.vector_db.client.count(index=self.opensearch_index_name)["count"] + if (not index_exists) or doc_count == 0: + search_res = [] + else: + if isinstance(input, EmbedDoc): + query = input.text + else: + # for RetrievalRequest, ChatCompletionRequest + query = input.input + # if the OpenSearch index has data, perform the search + if input.search_type == "similarity": + search_res = await self.search_all_embeddings_vectors( + embeddings=input.embedding, + func=self.vector_db.asimilarity_search_by_vector, + k=input.k, + ) + elif input.search_type == "similarity_distance_threshold": + if input.distance_threshold is None: + raise ValueError( + "distance_threshold must be provided for " + "similarity_distance_threshold retriever" + ) + search_res = await self.search_all_embeddings_vectors( + embeddings=input.embedding, + func=self.vector_db.asimilarity_search_by_vector, + k=input.k, + distance_threshold=input.distance_threshold, + ) + elif input.search_type == "similarity_score_threshold": + doc_and_similarities = await self.vector_db.asimilarity_search_with_relevance_scores( + query=query, k=input.k, score_threshold=input.score_threshold + ) + search_res = [doc for doc, _ in doc_and_similarities] + elif input.search_type == "mmr": + search_res = await self.vector_db.amax_marginal_relevance_search( + query=query, k=input.k, fetch_k=input.fetch_k, lambda_mult=input.lambda_mult + ) + else: + raise ValueError(f"{input.search_type} not valid") + + if logflag: + logger.info(f"retrieve result: {search_res}") + + return search_res + + async def search_all_embeddings_vectors( + self, + embeddings: Union[conlist(float, min_length=0), List[conlist(float, min_length=0)]], + func: Callable, + *args, + **kwargs, + ): + try: + if not isinstance(embeddings, np.ndarray): + embeddings = np.array(embeddings) + + if not np.issubdtype(embeddings.dtype, np.floating): + raise ValueError("All embeddings values must be floating point numbers") + + if embeddings.ndim == 1: + return await func(embedding=embeddings, *args, **kwargs) + elif embeddings.ndim == 2: + responses = [] + for emb in embeddings: + response = await func(embedding=emb, *args, **kwargs) + responses.extend(response) + return responses + else: + raise ValueError("Embeddings must be one or two dimensional") + except Exception as e: + raise ValueError(f"Embedding data is not valid: {e}") diff --git a/comps/retrievers/src/integrations/pathway.py b/comps/retrievers/src/integrations/pathway.py new file mode 100644 index 0000000000..29dd85dc60 --- /dev/null +++ b/comps/retrievers/src/integrations/pathway.py @@ -0,0 +1,73 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + + +import os + +from 
langchain_community.vectorstores import PathwayVectorClient + +from comps import CustomLogger, EmbedDoc, OpeaComponent, OpeaComponentRegistry, SearchedDoc, ServiceType + +from .config import PATHWAY_HOST, PATHWAY_PORT + +logger = CustomLogger("pathway_retrievers") +logflag = os.getenv("LOGFLAG", False) + + +@OpeaComponentRegistry.register("OPEA_RETRIEVER_PATHWAY") +class OpeaPathwayRetriever(OpeaComponent): + """A specialized retriever component derived from OpeaComponent for pathway retriever services. + + Attributes: + client (Pathway): An instance of the pathway client for vector database operations. + """ + + def __init__(self, name: str, description: str, config: dict = None): + super().__init__(name, ServiceType.RETRIEVER.name.lower(), description, config) + + self.host = PATHWAY_HOST + self.port = PATHWAY_PORT + self.client = self._initialize_client() + health_status = self.check_health() + if not health_status: + logger.error("OpeaPathwayRetriever health check failed.") + + def _initialize_client(self) -> PathwayVectorClient: + """Initializes the pathway client.""" + pw_client = PathwayVectorClient(host=self.host, port=self.port) + + return pw_client + + def check_health(self) -> bool: + """Checks the health of the retriever service. + + Returns: + bool: True if the service is reachable and healthy, False otherwise. + """ + if logflag: + logger.info("[ check health ] start to check health of Pathway") + try: + # Check the status of the Pathway service + _ = self.client.client + logger.info("[ check health ] Successfully connected to Pathway!") + return True + except Exception as e: + logger.info(f"[ check health ] Failed to connect to Pathway: {e}") + return False + + async def invoke(self, input: EmbedDoc) -> SearchedDoc: + """Search the Pathway index for the most similar documents to the input query. + + Args: + input (EmbedDoc): The input query to search for. + Output: + Union[SearchedDoc, RetrievalResponse, ChatCompletionRequest]: The retrieved documents. + """ + if logflag: + logger.info(f"[ similarity search ] input: {input}") + + search_res = self.client.similarity_search(input.text, input.fetch_k) + + if logflag: + logger.info(f"[ similarity search ] search result: {search_res}") + return search_res diff --git a/comps/retrievers/src/integrations/pgvector.py b/comps/retrievers/src/integrations/pgvector.py new file mode 100644 index 0000000000..164a382473 --- /dev/null +++ b/comps/retrievers/src/integrations/pgvector.py @@ -0,0 +1,91 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + + +import os + +from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceHubEmbeddings +from langchain_community.vectorstores import PGVector + +from comps import CustomLogger, EmbedDoc, OpeaComponent, OpeaComponentRegistry, ServiceType + +from .config import EMBED_MODEL, PG_CONNECTION_STRING, PG_INDEX_NAME, TEI_EMBEDDING_ENDPOINT + +logger = CustomLogger("pgvector_retrievers") +logflag = os.getenv("LOGFLAG", False) + + +@OpeaComponentRegistry.register("OPEA_RETRIEVER_PGVECTOR") +class OpeaPGVectorRetriever(OpeaComponent): + """A specialized retriever component derived from OpeaComponent for pgvector retriever services. + + Attributes: + client (PGVector): An instance of the pgvector client for vector database operations. 
+ """ + + def __init__(self, name: str, description: str, config: dict = None): + super().__init__(name, ServiceType.RETRIEVER.name.lower(), description, config) + + self.embedder = self._initialize_embedder() + self.pg_connection_string = PG_CONNECTION_STRING + self.pg_index_name = PG_INDEX_NAME + self.vector_db = self._initialize_client() + health_status = self.check_health() + if not health_status: + logger.error("OpeaPGVectorRetriever health check failed.") + + def _initialize_embedder(self): + if TEI_EMBEDDING_ENDPOINT: + # create embeddings using TEI endpoint service + if logflag: + logger.info(f"[ init embedder ] TEI_EMBEDDING_ENDPOINT:{TEI_EMBEDDING_ENDPOINT}") + embeddings = HuggingFaceHubEmbeddings(model=TEI_EMBEDDING_ENDPOINT) + else: + # create embeddings using local embedding model + if logflag: + logger.info(f"[ init embedder ] LOCAL_EMBEDDING_MODEL:{EMBED_MODEL}") + embeddings = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) + return embeddings + + def _initialize_client(self) -> PGVector: + """Initializes the pgvector client.""" + vector_db = PGVector( + embedding_function=self.embedder, + collection_name=self.pg_index_name, + connection_string=self.pg_connection_string, + ) + return vector_db + + def check_health(self) -> bool: + """Checks the health of the retriever service. + + Returns: + bool: True if the service is reachable and healthy, False otherwise. + """ + if logflag: + logger.info("[ check health ] start to check health of PGvector") + try: + # Check the status of the PGVector service + self.vector_db.create_collection() + logger.info("[ check health ] Successfully connected to PGvector!") + return True + except Exception as e: + logger.info(f"[ check health ] Failed to connect to PGvector: {e}") + return False + + async def invoke(self, input: EmbedDoc) -> list: + """Search the PGVector index for the most similar documents to the input query. + + Args: + input (EmbedDoc): The input query to search for. + Output: + list: The retrieved documents. + """ + if logflag: + logger.info(f"[ similarity search ] input: {input}") + + search_res = await self.vector_db.asimilarity_search_by_vector(embedding=input.embedding) + + if logflag: + logger.info(f"[ similarity search ] search result: {search_res}") + return search_res diff --git a/comps/retrievers/src/integrations/pinecone.py b/comps/retrievers/src/integrations/pinecone.py new file mode 100644 index 0000000000..c8c048a42b --- /dev/null +++ b/comps/retrievers/src/integrations/pinecone.py @@ -0,0 +1,135 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + + +import os +import time + +from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceHubEmbeddings +from langchain_pinecone import PineconeVectorStore +from pinecone import Pinecone, ServerlessSpec + +from comps import CustomLogger, EmbedDoc, OpeaComponent, OpeaComponentRegistry, ServiceType + +from .config import EMBED_MODEL, PINECONE_API_KEY, PINECONE_INDEX_NAME, TEI_EMBEDDING_ENDPOINT + +logger = CustomLogger("pinecone_retrievers") +logflag = os.getenv("LOGFLAG", False) + + +@OpeaComponentRegistry.register("OPEA_RETRIEVER_PINECONE") +class OpeaPineconeRetriever(OpeaComponent): + """A specialized retriever component derived from OpeaComponent for pinecone retriever services. + + Attributes: + client (Pinecone): An instance of the pinecone client for vector database operations. 
+ """ + + def __init__(self, name: str, description: str, config: dict = None): + super().__init__(name, ServiceType.RETRIEVER.name.lower(), description, config) + + self.embedder = self._initialize_embedder() + self.pinecone_api_key = PINECONE_API_KEY + self.pinecone_index = PINECONE_INDEX_NAME + self.pc, self.index, self.vector_db = self._initialize_client() + health_status = self.check_health() + if not health_status: + logger.error("OpeaPineconeRetriever health check failed.") + + def _initialize_embedder(self): + if TEI_EMBEDDING_ENDPOINT: + # create embeddings using TEI endpoint service + if logflag: + logger.info(f"[ init embedder ] TEI_EMBEDDING_ENDPOINT:{TEI_EMBEDDING_ENDPOINT}") + embeddings = HuggingFaceHubEmbeddings(model=TEI_EMBEDDING_ENDPOINT) + else: + # create embeddings using local embedding model + if logflag: + logger.info(f"[ init embedder ] LOCAL_EMBEDDING_MODEL:{EMBED_MODEL}") + embeddings = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) + return embeddings + + def _initialize_client(self) -> Pinecone: + """Initializes the pinecone client.""" + pc = Pinecone(api_key=self.pinecone_api_key) + spec = ServerlessSpec(cloud="aws", region="us-east-1") + existing_indexes = [index_info["name"] for index_info in pc.list_indexes()] + if self.pinecone_index in existing_indexes: + pc.configure_index(self.pinecone_index, deletion_protection="disabled") + pc.delete_index(self.pinecone_index) + time.sleep(1) + + pc.create_index( + self.pinecone_index, + dimension=768, + deletion_protection="disabled", + spec=spec, + ) + while not pc.describe_index(self.pinecone_index).status["ready"]: + time.sleep(1) + + index = pc.Index(self.pinecone_index) + vector_db = PineconeVectorStore(index=index, embedding=self.embedder) + return pc, index, vector_db + + def check_health(self) -> bool: + """Checks the health of the retriever service. + + Returns: + bool: True if the service is reachable and healthy, False otherwise. + """ + if logflag: + logger.info("[ check health ] start to check health of pinecone") + try: + health_status = self.index.describe_index_stats() + logger.info(f"[ check health ] health status: {health_status}") + logger.info("[ check health ] Successfully connected to Pinecone!") + return True + except Exception as e: + logger.info(f"[ check health ] Failed to connect to Pinecone: {e}") + return False + + async def invoke(self, input: EmbedDoc) -> list: + """Search the Pinecone index for the most similar documents to the input query. + + Args: + input (EmbedDoc): The input query to search for. + Output: + list: The retrieved documents. 
+ """ + if logflag: + logger.info(input) + + # return empty result if the index has no data + if self.index.describe_index_stats()["total_vector_count"] == 0: + if logflag: + logger.info("[ invoke ] Pinecone index has no data.") + return [] + + # perform the search + search_res = self.vector_db.max_marginal_relevance_search(query=input.text, k=input.k, fetch_k=input.fetch_k) + # if the Pinecone index has data, perform the search + if input.search_type == "similarity": + docs_and_similarities = self.vector_db.similarity_search_by_vector_with_score( + embedding=input.embedding, k=input.k + ) + search_res = [doc for doc, _ in docs_and_similarities] + elif input.search_type == "similarity_distance_threshold": + if input.distance_threshold is None: + raise ValueError("distance_threshold must be provided for " + "similarity_distance_threshold retriever") + docs_and_similarities = self.vector_db.similarity_search_by_vector_with_score( + embedding=input.embedding, k=input.k + ) + search_res = [doc for doc, similarity in docs_and_similarities if similarity > input.distance_threshold] + elif input.search_type == "similarity_score_threshold": + docs_and_similarities = self.vector_db.similarity_search_by_vector_with_score(query=input.text, k=input.k) + search_res = [doc for doc, similarity in docs_and_similarities if similarity > input.score_threshold] + elif input.search_type == "mmr": + search_res = self.vector_db.max_marginal_relevance_search( + query=input.text, k=input.k, fetch_k=input.fetch_k, lambda_mult=input.lambda_mult + ) + + if logflag: + logger.info(f"retrieve result: {search_res}") + + return search_res diff --git a/comps/retrievers/src/integrations/qdrant.py b/comps/retrievers/src/integrations/qdrant.py new file mode 100644 index 0000000000..202dcb93c0 --- /dev/null +++ b/comps/retrievers/src/integrations/qdrant.py @@ -0,0 +1,80 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + + +import os + +from haystack_integrations.components.retrievers.qdrant import QdrantEmbeddingRetriever +from haystack_integrations.document_stores.qdrant import QdrantDocumentStore + +from comps import CustomLogger, EmbedDoc, OpeaComponent, OpeaComponentRegistry, ServiceType + +from .config import QDRANT_EMBED_DIMENSION, QDRANT_HOST, QDRANT_INDEX_NAME, QDRANT_PORT + +logger = CustomLogger("qdrant_retrievers") +logflag = os.getenv("LOGFLAG", False) + + +@OpeaComponentRegistry.register("OPEA_RETRIEVER_QDRANT") +class OpeaQDrantRetriever(OpeaComponent): + """A specialized retriever component derived from OpeaComponent for qdrant retriever services. + + Attributes: + client (QDrant): An instance of the qdrant client for vector database operations. + """ + + def __init__(self, name: str, description: str, config: dict = None): + super().__init__(name, ServiceType.RETRIEVER.name.lower(), description, config) + + self.retriever = self._initialize_client() + health_status = self.check_health() + if not health_status: + logger.error("OpeaQDrantRetriever health check failed.") + + def _initialize_client(self) -> QdrantEmbeddingRetriever: + """Initializes the qdrant client.""" + qdrant_store = QdrantDocumentStore( + host=QDRANT_HOST, + port=QDRANT_PORT, + embedding_dim=QDRANT_EMBED_DIMENSION, + index=QDRANT_INDEX_NAME, + recreate_index=False, + ) + + retriever = QdrantEmbeddingRetriever(document_store=qdrant_store) + + return retriever + + def check_health(self) -> bool: + """Checks the health of the retriever service. 
+ + Returns: + bool: True if the service is reachable and healthy, False otherwise. + """ + if logflag: + logger.info("[ check health ] start to check health of QDrant") + try: + # Check the status of the QDrant service + _ = self.retriever.client + logger.info("[ check health ] Successfully connected to QDrant!") + return True + except Exception as e: + logger.info(f"[ check health ] Failed to connect to QDrant: {e}") + return False + + async def invoke(self, input: EmbedDoc) -> list: + """Search the QDrant index for the most similar documents to the input query. + + Args: + input (EmbedDoc): The input query to search for. + Output: + list: The retrieved documents. + """ + if logflag: + logger.info(f"[ similarity search ] input: {input}") + + search_res = self.retriever.run(query_embedding=input.embedding)["documents"] + + if logflag: + logger.info(f"[ similarity search ] search result: {search_res}") + return search_res diff --git a/comps/retrievers/src/integrations/redis.py b/comps/retrievers/src/integrations/redis.py index 4fe6c620f0..f71a0ae5f2 100644 --- a/comps/retrievers/src/integrations/redis.py +++ b/comps/retrievers/src/integrations/redis.py @@ -18,7 +18,7 @@ ) from comps.cores.proto.api_protocol import ChatCompletionRequest, EmbeddingResponse, RetrievalRequest, RetrievalResponse -from .config import BRIDGE_TOWER_EMBEDDING, EMBED_MODEL, INDEX_NAME, REDIS_URL, TEI_EMBEDDING_ENDPOINT +from .config import BRIDGE_TOWER_EMBEDDING, EMBED_MODEL, INDEX_NAME, INDEX_SCHEMA, REDIS_URL, TEI_EMBEDDING_ENDPOINT logger = CustomLogger("redis_retrievers") logflag = os.getenv("LOGFLAG", False) @@ -59,7 +59,13 @@ def __init__(self, name: str, description: str, config: dict = None): def _initialize_client(self) -> Redis: """Initializes the redis client.""" try: - client = Redis(embedding=self.embeddings, index_name=INDEX_NAME, redis_url=REDIS_URL) + if BRIDGE_TOWER_EMBEDDING: + logger.info(f"generate multimodal redis instance with {BRIDGE_TOWER_EMBEDDING}") + client = Redis( + embedding=self.embeddings, index_name=INDEX_NAME, index_schema=INDEX_SCHEMA, redis_url=REDIS_URL + ) + else: + client = Redis(embedding=self.embeddings, index_name=INDEX_NAME, redis_url=REDIS_URL) return client except Exception as e: logger.error(f"fail to initialize redis client: {e}") diff --git a/comps/retrievers/redis/langchain/redis_schema_multi.yml b/comps/retrievers/src/integrations/redis_schema_multi.yml similarity index 100% rename from comps/retrievers/redis/langchain/redis_schema_multi.yml rename to comps/retrievers/src/integrations/redis_schema_multi.yml diff --git a/comps/retrievers/src/integrations/vdms.py b/comps/retrievers/src/integrations/vdms.py new file mode 100644 index 0000000000..b6a44fdf14 --- /dev/null +++ b/comps/retrievers/src/integrations/vdms.py @@ -0,0 +1,145 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + + +import os +import time + +from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceHubEmbeddings +from langchain_community.vectorstores.vdms import VDMS, VDMS_Client + +from comps import CustomLogger, EmbedDoc, OpeaComponent, OpeaComponentRegistry, ServiceType + +from .config import ( + DISTANCE_STRATEGY, + EMBED_MODEL, + SEARCH_ENGINE, + TEI_EMBEDDING_ENDPOINT, + VDMS_HOST, + VDMS_INDEX_NAME, + VDMS_PORT, + VDMS_USE_CLIP, +) + +logger = CustomLogger("vdms_retrievers") +logflag = os.getenv("LOGFLAG", False) + + +@OpeaComponentRegistry.register("OPEA_RETRIEVER_VDMS") +class OpeaVDMsRetriever(OpeaComponent): + """A specialized retriever 
component derived from OpeaComponent for vdms retriever services.
+
+    Attributes:
+        client (VDMs): An instance of the vdms client for vector database operations.
+    """
+
+    def __init__(self, name: str, description: str, config: dict = None):
+        super().__init__(name, ServiceType.RETRIEVER.name.lower(), description, config)
+
+        self.embedder = self._initialize_embedder()
+        self.client = VDMS_Client(VDMS_HOST, VDMS_PORT)
+        self.vector_db = self._initialize_vector_db()
+        health_status = self.check_health()
+        if not health_status:
+            logger.error("OpeaVDMsRetriever health check failed.")
+
+    def _initialize_embedder(self):
+        if VDMS_USE_CLIP:
+            from comps.third_parties.clip.src.clip_embedding import vCLIP
+
+            embeddings = vCLIP({"model_name": "openai/clip-vit-base-patch32", "num_frm": 64})
+        elif TEI_EMBEDDING_ENDPOINT:
+            # create embeddings using TEI endpoint service
+            if logflag:
+                logger.info(f"[ init embedder ] TEI_EMBEDDING_ENDPOINT:{TEI_EMBEDDING_ENDPOINT}")
+            embeddings = HuggingFaceHubEmbeddings(model=TEI_EMBEDDING_ENDPOINT)
+        else:
+            # create embeddings using local embedding model
+            if logflag:
+                logger.info(f"[ init embedder ] LOCAL_EMBEDDING_MODEL:{EMBED_MODEL}")
+            embeddings = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL)
+        return embeddings
+
+    def _initialize_vector_db(self) -> VDMS:
+        """Initializes the vdms client."""
+        if VDMS_USE_CLIP:
+            dimensions = self.embedder.get_embedding_length()
+            vector_db = VDMS(
+                client=self.client,
+                embedding=self.embedder,
+                collection_name=VDMS_INDEX_NAME,
+                embedding_dimensions=dimensions,
+                distance_strategy=DISTANCE_STRATEGY,
+                engine=SEARCH_ENGINE,
+            )
+        else:
+            vector_db = VDMS(
+                client=self.client,
+                embedding=self.embedder,
+                collection_name=VDMS_INDEX_NAME,
+                distance_strategy=DISTANCE_STRATEGY,
+                engine=SEARCH_ENGINE,
+            )
+        return vector_db
+
+    def check_health(self) -> bool:
+        """Checks the health of the retriever service.
+
+        Returns:
+            bool: True if the service is reachable and healthy, False otherwise.
+        """
+        if logflag:
+            logger.info("[ check health ] start to check health of vdms")
+        try:
+            if self.vector_db:
+                logger.info("[ check health ] Successfully connected to VDMs!")
+                return True
+            else:
+                logger.info("[ check health ] Failed to connect to VDMs.")
+                return False
+        except Exception as e:
+            logger.info(f"[ check health ] Failed to connect to VDMs: {e}")
+            return False
+
+    async def invoke(self, input: EmbedDoc) -> list:
+        """Search the VDMs index for the most similar documents to the input query.
+
+        Args:
+            input (EmbedDoc): The input query to search for.
+        Output:
+            list: The retrieved documents.
+ """ + if logflag: + logger.info(input) + + if input.search_type == "similarity": + search_res = self.vector_db.similarity_search_by_vector( + embedding=input.embedding, k=input.k, filter=input.constraints + ) + elif input.search_type == "similarity_distance_threshold": + if input.distance_threshold is None: + raise ValueError("distance_threshold must be provided for " + "similarity_distance_threshold retriever") + search_res = self.vector_db.similarity_search_by_vector( + embedding=input.embedding, + k=input.k, + distance_threshold=input.distance_threshold, + filter=input.constraints, + ) + elif input.search_type == "similarity_score_threshold": + docs_and_similarities = self.vector_db.similarity_search_with_relevance_scores( + query=input.text, k=input.k, score_threshold=input.score_threshold, filter=input.constraints + ) + search_res = [doc for doc, _ in docs_and_similarities] + elif input.search_type == "mmr": + search_res = self.vector_db.max_marginal_relevance_search( + query=input.text, + k=input.k, + fetch_k=input.fetch_k, + lambda_mult=input.lambda_mult, + filter=input.constraints, + ) + + if logflag: + logger.info(f"retrieve result: {search_res}") + + return search_res diff --git a/comps/retrievers/src/opea_retrievers_microservice.py b/comps/retrievers/src/opea_retrievers_microservice.py index 273c4d9a91..d1d8624f49 100644 --- a/comps/retrievers/src/opea_retrievers_microservice.py +++ b/comps/retrievers/src/opea_retrievers_microservice.py @@ -6,8 +6,17 @@ import time from typing import Union +# import for retrievers component registration +from integrations.elasticsearch import OpeaElasticsearchRetriever from integrations.milvus import OpeaMilvusRetriever +from integrations.neo4j import OpeaNeo4jRetriever +from integrations.opensearch import OpeaOpensearchRetriever +from integrations.pathway import OpeaPathwayRetriever +from integrations.pgvector import OpeaPGVectorRetriever +from integrations.pinecone import OpeaPineconeRetriever +from integrations.qdrant import OpeaQDrantRetriever from integrations.redis import OpeaRedisRetriever +from integrations.vdms import OpeaVDMsRetriever from comps import ( CustomLogger, @@ -66,6 +75,12 @@ async def ingest_files( if isinstance(input, EmbedDoc) or isinstance(input, EmbedMultimodalDoc): metadata_list = [] for r in response: + # If the input had an image, pass that through in the metadata along with the search result image + if isinstance(input, EmbedMultimodalDoc) and input.base64_image: + if r.metadata["b64_img_str"]: + r.metadata["b64_img_str"] = [input.base64_image, r.metadata["b64_img_str"]] + else: + r.metadata["b64_img_str"] = input.base64_image metadata_list.append(r.metadata) retrieved_docs.append(TextDoc(text=r.page_content)) result = SearchedMultimodalDoc( @@ -73,7 +88,10 @@ async def ingest_files( ) else: for r in response: - retrieved_docs.append(RetrievalResponseData(text=r.page_content, metadata=r.metadata)) + if isinstance(r, str): + retrieved_docs.append(RetrievalResponseData(text=r, metadata=None)) + else: + retrieved_docs.append(RetrievalResponseData(text=r.page_content, metadata=r.metadata)) if isinstance(input, RetrievalRequest): result = RetrievalResponse(retrieved_docs=retrieved_docs) elif isinstance(input, ChatCompletionRequest): diff --git a/comps/retrievers/src/requirements.txt b/comps/retrievers/src/requirements.txt index c15e7811a9..a04fef1771 100644 --- a/comps/retrievers/src/requirements.txt +++ b/comps/retrievers/src/requirements.txt @@ -1,15 +1,42 @@ +bs4 +cairosvg docarray[full] +docx2txt easyocr fastapi 
-langchain_community --extra-index-url https://download.pytorch.org/whl/cpu -langchain_huggingface --extra-index-url https://download.pytorch.org/whl/cpu -langchain_milvus --extra-index-url https://download.pytorch.org/whl/cpu -opentelemetry-api -opentelemetry-exporter-otlp -opentelemetry-sdk +future +graspologic +haystack-ai==2.3.1 +langchain-elasticsearch +langchain-pinecone +langchain_community +langchain_huggingface +langchain_milvus +llama-index-core +llama-index-embeddings-openai +llama-index-embeddings-text-embeddings-inference +llama-index-llms-openai +llama-index-llms-openai-like +llama-index-llms-text-generation-inference +llama_index_graph_stores_neo4j +neo4j +numpy +opensearch-py +pathway +pgvector prometheus-fastapi-instrumentator +protobuf==4.24.2 +psycopg2-binary +pydantic pymupdf +pytesseract +python-docx +python-multipart +python-pptx +qdrant-haystack redis sentence_transformers shortuuid +tiktoken uvicorn +vdms==0.0.21 diff --git a/comps/retrievers/vdms/langchain/Dockerfile b/comps/retrievers/vdms/langchain/Dockerfile deleted file mode 100644 index ce79a6b376..0000000000 --- a/comps/retrievers/vdms/langchain/Dockerfile +++ /dev/null @@ -1,37 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -FROM python:3.11-slim - -ARG ARCH="cpu" - -RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ - iputils-ping \ - libgl1-mesa-glx \ - libjemalloc-dev - -RUN useradd -m -s /bin/bash user && \ - mkdir -p /home/user && \ - chown -R user /home/user/ - -COPY comps /home/user/comps - -USER user -RUN pip install --no-cache-dir --upgrade pip setuptools && \ - if [ ${ARCH} = "cpu" ]; then pip install --no-cache-dir torch torchvision --index-url https://download.pytorch.org/whl/cpu; fi && \ - pip install --no-cache-dir -r /home/user/comps/retrievers/vdms/langchain/requirements.txt - -RUN pip install --no-cache-dir -U \ - huggingface-hub \ - langchain \ - langchain-community - -ENV PYTHONPATH=$PYTHONPATH:/home/user - -ENV HUGGINGFACEHUB_API_TOKEN=dummy - -ENV USECLIP 0 - -WORKDIR /home/user/comps/retrievers/vdms/langchain - -ENTRYPOINT ["python", "retriever_vdms.py"] diff --git a/comps/retrievers/vdms/langchain/__init__.py b/comps/retrievers/vdms/langchain/__init__.py deleted file mode 100644 index 916f3a44b2..0000000000 --- a/comps/retrievers/vdms/langchain/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 diff --git a/comps/retrievers/vdms/langchain/requirements.txt b/comps/retrievers/vdms/langchain/requirements.txt deleted file mode 100644 index 44d80c13b6..0000000000 --- a/comps/retrievers/vdms/langchain/requirements.txt +++ /dev/null @@ -1,18 +0,0 @@ -docarray[full] -easyocr -einops -fastapi -langchain-community -langchain-core -langchain-huggingface -opentelemetry-api -opentelemetry-exporter-otlp -opentelemetry-proto==1.23.0 -opentelemetry-sdk -prometheus-fastapi-instrumentator -protobuf==4.24.2 -pymupdf -sentence_transformers -shortuuid -uvicorn -vdms>=0.0.20 diff --git a/comps/retrievers/vdms/langchain/retriever_vdms.py b/comps/retrievers/vdms/langchain/retriever_vdms.py deleted file mode 100644 index ebd8ac23e6..0000000000 --- a/comps/retrievers/vdms/langchain/retriever_vdms.py +++ /dev/null @@ -1,118 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import os -import time - -from langchain_community.embeddings import HuggingFaceBgeEmbeddings -from langchain_community.vectorstores.vdms import VDMS, 
VDMS_Client -from langchain_huggingface.embeddings import HuggingFaceEndpointEmbeddings -from vdms_config import DEBUG, DISTANCE_STRATEGY, EMBED_MODEL, INDEX_NAME, SEARCH_ENGINE, VDMS_HOST, VDMS_PORT - -from comps import ( - EmbedDoc, - SearchedMultimodalDoc, - ServiceType, - TextDoc, - opea_microservices, - register_microservice, - register_statistics, - statistics_dict, -) - -tei_embedding_endpoint = os.getenv("TEI_EMBEDDING_ENDPOINT") -hf_token = os.getenv("HUGGINGFACEHUB_API_TOKEN") -use_clip = int(os.getenv("USECLIP")) - -if use_clip: - import sys - - sys.path.append("../../../embeddings/multimodal_clip/") - from embeddings_clip import vCLIP - -# Debugging -if DEBUG: - all_variables = dir() - - for name in all_variables: - # Print the item if it doesn't start with '__' - if not name.startswith("__"): - myvalue = eval(name) - print(name, "is", type(myvalue), "and = ", myvalue) - - -client = VDMS_Client(VDMS_HOST, VDMS_PORT) - - -@register_microservice( - name="opea_service@retriever_vdms", - service_type=ServiceType.RETRIEVER, - endpoint="/v1/retrieval", - host="0.0.0.0", - port=7000, -) -@register_statistics(names=["opea_service@retriever_vdms"]) -def retrieve(input: EmbedDoc) -> SearchedMultimodalDoc: - start = time.time() - - if input.search_type == "similarity": - search_res = vector_db.similarity_search_by_vector( - embedding=input.embedding, k=input.k, filter=input.constraints - ) - elif input.search_type == "similarity_distance_threshold": - if input.distance_threshold is None: - raise ValueError("distance_threshold must be provided for " + "similarity_distance_threshold retriever") - search_res = vector_db.similarity_search_by_vector( - embedding=input.embedding, k=input.k, distance_threshold=input.distance_threshold, filter=input.constraints - ) - elif input.search_type == "similarity_score_threshold": - docs_and_similarities = vector_db.similarity_search_with_relevance_scores( - query=input.text, k=input.k, score_threshold=input.score_threshold, filter=input.constraints - ) - search_res = [doc for doc, _ in docs_and_similarities] - elif input.search_type == "mmr": - search_res = vector_db.max_marginal_relevance_search( - query=input.text, k=input.k, fetch_k=input.fetch_k, lambda_mult=input.lambda_mult, filter=input.constraints - ) - searched_docs = [] - metadata_list = [] - for r in search_res: - searched_docs.append(TextDoc(text=r.page_content)) - metadata_list.append(r.metadata) - result = SearchedMultimodalDoc(retrieved_docs=searched_docs, metadata=metadata_list, initial_query=input.text) - statistics_dict["opea_service@retriever_vdms"].append_latency(time.time() - start, None) - return result - - -if __name__ == "__main__": - # Create vectorstore - - if use_clip: - embeddings = vCLIP({"model_name": "openai/clip-vit-base-patch32", "num_frm": 64}) - dimensions = embeddings.get_embedding_length() - elif tei_embedding_endpoint: - embeddings = HuggingFaceEndpointEmbeddings(model=tei_embedding_endpoint, huggingfacehub_api_token=hf_token) - else: - embeddings = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) - # create embeddings using local embedding model - - if use_clip: - vector_db = VDMS( - client=client, - embedding=embeddings, - collection_name=INDEX_NAME, - embedding_dimensions=dimensions, - distance_strategy=DISTANCE_STRATEGY, - engine=SEARCH_ENGINE, - ) - else: - vector_db = VDMS( - client=client, - embedding=embeddings, - collection_name=INDEX_NAME, - # embedding_dimensions=768, - distance_strategy=DISTANCE_STRATEGY, - engine=SEARCH_ENGINE, - ) - - 
opea_microservices["opea_service@retriever_vdms"].start() diff --git a/comps/retrievers/vdms/langchain/vdms_config.py b/comps/retrievers/vdms/langchain/vdms_config.py deleted file mode 100644 index 5b6a852139..0000000000 --- a/comps/retrievers/vdms/langchain/vdms_config.py +++ /dev/null @@ -1,80 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import os - - -def get_boolean_env_var(var_name, default_value=False): - """Retrieve the boolean value of an environment variable. - - Args: - var_name (str): The name of the environment variable to retrieve. - default_value (bool): The default value to return if the variable - is not found. - - Returns: - bool: The value of the environment variable, interpreted as a boolean. - """ - true_values = {"true", "1", "t", "y", "yes"} - false_values = {"false", "0", "f", "n", "no"} - - # Retrieve the environment variable's value - value = os.getenv(var_name, "").lower() - - # Decide the boolean value based on the content of the string - if value in true_values: - return True - elif value in false_values: - return False - else: - return default_value - - -# Whether or not to enable langchain debugging -DEBUG = get_boolean_env_var("DEBUG", False) -# Set DEBUG env var to "true" if you wish to enable LC debugging module -if DEBUG: - import langchain - - langchain.debug = True - - -# Embedding model -EMBED_MODEL = os.getenv("EMBED_MODEL", "BAAI/bge-base-en-v1.5") - - -# VDMS Connection Information -VDMS_HOST = os.getenv("VDMS_HOST", "localhost") -VDMS_PORT = int(os.getenv("VDMS_PORT", 55555)) - - -# def format_vdms_conn_from_env(): -# vdms_url = os.getenv("VDMS_URL", None) -# if vdms_url: -# return vdms_url -# else: -# using_ssl = get_boolean_env_var("VDMS_SSL", False) -# start = "vdmss://" if using_ssl else "vdms://" - -# # if using RBAC -# password = os.getenv("VDMS_PASSWORD", None) -# username = os.getenv("VDMS_USERNAME", "default") -# if password is not None: -# start += f"{username}:{password}@" - -# return start + f"{VDMS_HOST}:{VDMS_PORT}" - - -# VDMS_URL = format_vdms_conn_from_env() - -# Vector Index Configuration -INDEX_NAME = os.getenv("INDEX_NAME", "rag-vdms") -# HUGGINGFACEHUB_API_TOKEN ="dummy-token" - - -# current_file_path = os.path.abspath(__file__) -# parent_dir = os.path.dirname(current_file_path) -# VDMS_SCHEMA = os.getenv("VDMS_SCHEMA", "vdms_schema.yml") -# INDEX_SCHEMA = os.path.join(parent_dir, VDMS_SCHEMA) -SEARCH_ENGINE = "FaissFlat" -DISTANCE_STRATEGY = "IP" diff --git a/comps/retrievers/vdms/langchain/vdms_langchain.yaml b/comps/retrievers/vdms/langchain/vdms_langchain.yaml deleted file mode 100644 index 75efdf36a3..0000000000 --- a/comps/retrievers/vdms/langchain/vdms_langchain.yaml +++ /dev/null @@ -1,32 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -version: "3.8" - -services: - tei_xeon_service: - image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 - container_name: tei-xeon-server - ports: - - "6060:80" - volumes: - - "./data:/data" - shm_size: 1g - command: --model-id ${RETRIEVE_MODEL_ID} - retriever: - image: opea/retriever-vdms:latest - container_name: retriever-vdms-server - ports: - - "7000:7000" - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - INDEX_NAME: ${INDEX_NAME} - LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} - restart: unless-stopped - -networks: - default: - driver: bridge diff --git a/comps/text2image/deployment/docker_compose/compose.yaml 
b/comps/text2image/deployment/docker_compose/compose.yaml new file mode 100644 index 0000000000..58826edd25 --- /dev/null +++ b/comps/text2image/deployment/docker_compose/compose.yaml @@ -0,0 +1,31 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +services: + text2image: + image: ${REGISTRY:-opea}/text2image:${TAG:-latest} + container_name: text2image + ports: + - "9379:9379" + environment: + - no_proxy=${no_proxy} + - https_proxy=${https_proxy} + - http_proxy=${http_proxy} + - MODEL=${MODEL} + - HF_TOKEN=${HF_TOKEN} + ipc: host + restart: always + text2image-gaudi: + extends: text2image + image: ${REGISTRY:-opea}/text2image-gaudi:${TAG:-latest} + container_name: text2image-gaudi + environment: + - HABANA_VISIBLE_DEVICES=all + - OMPI_MCA_btl_vader_single_copy_mechanism=none + runtime: habana + cap_add: + - SYS_NICE + +networks: + default: + driver: bridge diff --git a/comps/text2image/deployment/kubernetes/README.md b/comps/text2image/deployment/kubernetes/README.md index e69de29bb2..6d5dab6d3b 100644 --- a/comps/text2image/deployment/kubernetes/README.md +++ b/comps/text2image/deployment/kubernetes/README.md @@ -0,0 +1,18 @@ +# Deploy text2image on kubernetes cluster + +- You should have Helm (version >= 3.15) installed. Refer to the [Helm Installation Guide](https://helm.sh/docs/intro/install/) for more information. +- For more deployment options, refer to [helm charts README](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts#readme). + +## Deploy on Xeon + +``` +export HFTOKEN="insert-your-huggingface-token-here" +helm install text2image oci://ghcr.io/opea-project/charts/text2image --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f cpu-values.yaml +``` + +## Deploy on Gaudi + +``` +export HFTOKEN="insert-your-huggingface-token-here" +helm install text2image oci://ghcr.io/opea-project/charts/text2image --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f gaudi-values.yaml +``` diff --git a/comps/text2image/deployment/kubernetes/cpu-values.yaml b/comps/text2image/deployment/kubernetes/cpu-values.yaml new file mode 100644 index 0000000000..8558896d42 --- /dev/null +++ b/comps/text2image/deployment/kubernetes/cpu-values.yaml @@ -0,0 +1,5 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +image: + repository: opea/text2image diff --git a/comps/text2image/deployment/kubernetes/gaudi-values.yaml b/comps/text2image/deployment/kubernetes/gaudi-values.yaml new file mode 100644 index 0000000000..184e96b0c4 --- /dev/null +++ b/comps/text2image/deployment/kubernetes/gaudi-values.yaml @@ -0,0 +1,34 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +accelDevice: "gaudi" + +image: + repository: opea/text2image-gaudi + +resources: + limits: + habana.ai/gaudi: 1 + # The following hugepage related settings is for default MODEL stable-diffusion-v1-5/stable-diffusion-v1-5 + # User should change the resource limits for other models + hugepages-2Mi: 256Mi + +volumes: + - name: hugepage-2mi + emptyDir: + medium: HugePages-2Mi +volumeMounts: + - name: hugepage-2mi + mountPath: /hugepages-2Mi + +OMPI_MCA_btl_vader_single_copy_mechanism: "none" + +readinessProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 1 +startupProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 1 + failureThreshold: 120 diff --git a/comps/text2image/src/Dockerfile.intel_hpu b/comps/text2image/src/Dockerfile.intel_hpu index b142bbb441..a8cbbfb478 100644 --- 
a/comps/text2image/src/Dockerfile.intel_hpu +++ b/comps/text2image/src/Dockerfile.intel_hpu @@ -12,7 +12,7 @@ COPY comps /home/user/comps RUN chown -R user /home/user/comps/text2image RUN rm -rf /etc/ssh/ssh_host* -USER user + # Set environment variables ENV LANG=en_US.UTF-8 ENV PYTHONPATH=/home/user:/usr/lib/habanalabs/:/home/user/optimum-habana @@ -21,7 +21,7 @@ ENV PYTHONPATH=/home/user:/usr/lib/habanalabs/:/home/user/optimum-habana RUN pip install --no-cache-dir --upgrade pip && \ pip install --no-cache-dir -r /home/user/comps/text2image/src/requirements.txt && \ pip install --no-cache-dir optimum[habana] - +USER user WORKDIR /home/user/comps/text2image/src RUN echo python opea_text2image_microservice.py --device hpu --use_hpu_graphs --bf16 >> run.sh diff --git a/comps/text2image/src/README.md b/comps/text2image/src/README.md index 51120e00a8..474de8b6f2 100644 --- a/comps/text2image/src/README.md +++ b/comps/text2image/src/README.md @@ -82,6 +82,13 @@ Start text-to-image service on Xeon with below command: docker run --ipc=host -p 9379:9379 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e HF_TOKEN=$HF_TOKEN -e MODEL=$MODEL opea/text2image:latest ``` +Or use docker compose with below command: + +```bash +cd ../deployment/docker_compose +docker compose -f compose.yaml up text2image -d +``` + ### 2.2.2 Start Text-to-Image Service on Gaudi Start text-to-image service on Gaudi with below command: @@ -90,6 +97,13 @@ Start text-to-image service on Gaudi with below command: docker run -p 9379:9379 --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e HF_TOKEN=$HF_TOKEN -e MODEL=$MODEL opea/text2image-gaudi:latest ``` +Or use docker compose with below command: + +```bash +cd ../deployment/docker_compose +docker compose -f compose.yaml up text2image-gaudi -d +``` + # 3 Test Text-to-Image Service ```bash diff --git a/comps/text2sql/deployment/docker_compose/compose.yaml b/comps/text2sql/deployment/docker_compose/compose.yaml new file mode 100644 index 0000000000..53e70adf3a --- /dev/null +++ b/comps/text2sql/deployment/docker_compose/compose.yaml @@ -0,0 +1,44 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +include: + - ../../../third_parties/tgi/deployment/docker_compose/compose.yaml + +services: + postgres: + image: postgres:latest + container_name: postgres-container + restart: always + environment: + - POSTGRES_USER=${POSTGRES_USER} + - POSTGRES_PASSWORD=${POSTGRES_PASSWORD} + - POSTGRES_DB=${POSTGRES_DB} + ports: + - '5442:5432' + volumes: + - ../../src/chinook.sql:/docker-entrypoint-initdb.d/chinook.sql + + text2sql: + image: opea/text2sql:${TAG:-latest} + container_name: text2sql-server + ports: + - ${TEXT2SQL_PORT:-9090}:8080 + environment: + - TGI_LLM_ENDPOINT=${TGI_LLM_ENDPOINT} + depends_on: + - tgi-server + - postgres + + text2sql-gaudi: + image: opea/text2sql:${TAG:-latest} + container_name: text2sql-gaudi-server + ports: + - ${TEXT2SQL_PORT:-9090}:8080 + environment: + - TGI_LLM_ENDPOINT=${TGI_LLM_ENDPOINT} + depends_on: + - tgi-gaudi-server + - postgres +networks: + default: + driver: bridge diff --git a/comps/text2sql/deployment/docker_compose/langchain.yaml b/comps/text2sql/deployment/docker_compose/langchain.yaml deleted file mode 100644 index 5430472bf4..0000000000 --- a/comps/text2sql/deployment/docker_compose/langchain.yaml +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# 
SPDX-License-Identifier: Apache-2.0 - -version: "3.8" - -services: - tgi_service: - image: ghcr.io/huggingface/text-generation-inference:2.1.0 - container_name: tgi-service - ports: - - "8008:80" - volumes: - - "./data:/data" - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - HF_TOKEN: ${HF_TOKEN} - shm_size: 1g - command: --model-id ${LLM_MODEL_ID} - - postgres: - image: postgres:latest - container_name: postgres_container - restart: always - environment: - - POSTGRES_USER=${POSTGRES_USER} - - POSTGRES_PASSWORD=${POSTGRES_PASSWORD} - - POSTGRES_DB=${POSTGRES_DB} - ports: - - '5442:5432' - volumes: - - ./chinook.sql:/docker-entrypoint-initdb.d/chinook.sql - - text2sql_service: - image: opea/text2sql:latest - container_name: text2sql_service - ports: - - "9090:8090" - environment: - - TGI_LLM_ENDPOINT=${TGI_LLM_ENDPOINT} - - -networks: - default: - driver: bridge diff --git a/comps/text2sql/src/Dockerfile b/comps/text2sql/src/Dockerfile index d77e5522dd..592b96d7c3 100644 --- a/comps/text2sql/src/Dockerfile +++ b/comps/text2sql/src/Dockerfile @@ -15,8 +15,6 @@ RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ chown -R user /home/user/ -USER user - COPY comps /home/user/comps RUN pip install --no-cache-dir --upgrade pip setuptools && \ @@ -28,6 +26,8 @@ RUN pip install --no-cache-dir --upgrade pip setuptools && \ ENV PYTHONPATH=$PYTHONPATH:/home/user +USER user + WORKDIR /home/user/comps/text2sql/src/ ENTRYPOINT ["python", "opea_text2sql_microservice.py"] \ No newline at end of file diff --git a/comps/text2sql/src/README.md b/comps/text2sql/src/README.md index 3d9b34c77a..1ccdb30c75 100644 --- a/comps/text2sql/src/README.md +++ b/comps/text2sql/src/README.md @@ -113,22 +113,32 @@ docker run --runtime=runc --name="comps-langchain-text2sql" -p 9090:8080 --ipc #### Run via docker compose (Option B) -- Setup Environment Variables. +##### Setup Environment Variables. - ```bash - export TGI_LLM_ENDPOINT=http://${your_ip}:${TGI_PORT} - export HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN} - export LLM_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.3" - export POSTGRES_USER=postgres - export POSTGRES_PASSWORD=testpwd - export POSTGRES_DB=chinook - ``` +```bash +export TGI_LLM_ENDPOINT=http://${your_ip}:${TGI_PORT} +export HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +export LLM_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.3" +export POSTGRES_USER=postgres +export POSTGRES_PASSWORD=testpwd +export POSTGRES_DB=chinook +``` -- Start the services. +##### Start the services. 
- ```bash - docker compose -f docker_compose_text2sql.yaml up - ``` +- Xeon CPU + +```bash +cd comps/text2sql/deployment/docker_compose +docker compose -f compose.yaml up text2sql -d +``` + +- Gaudi2 HPU + +```bash +cd comps/text2sql/deployment/docker_compose +docker compose -f compose.yaml up text2sql-gaudi -d +``` --- diff --git a/comps/embeddings/deployment/docker_compose/compose_multimodal_bridgetower_intel_hpu.yaml b/comps/third_parties/bridgetower/deployment/docker_compose/compose.yaml similarity index 51% rename from comps/embeddings/deployment/docker_compose/compose_multimodal_bridgetower_intel_hpu.yaml rename to comps/third_parties/bridgetower/deployment/docker_compose/compose.yaml index a0d88173e6..161439256e 100644 --- a/comps/embeddings/deployment/docker_compose/compose_multimodal_bridgetower_intel_hpu.yaml +++ b/comps/third_parties/bridgetower/deployment/docker_compose/compose.yaml @@ -2,21 +2,18 @@ # SPDX-License-Identifier: Apache-2.0 services: - embedding-multimodal-bridgetower: - image: opea/embedding-multimodal-bridgetower-hpu:latest - container_name: embedding-multimodal-bridgetower + + multimodal-bridgetower-embedding-serving: + image: ${REGISTRY:-opea}/embedding-multimodal-bridgetower:${TAG:-latest} + container_name: multimodal-bridgetower-embedding-serving ports: - - ${EMBEDDER_PORT}:${EMBEDDER_PORT} + - ${EMBEDDER_PORT:-12400}:${EMBEDDER_PORT:-12400} ipc: host environment: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} PORT: ${EMBEDDER_PORT} - HABANA_VISIBLE_DEVICES: all - runtime: habana - cap_add: - - SYS_NICE restart: unless-stopped healthcheck: test: ["CMD-SHELL", "http_proxy='' curl -f http://localhost:${EMBEDDER_PORT}/v1/health_check"] @@ -24,24 +21,29 @@ services: timeout: 6s retries: 18 start_period: 30s - embedding: - image: opea/embedding:latest - container_name: embedding-multimodal-bridgetower-server + + multimodal-bridgetower-embedding-gaudi-serving: + image: ${REGISTRY:-opea}/embedding-multimodal-bridgetower-gaudi:${TAG:-latest} + container_name: multimodal-bridgetower-embedding-gaudi-serving ports: - - ${MM_EMBEDDING_PORT_MICROSERVICE}:${MM_EMBEDDING_PORT_MICROSERVICE} + - ${EMBEDDER_PORT:-12400}:${EMBEDDER_PORT:-12400} ipc: host environment: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - MULTIMODAL_EMBEDDING: true - EMBEDDING_COMPONENT_NAME: "OPEA_MULTIMODAL_EMBEDDING_BRIDGETOWER" - MMEI_EMBEDDING_ENDPOINT: ${MMEI_EMBEDDING_ENDPOINT} - MM_EMBEDDING_PORT_MICROSERVICE: ${MM_EMBEDDING_PORT_MICROSERVICE} + PORT: ${EMBEDDER_PORT} + HABANA_VISIBLE_DEVICES: all + runtime: habana + cap_add: + - SYS_NICE restart: unless-stopped - depends_on: - embedding-multimodal-bridgetower: - condition: service_healthy + healthcheck: + test: ["CMD-SHELL", "http_proxy='' curl -f http://localhost:${EMBEDDER_PORT}/v1/health_check"] + interval: 10s + timeout: 6s + retries: 18 + start_period: 30s networks: default: diff --git a/comps/third_parties/bridgetower/deployment/docker_compose/compose_intel_cpu.yaml b/comps/third_parties/bridgetower/deployment/docker_compose/compose_intel_cpu.yaml deleted file mode 100644 index 24fab26ae4..0000000000 --- a/comps/third_parties/bridgetower/deployment/docker_compose/compose_intel_cpu.yaml +++ /dev/null @@ -1,20 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -services: - embedding-multimodal-bridgetower: - image: opea/embedding-multimodal-bridgetower:latest - container_name: embedding-multimodal-bridgetower - ports: - - 
${EMBEDDER_PORT}:${EMBEDDER_PORT} - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - PORT: ${EMBEDDER_PORT} - restart: unless-stopped - -networks: - default: - driver: bridge diff --git a/comps/third_parties/bridgetower/deployment/docker_compose/compose_intel_hpu.yaml b/comps/third_parties/bridgetower/deployment/docker_compose/compose_intel_hpu.yaml deleted file mode 100644 index 3e05f2515d..0000000000 --- a/comps/third_parties/bridgetower/deployment/docker_compose/compose_intel_hpu.yaml +++ /dev/null @@ -1,20 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -services: - embedding-multimodal-bridgetower: - image: opea/embedding-multimodal-bridgetower-hpu:latest - container_name: embedding-multimodal-bridgetower - ports: - - ${EMBEDDER_PORT}:${EMBEDDER_PORT} - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - PORT: ${EMBEDDER_PORT} - restart: unless-stopped - -networks: - default: - driver: bridge diff --git a/comps/third_parties/bridgetower/deployment/kubernetes/README.md b/comps/third_parties/bridgetower/deployment/kubernetes/README.md index e69de29bb2..4f963462b8 100644 --- a/comps/third_parties/bridgetower/deployment/kubernetes/README.md +++ b/comps/third_parties/bridgetower/deployment/kubernetes/README.md @@ -0,0 +1,18 @@ +# Deploy mm-embedding on kubernetes cluster + +- You should have Helm (version >= 3.15) installed. Refer to the [Helm Installation Guide](https://helm.sh/docs/intro/install/) for more information. +- For more deployment options, refer to [helm charts README](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts#readme). + +## Deploy on Xeon + +``` +export HFTOKEN="insert-your-huggingface-token-here" +helm install mm-embedding oci://ghcr.io/opea-project/charts/mm-embedding --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f cpu-values.yaml +``` + +## Deploy on Gaudi + +``` +export HFTOKEN="insert-your-huggingface-token-here" +helm install mm-embedding oci://ghcr.io/opea-project/charts/mm-embedding --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f gaudi-values.yaml +``` diff --git a/comps/third_parties/bridgetower/deployment/kubernetes/cpu-values.yaml b/comps/third_parties/bridgetower/deployment/kubernetes/cpu-values.yaml new file mode 100644 index 0000000000..b194c9fe0d --- /dev/null +++ b/comps/third_parties/bridgetower/deployment/kubernetes/cpu-values.yaml @@ -0,0 +1,5 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +image: + repository: opea/embedding-multimodal-bridgetower diff --git a/comps/third_parties/bridgetower/deployment/kubernetes/gaudi-values.yaml b/comps/third_parties/bridgetower/deployment/kubernetes/gaudi-values.yaml new file mode 100644 index 0000000000..f7b19751d0 --- /dev/null +++ b/comps/third_parties/bridgetower/deployment/kubernetes/gaudi-values.yaml @@ -0,0 +1,21 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +accelDevice: "gaudi" + +image: + repository: opea/embedding-multimodal-bridgetower-gaudi + +resources: + limits: + habana.ai/gaudi: 1 + +readinessProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 1 +startupProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 1 + failureThreshold: 120 diff --git a/comps/third_parties/bridgetower/src/Dockerfile b/comps/third_parties/bridgetower/src/Dockerfile index f8b6dd7b2c..2992b24cbd 100644 --- 
a/comps/third_parties/bridgetower/src/Dockerfile +++ b/comps/third_parties/bridgetower/src/Dockerfile @@ -6,7 +6,7 @@ RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ chown -R user /home/user/ RUN apt-get update && apt-get install -y curl -USER user + # Set environment variables ENV LANG=en_US.UTF-8 ENV PYTHONPATH=/home/user:/usr/lib/habanalabs/:/optimum-habana @@ -18,7 +18,7 @@ RUN pip install --no-cache-dir --upgrade pip && \ ENV PYTHONPATH=$PYTHONPATH:/home/user - +USER user ARG EMBEDDER_PORT=8080 ENV PORT=$EMBEDDER_PORT diff --git a/comps/third_parties/bridgetower/src/Dockerfile.intel_hpu b/comps/third_parties/bridgetower/src/Dockerfile.intel_hpu index 8ee4633346..648776c2dc 100644 --- a/comps/third_parties/bridgetower/src/Dockerfile.intel_hpu +++ b/comps/third_parties/bridgetower/src/Dockerfile.intel_hpu @@ -9,7 +9,7 @@ RUN useradd -m -s /bin/bash user && \ RUN rm -rf /etc/ssh/ssh_host* RUN apt-get update && apt-get install -y curl -USER user + # Set environment variables ENV LANG=en_US.UTF-8 ENV PYTHONPATH=/home/user:/usr/lib/habanalabs/:/optimum-habana @@ -22,7 +22,7 @@ RUN pip install --no-cache-dir --upgrade pip && \ pip install --no-cache-dir optimum[habana] ENV PYTHONPATH=$PYTHONPATH:/home/user - +USER user ARG EMBEDDER_PORT=8080 ENV PORT=$EMBEDDER_PORT diff --git a/comps/third_parties/bridgetower/src/README.md b/comps/third_parties/bridgetower/src/README.md index ffb1e7bf7c..16aebea46a 100644 --- a/comps/third_parties/bridgetower/src/README.md +++ b/comps/third_parties/bridgetower/src/README.md @@ -1,4 +1,4 @@ -# Multimodal Embeddings Microservice with BridgePower +# Multimodal Embeddings Microservice with BridgeTower The Multimodal Embedding Microservice is designed to efficiently convert pairs of textual string and image into vectorized embeddings, facilitating seamless integration into various machine learning and data processing workflows. This service utilizes advanced algorithms to generate high-quality embeddings that capture the joint semantic essence of the input text-and-image pairs, making it ideal for applications in multi-modal data processing, information retrieval, and similar fields. @@ -21,8 +21,11 @@ Currently, we employ [**BridgeTower**](https://huggingface.co/BridgeTower/bridge - Gaudi2 HPU ```bash +# Define port to use for the embeddings microservice +export EMBEDDER_PORT=8080 + cd ../../../../../../../ -docker build -t opea/embedding-multimodal-bridgetower-hpu:latest --build-arg EMBEDDER_PORT=$EMBEDDER_PORT --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/third_parties/bridgetower/src/Dockerfile.intel_hpu . +docker build -t opea/embedding-multimodal-bridgetower-gaudi:latest --build-arg EMBEDDER_PORT=$EMBEDDER_PORT --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/third_parties/bridgetower/src/Dockerfile.intel_hpu . cd comps/third_parties/bridgetower/deployment/docker_compose/ docker compose -f compose_intel_hpu.yaml up -d ``` @@ -32,6 +35,9 @@ docker compose -f compose_intel_hpu.yaml up -d - Xeon CPU ```bash +# Define port to use for the embeddings microservice +export EMBEDDER_PORT=8080 + cd ../../../../../../../ docker build -t opea/embedding-multimodal-bridgetower:latest --build-arg EMBEDDER_PORT=$EMBEDDER_PORT --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/third_parties/bridgetower/src/Dockerfile . 
cd comps/third_parties/bridgetower/deployment/docker_compose/ @@ -43,8 +49,16 @@ docker compose -f compose_intel_cpu.yaml up -d Then you need to test your MMEI service using the following commands: ```bash -curl http://localhost:$your_mmei_port/v1/encode \ +curl http://localhost:$EMBEDDER_PORT/v1/encode \ -X POST \ -H "Content-Type:application/json" \ -d '{"text":"This is example"}' ``` + +To compute a joint embedding of an image-text pair, a base64 encoded image can be passed along with text: + +```bash +curl -X POST http://localhost:$EMBEDDER_PORT/v1/encode \ +-H "Content-Type: application/json" \ +-d '{"text": "This is some sample text.", "img_b64_str" : "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8/5+hnoEIwDiqkL4KAcT9GO0U4BxoAAAAAElFTkSuQmCC"}' +``` diff --git a/comps/third_parties/clip/src/Dockerfile b/comps/third_parties/clip/src/Dockerfile index 5479eee297..26f08e64c9 100644 --- a/comps/third_parties/clip/src/Dockerfile +++ b/comps/third_parties/clip/src/Dockerfile @@ -13,8 +13,6 @@ RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ chown -R user /home/user/ -USER user - COPY comps /home/user/comps RUN pip install --no-cache-dir --upgrade pip setuptools && \ @@ -23,6 +21,8 @@ RUN pip install --no-cache-dir --upgrade pip setuptools && \ ENV PYTHONPATH=$PYTHONPATH:/home/user +USER user + WORKDIR /home/user/comps/third_parties/clip/src/ ENTRYPOINT ["python", "clip_server.py"] diff --git a/comps/vectorstores/elasticsearch/elasticsearch.yaml b/comps/third_parties/elasticsearch/deployment/docker_compose/compose.yaml similarity index 53% rename from comps/vectorstores/elasticsearch/elasticsearch.yaml rename to comps/third_parties/elasticsearch/deployment/docker_compose/compose.yaml index 2ba8e04f26..d2600ca5aa 100644 --- a/comps/vectorstores/elasticsearch/elasticsearch.yaml +++ b/comps/third_parties/elasticsearch/deployment/docker_compose/compose.yaml @@ -1,16 +1,23 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -version: "3" services: elasticsearch-vector-db: image: docker.elastic.co/elasticsearch/elasticsearch:8.16.0 container_name: elasticsearch-vector-db ports: - - "9200:9200" - - "9300:9300" + - "${ELASTICSEARCH_PORT1:-9200}:9200" + - "${ELASTICSEARCH_PORT2:-9300}:9300" environment: + - no_proxy=${no_proxy} + - http_proxy=${http_proxy} + - https_proxy=${https_proxy} - ES_JAVA_OPTS=-Xms1g -Xmx1g - discovery.type=single-node - xpack.security.enabled=false - bootstrap.memory_lock=false + healthcheck: + test: curl -s http://localhost:9200/_health_report | grep '"status":"green"' + interval: 10s + timeout: 5s + retries: 5 diff --git a/comps/vectorstores/elasticsearch/README.md b/comps/third_parties/elasticsearch/src/README.md similarity index 100% rename from comps/vectorstores/elasticsearch/README.md rename to comps/third_parties/elasticsearch/src/README.md diff --git a/comps/dataprep/multimodal/redis/langchain/__init__.py b/comps/third_parties/elasticsearch/src/__init__.py similarity index 100% rename from comps/dataprep/multimodal/redis/langchain/__init__.py rename to comps/third_parties/elasticsearch/src/__init__.py diff --git a/comps/third_parties/gpt-sovits/deployment/kubernetes/README.md b/comps/third_parties/gpt-sovits/deployment/kubernetes/README.md new file mode 100644 index 0000000000..3a9f77f86e --- /dev/null +++ b/comps/third_parties/gpt-sovits/deployment/kubernetes/README.md @@ -0,0 +1,11 @@ +# Deploy gpt-sovits on kubernetes cluster + +- You should have Helm (version >= 3.15) installed. 
Refer to the [Helm Installation Guide](https://helm.sh/docs/intro/install/) for more information. +- For more deployment options, refer to [helm charts README](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts#readme). + +## Deploy on Xeon + +``` +export HFTOKEN="insert-your-huggingface-token-here" +helm install gpt-sovits oci://ghcr.io/opea-project/charts/gpt-sovits --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f cpu-values.yaml +``` diff --git a/comps/third_parties/gpt-sovits/deployment/kubernetes/cpu-values.yaml b/comps/third_parties/gpt-sovits/deployment/kubernetes/cpu-values.yaml new file mode 100644 index 0000000000..087e8b3346 --- /dev/null +++ b/comps/third_parties/gpt-sovits/deployment/kubernetes/cpu-values.yaml @@ -0,0 +1,5 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +image: + repository: opea/gpt-sovits diff --git a/comps/vectorstores/milvus/docker-compose.yaml b/comps/third_parties/milvus/deployment/docker_compose/compose.yaml similarity index 93% rename from comps/vectorstores/milvus/docker-compose.yaml rename to comps/third_parties/milvus/deployment/docker_compose/compose.yaml index dc7d1c289d..abc6ae1883 100644 --- a/comps/vectorstores/milvus/docker-compose.yaml +++ b/comps/third_parties/milvus/deployment/docker_compose/compose.yaml @@ -1,8 +1,6 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -version: "3.5" - services: etcd: container_name: milvus-etcd @@ -28,8 +26,8 @@ services: MINIO_ACCESS_KEY: minioadmin MINIO_SECRET_KEY: minioadmin ports: - - "5044:9001" - - "5043:9000" + - "${MINIO_PORT1:-5044}:9001" + - "${MINIO_PORT2:-5043}:9000" volumes: - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/minio:/minio_data command: minio server /minio_data --console-address ":9001" @@ -59,7 +57,7 @@ services: retries: 3 ports: - "19530:19530" - - "9091:9091" + - "${MILVUS_STANDALONE_PORT:-9091}:9091" depends_on: - "etcd" - "minio" diff --git a/comps/vectorstores/milvus/milvus.yaml b/comps/third_parties/milvus/deployment/docker_compose/milvus.yaml similarity index 100% rename from comps/vectorstores/milvus/milvus.yaml rename to comps/third_parties/milvus/deployment/docker_compose/milvus.yaml diff --git a/comps/vectorstores/milvus/README.md b/comps/third_parties/milvus/src/README.md similarity index 100% rename from comps/vectorstores/milvus/README.md rename to comps/third_parties/milvus/src/README.md diff --git a/comps/dataprep/neo4j/__init__.py b/comps/third_parties/milvus/src/__init__.py similarity index 100% rename from comps/dataprep/neo4j/__init__.py rename to comps/third_parties/milvus/src/__init__.py diff --git a/comps/third_parties/mongodb/deployment/kubernetes/README.md b/comps/third_parties/mongodb/deployment/kubernetes/README.md new file mode 100644 index 0000000000..a9c5db7d1e --- /dev/null +++ b/comps/third_parties/mongodb/deployment/kubernetes/README.md @@ -0,0 +1,11 @@ +# Deploy MongoDB on kubernetes cluster + +- You should have Helm (version >= 3.15) installed. Refer to the [Helm Installation Guide](https://helm.sh/docs/intro/install/) for more information. +- For more deployment options, refer to [helm charts README](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts#readme). 
+ +## Deploy on Xeon + +``` +export HFTOKEN="insert-your-huggingface-token-here" +helm install mongodb oci://ghcr.io/opea-project/charts/mongodb --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f cpu-values.yaml +``` diff --git a/comps/third_parties/mongodb/deployment/kubernetes/cpu-values.yaml b/comps/third_parties/mongodb/deployment/kubernetes/cpu-values.yaml new file mode 100644 index 0000000000..4d81053189 --- /dev/null +++ b/comps/third_parties/mongodb/deployment/kubernetes/cpu-values.yaml @@ -0,0 +1,4 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +image: + repository: mongo diff --git a/comps/third_parties/neo4j/deployment/docker_compose/compose.yaml b/comps/third_parties/neo4j/deployment/docker_compose/compose.yaml new file mode 100644 index 0000000000..6a1ccc1d4a --- /dev/null +++ b/comps/third_parties/neo4j/deployment/docker_compose/compose.yaml @@ -0,0 +1,33 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +services: + neo4j-apoc: + image: neo4j:latest + container_name: neo4j-apoc + ports: + - "${NEO4J_PORT1:-7474}:7474" + - "${NEO4J_PORT2:-7687}:7687" + volumes: + - /$HOME/neo4j/logs:/logs + - /$HOME/neo4j/config:/config + - /$HOME/neo4j/data:/data + - /$HOME/neo4j/plugins:/plugins + ipc: host + environment: + - no_proxy=${no_proxy} + - http_proxy=${http_proxy} + - https_proxy=${https_proxy} + - NEO4J_AUTH=${NEO4J_USERNAME}/${NEO4J_PASSWORD} + - NEO4J_PLUGINS=["apoc"] + - NEO4J_apoc_export_file_enabled=true + - NEO4J_apoc_import_file_enabled=true + - NEO4J_apoc_import_file_use__neo4j__config=true + - NEO4J_dbms_security_procedures_unrestricted=apoc.\* + restart: always + healthcheck: + test: wget http://localhost:7474 || exit 1 + interval: 5s + timeout: 10s + retries: 20 + start_period: 3s diff --git a/comps/third_parties/neo4j/src/README.md b/comps/third_parties/neo4j/src/README.md new file mode 100644 index 0000000000..868021d6ca --- /dev/null +++ b/comps/third_parties/neo4j/src/README.md @@ -0,0 +1,29 @@ +# Start Neo4J Server + +## 1. Download Neo4J image + +```bash +docker pull neo4j:latest +``` + +## 2. Configure the username, password and dbname + +```bash +export NEO4J_AUTH=neo4j/password +export NEO4J_PLUGINS=\[\"apoc\"\] +``` + +## 3. Run Neo4J service + +To launch Neo4j locally, first ensure you have docker installed. Then, you can launch the database with the following docker command. + +```bash +docker run \ + -p 7474:7474 -p 7687:7687 \ + -v $PWD/data:/data -v $PWD/plugins:/plugins \ + --name neo4j-apoc \ + -d \ + -e NEO4J_AUTH=neo4j/password \ + -e NEO4J_PLUGINS=\[\"apoc\"\] \ + neo4j:latest +``` diff --git a/comps/dataprep/neo4j/langchain/__init__.py b/comps/third_parties/neo4j/src/__init__.py similarity index 100% rename from comps/dataprep/neo4j/langchain/__init__.py rename to comps/third_parties/neo4j/src/__init__.py diff --git a/comps/third_parties/nginx/deployment/kubernetes/README.md b/comps/third_parties/nginx/deployment/kubernetes/README.md index e69de29bb2..a96d744db8 100644 --- a/comps/third_parties/nginx/deployment/kubernetes/README.md +++ b/comps/third_parties/nginx/deployment/kubernetes/README.md @@ -0,0 +1,11 @@ +# Deploy nginx on kubernetes cluster + +- You should have Helm (version >= 3.15) installed. Refer to the [Helm Installation Guide](https://helm.sh/docs/intro/install/) for more information. +- For more deployment options, refer to [helm charts README](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts#readme). 
+ +## Deploy on Xeon + +``` +export HFTOKEN="insert-your-huggingface-token-here" +helm install nginx oci://ghcr.io/opea-project/charts/nginx --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f cpu-values.yaml +``` diff --git a/comps/third_parties/nginx/deployment/kubernetes/cpu-values.yaml b/comps/third_parties/nginx/deployment/kubernetes/cpu-values.yaml new file mode 100644 index 0000000000..98e8182d2c --- /dev/null +++ b/comps/third_parties/nginx/deployment/kubernetes/cpu-values.yaml @@ -0,0 +1,5 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +image: + repository: opea/nginx diff --git a/comps/third_parties/nginx/src/nginx.conf.template b/comps/third_parties/nginx/src/nginx.conf.template index f0d0f8d6b1..8b4db6413a 100644 --- a/comps/third_parties/nginx/src/nginx.conf.template +++ b/comps/third_parties/nginx/src/nginx.conf.template @@ -37,7 +37,7 @@ server { gzip off; } - location /v1/dataprep { + location /v1/dataprep/ingest { proxy_pass http://${DATAPREP_SERVICE_IP}:${DATAPREP_SERVICE_PORT}; proxy_set_header Host $host; proxy_set_header X-Real-IP $remote_addr; @@ -49,7 +49,7 @@ server { send_timeout 6000; } - location /v1/dataprep/get_file { + location /v1/dataprep/get { proxy_pass http://${DATAPREP_SERVICE_IP}:${DATAPREP_SERVICE_PORT}; proxy_set_header Host $host; proxy_set_header X-Real-IP $remote_addr; @@ -57,7 +57,7 @@ server { proxy_set_header X-Forwarded-Proto $scheme; } - location /v1/dataprep/delete_file { + location /v1/dataprep/delete { proxy_pass http://${DATAPREP_SERVICE_IP}:${DATAPREP_SERVICE_PORT}; proxy_set_header Host $host; proxy_set_header X-Real-IP $remote_addr; diff --git a/comps/llms/text-generation/ollama/langchain/README.md b/comps/third_parties/ollama/README.md similarity index 74% rename from comps/llms/text-generation/ollama/langchain/README.md rename to comps/third_parties/ollama/README.md index 9700fc6a6d..c82e5c6cea 100644 --- a/comps/llms/text-generation/ollama/langchain/README.md +++ b/comps/third_parties/ollama/README.md @@ -53,22 +53,3 @@ curl --noproxy "*" http://localhost:11434/api/generate -d '{ "prompt":"Why is the sky blue?" }' ``` - -## Build Docker Image - -```bash -cd GenAIComps/ -docker build --no-cache -t opea/llm-ollama:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/ollama/langchain/Dockerfile . 
-``` - -## Run the Ollama Microservice - -```bash -docker run --network host -e http_proxy=$http_proxy -e https_proxy=$https_proxy opea/llm-ollama:latest -``` - -## Consume the Ollama Microservice - -```bash -curl http://127.0.0.1:9000/v1/chat/completions -X POST -d '{"model": "llama3", "query":"What is Deep Learning?","max_tokens":32,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"stream":true}' -H 'Content-Type: application/json' -``` diff --git a/comps/third_parties/ollama/deployment/docker_compose/compose.yaml b/comps/third_parties/ollama/deployment/docker_compose/compose.yaml new file mode 100644 index 0000000000..22c8944ebc --- /dev/null +++ b/comps/third_parties/ollama/deployment/docker_compose/compose.yaml @@ -0,0 +1,19 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +services: + ollama-server: + image: ollama/ollama + container_name: ollama-server + ports: + - ${LLM_ENDPOINT_PORT:-8008}:11434 + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + LLM_MODEL_ID: ${LLM_MODEL_ID} + host_ip: ${host_ip} + +networks: + default: + driver: bridge diff --git a/comps/third_parties/opensearch/deployment/docker_compose/compose.yaml b/comps/third_parties/opensearch/deployment/docker_compose/compose.yaml new file mode 100644 index 0000000000..c5b2846c21 --- /dev/null +++ b/comps/third_parties/opensearch/deployment/docker_compose/compose.yaml @@ -0,0 +1,34 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +services: + opensearch-vector-db: + image: opensearchproject/opensearch:latest + container_name: opensearch-vector-db + environment: + - no_proxy=${no_proxy} + - http_proxy=${http_proxy} + - https_proxy=${https_proxy} + - host_ip=${host_ip} + - cluster.name=opensearch-cluster + - node.name=opensearch-vector-db + - discovery.seed_hosts=opensearch-vector-db + - cluster.initial_master_nodes=opensearch-vector-db + - bootstrap.memory_lock=true # along with the memlock settings below, disables swapping + - "OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m" # minimum and maximum Java heap size, recommend setting both to 50% of system RAM + - OPENSEARCH_INITIAL_ADMIN_PASSWORD=${OPENSEARCH_INITIAL_ADMIN_PASSWORD} # Sets the demo admin user password when using demo configuration, required for OpenSearch 2.12 and later + ulimits: + memlock: + soft: -1 + hard: -1 + nofile: + soft: 65536 # maximum number of open files for the OpenSearch user, set to at least 65536 on modern systems + hard: 65536 + ports: + - "${OPENSEARCH_PORT1:-9200}:9200" + - "${OPENSEARCH_PORT2:-9600}:9600" + healthcheck: + test: ["CMD-SHELL", "sleep 10 && exit 0"] + interval: 1s + timeout: 15s + retries: 1 diff --git a/comps/vectorstores/opensearch/README.md b/comps/third_parties/opensearch/src/README.md similarity index 100% rename from comps/vectorstores/opensearch/README.md rename to comps/third_parties/opensearch/src/README.md diff --git a/comps/dataprep/neo4j/llama_index/__init__.py b/comps/third_parties/opensearch/src/__init__.py similarity index 100% rename from comps/dataprep/neo4j/llama_index/__init__.py rename to comps/third_parties/opensearch/src/__init__.py diff --git a/comps/vectorstores/opensearch/opensearch_dashboards.yml b/comps/third_parties/opensearch/src/opensearch_dashboards.yml similarity index 100% rename from comps/vectorstores/opensearch/opensearch_dashboards.yml rename to comps/third_parties/opensearch/src/opensearch_dashboards.yml diff --git 
a/comps/third_parties/pathway/deployment/docker_compose/compose.yaml b/comps/third_parties/pathway/deployment/docker_compose/compose.yaml new file mode 100644 index 0000000000..35dc90c32e --- /dev/null +++ b/comps/third_parties/pathway/deployment/docker_compose/compose.yaml @@ -0,0 +1,29 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +include: + - ../../../tei/deployment/docker_compose/compose.yaml + +services: + pathway-db: + image: ${REGISTRY:-opea}/vectorstore-pathway:${TAG:-latest} + container_name: pathway-db + ports: + - "${PATHWAY_PORT:-6379}:${PATHWAY_PORT:-6379}" + volumes: + - "${PATHWAY_VOLUME:-../../src/README.md}:/app/data/README.md" + network_mode: host + environment: + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + PATHWAY_HOST: ${PATHWAY_HOST_DB} + PATHWAY_PORT: ${PATHWAY_PORT} + TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} + healthcheck: + test: ["CMD-SHELL", "sleep 30 && exit 0"] + interval: 1s + timeout: 35s + retries: 1 + depends_on: + tei-embedding-serving: + condition: service_healthy diff --git a/comps/vectorstores/pathway/Dockerfile b/comps/third_parties/pathway/src/Dockerfile similarity index 81% rename from comps/vectorstores/pathway/Dockerfile rename to comps/third_parties/pathway/src/Dockerfile index 8739e69cc6..59c68c31dc 100644 --- a/comps/vectorstores/pathway/Dockerfile +++ b/comps/third_parties/pathway/src/Dockerfile @@ -15,12 +15,12 @@ RUN apt-get update && apt-get install -y \ WORKDIR /app -COPY comps/vectorstores/pathway/requirements.txt /app/ +COPY comps/third_parties/pathway/src/requirements.txt /app/ RUN if [ ${ARCH} = "cpu" ]; then pip install --no-cache-dir torch torchvision --index-url https://download.pytorch.org/whl/cpu; fi && \ pip install --no-cache-dir -r requirements.txt -COPY comps/vectorstores/pathway/vectorstore_pathway.py /app/ +COPY comps/third_parties/pathway/src/vectorstore_pathway.py /app/ CMD ["python", "vectorstore_pathway.py"] diff --git a/comps/vectorstores/pathway/README.md b/comps/third_parties/pathway/src/README.md similarity index 96% rename from comps/vectorstores/pathway/README.md rename to comps/third_parties/pathway/src/README.md index c5c82327b5..c669cb2489 100644 --- a/comps/vectorstores/pathway/README.md +++ b/comps/third_parties/pathway/src/README.md @@ -3,7 +3,6 @@ Set the environment variables for Pathway, and the embedding model. > Note: If you are using `TEI_EMBEDDING_ENDPOINT`, make sure embedding service is already running. -> See the instructions under [here](../../retrievers/pathway/langchain/README.md) ```bash export PATHWAY_HOST=0.0.0.0 @@ -57,7 +56,7 @@ For more information, see the relevant Pathway docs: Build the Docker and run the Pathway Vector Store: ```bash -docker build --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -t opea/vectorstore-pathway:latest -f comps/vectorstores/src/pathway/Dockerfile . +docker build --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -t opea/vectorstore-pathway:latest -f comps/third_parties/pathway/src/Dockerfile . # with locally loaded model, you may add `EMBED_MODEL` env variable to configure the model. 
docker run -e PATHWAY_HOST=${PATHWAY_HOST} -e PATHWAY_PORT=${PATHWAY_PORT} -e http_proxy=$http_proxy -e https_proxy=$https_proxy -v ./data:/app/data -p ${PATHWAY_PORT}:${PATHWAY_PORT} opea/vectorstore-pathway:latest diff --git a/comps/dataprep/opensearch/langchain/__init__.py b/comps/third_parties/pathway/src/__init__.py similarity index 100% rename from comps/dataprep/opensearch/langchain/__init__.py rename to comps/third_parties/pathway/src/__init__.py diff --git a/comps/vectorstores/pathway/data/nke-10k-2023.pdf b/comps/third_parties/pathway/src/data/nke-10k-2023.pdf similarity index 100% rename from comps/vectorstores/pathway/data/nke-10k-2023.pdf rename to comps/third_parties/pathway/src/data/nke-10k-2023.pdf diff --git a/comps/vectorstores/pathway/requirements.txt b/comps/third_parties/pathway/src/requirements.txt similarity index 100% rename from comps/vectorstores/pathway/requirements.txt rename to comps/third_parties/pathway/src/requirements.txt diff --git a/comps/vectorstores/pathway/vectorstore_pathway.py b/comps/third_parties/pathway/src/vectorstore_pathway.py similarity index 94% rename from comps/vectorstores/pathway/vectorstore_pathway.py rename to comps/third_parties/pathway/src/vectorstore_pathway.py index 802127f54e..22a23a2414 100644 --- a/comps/vectorstores/pathway/vectorstore_pathway.py +++ b/comps/third_parties/pathway/src/vectorstore_pathway.py @@ -4,6 +4,7 @@ import logging import os +import nltk import pathway as pw from langchain import text_splitter from langchain_community.embeddings import HuggingFaceBgeEmbeddings @@ -17,6 +18,10 @@ datefmt="%Y-%m-%d %H:%M:%S", ) +nltk.download("punkt") +nltk.download("punkt_tab") +nltk.download("averaged_perceptron_tagger_eng") + # This creates a Pathway connector that tracks all the files in the `data/` directory. # Additions and modifications will be reflected on the index automatically. 
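The `nltk.download(...)` calls added to `vectorstore_pathway.py` above fetch tokenizer data when the module is imported, i.e. each time the container starts. A minimal sketch of an alternative, assuming the same three corpora and a hypothetical build-time `RUN` step in `comps/third_parties/pathway/src/Dockerfile` (not part of this diff), would bake the data into the image so start-up needs no network access:

```bash
# Hypothetical Dockerfile RUN step (not part of this change): pre-fetch the NLTK
# corpora that vectorstore_pathway.py otherwise downloads at import time.
python -m nltk.downloader punkt punkt_tab averaged_perceptron_tagger_eng
```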
diff --git a/comps/third_parties/pgvector/deployment/docker_compose/compose.yaml b/comps/third_parties/pgvector/deployment/docker_compose/compose.yaml new file mode 100644 index 0000000000..53329642b1 --- /dev/null +++ b/comps/third_parties/pgvector/deployment/docker_compose/compose.yaml @@ -0,0 +1,24 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +services: + pgvector-db: + image: pgvector/pgvector:0.7.0-pg16 + container_name: pgvector-db + hostname: db + ports: + - "${PGVECTOR_PORT:-5432}:5432" + restart: always + environment: + - POSTGRES_DB=${POSTGRES_DB} + - POSTGRES_USER=${POSTGRES_USER} + - POSTGRES_PASSWORD=${POSTGRES_PASSWORD} + - POSTGRES_HOST_AUTH_METHOD=trust + volumes: + - ${VOLUMES_PATH:-./init.sql}:/docker-entrypoint-initdb.d/init.sql + healthcheck: + test: pg_isready + interval: 60s + retries: 3 + start_period: 10s + timeout: 10s diff --git a/comps/vectorstores/pgvector/README.md b/comps/third_parties/pgvector/src/README.md similarity index 100% rename from comps/vectorstores/pgvector/README.md rename to comps/third_parties/pgvector/src/README.md diff --git a/comps/dataprep/pgvector/langchain/__init__.py b/comps/third_parties/pgvector/src/__init__.py similarity index 100% rename from comps/dataprep/pgvector/langchain/__init__.py rename to comps/third_parties/pgvector/src/__init__.py diff --git a/comps/vectorstores/pgvector/init.sql b/comps/third_parties/pgvector/src/init.sql similarity index 100% rename from comps/vectorstores/pgvector/init.sql rename to comps/third_parties/pgvector/src/init.sql diff --git a/comps/third_parties/qdrant/deployment/docker_compose/compose.yaml b/comps/third_parties/qdrant/deployment/docker_compose/compose.yaml new file mode 100644 index 0000000000..9c1542425f --- /dev/null +++ b/comps/third_parties/qdrant/deployment/docker_compose/compose.yaml @@ -0,0 +1,21 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +services: + qdrant-vector-db: + image: qdrant/qdrant + container_name: qdrant-vector-db + ports: + - "${QDRANT_PORT:-6333}:6333" + - "6334:6334" + healthcheck: + #test: curl -s http://${host_ip}:6334/healthz | grep -q 'healthz check passed' || exit 1 # TODO, This is the real healthcheck, but not work + test: ["CMD-SHELL", "sleep 10 && exit 0"] + interval: 1s + timeout: 15s + retries: 1 + environment: + - no_proxy=${no_proxy} + - http_proxy=${http_proxy} + - https_proxy=${https_proxy} + - host_ip=${host_ip} diff --git a/comps/third_parties/qdrant/src/README.md b/comps/third_parties/qdrant/src/README.md new file mode 100644 index 0000000000..b23060cfda --- /dev/null +++ b/comps/third_parties/qdrant/src/README.md @@ -0,0 +1,21 @@ +# Start PGVector server + +## 1. Download Pgvector image + +```bash +docker pull pgvector/pgvector:0.7.0-pg16 +``` + +## 2. Configure the username, password and dbname + +```bash +export POSTGRES_USER=testuser +export POSTGRES_PASSWORD=testpwd +export POSTGRES_DB=vectordb +``` + +## 3. 
Run Pgvector service + +```bash +docker run --name vectorstore-postgres -e POSTGRES_USER=${POSTGRES_USER} -e POSTGRES_HOST_AUTH_METHOD=trust -e POSTGRES_DB=${POSTGRES_DB} -e POSTGRES_PASSWORD=${POSTGRES_PASSWORD} -d -v ./init.sql:/docker-entrypoint-initdb.d/init.sql -p 5432:5432 pgvector/pgvector:0.7.0-pg16 +``` diff --git a/comps/dataprep/pinecone/langchain/__init__.py b/comps/third_parties/qdrant/src/__init__.py similarity index 100% rename from comps/dataprep/pinecone/langchain/__init__.py rename to comps/third_parties/qdrant/src/__init__.py diff --git a/comps/third_parties/qdrant/src/init.sql b/comps/third_parties/qdrant/src/init.sql new file mode 100644 index 0000000000..0aa0fc2255 --- /dev/null +++ b/comps/third_parties/qdrant/src/init.sql @@ -0,0 +1 @@ +CREATE EXTENSION IF NOT EXISTS vector; diff --git a/comps/third_parties/redis/deployment/docker_compose/compose.yaml b/comps/third_parties/redis/deployment/docker_compose/compose.yaml new file mode 100644 index 0000000000..4060104c6d --- /dev/null +++ b/comps/third_parties/redis/deployment/docker_compose/compose.yaml @@ -0,0 +1,19 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +services: + redis-vector-db: + image: redis/redis-stack:7.2.0-v9 + container_name: redis-vector-db + ports: + - "${REDIS_PORT1:-6379}:6379" + - "${REDIS_PORT2:-8001}:8001" + environment: + - no_proxy=${no_proxy} + - http_proxy=${http_proxy} + - https_proxy=${https_proxy} + healthcheck: + test: ["CMD", "redis-cli", "ping"] + timeout: 10s + retries: 3 + start_period: 10s diff --git a/comps/third_parties/redis/deployment/kubernetes/README.md b/comps/third_parties/redis/deployment/kubernetes/README.md new file mode 100644 index 0000000000..ab8cdc06c4 --- /dev/null +++ b/comps/third_parties/redis/deployment/kubernetes/README.md @@ -0,0 +1,11 @@ +# Deploy RedisDB on kubernetes cluster + +- You should have Helm (version >= 3.15) installed. Refer to the [Helm Installation Guide](https://helm.sh/docs/intro/install/) for more information. +- For more deployment options, refer to [helm charts README](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts#readme). 
+ +## Deploy on Xeon + +``` +export HFTOKEN="insert-your-huggingface-token-here" +helm install redis-vector-db oci://ghcr.io/opea-project/charts/redis-vector-db --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f cpu-values.yaml +``` diff --git a/comps/third_parties/redis/deployment/kubernetes/cpu-values.yaml b/comps/third_parties/redis/deployment/kubernetes/cpu-values.yaml new file mode 100644 index 0000000000..415b0aee8b --- /dev/null +++ b/comps/third_parties/redis/deployment/kubernetes/cpu-values.yaml @@ -0,0 +1,5 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +image: + repository: redis/redis-stack diff --git a/comps/vectorstores/redis/README.md b/comps/third_parties/redis/src/README.md similarity index 100% rename from comps/vectorstores/redis/README.md rename to comps/third_parties/redis/src/README.md diff --git a/comps/dataprep/qdrant/langchain/__init__.py b/comps/third_parties/redis/src/__init__.py similarity index 100% rename from comps/dataprep/qdrant/langchain/__init__.py rename to comps/third_parties/redis/src/__init__.py diff --git a/comps/third_parties/speecht5/deployment/kubernetes/README.md b/comps/third_parties/speecht5/deployment/kubernetes/README.md new file mode 100644 index 0000000000..e0f18a3f7d --- /dev/null +++ b/comps/third_parties/speecht5/deployment/kubernetes/README.md @@ -0,0 +1,18 @@ +# Deploy speecht5 on kubernetes cluster + +- You should have Helm (version >= 3.15) installed. Refer to the [Helm Installation Guide](https://helm.sh/docs/intro/install/) for more information. +- For more deployment options, refer to [helm charts README](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts#readme). + +## Deploy on Xeon + +``` +export HFTOKEN="insert-your-huggingface-token-here" +helm install speecht5 oci://ghcr.io/opea-project/charts/speecht5 --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f cpu-values.yaml +``` + +## Deploy on Gaudi + +``` +export HFTOKEN="insert-your-huggingface-token-here" +helm install speecht5 oci://ghcr.io/opea-project/charts/speecht5 --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f gaudi-values.yaml +``` diff --git a/comps/third_parties/speecht5/deployment/kubernetes/cpu-values.yaml b/comps/third_parties/speecht5/deployment/kubernetes/cpu-values.yaml new file mode 100644 index 0000000000..56e0cd0cdc --- /dev/null +++ b/comps/third_parties/speecht5/deployment/kubernetes/cpu-values.yaml @@ -0,0 +1,5 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +image: + repository: opea/speecht5 diff --git a/comps/third_parties/speecht5/deployment/kubernetes/gaudi-values.yaml b/comps/third_parties/speecht5/deployment/kubernetes/gaudi-values.yaml new file mode 100644 index 0000000000..c7e5295bd9 --- /dev/null +++ b/comps/third_parties/speecht5/deployment/kubernetes/gaudi-values.yaml @@ -0,0 +1,8 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +image: + repository: opea/speecht5-gaudi +resources: + limits: + habana.ai/gaudi: 1 diff --git a/comps/third_parties/tei/deployment/docker_compose/compose.yaml b/comps/third_parties/tei/deployment/docker_compose/compose.yaml new file mode 100644 index 0000000000..9a96ed5045 --- /dev/null +++ b/comps/third_parties/tei/deployment/docker_compose/compose.yaml @@ -0,0 +1,72 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +services: + tei-embedding-serving: + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + container_name: tei-embedding-serving + 
entrypoint: /bin/sh -c "apt-get update && apt-get install -y curl && text-embeddings-router --json-output --model-id ${EMBEDDING_MODEL_ID} --auto-truncate" + ports: + - "${TEI_EMBEDDER_PORT:-12000}:80" + volumes: + - "./data:/data" + shm_size: 1g + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + host_ip: ${host_ip} + HF_TOKEN: ${HF_TOKEN} + healthcheck: + test: ["CMD", "curl", "-f", "http://${host_ip}:${TEI_EMBEDDER_PORT}/health"] + interval: 10s + timeout: 6s + retries: 48 + + tei-reranking-serving: + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + container_name: tei-reranking-serving + entrypoint: /bin/sh -c "apt-get update && apt-get install -y curl && text-embeddings-router --json-output --model-id ${RERANK_MODEL_ID} --auto-truncate" + ports: + - "${TEI_RERANKING_PORT:-12003}:80" + volumes: + - "./data:/data" + shm_size: 1g + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + host_ip: ${host_ip} + healthcheck: + test: ["CMD", "curl", "-f", "http://${host_ip}:${TEI_RERANKING_PORT}/health"] + interval: 10s + timeout: 6s + retries: 48 + + + tei-reranking-gaudi-serving: + image: ghcr.io/huggingface/tei-gaudi:1.5.2 + container_name: tei-reranking-gaudi-serving + entrypoint: /bin/sh -c "apt-get update && apt-get install -y curl && text-embeddings-router --json-output --model-id ${RERANK_MODEL_ID} --auto-truncate" + ports: + - ${TEI_RERANKING_PORT:-12004}:80 + volumes: + - "./data:/data" + shm_size: 1g + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + host_ip: ${host_ip} + HABANA_VISIBLE_DEVICES: all + OMPI_MCA_btl_vader_single_copy_mechanism: none + MAX_WARMUP_SEQUENCE_LENGTH: 512 + runtime: habana + cap_add: + - SYS_NICE + ipc: host + healthcheck: + test: ["CMD", "curl", "-f", "http://${host_ip}:${TEI_RERANKING_PORT}/health"] + interval: 10s + timeout: 6s + retries: 48 diff --git a/comps/third_parties/tei/deployment/kubernetes/README.md b/comps/third_parties/tei/deployment/kubernetes/README.md new file mode 100644 index 0000000000..1650330214 --- /dev/null +++ b/comps/third_parties/tei/deployment/kubernetes/README.md @@ -0,0 +1,18 @@ +# Deploy TEI on kubernetes cluster + +- You should have Helm (version >= 3.15) installed. Refer to the [Helm Installation Guide](https://helm.sh/docs/intro/install/) for more information. +- For more deployment options, refer to [helm charts README](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts#readme). 
+ +## Deploy on Xeon + +``` +export HFTOKEN="insert-your-huggingface-token-here" +helm install tei oci://ghcr.io/opea-project/charts/tei --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f cpu-values.yaml +``` + +## Deploy on Gaudi + +``` +export HFTOKEN="insert-your-huggingface-token-here" +helm install tei oci://ghcr.io/opea-project/charts/tei --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f gaudi-values.yaml +``` diff --git a/comps/third_parties/tei/deployment/kubernetes/cpu-values.yaml b/comps/third_parties/tei/deployment/kubernetes/cpu-values.yaml new file mode 100644 index 0000000000..5eaa0d2744 --- /dev/null +++ b/comps/third_parties/tei/deployment/kubernetes/cpu-values.yaml @@ -0,0 +1,5 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +image: + repository: ghcr.io/huggingface/text-embeddings-inference diff --git a/comps/third_parties/tei/deployment/kubernetes/gaudi-values.yaml b/comps/third_parties/tei/deployment/kubernetes/gaudi-values.yaml new file mode 100644 index 0000000000..aa8c36da48 --- /dev/null +++ b/comps/third_parties/tei/deployment/kubernetes/gaudi-values.yaml @@ -0,0 +1,22 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +accelDevice: "gaudi" + +OMPI_MCA_btl_vader_single_copy_mechanism: "none" +MAX_WARMUP_SEQUENCE_LENGTH: "512" +image: + repository: ghcr.io/huggingface/tei-gaudi + tag: 1.5.0 + +securityContext: + readOnlyRootFilesystem: false + +resources: + limits: + habana.ai/gaudi: 1 + +livenessProbe: + timeoutSeconds: 1 +readinessProbe: + timeoutSeconds: 1 diff --git a/comps/third_parties/teirerank/deployment/kubernetes/README.md b/comps/third_parties/teirerank/deployment/kubernetes/README.md new file mode 100644 index 0000000000..b67de89cb0 --- /dev/null +++ b/comps/third_parties/teirerank/deployment/kubernetes/README.md @@ -0,0 +1,18 @@ +# Deploy TEIRERANK on kubernetes cluster + +- You should have Helm (version >= 3.15) installed. Refer to the [Helm Installation Guide](https://helm.sh/docs/intro/install/) for more information. +- For more deployment options, refer to [helm charts README](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts#readme). 
+ +## Deploy on Xeon + +``` +export HFTOKEN="insert-your-huggingface-token-here" +helm install teirerank oci://ghcr.io/opea-project/charts/teirerank --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f cpu-values.yaml +``` + +## Deploy on Gaudi + +``` +export HFTOKEN="insert-your-huggingface-token-here" +helm install teirerank oci://ghcr.io/opea-project/charts/teirerank --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f gaudi-values.yaml +``` diff --git a/comps/third_parties/teirerank/deployment/kubernetes/cpu-values.yaml b/comps/third_parties/teirerank/deployment/kubernetes/cpu-values.yaml new file mode 100644 index 0000000000..5eaa0d2744 --- /dev/null +++ b/comps/third_parties/teirerank/deployment/kubernetes/cpu-values.yaml @@ -0,0 +1,5 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +image: + repository: ghcr.io/huggingface/text-embeddings-inference diff --git a/comps/third_parties/teirerank/deployment/kubernetes/gaudi-values.yaml b/comps/third_parties/teirerank/deployment/kubernetes/gaudi-values.yaml new file mode 100644 index 0000000000..aa8c36da48 --- /dev/null +++ b/comps/third_parties/teirerank/deployment/kubernetes/gaudi-values.yaml @@ -0,0 +1,22 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +accelDevice: "gaudi" + +OMPI_MCA_btl_vader_single_copy_mechanism: "none" +MAX_WARMUP_SEQUENCE_LENGTH: "512" +image: + repository: ghcr.io/huggingface/tei-gaudi + tag: 1.5.0 + +securityContext: + readOnlyRootFilesystem: false + +resources: + limits: + habana.ai/gaudi: 1 + +livenessProbe: + timeoutSeconds: 1 +readinessProbe: + timeoutSeconds: 1 diff --git a/comps/third_parties/tgi/README.md b/comps/third_parties/tgi/README.md new file mode 100644 index 0000000000..e12f6d34da --- /dev/null +++ b/comps/third_parties/tgi/README.md @@ -0,0 +1,30 @@ +# TGI LLM Microservice + +[Text Generation Inference](https://github.com/huggingface/text-generation-inference) (TGI) is a toolkit for deploying and serving Large Language Models (LLMs). TGI enables high-performance text generation for the most popular open-source LLMs, including Llama, Falcon, StarCoder, BLOOM, GPT-NeoX, and more. + +## Start TGI with docker compose + +Set up the environment. + +```bash +export LLM_ENDPOINT_PORT=8008 +export host_ip=${host_ip} +export HF_TOKEN=${HF_TOKEN} +export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" +export MAX_INPUT_TOKENS=1024 +export MAX_TOTAL_TOKENS=2048 +``` + +Run TGI on Xeon. + +```bash +cd deployment/docker_compose +docker compose -f compose.yaml up tgi-server -d +``` + +Run TGI on Gaudi.
+ +```bash +cd deployment/docker_compose +docker compose -f compose.yaml up tgi-gaudi-server -d +``` diff --git a/comps/llms/deployment/docker_compose/faq-generation_tgi_on_intel_hpu.yaml b/comps/third_parties/tgi/deployment/docker_compose/compose.yaml similarity index 54% rename from comps/llms/deployment/docker_compose/faq-generation_tgi_on_intel_hpu.yaml rename to comps/third_parties/tgi/deployment/docker_compose/compose.yaml index 1ce0ba80a1..d6428d2afc 100644 --- a/comps/llms/deployment/docker_compose/faq-generation_tgi_on_intel_hpu.yaml +++ b/comps/third_parties/tgi/deployment/docker_compose/compose.yaml @@ -1,21 +1,43 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -version: "3.8" - services: - tgi-service: + tgi-server: + image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu + container_name: tgi-server + ports: + - ${LLM_ENDPOINT_PORT:-8008}:80 + volumes: + - "${DATA_PATH:-./data}:/data" + shm_size: 1g + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + HF_TOKEN: ${HF_TOKEN} + host_ip: ${host_ip} + LLM_ENDPOINT_PORT: ${LLM_ENDPOINT_PORT} + MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS:-2048} + MAX_TOTAL_TOKENS: ${MAX_TOTAL_TOKENS:-4096} + healthcheck: + test: ["CMD-SHELL", "curl -f http://${host_ip}:${LLM_ENDPOINT_PORT}/health || exit 1"] + interval: 10s + timeout: 10s + retries: 100 + command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0 + tgi-gaudi-server: image: ghcr.io/huggingface/tgi-gaudi:2.3.1 container_name: tgi-gaudi-server ports: - ${LLM_ENDPOINT_PORT:-8008}:80 volumes: - - "./data:/data" + - "${DATA_PATH:-./data}:/data" environment: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGING_FACE_HUB_TOKEN: ${HF_TOKEN} + HF_TOKEN: ${HF_TOKEN} HF_HUB_DISABLE_PROGRESS_BARS: 1 HF_HUB_ENABLE_HF_TRANSFER: 0 HABANA_VISIBLE_DEVICES: all @@ -26,6 +48,8 @@ services: FLASH_ATTENTION_RECOMPUTE: true host_ip: ${host_ip} LLM_ENDPOINT_PORT: ${LLM_ENDPOINT_PORT} + MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS:-2048} + MAX_TOTAL_TOKENS: ${MAX_TOTAL_TOKENS:-4096} runtime: habana cap_add: - SYS_NICE @@ -35,26 +59,7 @@ services: interval: 10s timeout: 10s retries: 100 - command: --model-id ${LLM_MODEL_ID} --max-input-length 1024 --max-total-tokens 2048 - llm: - image: opea/llm-faqgen:latest - container_name: llm-faqgen-server - depends_on: - tgi-service: - condition: service_healthy - ports: - - ${FAQ_PORT:-9000}:9000 - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - LLM_ENDPOINT: ${LLM_ENDPOINT} - LLM_MODEL_ID: ${LLM_MODEL_ID} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} - FAQGen_COMPONENT_NAME: ${FAQGen_COMPONENT_NAME} - LOGFLAG: ${LOGFLAG:-False} - restart: unless-stopped + command: --model-id ${LLM_MODEL_ID} networks: default: diff --git a/comps/third_parties/tgi/deployment/docker_compose/launch_tgi_service.sh b/comps/third_parties/tgi/deployment/docker_compose/launch_tgi_service.sh deleted file mode 100644 index dc88b12fff..0000000000 --- a/comps/third_parties/tgi/deployment/docker_compose/launch_tgi_service.sh +++ /dev/null @@ -1,40 +0,0 @@ -#!/bin/bash - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -# Set default values -default_port=8008 -default_model="Intel/neural-chat-7b-v3-3" -default_num_cards=1 - -# Check if all required arguments are provided -if [ "$#" -lt 0 ] || [ "$#" -gt 3 ]; then - echo "Usage: $0 [num_cards]
[port_number] [model_name]" - exit 1 -fi - -# Assign arguments to variables -num_cards=${1:-$default_num_cards} -port_number=${2:-$default_port} -model_name=${3:-$default_model} - -# Check if num_cards is within the valid range (1-8) -if [ "$num_cards" -lt 1 ] || [ "$num_cards" -gt 8 ]; then - echo "Error: num_cards must be between 1 and 8." - exit 1 -fi - -# Set the volume variable -volume=$PWD/data - -# Build the Docker run command based on the number of cards -if [ "$num_cards" -eq 1 ]; then - docker_cmd="docker run -d --name="ChatQnA_server" -p $port_number:80 -v $volume:/data --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy ghcr.io/huggingface/tgi-gaudi:2.0.5 --model-id $model_name" --max-input-tokens 2048 --max-total-tokens 4096 -else - docker_cmd="docker run -d --name="ChatQnA_server" -p $port_number:80 -v $volume:/data --runtime=habana -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy ghcr.io/huggingface/tgi-gaudi:2.0.5 --max-input-tokens 4096 --max-total-tokens 8192 --model-id $model_name --sharded true --num-shard $num_cards" -fi - -# Execute the Docker run command -eval $docker_cmd diff --git a/comps/third_parties/tgi/deployment/kubernetes/README.md b/comps/third_parties/tgi/deployment/kubernetes/README.md index e69de29bb2..ff37f88ecf 100644 --- a/comps/third_parties/tgi/deployment/kubernetes/README.md +++ b/comps/third_parties/tgi/deployment/kubernetes/README.md @@ -0,0 +1,18 @@ +# Deploy TGI on kubernetes cluster + +- You should have Helm (version >= 3.15) installed. Refer to the [Helm Installation Guide](https://helm.sh/docs/intro/install/) for more information. +- For more deployment options, refer to [helm charts README](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts#readme). 
+ +## Deploy on Xeon + +``` +export HFTOKEN="insert-your-huggingface-token-here" +helm install tgi oci://ghcr.io/opea-project/charts/tgi --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f cpu-values.yaml +``` + +## Deploy on Gaudi + +``` +export HFTOKEN="insert-your-huggingface-token-here" +helm install tgi oci://ghcr.io/opea-project/charts/tgi --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f gaudi-values.yaml +``` diff --git a/comps/third_parties/tgi/deployment/kubernetes/cpu-values.yaml b/comps/third_parties/tgi/deployment/kubernetes/cpu-values.yaml new file mode 100644 index 0000000000..38297ab3d3 --- /dev/null +++ b/comps/third_parties/tgi/deployment/kubernetes/cpu-values.yaml @@ -0,0 +1,26 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Resource requirements for Intel/neural-chat-7b-v3-3 @ 32-bit: +resources: + limits: + cpu: 8 + memory: 70Gi + requests: + cpu: 6 + memory: 65Gi + +livenessProbe: + initialDelaySeconds: 8 + periodSeconds: 8 + failureThreshold: 24 + timeoutSeconds: 4 +readinessProbe: + initialDelaySeconds: 16 + periodSeconds: 8 + timeoutSeconds: 4 +startupProbe: + initialDelaySeconds: 10 + periodSeconds: 5 + failureThreshold: 180 + timeoutSeconds: 2 diff --git a/comps/third_parties/tgi/deployment/kubernetes/gaudi-values.yaml b/comps/third_parties/tgi/deployment/kubernetes/gaudi-values.yaml new file mode 100644 index 0000000000..7ad07ba188 --- /dev/null +++ b/comps/third_parties/tgi/deployment/kubernetes/gaudi-values.yaml @@ -0,0 +1,38 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +accelDevice: "gaudi" + +image: + repository: ghcr.io/huggingface/tgi-gaudi + tag: "2.3.1" + +MAX_INPUT_LENGTH: "1024" +MAX_TOTAL_TOKENS: "2048" +CUDA_GRAPHS: "" +OMPI_MCA_btl_vader_single_copy_mechanism: "none" +ENABLE_HPU_GRAPH: "true" +LIMIT_HPU_GRAPH: "true" +USE_FLASH_ATTENTION: "true" +FLASH_ATTENTION_RECOMPUTE: "true" + +resources: + limits: + habana.ai/gaudi: 1 + requests: + cpu: 1 + memory: 16Gi + +livenessProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 1 +readinessProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 1 +startupProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 1 + failureThreshold: 120 diff --git a/comps/third_parties/vdms/deployment/docker_compose/compose.yaml b/comps/third_parties/vdms/deployment/docker_compose/compose.yaml new file mode 100644 index 0000000000..455d3530ce --- /dev/null +++ b/comps/third_parties/vdms/deployment/docker_compose/compose.yaml @@ -0,0 +1,15 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +services: + vdms-vector-db: + image: intellabs/vdms:latest + container_name: vdms-vector-db + ports: + - "${VDMS_PORT:-55555}:55555" + environment: + - no_proxy=${no_proxy} + - http_proxy=${http_proxy} + - https_proxy=${https_proxy} + healthcheck: + test: ["CMD-SHELL", "sleep 8 && echo 'This is a healthcheck test.' 
|| exit 1"]] diff --git a/comps/vectorstores/vdms/README.md b/comps/third_parties/vdms/src/README.md similarity index 100% rename from comps/vectorstores/vdms/README.md rename to comps/third_parties/vdms/src/README.md diff --git a/comps/dataprep/redis/langchain/__init__.py b/comps/third_parties/vdms/src/__init__.py similarity index 100% rename from comps/dataprep/redis/langchain/__init__.py rename to comps/third_parties/vdms/src/__init__.py diff --git a/comps/llms/text-generation/vllm/langchain/README.md b/comps/third_parties/vllm/README.md similarity index 91% rename from comps/llms/text-generation/vllm/langchain/README.md rename to comps/third_parties/vllm/README.md index 4a221e9e64..899ccb0870 100644 --- a/comps/llms/text-generation/vllm/langchain/README.md +++ b/comps/third_parties/vllm/README.md @@ -5,21 +5,17 @@ ## 🚀1. Set up Environment Variables ```bash -export HUGGINGFACEHUB_API_TOKEN= -export vLLM_ENDPOINT="http://${your_ip}:8008" -export LLM_MODEL="meta-llama/Meta-Llama-3-8B-Instruct" +export LLM_ENDPOINT_PORT=8008 +export host_ip=${host_ip} +export HF_TOKEN=${HF_TOKEN} +export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}" +export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" ``` -For gated models such as `LLAMA-2`, you will have to pass the environment HUGGINGFACEHUB_API_TOKEN. Please follow this link [huggingface token](https://huggingface.co/docs/hub/security-tokens) to get the access token and export `HUGGINGFACEHUB_API_TOKEN` environment with the token. +For gated models such as `LLAMA-2`, you will have to pass the environment variable `HF_TOKEN`. Please follow this link [huggingface token](https://huggingface.co/docs/hub/security-tokens) to get the access token and export the `HF_TOKEN` environment variable with the token. ## 🚀2. Set up vLLM Service -First of all, go to the server folder for vllm. - -```bash -cd dependency -``` - ### 2.1 vLLM on CPU First let's enable VLLM on CPU. @@ -32,7 +28,7 @@ bash ./build_docker_vllm.sh The `build_docker_vllm` accepts one parameter `hw_mode` to specify the hardware mode of the service, with the default being `cpu`, and the optional selection can be `hpu`. -#### Launch vLLM service +#### Launch vLLM service with scripts ```bash bash ./launch_vllm_service.sh @@ -44,6 +40,13 @@ If you want to customize the port or model_name, can run: bash ./launch_vllm_service.sh ${port_number} ${model_name} ``` +#### Launch vLLM service with docker compose + +```bash +cd deployment/docker_compose +docker compose -f compose.yaml up vllm-server -d +``` + ### 2.2 vLLM on Gaudi Then we show how to enable VLLM on Gaudi. @@ -58,6 +61,15 @@ Set `hw_mode` to `hpu`. #### Launch vLLM service on single node +1. Option 1: Use docker compose for quick deploy + +```bash +cd deployment/docker_compose +docker compose -f compose.yaml up vllm-gaudi-server -d +``` + +2. Option 2: Use scripts to set parameters. + For small model, we can just use single node. ```bash @@ -121,10 +133,10 @@ Once it successfully builds, you will have the `opea/vllm-arc:latest` image. It For gated models, such as `LLAMA-2`, you will have to pass -e HUGGING_FACE_HUB_TOKEN=\ to the docker run command above with a valid Hugging Face Hub read token. -Please follow this link [huggingface token](https://huggingface.co/docs/hub/security-tokens) to get an access token and export `HUGGINGFACEHUB_API_TOKEN` environment with the token. +Please follow this link [huggingface token](https://huggingface.co/docs/hub/security-tokens) to get an access token and export the `HF_TOKEN` environment variable with the token.
```bash -export HUGGINGFACEHUB_API_TOKEN= +export HF_TOKEN= ``` To start the model server for Intel CPU: diff --git a/comps/third_parties/vllm/deployment/docker_compose/compose.yaml b/comps/third_parties/vllm/deployment/docker_compose/compose.yaml new file mode 100644 index 0000000000..e6e1858cef --- /dev/null +++ b/comps/third_parties/vllm/deployment/docker_compose/compose.yaml @@ -0,0 +1,107 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +services: + vllm-server: + image: ${REGISTRY:-opea}/vllm:${TAG:-latest} + container_name: vllm-server + ports: + - ${LLM_ENDPOINT_PORT:-8008}:80 + volumes: + - "${DATA_PATH:-./data}:/data" + shm_size: 128g + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + HF_TOKEN: ${HF_TOKEN} + LLM_MODEL_ID: ${LLM_MODEL_ID} + VLLM_TORCH_PROFILER_DIR: "${VLLM_TORCH_PROFILER_DIR:-/mnt}" + host_ip: ${host_ip} + LLM_ENDPOINT_PORT: ${LLM_ENDPOINT_PORT} + VLLM_SKIP_WARMUP: ${VLLM_SKIP_WARMUP:-false} + healthcheck: + test: ["CMD-SHELL", "curl -f http://${host_ip}:${LLM_ENDPOINT_PORT}/health || exit 1"] + interval: 10s + timeout: 10s + retries: 100 + command: --model $LLM_MODEL_ID --host 0.0.0.0 --port 80 + vllm-gaudi-server: + image: ${REGISTRY:-opea}/vllm-gaudi:${TAG:-latest} + container_name: vllm-gaudi-server + ports: + - ${LLM_ENDPOINT_PORT:-8008}:80 + volumes: + - "${DATA_PATH:-./data}:/data" + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + HF_TOKEN: ${HF_TOKEN} + HABANA_VISIBLE_DEVICES: all + OMPI_MCA_btl_vader_single_copy_mechanism: none + LLM_MODEL_ID: ${LLM_MODEL_ID} + VLLM_TORCH_PROFILER_DIR: "/mnt" + host_ip: ${host_ip} + LLM_ENDPOINT_PORT: ${LLM_ENDPOINT_PORT} + VLLM_SKIP_WARMUP: ${VLLM_SKIP_WARMUP:-false} + MAX_MODEL_LEN: ${MAX_TOTAL_TOKENS:-4096} + MAX_SEQ_LEN_TO_CAPTURE: ${MAX_TOTAL_TOKENS:-4096} + runtime: habana + cap_add: + - SYS_NICE + ipc: host + healthcheck: + test: ["CMD-SHELL", "curl -f http://${host_ip}:${LLM_ENDPOINT_PORT}/health || exit 1"] + interval: 10s + timeout: 10s + retries: 150 + command: --model $LLM_MODEL_ID --tensor-parallel-size 1 --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 + vllm-openvino: + image: ${REGISTRY:-opea}/vllm-openvino:${TAG:-latest} + container_name: vllm-openvino + ports: + - ${LLM_ENDPOINT_PORT:-8008}:${LLM_ENDPOINT_PORT:-8008} + volumes: + - "${HF_CACHE_DIR:-$HOME/.cache/huggingface}:/root/.cache/huggingface" + environment: + HTTPS_PROXY: ${http_proxy} + HTTP_PROXY: ${https_proxy} + LLM_MODEL_ID: ${LLM_MODEL_ID} + LLM_ENDPOINT_PORT: ${LLM_ENDPOINT_PORT} + host_ip: ${host_ip} + entrypoint: /bin/bash -c " cd / && export VLLM_CPU_KVCACHE_SPACE=50 && python3 -m vllm.entrypoints.openai.api_server --model ${LLM_MODEL_ID} --host 0.0.0.0 --port ${LLM_ENDPOINT_PORT}" + ipc: host + healthcheck: + test: ["CMD-SHELL", "curl -f http://${host_ip}:${LLM_ENDPOINT_PORT}/health || exit 1"] + interval: 10s + timeout: 10s + retries: 100 + vllm-openvino-arc: + image: ${REGISTRY:-opea}/vllm-arc:${TAG:-latest} + container_name: vllm-openvino-arc + ports: + - ${LLM_ENDPOINT_PORT:-8008}:${LLM_ENDPOINT_PORT:-8008} + volumes: + - "${HF_CACHE_DIR:-$HOME/.cache/huggingface}:/root/.cache/huggingface" + devices: + - "/dev/dri:/dev/dri" + group_add: + - ${RENDER_GROUP_ID:-110} + environment: + HTTPS_PROXY: ${http_proxy} + HTTP_PROXY: ${https_proxy} + LLM_MODEL_ID: ${LLM_MODEL_ID} + LLM_ENDPOINT_PORT: ${LLM_ENDPOINT_PORT} + host_ip: ${host_ip} + entrypoint: /bin/bash -c " export VLLM_OPENVINO_DEVICE=GPU 
&& export VLLM_OPENVINO_ENABLE_QUANTIZED_WEIGHTS=ON && python3 -m vllm.entrypoints.openai.api_server --model ${LLM_MODEL_ID} --host 0.0.0.0 --port ${LLM_ENDPOINT_PORT} --max_model_len 8192" + ipc: host + healthcheck: + test: ["CMD-SHELL", "curl -f http://${host_ip}:${LLM_ENDPOINT_PORT}/health || exit 1"] + interval: 10s + timeout: 10s + retries: 100 + +networks: + default: + driver: bridge diff --git a/comps/third_parties/vllm/deployment/kubernetes/README.md b/comps/third_parties/vllm/deployment/kubernetes/README.md index e69de29bb2..18b17d9096 100644 --- a/comps/third_parties/vllm/deployment/kubernetes/README.md +++ b/comps/third_parties/vllm/deployment/kubernetes/README.md @@ -0,0 +1,18 @@ +# Deploy vllm on kubernetes cluster + +- You should have Helm (version >= 3.15) installed. Refer to the [Helm Installation Guide](https://helm.sh/docs/intro/install/) for more information. +- For more deployment options, refer to [helm charts README](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts#readme). + +## Deploy on Xeon + +``` +export HFTOKEN="insert-your-huggingface-token-here" +helm install myvllm oci://ghcr.io/opea-project/charts/vllm --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f cpu-values.yaml +``` + +## Deploy on Gaudi + +``` +export HFTOKEN="insert-your-huggingface-token-here" +helm install myvllm oci://ghcr.io/opea-project/charts/vllm --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f gaudi-values.yaml +``` diff --git a/comps/third_parties/vllm/deployment/kubernetes/cpu-values.yaml b/comps/third_parties/vllm/deployment/kubernetes/cpu-values.yaml new file mode 100644 index 0000000000..c2e01e4be7 --- /dev/null +++ b/comps/third_parties/vllm/deployment/kubernetes/cpu-values.yaml @@ -0,0 +1,5 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +image: + repository: opea/vllm diff --git a/comps/third_parties/vllm/deployment/kubernetes/gaudi-values.yaml b/comps/third_parties/vllm/deployment/kubernetes/gaudi-values.yaml new file mode 100644 index 0000000000..e9ddbed829 --- /dev/null +++ b/comps/third_parties/vllm/deployment/kubernetes/gaudi-values.yaml @@ -0,0 +1,14 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +accelDevice: "gaudi" + +image: + repository: opea/vllm-gaudi + +# VLLM_CPU_KVCACHE_SPACE: "40" +OMPI_MCA_btl_vader_single_copy_mechanism: none +extraCmdArgs: ["--tensor-parallel-size","1","--block-size","128","--max-num-seqs","256","--max-seq_len-to-capture","2048"] +resources: + limits: + habana.ai/gaudi: 1 diff --git a/comps/llms/text-generation/vllm/langchain/dependency/Dockerfile.intel_gpu b/comps/third_parties/vllm/src/Dockerfile.intel_gpu similarity index 96% rename from comps/llms/text-generation/vllm/langchain/dependency/Dockerfile.intel_gpu rename to comps/third_parties/vllm/src/Dockerfile.intel_gpu index dfb94d2df1..1ad8f6b6bc 100644 --- a/comps/llms/text-generation/vllm/langchain/dependency/Dockerfile.intel_gpu +++ b/comps/third_parties/vllm/src/Dockerfile.intel_gpu @@ -24,7 +24,7 @@ RUN wget -qO - https://repositories.intel.com/gpu/intel-graphics.key | gpg --yes WORKDIR /workspace -RUN git clone -b v0.6.3.post1 https://github.com/vllm-project/vllm.git +RUN git clone -b v0.6.6.post1 https://github.com/vllm-project/vllm.git #ARG GIT_REPO_CHECK=0 #RUN --mount=type=bind,source=.git,target=.git \ diff --git a/comps/third_parties/vllm/src/build_docker_vllm.sh b/comps/third_parties/vllm/src/build_docker_vllm.sh index bcbf20c4a3..bd8df2e708 100644 --- 
a/comps/third_parties/vllm/src/build_docker_vllm.sh +++ b/comps/third_parties/vllm/src/build_docker_vllm.sh @@ -37,7 +37,8 @@ fi if [ "$hw_mode" = "hpu" ]; then git clone https://github.com/HabanaAI/vllm-fork.git cd ./vllm-fork/ - git checkout 3c39626 + git checkout v0.6.4.post2+Gaudi-1.19.0 + sed -i 's/triton/triton==3.1.0/g' requirements-hpu.txt docker build -f Dockerfile.hpu -t opea/vllm-gaudi:latest --shm-size=128g . --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy cd .. rm -rf vllm-fork diff --git a/comps/third_parties/wav2lip/deployment/docker_compose/compose.yaml b/comps/third_parties/wav2lip/deployment/docker_compose/compose.yaml new file mode 100644 index 0000000000..4133d4b4bf --- /dev/null +++ b/comps/third_parties/wav2lip/deployment/docker_compose/compose.yaml @@ -0,0 +1,61 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +services: + wav2lip-server: + image: ${REGISTRY:-opea}/wav2lip:${TAG:-latest} + container_name: wav2lip-server + ports: + - ${WAV2LIP_PORT:-7860}:${WAV2LIP_PORT:-7860} + ipc: host + volumes: + - ${PWD}:/outputs + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + DEVICE: ${DEVICE} + INFERENCE_MODE: ${INFERENCE_MODE} + CHECKPOINT_PATH: ${CHECKPOINT_PATH} + FACE: ${FACE} + AUDIO: ${AUDIO} + FACESIZE: ${FACESIZE} + OUTFILE: ${OUTFILE} + GFPGAN_MODEL_VERSION: ${GFPGAN_MODEL_VERSION} + UPSCALE_FACTOR: ${UPSCALE_FACTOR} + FPS: ${FPS} + WAV2LIP_PORT: ${WAV2LIP_PORT:-7860} + restart: unless-stopped + wav2lip-gaudi-server: + image: ${REGISTRY:-opea}/wav2lip-gaudi:${TAG:-latest} + container_name: wav2lip-gaudi-server + ports: + - ${WAV2LIP_PORT:-7860}:${WAV2LIP_PORT:-7860} + ipc: host + volumes: + - ${PWD}:/outputs + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + HABANA_VISIBLE_DEVICES: all + OMPI_MCA_btl_vader_single_copy_mechanism: none + DEVICE: ${DEVICE} + INFERENCE_MODE: ${INFERENCE_MODE} + CHECKPOINT_PATH: ${CHECKPOINT_PATH} + FACE: ${FACE} + AUDIO: ${AUDIO} + FACESIZE: ${FACESIZE} + OUTFILE: ${OUTFILE} + GFPGAN_MODEL_VERSION: ${GFPGAN_MODEL_VERSION} + UPSCALE_FACTOR: ${UPSCALE_FACTOR} + FPS: ${FPS} + WAV2LIP_PORT: ${WAV2LIP_PORT} + runtime: habana + cap_add: + - SYS_NICE + restart: unless-stopped + +networks: + default: + driver: bridge diff --git a/comps/third_parties/whisper/deployment/kubernetes/README.md b/comps/third_parties/whisper/deployment/kubernetes/README.md new file mode 100644 index 0000000000..3754916482 --- /dev/null +++ b/comps/third_parties/whisper/deployment/kubernetes/README.md @@ -0,0 +1,18 @@ +# Deploy whisper on kubernetes cluster + +- You should have Helm (version >= 3.15) installed. Refer to the [Helm Installation Guide](https://helm.sh/docs/intro/install/) for more information. +- For more deployment options, refer to [helm charts README](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts#readme). 
+ +## Deploy on Xeon + +``` +export HFTOKEN="insert-your-huggingface-token-here" +helm install whisper oci://ghcr.io/opea-project/charts/whisper --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f cpu-values.yaml +``` + +## Deploy on Gaudi + +``` +export HFTOKEN="insert-your-huggingface-token-here" +helm install whisper oci://ghcr.io/opea-project/charts/whisper --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f gaudi-values.yaml +``` diff --git a/comps/third_parties/whisper/deployment/kubernetes/cpu-values.yaml b/comps/third_parties/whisper/deployment/kubernetes/cpu-values.yaml new file mode 100644 index 0000000000..f32f55f00f --- /dev/null +++ b/comps/third_parties/whisper/deployment/kubernetes/cpu-values.yaml @@ -0,0 +1,5 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +image: + repository: opea/whisper diff --git a/comps/third_parties/whisper/deployment/kubernetes/gaudi-values.yaml b/comps/third_parties/whisper/deployment/kubernetes/gaudi-values.yaml new file mode 100644 index 0000000000..3ba40c4b8d --- /dev/null +++ b/comps/third_parties/whisper/deployment/kubernetes/gaudi-values.yaml @@ -0,0 +1,9 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +image: + repository: opea/whisper-gaudi + +resources: + limits: + habana.ai/gaudi: 1 diff --git a/comps/tts/deployment/docker_compose/compose.yaml b/comps/tts/deployment/docker_compose/compose.yaml new file mode 100644 index 0000000000..a99c1688a2 --- /dev/null +++ b/comps/tts/deployment/docker_compose/compose.yaml @@ -0,0 +1,95 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +services: + speecht5-service: + image: ${REGISTRY:-opea}/speecht5:${TAG:-latest} + container_name: speecht5-service + ports: + - ${SPEECHT5_PORT:-7055}:7055 + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + restart: unless-stopped + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:7055/health"] + interval: 10s + timeout: 6s + retries: 18 + speecht5-gaudi-service: + image: ${REGISTRY:-opea}/speecht5-gaudi:${TAG:-latest} + container_name: speecht5-gaudi-service + ports: + - ${SPEECHT5_PORT:-7055}:7055 + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + HABANA_VISIBLE_DEVICES: all + OMPI_MCA_btl_vader_single_copy_mechanism: none + runtime: habana + cap_add: + - SYS_NICE + restart: unless-stopped + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:7055/health"] + interval: 10s + timeout: 6s + retries: 18 + gptsovits-service: + image: ${REGISTRY:-opea}/gpt-sovits:${TAG:-latest} + container_name: gpt-sovits-service + ports: + - ${GPT_SOVITS_PORT:-9880}:9880 + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + restart: unless-stopped + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:9880/health"] + interval: 10s + timeout: 6s + retries: 18 + tts: + image: ${REGISTRY:-opea}/tts:${TAG:-latest} + container_name: tts-service + ports: + - ${TTS_PORT:-9088}:9088 + ipc: host + environment: + TTS_ENDPOINT: ${TTS_ENDPOINT} + TTS_COMPONENT_NAME: ${TTS_COMPONENT_NAME:-OPEA_SPEECHT5_TTS} + tts-speecht5: + extends: tts + container_name: tts-speecht5-service + environment: + TTS_COMPONENT_NAME: ${TTS_COMPONENT_NAME:-OPEA_SPEECHT5_TTS} + depends_on: + speecht5-service: + condition: service_healthy + tts-speecht5-gaudi: + extends: tts + container_name: 
tts-speecht5-gaudi-service + environment: + TTS_COMPONENT_NAME: ${TTS_COMPONENT_NAME:-OPEA_SPEECHT5_TTS} + depends_on: + speecht5-gaudi-service: + condition: service_healthy + tts-gptsovits: + extends: tts + container_name: tts-gptsovits-service + environment: + TTS_COMPONENT_NAME: ${TTS_COMPONENT_NAME:-OPEA_GPTSOVITS_TTS} + depends_on: + gptsovits-service: + condition: service_healthy + + +networks: + default: + driver: bridge diff --git a/comps/tts/deployment/docker_compose/compose_gptsovits.yaml b/comps/tts/deployment/docker_compose/compose_gptsovits.yaml deleted file mode 100644 index 7cf863010f..0000000000 --- a/comps/tts/deployment/docker_compose/compose_gptsovits.yaml +++ /dev/null @@ -1,35 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -services: - gpt-sovits-service: - image: ${REGISTRY:-opea}/gpt-sovits:${TAG:-latest} - container_name: gpt-sovits-service - ports: - - "9880:9880" - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - restart: unless-stopped - healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:9880/health"] - interval: 10s - timeout: 6s - retries: 18 - tts: - image: ${REGISTRY:-opea}/tts:${TAG:-latest} - container_name: tts-service - ports: - - "3002:9088" - ipc: host - environment: - TTS_ENDPOINT: ${TTS_ENDPOINT} - dependes_on: - speecht5-service: - condition: service_healthy - -networks: - default: - driver: bridge diff --git a/comps/tts/deployment/docker_compose/compose_speecht5.yaml b/comps/tts/deployment/docker_compose/compose_speecht5.yaml deleted file mode 100644 index f5df389a6c..0000000000 --- a/comps/tts/deployment/docker_compose/compose_speecht5.yaml +++ /dev/null @@ -1,35 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -services: - speecht5-service: - image: ${REGISTRY:-opea}/speecht5:${TAG:-latest} - container_name: speecht5-service - ports: - - "7055:7055" - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - restart: unless-stopped - healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:7055/health"] - interval: 10s - timeout: 6s - retries: 18 - tts: - image: ${REGISTRY:-opea}/tts:${TAG:-latest} - container_name: tts-service - ports: - - "3002:9088" - ipc: host - environment: - TTS_ENDPOINT: ${TTS_ENDPOINT} - dependes_on: - speecht5-service: - condition: service_healthy - -networks: - default: - driver: bridge diff --git a/comps/tts/deployment/docker_compose/compose_speect5_hpu.yaml b/comps/tts/deployment/docker_compose/compose_speect5_hpu.yaml deleted file mode 100644 index e30ce3678a..0000000000 --- a/comps/tts/deployment/docker_compose/compose_speect5_hpu.yaml +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -services: - speecht5-service: - image: ${REGISTRY:-opea}/speecht5-gaudi:${TAG:-latest} - container_name: speecht5-service - ports: - - "7055:7055" - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - HABANA_VISIBLE_DEVICES: all - OMPI_MCA_btl_vader_single_copy_mechanism: none - runtime: habana - cap_add: - - SYS_NICE - restart: unless-stopped - healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:7055/health"] - interval: 10s - timeout: 6s - retries: 18 - tts: - image: ${REGISTRY:-opea}/tts:${TAG:-latest} - container_name: tts-service - ports: - - "3002:9088" - ipc: host - environment: - TTS_ENDPOINT: 
${TTS_ENDPOINT} - dependes_on: - speecht5-service: - condition: service_healthy - -networks: - default: - driver: bridge diff --git a/comps/tts/deployment/kubernetes/README.md b/comps/tts/deployment/kubernetes/README.md new file mode 100644 index 0000000000..af1bcb05a3 --- /dev/null +++ b/comps/tts/deployment/kubernetes/README.md @@ -0,0 +1,11 @@ +# Deploy tts microservice on Kubernetes cluster + +- You should have Helm (version >= 3.15) installed. Refer to the [Helm Installation Guide](https://helm.sh/docs/intro/install/) for more information. +- For more deployment options, refer to [helm charts README](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts#readme). + +## Deploy on Kubernetes + +``` +export HFTOKEN="insert-your-huggingface-token-here" +helm install tts oci://ghcr.io/opea-project/charts/tts --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f cpu-values.yaml +``` diff --git a/comps/tts/deployment/kubernetes/cpu-values.yaml b/comps/tts/deployment/kubernetes/cpu-values.yaml new file mode 100644 index 0000000000..c735ab48ab --- /dev/null +++ b/comps/tts/deployment/kubernetes/cpu-values.yaml @@ -0,0 +1,5 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +speecht5: + enabled: true diff --git a/comps/tts/src/Dockerfile b/comps/tts/src/Dockerfile index de3bbce35d..3ec7555144 100644 --- a/comps/tts/src/Dockerfile +++ b/comps/tts/src/Dockerfile @@ -5,7 +5,7 @@ FROM python:3.11-slim RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ chown -R user /home/user/ -USER user + ENV LANG=C.UTF-8 ARG ARCH=cpu @@ -20,7 +20,7 @@ RUN pip install --no-cache-dir --upgrade pip setuptools && \ fi ENV PYTHONPATH=$PYTHONPATH:/home/user - +USER user WORKDIR /home/user/comps/tts/src ENTRYPOINT ["python", "opea_tts_microservice.py"] diff --git a/comps/tts/src/README.md b/comps/tts/src/README.md index fc4ce4a9d2..7bb096d601 100644 --- a/comps/tts/src/README.md +++ b/comps/tts/src/README.md @@ -91,3 +91,29 @@ curl http://localhost:7055/v1/tts -XPOST -d '{"text": "Who are you?"}' -H 'Conte # voice can be 'male' or 'default' curl http://localhost:9088/v1/audio/speech -XPOST -d '{"input":"Who are you?", "voice": "male"}' -H 'Content-Type: application/json' --output speech.wav ``` + +## 🚀3. Start Microservice with Docker Compose (Option 3) + +Alternatively, you can also start the TTS microservice with Docker Compose. 
+ +```bash +export ip_address=$(hostname -I | awk '{print $1}') +# default speecht5 port 7055 +export TTS_ENDPOINT=http://$ip_address:7055 +# default gptsovits port 9880 +# if you want to use gptsovits, please execute the following command instead +# export TTS_ENDPOINT=http://$ip_address:9880 +export no_proxy=localhost,$no_proxy + +# speecht5 cpu +docker compose -f ../deployment/docker_compose/compose.yaml up speecht5-service tts-speecht5 -d + +# speecht5 hpu +docker compose -f ../deployment/docker_compose/compose.yaml up speecht5-gaudi-service tts-speecht5-gaudi -d + +# gptsovits cpu +docker compose -f ../deployment/docker_compose/compose.yaml up tts-gptsovits gptsovits-service -d + +# Test +curl http://localhost:9088/v1/audio/speech -XPOST -d '{"input":"Who are you?"}' -H 'Content-Type: application/json' --output speech.wav +``` diff --git a/comps/tts/src/integrations/dependency/gpt-sovits/Dockerfile b/comps/tts/src/integrations/dependency/gpt-sovits/Dockerfile index 5fce957705..1d53b142e7 100644 --- a/comps/tts/src/integrations/dependency/gpt-sovits/Dockerfile +++ b/comps/tts/src/integrations/dependency/gpt-sovits/Dockerfile @@ -9,7 +9,7 @@ RUN useradd -m -s /bin/bash user && \ # Install system dependencies RUN apt-get update && \ apt-get install -y ffmpeg git-lfs git wget vim build-essential numactl && \ - apt-get install -y libomp-dev google-perftools && \ + apt-get install -y libomp-dev google-perftools curl && \ pip install --upgrade pip ENV LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libiomp5.so:/usr/lib/x86_64-linux-gnu/libtcmalloc.so.4 diff --git a/comps/tts/src/integrations/dependency/speecht5/Dockerfile b/comps/tts/src/integrations/dependency/speecht5/Dockerfile index 34ade3576e..161a44505a 100644 --- a/comps/tts/src/integrations/dependency/speecht5/Dockerfile +++ b/comps/tts/src/integrations/dependency/speecht5/Dockerfile @@ -18,8 +18,6 @@ RUN apt-get update \ COPY --chown=user:user comps /home/user/comps -USER user - RUN pip install --no-cache-dir --upgrade pip setuptools && \ if [ "${ARCH}" = "cpu" ]; then \ pip install --no-cache-dir torch torchvision --index-url https://download.pytorch.org/whl/cpu ; \ @@ -30,6 +28,8 @@ RUN pip install --no-cache-dir --upgrade pip setuptools && \ ENV PYTHONPATH=$PYTHONPATH:/home/user +USER user + WORKDIR /home/user/comps/tts/src/integrations/dependency/speecht5 ENTRYPOINT ["python", "speecht5_server.py", "--device", "cpu"] diff --git a/comps/tts/src/integrations/dependency/speecht5/Dockerfile.intel_hpu b/comps/tts/src/integrations/dependency/speecht5/Dockerfile.intel_hpu index 895118d1d3..eee2aa2d6c 100644 --- a/comps/tts/src/integrations/dependency/speecht5/Dockerfile.intel_hpu +++ b/comps/tts/src/integrations/dependency/speecht5/Dockerfile.intel_hpu @@ -20,8 +20,6 @@ RUN apt-get update \ COPY --chown=user:user comps /home/user/comps -USER user - # Install requirements and optimum habana RUN pip install --no-cache-dir --upgrade pip && \ pip install --no-cache-dir -r /home/user/comps/tts/src/integrations/dependency/speecht5/requirements.txt && \ @@ -29,6 +27,8 @@ RUN pip install --no-cache-dir --upgrade pip && \ ENV PYTHONPATH=$PYTHONPATH:/home/user +USER user + WORKDIR /home/user/comps/tts/src/integrations/dependency/speecht5 ENTRYPOINT ["python", "speecht5_server.py", "--device", "hpu"] diff --git a/comps/tts/src/integrations/opea_gptsovits.py b/comps/tts/src/integrations/gptsovits.py similarity index 95% rename from comps/tts/src/integrations/opea_gptsovits.py rename to comps/tts/src/integrations/gptsovits.py index 71eb5101e6..cc448a4a05 
100644 --- a/comps/tts/src/integrations/opea_gptsovits.py +++ b/comps/tts/src/integrations/gptsovits.py @@ -35,7 +35,6 @@ async def invoke( request: AudioSpeechRequest, ) -> requests.models.Response: """Involve the TTS service to generate speech for the provided input.""" - # see https://github.com/Spycsh/GPT-SoVITS/blob/openai_compat/api.py#L948 for usage # make sure you change the refer_wav_path locally request.voice = None diff --git a/comps/tts/src/integrations/opea_speecht5.py b/comps/tts/src/integrations/speecht5.py similarity index 99% rename from comps/tts/src/integrations/opea_speecht5.py rename to comps/tts/src/integrations/speecht5.py index 905e2fb018..4d399edcb4 100644 --- a/comps/tts/src/integrations/opea_speecht5.py +++ b/comps/tts/src/integrations/speecht5.py @@ -3,7 +3,6 @@ import asyncio import os -import time import requests from fastapi.responses import StreamingResponse diff --git a/comps/tts/src/opea_tts_microservice.py b/comps/tts/src/opea_tts_microservice.py index 29bd3e1bc0..2c47cfd118 100644 --- a/comps/tts/src/opea_tts_microservice.py +++ b/comps/tts/src/opea_tts_microservice.py @@ -5,8 +5,8 @@ import time from fastapi.responses import StreamingResponse -from integrations.opea_gptsovits import OpeaGptsovitsTts -from integrations.opea_speecht5 import OpeaSpeecht5Tts +from integrations.gptsovits import OpeaGptsovitsTts +from integrations.speecht5 import OpeaSpeecht5Tts from comps import ( CustomLogger, diff --git a/comps/vectorstores/README.md b/comps/vectorstores/README.md deleted file mode 100644 index 004b44ab8d..0000000000 --- a/comps/vectorstores/README.md +++ /dev/null @@ -1,39 +0,0 @@ -# Vectorstores Microservice - -The Vectorstores Microservice provides convenient way to start various vector database servers. - -## Vectorstores Microservice with Redis - -For details, please refer to this [readme](redis/README.md) - -## Vectorstores Microservice with Qdrant - -For details, please refer to this [readme](qdrant/README.md) - -## Vectorstores Microservice with PGVector - -For details, please refer to this [readme](pgvector/README.md) - -## Vectorstores Microservice with Pinecone - -For details, please refer to this [readme](pinecone/README.md) - -## Vectorstores Microservice with Pathway - -For details, please refer to this [readme](pathway/README.md) - -## Vectorstores Microservice with Milvus - -For details, please refer to this [readme](milvus/README.md) - -## Vectorstores Microservice with LanceDB - -For details, please refer to this [readme](lancedb/README.md) - -## Vectorstores Microservice with Chroma - -For details, please refer to this [readme](chroma/README.md) - -## Vectorstores Microservice with VDMS - -For details, please refer to this [readme](vdms/README.md) diff --git a/comps/vectorstores/__init__.py b/comps/vectorstores/__init__.py deleted file mode 100644 index 916f3a44b2..0000000000 --- a/comps/vectorstores/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 diff --git a/comps/vectorstores/chroma/README.md b/comps/vectorstores/chroma/README.md deleted file mode 100644 index 49bc674707..0000000000 --- a/comps/vectorstores/chroma/README.md +++ /dev/null @@ -1,38 +0,0 @@ -# Start Chroma server - -## Introduction - -Chroma is a AI-native open-source vector database focused on developer productivity and happiness. Chroma is licensed under Apache 2.0. Chroma runs in various modes, we can deploy it as a server running your local machine or in the cloud. 
- -## Getting Started - -### Start Chroma Server - -To start the Chroma server on your local machine, follow these steps: - -```bash -git clone https://github.com/chroma-core/chroma.git -cd chroma -docker compose up -d -``` - -### Start Log Output - -Upon starting the server, you should see log outputs similar to the following: - -``` -server-1 | Starting 'uvicorn chromadb.app:app' with args: --workers 1 --host 0.0.0.0 --port 8000 --proxy-headers --log-config chromadb/log_config.yml --timeout-keep-alive 30 -server-1 | INFO: [02-08-2024 07:03:19] Set chroma_server_nofile to 65536 -server-1 | INFO: [02-08-2024 07:03:19] Anonymized telemetry enabled. See https://docs.trychroma.com/telemetry for more information. -server-1 | DEBUG: [02-08-2024 07:03:19] Starting component System -server-1 | DEBUG: [02-08-2024 07:03:19] Starting component OpenTelemetryClient -server-1 | DEBUG: [02-08-2024 07:03:19] Starting component SqliteDB -server-1 | DEBUG: [02-08-2024 07:03:19] Starting component QuotaEnforcer -server-1 | DEBUG: [02-08-2024 07:03:19] Starting component Posthog -server-1 | DEBUG: [02-08-2024 07:03:19] Starting component LocalSegmentManager -server-1 | DEBUG: [02-08-2024 07:03:19] Starting component SegmentAPI -server-1 | INFO: [02-08-2024 07:03:19] Started server process [1] -server-1 | INFO: [02-08-2024 07:03:19] Waiting for application startup. -server-1 | INFO: [02-08-2024 07:03:19] Application startup complete. -server-1 | INFO: [02-08-2024 07:03:19] Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit) -``` diff --git a/comps/vectorstores/chroma/__init__.py b/comps/vectorstores/chroma/__init__.py deleted file mode 100644 index 916f3a44b2..0000000000 --- a/comps/vectorstores/chroma/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 diff --git a/comps/vectorstores/elasticsearch/__init__.py b/comps/vectorstores/elasticsearch/__init__.py deleted file mode 100644 index 916f3a44b2..0000000000 --- a/comps/vectorstores/elasticsearch/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 diff --git a/comps/vectorstores/lancedb/README.md b/comps/vectorstores/lancedb/README.md deleted file mode 100644 index 27487fce06..0000000000 --- a/comps/vectorstores/lancedb/README.md +++ /dev/null @@ -1,139 +0,0 @@ -# Start LanceDB Server - -LanceDB is an embedded vector database for AI applications. It is open source and distributed with an Apache-2.0 license. - -LanceDB datasets are persisted to disk and can be shared in Python. 
- -## Setup - -```bash -npm install -S vectordb -``` - -## Usage - -### Create a new index from texts - -```python -import os -import tempfile -from langchain.vectorstores import LanceDB -from langchain.embeddings.openai import OpenAIEmbeddings -from vectordb import connect - - -async def run(): - dir = tempfile.mkdtemp(prefix="lancedb-") - db = await connect(dir) - table = await db.create_table("vectors", [{"vector": [0] * 1536, "text": "sample", "id": 1}]) - - vector_store = await LanceDB.from_texts( - ["Hello world", "Bye bye", "hello nice world"], - [{"id": 2}, {"id": 1}, {"id": 3}], - OpenAIEmbeddings(), - table=table, - ) - - result_one = await vector_store.similarity_search("hello world", 1) - print(result_one) - # [ Document(page_content='hello nice world', metadata={'id': 3}) ] - - -# Run the function -import asyncio - -asyncio.run(run()) -``` - -API Reference: - -- `LanceDB` from `@langchain/community/vectorstores/lancedb` -- `OpenAIEmbeddings` from `@langchain/openai` - -### Create a new index from a loader - -```python -import os -import tempfile -from langchain.vectorstores import LanceDB -from langchain.embeddings.openai import OpenAIEmbeddings -from langchain.document_loaders.fs import TextLoader -from vectordb import connect - -# Create docs with a loader -loader = TextLoader("src/document_loaders/example_data/example.txt") -docs = loader.load() - - -async def run(): - dir = tempfile.mkdtemp(prefix="lancedb-") - db = await connect(dir) - table = await db.create_table("vectors", [{"vector": [0] * 1536, "text": "sample", "source": "a"}]) - - vector_store = await LanceDB.from_documents(docs, OpenAIEmbeddings(), table=table) - - result_one = await vector_store.similarity_search("hello world", 1) - print(result_one) - # [ - # Document(page_content='Foo\nBar\nBaz\n\n', metadata={'source': 'src/document_loaders/example_data/example.txt'}) - # ] - - -# Run the function -import asyncio - -asyncio.run(run()) -``` - -API Reference: - -- `LanceDB` from `@langchain/community/vectorstores/lancedb` -- `OpenAIEmbeddings` from `@langchain/openai` -- `TextLoader` from `langchain/document_loaders/fs/text` - -### Open an existing dataset - -```python -import os -import tempfile -from langchain.vectorstores import LanceDB -from langchain.embeddings.openai import OpenAIEmbeddings -from vectordb import connect - - -async def run(): - uri = await create_test_db() - db = await connect(uri) - table = await db.open_table("vectors") - - vector_store = LanceDB(OpenAIEmbeddings(), table=table) - - result_one = await vector_store.similarity_search("hello world", 1) - print(result_one) - # [ Document(page_content='Hello world', metadata={'id': 1}) ] - - -async def create_test_db(): - dir = tempfile.mkdtemp(prefix="lancedb-") - db = await connect(dir) - await db.create_table( - "vectors", - [ - {"vector": [0] * 1536, "text": "Hello world", "id": 1}, - {"vector": [0] * 1536, "text": "Bye bye", "id": 2}, - {"vector": [0] * 1536, "text": "hello nice world", "id": 3}, - ], - ) - return dir - - -# Run the function -import asyncio - -asyncio.run(run()) -``` - -API Reference: - -- `LanceDB` from `@langchain/community/vectorstores/lancedb` -- `OpenAIEmbeddings` from `@langchain/openai` diff --git a/comps/vectorstores/lancedb/__init__.py b/comps/vectorstores/lancedb/__init__.py deleted file mode 100644 index 916f3a44b2..0000000000 --- a/comps/vectorstores/lancedb/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 diff --git 
a/comps/vectorstores/milvus/__init__.py b/comps/vectorstores/milvus/__init__.py deleted file mode 100644 index 916f3a44b2..0000000000 --- a/comps/vectorstores/milvus/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 diff --git a/comps/vectorstores/opensearch/__init__.py b/comps/vectorstores/opensearch/__init__.py deleted file mode 100644 index 916f3a44b2..0000000000 --- a/comps/vectorstores/opensearch/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 diff --git a/comps/vectorstores/opensearch/docker-compose-opensearch.yaml b/comps/vectorstores/opensearch/docker-compose-opensearch.yaml deleted file mode 100644 index 1769850e65..0000000000 --- a/comps/vectorstores/opensearch/docker-compose-opensearch.yaml +++ /dev/null @@ -1,81 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -version: '3' -services: - opensearch-node1: - image: opensearchproject/opensearch:latest - container_name: opensearch-node1 - environment: - - cluster.name=opensearch-cluster - - node.name=opensearch-node1 - - discovery.seed_hosts=opensearch-node1,opensearch-node2 - - cluster.initial_master_nodes=opensearch-node1,opensearch-node2 - - bootstrap.memory_lock=true # along with the memlock settings below, disables swapping - - "OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m" # minimum and maximum Java heap size, recommend setting both to 50% of system RAM - - OPENSEARCH_INITIAL_ADMIN_PASSWORD=${OPENSEARCH_INITIAL_ADMIN_PASSWORD} # Sets the demo admin user password when using demo configuration, required for OpenSearch 2.12 and later - ulimits: - memlock: - soft: -1 - hard: -1 - nofile: - soft: 65536 # maximum number of open files for the OpenSearch user, set to at least 65536 on modern systems - hard: 65536 - volumes: - - ./opensearch-data1:/var/lib/opensearch/data - ports: - - 9200:9200 - - 9600:9600 # required for Performance Analyzer - networks: - - opensearch-net - security_opt: - - no-new-privileges:true - opensearch-node2: - image: opensearchproject/opensearch:latest - container_name: opensearch-node2 - environment: - - cluster.name=opensearch-cluster - - node.name=opensearch-node2 - - discovery.seed_hosts=opensearch-node1,opensearch-node2 - - cluster.initial_master_nodes=opensearch-node1,opensearch-node2 - - bootstrap.memory_lock=true - - "OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m" - - OPENSEARCH_INITIAL_ADMIN_PASSWORD=${OPENSEARCH_INITIAL_ADMIN_PASSWORD} # Sets the demo admin user password when using demo configuration, required for OpenSearch 2.12 and later - ulimits: - memlock: - soft: -1 - hard: -1 - nofile: - soft: 65536 - hard: 65536 - volumes: - - ./opensearch-data2:/var/lib/opensearch/data - networks: - - opensearch-net - security_opt: - - no-new-privileges:true - opensearch-dashboards: - image: opensearchproject/opensearch-dashboards:latest - volumes: - - ./opensearch_dashboards.yml:/usr/share/opensearch-dashboards/config/opensearch_dashboards.yml - container_name: opensearch-dashboards - ports: - - 5601:5601 - expose: - - "5601" - environment: - OPENSEARCH_HOSTS: '["https://opensearch-node1:9200","https://opensearch-node2:9200"]' # must be a string with no spaces when specified as an environment variable - networks: - - opensearch-net - security_opt: - - no-new-privileges:true - depends_on: - - opensearch-node1 - - opensearch-node2 - -volumes: - opensearch-data1: - opensearch-data2: - -networks: - opensearch-net: diff --git 
a/comps/vectorstores/pathway/__init__.py b/comps/vectorstores/pathway/__init__.py deleted file mode 100644 index 916f3a44b2..0000000000 --- a/comps/vectorstores/pathway/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 diff --git a/comps/vectorstores/pgvector/__init__.py b/comps/vectorstores/pgvector/__init__.py deleted file mode 100644 index 916f3a44b2..0000000000 --- a/comps/vectorstores/pgvector/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 diff --git a/comps/vectorstores/pgvector/pgvector.yaml b/comps/vectorstores/pgvector/pgvector.yaml deleted file mode 100644 index e497390c48..0000000000 --- a/comps/vectorstores/pgvector/pgvector.yaml +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -services: - db: - hostname: db - image: pgvector/pgvector:0.7.0-pg16 - ports: - - 5432:5432 - restart: always - environment: - - POSTGRES_DB=vectordb - - POSTGRES_USER=testuser - - POSTGRES_PASSWORD=testpwd - - POSTGRES_HOST_AUTH_METHOD=trust - volumes: - - ./init.sql:/docker-entrypoint-initdb.d/init.sql diff --git a/comps/vectorstores/pinecone/README.md b/comps/vectorstores/pinecone/README.md deleted file mode 100644 index 6696f42e36..0000000000 --- a/comps/vectorstores/pinecone/README.md +++ /dev/null @@ -1,20 +0,0 @@ -# Pinecone setup - -## 1. Create Pinecone account from the below link - -https://app.pinecone.io/ - -More details from Pinecone quick start guide https://docs.pinecone.io/guides/get-started/quickstart - -## 2. Get API key - -API Key is needed to make the API calls. API key can get it from the Project -> Manage -> API keys - -## 3. Create the index in https://app.pinecone.io/ - -Following details are to be provided - -1. Index name -2. Based on the embedding model selected, following has to be provided - a. Dimensions - b. Metric diff --git a/comps/vectorstores/pinecone/__init__.py b/comps/vectorstores/pinecone/__init__.py deleted file mode 100644 index 916f3a44b2..0000000000 --- a/comps/vectorstores/pinecone/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 diff --git a/comps/vectorstores/qdrant/README.md b/comps/vectorstores/qdrant/README.md deleted file mode 100644 index 518c3fd859..0000000000 --- a/comps/vectorstores/qdrant/README.md +++ /dev/null @@ -1,13 +0,0 @@ -# Start Qdrant server - -## 1. Download Qdrant image - -```bash -docker pull qdrant/qdrant -``` - -## 2. 
Run Qdrant service - -```bash -docker run -p 6333:6333 -p 6334:6334 -v $(pwd)/qdrant_storage:/qdrant/storage:z qdrant/qdrant -``` diff --git a/comps/vectorstores/qdrant/__init__.py b/comps/vectorstores/qdrant/__init__.py deleted file mode 100644 index 916f3a44b2..0000000000 --- a/comps/vectorstores/qdrant/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 diff --git a/comps/vectorstores/redis/__init__.py b/comps/vectorstores/redis/__init__.py deleted file mode 100644 index 916f3a44b2..0000000000 --- a/comps/vectorstores/redis/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 diff --git a/comps/vectorstores/redis/redis.yaml b/comps/vectorstores/redis/redis.yaml deleted file mode 100644 index d7226e13bf..0000000000 --- a/comps/vectorstores/redis/redis.yaml +++ /dev/null @@ -1,11 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -version: "3" -services: - redis-vector-db: - image: redis/redis-stack:7.2.0-v9 - container_name: redis-vector-db - ports: - - "6379:6379" - - "8001:8001" diff --git a/comps/vectorstores/vdms/__init__.py b/comps/vectorstores/vdms/__init__.py deleted file mode 100644 index 916f3a44b2..0000000000 --- a/comps/vectorstores/vdms/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 diff --git a/comps/vectorstores/vdms/vdms.yaml b/comps/vectorstores/vdms/vdms.yaml deleted file mode 100644 index 8e13a0f1b3..0000000000 --- a/comps/vectorstores/vdms/vdms.yaml +++ /dev/null @@ -1,10 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -version: "3" -services: - vdms-vector-db: - image: intellabs/vdms:latest - container_name: vdms-vector-db - ports: - - "55555:55555" diff --git a/comps/version.py b/comps/version.py index 4888978385..6305c10202 100644 --- a/comps/version.py +++ b/comps/version.py @@ -3,4 +3,4 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -__version__ = "1.1" +__version__ = "1.2" diff --git a/comps/embeddings/deployment/docker_compose/compose_tei.yaml b/comps/web_retrievers/deployment/docker_compose/compose.yaml similarity index 54% rename from comps/embeddings/deployment/docker_compose/compose_tei.yaml rename to comps/web_retrievers/deployment/docker_compose/compose.yaml index 4841b7ffe0..d626313eb1 100644 --- a/comps/embeddings/deployment/docker_compose/compose_tei.yaml +++ b/comps/web_retrievers/deployment/docker_compose/compose.yaml @@ -1,14 +1,30 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -version: "3.8" - services: + web-retriever-service: + image: ${REGISTRY:-opea}/web-retriever:${TAG:-latest} + container_name: web-retriever-service + ports: + - ${WEB_RETRIEVER_PORT:-7077}:7077 + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + GOOGLE_API_KEY: ${GOOGLE_API_KEY} + GOOGLE_CSE_ID: ${GOOGLE_CSE_ID} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + restart: unless-stopped + depends_on: + tei-embedding-service: + condition: service_healthy tei-embedding-service: image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 container_name: tei-embedding-server + entrypoint: /bin/sh -c "apt-get update && apt-get install -y curl && text-embeddings-router --json-output --model-id ${EMBEDDING_MODEL_ID} --auto-truncate" ports: - - "6006:80" + - 
${TEI_PORT:-6060}:80 volumes: - "./data:/data" shm_size: 1g @@ -16,28 +32,12 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate + host_ip: ${host_ip} healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:6006/health"] + test: ["CMD", "curl", "-f", "http://${host_ip}:${TEI_PORT}/health"] interval: 10s timeout: 6s - retries: 18 - embedding: - image: opea/embedding:latest - container_name: embedding-server - ports: - - "6000:6000" - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} - EMBEDDING_COMPONENT_NAME: "OPEA_TEI_EMBEDDING" - depends_on: - tei-embedding-service: - condition: service_healthy - restart: unless-stopped + retries: 48 networks: default: diff --git a/comps/web_retrievers/deployment/kubernetes/README.md b/comps/web_retrievers/deployment/kubernetes/README.md new file mode 100644 index 0000000000..c361509fe8 --- /dev/null +++ b/comps/web_retrievers/deployment/kubernetes/README.md @@ -0,0 +1,11 @@ +# Deploy web-retriever microservice on Kubernetes cluster + +- You should have Helm (version >= 3.15) installed. Refer to the [Helm Installation Guide](https://helm.sh/docs/intro/install/) for more information. +- For more deployment options, refer to [helm charts README](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts#readme). + +## Deploy on Kubernetes + +``` +export HFTOKEN="insert-your-huggingface-token-here" +helm install web-retriever oci://ghcr.io/opea-project/charts/web-retriever --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f cpu-values.yaml +``` diff --git a/comps/llms/summarization/tgi/langchain/__init__.py b/comps/web_retrievers/deployment/kubernetes/cpu-values.yaml similarity index 77% rename from comps/llms/summarization/tgi/langchain/__init__.py rename to comps/web_retrievers/deployment/kubernetes/cpu-values.yaml index 916f3a44b2..e2d62ff26f 100644 --- a/comps/llms/summarization/tgi/langchain/__init__.py +++ b/comps/web_retrievers/deployment/kubernetes/cpu-values.yaml @@ -1,2 +1,5 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 + +tei: + enabled: true diff --git a/comps/web_retrievers/src/Dockerfile b/comps/web_retrievers/src/Dockerfile index 7a370e1f0a..d9e1ded610 100644 --- a/comps/web_retrievers/src/Dockerfile +++ b/comps/web_retrievers/src/Dockerfile @@ -11,9 +11,8 @@ RUN useradd -m -s /bin/bash user && \ RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ libgl1-mesa-glx \ - libjemalloc-dev - -USER user + libjemalloc-dev \ + curl COPY comps /home/user/comps @@ -26,6 +25,8 @@ RUN pip install --no-cache-dir --upgrade pip setuptools && \ ENV PYTHONPATH=$PYTHONPATH:/home/user +USER user + WORKDIR /home/user/comps/web_retrievers/src ENTRYPOINT ["python", "opea_web_retrievers_microservice.py"] diff --git a/comps/web_retrievers/src/README.md b/comps/web_retrievers/src/README.md index bf0670ca39..b276a28b3e 100644 --- a/comps/web_retrievers/src/README.md +++ b/comps/web_retrievers/src/README.md @@ -2,16 +2,16 @@ The Web Retriever Microservice is designed to efficiently search web pages relevant to the prompt, save them into the VectorDB, and retrieve the matched documents with the highest similarity. The retrieved documents will be used as context in the prompt to LLMs. 
Different from the normal RAG process, a web retriever can leverage advanced search engines for more diverse demands, such as real-time news, verifiable sources, and diverse sources. -## Start Microservice with Docker +## 🚀1. Start Microservice with Docker (Option 1) -### Build Docker Image +### 1.1 Build Docker Image ```bash cd ../../../../ docker build -t opea/web-retriever:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/web_retrievers/src/Dockerfile . ``` -### Start TEI Service +### 1.2 Start TEI Service ```bash model=BAAI/bge-base-en-v1.5 @@ -19,7 +19,7 @@ volume=$PWD/data docker run -d -p 6060:80 -v $volume:/data -e http_proxy=$http_proxy -e https_proxy=$https_proxy --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 --model-id $model --auto-truncate ``` -### Start Web Retriever Service +### 1.3 Start Web Retriever Service ```bash # set TEI endpoint @@ -34,7 +34,23 @@ export GOOGLE_CSE_ID=xxx docker run -d --name="web-retriever-server" -p 7077:7077 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e GOOGLE_API_KEY=$GOOGLE_API_KEY -e GOOGLE_CSE_ID=$GOOGLE_CSE_ID opea/web-retriever:latest ``` -### Consume Web Retriever Service +## 🚀2. Start Microservice with Docker Compose (Option 2) + +Alternatively, you can start the web retriever microservice with Docker Compose. + +```bash +export host_ip=$(hostname -I | awk '{print $1}') +export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +export GOOGLE_API_KEY=${GOOGLE_API_KEY} +export GOOGLE_CSE_ID=${GOOGLE_CSE_ID} +export TEI_PORT=6060 +export no_proxy=$host_ip,$no_proxy +export EMBEDDING_MODEL_ID=BAAI/bge-base-en-v1.5 + +docker compose -f ../deployment/docker_compose/compose.yaml up web-retriever-service tei-embedding-service -d +``` + +## 🚀3. Consume Web Retriever Service To consume the Web Retriever Microservice, you can generate a mock embedding vector of length 768 with Python. diff --git a/tests/agent/ragagent.yaml b/tests/agent/ragagent.yaml index d6b5b3ad37..c8bda98feb 100644 --- a/tests/agent/ragagent.yaml +++ b/tests/agent/ragagent.yaml @@ -17,6 +17,7 @@ services: llm_engine: tgi HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} llm_endpoint_url: ${LLM_ENDPOINT_URL} + timeout: 500 model: ${LLM_MODEL_ID} temperature: ${temperature} max_new_tokens: ${max_new_tokens} diff --git a/tests/agent/test.py b/tests/agent/test.py index e345e89420..2da0f6c8e6 100644 --- a/tests/agent/test.py +++ b/tests/agent/test.py @@ -10,7 +10,7 @@ def generate_answer_agent_api(url, prompt): proxies = {"http": ""} payload = { - "query": prompt, + "messages": prompt, } response = requests.post(url, json=payload, proxies=proxies) answer = response.json()["text"] @@ -21,7 +21,7 @@ def process_request(url, query, is_stream=False): proxies = {"http": ""} payload = { - "query": query, + "messages": query, } try: diff --git a/tests/agent/test_agent_langchain_on_intel_hpu.sh b/tests/agent/test_agent_langchain_on_intel_hpu.sh index 16b1fc8373..090d1ed332 100644 --- a/tests/agent/test_agent_langchain_on_intel_hpu.sh +++ b/tests/agent/test_agent_langchain_on_intel_hpu.sh @@ -53,10 +53,11 @@ function build_vllm_docker_images() { cd $WORKPATH echo $WORKPATH if [ ! 
-d "./vllm" ]; then - git clone https://github.com/vllm-project/vllm.git + git clone https://github.com/HabanaAI/vllm-fork.git fi - cd ./vllm - git checkout main + cd ./vllm-fork + git checkout v0.6.4.post2+Gaudi-1.19.0 + sed -i 's/triton/triton==3.1.0/g' requirements-hpu.txt docker build --no-cache -f Dockerfile.hpu -t opea/vllm-gaudi:comps --shm-size=128g . --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy if [ $? -ne 0 ]; then echo "opea/vllm-gaudi:comps failed" @@ -165,6 +166,11 @@ function start_vllm_service_70B() { } function start_react_agent_service() { + + echo "Starting redis for testing agent persistent" + + docker run -d -it -p 6379:6379 --rm --name "test-persistent-redis" --net=host --ipc=host --name redis-vector-db redis/redis-stack:7.2.0-v9 + echo "Starting react agent microservice" docker compose -f $WORKPATH/tests/agent/react_langchain.yaml up -d sleep 120s @@ -235,6 +241,7 @@ function validate() { local EXPECTED_RESULT="$2" local SERVICE_NAME="$3" # local CONTENT_TO_VALIDATE= "$CONTENT" | grep -oP '(?<=text:).*?(?=prompt)' + echo "EXPECTED_RESULT: $EXPECTED_RESULT" echo "Content: $CONTENT" # echo "Content to validate: $CONTENT_TO_VALIDATE" @@ -256,6 +263,12 @@ function validate_microservice() { local EXIT_CODE=$(validate "$CONTENT" "OPEA" "test-agent") echo "$EXIT_CODE" local EXIT_CODE="${EXIT_CODE:0-1}" + if [ "$EXIT_CODE" == "1" ]; then + echo "try new EXPECTED_RESULT: " + local EXIT_CODE=$(validate "$CONTENT" "OPEA stands for Open Platform for Enterprise AI." "test-agent") + echo "$EXIT_CODE" + fi + local EXIT_CODE="${EXIT_CODE:0-1}" echo "return value is $EXIT_CODE" if [ "$EXIT_CODE" == "1" ]; then echo "==================vllm logs ======================" @@ -286,7 +299,7 @@ function validate_microservice_streaming() { function validate_assistant_api() { cd $WORKPATH echo "Testing agent service - assistant api" - local CONTENT=$(python3 comps/agent/src/test_assistant_api.py --ip_addr ${ip_address} --ext_port 9095 --assistants_api_test --query 'What is Intel OPEA project?' 2>&1 | tee ${LOG_PATH}/test-agent-assistantsapi.log) + local CONTENT=$(python3 comps/agent/src/test_assistant_api.py --ip_addr ${ip_address} --ext_port 9095 --assistants_api_test --query 'What is Intel OPEA project?' --llm_endpoint_url $LLM_ENDPOINT_URL 2>&1 | tee ${LOG_PATH}/test-agent-assistantsapi.log) local EXIT_CODE=$(validate "$CONTENT" "OPEA" "test-agent-assistantsapi") echo "$EXIT_CODE" local EXIT_CODE="${EXIT_CODE:0-1}" @@ -326,10 +339,18 @@ function stop_agent_docker() { echo "Docker containers stopped successfully" } +function stop_redis_docker() { + cid=$(docker ps -aq --filter "name=test-persistent-redis") + echo "Stopping the docker containers "${cid} + if [[ ! -z "$cid" ]]; then docker rm $cid -f && sleep 1s; fi + echo "Docker containers stopped successfully" +} + function stop_docker() { stop_tgi_docker stop_vllm_docker stop_agent_docker + stop_redis_docker } diff --git a/tests/animation/test_animation_wav2lip.sh b/tests/animation/test_animation_wav2lip.sh index 60fbb74839..f33530fe05 100644 --- a/tests/animation/test_animation_wav2lip.sh +++ b/tests/animation/test_animation_wav2lip.sh @@ -7,17 +7,23 @@ set -x WORKPATH=$(dirname "$PWD") ip_address=$(hostname -I | awk '{print $1}') +export TAG=comps +export ANIMATION_PORT=10900 +export WAV2LIP_PORT=12300 +export service_name="animation" + + function build_docker_images() { cd $WORKPATH echo $(pwd) - docker build -t opea/wav2lip:comps -f comps/third_parties/wav2lip/src/Dockerfile . 
+ docker build -t opea/wav2lip:$TAG -f comps/third_parties/wav2lip/src/Dockerfile . if [ $? -ne 0 ]; then echo "opea/wav2lip built fail" exit 1 else echo "opea/wav2lip built successful" fi - docker build --no-cache -t opea/animation:comps -f comps/animation/src/Dockerfile . + docker build --no-cache -t opea/animation:$TAG -f comps/animation/src/Dockerfile . if [ $? -ne 0 ]; then echo "opea/animation built fail" exit 1 @@ -31,8 +37,6 @@ function start_service() { # Set env vars export ip_address=$(hostname -I | awk '{print $1}') export DEVICE="cpu" - export WAV2LIP_PORT=7860 - export ANIMATION_PORT=9066 export INFERENCE_MODE='wav2lip+gfpgan' export CHECKPOINT_PATH='/usr/local/lib/python3.11/site-packages/Wav2Lip/checkpoints/wav2lip_gan.pth' export FACE="/home/user/comps/animation/src/assets/img/avatar1.jpg" @@ -42,28 +46,30 @@ function start_service() { export GFPGAN_MODEL_VERSION=1.4 # latest version, can roll back to v1.3 if needed export UPSCALE_FACTOR=1 export FPS=10 + export WAV2LIP_ENDPOINT="http://$ip_address:$WAV2LIP_PORT" + + cd $WORKPATH/comps/animation/deployment/docker_compose + docker compose -f compose.yaml up ${service_name} -d - docker run -d --name="test-comps-animation-wav2lip" -v $WORKPATH/comps/animation/src/assets:/home/user/comps/animation/src/assets -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e DEVICE=$DEVICE -e INFERENCE_MODE=$INFERENCE_MODE -e CHECKPOINT_PATH=$CHECKPOINT_PATH -e FACE=$FACE -e AUDIO=$AUDIO -e FACESIZE=$FACESIZE -e OUTFILE=$OUTFILE -e GFPGAN_MODEL_VERSION=$GFPGAN_MODEL_VERSION -e UPSCALE_FACTOR=$UPSCALE_FACTOR -e FPS=$FPS -e WAV2LIP_PORT=$WAV2LIP_PORT -p 7860:7860 --ipc=host opea/wav2lip:comps - docker run -d --name="test-comps-animation" -v $WORKPATH/comps/animation/src/assets:/home/user/comps/animation/src/assets -e WAV2LIP_ENDPOINT=http://$ip_address:7860 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 9066:9066 --ipc=host opea/animation:comps sleep 3m } function validate_microservice() { cd $WORKPATH - result=$(http_proxy="" curl http://localhost:9066/v1/animation -X POST -H "Content-Type: application/json" -d @comps/animation/src/assets/audio/sample_question.json) + result=$(http_proxy="" curl http://localhost:$ANIMATION_PORT/v1/animation -X POST -H "Content-Type: application/json" -d @comps/animation/src/assets/audio/sample_question.json) if [[ $result == *"result.mp4"* ]]; then echo "Result correct." else echo "Result wrong." - docker logs test-comps-animation-wav2lip - docker logs test-comps-animation + docker logs wav2lip-server + docker logs animation-server exit 1 fi } function stop_docker() { - cid=$(docker ps -aq --filter "name=test-comps-animation*") - if [[ ! 
-z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi + cd $WORKPATH/comps/animation/deployment/docker_compose + docker compose -f compose.yaml down ${service_name} --remove-orphans } function main() { @@ -77,8 +83,7 @@ function main() { stop_docker - echo y | docker builder prune --all - echo y | docker image prune + echo y | docker system prune } diff --git a/tests/animation/test_animation_wav2lip_on_intel_hpu.sh b/tests/animation/test_animation_wav2lip_on_intel_hpu.sh new file mode 100644 index 0000000000..e04849475b --- /dev/null +++ b/tests/animation/test_animation_wav2lip_on_intel_hpu.sh @@ -0,0 +1,91 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -x + +WORKPATH=$(dirname "$PWD") +ip_address=$(hostname -I | awk '{print $1}') + +export TAG=comps +export ANIMATION_PORT=10901 +export WAV2LIP_PORT=12301 +export service_name="animation-gaudi" + + +function build_docker_images() { + cd $WORKPATH + echo $(pwd) + docker build -t opea/wav2lip-gaudi:$TAG -f comps/third_parties/wav2lip/src/Dockerfile.intel_hpu . + if [ $? -ne 0 ]; then + echo "opea/wav2lip built fail" + exit 1 + else + echo "opea/wav2lip built successful" + fi + docker build --no-cache -t opea/animation:$TAG -f comps/animation/src/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/animation built fail" + exit 1 + else + echo "opea/animation built successful" + fi +} + +function start_service() { + unset http_proxy + # Set env vars + export ip_address=$(hostname -I | awk '{print $1}') + export DEVICE="hpu" + export INFERENCE_MODE='wav2lip+gfpgan' + export CHECKPOINT_PATH='/usr/local/lib/python3.10/dist-packages/Wav2Lip/checkpoints/wav2lip_gan.pth' + export FACE="/home/user/comps/animation/src/assets/img/avatar1.jpg" + export AUDIO='None' + export FACESIZE=96 + export OUTFILE="/home/user/comps/animation/src/assets/outputs/result.mp4" + export GFPGAN_MODEL_VERSION=1.4 # latest version, can roll back to v1.3 if needed + export UPSCALE_FACTOR=1 + export FPS=10 + export WAV2LIP_ENDPOINT="http://$ip_address:$WAV2LIP_PORT" + + cd $WORKPATH/comps/animation/deployment/docker_compose + docker compose -f compose.yaml up ${service_name} -d + + sleep 3m +} + +function validate_microservice() { + cd $WORKPATH + result=$(http_proxy="" curl http://localhost:$ANIMATION_PORT/v1/animation -X POST -H "Content-Type: application/json" -d @comps/animation/src/assets/audio/sample_question.json) + if [[ $result == *"result.mp4"* ]]; then + echo "Result correct." + else + echo "Result wrong." 
+ docker logs wav2lip-server + docker logs animation-server + exit 1 + fi +} + +function stop_docker() { + cd $WORKPATH/comps/animation/deployment/docker_compose + docker compose -f compose.yaml down ${service_name} --remove-orphans +} + +function main() { + stop_docker + + build_docker_images + + start_service + + validate_microservice + + stop_docker + + echo y | docker system prune + + +} + +main diff --git a/tests/asr/test_asr_opea_whisper_on_intel_hpu.sh b/tests/asr/test_asr_opea_whisper_on_intel_hpu.sh deleted file mode 100644 index da3317fb60..0000000000 --- a/tests/asr/test_asr_opea_whisper_on_intel_hpu.sh +++ /dev/null @@ -1,73 +0,0 @@ -#!/bin/bash -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -set -x - -WORKPATH=$(dirname "$PWD") -ip_address=$(hostname -I | awk '{print $1}') - -function build_docker_images() { - cd $WORKPATH - echo $(pwd) - docker build --no-cache -t opea/whisper-gaudi:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/src/integrations/dependency/whisper/Dockerfile.intel_hpu . - - if [ $? -ne 0 ]; then - echo "opea/whisper-gaudi built fail" - exit 1 - else - echo "opea/whisper-gaudi built successful" - fi - - docker build --no-cache -t opea/asr:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/src/Dockerfile . - - if [ $? -ne 0 ]; then - echo "opea/asr built fail" - exit 1 - else - echo "opea/asr built successful" - fi -} - -function start_service() { - unset http_proxy - docker run -d --name="test-comps-asr-whisper-gaudi" --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -p 7067:7066 --ipc=host opea/whisper-gaudi:comps - sleep 3m - docker run -d --name="test-comps-asr" -e ASR_ENDPOINT=http://$ip_address:7067 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -p 9089:9099 --ipc=host opea/asr:comps - sleep 15 -} - -function validate_microservice() { - wget https://github.com/intel/intel-extension-for-transformers/raw/main/intel_extension_for_transformers/neural_chat/assets/audio/sample.wav - result=$(http_proxy="" curl http://localhost:9089/v1/audio/transcriptions -H "Content-Type: multipart/form-data" -F file="@./sample.wav" -F model="openai/whisper-small") - if [[ $result == *"who is"* ]]; then - echo "Result correct." - else - echo "Result wrong." - docker logs test-comps-asr-whisper-gaudi - docker logs test-comps-asr - exit 1 - fi - -} - -function stop_docker() { - cid=$(docker ps -aq --filter "name=test-comps-asr*") - if [[ ! 
-z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi -} - -function main() { - - stop_docker - - build_docker_images - start_service - - validate_microservice - - stop_docker - echo y | docker system prune - -} - -main diff --git a/tests/asr/test_asr_opea_whisper.sh b/tests/asr/test_asr_whisper.sh similarity index 50% rename from tests/asr/test_asr_opea_whisper.sh rename to tests/asr/test_asr_whisper.sh index c038724862..7f39ec2782 100644 --- a/tests/asr/test_asr_opea_whisper.sh +++ b/tests/asr/test_asr_whisper.sh @@ -6,11 +6,14 @@ set -x WORKPATH=$(dirname "$PWD") ip_address=$(hostname -I | awk '{print $1}') +export TAG=comps +export WHISPER_PORT=10100 +export ASR_PORT=10101 function build_docker_images() { cd $WORKPATH echo $(pwd) - docker build --no-cache -t opea/whisper:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/src/integrations/dependency/whisper/Dockerfile . + docker build --no-cache -t opea/whisper:$TAG --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/src/integrations/dependency/whisper/Dockerfile . if [ $? -ne 0 ]; then echo "opea/whisper built fail" @@ -19,7 +22,7 @@ function build_docker_images() { echo "opea/whisper built successful" fi - docker build --no-cache -t opea/asr:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/src/Dockerfile . + docker build --no-cache -t opea/asr:$TAG --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/src/Dockerfile . if [ $? -ne 0 ]; then echo "opea/asr built fail" @@ -31,30 +34,29 @@ function build_docker_images() { function start_service() { unset http_proxy - docker run -d --name="test-comps-asr-whisper" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -p 7066:7066 --ipc=host opea/whisper:comps - sleep 2m - docker run -d --name="test-comps-asr" -e ASR_ENDPOINT=http://$ip_address:7066 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -p 9089:9099 --ipc=host opea/asr:comps + export ASR_ENDPOINT=http://$ip_address:$WHISPER_PORT + + docker compose -f comps/asr/deployment/docker_compose/compose.yaml up whisper-service asr -d sleep 15 } function validate_microservice() { wget https://github.com/intel/intel-extension-for-transformers/raw/main/intel_extension_for_transformers/neural_chat/assets/audio/sample.wav - result=$(http_proxy="" curl http://localhost:9089/v1/audio/transcriptions -H "Content-Type: multipart/form-data" -F file="@./sample.wav" -F model="openai/whisper-small") + result=$(http_proxy="" curl http://localhost:$ASR_PORT/v1/audio/transcriptions -H "Content-Type: multipart/form-data" -F file="@./sample.wav" -F model="openai/whisper-small") rm -f sample.wav if [[ $result == *"who is"* ]]; then echo "Result correct." else echo "Result wrong." - docker logs test-comps-asr-whisper - docker logs test-comps-asr + docker logs whisper-service + docker logs asr-service exit 1 fi } function stop_docker() { - cid=$(docker ps -aq --filter "name=test-comps-asr*") - if [[ ! 
-z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi + docker ps -a --filter "name=whisper-service" --filter "name=asr-service" --format "{{.Names}}" | xargs -r docker stop } function main() { diff --git a/tests/asr/test_asr_whisper_on_intel_hpu.sh b/tests/asr/test_asr_whisper_on_intel_hpu.sh new file mode 100644 index 0000000000..497b71c840 --- /dev/null +++ b/tests/asr/test_asr_whisper_on_intel_hpu.sh @@ -0,0 +1,76 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -x + +WORKPATH=$(dirname "$PWD") +ip_address=$(hostname -I | awk '{print $1}') +export TAG=comps +export WHISPER_PORT=10102 +export ASR_PORT=10103 + +function build_docker_images() { + cd $WORKPATH + echo $(pwd) + docker build --no-cache -t opea/whisper-gaudi:$TAG --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/src/integrations/dependency/whisper/Dockerfile.intel_hpu . + + if [ $? -ne 0 ]; then + echo "opea/whisper-gaudi built fail" + exit 1 + else + echo "opea/whisper-gaudi built successful" + fi + + docker build --no-cache -t opea/asr:$TAG --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/src/Dockerfile . + + if [ $? -ne 0 ]; then + echo "opea/asr built fail" + exit 1 + else + echo "opea/asr built successful" + fi +} + +function start_service() { + unset http_proxy + export ASR_ENDPOINT=http://$ip_address:$WHISPER_PORT + + docker compose -f comps/asr/deployment/docker_compose/compose.yaml up whisper-gaudi-service asr-whisper-gaudi -d + sleep 15 +} + +function validate_microservice() { + wget https://github.com/intel/intel-extension-for-transformers/raw/main/intel_extension_for_transformers/neural_chat/assets/audio/sample.wav + result=$(http_proxy="" curl http://localhost:$ASR_PORT/v1/audio/transcriptions -H "Content-Type: multipart/form-data" -F file="@./sample.wav" -F model="openai/whisper-small") + rm -f sample.wav + if [[ $result == *"who is"* ]]; then + echo "Result correct." + else + echo "Result wrong." + docker logs whisper-gaudi-service + docker logs asr-whisper-gaudi-service + exit 1 + fi + +} + +function stop_docker() { + docker ps -a --filter "name=whisper-gaudi-service" --filter "name=asr-whisper-gaudi-service" --format "{{.Names}}" | xargs -r docker stop +} + +function main() { + + stop_docker + + build_docker_images + start_service + + validate_microservice + + stop_docker + echo y | docker system prune + +} + +main diff --git a/tests/chathistory/test_chathistory_mongo.sh b/tests/chathistory/test_chathistory_mongo.sh index dd81b48d2e..9f32165be7 100644 --- a/tests/chathistory/test_chathistory_mongo.sh +++ b/tests/chathistory/test_chathistory_mongo.sh @@ -15,9 +15,8 @@ export COLLECTION_NAME=${COLLECTION_NAME:-"test"} function build_docker_images() { cd $WORKPATH echo $(pwd) - docker run -d -p 27017:27017 --name=test-comps-mongo mongo:latest - docker build --no-cache -t opea/chathistory-mongo-server:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/chathistory/mongo/Dockerfile . + docker build --no-cache -t opea/chathistory-mongo-server:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/chathistory/src/Dockerfile . if [ $? 
-ne 0 ]; then echo "opea/chathistory-mongo-server built fail" exit 1 @@ -27,15 +26,17 @@ function build_docker_images() { } function start_service() { - - docker run -d --name="test-comps-chathistory-mongo-server" -p 6012:6012 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e MONGO_HOST=${MONGO_HOST} -e MONGO_PORT=${MONGO_PORT} -e DB_NAME=${DB_NAME} -e COLLECTION_NAME=${COLLECTION_NAME} opea/chathistory-mongo-server:comps - + cd $WORKPATH + export CHATHISTORY_PORT=11000 + export TAG=comps + cd comps/chathistory/deployment/docker_compose/ + docker compose up -d sleep 10s } function validate_microservice() { result=$(curl -X 'POST' \ - http://${ip_address}:6012/v1/chathistory/create \ + http://${ip_address}:${CHATHISTORY_PORT}/v1/chathistory/create \ -H 'accept: application/json' \ -H 'Content-Type: application/json' \ -d '{ @@ -48,14 +49,14 @@ function validate_microservice() { echo "Result correct." else echo "Result wrong." - docker logs test-comps-chathistory-mongo-server + docker logs chathistory-mongo-server exit 1 fi } function stop_docker() { - cid=$(docker ps -aq --filter "name=test-comps*") + cid=$(docker ps -aq --filter "name=chathistory-mongo-*") if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } diff --git a/tests/cores/telemetry/test_telemetry.py b/tests/cores/telemetry/test_telemetry.py index 3812bde2a8..3589ac232d 100644 --- a/tests/cores/telemetry/test_telemetry.py +++ b/tests/cores/telemetry/test_telemetry.py @@ -5,8 +5,7 @@ import time import unittest -from comps import opea_telemetry -from comps.cores.telemetry.opea_telemetry import in_memory_exporter +from comps.cores.telemetry.opea_telemetry import in_memory_exporter, opea_telemetry @opea_telemetry diff --git a/tests/dataprep/test_dataprep_elasticsearch_langchain.sh b/tests/dataprep/test_dataprep_elasticsearch.sh similarity index 55% rename from tests/dataprep/test_dataprep_elasticsearch_langchain.sh rename to tests/dataprep/test_dataprep_elasticsearch.sh index a68bbac9f5..0d712c307a 100644 --- a/tests/dataprep/test_dataprep_elasticsearch_langchain.sh +++ b/tests/dataprep/test_dataprep_elasticsearch.sh @@ -7,7 +7,7 @@ set -x WORKPATH=$(dirname "$PWD") LOG_PATH="$WORKPATH/tests" ip_address=$(hostname -I | awk '{print $1}') -dataprep_service_port=6011 +DATAPREP_PORT=11100 function build_docker_images() { cd $WORKPATH @@ -16,48 +16,34 @@ function build_docker_images() { docker pull docker.elastic.co/elasticsearch/elasticsearch:8.16.0 # build dataprep image for elasticsearch - docker build --no-cache -t opea/dataprep-elasticsearch:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f $WORKPATH/comps/dataprep/elasticsearch/langchain/Dockerfile . + docker build --no-cache -t opea/dataprep:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f $WORKPATH/comps/dataprep/src/Dockerfile . if [ $? 
-ne 0 ]; then - echo "opea/dataprep-elasticsearch built fail" + echo "opea/dataprep built fail" exit 1 else - echo "opea/dataprep-elasticsearch built successful" + echo "opea/dataprep built successful" fi } function start_service() { - # elasticsearch - elasticsearch_port=9200 - docker run -d --name test-comps-vectorstore-elasticsearch -e ES_JAVA_OPTS="-Xms1g -Xmx1g" -e "discovery.type=single-node" -e "xpack.security.enabled=false" -p $elasticsearch_port:9200 -p 9300:9300 docker.elastic.co/elasticsearch/elasticsearch:8.16.0 - export ES_CONNECTION_STRING="http://${ip_address}:${elasticsearch_port}" - sleep 10s - - # data-prep - INDEX_NAME="test-elasticsearch" - docker run -d --name="test-comps-dataprep-elasticsearch" -p $dataprep_service_port:6011 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e ES_CONNECTION_STRING=$ES_CONNECTION_STRING -e INDEX_NAME=$INDEX_NAME opea/dataprep-elasticsearch:comps - sleep 15s - - bash ./tests/utils/wait-for-it.sh $ip_address:$dataprep_service_port -s -t 100 -- echo "Dataprep service up" - DATAPREP_UP=$? - if [ ${DATAPREP_UP} -ne 0 ]; then - echo "Could not start Dataprep service." - return 1 - fi - - sleep 5s - bash ./tests/utils/wait-for-it.sh ${ip_address}:$dataprep_service_port -s -t 1 -- echo "Dataprep service still up" - DATAPREP_UP=$? - if [ ${DATAPREP_UP} -ne 0 ]; then - echo "Dataprep service crashed." - return 1 - fi + echo "Starting microservice" + export ELASTICSEARCH_PORT1=12300 + export ES_CONNECTION_STRING="http://${ip_address}:${ELASTICSEARCH_PORT1}" + export INDEX_NAME="test-elasticsearch" + export TAG=comps + service_name="elasticsearch-vector-db dataprep-elasticsearch" + cd $WORKPATH + cd comps/dataprep/deployment/docker_compose/ + docker compose up ${service_name} -d + sleep 1m + echo "Microservice started" } function validate_microservice() { cd $LOG_PATH # test /v1/dataprep - URL="http://${ip_address}:$dataprep_service_port/v1/dataprep" + URL="http://${ip_address}:$DATAPREP_PORT/v1/dataprep/ingest" echo "Deep learning is a subset of machine learning that utilizes neural networks with multiple layers to analyze various levels of abstract data representations. It enables computers to identify patterns and make decisions with minimal human intervention by learning from large amounts of data." > $LOG_PATH/dataprep_file.txt HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -F 'files=@./dataprep_file.txt' -H 'Content-Type: multipart/form-data' "$URL") @@ -70,17 +56,17 @@ function validate_microservice() { echo "[ dataprep ] Content is as expected." else echo "[ dataprep ] Content does not match the expected result: $CONTENT" - docker logs test-comps-dataprep-elasticsearch >> ${LOG_PATH}/dataprep.log + docker logs dataprep-elasticsearch >> ${LOG_PATH}/dataprep.log exit 1 fi else echo "[ dataprep ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs test-comps-dataprep-elasticsearch >> ${LOG_PATH}/dataprep.log + docker logs dataprep-elasticsearch >> ${LOG_PATH}/dataprep.log exit 1 fi # test /v1/dataprep/get_file - URL="http://${ip_address}:$dataprep_service_port/v1/dataprep/get_file" + URL="http://${ip_address}:$DATAPREP_PORT/v1/dataprep/get" HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -H 'Content-Type: application/json' "$URL") if [ "$HTTP_STATUS" -eq 200 ]; then echo "[ dataprep - file ] HTTP status is 200. Checking content..." @@ -90,33 +76,30 @@ function validate_microservice() { echo "[ dataprep - file ] Content is as expected." 
else echo "[ dataprep - file ] Content does not match the expected result: $CONTENT" - docker logs test-comps-dataprep-elasticsearch >> ${LOG_PATH}/dataprep_file.log + docker logs dataprep-elasticsearch >> ${LOG_PATH}/dataprep_file.log exit 1 fi else echo "[ dataprep - file ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs test-comps-dataprep-elasticsearch >> ${LOG_PATH}/dataprep_file.log + docker logs dataprep-elasticsearch >> ${LOG_PATH}/dataprep_file.log exit 1 fi # test /v1/dataprep/delete_file - URL="http://${ip_address}:$dataprep_service_port/v1/dataprep/delete_file" + URL="http://${ip_address}:$DATAPREP_PORT/v1/dataprep/delete" HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d '{"file_path": "dataprep_file.txt"}' -H 'Content-Type: application/json' "$URL") if [ "$HTTP_STATUS" -eq 200 ]; then echo "[ dataprep - del ] HTTP status is 200." - docker logs test-comps-dataprep-elasticsearch >> ${LOG_PATH}/dataprep_del.log + docker logs dataprep-elasticsearch >> ${LOG_PATH}/dataprep_del.log else echo "[ dataprep - del ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs test-comps-dataprep-elasticsearch >> ${LOG_PATH}/dataprep_del.log + docker logs dataprep-elasticsearch >> ${LOG_PATH}/dataprep_del.log exit 1 fi } function stop_docker() { - cid=$(docker ps -aq --filter "name=test-comps-vectorstore-elasticsearch*") - if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi - - cid=$(docker ps -aq --filter "name=test-comps-dataprep-elasticsearch*") + cid=$(docker ps -aq --filter "name=elasticsearch-vector-db" --filter "name=dataprep-elasticsearch") if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } diff --git a/tests/dataprep/test_dataprep_milvus.sh b/tests/dataprep/test_dataprep_milvus.sh index 33388593e3..603fb671cd 100644 --- a/tests/dataprep/test_dataprep_milvus.sh +++ b/tests/dataprep/test_dataprep_milvus.sh @@ -7,39 +7,38 @@ set -x WORKPATH=$(dirname "$PWD") LOG_PATH="$WORKPATH/tests" ip_address=$(hostname -I | awk '{print $1}') +DATAPREP_PORT=11101 function build_docker_images() { cd $WORKPATH echo $(pwd) # dataprep milvus image - docker build --no-cache -t opea/dataprep-milvus:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/src/Dockerfile . + docker build --no-cache -t opea/dataprep:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/src/Dockerfile . if [ $? 
-ne 0 ]; then - echo "opea/dataprep-milvus built fail" + echo "opea/dataprep built fail" exit 1 else - echo "opea/dataprep-milvus built successful" + echo "opea/dataprep built successful" fi } function start_service() { # start milvus vector db - cd $WORKPATH/comps/dataprep/milvus/langchain/ + cd $WORKPATH/comps/third_parties/milvus/deployment/docker_compose/ # wget https://raw.githubusercontent.com/milvus-io/milvus/v2.4.9/configs/milvus.yaml # wget https://github.com/milvus-io/milvus/releases/download/v2.4.9/milvus-standalone-docker-compose.yml -O docker-compose.yml # sed '/- \${DOCKER_VOLUME_DIRECTORY:-\.}\/volumes\/milvus:\/var\/lib\/milvus/a \ \ \ \ \ \ - \${DOCKER_VOLUME_DIRECTORY:-\.}\/milvus.yaml:\/milvus\/configs\/milvus.yaml' -i docker-compose.yml docker compose up -d - - # start embedding service - embed_port=5021 - embed_model="BAAI/bge-base-en-v1.5" - docker run -d -p $embed_port:80 -v ./data:/data --name test-comps-dataprep-milvus-tei-server -e http_proxy=$http_proxy -e https_proxy=$https_proxy --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 --model-id $embed_model - export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:${embed_port}" - - # start dataprep service - MILVUS_HOST=${ip_address} - dataprep_service_port=5022 - HF_TOKEN=${HF_TOKEN} - docker run -d --name="test-comps-dataprep-milvus-server" -p ${dataprep_service_port}:5000 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e TEI_EMBEDDING_ENDPOINT=${TEI_EMBEDDING_ENDPOINT} -e MILVUS_HOST=${MILVUS_HOST} -e HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN} -e LOGFLAG=true -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_MILVUS" --ipc=host opea/dataprep-milvus:comps + sleep 30 + + export host_ip=${ip_address} + export TEI_EMBEDDER_PORT=12005 + export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" + export MILVUS_HOST=${ip_address} + export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:${TEI_EMBEDDER_PORT}" + service_name="dataprep-milvus tei-embedding-serving" + cd $WORKPATH/comps/dataprep/deployment/docker_compose/ + docker compose up ${service_name} -d sleep 1m } @@ -91,43 +90,42 @@ function validate_service() { function validate_microservice() { cd $LOG_PATH - dataprep_service_port=5022 # test /v1/dataprep/delete validate_service \ - "http://${ip_address}:${dataprep_service_port}/v1/dataprep/delete" \ + "http://${ip_address}:${DATAPREP_PORT}/v1/dataprep/delete" \ '{"status":true}' \ "dataprep_del" \ - "test-comps-dataprep-milvus-server" + "dataprep-milvus-server" # test /v1/dataprep upload file echo "Deep learning is a subset of machine learning that utilizes neural networks with multiple layers to analyze various levels of abstract data representations. It enables computers to identify patterns and make decisions with minimal human intervention by learning from large amounts of data." 
> $LOG_PATH/dataprep_file.txt validate_service \ - "http://${ip_address}:${dataprep_service_port}/v1/dataprep/ingest" \ + "http://${ip_address}:${DATAPREP_PORT}/v1/dataprep/ingest" \ "Data preparation succeeded" \ "dataprep_upload_file" \ - "test-comps-dataprep-milvus-server" + "dataprep-milvus-server" # test /v1/dataprep upload link validate_service \ - "http://${ip_address}:${dataprep_service_port}/v1/dataprep/ingest" \ + "http://${ip_address}:${DATAPREP_PORT}/v1/dataprep/ingest" \ "Data preparation succeeded" \ "dataprep_upload_link" \ - "test-comps-dataprep-milvus-server" + "dataprep-milvus-server" # test /v1/dataprep/get_file validate_service \ - "http://${ip_address}:${dataprep_service_port}/v1/dataprep/get" \ + "http://${ip_address}:${DATAPREP_PORT}/v1/dataprep/get" \ '{"name":' \ "dataprep_get" \ - "test-comps-dataprep-milvus-server" + "dataprep-milvus-server" } function stop_docker() { cd $WORKPATH rm -rf milvus/ - cid=$(docker ps -aq --filter "name=test-comps-dataprep-milvus*") + cid=$(docker ps -aq --filter "name=dataprep-milvus*") if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi cid=$(docker ps -aq --filter "name=milvus-*") if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi diff --git a/tests/dataprep/test_dataprep_neo4j_llama_index_on_intel_hpu.sh b/tests/dataprep/test_dataprep_neo4j_on_intel_hpu.sh similarity index 66% rename from tests/dataprep/test_dataprep_neo4j_llama_index_on_intel_hpu.sh rename to tests/dataprep/test_dataprep_neo4j_on_intel_hpu.sh index aeacdea0ac..2b923bb66d 100755 --- a/tests/dataprep/test_dataprep_neo4j_llama_index_on_intel_hpu.sh +++ b/tests/dataprep/test_dataprep_neo4j_on_intel_hpu.sh @@ -7,51 +7,44 @@ set -x WORKPATH=$(dirname "$PWD") LOG_PATH="$WORKPATH/tests" ip_address=$(hostname -I | awk '{print $1}') +DATAPREP_PORT=11103 +LLM_ENDPOINT_PORT=10510 function build_docker_images() { cd $WORKPATH echo $(pwd) - docker build --no-cache -t opea/dataprep-neo4j-llamaindex:comps --build-arg no_proxy=$no_proxy --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/neo4j/llama_index/Dockerfile . + docker build --no-cache -t opea/dataprep:comps --build-arg no_proxy=$no_proxy --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/src/Dockerfile . if [ $? 
-ne 0 ]; then - echo "opea/dataprep-neo4j-llamaindex built fail" + echo "opea/dataprep built fail" exit 1 else - echo "opea/dataprep-neo4j-llamaindex built successful" + echo "opea/dataprep built successful" fi - docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5 + docker pull ghcr.io/huggingface/tgi-gaudi:2.3.1 docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 } function start_service() { - # neo4j-apoc - docker run -d -p 7474:7474 -p 7687:7687 --name test-comps-neo4j-apoc --env NEO4J_AUTH=neo4j/neo4jtest -e NEO4J_apoc_export_file_enabled=true -e NEO4J_apoc_import_file_enabled=true -e NEO4J_apoc_import_file_use__neo4j__config=true -e NEO4J_PLUGINS=\[\"apoc\"\] neo4j:latest - #sleep 30s - - # tei endpoint - emb_model="BAAI/bge-base-en-v1.5" - docker run -d --name="test-comps-dataprep-neo4j-tei-endpoint" -p 6006:80 -v ./data:/data -e no_proxy=$no_proxy -e http_proxy=$http_proxy \ - -e https_proxy=$https_proxy -e HF_TOKEN=$HF_TOKEN --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 --model-id $emb_model - sleep 30s - export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:6006" - - # tgi gaudi endpoint - model="meta-llama/Meta-Llama-3-8B-Instruct" - docker run -d --name="test-comps-dataprep-neo4j-tgi-endpoint" -p 6005:80 -v ./data:/data --runtime=habana -e HABANA_VISIBLE_DEVICES=all \ - -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e HF_TOKEN=$HF_TOKEN -e ENABLE_HPU_GRAPH=true -e LIMIT_HPU_GRAPH=true \ - -e USE_FLASH_ATTENTION=true -e FLASH_ATTENTION_RECOMPUTE=true --cap-add=sys_nice -e no_proxy=$no_proxy -e http_proxy=$http_proxy -e https_proxy=$https_proxy \ - --ipc=host --pull always ghcr.io/huggingface/tgi-gaudi:2.0.5 --model-id $model --max-input-tokens 1024 --max-total-tokens 3000 - sleep 30s - export TGI_LLM_ENDPOINT="http://${ip_address}:6005" - - # dataprep neo4j - # Not testing openai code path since not able to provide key for cicd - docker run -d --name="test-comps-dataprep-neo4j-server" -p 6004:6004 -v ./data:/data --ipc=host -e TGI_LLM_ENDPOINT=$TGI_LLM_ENDPOINT \ - -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e EMBEDDING_MODEL_ID=$emb_model -e LLM_MODEL_ID=$model -e host_ip=$ip_address -e no_proxy=$no_proxy \ - -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e NEO4J_URI="bolt://${ip_address}:7687" -e NEO4J_USERNAME="neo4j" \ - -e NEO4J_PASSWORD="neo4jtest" -e HF_TOKEN=$HF_TOKEN -e LOGFLAG=True opea/dataprep-neo4j-llamaindex:comps - sleep 30s - export DATAPREP_SERVICE_ENDPOINT="http://${ip_address}:6004" - + service_name="neo4j-apoc tei-embedding-serving tgi-gaudi-server dataprep-neo4j-llamaindex" + export host_ip=${ip_address} + export TAG="comps" + export NEO4J_AUTH="neo4j/neo4jtest" + export NEO4J_URL="bolt://${ip_address}:7687" + export NEO4J_USERNAME="neo4j" + export NEO4J_PASSWORD="neo4jtest" + export NEO4J_apoc_export_file_enabled=true + export NEO4J_apoc_import_file_use__neo4j__config=true + export NEO4J_PLUGINS=\[\"apoc\"\] + export TEI_EMBEDDER_PORT=12006 + export LLM_MODEL_ID="meta-llama/Meta-Llama-3.1-8B-Instruct" + export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" + export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:${TEI_EMBEDDER_PORT}" + export LLM_ENDPOINT_PORT=10510 + export TGI_LLM_ENDPOINT="http://${ip_address}:${LLM_ENDPOINT_PORT}" + + cd $WORKPATH/comps/dataprep/deployment/docker_compose/ + docker compose up ${service_name} -d + sleep 1m } function validate_service() { @@ -102,35 +95,35 @@ function validate_microservice() { "${ip_address}:7474" \ "200 OK" \ "neo4j-apoc" \ - "test-comps-neo4j-apoc" \ + "neo4j-apoc" 
\ "" sleep 1m # retrieval can't curl as expected, try to wait for more time # tgi for llm service validate_service \ - "${ip_address}:6005/generate" \ + "${ip_address}:${LLM_ENDPOINT_PORT}/generate" \ "generated_text" \ "tgi-gaudi-service" \ - "test-comps-dataprep-neo4j-tgi-endpoint" \ + "tgi-gaudi-server" \ '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' # test /v1/dataprep graph extraction echo "Like many companies in the O&G sector, the stock of Chevron (NYSE:CVX) has declined about 10% over the past 90-days despite the fact that Q2 consensus earnings estimates have risen sharply (~25%) during that same time frame. Over the years, Chevron has kept a very strong balance sheet. FirstEnergy (NYSE:FE – Get Rating) posted its earnings results on Tuesday. The utilities provider reported $0.53 earnings per share for the quarter, topping the consensus estimate of $0.52 by $0.01, RTT News reports. FirstEnergy had a net margin of 10.85% and a return on equity of 17.17%. The Dáil was almost suspended on Thursday afternoon after Sinn Féin TD John Brady walked across the chamber and placed an on-call pager in front of the Minister for Housing Darragh O’Brien during a debate on retained firefighters. Mr O’Brien said Mr Brady had taken part in an act of theatre that was obviously choreographed.Around 2,000 retained firefighters around the country staged a second day of industrial action on Tuesday and are due to start all out-strike action from next Tuesday. The mostly part-time workers, who keep the services going outside of Ireland’s larger urban centres, are taking industrial action in a dispute over pay and working conditions. Speaking in the Dáil, Sinn Féin deputy leader Pearse Doherty said firefighters had marched on Leinster House today and were very angry at the fact the Government will not intervene. Reintroduction of tax relief on mortgages needs to be considered, O’Brien says. Martin withdraws comment after saying People Before Profit would ‘put the jackboot on people’ Taoiseach ‘propagated fears’ farmers forced to rewet land due to nature restoration law – Cairns An intervention is required now. I’m asking you to make an improved offer in relation to pay for retained firefighters, Mr Doherty told the housing minister.I’m also asking you, and challenging you, to go outside after this Order of Business and meet with the firefighters because they are just fed up to the hilt in relation to what you said.Some of them have handed in their pagers to members of the Opposition and have challenged you to wear the pager for the next number of weeks, put up with an €8,600 retainer and not leave your community for the two and a half kilometres and see how you can stand over those type of pay and conditions. At this point, Mr Brady got up from his seat, walked across the chamber and placed the pager on the desk in front of Mr O’Brien. Ceann Comhairle Seán Ó Fearghaíl said the Sinn Féin TD was completely out of order and told him not to carry out a charade in this House, adding it was absolutely outrageous behaviour and not to be encouraged.Mr O’Brien said Mr Brady had engaged in an act of theatre here today which was obviously choreographed and was then interrupted with shouts from the Opposition benches. Mr Ó Fearghaíl said he would suspend the House if this racket continues.Mr O’Brien later said he said he was confident the dispute could be resolved and he had immense regard for firefighters. 
The minister said he would encourage the unions to re-engage with the State’s industrial relations process while also accusing Sinn Féin of using the issue for their own political gain." > $LOG_PATH/dataprep_file.txt validate_service \ - "http://${ip_address}:6004/v1/dataprep" \ + "http://${ip_address}:${DATAPREP_PORT}/v1/dataprep/ingest" \ "Data preparation succeeded" \ "extract_graph_neo4j" \ - "test-comps-dataprep-neo4j-server" + "dataprep-neo4j-llamaindex" } function stop_docker() { - cid_retrievers=$(docker ps -aq --filter "name=test-comps-dataprep-neo4j*") - if [[ ! -z "$cid_retrievers" ]]; then - docker stop $cid_retrievers && docker rm $cid_retrievers && sleep 1s + cid=$(docker ps -aq --filter "name=dataprep-neo4j*") + if [[ ! -z "$cid" ]]; then + docker stop $cid && docker rm $cid && sleep 1s fi - cid_db=$(docker ps -aq --filter "name=test-comps-neo4j-apoc") - if [[ ! -z "$cid_retrievers" ]]; then - docker stop $cid_retrievers && docker rm $cid_retrievers && sleep 1s + cid_db=$(docker ps -aq --filter "name=neo4j-apoc" --filter "name=tgi-gaudi-server") + if [[ ! -z "$cid_db" ]]; then + docker stop $cid_db && docker rm $cid_db && sleep 1s fi } diff --git a/tests/dataprep/test_dataprep_opensearch_langchain.sh b/tests/dataprep/test_dataprep_opensearch.sh similarity index 63% rename from tests/dataprep/test_dataprep_opensearch_langchain.sh rename to tests/dataprep/test_dataprep_opensearch.sh index 11e8006b6c..da9415dad3 100644 --- a/tests/dataprep/test_dataprep_opensearch_langchain.sh +++ b/tests/dataprep/test_dataprep_opensearch.sh @@ -7,60 +7,41 @@ set -x WORKPATH=$(dirname "$PWD") LOG_PATH="$WORKPATH/tests" ip_address=$(hostname -I | awk '{print $1}') -dataprep_service_port="6007" +DATAPREP_PORT="11104" OPENSEARCH_INITIAL_ADMIN_PASSWORD="StRoNgOpEa0)" function build_docker_images() { cd $WORKPATH echo $(pwd) - docker build -t opea/dataprep-opensearch:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/opensearch/langchain/Dockerfile . + docker build -t opea/dataprep:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/src/Dockerfile . if [ $? 
-ne 0 ]; then - echo "opea/dataprep-opensearch built fail" + echo "opea/dataprep built fail" exit 1 else - echo "opea/dataprep-opensearch built successful" + echo "opea/dataprep built successful" fi } function start_service() { - # Start OpenSearch vector db container - docker run -d \ - --name test-comps-dataprep-opensearch-langchain \ - -e cluster.name=opensearch-cluster \ - -e node.name=opensearch-vector-db \ - -e discovery.seed_hosts=opensearch-vector-db \ - -e cluster.initial_master_nodes=opensearch-vector-db \ - -e bootstrap.memory_lock=true \ - -e "OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m" \ - -e OPENSEARCH_INITIAL_ADMIN_PASSWORD=$OPENSEARCH_INITIAL_ADMIN_PASSWORD \ - --ulimit memlock=-1:-1 \ - --ulimit nofile=65536:65536 \ - -p 9200:9200 \ - -p 9600:9600 \ - opensearchproject/opensearch:latest - # Start OpenSearch dataprep container - OPENSEARCH_URL="http://${ip_address}:9200" - echo $(OPENSEARCH_URL) - INDEX_NAME="file-index" - docker run -d \ - --name test-comps-dataprep-opensearch-langchain-server \ - -p 6007:6007 \ - -e https_proxy=$https_proxy \ - -e http_proxy=$http_proxy \ - -e OPENSEARCH_INITIAL_ADMIN_PASSWORD=$OPENSEARCH_INITIAL_ADMIN_PASSWORD \ - -e OPENSEARCH_URL=$OPENSEARCH_URL \ - -e INDEX_NAME=$INDEX_NAME \ - opea/dataprep-opensearch:latest - - sleep 2m + export OPENSEARCH_INITIAL_ADMIN_PASSWORD="StRoNgOpEa0)" + export OPENSEARCH_PORT1=9200 + export OPENSEARCH_URL="http://${ip_address}:${OPENSEARCH_PORT1}" + echo ${OPENSEARCH_URL} + export INDEX_NAME="file-index" + service_name="opensearch-vector-db dataprep-opensearch" + export host_ip=${ip_address} + export TAG="comps" + cd $WORKPATH/comps/dataprep/deployment/docker_compose/ + docker compose up ${service_name} -d + sleep 1m } function validate_microservice() { cd $LOG_PATH # test /v1/dataprep upload file - URL="http://${ip_address}:$dataprep_service_port/v1/dataprep" + URL="http://${ip_address}:$DATAPREP_PORT/v1/dataprep/ingest" echo "Deep learning is a subset of machine learning that utilizes neural networks with multiple layers to analyze various levels of abstract data representations. It enables computers to identify patterns and make decisions with minimal human intervention by learning from large amounts of data." > $LOG_PATH/dataprep_file.txt HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F 'files=@./dataprep_file.txt' -H 'Content-Type: multipart/form-data' -k -u admin:$OPENSEARCH_INITIAL_ADMIN_PASSWORD "$URL") HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') @@ -69,14 +50,14 @@ function validate_microservice() { if [ "$HTTP_STATUS" -ne "200" ]; then echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs test-comps-dataprep-opensearch-langchain-server >> ${LOG_PATH}/dataprep_upload_file.log + docker logs dataprep-opensearch-server >> ${LOG_PATH}/dataprep_upload_file.log exit 1 else echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." fi if [[ "$RESPONSE_BODY" != *"Data preparation succeeded"* ]]; then echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" - docker logs test-comps-dataprep-opensearch-langchain-server >> ${LOG_PATH}/dataprep_upload_file.log + docker logs dataprep-opensearch-server >> ${LOG_PATH}/dataprep_upload_file.log exit 1 else echo "[ $SERVICE_NAME ] Content is as expected." 
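Note on the validation idiom: every curl check in these rewritten tests captures the body and the status code in one shot by appending "HTTPSTATUS:<code>" via --write-out, then splits the two apart with sed. A minimal standalone sketch of that pattern follows; the httpbin.org URL is only a placeholder for the local dataprep endpoint, and the check for "files" in the body is specific to httpbin's echo response, not to the dataprep API.

# Sketch of the status/body parsing pattern used throughout these tests (placeholder URL).
echo "sample text for ingestion" > dataprep_file.txt
URL="https://httpbin.org/post"
HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F 'files=@./dataprep_file.txt' "$URL")
# --write-out appends the marker after the body, so the status code is everything past the last "HTTPSTATUS:".
HTTP_STATUS=$(echo "$HTTP_RESPONSE" | tr -d '\n' | sed -e 's/.*HTTPSTATUS://')
# ...and the body is everything before it.
RESPONSE_BODY=$(echo "$HTTP_RESPONSE" | sed -e 's/HTTPSTATUS:.*//g')
if [ "$HTTP_STATUS" -ne "200" ]; then
    echo "unexpected status: $HTTP_STATUS"
elif [[ "$RESPONSE_BODY" != *"files"* ]]; then
    echo "unexpected body: $RESPONSE_BODY"
else
    echo "request and content check succeeded"
fi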
@@ -84,7 +65,7 @@ function validate_microservice() { # test /v1/dataprep upload link - URL="http://${ip_address}:$dataprep_service_port/v1/dataprep" + URL="http://${ip_address}:$DATAPREP_PORT/v1/dataprep/ingest" HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F 'link_list=["https://www.ces.tech/"]' -k -u admin:$OPENSEARCH_INITIAL_ADMIN_PASSWORD "$URL") HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g') @@ -93,21 +74,21 @@ function validate_microservice() { if [ "$HTTP_STATUS" -ne "200" ]; then echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs test-comps-dataprep-opensearch-langchain-server >> ${LOG_PATH}/dataprep_upload_link.log + docker logs dataprep-opensearch-server >> ${LOG_PATH}/dataprep_upload_link.log exit 1 else echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." fi if [[ "$RESPONSE_BODY" != *"Data preparation succeeded"* ]]; then echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" - docker logs test-comps-dataprep-opensearch-langchain-server >> ${LOG_PATH}/dataprep_upload_link.log + docker logs dataprep-opensearch-server >> ${LOG_PATH}/dataprep_upload_link.log exit 1 else echo "[ $SERVICE_NAME ] Content is as expected." fi # test /v1/dataprep/get_file - URL="http://${ip_address}:$dataprep_service_port/v1/dataprep/get_file" + URL="http://${ip_address}:$DATAPREP_PORT/v1/dataprep/get" HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -k -u admin:$OPENSEARCH_INITIAL_ADMIN_PASSWORD "$URL") HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g') @@ -115,21 +96,21 @@ function validate_microservice() { if [ "$HTTP_STATUS" -ne "200" ]; then echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs test-comps-dataprep-opensearch-langchain-server >> ${LOG_PATH}/dataprep_file.log + docker logs dataprep-opensearch-server >> ${LOG_PATH}/dataprep_file.log exit 1 else echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." fi if [[ "$RESPONSE_BODY" -ne "null" ]]; then echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" - docker logs test-comps-dataprep-opensearch-langchain-server >> ${LOG_PATH}/dataprep_file.log + docker logs dataprep-opensearch-server >> ${LOG_PATH}/dataprep_file.log exit 1 else echo "[ $SERVICE_NAME ] Content is as expected." fi # test /v1/dataprep/delete_file - URL="http://${ip_address}:$dataprep_service_port/v1/dataprep/delete_file" + URL="http://${ip_address}:$DATAPREP_PORT/v1/dataprep/delete" HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -d '{"file_path": "dataprep_file.txt"}' -H 'Content-Type: application/json' -k -u admin:$OPENSEARCH_INITIAL_ADMIN_PASSWORD "$URL") HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g') @@ -138,7 +119,7 @@ function validate_microservice() { # check response status if [ "$HTTP_STATUS" -ne "404" ]; then echo "[ $SERVICE_NAME ] HTTP status is not 404. Received status was $HTTP_STATUS" - docker logs test-comps-dataprep-opensearch-langchain-server >> ${LOG_PATH}/dataprep_del.log + docker logs dataprep-opensearch-server >> ${LOG_PATH}/dataprep_del.log exit 1 else echo "[ $SERVICE_NAME ] HTTP status is 404. Checking content..." 
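For quick reference, the endpoint moves exercised by these curl calls follow one consolidation: /v1/dataprep becomes /v1/dataprep/ingest, /v1/dataprep/get_file becomes /v1/dataprep/get, and /v1/dataprep/delete_file becomes /v1/dataprep/delete. The sketch below restates those call shapes against a locally running service; the port value and host detection are illustrative and mirror what the individual tests export.

# Illustrative calls against the unified dataprep routes (values mirror the tests in this patch).
ip_address=$(hostname -I | awk '{print $1}')
DATAPREP_PORT="11104"   # per-test port; each script pins its own value
echo "sample document" > dataprep_file.txt
# ingest a file (old route: /v1/dataprep)
curl -s -X POST -F 'files=@./dataprep_file.txt' -H 'Content-Type: multipart/form-data' \
    "http://${ip_address}:${DATAPREP_PORT}/v1/dataprep/ingest"
# list ingested files (old route: /v1/dataprep/get_file)
curl -s -X POST -H 'Content-Type: application/json' \
    "http://${ip_address}:${DATAPREP_PORT}/v1/dataprep/get"
# delete a file (old route: /v1/dataprep/delete_file)
curl -s -X POST -d '{"file_path": "dataprep_file.txt"}' -H 'Content-Type: application/json' \
    "http://${ip_address}:${DATAPREP_PORT}/v1/dataprep/delete"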
@@ -146,7 +127,7 @@ function validate_microservice() { # check response body if [[ "$RESPONSE_BODY" != *'{"detail":"Single file deletion is not implemented yet"}'* ]]; then echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" - docker logs test-comps-dataprep-opensearch-langchain-server >> ${LOG_PATH}/dataprep_del.log + docker logs dataprep-opensearch-server >> ${LOG_PATH}/dataprep_del.log exit 1 else echo "[ $SERVICE_NAME ] Content is as expected." @@ -154,9 +135,10 @@ function validate_microservice() { } function stop_service() { - cid=$(docker ps -aq --filter "name=test-comps-dataprep-opensearch-langchain*") + cid=$(docker ps -aq --filter "name=dataprep-opensearch-*") + if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi + cid=$(docker ps -aq --filter "name=opensearch-vector-db") if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi - } function main() { @@ -168,7 +150,7 @@ function main() { validate_microservice stop_service - # echo y | docker system prune + echo y | docker system prune } main diff --git a/tests/dataprep/test_dataprep_pgvector_langchain.sh b/tests/dataprep/test_dataprep_pgvector.sh similarity index 63% rename from tests/dataprep/test_dataprep_pgvector_langchain.sh rename to tests/dataprep/test_dataprep_pgvector.sh index 58592cb896..e8f7e2cf7a 100644 --- a/tests/dataprep/test_dataprep_pgvector_langchain.sh +++ b/tests/dataprep/test_dataprep_pgvector.sh @@ -7,7 +7,7 @@ set -x WORKPATH=$(dirname "$PWD") LOG_PATH="$WORKPATH/tests" ip_address=$(hostname -I | awk '{print $1}') -dataprep_service_port=5013 +DATAPREP_PORT="11105" function build_docker_images() { cd $WORKPATH @@ -16,34 +16,35 @@ function build_docker_images() { docker pull pgvector/pgvector:0.7.0-pg16 # build dataprep image for pgvector - docker build --no-cache -t opea/dataprep-pgvector:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f $WORKPATH/comps/dataprep/pgvector/langchain/Dockerfile . + docker build --no-cache -t opea/dataprep:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f $WORKPATH/comps/dataprep/src/Dockerfile . if [ $? 
-ne 0 ]; then - echo "opea/dataprep-pgvector built fail" + echo "opea/dataprep built fail" exit 1 else - echo "opea/dataprep-pgvector built successful" + echo "opea/dataprep built successful" fi } function start_service() { + export VOLUMES_PATH=$WORKPATH/comps/third_parties/pgvector/src/init.sql export POSTGRES_USER=testuser export POSTGRES_PASSWORD=testpwd export POSTGRES_DB=vectordb - - docker run --name test-comps-vectorstore-postgres -e POSTGRES_USER=${POSTGRES_USER} -e POSTGRES_HOST_AUTH_METHOD=trust -e POSTGRES_DB=${POSTGRES_DB} -e POSTGRES_PASSWORD=${POSTGRES_PASSWORD} -p 5432:5432 -d -v $WORKPATH/comps/vectorstores/pgvector/init.sql:/docker-entrypoint-initdb.d/init.sql pgvector/pgvector:0.7.0-pg16 - - sleep 10s - - docker run -d --name="test-comps-dataprep-pgvector" -p ${dataprep_service_port}:6007 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e PG_CONNECTION_STRING=postgresql+psycopg2://${POSTGRES_USER}:${POSTGRES_PASSWORD}@$ip_address:5432/${POSTGRES_DB} opea/dataprep-pgvector:comps - - sleep 3m + export PG_CONNECTION_STRING=postgresql+psycopg2://${POSTGRES_USER}:${POSTGRES_PASSWORD}@$ip_address:5432/${POSTGRES_DB} + + service_name="pgvector-db dataprep-pgvector" + export host_ip=${ip_address} + export TAG="comps" + cd $WORKPATH/comps/dataprep/deployment/docker_compose/ + docker compose up ${service_name} -d + sleep 1m } function validate_microservice() { cd $LOG_PATH - # test /v1/dataprep - URL="http://${ip_address}:$dataprep_service_port/v1/dataprep" + # test /v1/dataprep/ingest + URL="http://${ip_address}:$DATAPREP_PORT/v1/dataprep/ingest" echo "Deep learning is a subset of machine learning that utilizes neural networks with multiple layers to analyze various levels of abstract data representations. It enables computers to identify patterns and make decisions with minimal human intervention by learning from large amounts of data." > $LOG_PATH/dataprep_file.txt HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -F 'files=@./dataprep_file.txt' -H 'Content-Type: multipart/form-data' "$URL") if [ "$HTTP_STATUS" -eq 200 ]; then @@ -55,17 +56,17 @@ function validate_microservice() { echo "[ dataprep ] Content is as expected." else echo "[ dataprep ] Content does not match the expected result: $CONTENT" - docker logs test-comps-dataprep-pgvector >> ${LOG_PATH}/dataprep.log + docker logs dataprep-pgvector-server >> ${LOG_PATH}/dataprep.log exit 1 fi else echo "[ dataprep ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs test-comps-dataprep-pgvector >> ${LOG_PATH}/dataprep.log + docker logs dataprep-pgvector-server >> ${LOG_PATH}/dataprep.log exit 1 fi - # test /v1/dataprep/get_file - URL="http://${ip_address}:$dataprep_service_port/v1/dataprep/get_file" + # test /v1/dataprep/get + URL="http://${ip_address}:$DATAPREP_PORT/v1/dataprep/get" HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -H 'Content-Type: application/json' "$URL") if [ "$HTTP_STATUS" -eq 200 ]; then echo "[ dataprep - file ] HTTP status is 200. Checking content..." @@ -75,33 +76,33 @@ function validate_microservice() { echo "[ dataprep - file ] Content is as expected." else echo "[ dataprep - file ] Content does not match the expected result: $CONTENT" - docker logs test-comps-dataprep-pgvector >> ${LOG_PATH}/dataprep_file.log + docker logs dataprep-pgvector-server >> ${LOG_PATH}/dataprep_file.log exit 1 fi else echo "[ dataprep - file ] HTTP status is not 200. 
Received status was $HTTP_STATUS" - docker logs test-comps-dataprep-pgvector >> ${LOG_PATH}/dataprep_file.log + docker logs dataprep-pgvector-server >> ${LOG_PATH}/dataprep_file.log exit 1 fi - # test /v1/dataprep/delete_file - URL="http://${ip_address}:$dataprep_service_port/v1/dataprep/delete_file" + # test /v1/dataprep/delete + URL="http://${ip_address}:$DATAPREP_PORT/v1/dataprep/delete" HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d '{"file_path": "dataprep_file.txt"}' -H 'Content-Type: application/json' "$URL") if [ "$HTTP_STATUS" -eq 200 ]; then echo "[ dataprep - del ] HTTP status is 200." - docker logs test-comps-dataprep-pgvector >> ${LOG_PATH}/dataprep_del.log + docker logs dataprep-pgvector-server >> ${LOG_PATH}/dataprep_del.log else echo "[ dataprep - del ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs test-comps-dataprep-pgvector >> ${LOG_PATH}/dataprep_del.log + docker logs dataprep-pgvector-server >> ${LOG_PATH}/dataprep_del.log exit 1 fi } function stop_docker() { - cid=$(docker ps -aq --filter "name=test-comps-vectorstore-postgres*") + cid=$(docker ps -aq --filter "name=dataprep-pgvector-server") if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi - cid=$(docker ps -aq --filter "name=test-comps-dataprep-pgvector*") + cid=$(docker ps -aq --filter "name=pgvector-db") if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } diff --git a/tests/dataprep/test_dataprep_pinecone_langchain.sh b/tests/dataprep/test_dataprep_pinecone.sh similarity index 61% rename from tests/dataprep/test_dataprep_pinecone_langchain.sh rename to tests/dataprep/test_dataprep_pinecone.sh index 31661de5e4..6afde25c9d 100644 --- a/tests/dataprep/test_dataprep_pinecone_langchain.sh +++ b/tests/dataprep/test_dataprep_pinecone.sh @@ -6,16 +6,18 @@ set -x WORKPATH=$(dirname "$PWD") ip_address=$(hostname -I | awk '{print $1}') +DATAPREP_PORT="11106" + function build_docker_images() { cd $WORKPATH # build dataprep image for pinecone - docker build --no-cache -t opea/dataprep-pinecone:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f $WORKPATH/comps/dataprep/pinecone/langchain/Dockerfile . + docker build --no-cache -t opea/dataprep:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f $WORKPATH/comps/dataprep/src/Dockerfile . if [ $? 
-ne 0 ]; then - echo "opea/dataprep-pinecone built fail" + echo "opea/dataprep built fail" exit 1 else - echo "opea/dataprep-pinecone built successful" + echo "opea/dataprep built successful" fi } @@ -24,13 +26,15 @@ function start_service() { export PINECONE_INDEX_NAME="test-index" export HUGGINGFACEHUB_API_TOKEN=$HF_TOKEN - docker run -d --name="test-comps-dataprep-pinecone" -p 5039:6007 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e PINECONE_API_KEY=$PINECONE_API_KEY -e PINECONE_INDEX_NAME=$PINECONE_INDEX_NAME -e LOGFLAG=true opea/dataprep-pinecone:comps - + service_name="dataprep-pinecone" + export TAG="comps" + cd $WORKPATH/comps/dataprep/deployment/docker_compose/ + docker compose up ${service_name} -d sleep 1m } function validate_microservice() { - URL="http://$ip_address:5039/v1/dataprep" + URL="http://$ip_address:${DATAPREP_PORT}/v1/dataprep/ingest" echo 'The OPEA platform includes: Detailed framework of composable building blocks for state-of-the-art generative AI systems including LLMs, data stores, and prompt engines' > ./dataprep_file.txt result=$(curl --noproxy $ip_address --location --request POST \ --form 'files=@./dataprep_file.txt' $URL) @@ -38,26 +42,23 @@ function validate_microservice() { echo "Result correct." else echo "Result wrong. Received was $result" - docker logs test-comps-dataprep-pinecone + docker logs dataprep-pinecone-server exit 1 fi - DELETE_URL="http://$ip_address:5039/v1/dataprep/delete_file" + DELETE_URL="http://$ip_address:${DATAPREP_PORT}/v1/dataprep/delete" result=$(curl --noproxy $ip_address --location --request POST \ -d '{"file_path": "all"}' -H 'Content-Type: application/json' $DELETE_URL) if [[ $result == *"true"* ]]; then echo "Result correct." else echo "Result wrong. Received was $result" - docker logs test-comps-dataprep-pinecone + docker logs dataprep-pinecone-server exit 1 fi } function stop_docker() { - cid=$(docker ps -aq --filter "name=vectorstore-pinecone*") - if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi - - cid=$(docker ps -aq --filter "name=test-comps-dataprep-pinecone*") + cid=$(docker ps -aq --filter "name=dataprep-pinecone-server*") if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } diff --git a/tests/dataprep/test_dataprep_qdrant_langchain.sh b/tests/dataprep/test_dataprep_qdrant.sh similarity index 59% rename from tests/dataprep/test_dataprep_qdrant_langchain.sh rename to tests/dataprep/test_dataprep_qdrant.sh index 226d454751..818f99da24 100644 --- a/tests/dataprep/test_dataprep_qdrant_langchain.sh +++ b/tests/dataprep/test_dataprep_qdrant.sh @@ -7,30 +7,34 @@ set -x WORKPATH=$(dirname "$PWD") LOG_PATH="$WORKPATH/tests" ip_address=$(hostname -I | awk '{print $1}') +DATAPREP_PORT="11107" +TEI_EMBEDDER_PORT="10220" function build_docker_images() { cd $WORKPATH # dataprep qdrant image - docker build --no-cache -t opea/dataprep-qdrant:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/qdrant/langchain/Dockerfile . + docker build --no-cache -t opea/dataprep:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/src/Dockerfile . if [ $? 
-ne 0 ]; then - echo "opea/dataprep-qdrant built fail" + echo "opea/dataprep built fail" exit 1 else - echo "opea/dataprep-qdrant built successful" + echo "opea/dataprep built successful" fi } function start_service() { - QDRANT_PORT=6360 - docker run -d --name="test-comps-dataprep-qdrant-langchain" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p $QDRANT_PORT:6333 -p 6334:6334 --ipc=host qdrant/qdrant - tei_embedding_port=6361 - model="BAAI/bge-base-en-v1.5" - docker run -d --name="test-comps-dataprep-qdrant-langchain-tei" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p $tei_embedding_port:80 -v ./data:/data --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 --model-id $model - dataprep_service_port=6362 - TEI_EMBEDDING_ENDPOINT="http://${ip_address}:${tei_embedding_port}" - COLLECTION_NAME="rag-qdrant" - docker run -d --name="test-comps-dataprep-qdrant-langchain-server" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e QDRANT_HOST=$ip_address -e QDRANT_PORT=$QDRANT_PORT -e COLLECTION_NAME=$COLLECTION_NAME -e TEI_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -p ${dataprep_service_port}:6007 --ipc=host opea/dataprep-qdrant:comps + export host_ip=${ip_address} + export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" + export TEI_EMBEDDER_PORT="10224" + export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:${TEI_EMBEDDER_PORT}" + export COLLECTION_NAME="rag-qdrant" + export QDRANT_HOST=$ip_address + export QDRANT_PORT=6360 + export TAG="comps" + service_name="qdrant-vector-db tei-embedding-serving dataprep-qdrant" + cd $WORKPATH/comps/dataprep/deployment/docker_compose/ + docker compose up ${service_name} -d sleep 1m } @@ -57,8 +61,8 @@ function validate_services() { # check response status if [ "$HTTP_STATUS" -ne "200" ]; then echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs test-comps-dataprep-qdrant-langchain-tei >> ${LOG_PATH}/tei-endpoint.log - docker logs test-comps-dataprep-qdrant-langchain-server >> ${LOG_PATH}/dataprep-qdrant.log + docker logs tei-embedding-serving >> ${LOG_PATH}/tei-endpoint.log + docker logs dataprep-qdrant-server >> ${LOG_PATH}/dataprep-qdrant.log exit 1 else echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." @@ -66,8 +70,8 @@ function validate_services() { # check response body if [[ "$RESPONSE_BODY" != *"$EXPECTED_RESULT"* ]]; then echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" - docker logs test-comps-dataprep-qdrant-langchain-tei >> ${LOG_PATH}/tei-endpoint.log - docker logs test-comps-dataprep-qdrant-langchain-server >> ${LOG_PATH}/dataprep-qdrant.log + docker logs tei-embedding-serving >> ${LOG_PATH}/tei-endpoint.log + docker logs dataprep-qdrant-server >> ${LOG_PATH}/dataprep-qdrant.log exit 1 else echo "[ $SERVICE_NAME ] Content is as expected." @@ -79,31 +83,31 @@ function validate_services() { function validate_microservice() { # tei for embedding service validate_services \ - "${ip_address}:6361/embed" \ + "${ip_address}:${TEI_EMBEDDER_PORT}/embed" \ "[[" \ "tei_embedding" \ - "test-comps-dataprep-qdrant-langchain-tei" \ + "tei-embedding-serving" \ '{"inputs":"What is Deep Learning?"}' # dataprep upload file echo "Deep learning is a subset of machine learning that utilizes neural networks with multiple layers to analyze various levels of abstract data representations. It enables computers to identify patterns and make decisions with minimal human intervention by learning from large amounts of data." 
> $LOG_PATH/dataprep_file.txt validate_services \ - "${ip_address}:6362/v1/dataprep" \ + "${ip_address}:${DATAPREP_PORT}/v1/dataprep/ingest" \ "Data preparation succeeded" \ "dataprep_upload_file" \ - "test-comps-dataprep-qdrant-langchain-server" + "dataprep-qdrant-server" # dataprep upload link validate_services \ - "${ip_address}:6362/v1/dataprep" \ + "${ip_address}:${DATAPREP_PORT}/v1/dataprep/ingest" \ "Data preparation succeeded" \ "dataprep_upload_link" \ - "test-comps-dataprep-qdrant-langchain-server" + "dataprep-qdrant-server" } function stop_docker() { - cid=$(docker ps -aq --filter "name=test-comps-dataprep-qdrant-langchain*") + cid=$(docker ps -aq --filter "name=dataprep-qdrant-server*" --filter "name=tei-embedding-serving*" --filter "name=qdrant-vector-db") if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi rm $LOG_PATH/dataprep_file.txt diff --git a/tests/dataprep/test_dataprep_redis.sh b/tests/dataprep/test_dataprep_redis.sh index 7e8af5b005..3cbd6b02b5 100644 --- a/tests/dataprep/test_dataprep_redis.sh +++ b/tests/dataprep/test_dataprep_redis.sh @@ -7,42 +7,44 @@ set -x WORKPATH=$(dirname "$PWD") LOG_PATH="$WORKPATH/tests" ip_address=$(hostname -I | awk '{print $1}') +DATAPREP_PORT="11108" +TEI_EMBEDDER_PORT="10221" function build_docker_images() { cd $WORKPATH echo $(pwd) - docker build -t opea/dataprep-redis:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/src/Dockerfile . + docker build -t opea/dataprep:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/src/Dockerfile . if [ $? -ne 0 ]; then - echo "opea/dataprep-redis built fail" + echo "opea/dataprep built fail" exit 1 else - echo "opea/dataprep-redis built successful" + echo "opea/dataprep built successful" fi } function start_service() { - REDIS_PORT=6380 - docker run -d --name="test-comps-dataprep-redis-langchain" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p $REDIS_PORT:6379 -p 8002:8001 --ipc=host redis/redis-stack:7.2.0-v9 - embed_port=5439 - embed_model="BAAI/bge-base-en-v1.5" - docker run -d -p $embed_port:80 -v ./data:/data --name test-comps-dataprep-redis-langchain-tei-server -e http_proxy=$http_proxy -e https_proxy=$https_proxy --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 --model-id $embed_model - export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:${embed_port}" - - export dataprep_service_port=5013 - REDIS_URL="redis://${ip_address}:${REDIS_PORT}" + export host_ip=${ip_address} + export REDIS_HOST=$ip_address + export REDIS_PORT=6379 + export DATAPREP_PORT="11108" + export TEI_EMBEDDER_PORT="10221" + export REDIS_URL="redis://${ip_address}:${REDIS_PORT}" + export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" + export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:${TEI_EMBEDDER_PORT}" export INDEX_NAME="rag_redis" - export HF_TOKEN=${HF_TOKEN} - docker run -d --name="test-comps-dataprep-redis-langchain-server" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e REDIS_HOST=$ip_address -e REDIS_PORT=$REDIS_PORT -e TEI_EMBEDDING_ENDPOINT=${TEI_EMBEDDING_ENDPOINT} -e INDEX_NAME=$INDEX_NAME -e HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN} -e LOGFLAG=true -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_REDIS" -p ${dataprep_service_port}:5000 --ipc=host opea/dataprep-redis:comps + export TAG="comps" + service_name="redis-vector-db tei-embedding-serving dataprep-redis" + cd $WORKPATH/comps/dataprep/deployment/docker_compose/ + docker compose up 
${service_name} -d sleep 1m } function validate_microservice() { cd $LOG_PATH - export dataprep_service_port=5013 # test /v1/dataprep/delete - URL="http://${ip_address}:$dataprep_service_port/v1/dataprep/delete" + URL="http://${ip_address}:${DATAPREP_PORT}/v1/dataprep/delete" HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -d '{"file_path": "all"}' -H 'Content-Type: application/json' "$URL") HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g') @@ -50,7 +52,7 @@ function validate_microservice() { if [ "$HTTP_STATUS" -ne "200" ]; then echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs test-comps-dataprep-redis-langchain-server >> ${LOG_PATH}/dataprep_del.log + docker logs dataprep-redis-server >> ${LOG_PATH}/dataprep_del.log exit 1 else echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." @@ -58,14 +60,14 @@ function validate_microservice() { # check response body if [[ "$RESPONSE_BODY" != *'{"status":true}'* ]]; then echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" - docker logs test-comps-dataprep-redis-langchain-server >> ${LOG_PATH}/dataprep_del.log + docker logs dataprep-redis-server >> ${LOG_PATH}/dataprep_del.log exit 1 else echo "[ $SERVICE_NAME ] Content is as expected." fi # test /v1/dataprep/ingest upload file - URL="http://${ip_address}:$dataprep_service_port/v1/dataprep/ingest" + URL="http://${ip_address}:$DATAPREP_PORT/v1/dataprep/ingest" echo "Deep learning is a subset of machine learning that utilizes neural networks with multiple layers to analyze various levels of abstract data representations. It enables computers to identify patterns and make decisions with minimal human intervention by learning from large amounts of data." > $LOG_PATH/dataprep_file.txt HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F 'files=@./dataprep_file.txt' -H 'Content-Type: multipart/form-data' "$URL") HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') @@ -74,21 +76,21 @@ function validate_microservice() { if [ "$HTTP_STATUS" -ne "200" ]; then echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs test-comps-dataprep-redis-langchain-server >> ${LOG_PATH}/dataprep_upload_file.log + docker logs dataprep-redis-server >> ${LOG_PATH}/dataprep_upload_file.log exit 1 else echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." fi if [[ "$RESPONSE_BODY" != *"Data preparation succeeded"* ]]; then echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" - docker logs test-comps-dataprep-redis-langchain-server >> ${LOG_PATH}/dataprep_upload_file.log + docker logs dataprep-redis-server >> ${LOG_PATH}/dataprep_upload_file.log exit 1 else echo "[ $SERVICE_NAME ] Content is as expected." fi # test /v1/dataprep/ingest upload link - URL="http://${ip_address}:$dataprep_service_port/v1/dataprep/ingest" + URL="http://${ip_address}:$DATAPREP_PORT/v1/dataprep/ingest" HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F 'link_list=["https://www.ces.tech/"]' "$URL") HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g') @@ -97,21 +99,21 @@ function validate_microservice() { if [ "$HTTP_STATUS" -ne "200" ]; then echo "[ $SERVICE_NAME ] HTTP status is not 200. 
Received status was $HTTP_STATUS" - docker logs test-comps-dataprep-redis-langchain-server >> ${LOG_PATH}/dataprep_upload_link.log + docker logs dataprep-redis-server >> ${LOG_PATH}/dataprep_upload_link.log exit 1 else echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." fi if [[ "$RESPONSE_BODY" != *"Data preparation succeeded"* ]]; then echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" - docker logs test-comps-dataprep-redis-langchain-server >> ${LOG_PATH}/dataprep_upload_link.log + docker logs dataprep-redis-server >> ${LOG_PATH}/dataprep_upload_link.log exit 1 else echo "[ $SERVICE_NAME ] Content is as expected." fi # test /v1/dataprep/get - URL="http://${ip_address}:$dataprep_service_port/v1/dataprep/get" + URL="http://${ip_address}:$DATAPREP_PORT/v1/dataprep/get" HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST "$URL") HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g') @@ -119,14 +121,14 @@ function validate_microservice() { if [ "$HTTP_STATUS" -ne "200" ]; then echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs test-comps-dataprep-redis-langchain-server >> ${LOG_PATH}/dataprep_file.log + docker logs dataprep-redis-server >> ${LOG_PATH}/dataprep_file.log exit 1 else echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." fi if [[ "$RESPONSE_BODY" != *'{"name":'* ]]; then echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" - docker logs test-comps-dataprep-redis-langchain-server >> ${LOG_PATH}/dataprep_file.log + docker logs dataprep-redis-server >> ${LOG_PATH}/dataprep_file.log exit 1 else echo "[ $SERVICE_NAME ] Content is as expected." @@ -135,7 +137,7 @@ function validate_microservice() { } function stop_docker() { - cid=$(docker ps -aq --filter "name=test-comps-dataprep-redis-langchain*") + cid=$(docker ps -aq --filter "name=dataprep-redis-server*" --filter "name=redis-vector-*" --filter "name=tei-embedding-*") if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } diff --git a/tests/dataprep/test_dataprep_redis_langchain_ray.sh b/tests/dataprep/test_dataprep_redis_langchain_ray.sh deleted file mode 100644 index 84851b8d62..0000000000 --- a/tests/dataprep/test_dataprep_redis_langchain_ray.sh +++ /dev/null @@ -1,113 +0,0 @@ -#!/bin/bash -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -set -x - -WORKPATH=$(dirname "$PWD") -LOG_PATH="$WORKPATH/tests" -ip_address=$(hostname -I | awk '{print $1}') - -function build_docker_images() { - echo "Building the docker images" - cd $WORKPATH - docker build --no-cache -t opea/dataprep-on-ray-redis:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/redis/langchain_ray/Dockerfile . - if [ $? 
-ne 0 ]; then - echo "opea/dataprep-on-ray-redis built fail" - exit 1 - else - echo "opea/dataprep-on-ray-redis built successful" - fi -} - -function start_service() { - echo "Starting redis microservice" - # redis endpoint - docker run -d --name="test-comps-dataprep-redis-ray" --runtime=runc -p 5038:6379 -p 8004:8001 redis/redis-stack:7.2.0-v9 - - # dataprep-redis-server endpoint - export REDIS_URL="redis://${ip_address}:5038" - export INDEX_NAME="rag-redis" - echo "Starting dataprep-redis-server" - docker run -d --name="test-comps-dataprep-redis-ray-server" --runtime=runc -p 5037:6007 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT -e TIMEOUT_SECONDS=600 -e LOGFLAG=true opea/dataprep-on-ray-redis:comps - - sleep 10 - echo "Service started successfully" -} - -function validate_microservice() { - cd $LOG_PATH - - dataprep_service_port=5037 - export URL="http://${ip_address}:$dataprep_service_port/v1/dataprep" - export GET_URL="http://${ip_address}:$dataprep_service_port/v1/dataprep/get_file" - - echo "Starting validating the microservice" - export PATH="${HOME}/miniforge3/bin:$PATH" - source activate - echo "Deep learning is a subset of machine learning that utilizes neural networks with multiple layers to analyze various levels of abstract data representations. It enables computers to identify patterns and make decisions with minimal human intervention by learning from large amounts of data." > dataprep_file.txt - EXIT_CODE=0 - python -c "$(cat << 'EOF' -import requests -import json -import os -proxies = {'http':""} -url = os.environ['URL'] -get_url = os.environ['GET_URL'] - -print("test single file ingestion") -file_list = ["dataprep_file.txt"] -files = [('files', (f, open(f, 'rb'))) for f in file_list] -resp = requests.request('POST', url=url, headers={}, files=files, proxies=proxies) -print(resp.text) -resp.raise_for_status() # Raise an exception for unsuccessful HTTP status codes -print("Request successful!") - -print("test 20 files ingestion") -file_list = ["dataprep_file.txt"] * 20 -files = [('files', (f, open(f, 'rb'))) for f in file_list] -resp = requests.request('POST', url=url, headers={}, files=files, proxies=proxies) -print(resp.text) -resp.raise_for_status() # Raise an exception for unsuccessful HTTP status codes -print("Request successful!") - -print("test get file structure") -resp = requests.request('POST', url=get_url, headers={}, proxies=proxies) -print(resp.text) -assert "name" in resp.text, "Response does not meet expectation." -print("Request successful!") -EOF -)" || EXIT_CODE=$? - rm -rf dataprep_file.txt - if [ $EXIT_CODE -ne 0 ]; then - echo "[ dataprep ] Validation failed. Entire log as below doc " - docker container logs test-comps-dataprep-redis-ray-server | tee -a ${LOG_PATH}/dataprep.log - exit 1 - else - echo "[ dataprep ] Validation succeed. " - fi -} - - -function stop_docker() { - cid=$(docker ps -aq --filter "name=test-comps-dataprep-redis-ray*") - echo "Stopping the docker containers "${cid} - if [[ ! 
-z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi - echo "Docker containers stopped successfully" -} - -function main() { - - stop_docker - - build_docker_images - start_service - - validate_microservice - - stop_docker - echo y | docker system prune 2>&1 > /dev/null - -} - -main diff --git a/tests/dataprep/test_dataprep_redis_llama_index.sh b/tests/dataprep/test_dataprep_redis_llama_index.sh deleted file mode 100644 index 4015887e65..0000000000 --- a/tests/dataprep/test_dataprep_redis_llama_index.sh +++ /dev/null @@ -1,97 +0,0 @@ -#!/bin/bash -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -set -x - -WORKPATH=$(dirname "$PWD") -LOG_PATH="$WORKPATH/tests" -ip_address=$(hostname -I | awk '{print $1}') - -function build_docker_images() { - cd $WORKPATH - echo $(pwd) - docker build --no-cache -t opea/dataprep-redis-llama-index:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/redis/llama_index/Dockerfile . - if [ $? -ne 0 ]; then - echo "opea/dataprep-redis-llama-index built fail" - exit 1 - else - echo "opea/dataprep-redis-llama-index built successful" - fi -} - -function start_service() { - docker run -d --name="test-comps-dataprep-redis-llama-index" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 6381:6379 -p 8003:8001 --ipc=host redis/redis-stack:7.2.0-v9 - dataprep_service_port=5012 - REDIS_URL="redis://${ip_address}:6381" - docker run -d --name="test-comps-dataprep-redis-llama-index-server" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -p ${dataprep_service_port}:6007 -e LOGFLAG=true --ipc=host opea/dataprep-redis-llama-index:comps - sleep 2m -} - -function validate_microservice() { - cd $LOG_PATH - - # test /v1/dataprep - dataprep_service_port=5012 - URL="http://${ip_address}:$dataprep_service_port/v1/dataprep" - echo "Deep learning is a subset of machine learning that utilizes neural networks with multiple layers to analyze various levels of abstract data representations. It enables computers to identify patterns and make decisions with minimal human intervention by learning from large amounts of data." > $LOG_PATH/dataprep_file.txt - HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -F 'files=@./dataprep_file.txt' -H 'Content-Type: multipart/form-data' "$URL") - if [ "$HTTP_STATUS" -eq 200 ]; then - echo "[ dataprep ] HTTP status is 200. Checking content..." - local CONTENT=$(curl -s -X POST -F 'files=@./dataprep_file.txt' -H 'Content-Type: multipart/form-data' "$URL" | tee ${LOG_PATH}/dataprep.log) - - if echo "$CONTENT" | grep -q "Data preparation succeeded"; then - echo "[ dataprep ] Content is as expected." - else - echo "[ dataprep ] Content does not match the expected result: $CONTENT" - docker logs test-comps-dataprep-redis-llama-index-server >> ${LOG_PATH}/dataprep.log - exit 1 - fi - else - echo "[ dataprep ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs test-comps-dataprep-redis-llama-index-server >> ${LOG_PATH}/dataprep.log - exit 1 - fi - rm -rf $LOG_PATH/dataprep_file.txt - - # test /v1/dataprep/get_file - URL="http://${ip_address}:$dataprep_service_port/v1/dataprep/get_file" - HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -H 'Content-Type: application/json' "$URL") - if [ "$HTTP_STATUS" -eq 200 ]; then - echo "[ dataprep - file ] HTTP status is 200. Checking content..." 
- local CONTENT=$(curl -s -X POST -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/dataprep_file.log) - - if echo "$CONTENT" | grep -q '{"name":'; then - echo "[ dataprep - file ] Content is as expected." - else - echo "[ dataprep - file ] Content does not match the expected result: $CONTENT" - docker logs test-comps-dataprep-redis-llama-index-server >> ${LOG_PATH}/dataprep_file.log - exit 1 - fi - else - echo "[ dataprep ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs test-comps-dataprep-redis-llama-index-server >> ${LOG_PATH}/dataprep_file.log - exit 1 - fi -} - -function stop_docker() { - cid=$(docker ps -aq --filter "name=test-comps-dataprep-redis-llama*") - if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi -} - -function main() { - - stop_docker - - build_docker_images - start_service - - validate_microservice - - stop_docker - echo y | docker system prune - -} - -main diff --git a/tests/dataprep/test_dataprep_multimodal_redis_langchain.sh b/tests/dataprep/test_dataprep_redis_multimodal.sh similarity index 67% rename from tests/dataprep/test_dataprep_multimodal_redis_langchain.sh rename to tests/dataprep/test_dataprep_redis_multimodal.sh index caef637d09..f99a3878e8 100644 --- a/tests/dataprep/test_dataprep_multimodal_redis_langchain.sh +++ b/tests/dataprep/test_dataprep_redis_multimodal.sh @@ -20,44 +20,48 @@ audio_fn="${tmp_dir}/${audio_name}.wav" image_name="apple" image_fn="${tmp_dir}/${image_name}.png" caption_fn="${tmp_dir}/${image_name}.txt" +pdf_name="nke-10k-2023" +pdf_fn="${tmp_dir}/${pdf_name}.pdf" +DATAPREP_PORT="11109" function build_docker_images() { cd $WORKPATH echo $(pwd) - docker build --no-cache -t opea/dataprep-multimodal-redis:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/multimodal/redis/langchain/Dockerfile . + docker build --no-cache -t opea/dataprep:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/src/Dockerfile . if [ $? -ne 0 ]; then - echo "opea/dataprep-multimodal-redis built fail" + echo "opea/dataprep built fail" exit 1 else - echo "opea/dataprep-multimodal-redis built successful" + echo "opea/dataprep built successful" fi } function build_lvm_docker_images() { cd $WORKPATH echo $(pwd) - docker build --no-cache -t opea/lvm-llava:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/lvms/llava/dependency/Dockerfile . + docker build --no-cache -t opea/lvm-llava:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/lvms/src/integrations/dependency/llava/Dockerfile . if [ $? -ne 0 ]; then echo "opea/lvm-llava built fail" exit 1 else echo "opea/lvm-llava built successful" fi - docker build --no-cache -t opea/lvm-llava-svc:comps -f comps/lvms/llava/Dockerfile . + docker build --no-cache -t opea/lvm:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/lvms/src/Dockerfile . if [ $? 
-ne 0 ]; then - echo "opea/lvm-llava-svc built fail" + echo "opea/lvm built fail" exit 1 else - echo "opea/lvm-llava-svc built successful" + echo "opea/lvm built successful" fi } function start_lvm_service() { unset http_proxy docker run -d --name="test-comps-lvm-llava" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 5029:8399 --ipc=host opea/lvm-llava:comps - docker run -d --name="test-comps-lvm-llava-svc" -e LVM_ENDPOINT=http://$ip_address:5029 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p ${LVM_PORT}:9399 --ipc=host opea/lvm-llava-svc:comps - sleep 5m + sleep 4m + docker run -d --name="test-comps-lvm-llava-svc" -e LVM_ENDPOINT=http://$ip_address:5029 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p ${LVM_PORT}:9399 --ipc=host opea/lvm:comps + sleep 1m } function start_lvm() { @@ -71,17 +75,17 @@ function start_lvm() { } function start_service() { - # start redis - echo "Starting Redis server" - REDIS_PORT=6380 - docker run -d --name="test-redis" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p $REDIS_PORT:6379 -p 8002:8001 --ipc=host redis/redis-stack:7.2.0-v9 - - # start dataprep microservice - echo "Starting dataprep microservice" - dataprep_service_port=5013 - REDIS_URL="redis://${ip_address}:${REDIS_PORT}" - docker run -d --name="test-comps-dataprep-multimodal-redis" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME -e LVM_ENDPOINT=$LVM_ENDPOINT -p ${dataprep_service_port}:6007 --runtime=runc --ipc=host opea/dataprep-multimodal-redis:comps - + export host_ip=${ip_address} + export REDIS_HOST=$ip_address + export REDIS_PORT=6379 + export REDIS_URL="redis://${ip_address}:${REDIS_PORT}" + export LVM_PORT=5028 + export LVM_ENDPOINT="http://${ip_address}:${LVM_PORT}/v1/lvm" + export INDEX_NAME="dataprep" + export TAG="comps" + service_name="redis-vector-db dataprep-multimodal-redis" + cd $WORKPATH/comps/dataprep/deployment/docker_compose/ + docker compose up ${service_name} -d sleep 1m } @@ -132,6 +136,9 @@ tire.""" > ${transcript_fn} echo "Downloading Audio" wget https://github.com/intel/intel-extension-for-transformers/raw/main/intel_extension_for_transformers/neural_chat/assets/audio/sample.wav -O ${audio_fn} + echo "Downloading PDF" + wget https://raw.githubusercontent.com/opea-project/GenAIComps/v1.1/comps/retrievers/redis/data/nke-10k-2023.pdf -O ${pdf_fn} + } function validate_microservice() { @@ -139,7 +146,7 @@ function validate_microservice() { # test v1/generate_transcripts upload file echo "Testing generate_transcripts API" - URL="http://${ip_address}:$dataprep_service_port/v1/generate_transcripts" + URL="http://${ip_address}:$DATAPREP_PORT/v1/dataprep/generate_transcripts" HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F "files=@$video_fn" -F "files=@$audio_fn" -H 'Content-Type: multipart/form-data' "$URL") HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g') @@ -147,22 +154,22 @@ function validate_microservice() { if [ "$HTTP_STATUS" -ne "200" ]; then echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs test-comps-dataprep-multimodal-redis >> ${LOG_PATH}/dataprep_upload_file.log + docker logs dataprep-multimodal-redis-server >> ${LOG_PATH}/dataprep_upload_file.log exit 1 else echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." 
fi if [[ "$RESPONSE_BODY" != *"Data preparation succeeded"* ]]; then echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" - docker logs test-comps-dataprep-multimodal-redis >> ${LOG_PATH}/dataprep_upload_file.log + docker logs dataprep-multimodal-redis-server >> ${LOG_PATH}/dataprep_upload_file.log exit 1 else echo "[ $SERVICE_NAME ] Content is as expected." fi - # test v1/ingest_with_text upload video file - echo "Testing ingest_with_text API with video+transcripts" - URL="http://${ip_address}:$dataprep_service_port/v1/ingest_with_text" + # test ingest upload video file + echo "Testing ingest API with video+transcripts" + URL="http://${ip_address}:$DATAPREP_PORT/v1/dataprep/ingest" HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F "files=@$video_fn" -F "files=@$transcript_fn" -H 'Content-Type: multipart/form-data' "$URL") HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') @@ -171,22 +178,22 @@ function validate_microservice() { if [ "$HTTP_STATUS" -ne "200" ]; then echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs test-comps-dataprep-multimodal-redis >> ${LOG_PATH}/dataprep_upload_file.log + docker logs dataprep-multimodal-redis-server >> ${LOG_PATH}/dataprep_upload_file.log exit 1 else echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." fi if [[ "$RESPONSE_BODY" != *"Data preparation succeeded"* ]]; then echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" - docker logs test-comps-dataprep-multimodal-redis >> ${LOG_PATH}/dataprep_upload_file.log + docker logs dataprep-multimodal-redis-server >> ${LOG_PATH}/dataprep_upload_file.log exit 1 else echo "[ $SERVICE_NAME ] Content is as expected." fi - # test v1/ingest_with_text upload image file - echo "Testing ingest_with_text API with image+caption" - URL="http://${ip_address}:$dataprep_service_port/v1/ingest_with_text" + # test ingest upload image file + echo "Testing ingest API with image+caption" + URL="http://${ip_address}:$DATAPREP_PORT/v1/dataprep/ingest" HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F "files=@$image_fn" -F "files=@$caption_fn" -H 'Content-Type: multipart/form-data' "$URL") HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') @@ -195,22 +202,22 @@ function validate_microservice() { if [ "$HTTP_STATUS" -ne "200" ]; then echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs test-comps-dataprep-multimodal-redis >> ${LOG_PATH}/dataprep_upload_file.log + docker logs dataprep-multimodal-redis-server >> ${LOG_PATH}/dataprep_upload_file.log exit 1 else echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." fi if [[ "$RESPONSE_BODY" != *"Data preparation succeeded"* ]]; then echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" - docker logs test-comps-dataprep-multimodal-redis >> ${LOG_PATH}/dataprep_upload_file.log + docker logs dataprep-multimodal-redis-server >> ${LOG_PATH}/dataprep_upload_file.log exit 1 else echo "[ $SERVICE_NAME ] Content is as expected." 
fi - # test v1/ingest_with_text with video and image - echo "Testing ingest_with_text API with both video+transcript and image+caption" - URL="http://${ip_address}:$dataprep_service_port/v1/ingest_with_text" + # test ingest with video and image + echo "Testing ingest API with both video+transcript and image+caption" + URL="http://${ip_address}:$DATAPREP_PORT/v1/dataprep/ingest" HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F "files=@$image_fn" -F "files=@$caption_fn" -F "files=@$video_fn" -F "files=@$transcript_fn" -H 'Content-Type: multipart/form-data' "$URL") HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') @@ -219,22 +226,22 @@ function validate_microservice() { if [ "$HTTP_STATUS" -ne "200" ]; then echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs test-comps-dataprep-multimodal-redis >> ${LOG_PATH}/dataprep_upload_file.log + docker logs dataprep-multimodal-redis-server >> ${LOG_PATH}/dataprep_upload_file.log exit 1 else echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." fi if [[ "$RESPONSE_BODY" != *"Data preparation succeeded"* ]]; then echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" - docker logs test-comps-dataprep-multimodal-redis >> ${LOG_PATH}/dataprep_upload_file.log + docker logs dataprep-multimodal-redis-server >> ${LOG_PATH}/dataprep_upload_file.log exit 1 else echo "[ $SERVICE_NAME ] Content is as expected." fi - # test v1/ingest_with_text with invalid input (.png image with .vtt transcript) - echo "Testing ingest_with_text API with invalid input (.png and .vtt)" - URL="http://${ip_address}:$dataprep_service_port/v1/ingest_with_text" + # test ingest with invalid input (.png image with .vtt transcript) + echo "Testing ingest API with invalid input (.png and .vtt)" + URL="http://${ip_address}:$DATAPREP_PORT/v1/dataprep/ingest" HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F "files=@$image_fn" -F "files=@$transcript_fn" -H 'Content-Type: multipart/form-data' "$URL") HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') @@ -243,22 +250,46 @@ function validate_microservice() { if [ "$HTTP_STATUS" -ne "400" ]; then echo "[ $SERVICE_NAME ] HTTP status is not 400. Received status was $HTTP_STATUS" - docker logs test-comps-dataprep-multimodal-redis >> ${LOG_PATH}/dataprep_upload_file.log + docker logs dataprep-multimodal-redis-server >> ${LOG_PATH}/dataprep_upload_file.log exit 1 else echo "[ $SERVICE_NAME ] HTTP status is 400. Checking content..." fi if [[ "$RESPONSE_BODY" != *"No caption file found for $image_name"* ]]; then echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" - docker logs test-comps-dataprep-multimodal-redis >> ${LOG_PATH}/dataprep_upload_file.log + docker logs dataprep-multimodal-redis-server >> ${LOG_PATH}/dataprep_upload_file.log + exit 1 + else + echo "[ $SERVICE_NAME ] Content is as expected." 
+ fi + + # test ingest with a PDF file + echo "Testing ingest API with a PDF file" + URL="http://${ip_address}:$DATAPREP_PORT/v1/dataprep/ingest" + + HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F "files=@$pdf_fn" -H 'Content-Type: multipart/form-data' "$URL") + HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') + RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g') + SERVICE_NAME="dataprep - upload - file" + + if [ "$HTTP_STATUS" -ne "200" ]; then + echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" + docker logs dataprep-multimodal-redis-server >> ${LOG_PATH}/dataprep_upload_file.log + exit 1 + else + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." + fi + if [[ "$RESPONSE_BODY" != *"Data preparation succeeded"* ]]; then + echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" + docker logs dataprep-multimodal-redis-server >> ${LOG_PATH}/dataprep_upload_file.log exit 1 else echo "[ $SERVICE_NAME ] Content is as expected." fi - # test v1/generate_captions upload video file + # test generate_captions upload video file echo "Testing generate_captions API with video" - URL="http://${ip_address}:$dataprep_service_port/v1/generate_captions" + URL="http://${ip_address}:$DATAPREP_PORT/v1/dataprep/generate_captions" HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F "files=@$video_fn" -H 'Content-Type: multipart/form-data' "$URL") HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') @@ -267,14 +298,14 @@ function validate_microservice() { if [ "$HTTP_STATUS" -ne "200" ]; then echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs test-comps-dataprep-multimodal-redis >> ${LOG_PATH}/dataprep_upload_file.log + docker logs dataprep-multimodal-redis-server >> ${LOG_PATH}/dataprep_upload_file.log exit 1 else echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." fi if [[ "$RESPONSE_BODY" != *"Data preparation succeeded"* ]]; then echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" - docker logs test-comps-dataprep-multimodal-redis >> ${LOG_PATH}/dataprep_upload_file.log + docker logs dataprep-multimodal-redis-server >> ${LOG_PATH}/dataprep_upload_file.log exit 1 else echo "[ $SERVICE_NAME ] Content is as expected." @@ -282,7 +313,7 @@ function validate_microservice() { # test v1/generate_captions upload image file echo "Testing generate_captions API with image" - URL="http://${ip_address}:$dataprep_service_port/v1/generate_captions" + URL="http://${ip_address}:$DATAPREP_PORT/v1/dataprep/generate_captions" HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F "files=@$image_fn" -H 'Content-Type: multipart/form-data' "$URL") HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') @@ -291,14 +322,14 @@ function validate_microservice() { if [ "$HTTP_STATUS" -ne "200" ]; then echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs test-comps-dataprep-multimodal-redis >> ${LOG_PATH}/dataprep_upload_file.log + docker logs dataprep-multimodal-redis-server >> ${LOG_PATH}/dataprep_upload_file.log exit 1 else echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." 
fi if [[ "$RESPONSE_BODY" != *"Data preparation succeeded"* ]]; then echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" - docker logs test-comps-dataprep-multimodal-redis >> ${LOG_PATH}/dataprep_upload_file.log + docker logs dataprep-multimodal-redis-server >> ${LOG_PATH}/dataprep_upload_file.log exit 1 else echo "[ $SERVICE_NAME ] Content is as expected." @@ -306,7 +337,7 @@ function validate_microservice() { # test /v1/dataprep/get_files echo "Testing get_files API" - URL="http://${ip_address}:$dataprep_service_port/v1/dataprep/get_files" + URL="http://${ip_address}:$DATAPREP_PORT/v1/dataprep/get" HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST "$URL") HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g') @@ -314,22 +345,22 @@ function validate_microservice() { if [ "$HTTP_STATUS" -ne "200" ]; then echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs test-comps-dataprep-multimodal-redis >> ${LOG_PATH}/dataprep_file.log + docker logs dataprep-multimodal-redis-server >> ${LOG_PATH}/dataprep_file.log exit 1 else echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." fi - if [[ "$RESPONSE_BODY" != *${image_name}* || "$RESPONSE_BODY" != *${video_name}* || "$RESPONSE_BODY" != *${audio_name}* ]]; then + if [[ "$RESPONSE_BODY" != *${image_name}* || "$RESPONSE_BODY" != *${video_name}* || "$RESPONSE_BODY" != *${audio_name}* || "$RESPONSE_BODY" != *${pdf_name}* ]]; then echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" - docker logs test-comps-dataprep-multimodal-redis >> ${LOG_PATH}/dataprep_file.log + docker logs dataprep-multimodal-redis-server >> ${LOG_PATH}/dataprep_file.log exit 1 else echo "[ $SERVICE_NAME ] Content is as expected." fi - # test /v1/dataprep/delete_files - echo "Testing delete_files API" - URL="http://${ip_address}:$dataprep_service_port/v1/dataprep/delete_files" + # test /v1/dataprep/delete + echo "Testing delete API" + URL="http://${ip_address}:$DATAPREP_PORT/v1/dataprep/delete" HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -d '{"file_path": "dataprep_file.txt"}' -H 'Content-Type: application/json' "$URL") HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g') @@ -338,7 +369,7 @@ function validate_microservice() { # check response status if [ "$HTTP_STATUS" -ne "200" ]; then echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs test-comps-dataprep-multimodal-redis >> ${LOG_PATH}/dataprep_del.log + docker logs dataprep-multimodal-redis-server >> ${LOG_PATH}/dataprep_del.log exit 1 else echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." @@ -346,7 +377,7 @@ function validate_microservice() { # check response body if [[ "$RESPONSE_BODY" != *'{"status":true}'* ]]; then echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" - docker logs test-comps-dataprep-multimodal-redis >> ${LOG_PATH}/dataprep_del.log + docker logs dataprep-multimodal-redis-server >> ${LOG_PATH}/dataprep_del.log exit 1 else echo "[ $SERVICE_NAME ] Content is as expected." 
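The reworked stop_docker below passes several --filter "name=..." options to a single docker ps call; Docker ORs repeated filters on the same key, so one invocation collects every matching container. A small sketch of that cleanup pattern, with illustrative name prefixes:

# Repeated name filters are OR-ed: this matches containers whose names contain either prefix.
cid=$(docker ps -aq --filter "name=dataprep-multimodal-redis-server" --filter "name=redis-vector-")
if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi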
@@ -354,10 +385,10 @@ function validate_microservice() { } function stop_docker() { - cid=$(docker ps -aq --filter "name=test-*") + cid=$(docker ps -aq --filter "name=dataprep-multimodal-redis-server*" --filter "name=redis-vector-*") + if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi + cid=$(docker ps -aq --filter "name=test-comps-lvm*") if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi - # cid=$(docker ps -aq --filter "name=test-comps-lvm*") - # if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } @@ -370,11 +401,12 @@ function delete_data() { function main() { stop_docker - start_lvm build_docker_images - start_service prepare_data + start_lvm + start_service + validate_microservice delete_data stop_docker diff --git a/tests/dataprep/test_dataprep_vdms_langchain.sh b/tests/dataprep/test_dataprep_vdms.sh similarity index 60% rename from tests/dataprep/test_dataprep_vdms_langchain.sh rename to tests/dataprep/test_dataprep_vdms.sh index 4fe0d0f0a0..2409e13f70 100644 --- a/tests/dataprep/test_dataprep_vdms_langchain.sh +++ b/tests/dataprep/test_dataprep_vdms.sh @@ -7,28 +7,34 @@ set -x WORKPATH=$(dirname "$PWD") LOG_PATH="$WORKPATH/tests" ip_address=$(hostname -I | awk '{print $1}') +DATAPREP_PORT="11110" function build_docker_images() { cd $WORKPATH echo $(pwd) - docker build --no-cache -t opea/dataprep-vdms:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/vdms/langchain/Dockerfile . + docker build --no-cache -t opea/dataprep:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/src/Dockerfile . if [ $? -ne 0 ]; then - echo "opea/dataprep-vdms built fail" + echo "opea/dataprep built fail" exit 1 else - echo "opea/dataprep-vdms built successful" + echo "opea/dataprep built successful" fi docker pull intellabs/vdms:latest } function start_service() { - VDMS_PORT=5043 - docker run -d --name="test-comps-dataprep-vdms" -p $VDMS_PORT:55555 intellabs/vdms:latest - dataprep_service_port=5013 - COLLECTION_NAME="test-comps" - docker run -d --name="test-comps-dataprep-vdms-server" -e COLLECTION_NAME=$COLLECTION_NAME -e no_proxy=$no_proxy -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e VDMS_HOST=$ip_address -e VDMS_PORT=$VDMS_PORT -p ${dataprep_service_port}:6007 --ipc=host opea/dataprep-vdms:comps - sleep 30s + export host_ip=${ip_address} + export VDMS_HOST=$ip_address + export VDMS_PORT=55555 + export COLLECTION_NAME="test-comps" + export QDRANT_HOST=$ip_address + export QDRANT_PORT=$QDRANT_PORT + export TAG="comps" + service_name="vdms-vector-db dataprep-vdms" + cd $WORKPATH/comps/dataprep/deployment/docker_compose/ + docker compose up ${service_name} -d + sleep 1m } function validate_microservice() { @@ -36,9 +42,7 @@ function validate_microservice() { echo "Deep learning is a subset of machine learning that utilizes neural networks with multiple layers to analyze various levels of abstract data representations. It enables computers to identify patterns and make decisions with minimal human intervention by learning from large amounts of data." 
> $LOG_PATH/dataprep_file.txt - dataprep_service_port=5013 - - URL="http://$ip_address:$dataprep_service_port/v1/dataprep" + URL="http://$ip_address:$DATAPREP_PORT/v1/dataprep/ingest" HTTP_STATUS=$(http_proxy="" curl -s -o /dev/null -w "%{http_code}" -X POST -F 'files=@./dataprep_file.txt' -H 'Content-Type: multipart/form-data' ${URL} ) if [ "$HTTP_STATUS" -eq 200 ]; then echo "[ dataprep-upload-file ] HTTP status is 200. Checking content..." @@ -47,14 +51,14 @@ function validate_microservice() { echo "[ dataprep-upload-file ] Content is correct." else echo "[ dataprep-upload-file ] Content is not correct. Received content was $CONTENT" - docker logs test-comps-dataprep-vdms-server >> ${LOG_PATH}/dataprep-upload-file.log - docker logs test-comps-dataprep-vdms >> ${LOG_PATH}/dataprep-upload-file_vdms.log + docker logs dataprep-vdms-server >> ${LOG_PATH}/dataprep-upload-file.log + docker logs vdms-vector-db >> ${LOG_PATH}/dataprep-upload-file_vdms.log exit 1 fi else echo "[ dataprep-upload-file ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs test-comps-dataprep-vdms-server >> ${LOG_PATH}/dataprep-upload-file.log - docker logs test-comps-dataprep-vdms >> ${LOG_PATH}/dataprep-upload-file_vdms.log + docker logs dataprep-vdms-server >> ${LOG_PATH}/dataprep-upload-file.log + docker logs vdms-vector-db >> ${LOG_PATH}/dataprep-upload-file_vdms.log exit 1 fi rm ./dataprep_file.txt @@ -62,7 +66,7 @@ function validate_microservice() { } function stop_docker() { - cid=$(docker ps -aq --filter "name=test-comps-dataprep-vdms*") + cid=$(docker ps -aq --filter "name=dataprep-vdms*" --filter "name=vdms-vector*") if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } diff --git a/tests/dataprep/test_dataprep_vdms_multimodal_langchain.sh b/tests/dataprep/test_dataprep_vdms_multimodal.sh similarity index 65% rename from tests/dataprep/test_dataprep_vdms_multimodal_langchain.sh rename to tests/dataprep/test_dataprep_vdms_multimodal.sh index 3dc70a7a36..a3af8dd5e0 100755 --- a/tests/dataprep/test_dataprep_vdms_multimodal_langchain.sh +++ b/tests/dataprep/test_dataprep_vdms_multimodal.sh @@ -7,28 +7,35 @@ set -x WORKPATH=$(dirname "$PWD") LOG_PATH="$WORKPATH/tests" ip_address=$(hostname -I | awk '{print $1}') +DATAPREP_PORT="11111" function build_docker_images() { cd $WORKPATH echo $(pwd) - docker build --no-cache -t opea/dataprep-vdms:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/vdms/multimodal_langchain/Dockerfile . + docker build --no-cache -t opea/dataprep:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/src/Dockerfile . if [ $? 
-ne 0 ]; then - echo "opea/dataprep-vdms built fail" + echo "opea/dataprep built fail" exit 1 else - echo "opea/dataprep-vdms built successful" + echo "opea/dataprep built successful" fi docker pull intellabs/vdms:latest } function start_service() { - VDMS_PORT=5043 - docker run -d --name="test-comps-dataprep-vdms" -p $VDMS_PORT:55555 intellabs/vdms:latest - dataprep_service_port=5013 - COLLECTION_NAME="test-comps" - docker run -d --name="test-comps-dataprep-vdms-server" -e COLLECTION_NAME=$COLLECTION_NAME -e no_proxy=$no_proxy -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e VDMS_HOST=$ip_address -e VDMS_PORT=$VDMS_PORT -p ${dataprep_service_port}:6007 --ipc=host opea/dataprep-vdms:comps - sleep 30s + export host_ip=${ip_address} + export VDMS_HOST=$ip_address + export VDMS_PORT=55555 + export COLLECTION_NAME="test-comps" + export QDRANT_HOST=$ip_address + export QDRANT_PORT=$QDRANT_PORT + export TAG="comps" + service_name="vdms-vector-db dataprep-vdms-multimodal" + cd $WORKPATH/comps/dataprep/deployment/docker_compose/ + docker compose up ${service_name} -d + sleep 1m + } function validate_microservice() { @@ -37,7 +44,7 @@ function validate_microservice() { sleep 5 # test /v1/dataprep upload file - URL="http://$ip_address:$dataprep_service_port/v1/dataprep" + URL="http://$ip_address:$DATAPREP_PORT/v1/dataprep/ingest_videos" response=$(http_proxy="" curl -s -w "\n%{http_code}" -X POST -F 'files=@./silence_girl.mp4' -H 'Content-Type: multipart/form-data' ${URL}) CONTENT=$(echo "$response" | sed -e '$ d') @@ -49,14 +56,14 @@ function validate_microservice() { echo "[ dataprep-upload-videos ] Content is correct." else echo "[ dataprep-upload-videos ] Content is not correct. Received content was $CONTENT" - docker logs test-comps-dataprep-vdms-server >> ${LOG_PATH}/dataprep-upload-videos.log - docker logs test-comps-dataprep-vdms >> ${LOG_PATH}/dataprep-upload-videos_vdms.log + docker logs dataprep-vdms-multimodal-server >> ${LOG_PATH}/dataprep-upload-videos.log + docker logs vdms-vector-db >> ${LOG_PATH}/dataprep-upload-videos_vdms.log exit 1 fi else echo "[ dataprep-upload-videos ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs test-comps-dataprep-vdms-server >> ${LOG_PATH}/dataprep-get-videos.log - docker logs test-comps-dataprep-vdms >> ${LOG_PATH}/dataprep-upload-videos_vdms.log + docker logs dataprep-vdms-multimodal-server >> ${LOG_PATH}/dataprep-get-videos.log + docker logs vdms-vector-db >> ${LOG_PATH}/dataprep-upload-videos_vdms.log exit 1 fi @@ -64,7 +71,7 @@ function validate_microservice() { rm ./silence_girl.mp4 # test /v1/dataprep/get_videos - URL="http://$ip_address:$dataprep_service_port/v1/dataprep/get_videos" + URL="http://$ip_address:$DATAPREP_PORT/v1/dataprep/get_videos" response=$(http_proxy="" curl -s -w "\n%{http_code}" -X GET ${URL}) CONTENT=$(echo "$response" | sed -e '$ d') @@ -76,19 +83,19 @@ function validate_microservice() { echo "[ dataprep-get-videos ] Content is correct." else echo "[ dataprep-get-videos ] Content is not correct. Received content was $CONTENT" - docker logs test-comps-dataprep-vdms-server >> ${LOG_PATH}/dataprep-get-videos.log + docker logs dataprep-vdms-multimodal-server >> ${LOG_PATH}/dataprep-get-videos.log exit 1 fi else echo "[ dataprep-get-videos ] HTTP status is not 200. 
Received status was $HTTP_STATUS" - docker logs test-comps-dataprep-vdms-server >> ${LOG_PATH}/dataprep-get-videos.log + docker logs dataprep-vdms-multimodal-server >> ${LOG_PATH}/dataprep-get-videos.log exit 1 fi # test /v1/dataprep/get_file/{filename} file_list=$CONTENT filename=$(echo $file_list | sed 's/^\[//;s/\]$//;s/,.*//;s/"//g') - URL="http://$ip_address:$dataprep_service_port/v1/dataprep/get_file/${filename}" + URL="http://$ip_address:$DATAPREP_PORT/v1/dataprep/get/${filename}" http_proxy="" wget ${URL} CONTENT=$(ls) @@ -96,14 +103,14 @@ function validate_microservice() { echo "[ download_file ] Content is correct." else echo "[ download_file ] Content is not correct. $CONTENT" - docker logs test-comps-dataprep-vdms-server >> ${LOG_PATH}/download_file.log + docker logs dataprep-vdms-multimodal-server >> ${LOG_PATH}/download_file.log exit 1 fi } function stop_docker() { - cid=$(docker ps -aq --filter "name=test-comps-dataprep-vdms*") + cid=$(docker ps -aq --filter "name=vdms-vector-db*" --filter "name=dataprep-vdms-multimodal*") if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } diff --git a/tests/embeddings/test_embeddings_multimodal_bridgetower.sh b/tests/embeddings/test_embeddings_multimodal_bridgetower.sh index 721f6ae45e..304b2b70ed 100644 --- a/tests/embeddings/test_embeddings_multimodal_bridgetower.sh +++ b/tests/embeddings/test_embeddings_multimodal_bridgetower.sh @@ -6,17 +6,17 @@ set -x WORKPATH=$(dirname "$PWD") ip_address=$(hostname -I | awk '{print $1}') -export your_mmei_port=8089 +export your_mmei_port=12400 export EMBEDDER_PORT=$your_mmei_port export MMEI_EMBEDDING_ENDPOINT="http://$ip_address:$your_mmei_port" -export your_embedding_port_microservice=6609 +export your_embedding_port_microservice=10202 export MM_EMBEDDING_PORT_MICROSERVICE=$your_embedding_port_microservice unset http_proxy function build_mm_docker_images() { cd $WORKPATH echo $(pwd) - docker build --no-cache -t opea/embedding:latest -f comps/embeddings/src/Dockerfile . + docker build --no-cache -t opea/embedding:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/src/Dockerfile . if [ $? -ne 0 ]; then echo "opea/embedding built fail" exit 1 @@ -44,9 +44,10 @@ function build_docker_images() { } function start_service() { + service_name="multimodal-bridgetower-embedding-serving multimodal-bridgetower-embedding-server" cd $WORKPATH cd comps/embeddings/deployment/docker_compose/ - docker compose -f compose_multimodal_bridgetower.yaml up -d + docker compose up ${service_name} -d sleep 30 } @@ -60,8 +61,8 @@ function validate_microservice_text_embedding() { echo "Result correct." else echo "Result wrong. Received was $result" - docker logs embedding-multimodal-bridgetower - docker logs embedding-multimodal-bridgetower-server + docker logs multimodal-bridgetower-embedding-serving + docker logs multimodal-bridgetower-embedding-server exit 1 fi } @@ -76,8 +77,24 @@ function validate_microservice_image_text_pair_embedding() { echo "Result correct." else echo "Result wrong. 
Received was $result" - docker logs embedding-multimodal-bridgetower - docker logs embedding-multimodal-bridgetower-server + docker logs multimodal-bridgetower-embedding-serving + docker logs multimodal-bridgetower-embedding-server + exit 1 + fi +} + +function validate_microservice_b64_image_text_pair_embedding() { + result=$(http_proxy="" curl http://${ip_address}:$MM_EMBEDDING_PORT_MICROSERVICE/v1/embeddings \ + -X POST \ + -H "Content-Type: application/json" \ + -d '{"text": {"text" : "This is some sample text."}, "image" : {"base64_image": "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8/5+hnoEIwDiqkL4KAcT9GO0U4BxoAAAAAElFTkSuQmCC"}}') + + if [[ $result == *"embedding"* ]] && [[ $result == *"base64_image"* ]] ; then + echo "Result correct." + else + echo "Result wrong. Received was $result" + docker logs multimodal-bridgetower-embedding-serving + docker logs multimodal-bridgetower-embedding-server exit 1 fi } @@ -85,10 +102,11 @@ function validate_microservice_image_text_pair_embedding() { function validate_microservice() { validate_microservice_text_embedding validate_microservice_image_text_pair_embedding + validate_microservice_b64_image_text_pair_embedding } function stop_docker() { - cid=$(docker ps -aq --filter "name=embedding-multimodal-bridgetower") + cid=$(docker ps -aq --filter "name=multimodal-bridgetower-embedding-*") if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } diff --git a/tests/embeddings/test_embeddings_multimodal_bridgetower_on_intel_hpu.sh b/tests/embeddings/test_embeddings_multimodal_bridgetower_on_intel_hpu.sh index 5a0d89b86a..8db4b0e630 100644 --- a/tests/embeddings/test_embeddings_multimodal_bridgetower_on_intel_hpu.sh +++ b/tests/embeddings/test_embeddings_multimodal_bridgetower_on_intel_hpu.sh @@ -6,10 +6,10 @@ set -x WORKPATH=$(dirname "$PWD") ip_address=$(hostname -I | awk '{print $1}') -export your_mmei_port=8087 +export your_mmei_port=12401 export EMBEDDER_PORT=$your_mmei_port export MMEI_EMBEDDING_ENDPOINT="http://$ip_address:$your_mmei_port" -export your_embedding_port_microservice=6608 +export your_embedding_port_microservice=10203 export MM_EMBEDDING_PORT_MICROSERVICE=$your_embedding_port_microservice unset http_proxy @@ -28,7 +28,7 @@ function build_mm_docker_images() { function build_embedding_service_images() { cd $WORKPATH echo $(pwd) - docker build --no-cache -t opea/embedding-multimodal-bridgetower-hpu:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/third_parties/bridgetower/src/Dockerfile.intel_hpu . + docker build --no-cache -t opea/embedding-multimodal-bridgetower-gaudi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/third_parties/bridgetower/src/Dockerfile.intel_hpu . if [ $? -ne 0 ]; then echo "opea/embedding-multimodal-bridgetower built fail" @@ -44,9 +44,10 @@ function build_docker_images() { } function start_service() { + service_name="multimodal-bridgetower-embedding-gaudi-serving multimodal-bridgetower-embedding-gaudi-server" cd $WORKPATH cd comps/embeddings/deployment/docker_compose/ - docker compose -f compose_multimodal_bridgetower_intel_hpu.yaml up -d + docker compose up ${service_name} -d sleep 30 } @@ -60,8 +61,8 @@ function validate_microservice_text_embedding() { echo "Result correct." else echo "Result wrong. 
Received was $result" - docker logs embedding-multimodal-bridgetower - docker logs embedding-multimodal-bridgetower-server + docker logs multimodal-bridgetower-embedding-gaudi-serving + docker logs multimodal-bridgetower-embedding-gaudi-server exit 1 fi } @@ -76,8 +77,8 @@ function validate_microservice_image_text_pair_embedding() { echo "Result correct." else echo "Result wrong. Received was $result" - docker logs embedding-multimodal-bridgetower - docker logs embedding-multimodal-bridgetower-server + docker logs multimodal-bridgetower-embedding-gaudi-serving + docker logs multimodal-bridgetower-embedding-gaudi-server exit 1 fi } @@ -88,7 +89,7 @@ function validate_microservice() { } function stop_docker() { - cid=$(docker ps -aq --filter "name=embedding-multimodal-bridgetower") + cid=$(docker ps -aq --filter "name=multimodal-bridgetower-embedding*") if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } diff --git a/tests/embeddings/test_embeddings_predictionguard.sh b/tests/embeddings/test_embeddings_predictionguard.sh index 7ffacfaf4a..2d107e409f 100644 --- a/tests/embeddings/test_embeddings_predictionguard.sh +++ b/tests/embeddings/test_embeddings_predictionguard.sh @@ -13,7 +13,7 @@ fi function build_docker_images() { cd $WORKPATH echo $(pwd) - docker build --no-cache -t opea/embedding:comps -f comps/embeddings/src/Dockerfile . + docker build --no-cache -t opea/embedding:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/src/Dockerfile . if [ $? -ne 0 ]; then echo "opea/embedding built fail" exit 1 @@ -23,19 +23,19 @@ function build_docker_images() { } function start_service() { - pg_service_port=5124 - unset http_proxy - docker run -d --name=test-comps-embedding-pg-server \ - -e LOGFLAG=True -e http_proxy=$http_proxy -e https_proxy=$https_proxy \ - -e PREDICTIONGUARD_API_KEY=${PREDICTIONGUARD_API_KEY} \ - -e EMBEDDING_COMPONENT_NAME="OPEA_PREDICTIONGUARD_EMBEDDING" \ - -p ${pg_service_port}:6000 --ipc=host opea/embedding:comps - sleep 60 + export EMBEDDER_PORT=10201 + export PG_EMBEDDING_MODEL_NAME="bridgetower-large-itm-mlm-itc" + export TAG=comps + service_name="pg-embedding-server" + cd $WORKPATH + cd comps/embeddings/deployment/docker_compose/ + docker compose up ${service_name} -d + sleep 10 } function validate_service() { local INPUT_DATA="$1" - pg_service_port=5124 + pg_service_port=10201 result=$(http_proxy="" curl http://${ip_address}:${pg_service_port}/v1/embeddings \ -X POST \ -d "$INPUT_DATA" \ @@ -46,11 +46,11 @@ function validate_service() { echo "Result correct." elif [[ $result == *"error"* || $result == *"detail"* ]]; then echo "Result wrong. Error received was: $result" - docker logs test-comps-embedding-pg-server + docker logs pg-embedding-server exit 1 else echo "Unexpected result format received was: $result" - docker logs test-comps-embedding-pg-server + docker logs pg-embedding-server exit 1 fi } @@ -66,7 +66,7 @@ function validate_microservice() { } function stop_docker() { - cid=$(docker ps -aq --filter "name=test-comps-embedding-pg-*") + cid=$(docker ps -aq --filter "name=pg-embedding-*") if [[ ! 
-z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } diff --git a/tests/embeddings/test_embeddings_tei.sh b/tests/embeddings/test_embeddings_tei.sh index ceecaf0c37..bb20a105d4 100644 --- a/tests/embeddings/test_embeddings_tei.sh +++ b/tests/embeddings/test_embeddings_tei.sh @@ -1,3 +1,4 @@ + #!/bin/bash # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 @@ -10,7 +11,7 @@ ip_address=$(hostname -I | awk '{print $1}') function build_docker_images() { cd $WORKPATH echo $(pwd) - docker build --no-cache -t opea/embedding:comps -f comps/embeddings/src/Dockerfile . + docker build --no-cache -t opea/embedding:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/src/Dockerfile . if [ $? -ne 0 ]; then echo "opea/embedding built fail" exit 1 @@ -20,20 +21,22 @@ function build_docker_images() { } function start_service() { - tei_endpoint=5001 - model="BAAI/bge-base-en-v1.5" - unset http_proxy - docker run -d --name="test-comps-embedding-endpoint" -p $tei_endpoint:80 -v ./data:/data --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 --model-id $model - sleep 3m - export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:${tei_endpoint}" - tei_service_port=5002 - docker run -d --name="test-comps-embedding-server" -e LOGFLAG=True -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p ${tei_service_port}:6000 --ipc=host -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e EMBEDDING_COMPONENT_NAME="OPEA_TEI_EMBEDDING" opea/embedding:comps + export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" + export TEI_EMBEDDER_PORT=12000 + export EMBEDDER_PORT=10200 + export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:${TEI_EMBEDDER_PORT}" + export TAG=comps + export host_ip=${ip_address} + service_name="tei-embedding-serving tei-embedding-server" + cd $WORKPATH + cd comps/embeddings/deployment/docker_compose/ + docker compose up ${service_name} -d sleep 15 } function validate_service() { local INPUT_DATA="$1" - tei_service_port=5002 + tei_service_port=10200 result=$(http_proxy="" curl http://${ip_address}:$tei_service_port/v1/embeddings \ -X POST \ -d "$INPUT_DATA" \ @@ -42,8 +45,8 @@ function validate_service() { echo "Result correct." else echo "Result wrong. Received was $result" - docker logs test-comps-embedding-endpoint - docker logs test-comps-embedding-server + docker logs tei-embedding-serving + docker logs tei-embedding-server exit 1 fi } @@ -59,17 +62,17 @@ function validate_microservice() { } function validate_microservice_with_openai() { - tei_service_port=5002 + tei_service_port=10200 python3 ${WORKPATH}/tests/utils/validate_svc_with_openai.py $ip_address $tei_service_port "embedding" if [ $? -ne 0 ]; then - docker logs test-comps-embedding-endpoint - docker logs test-comps-embedding-server + docker logs tei-embedding-serving + docker logs tei-embedding-server exit 1 fi } function stop_docker() { - cid=$(docker ps -aq --filter "name=test-comps-embedding-*") + cid=$(docker ps -aq --filter "name=tei-embedding-*") if [[ ! 
-z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } diff --git a/tests/feedback_management/test_feedback_management_mongo.sh b/tests/feedback_management/test_feedback_management_mongo.sh index ffe352b590..7bd57ae374 100644 --- a/tests/feedback_management/test_feedback_management_mongo.sh +++ b/tests/feedback_management/test_feedback_management_mongo.sh @@ -15,27 +15,28 @@ export COLLECTION_NAME=${COLLECTION_NAME:-"test"} function build_docker_images() { cd $WORKPATH echo $(pwd) - docker run -d -p 27017:27017 --name=test-comps-mongo mongo:latest - docker build --no-cache -t opea/feedbackmanagement-mongo-server:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/feedback_management/src/Dockerfile . + docker build --no-cache -t opea/feedbackmanagement-mongo:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/feedback_management/src/Dockerfile . if [ $? -ne 0 ]; then - echo "opea/feedbackmanagement-mongo-server built fail" + echo "opea/feedbackmanagement-mongo built fail" exit 1 else - echo "opea/feedbackmanagement-mongo-server built successful" + echo "opea/feedbackmanagement-mongo built successful" fi } function start_service() { - - docker run -d --name="test-comps-feedbackmanagement-mongo-server" -p 6016:6016 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e MONGO_HOST=${MONGO_HOST} -e MONGO_PORT=${MONGO_PORT} -e DB_NAME=${DB_NAME} -e COLLECTION_NAME=${COLLECTION_NAME} opea/feedbackmanagement-mongo-server:comps - + cd $WORKPATH + export FEEDBACK_MANAGEMENT_PORT=11200 + export TAG=comps + cd comps/feedback_management/deployment/docker_compose/ + docker compose up -d sleep 10s } function validate_microservice() { result=$(curl -X 'POST' \ - http://$ip_address:6016/v1/feedback/create \ + http://$ip_address:${FEEDBACK_MANAGEMENT_PORT}/v1/feedback/create \ -H 'accept: application/json' \ -H 'Content-Type: application/json' \ -d '{ @@ -70,14 +71,14 @@ function validate_microservice() { echo "Correct result." else echo "Incorrect result." - docker logs test-comps-feedbackmanagement-mongo-server + docker logs feedbackmanagement-mongo-server exit 1 fi } function stop_docker() { - cid=$(docker ps -aq --filter "name=test-comps*") + cid=$(docker ps -aq --filter "name=feedbackmanagement-mongo-*") if [[ ! 
-z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } diff --git a/tests/finetuning/test_finetuning_native.sh b/tests/finetuning/test_finetuning_native.sh index 879c8fc7bd..fa269e77da 100644 --- a/tests/finetuning/test_finetuning_native.sh +++ b/tests/finetuning/test_finetuning_native.sh @@ -9,6 +9,7 @@ LOG_PATH="$WORKPATH/tests" ip_address=$(hostname -I | awk '{print $1}') finetuning_service_port=8015 ray_port=8265 +service_name="finetuning" function build_docker_images() { cd $WORKPATH @@ -24,7 +25,8 @@ function build_docker_images() { function start_service() { export no_proxy="localhost,127.0.0.1,"${ip_address} - docker run -d --name="test-comps-finetuning-server" -p $finetuning_service_port:$finetuning_service_port -p $ray_port:$ray_port --runtime=runc --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy opea/finetuning:comps + cd $WORKPATH/comps/finetuning/deployment/docker_compose + docker compose -f compose.yaml up ${service_name} -d > start_services_with_compose.log sleep 1m } @@ -131,7 +133,7 @@ function validate_microservice() { validate_upload \ "http://${ip_address}:$finetuning_service_port/v1/files" \ "general - upload" \ - "test-comps-finetuning-server" \ + "finetuning" \ "fine-tune" \ "test_data.json" @@ -139,7 +141,7 @@ function validate_microservice() { validate_finetune \ "http://${ip_address}:$finetuning_service_port/v1/fine_tuning/jobs" \ "general - finetuning" \ - "test-comps-finetuning-server" \ + "finetuning" \ '{"id":"ft-job' \ '{"training_file": "test_data.json","model": "facebook/opt-125m"}' @@ -159,7 +161,7 @@ EOF validate_upload \ "http://${ip_address}:$finetuning_service_port/v1/files" \ "rerank - upload" \ - "test-comps-finetuning-server" \ + "finetuning" \ "fine-tune" \ "test_data_rerank.json" @@ -167,7 +169,7 @@ EOF validate_finetune \ "http://${ip_address}:$finetuning_service_port/v1/fine_tuning/jobs" \ "rerank - finetuning" \ - "test-comps-finetuning-server" \ + "finetuning" \ '{"id":"ft-job' \ '{"training_file": "test_data_rerank.json","model": "BAAI/bge-reranker-base","General":{"task":"rerank","lora_config":null}}' @@ -187,7 +189,7 @@ EOF validate_upload \ "http://${ip_address}:$finetuning_service_port/v1/files" \ "pretrain - upload" \ - "test-comps-finetuning-server" \ + "finetuning" \ "fine-tune" \ "test_data_pretrain.json" @@ -195,7 +197,7 @@ EOF validate_finetune \ "http://${ip_address}:$finetuning_service_port/v1/fine_tuning/jobs" \ "pretrain - finetuning" \ - "test-comps-finetuning-server" \ + "finetuning" \ '{"id":"ft-job' \ '{"training_file": "test_data_pretrain.json","model": "facebook/opt-125m","General":{"task":"pretraining","lora_config":null}}' @@ -211,7 +213,7 @@ EOF validate_upload \ "http://${ip_address}:$finetuning_service_port/v1/files" \ "dpo - upload" \ - "test-comps-finetuning-server" \ + "finetuning" \ "fine-tune" \ "test_data_dpo.jsonl" @@ -219,15 +221,15 @@ EOF validate_finetune \ "http://${ip_address}:$finetuning_service_port/v1/fine_tuning/jobs" \ "dpo - finetuning" \ - "test-comps-finetuning-server" \ + "finetuning" \ '{"id":"ft-job' \ '{"training_file": "test_data_dpo.jsonl","model": "facebook/opt-125m","General":{"task":"dpo"}}' } function stop_docker() { - cid=$(docker ps -aq --filter "name=test-comps-finetuning-server*") - if [[ ! 
-z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi + cd $WORKPATH/comps/finetuning/deployment/docker_compose + docker compose -f compose.yaml down ${service_name} --remove-orphans } function main() { diff --git a/tests/finetuning/test_finetuning_native_on_intel_hpu.sh b/tests/finetuning/test_finetuning_native_on_intel_hpu.sh index d76d48bf7c..33d9b48a85 100644 --- a/tests/finetuning/test_finetuning_native_on_intel_hpu.sh +++ b/tests/finetuning/test_finetuning_native_on_intel_hpu.sh @@ -9,6 +9,7 @@ LOG_PATH="$WORKPATH/tests" ip_address=$(hostname -I | awk '{print $1}') finetuning_service_port=8015 ray_port=8265 +service_name="finetuning-gaudi" function build_docker_images() { cd $WORKPATH @@ -24,7 +25,8 @@ function build_docker_images() { function start_service() { export no_proxy="localhost,127.0.0.1,"${ip_address} - docker run -d --name="test-comps-finetuning-gaudi-server" --runtime=habana -e HABANA_VISIBLE_DEVICES=all -p $finetuning_service_port:$finetuning_service_port -p $ray_port:$ray_port -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --net=host --ipc=host -e https_proxy=$https_proxy -e http_proxy=$http_proxy -e no_proxy=$no_proxy -e HF_TOKEN=$HF_TOKEN opea/finetuning-gaudi:latest + cd $WORKPATH/comps/finetuning/deployment/docker_compose + docker compose -f compose.yaml up ${service_name} -d > start_services_with_compose.log sleep 1m } @@ -61,7 +63,7 @@ function validate_microservice() { if [ "$HTTP_STATUS" -ne "200" ]; then echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs test-comps-finetuning-gaudi-server > ${LOG_PATH}/finetuning-server_upload_file.log + docker logs finetuning-gaudi > ${LOG_PATH}/finetuning-server_upload_file.log exit 1 else echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." @@ -69,7 +71,7 @@ function validate_microservice() { # Check if the parsed values match the expected values if [[ "$purpose" != "$expected_purpose" || "$filename" != "$expected_filename" ]]; then echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" - docker logs test-comps-finetuning-gaudi-server > ${LOG_PATH}/finetuning-server_upload_file.log + docker logs finetuning-gaudi > ${LOG_PATH}/finetuning-server_upload_file.log exit 1 else echo "[ $SERVICE_NAME ] Content is as expected." @@ -86,14 +88,14 @@ function validate_microservice() { if [ "$HTTP_STATUS" -ne "200" ]; then echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs test-comps-finetuning-gaudi-server >> ${LOG_PATH}/finetuning-server_create.log + docker logs finetuning-gaudi >> ${LOG_PATH}/finetuning-server_create.log exit 1 else echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." fi if [[ "$RESPONSE_BODY" != *'{"id":"ft-job'* ]]; then echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" - docker logs test-comps-finetuning-gaudi-server >> ${LOG_PATH}/finetuning-server_create.log + docker logs finetuning-gaudi >> ${LOG_PATH}/finetuning-server_create.log exit 1 else echo "[ $SERVICE_NAME ] Content is as expected." @@ -139,14 +141,14 @@ function validate_microservice() { if [ "$HTTP_STATUS" -ne "200" ]; then echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs test-comps-finetuning-gaudi-server > ${LOG_PATH}/finetuning-server_create.log + docker logs finetuning-gaudi > ${LOG_PATH}/finetuning-server_create.log exit 1 else echo "[ $SERVICE_NAME ] HTTP status is 200. 
Checking content..." fi if [[ "$RESPONSE_BODY" != *'{"id":"ft-job'* ]]; then echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" - docker logs test-comps-finetuning-gaudi-server > ${LOG_PATH}/finetuning-server_create.log + docker logs finetuning-gaudi > ${LOG_PATH}/finetuning-server_create.log exit 1 else echo "[ $SERVICE_NAME ] Content is as expected." @@ -173,12 +175,12 @@ function validate_microservice() { done # get logs - docker logs test-comps-finetuning-gaudi-server >> ${LOG_PATH}/finetuning-server_create.log + docker logs finetuning-gaudi >> ${LOG_PATH}/finetuning-server_create.log } function stop_docker() { - cid=$(docker ps -aq --filter "name=test-comps-finetuning-gaudi-server*") - if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi + cd $WORKPATH/comps/finetuning/deployment/docker_compose + docker compose -f compose.yaml down ${service_name} --remove-orphans } function main() { diff --git a/tests/guardrails/test_guardrails_bias_detection_distilbias.sh b/tests/guardrails/test_guardrails_bias_detection_distilbias.sh index 93bdaf89fe..e47ecce238 100644 --- a/tests/guardrails/test_guardrails_bias_detection_distilbias.sh +++ b/tests/guardrails/test_guardrails_bias_detection_distilbias.sh @@ -21,35 +21,40 @@ function build_docker_images() { function start_service() { echo "Starting microservice" - docker run -d --runtime=runc --name="test-comps-guardrails-bias-detection-endpoint" -p 9092:9092 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy opea/guardrails-bias-detection:comps - sleep 30 + export BIAS_DETECTION_PORT=11301 + export TAG=comps + service_name="guardrails-bias-detection-server" + cd $WORKPATH + cd comps/guardrails/deployment/docker_compose/ + docker compose up ${service_name} -d + sleep 15 echo "Microservice started" } function validate_microservice() { echo "Validate microservice started" echo "test 1 - biased" - result=$(curl localhost:9092/v1/bias -X POST -d '{"text":"John McCain exposed as an unprincipled politician."}' -H 'Content-Type: application/json') + result=$(curl localhost:11301/v1/bias -X POST -d '{"text":"John McCain exposed as an unprincipled politician."}' -H 'Content-Type: application/json') if [[ $result == *"Violated"* ]]; then echo "Result correct." else - docker logs test-comps-guardrails-bias-detection-endpoint + docker logs guardrails-bias-detection-server exit 1 fi echo "test 2 - non-biased" - result=$(curl localhost:9092/v1/bias -X POST -d '{"text":"John McCain described as an unprincipled politician."}' -H 'Content-Type: application/json') + result=$(curl localhost:11301/v1/bias -X POST -d '{"text":"John McCain described as an unprincipled politician."}' -H 'Content-Type: application/json') if [[ $result == *"described"* ]]; then echo "Result correct." else echo "Result wrong." - docker logs test-comps-guardrails-bias-detection-endpoint + docker logs guardrails-bias-detection-server exit 1 fi echo "Validate microservice completed" } function stop_docker() { - cid=$(docker ps -aq --filter "name=test-comps-guardrails-bias-detection-endpoint") + cid=$(docker ps -aq --filter "name=guardrails-bias-detection-server") echo "Shutdown legacy containers "$cid if [[ ! 
-z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } diff --git a/tests/guardrails/test_guardrails_factuality_alignment_predictionguard.sh b/tests/guardrails/test_guardrails_factuality_alignment_predictionguard.sh index 24f6f0d929..05f5ec093e 100644 --- a/tests/guardrails/test_guardrails_factuality_alignment_predictionguard.sh +++ b/tests/guardrails/test_guardrails_factuality_alignment_predictionguard.sh @@ -13,29 +13,27 @@ fi function build_docker_images() { cd $WORKPATH echo $(pwd) - docker build --no-cache -t opea/factuality-pg:comps -f comps/guardrails/src/factuality_alignment/Dockerfile . + docker build --no-cache -t opea/guardrails-factuality-predictionguard:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/guardrails/src/factuality_alignment/Dockerfile . if [ $? -ne 0 ]; then - echo "opea/factuality-pg build failed" + echo "opea/guardrails-factuality-predictionguard build failed" exit 1 else - echo "opea/factuality-pg built successfully" + echo "opea/guardrails-factuality-predictionguard built successfully" fi } function start_service() { - factuality_service_port=9075 - unset http_proxy - - # Set your API key here (ensure this environment variable is set) - docker run -d --name=test-comps-factuality-pg-server \ - -e http_proxy= -e https_proxy= \ - -e PREDICTIONGUARD_API_KEY=${PREDICTIONGUARD_API_KEY} \ - -p 9075:9075 --ipc=host opea/factuality-pg:comps - sleep 60 # Sleep for 3 minutes to allow the service to start + export FACTUALITY_ALIGNMENT_PORT=11302 + service_name="guardrails-factuality-predictionguard-server" + export TAG=comps + cd $WORKPATH + cd comps/guardrails/deployment/docker_compose/ + docker compose up ${service_name} -d + sleep 15 } function validate_microservice() { - factuality_service_port=9075 + factuality_service_port=11302 result=$(http_proxy="" curl http://${ip_address}:${factuality_service_port}/v1/factuality \ -X POST \ -d '{"reference": "The Eiffel Tower is in Paris.", "text": "The Eiffel Tower is in Berlin."}' \ @@ -45,13 +43,13 @@ function validate_microservice() { echo "Service response is correct." else echo "Result wrong. Received was $result" - docker logs test-comps-factuality-pg-server + docker logs guardrails-factuality-predictionguard-server exit 1 fi } function stop_docker() { - cid=$(docker ps -aq --filter "name=test-comps-factuality-pg-*") + cid=$(docker ps -aq --filter "name=guardrails-factuality-predictionguard-*") if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } diff --git a/tests/guardrails/test_guardrails_guardrails_llamaguard_on_intel_hpu.sh b/tests/guardrails/test_guardrails_guardrails_llamaguard_on_intel_hpu.sh index 1bca180507..cfd31c4f47 100644 --- a/tests/guardrails/test_guardrails_guardrails_llamaguard_on_intel_hpu.sh +++ b/tests/guardrails/test_guardrails_guardrails_llamaguard_on_intel_hpu.sh @@ -10,56 +10,60 @@ ip_address=$(hostname -I | awk '{print $1}') function build_docker_images() { echo "Start building docker images for microservice" cd $WORKPATH - docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5 - docker build --no-cache -t opea/guardrails-llamaguard:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/guardrails/src/guardrails/Dockerfile . + docker pull ghcr.io/huggingface/tgi-gaudi:2.3.1 + docker build --no-cache -t opea/guardrails:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/guardrails/src/guardrails/Dockerfile . if [ $? 
-ne 0 ]; then - echo "opea/guardrails-llamaguard built fail" + echo "opea/guardrails built fail" exit 1 else - echo "opea/guardrails-llamaguard built successful" + echo "opea/guardrails built successful" fi } function start_service() { echo "Starting microservice" - export model_id="meta-llama/Meta-Llama-Guard-2-8B" + export host_ip=${ip_address} + export LLM_MODEL_ID="meta-llama/Meta-Llama-Guard-2-8B" + export LLM_ENDPOINT_PORT=12110 export SAFETY_GUARD_MODEL_ID="meta-llama/Meta-Llama-Guard-2-8B" - export SAFETY_GUARD_ENDPOINT=http://${ip_address}:5035/v1/chat/completions - - docker run -d --name="test-comps-guardrails-langchain-tgi-server" -p 5035:80 --runtime=habana -e HF_TOKEN=$HF_TOKEN -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy ghcr.io/huggingface/tgi-gaudi:2.0.5 --model-id $model_id --max-input-length 1024 --max-total-tokens 2048 - sleep 4m - docker run -d --name="test-comps-guardrails-langchain-service" -p 5036:9090 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e SAFETY_GUARD_MODEL_ID=$SAFETY_GUARD_MODEL_ID -e SAFETY_GUARD_ENDPOINT=$SAFETY_GUARD_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HF_TOKEN opea/guardrails-llamaguard:comps - sleep 10s + export SAFETY_GUARD_ENDPOINT=http://${ip_address}:${LLM_ENDPOINT_PORT}/v1/chat/completions + export GUARDRAILS_PORT=11303 + export TAG=comps + service_name="tgi-gaudi-server llamaguard-guardrails-server" + cd $WORKPATH + cd comps/guardrails/deployment/docker_compose/ + docker compose up ${service_name} -d echo "Microservice started" + sleep 15 } function validate_microservice() { echo "Validate microservice started" echo "test 1 - violated policies" - result=$(http_proxy= curl http://localhost:5036/v1/guardrails -X POST -d '{"text":"How do you buy a tiger in the US?","parameters":{"max_new_tokens":32}}' -H 'Content-Type: application/json') + result=$(http_proxy= curl http://localhost:11303/v1/guardrails -X POST -d '{"text":"How do you buy a tiger in the US?","parameters":{"max_new_tokens":32}}' -H 'Content-Type: application/json') if [[ $result == *"Violated"* ]]; then echo "Result correct." else echo "Result wrong." - docker logs test-comps-guardrails-langchain-tgi-server - docker logs test-comps-guardrails-langchain-service + docker logs tgi-gaudi-server + docker logs llamaguard-guardrails-server exit 1 fi echo "test 2 - safe" - result=$(http_proxy= curl http://localhost:5036/v1/guardrails -X POST -d '{"text":"How do you buy a car in the US?","parameters":{"max_new_tokens":32}}' -H 'Content-Type: application/json') + result=$(http_proxy= curl http://localhost:11303/v1/guardrails -X POST -d '{"text":"How do you buy a car in the US?","parameters":{"max_new_tokens":32}}' -H 'Content-Type: application/json') if [[ $result == *"car"* ]]; then echo "Result correct." else echo "Result wrong." - docker logs test-comps-guardrails-langchain-tgi-server - docker logs test-comps-guardrails-langchain-service + docker logs tgi-gaudi-server + docker logs llamaguard-guardrails-server exit 1 fi } function stop_docker() { - cid=$(docker ps -aq --filter "name=test-comps-guardrails-langchain*") + cid=$(docker ps -aq --filter "name=llamaguard-guardrails-server" --filter "name=tgi-gaudi-server") echo "Shutdown legacy containers "$cid if [[ ! 
-z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } diff --git a/tests/guardrails/test_guardrails_guardrails_wildguard_on_intel_hpu.sh b/tests/guardrails/test_guardrails_guardrails_wildguard_on_intel_hpu.sh index e2ea7f46ba..52fb1db1d1 100644 --- a/tests/guardrails/test_guardrails_guardrails_wildguard_on_intel_hpu.sh +++ b/tests/guardrails/test_guardrails_guardrails_wildguard_on_intel_hpu.sh @@ -10,55 +10,59 @@ ip_address=$(hostname -I | awk '{print $1}') function build_docker_images() { echo "Start building docker images for microservice" cd $WORKPATH - docker pull ghcr.io/huggingface/tgi-gaudi:2.0.1 - docker build --no-cache -t opea/guardrails-wildguard:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/guardrails/src/guardrails/Dockerfile . + docker pull ghcr.io/huggingface/tgi-gaudi:2.3.1 + docker build --no-cache -t opea/guardrails:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/guardrails/src/guardrails/Dockerfile . if [ $? -ne 0 ]; then - echo "opea/guardrails-wildguard built fail" + echo "opea/guardrails built fail" exit 1 else - echo "opea/guardrails-wildguard built successful" + echo "opea/guardrails built successful" fi } function start_service() { echo "Starting microservice" - export model_id="allenai/wildguard" + export host_ip=${ip_address} + export LLM_MODEL_ID="allenai/wildguard" + export LLM_ENDPOINT_PORT=12120 export SAFETY_GUARD_MODEL_ID="allenai/wildguard" - export SAFETY_GUARD_ENDPOINT=http://${ip_address}:5035 - - docker run -d --name="test-comps-guardrails-tgi-server" -p 5035:80 --runtime=habana -e HF_TOKEN=$HF_TOKEN -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy ghcr.io/huggingface/tgi-gaudi:2.0.1 --model-id $model_id --max-input-length 1024 --max-total-tokens 2048 - sleep 4m - docker run -d --name="test-comps-guardrails-service" -p 5036:9090 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e SAFETY_GUARD_MODEL_ID=$SAFETY_GUARD_MODEL_ID -e SAFETY_GUARD_ENDPOINT=$SAFETY_GUARD_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN} -e HF_TOKEN=${HF_TOKEN} -e GUARDRAILS_COMPONENT_NAME="OPEA_WILD_GUARD" opea/guardrails-wildguard:comps - sleep 10s + export SAFETY_GUARD_ENDPOINT=http://${ip_address}:${LLM_ENDPOINT_PORT} + export GUARDRAILS_PORT=11304 + export TAG=comps + service_name="tgi-gaudi-server wildguard-guardrails-server" + cd $WORKPATH + cd comps/guardrails/deployment/docker_compose/ + docker compose up ${service_name} -d echo "Microservice started" + sleep 15 } function validate_microservice() { echo "Validate microservice started" echo "test 1 - violated policies" - result=$(http_proxy= curl http://localhost:5036/v1/guardrails -X POST -d '{"text":"How do you buy a tiger in the US?","parameters":{"max_new_tokens":32}}' -H 'Content-Type: application/json') + result=$(http_proxy= curl http://localhost:11304/v1/guardrails -X POST -d '{"text":"How do you buy a tiger in the US?","parameters":{"max_new_tokens":32}}' -H 'Content-Type: application/json') if [[ $result == *"Violated"* ]]; then echo "Result correct." else echo "Result wrong." 
- docker logs test-comps-guardrails-tgi-server - docker logs test-comps-guardrails-service + docker logs tgi-gaudi-server + docker logs wildguard-guardrails-server exit 1 fi echo "test 2 - safe" - result=$(http_proxy= curl http://localhost:5036/v1/guardrails -X POST -d '{"text":"How do you buy a car in the US?","parameters":{"max_new_tokens":32}}' -H 'Content-Type: application/json') + result=$(http_proxy= curl http://localhost:11304/v1/guardrails -X POST -d '{"text":"How do you buy a car in the US?","parameters":{"max_new_tokens":32}}' -H 'Content-Type: application/json') if [[ $result == *"car"* ]]; then echo "Result correct." else echo "Result wrong." - docker logs test-comps-guardrails-tgi-server - docker logs test-comps-guardrails-service + docker logs tgi-gaudi-server + docker logs wildguard-guardrails-server exit 1 fi } function stop_docker() { - cid=$(docker ps -aq --filter "name=test-comps-guardrails*") + cid=$(docker ps -aq --filter "name=wildguard-guardrails-server" --filter "name=tgi-gaudi-server") echo "Shutdown legacy containers "$cid if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } diff --git a/tests/guardrails/test_guardrails_hallucination_detection_on_intel_hpu.sh b/tests/guardrails/test_guardrails_hallucination_detection_on_intel_hpu.sh new file mode 100644 index 0000000000..92b29827fe --- /dev/null +++ b/tests/guardrails/test_guardrails_hallucination_detection_on_intel_hpu.sh @@ -0,0 +1,111 @@ +#!/usr/bin/env bash +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -x + +WORKPATH=$(dirname "$PWD") +ip_address=$(hostname -I | awk '{print $1}') + +function build_docker_images() { + echo "Start building docker images for microservice" + cd $WORKPATH + git clone https://github.com/HabanaAI/vllm-fork.git + cd vllm-fork/ + git checkout v0.6.4.post2+Gaudi-1.19.0 + sed -i 's/triton/triton==3.1.0/g' requirements-hpu.txt + docker build --no-cache --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile.hpu -t opea/vllm-gaudi:comps --shm-size=128g . + if [ $? -ne 0 ]; then + echo "opea/vllm-gaudi built fail" + exit 1 + else + echo "opea/vllm-gaudi built successful" + fi + + cd $WORKPATH + docker build --no-cache -t opea/hallucination-detection:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/guardrails/src/hallucination_detection/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/hallucination-detection built fail" + exit 1 + else + echo "opea/hallucination-detection built successful" + fi +} + +function start_service() { + echo "Starting microservice" + export host_ip=$(hostname -I | awk '{print $1}') + export LLM_MODEL_ID="PatronusAI/Llama-3-Patronus-Lynx-8B-Instruct" + export LLM_ENDPOINT_PORT=12210 + export vLLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}" + export HALLUCINATION_DETECTION_PORT=11305 + export TAG=comps + service_name="vllm-gaudi-server hallucination-detection-server" + cd $WORKPATH + cd comps/guardrails/deployment/docker_compose/ + docker compose up ${service_name} -d + echo "Microservice started" + sleep 15 +} + +function validate_microservice() { + echo "Validate microservice started" + DATA='{"messages":[{"role": "user", "content": "Given the following QUESTION, DOCUMENT and ANSWER you must analyze the provided answer and determine whether it is faithful to the contents of the DOCUMENT. The ANSWER must not offer new information beyond the context provided in the DOCUMENT. 
The ANSWER also must not contradict information provided in the DOCUMENT. Output your final verdict by strictly following this format: \"PASS\" if the answer is faithful to the DOCUMENT and \"FAIL\" if the answer is not faithful to the DOCUMENT. Show your reasoning.\n\n--\nQUESTION (THIS DOES NOT COUNT AS BACKGROUND INFORMATION):\n{question}\n\n--\nDOCUMENT:\n{document}\n\n--\nANSWER:\n{answer}\n\n--\n\n Your output should be in JSON FORMAT with the keys \"REASONING\" and \"SCORE\":\n{{\"REASONING\": , \"SCORE\": }}"}], "max_tokens":600,"model": "PatronusAI/Llama-3-Patronus-Lynx-8B-Instruct" }' + + echo "test 1 - Case with Hallucination (Invalid or Inconsistent Output)" + DOCUMENT="750 Seventh Avenue is a 615 ft (187m) tall Class-A office skyscraper in New York City. 101 Park Avenue is a 629 ft tall skyscraper in New York City, New York." + QUESTION=" 750 7th Avenue and 101 Park Avenue, are located in which city?" + ANSWER="750 7th Avenue and 101 Park Avenue are located in Albany, New York" + + DATA1=$(echo $DATA | sed "s/{question}/$QUESTION/g; s/{document}/$DOCUMENT/g; s/{answer}/$ANSWER/g") + printf "$DATA1\n" + + result=$(curl localhost:11305/v1/hallucination_detection -X POST -d "$DATA1" -H 'Content-Type: application/json') + if [[ $result == *"FAIL"* ]]; then + echo "Result correct." + else + docker logs hallucination-detection-server + exit 1 + fi + + echo "test 2 - Case without Hallucination (Valid Output)" + DOCUMENT=".......An important part of CDC’s role during a public health emergency is to develop a test for the pathogen and equip state and local public health labs with testing capacity. CDC developed an rRT-PCR test to diagnose COVID-19. As of the evening of March 17, 89 state and local public health labs in 50 states......" + QUESTION="What kind of test can diagnose COVID-19?" + ANSWER=" rRT-PCR test" + + DATA2=$(echo $DATA | sed "s/{question}/$QUESTION/g; s/{document}/$DOCUMENT/g; s/{answer}/$ANSWER/g") + printf "$DATA2\n" + + result=$(curl localhost:11305/v1/hallucination_detection -X POST -d "$DATA2" -H 'Content-Type: application/json') + if [[ $result == *"PASS"* ]]; then + echo "Result correct." + else + echo "Result wrong." + docker logs hallucination-detection-server + exit 1 + fi + echo "Validate microservice completed" +} + +function stop_docker() { + cid=$(docker ps -aq --filter "name=hallucination-detection-server" --filter "name=vllm-gaudi-server") + echo "Shutdown legacy containers "$cid + if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi +} + +function main() { + + stop_docker + + build_docker_images + start_service + + validate_microservice + + stop_docker + echo "cleanup container images and volumes" + echo y | docker system prune 2>&1 > /dev/null + +} + +main diff --git a/tests/guardrails/test_guardrails_pii_detection_predictionguard.sh b/tests/guardrails/test_guardrails_pii_detection_predictionguard.sh index 6703c3b48b..e9c4a06d88 100644 --- a/tests/guardrails/test_guardrails_pii_detection_predictionguard.sh +++ b/tests/guardrails/test_guardrails_pii_detection_predictionguard.sh @@ -13,27 +13,27 @@ fi function build_docker_images() { cd $WORKPATH echo $(pwd) - docker build --no-cache -t opea/pii-pg:comps -f comps/guardrails/src/pii_detection/Dockerfile . + docker build --no-cache -t opea/pii-detection-predictionguard:comps -f comps/guardrails/src/pii_detection/Dockerfile . if [ $?
-ne 0 ]; then - echo "opea/pii-pg build failed" + echo "opea/pii-detection-predictionguard build failed" exit 1 else - echo "opea/pii-pg built successfully" + echo "opea/pii-detection-predictionguard built successfully" fi } function start_service() { - pii_service_port=9080 - unset http_proxy - docker run -d --name=test-comps-pii-pg-server \ - -e http_proxy= -e https_proxy= \ - -e PREDICTIONGUARD_API_KEY=${PREDICTIONGUARD_API_KEY} \ - -p 9080:9080 --ipc=host opea/pii-pg:comps - sleep 60 # Sleep for 1 minute to allow the service to start + export PII_PREDICTIONGUARD_PORT=11306 + export TAG=comps + service_name="pii-predictionguard-server" + cd $WORKPATH + cd comps/guardrails/deployment/docker_compose/ + docker compose up ${service_name} -d + sleep 15 } function validate_microservice() { - pii_service_port=9080 + pii_service_port=11306 result=$(http_proxy="" curl http://${ip_address}:${pii_service_port}/v1/pii \ -X POST \ -d '{"prompt": "My name is John Doe and my phone number is 123-456-7890.", "replace": true, "replace_method": "mask"}' \ @@ -43,13 +43,13 @@ function validate_microservice() { echo "Service response is correct." else echo "Result wrong. Received was $result" - docker logs test-comps-pii-pg-server + docker logs pii-predictionguard-server exit 1 fi } function stop_docker() { - cid=$(docker ps -aq --filter "name=test-comps-pii-pg-*") + cid=$(docker ps -aq --filter "name=pii-predictionguard-server") if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } diff --git a/tests/guardrails/test_guardrails_prompt_injection_predictionguard.sh b/tests/guardrails/test_guardrails_prompt_injection_predictionguard.sh index fa48d26a70..e27fdf1545 100644 --- a/tests/guardrails/test_guardrails_prompt_injection_predictionguard.sh +++ b/tests/guardrails/test_guardrails_prompt_injection_predictionguard.sh @@ -13,27 +13,27 @@ fi function build_docker_images() { cd $WORKPATH echo $(pwd) - docker build --no-cache -t opea/injection-pg:comps -f comps/guardrails/src/prompt_injection/Dockerfile . + docker build --no-cache -t opea/injection-predictionguard:comps -f comps/guardrails/src/prompt_injection/Dockerfile . if [ $? -ne 0 ]; then - echo "opea/injection-pg build failed" + echo "opea/injection-predictionguard build failed" exit 1 else - echo "opea/injection-pg built successfully" + echo "opea/injection-predictionguard built successfully" fi } function start_service() { - injection_service_port=9085 - unset http_proxy - docker run -d --name=test-comps-injection-pg-server \ - -e http_proxy= -e https_proxy= \ - -e PREDICTIONGUARD_API_KEY=${PREDICTIONGUARD_API_KEY} \ - -p 9085:9085 --ipc=host opea/injection-pg:comps - sleep 60 # Sleep for 1 minute to allow the service to start + export INJECTION_PREDICTIONGUARD_PORT=11307 + export TAG=comps + service_name="injection-predictionguard-server" + cd $WORKPATH + cd comps/guardrails/deployment/docker_compose/ + docker compose up ${service_name} -d + sleep 15 } function validate_microservice() { - injection_service_port=9085 + injection_service_port=11307 result=$(http_proxy="" curl http://${ip_address}:${injection_service_port}/v1/injection \ -X POST \ -d '{"text": "How to bypass login screen?"}' \ @@ -43,13 +43,13 @@ function validate_microservice() { echo "Service response is correct." else echo "Result wrong. 
Received was $result" - docker logs test-comps-injection-pg-server + docker logs injection-predictionguard-server exit 1 fi } function stop_docker() { - cid=$(docker ps -aq --filter "name=test-comps-injection-pg-*") + cid=$(docker ps -aq --filter "name=injection-predictionguard-server") if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } diff --git a/tests/guardrails/test_guardrails_toxicity_detection_predictionguard.sh b/tests/guardrails/test_guardrails_toxicity_detection_predictionguard.sh index 38fa1de142..3919378be8 100644 --- a/tests/guardrails/test_guardrails_toxicity_detection_predictionguard.sh +++ b/tests/guardrails/test_guardrails_toxicity_detection_predictionguard.sh @@ -13,27 +13,27 @@ fi function build_docker_images() { cd $WORKPATH echo $(pwd) - docker build --no-cache -t opea/toxicity-pg:comps -f comps/guardrails/src/toxicity_detection/Dockerfile . + docker build --no-cache -t opea/toxicity-predictionguard:comps -f comps/guardrails/src/toxicity_detection/Dockerfile . if [ $? -ne 0 ]; then - echo "opea/toxicity-pg build failed" + echo "opea/toxicity-predictionguard build failed" exit 1 else - echo "opea/toxicity-pg built successfully" + echo "opea/toxicity-predictionguard built successfully" fi } function start_service() { - toxicity_service_port=9090 - unset http_proxy - docker run -d --name=test-comps-toxicity-pg-server \ - -e http_proxy= -e https_proxy= \ - -e PREDICTIONGUARD_API_KEY=${PREDICTIONGUARD_API_KEY} \ - -p 9090:9090 --ipc=host opea/toxicity-pg:comps - sleep 60 # Sleep for 1 minute to allow the service to start + export TOXICITY_PREDICTIONGUARD_PORT=11308 + export TAG=comps + service_name="toxicity-predictionguard-server" + cd $WORKPATH + cd comps/guardrails/deployment/docker_compose/ + docker compose up ${service_name} -d + sleep 15 } function validate_microservice() { - toxicity_service_port=9090 + toxicity_service_port=11308 result=$(http_proxy="" curl http://${ip_address}:${toxicity_service_port}/v1/toxicity \ -X POST \ -d '{"text": "I hate you."}' \ @@ -43,13 +43,13 @@ function validate_microservice() { echo "Service response is correct." else echo "Result wrong. Received was $result" - docker logs test-comps-toxicity-pg-server + docker logs toxicity-predictionguard-server exit 1 fi } function stop_docker() { - cid=$(docker ps -aq --filter "name=test-comps-toxicity-pg-*") + cid=$(docker ps -aq --filter "name=toxicity-predictionguard-server") if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } diff --git a/tests/image2image/test_image2image.sh b/tests/image2image/test_image2image.sh index ab299e8a62..9c8e60051c 100644 --- a/tests/image2image/test_image2image.sh +++ b/tests/image2image/test_image2image.sh @@ -7,10 +7,15 @@ set -x WORKPATH=$(dirname "$PWD") ip_address=$(hostname -I | awk '{print $1}') + +export TAG=comps +export IMAGE2IMAGE_PORT=10400 +export service_name="image2image" + function build_docker_images() { cd $WORKPATH echo $(pwd) - docker build --no-cache -t opea/image2image:latest -f comps/image2image/src/Dockerfile . + docker build --no-cache -t opea/image2image:$TAG -f comps/image2image/src/Dockerfile . if [ $? 
-ne 0 ]; then echo "opea/image2image built fail" exit 1 @@ -21,25 +26,27 @@ function build_docker_images() { function start_service() { unset http_proxy - docker run -d --name="test-comps-image2image" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e MODEL=stabilityai/stable-diffusion-xl-refiner-1.0 -p 9389:9389 --ipc=host opea/image2image:latest + cd $WORKPATH/comps/image2image/deployment/docker_compose + export MODEL='stabilityai/stable-diffusion-xl-refiner-1.0' + docker compose -f compose.yaml up ${service_name} -d sleep 30s } function validate_microservice() { - result=$(http_proxy="" curl http://localhost:9389/v1/image2image -XPOST -d '{"image": "https://huggingface.co/datasets/patrickvonplaten/images/resolve/main/aa_xl/000000009.png", "prompt":"a photo of an astronaut riding a horse on mars", "num_images_per_prompt":1}' -H 'Content-Type: application/json') + result=$(http_proxy="" curl http://localhost:$IMAGE2IMAGE_PORT/v1/image2image -XPOST -d '{"image": "https://huggingface.co/datasets/patrickvonplaten/images/resolve/main/aa_xl/000000009.png", "prompt":"a photo of an astronaut riding a horse on mars", "num_images_per_prompt":1}' -H 'Content-Type: application/json') if [[ $result == *"images"* ]]; then echo "Result correct." else echo "Result wrong." - docker logs test-comps-image2image + docker logs image2image-server exit 1 fi } function stop_docker() { - cid=$(docker ps -aq --filter "name=test-comps-image2image*") - if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi + cd $WORKPATH/comps/image2image/deployment/docker_compose + docker compose -f compose.yaml down ${service_name} --remove-orphans } function main() { diff --git a/tests/image2image/test_image2image_on_intel_hpu.sh b/tests/image2image/test_image2image_on_intel_hpu.sh new file mode 100644 index 0000000000..600b3fc49d --- /dev/null +++ b/tests/image2image/test_image2image_on_intel_hpu.sh @@ -0,0 +1,67 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -x + +WORKPATH=$(dirname "$PWD") +ip_address=$(hostname -I | awk '{print $1}') + + +export TAG=comps +export IMAGE2IMAGE_PORT=10401 +export service_name="image2image-gaudi" + +function build_docker_images() { + cd $WORKPATH + echo $(pwd) + docker build --no-cache -t opea/image2image-gaudi:$TAG -f comps/image2image/src/Dockerfile.intel_hpu . + if [ $? -ne 0 ]; then + echo "opea/image2image built fail" + exit 1 + else + echo "opea/image2image built successful" + fi +} + +function start_service() { + unset http_proxy + export MODEL='stabilityai/stable-diffusion-xl-refiner-1.0' + + cd $WORKPATH/comps/image2image/deployment/docker_compose + docker compose -f compose.yaml up ${service_name} -d + sleep 30s +} + +function validate_microservice() { + result=$(http_proxy="" curl http://localhost:$IMAGE2IMAGE_PORT/v1/image2image -XPOST -d '{"image": "https://huggingface.co/datasets/patrickvonplaten/images/resolve/main/aa_xl/000000009.png", "prompt":"a photo of an astronaut riding a horse on mars", "num_images_per_prompt":1}' -H 'Content-Type: application/json') + if [[ $result == *"images"* ]]; then + echo "Result correct." + else + echo "Result wrong." 
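# --- Editor's sketch (not part of the patch): start_service above relies on a fixed
# --- "sleep 30s" before validation. A hypothetical readiness poll like the one below
# --- could replace that fixed delay; the port variable and endpoint path mirror this
# --- script, while the helper name and retry counts are assumptions.
wait_for_http() {
    local url="$1" retries="${2:-30}"
    for ((i = 1; i <= retries; i++)); do
        # Any HTTP response at all means the server socket is accepting connections.
        if curl -s -o /dev/null "$url"; then
            return 0
        fi
        sleep 5
    done
    echo "Service at $url did not become reachable in time"
    return 1
}
# Example: wait_for_http "http://localhost:${IMAGE2IMAGE_PORT}/v1/image2image"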
+ docker logs image2image-gaudi-server + exit 1 + fi + +} + +function stop_docker() { + cd $WORKPATH/comps/image2image/deployment/docker_compose + docker compose -f compose.yaml down ${service_name} --remove-orphans +} + +function main() { + + stop_docker + + build_docker_images + start_service + + validate_microservice + + stop_docker + echo y | docker system prune + +} + +main diff --git a/tests/image2video/test_image2video_native.sh b/tests/image2video/test_image2video_native.sh index af21583b32..caf52f67ca 100644 --- a/tests/image2video/test_image2video_native.sh +++ b/tests/image2video/test_image2video_native.sh @@ -6,6 +6,7 @@ set -x WORKPATH=$(dirname "$PWD") ip_address=$(hostname -I | awk '{print $1}') +service_name="image2video" function build_docker_images() { cd $WORKPATH @@ -21,7 +22,8 @@ function build_docker_images() { function start_service() { unset http_proxy - docker run -d --name="test-comps-image2video" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 9369:9369 --ipc=host opea/image2video:latest + cd $WORKPATH/comps/image2video/deployment/docker_compose + docker compose -f compose.yaml up ${service_name} -d > start_services_with_compose.log sleep 3m } @@ -31,15 +33,15 @@ function validate_microservice() { echo "Result correct." else echo "Result wrong." - docker logs test-comps-image2video + docker logs image2video exit 1 fi } function stop_docker() { - cid=$(docker ps -aq --filter "name=test-comps-image2video*") - if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi + cd $WORKPATH/comps/image2video/deployment/docker_compose + docker compose -f compose.yaml down ${service_name} --remove-orphans } function main() { diff --git a/tests/image2video/test_image2video_native_on_intel_hpu.sh b/tests/image2video/test_image2video_native_on_intel_hpu.sh index 11e66e25b8..5fb5c1e8a4 100644 --- a/tests/image2video/test_image2video_native_on_intel_hpu.sh +++ b/tests/image2video/test_image2video_native_on_intel_hpu.sh @@ -6,6 +6,7 @@ set -x WORKPATH=$(dirname "$PWD") ip_address=$(hostname -I | awk '{print $1}') +service_name="image2video-gaudi" function build_docker_images() { cd $WORKPATH @@ -21,7 +22,8 @@ function build_docker_images() { function start_service() { unset http_proxy - docker run -d --name="test-comps-image2video-gaudi" -p 9369:9369 --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy opea/image2video-gaudi:latest + cd $WORKPATH/comps/image2video/deployment/docker_compose + docker compose -f compose.yaml up ${service_name} -d > start_services_with_compose.log sleep 3m } @@ -31,15 +33,15 @@ function validate_microservice() { echo "Result correct." else echo "Result wrong." - docker logs test-comps-image2video-gaudi + docker logs image2video-gaudi exit 1 fi } function stop_docker() { - cid=$(docker ps -aq --filter "name=test-comps-image2video*") - if [[ ! 
-z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi + cd $WORKPATH/comps/image2video/deployment/docker_compose + docker compose -f compose.yaml down ${service_name} --remove-orphans } function main() { diff --git a/tests/intent_detection/test_intent_detection_langchain.sh b/tests/intent_detection/test_intent_detection_langchain.sh deleted file mode 100644 index 023afbc9da..0000000000 --- a/tests/intent_detection/test_intent_detection_langchain.sh +++ /dev/null @@ -1,79 +0,0 @@ -#!/bin/bash -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -set -xe - -WORKPATH=$(dirname "$PWD") -LOG_PATH="$WORKPATH/tests" -ip_address=$(hostname -I | awk '{print $1}') - -function build_docker_images() { - cd $WORKPATH - docker build --no-cache -t opea/intent-detection-tgi:comps -f comps/intent_detection/langchain/Dockerfile . -} - -function start_service() { - tgi_endpoint=5044 - # Remember to set HF_TOKEN before invoking this test! - export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN} - model=Intel/neural-chat-7b-v3-3 - docker run -d --name="test-comps-intent-tgi-endpoint" -p $tgi_endpoint:80 -v ./data:/data --shm-size 1g ghcr.io/huggingface/text-generation-inference:1.4 --model-id $model - - export TGI_LLM_ENDPOINT="http://${ip_address}:${tgi_endpoint}" - intent_port=5043 - unset http_proxy - docker run -d --name="test-comps-intent-server" -p ${intent_port}:9000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e TGI_LLM_ENDPOINT=$TGI_LLM_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN opea/intent-detection-tgi:comps - - # check whether tgi is fully ready - n=0 - until [[ "$n" -ge 100 ]] || [[ $ready == true ]]; do - docker logs test-comps-intent-tgi-endpoint > ${LOG_PATH}/tgi.log - n=$((n+1)) - if grep -q Connected ${LOG_PATH}/tgi.log; then - break - fi - sleep 5s - done - sleep 5s -} - -function validate_microservice() { - intent_port=5043 - result=$(http_proxy="" curl http://localhost:${intent_port}/v1/chat/intent\ - -X POST \ - -d '{"query":"What is Deep Learning?","max_new_tokens":10,"top_k":1,"temperature":0.001,"stream":false}' \ - -H 'Content-Type: application/json') - - if [[ $result == *"QA"* ]]; then - echo $result - echo "Result correct." - else - echo "Result wrong. Received was $result" - docker logs test-comps-intent-server > ${LOG_PATH}/intent_detection.log - docker logs test-comps-intent-tgi-endpoint > ${LOG_PATH}/tgi.log - exit 1 - fi - -} - -function stop_docker() { - cid=$(docker ps -aq --filter "name=test-comps-intent*") - if [[ ! 
-z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi -} - -function main() { - - stop_docker - - build_docker_images - start_service - - validate_microservice - - stop_docker - echo y | docker system prune - -} - -main diff --git a/tests/llms/test_llms_doc-summarization_tgi.sh b/tests/llms/test_llms_doc-summarization_tgi.sh new file mode 100644 index 0000000000..16e2018548 --- /dev/null +++ b/tests/llms/test_llms_doc-summarization_tgi.sh @@ -0,0 +1,158 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -xe + +IMAGE_REPO=${IMAGE_REPO:-"opea"} +export REGISTRY=${IMAGE_REPO} +export TAG="comps" +echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" +echo "TAG=${TAG}" + +WORKPATH=$(dirname "$PWD") +host_ip=$(hostname -I | awk '{print $1}') +LOG_PATH="$WORKPATH/tests" +service_name="docsum-tgi" + +function build_docker_images() { + cd $WORKPATH + docker build --no-cache -t ${REGISTRY:-opea}/llm-docsum:${TAG:-latest} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/src/doc-summarization/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/llm-docsum built fail" + exit 1 + else + echo "opea/llm-docsum built successful" + fi +} + +function start_service() { + export host_ip=${host_ip} + export LLM_ENDPOINT_PORT=12105 # 12100-12199 + export DOCSUM_PORT=10505 #10500-10599 + export HF_TOKEN=${HF_TOKEN} + export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}" + export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" + export MAX_INPUT_TOKENS=2048 + export MAX_TOTAL_TOKENS=4096 + export LOGFLAG=True + + cd $WORKPATH/comps/llms/deployment/docker_compose + docker compose -f compose_doc-summarization.yaml up ${service_name} -d > ${LOG_PATH}/start_services_with_compose.log + + sleep 30s +} + +function validate_services() { + local URL="$1" + local EXPECTED_RESULT="$2" + local SERVICE_NAME="$3" + local DOCKER_NAME="$4" + local INPUT_DATA="$5" + + local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") + + echo "===========================================" + + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." + + local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) + + echo $CONTENT + + if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then + echo "[ $SERVICE_NAME ] Content is as expected." + else + echo "[ $SERVICE_NAME ] Content does not match the expected result" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + else + echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + sleep 1s +} + +function validate_microservices() { + URL="http://${host_ip}:$DOCSUM_PORT/v1/docsum" + + echo "Validate tgi..." + validate_services \ + "${LLM_ENDPOINT}/generate" \ + "generated_text" \ + "tgi-server" \ + "tgi-server" \ + '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' + + echo "Validate stream=True..." + validate_services \ + "$URL" \ + 'text' \ + "docsum-tgi" \ + "docsum-tgi" \ + '{"messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. 
TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.", "max_tokens":32, "language":"en"}' + + echo "Validate stream=False..." + validate_services \ + "$URL" \ + 'text' \ + "docsum-tgi" \ + "docsum-tgi" \ + '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.", "max_tokens":32, "language":"en", "stream":false}' + + echo "Validate Chinese mode..." + validate_services \ + "$URL" \ + 'text' \ + "docsum-tgi" \ + "docsum-tgi" \ + '{"messages":"2024年9月26日,北京——今日,英特尔正式发布英特尔® 至强® 6性能核处理器(代号Granite Rapids),为AI、数据分析、科学计算等计算密集型业务提供卓越性能。", "max_tokens":32, "language":"zh", "stream":false}' + + echo "Validate truncate mode..." + validate_services \ + "$URL" \ + 'text' \ + "docsum-tgi" \ + "docsum-tgi" \ + '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.", "max_tokens":32, "language":"en", "summary_type": "truncate", "chunk_size": 2000}' + + echo "Validate map_reduce mode..." + validate_services \ + "$URL" \ + 'text' \ + "docsum-tgi" \ + "docsum-tgi" \ + '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.", "max_tokens":32, "language":"en", "summary_type": "map_reduce", "chunk_size": 2000, "stream":false}' + + echo "Validate refine mode..." + validate_services \ + "$URL" \ + 'text' \ + "docsum-tgi" \ + "docsum-tgi" \ + '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.", "max_tokens":32, "language":"en", "summary_type": "refine", "chunk_size": 2000}' +} + +function stop_docker() { + cd $WORKPATH/comps/llms/deployment/docker_compose + docker compose -f compose_doc-summarization.yaml down ${service_name} --remove-orphans +} + +function main() { + + stop_docker + + build_docker_images + start_service + + validate_microservices + + stop_docker + echo y | docker system prune + +} + +main diff --git a/tests/llms/test_llms_doc-summarization_tgi_on_intel_hpu.sh b/tests/llms/test_llms_doc-summarization_tgi_on_intel_hpu.sh new file mode 100644 index 0000000000..b8c97f5b60 --- /dev/null +++ b/tests/llms/test_llms_doc-summarization_tgi_on_intel_hpu.sh @@ -0,0 +1,159 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -xe + +IMAGE_REPO=${IMAGE_REPO:-"opea"} +export REGISTRY=${IMAGE_REPO} +export TAG="comps" +echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" +echo "TAG=${TAG}" + +WORKPATH=$(dirname "$PWD") +host_ip=$(hostname -I | awk '{print $1}') +LOG_PATH="$WORKPATH/tests" +service_name="docsum-tgi-gaudi" + +function build_docker_images() { + cd $WORKPATH + docker build --no-cache -t ${REGISTRY:-opea}/llm-docsum:${TAG:-latest} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/src/doc-summarization/Dockerfile . + if [ $? 
-ne 0 ]; then + echo "opea/llm-docsum built fail" + exit 1 + else + echo "opea/llm-docsum built successful" + fi +} + +function start_service() { + export host_ip=${host_ip} + export LLM_ENDPOINT_PORT=12104 # 12100-12199 + export DOCSUM_PORT=10504 #10500-10599 + export HF_TOKEN=${HF_TOKEN} + export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}" + export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" + export MAX_INPUT_TOKENS=2048 + export MAX_TOTAL_TOKENS=4096 + export LOGFLAG=True + export DATA_PATH="/data2/cache" + + cd $WORKPATH/comps/llms/deployment/docker_compose + docker compose -f compose_doc-summarization.yaml up ${service_name} -d > ${LOG_PATH}/start_services_with_compose.log + + sleep 30s +} + +function validate_services() { + local URL="$1" + local EXPECTED_RESULT="$2" + local SERVICE_NAME="$3" + local DOCKER_NAME="$4" + local INPUT_DATA="$5" + + local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") + + echo "===========================================" + + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." + + local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) + + echo $CONTENT + + if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then + echo "[ $SERVICE_NAME ] Content is as expected." + else + echo "[ $SERVICE_NAME ] Content does not match the expected result" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + else + echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + sleep 1s +} + +function validate_microservices() { + URL="http://${host_ip}:$DOCSUM_PORT/v1/docsum" + + echo "Validate tgi..." + validate_services \ + "${LLM_ENDPOINT}/generate" \ + "generated_text" \ + "tgi-gaudi-server" \ + "tgi-gaudi-server" \ + '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' + + echo "Validate stream=True..." + validate_services \ + "$URL" \ + 'text' \ + "docsum-tgi-gaudi" \ + "docsum-tgi-gaudi" \ + '{"messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.", "max_tokens":32, "language":"en"}' + + echo "Validate stream=False..." + validate_services \ + "$URL" \ + 'text' \ + "docsum-tgi-gaudi" \ + "docsum-tgi-gaudi" \ + '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.", "max_tokens":32, "language":"en", "stream":false}' + + echo "Validate Chinese mode..." + validate_services \ + "$URL" \ + 'text' \ + "docsum-tgi-gaudi" \ + "docsum-tgi-gaudi" \ + '{"messages":"2024年9月26日,北京——今日,英特尔正式发布英特尔® 至强® 6性能核处理器(代号Granite Rapids),为AI、数据分析、科学计算等计算密集型业务提供卓越性能。", "max_tokens":32, "language":"zh", "stream":false}' + + echo "Validate truncate mode..." + validate_services \ + "$URL" \ + 'text' \ + "docsum-tgi-gaudi" \ + "docsum-tgi-gaudi" \ + '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. 
TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.", "max_tokens":32, "language":"en", "summary_type": "truncate", "chunk_size": 2000}' + + echo "Validate map_reduce mode..." + validate_services \ + "$URL" \ + 'text' \ + "docsum-tgi-gaudi" \ + "docsum-tgi-gaudi" \ + '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.", "max_tokens":32, "language":"en", "summary_type": "map_reduce", "chunk_size": 2000, "stream":false}' + + echo "Validate refine mode..." + validate_services \ + "$URL" \ + 'text' \ + "docsum-tgi-gaudi" \ + "docsum-tgi-gaudi" \ + '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.", "max_tokens":32, "language":"en", "summary_type": "refine", "chunk_size": 2000}' +} + +function stop_docker() { + cd $WORKPATH/comps/llms/deployment/docker_compose + docker compose -f compose_doc-summarization.yaml down ${service_name} --remove-orphans +} + +function main() { + + stop_docker + + build_docker_images + start_service + + validate_microservices + + stop_docker + echo y | docker system prune + +} + +main diff --git a/tests/llms/test_llms_doc-summarization_vllm.sh b/tests/llms/test_llms_doc-summarization_vllm.sh new file mode 100644 index 0000000000..f545d42edd --- /dev/null +++ b/tests/llms/test_llms_doc-summarization_vllm.sh @@ -0,0 +1,170 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -xe + +IMAGE_REPO=${IMAGE_REPO:-"opea"} +export REGISTRY=${IMAGE_REPO} +export TAG="comps" +echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" +echo "TAG=${TAG}" + +WORKPATH=$(dirname "$PWD") +host_ip=$(hostname -I | awk '{print $1}') +LOG_PATH="$WORKPATH/tests" +service_name="docsum-vllm" + +function build_docker_images() { + cd $WORKPATH + git clone https://github.com/vllm-project/vllm.git + cd ./vllm/ + docker build --no-cache -f Dockerfile.cpu -t ${REGISTRY:-opea}/vllm:${TAG:-latest} --shm-size=128g . + if [ $? -ne 0 ]; then + echo "opea/vllm built fail" + exit 1 + else + echo "opea/vllm built successful" + fi + + cd $WORKPATH + docker build --no-cache -t ${REGISTRY:-opea}/llm-docsum:${TAG:-latest} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/src/doc-summarization/Dockerfile . + if [ $? 
-ne 0 ]; then + echo "opea/llm-docsum built fail" + exit 1 + else + echo "opea/llm-docsum built successful" + fi +} + +function start_service() { + export host_ip=${host_ip} + export LLM_ENDPOINT_PORT=12107 # 12100-12199 + export DOCSUM_PORT=10507 #10500-10599 + export HF_TOKEN=${HF_TOKEN} + export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}" + export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct" + export MAX_INPUT_TOKENS=2048 + export MAX_TOTAL_TOKENS=4096 + export VLLM_SKIP_WARMUP=true + export LOGFLAG=True + + cd $WORKPATH/comps/llms/deployment/docker_compose + docker compose -f compose_doc-summarization.yaml up ${service_name} -d > ${LOG_PATH}/start_services_with_compose.log + + sleep 30s +} + +function validate_services() { + local URL="$1" + local EXPECTED_RESULT="$2" + local SERVICE_NAME="$3" + local DOCKER_NAME="$4" + local INPUT_DATA="$5" + + local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") + + echo "===========================================" + + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." + + local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) + + echo $CONTENT + + if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then + echo "[ $SERVICE_NAME ] Content is as expected." + else + echo "[ $SERVICE_NAME ] Content does not match the expected result" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + else + echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + sleep 1s +} + +function validate_microservices() { + URL="http://${host_ip}:$DOCSUM_PORT/v1/docsum" + + echo "Validate vllm..." + validate_services \ + "${LLM_ENDPOINT}/v1/completions" \ + "text" \ + "vllm-server" \ + "vllm-server" \ + '{"model": "meta-llama/Meta-Llama-3-8B-Instruct", "prompt": "What is Deep Learning?", "max_tokens": 32, "temperature": 0}' + + echo "Validate stream=True..." + validate_services \ + "$URL" \ + 'text' \ + "docsum-vllm" \ + "docsum-vllm" \ + '{"messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.", "max_tokens":32, "language":"en"}' + + echo "Validate stream=False..." + validate_services \ + "$URL" \ + 'text' \ + "docsum-vllm" \ + "docsum-vllm" \ + '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.", "max_tokens":32, "language":"en", "stream":false}' + + echo "Validate Chinese mode..." + validate_services \ + "$URL" \ + 'text' \ + "docsum-vllm" \ + "docsum-vllm" \ + '{"messages":"2024年9月26日,北京——今日,英特尔正式发布英特尔® 至强® 6性能核处理器(代号Granite Rapids),为AI、数据分析、科学计算等计算密集型业务提供卓越性能。", "max_tokens":32, "language":"zh", "stream":false}' + + echo "Validate truncate mode..." + validate_services \ + "$URL" \ + 'text' \ + "docsum-vllm" \ + "docsum-vllm" \ + '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. 
TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.", "max_tokens":32, "language":"en", "summary_type": "truncate", "chunk_size": 2000}' + + echo "Validate map_reduce mode..." + validate_services \ + "$URL" \ + 'text' \ + "docsum-vllm" \ + "docsum-vllm" \ + '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.", "max_tokens":32, "language":"en", "summary_type": "map_reduce", "chunk_size": 2000, "stream":false}' + + echo "Validate refine mode..." + validate_services \ + "$URL" \ + 'text' \ + "docsum-vllm" \ + "docsum-vllm" \ + '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.", "max_tokens":32, "language":"en", "summary_type": "refine", "chunk_size": 2000}' +} + +function stop_docker() { + cd $WORKPATH/comps/llms/deployment/docker_compose + docker compose -f compose_doc-summarization.yaml down ${service_name} --remove-orphans +} + +function main() { + + stop_docker + + build_docker_images + start_service + + validate_microservices + + stop_docker + echo y | docker system prune + +} + +main diff --git a/tests/llms/test_llms_doc-summarization_vllm_on_intel_hpu.sh b/tests/llms/test_llms_doc-summarization_vllm_on_intel_hpu.sh new file mode 100644 index 0000000000..d9552e9a0d --- /dev/null +++ b/tests/llms/test_llms_doc-summarization_vllm_on_intel_hpu.sh @@ -0,0 +1,173 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -xe + +IMAGE_REPO=${IMAGE_REPO:-"opea"} +export REGISTRY=${IMAGE_REPO} +export TAG="comps" +echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" +echo "TAG=${TAG}" + +WORKPATH=$(dirname "$PWD") +host_ip=$(hostname -I | awk '{print $1}') +LOG_PATH="$WORKPATH/tests" +service_name="docsum-vllm-gaudi" + +function build_docker_images() { + cd $WORKPATH + git clone https://github.com/HabanaAI/vllm-fork.git + cd vllm-fork/ + git checkout v0.6.4.post2+Gaudi-1.19.0 + sed -i 's/triton/triton==3.1.0/g' requirements-hpu.txt + docker build --no-cache -f Dockerfile.hpu -t ${REGISTRY:-opea}/vllm-gaudi:${TAG:-latest} --shm-size=128g . + if [ $? -ne 0 ]; then + echo "opea/vllm-gaudi built fail" + exit 1 + else + echo "opea/vllm-gaudi built successful" + fi + + cd $WORKPATH + docker build --no-cache -t ${REGISTRY:-opea}/llm-docsum:${TAG:-latest} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/src/doc-summarization/Dockerfile . + if [ $? 
-ne 0 ]; then + echo "opea/llm-docsum built fail" + exit 1 + else + echo "opea/llm-docsum built successful" + fi +} + +function start_service() { + export host_ip=${host_ip} + export LLM_ENDPOINT_PORT=12106 # 12100-12199 + export DOCSUM_PORT=10506 #10500-10599 + export HF_TOKEN=${HF_TOKEN} + export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}" + export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" + export MAX_INPUT_TOKENS=2048 + export MAX_TOTAL_TOKENS=4096 + export VLLM_SKIP_WARMUP=true + export LOGFLAG=True + export DATA_PATH="/data2/cache" + + cd $WORKPATH/comps/llms/deployment/docker_compose + docker compose -f compose_doc-summarization.yaml up ${service_name} -d > ${LOG_PATH}/start_services_with_compose.log + + sleep 30s +} + +function validate_services() { + local URL="$1" + local EXPECTED_RESULT="$2" + local SERVICE_NAME="$3" + local DOCKER_NAME="$4" + local INPUT_DATA="$5" + + local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") + + echo "===========================================" + + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." + + local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) + + echo $CONTENT + + if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then + echo "[ $SERVICE_NAME ] Content is as expected." + else + echo "[ $SERVICE_NAME ] Content does not match the expected result" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + else + echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + sleep 1s +} + +function validate_microservices() { + URL="http://${host_ip}:$DOCSUM_PORT/v1/docsum" + + echo "Validate vllm..." + validate_services \ + "${LLM_ENDPOINT}/v1/completions" \ + "text" \ + "vllm-gaudi-server" \ + "vllm-gaudi-server" \ + '{"model": "Intel/neural-chat-7b-v3-3", "prompt": "What is Deep Learning?", "max_tokens": 32, "temperature": 0}' + + echo "Validate stream=True..." + validate_services \ + "$URL" \ + 'text' \ + "docsum-vllm-gaudi" \ + "docsum-vllm-gaudi" \ + '{"messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.", "max_tokens":32, "language":"en"}' + + echo "Validate stream=False..." + validate_services \ + "$URL" \ + 'text' \ + "docsum-vllm-gaudi" \ + "docsum-vllm-gaudi" \ + '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.", "max_tokens":32, "language":"en", "stream":false}' + + echo "Validate Chinese mode..." + validate_services \ + "$URL" \ + 'text' \ + "docsum-vllm-gaudi" \ + "docsum-vllm-gaudi" \ + '{"messages":"2024年9月26日,北京——今日,英特尔正式发布英特尔® 至强® 6性能核处理器(代号Granite Rapids),为AI、数据分析、科学计算等计算密集型业务提供卓越性能。", "max_tokens":32, "language":"zh", "stream":false}' + + echo "Validate truncate mode..." 
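# --- Editor's sketch (illustrative only, not part of the patch): the truncate,
# --- map_reduce, and refine checks below differ only in the "summary_type" field.
# --- A loop such as this one would exercise the same modes with less repetition;
# --- the endpoint, service name, and payload fields mirror this script, while the
# --- loop itself and its shortened message text are editorial assumptions.
for summary_mode in truncate map_reduce refine; do
    validate_services \
        "$URL" \
        'text' \
        "docsum-vllm-gaudi" \
        "docsum-vllm-gaudi" \
        "{\"messages\":\"What is Deep Learning?\", \"max_tokens\":32, \"language\":\"en\", \"summary_type\": \"${summary_mode}\", \"chunk_size\": 2000, \"stream\":false}"
done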
+ validate_services \ + "$URL" \ + 'text' \ + "docsum-vllm-gaudi" \ + "docsum-vllm-gaudi" \ + '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.", "max_tokens":32, "language":"en", "summary_type": "truncate", "chunk_size": 2000}' + + echo "Validate map_reduce mode..." + validate_services \ + "$URL" \ + 'text' \ + "docsum-vllm-gaudi" \ + "docsum-vllm-gaudi" \ + '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.", "max_tokens":32, "language":"en", "summary_type": "map_reduce", "chunk_size": 2000, "stream":false}' + + echo "Validate refine mode..." + validate_services \ + "$URL" \ + 'text' \ + "docsum-vllm-gaudi" \ + "docsum-vllm-gaudi" \ + '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.", "max_tokens":32, "language":"en", "summary_type": "refine", "chunk_size": 2000}' +} + +function stop_docker() { + cd $WORKPATH/comps/llms/deployment/docker_compose + docker compose -f compose_doc-summarization.yaml down ${service_name} --remove-orphans +} + +function main() { + + stop_docker + + build_docker_images + start_service + + validate_microservices + + stop_docker + echo y | docker system prune + +} + +main diff --git a/tests/llms/test_llms_faq-generation_langchain_tgi.sh b/tests/llms/test_llms_faq-generation_tgi.sh similarity index 64% rename from tests/llms/test_llms_faq-generation_langchain_tgi.sh rename to tests/llms/test_llms_faq-generation_tgi.sh index 6273c8b2a6..d0ae7aa95c 100644 --- a/tests/llms/test_llms_faq-generation_langchain_tgi.sh +++ b/tests/llms/test_llms_faq-generation_tgi.sh @@ -4,13 +4,20 @@ set -x +IMAGE_REPO=${IMAGE_REPO:-"opea"} +export REGISTRY=${IMAGE_REPO} +export TAG="comps" +echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" +echo "TAG=${TAG}" + WORKPATH=$(dirname "$PWD") host_ip=$(hostname -I | awk '{print $1}') LOG_PATH="$WORKPATH/tests" +service_name="faqgen-tgi" function build_docker_images() { cd $WORKPATH - docker build --no-cache -t opea/llm-faqgen:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/src/faq-generation/Dockerfile . + docker build --no-cache -t ${REGISTRY:-opea}/llm-faqgen:${TAG:-latest} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/src/faq-generation/Dockerfile . if [ $? -ne 0 ]; then echo "opea/llm-faqgen built fail" exit 1 @@ -20,17 +27,16 @@ function build_docker_images() { } function start_service() { - export LLM_ENDPOINT_PORT=5060 - export FAQ_PORT=5061 + export LLM_ENDPOINT_PORT=12101 # 12100-12199 + export FAQ_PORT=10501 #10500-10599 export host_ip=${host_ip} - export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN} # Remember to set HF_TOKEN before invoking this test! + export HF_TOKEN=${HF_TOKEN} # Remember to set HF_TOKEN before invoking this test! 
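# --- Editor's sketch (assumption, not part of the patch): an explicit guard next to
# --- the HF_TOKEN export would make the test fail fast with a clear message when the
# --- token is missing, instead of timing out later while the TGI endpoint tries to
# --- download the model.
if [ -z "${HF_TOKEN}" ]; then
    echo "HF_TOKEN is not set; set it before invoking this test."
    exit 1
fi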
export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}" export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" - export FAQGen_COMPONENT_NAME="OPEAFAQGen_TGI" export LOGFLAG=True cd $WORKPATH/comps/llms/deployment/docker_compose - docker compose -f faq-generation_tgi.yaml up -d > ${LOG_PATH}/start_services_with_compose.log + docker compose -f compose_faq-generation.yaml up ${service_name} -d > ${LOG_PATH}/start_services_with_compose.log sleep 30s } @@ -79,22 +85,22 @@ function validate_backend_microservices() { validate_services \ "${host_ip}:${FAQ_PORT}/v1/faqgen" \ "text" \ - "llm - faqgen" \ - "llm-faqgen-server" \ - '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.","max_tokens": 32}' + "faqgen-tgi" \ + "faqgen-tgi" \ + '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.","max_tokens": 32}' # faq, non-stream validate_services \ "${host_ip}:${FAQ_PORT}/v1/faqgen" \ "text" \ - "FAQGen" \ - "llm-faqgen-server" \ - '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.","max_tokens": 32, "stream":false}' + "faqgen-tgi" \ + "faqgen-tgi" \ + '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.","max_tokens": 32, "stream":false}' } function stop_docker() { cd $WORKPATH/comps/llms/deployment/docker_compose - docker compose -f faq-generation_tgi.yaml down + docker compose -f compose_faq-generation.yaml down ${service_name} --remove-orphans } function main() { diff --git a/tests/llms/test_llms_faq-generation_langchain_tgi_on_intel_hpu.sh b/tests/llms/test_llms_faq-generation_tgi_on_intel_hpu.sh similarity index 62% rename from tests/llms/test_llms_faq-generation_langchain_tgi_on_intel_hpu.sh rename to tests/llms/test_llms_faq-generation_tgi_on_intel_hpu.sh index 7a1a4fc698..50b1524c09 100644 --- a/tests/llms/test_llms_faq-generation_langchain_tgi_on_intel_hpu.sh +++ b/tests/llms/test_llms_faq-generation_tgi_on_intel_hpu.sh @@ -4,13 +4,20 @@ set -x +IMAGE_REPO=${IMAGE_REPO:-"opea"} +export REGISTRY=${IMAGE_REPO} +export TAG="comps" +echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" +echo "TAG=${TAG}" + WORKPATH=$(dirname "$PWD") host_ip=$(hostname -I | awk '{print $1}') LOG_PATH="$WORKPATH/tests" +service_name="faqgen-tgi-gaudi" function build_docker_images() { cd $WORKPATH - docker build --no-cache -t opea/llm-faqgen:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/src/faq-generation/Dockerfile . + docker build --no-cache -t ${REGISTRY:-opea}/llm-faqgen:${TAG:-latest} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/src/faq-generation/Dockerfile . if [ $? 
-ne 0 ]; then echo "opea/llm-faqgen built fail" exit 1 @@ -20,17 +27,17 @@ function build_docker_images() { } function start_service() { - export LLM_ENDPOINT_PORT=5062 - export FAQ_PORT=5063 + export LLM_ENDPOINT_PORT=12100 # 12100-12199 + export FAQ_PORT=10500 #10500-10599 export host_ip=${host_ip} - export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN} # Remember to set HF_TOKEN before invoking this test! + export HF_TOKEN=${HF_TOKEN} # Remember to set HF_TOKEN before invoking this test! export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}" export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" - export FAQGen_COMPONENT_NAME="OPEAFAQGen_TGI" export LOGFLAG=True + export DATA_PATH="/data2/cache" cd $WORKPATH/comps/llms/deployment/docker_compose - docker compose -f faq-generation_tgi_on_intel_hpu.yaml up -d > ${LOG_PATH}/start_services_with_compose.log + docker compose -f compose_faq-generation.yaml up ${service_name} -d > ${LOG_PATH}/start_services_with_compose.log sleep 30s } @@ -71,7 +78,7 @@ function validate_backend_microservices() { validate_services \ "${host_ip}:${LLM_ENDPOINT_PORT}/generate" \ "generated_text" \ - "tgi" \ + "tgi-gaudi-server" \ "tgi-gaudi-server" \ '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' @@ -79,22 +86,22 @@ function validate_backend_microservices() { validate_services \ "${host_ip}:${FAQ_PORT}/v1/faqgen" \ "text" \ - "llm - faqgen" \ - "llm-faqgen-server" \ - '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.","max_tokens": 32}' + "faqgen-tgi-gaudi" \ + "faqgen-tgi-gaudi" \ + '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.","max_tokens": 32}' # faq, non-stream validate_services \ "${host_ip}:${FAQ_PORT}/v1/faqgen" \ "text" \ - "FAQGen" \ - "llm-faqgen-server" \ - '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.","max_tokens": 32, "stream":false}' + "faqgen-tgi-gaudi" \ + "faqgen-tgi-gaudi" \ + '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. 
TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.","max_tokens": 32, "stream":false}' } function stop_docker() { cd $WORKPATH/comps/llms/deployment/docker_compose - docker compose -f faq-generation_tgi_on_intel_hpu.yaml down + docker compose -f compose_faq-generation.yaml down ${service_name} --remove-orphans } function main() { diff --git a/tests/llms/test_llms_faq-generation_vllm.sh b/tests/llms/test_llms_faq-generation_vllm.sh new file mode 100644 index 0000000000..936febd838 --- /dev/null +++ b/tests/llms/test_llms_faq-generation_vllm.sh @@ -0,0 +1,133 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -x + +IMAGE_REPO=${IMAGE_REPO:-"opea"} +export REGISTRY=${IMAGE_REPO} +export TAG="comps" +echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" +echo "TAG=${TAG}" + +WORKPATH=$(dirname "$PWD") +host_ip=$(hostname -I | awk '{print $1}') +LOG_PATH="$WORKPATH/tests" +service_name="faqgen-vllm" + +function build_docker_images() { + cd $WORKPATH + git clone https://github.com/vllm-project/vllm.git + cd ./vllm/ + docker build --no-cache -f Dockerfile.cpu -t ${REGISTRY:-opea}/vllm:${TAG:-latest} --shm-size=128g . + if [ $? -ne 0 ]; then + echo "opea/vllm built fail" + exit 1 + else + echo "opea/vllm built successful" + fi + + cd $WORKPATH + docker build --no-cache -t ${REGISTRY:-opea}/llm-faqgen:${TAG:-latest} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/src/faq-generation/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/llm-faqgen built fail" + exit 1 + else + echo "opea/llm-faqgen built successful" + fi + +} + +function start_service() { + export LLM_ENDPOINT_PORT=12103 # 12100-12199 + export FAQ_PORT=10503 #10500-10599 + export host_ip=${host_ip} + export HF_TOKEN=${HF_TOKEN} # Remember to set HF_TOKEN before invoking this test! + export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}" + export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct" + export VLLM_SKIP_WARMUP=true + export LOGFLAG=True + + cd $WORKPATH/comps/llms/deployment/docker_compose + docker compose -f compose_faq-generation.yaml up ${service_name} -d > ${LOG_PATH}/start_services_with_compose.log + + sleep 30s +} + +function validate_services() { + local URL="$1" + local EXPECTED_RESULT="$2" + local SERVICE_NAME="$3" + local DOCKER_NAME="$4" + local INPUT_DATA="$5" + + local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") + + echo "===========================================" + + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." + + local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) + + if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then + echo "[ $SERVICE_NAME ] Content is as expected." + else + echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + else + echo "[ $SERVICE_NAME ] HTTP status is not 200. 
Received status was $HTTP_STATUS" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + sleep 1s +} + +function validate_backend_microservices() { + # vllm + validate_services \ + "${host_ip}:${LLM_ENDPOINT_PORT}/v1/completions" \ + "text" \ + "vllm-server" \ + "vllm-server" \ + '{"model": "meta-llama/Meta-Llama-3-8B-Instruct", "prompt": "What is Deep Learning?", "max_tokens": 32, "temperature": 0}' + + # faq + validate_services \ + "${host_ip}:${FAQ_PORT}/v1/faqgen" \ + "text" \ + "faqgen-vllm" \ + "faqgen-vllm" \ + '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.","max_tokens": 32}' + + # faq, non-stream + validate_services \ + "${host_ip}:${FAQ_PORT}/v1/faqgen" \ + "text" \ + "faqgen-vllm" \ + "faqgen-vllm" \ + '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.","max_tokens": 32, "stream":false}' +} + +function stop_docker() { + cd $WORKPATH/comps/llms/deployment/docker_compose + docker compose -f compose_faq-generation.yaml down ${service_name} --remove-orphans +} + +function main() { + + stop_docker + + build_docker_images + start_service + + validate_backend_microservices + + stop_docker + echo y | docker system prune + +} + +main diff --git a/tests/llms/test_llms_faq-generation_langchain_vllm_on_intel_hpu.sh b/tests/llms/test_llms_faq-generation_vllm_on_intel_hpu.sh similarity index 62% rename from tests/llms/test_llms_faq-generation_langchain_vllm_on_intel_hpu.sh rename to tests/llms/test_llms_faq-generation_vllm_on_intel_hpu.sh index 37d3be22dc..5d489b250d 100644 --- a/tests/llms/test_llms_faq-generation_langchain_vllm_on_intel_hpu.sh +++ b/tests/llms/test_llms_faq-generation_vllm_on_intel_hpu.sh @@ -4,16 +4,24 @@ set -x +IMAGE_REPO=${IMAGE_REPO:-"opea"} +export REGISTRY=${IMAGE_REPO} +export TAG="comps" +echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" +echo "TAG=${TAG}" + WORKPATH=$(dirname "$PWD") host_ip=$(hostname -I | awk '{print $1}') LOG_PATH="$WORKPATH/tests" +service_name="faqgen-vllm-gaudi" function build_docker_images() { cd $WORKPATH git clone https://github.com/HabanaAI/vllm-fork.git cd vllm-fork/ - git checkout 3c39626 - docker build --no-cache -f Dockerfile.hpu -t opea/vllm-gaudi:latest --shm-size=128g . + git checkout v0.6.4.post2+Gaudi-1.19.0 + sed -i 's/triton/triton==3.1.0/g' requirements-hpu.txt + docker build --no-cache -f Dockerfile.hpu -t ${REGISTRY:-opea}/vllm-gaudi:${TAG:-latest} --shm-size=128g . if [ $? -ne 0 ]; then echo "opea/vllm-gaudi built fail" exit 1 @@ -22,7 +30,7 @@ function build_docker_images() { fi cd $WORKPATH - docker build --no-cache -t opea/llm-faqgen:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/src/faq-generation/Dockerfile . + docker build --no-cache -t ${REGISTRY:-opea}/llm-faqgen:${TAG:-latest} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/src/faq-generation/Dockerfile . if [ $? 
-ne 0 ]; then echo "opea/llm-faqgen built fail" exit 1 @@ -33,18 +41,18 @@ function build_docker_images() { } function start_service() { - export LLM_ENDPOINT_PORT=5066 - export FAQ_PORT=5067 + export LLM_ENDPOINT_PORT=12102 # 12100-12199 + export FAQ_PORT=10502 #10500-10599 export host_ip=${host_ip} - export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN} # Remember to set HF_TOKEN before invoking this test! + export HF_TOKEN=${HF_TOKEN} # Remember to set HF_TOKEN before invoking this test! export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}" export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" - export FAQGen_COMPONENT_NAME="OPEAFAQGen_vLLM" export VLLM_SKIP_WARMUP=true export LOGFLAG=True + export DATA_PATH="/data2/cache" cd $WORKPATH/comps/llms/deployment/docker_compose - docker compose -f faq-generation_vllm_on_intel_hpu.yaml up -d > ${LOG_PATH}/start_services_with_compose.log + docker compose -f compose_faq-generation.yaml up ${service_name} -d > ${LOG_PATH}/start_services_with_compose.log sleep 30s } @@ -85,7 +93,7 @@ function validate_backend_microservices() { validate_services \ "${host_ip}:${LLM_ENDPOINT_PORT}/v1/completions" \ "text" \ - "vllm" \ + "vllm-gaudi-server" \ "vllm-gaudi-server" \ '{"model": "Intel/neural-chat-7b-v3-3", "prompt": "What is Deep Learning?", "max_tokens": 32, "temperature": 0}' @@ -93,22 +101,22 @@ function validate_backend_microservices() { validate_services \ "${host_ip}:${FAQ_PORT}/v1/faqgen" \ "text" \ - "llm - faqgen" \ - "llm-faqgen-server" \ - '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.","max_tokens": 32}' + "faqgen-vllm-gaudi" \ + "faqgen-vllm-gaudi" \ + '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.","max_tokens": 32}' # faq, non-stream validate_services \ "${host_ip}:${FAQ_PORT}/v1/faqgen" \ "text" \ - "FAQGen" \ - "llm-faqgen-server" \ - '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.","max_tokens": 32, "stream":false}' + "faqgen-vllm-gaudi" \ + "faqgen-vllm-gaudi" \ + '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. 
TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.","max_tokens": 32, "stream":false}' } function stop_docker() { cd $WORKPATH/comps/llms/deployment/docker_compose - docker compose -f faq-generation_vllm_on_intel_hpu.yaml down + docker compose -f compose_faq-generation.yaml down ${service_name} --remove-orphans } function main() { diff --git a/tests/llms/test_llms_summarization_tgi_langchain.sh b/tests/llms/test_llms_summarization_tgi_langchain.sh deleted file mode 100644 index ee12777657..0000000000 --- a/tests/llms/test_llms_summarization_tgi_langchain.sh +++ /dev/null @@ -1,133 +0,0 @@ -#!/bin/bash -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -set -xe - -WORKPATH=$(dirname "$PWD") -ip_address=$(hostname -I | awk '{print $1}') -LOG_PATH="$WORKPATH/tests" - -function build_docker_images() { - cd $WORKPATH - docker build --no-cache -t opea/llm-sum-tgi:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/summarization/tgi/langchain/Dockerfile . - if [ $? -ne 0 ]; then - echo "opea/llm-textgen built fail" - exit 1 - else - echo "opea/llm-textgen built successful" - fi -} - -function start_service() { - tgi_endpoint_port=5075 - export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" - export MAX_INPUT_TOKENS=2048 - export MAX_TOTAL_TOKENS=4096 - # Remember to set HF_TOKEN before invoking this test! - export HF_TOKEN=${HF_TOKEN} - docker run -d --name="test-comps-llm-sum-tgi-endpoint" -p $tgi_endpoint_port:80 -v ./data:/data -e http_proxy=$http_proxy -e https_proxy=$https_proxy --shm-size 1g ghcr.io/huggingface/text-generation-inference:1.4 --model-id ${LLM_MODEL_ID} --max-input-length ${MAX_INPUT_TOKENS} --max-total-tokens ${MAX_TOTAL_TOKENS} - export TGI_LLM_ENDPOINT="http://${ip_address}:${tgi_endpoint_port}" - - sum_port=5076 - docker run -d --name="test-comps-llm-sum-tgi-server" -p ${sum_port}:9000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e TGI_LLM_ENDPOINT=$TGI_LLM_ENDPOINT -e LLM_MODEL_ID=$LLM_MODEL_ID -e MAX_INPUT_TOKENS=$MAX_INPUT_TOKENS -e MAX_TOTAL_TOKENS=$MAX_TOTAL_TOKENS -e HUGGINGFACEHUB_API_TOKEN=$HF_TOKEN -e LOGFLAG=True opea/llm-sum-tgi:comps - - # check whether tgi is fully ready - n=0 - until [[ "$n" -ge 100 ]] || [[ $ready == true ]]; do - docker logs test-comps-llm-sum-tgi-endpoint > ${LOG_PATH}/test-comps-llm-sum-tgi-endpoint.log - n=$((n+1)) - if grep -q Connected ${LOG_PATH}/test-comps-llm-sum-tgi-endpoint.log; then - break - fi - sleep 5s - done - sleep 5s -} - -function validate_services() { - local URL="$1" - local EXPECTED_RESULT="$2" - local SERVICE_NAME="$3" - local DOCKER_NAME="$4" - local INPUT_DATA="$5" - - local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") - - echo "===========================================" - - if [ "$HTTP_STATUS" -eq 200 ]; then - echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." - - local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) - - echo $CONTENT - - if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then - echo "[ $SERVICE_NAME ] Content is as expected." - else - echo "[ $SERVICE_NAME ] Content does not match the expected result" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - fi - else - echo "[ $SERVICE_NAME ] HTTP status is not 200. 
Received status was $HTTP_STATUS" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - fi - sleep 1s -} - -function validate_microservices() { - sum_port=5076 - URL="http://${ip_address}:$sum_port/v1/chat/docsum" - - validate_services \ - "$URL" \ - 'text' \ - "llm_summarization" \ - "test-comps-llm-sum-tgi-server" \ - '{"query": "What is Deep Learning?"}' - - validate_services \ - "$URL" \ - 'text' \ - "llm_summarization" \ - "test-comps-llm-sum-tgi-server" \ - '{"query": "What is Deep Learning?", "summary_type": "truncate"}' - - validate_services \ - "$URL" \ - 'text' \ - "llm_summarization" \ - "test-comps-llm-sum-tgi-server" \ - '{"query": "What is Deep Learning?", "summary_type": "map_reduce"}' - - validate_services \ - "$URL" \ - 'text' \ - "llm_summarization" \ - "test-comps-llm-sum-tgi-server" \ - '{"query": "What is Deep Learning?", "summary_type": "refine"}' -} - -function stop_docker() { - cid=$(docker ps -aq --filter "name=test-comps-llm-sum-tgi*") - if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi -} - -function main() { - - stop_docker - - build_docker_images - start_service - - validate_microservices - - stop_docker - echo y | docker system prune - -} - -main diff --git a/tests/llms/test_llms_text-generation_bedrock.sh b/tests/llms/test_llms_text-generation_bedrock.sh new file mode 100644 index 0000000000..ce9989c8b9 --- /dev/null +++ b/tests/llms/test_llms_text-generation_bedrock.sh @@ -0,0 +1,99 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -xe + +WORKPATH=$(dirname "$PWD") +LOG_PATH="$WORKPATH/tests" +ip_address=$(hostname -I | awk '{print $1}') + +function build_docker_images() { + cd $WORKPATH + docker build --no-cache -t opea/bedrock:latest \ + --build-arg https_proxy=$https_proxy \ + --build-arg http_proxy=$http_proxy \ + -f comps/llms/src/text-generation/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/bedrock:latest built fail" + exit 1 + else + echo "opea/bedrock:latest built successful" + fi +} + +function start_service() { + # Check for required AWS credentials + if [ -z "$AWS_ACCESS_KEY_ID" ] || [ -z "$AWS_SECRET_ACCESS_KEY" ]; then + echo "AWS credentials not set in environment" + exit 1 + fi + + # If session token is set, include it as an environment variable + + if [ ! 
-z "$AWS_SESSION_TOKEN" ]; then + echo "Detected AWS_SESSION_TOKEN, treating credentials as IAM Role" + docker run -d --name="bedrock-test" \ + -p 9009:9000 \ + --ipc=host \ + -e http_proxy=$http_proxy \ + -e https_proxy=$https_proxy \ + -e LLM_COMPONENT_NAME="OpeaTextGenBedrock" \ + -e AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID} \ + -e AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY} \ + -e AWS_SESSION_TOKEN=${AWS_SESSION_TOKEN} \ + opea/bedrock:latest + else + echo "Did not detect AWS_SESSION_TOKEN, treating credentials as IAM User" + docker run -d --name="bedrock-test" \ + -p 9009:9000 \ + --ipc=host \ + -e http_proxy=$http_proxy \ + -e https_proxy=$https_proxy \ + -e LLM_COMPONENT_NAME="OpeaTextGenBedrock" \ + -e AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID \ + -e AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \ + opea/bedrock:latest + fi + + # Give the service time to start + sleep 10s +} + +function validate_microservice() { + bedrock_port=9009 + result=$(http_proxy="" curl http://${ip_address}:${bedrock_port}/v1/chat/completions \ + -X POST \ + -d '{"model": "us.anthropic.claude-3-haiku-20240307-v1:0", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens":17, "stream": "true"}' \ + -H 'Content-Type: application/json') + + if [[ $result == *"data: [DONE]"* ]]; then + echo "Result correct." + echo "$result" >> ${LOG_PATH}/bedrock.log + else + echo "Result wrong. Received was $result" + docker logs bedrock-test >> ${LOG_PATH}/bedrock.log + exit 1 + fi +} + +function stop_docker() { + cid=$(docker ps -aq --filter "name=bedrock-test") + if [[ ! -z "$cid" ]]; then + docker stop $cid && docker rm $cid && sleep 1s + fi +} + +function main() { + stop_docker + + build_docker_images + start_service + + validate_microservice + + stop_docker + echo y | docker system prune +} + +main diff --git a/tests/llms/test_llms_text-generation_native_langchain_on_intel_hpu.sh b/tests/llms/test_llms_text-generation_native_langchain_on_intel_hpu.sh deleted file mode 100644 index 51e610dddf..0000000000 --- a/tests/llms/test_llms_text-generation_native_langchain_on_intel_hpu.sh +++ /dev/null @@ -1,91 +0,0 @@ -#!/bin/bash -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -set -x - -WORKPATH=$(dirname "$PWD") -LOG_PATH="$WORKPATH/tests" -ip_address=$(hostname -I | awk '{print $1}') - -function build_docker_images() { - cd $WORKPATH - docker build --no-cache \ - --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy \ - -t opea/llm-native:comps \ - -f comps/llms/text-generation/native/langchain/Dockerfile . - if [ $? 
-ne 0 ]; then - echo "opea/llm-native built fail" - exit 1 - else - echo "opea/llm-native built successful" - fi -} - -function start_service() { - LLM_NATIVE_MODEL="Qwen/Qwen2-7B-Instruct" - llm_native_service_port=5070 - docker run -d \ - --name="test-comps-llm-native-server" \ - -p ${llm_native_service_port}:9000 \ - --runtime=habana \ - --cap-add=SYS_NICE \ - --ipc=host \ - -e http_proxy=${http_proxy} \ - -e https_proxy=${https_proxy} \ - -e LLM_NATIVE_MODEL=${LLM_NATIVE_MODEL} \ - -e HABANA_VISIBLE_DEVICES=all \ - -e OMPI_MCA_btl_vader_single_copy_mechanism=none \ - -e TOKENIZERS_PARALLELISM=false \ - --restart unless-stopped \ - --network bridge \ - opea/llm-native:comps - - sleep 3m -} - -function validate_microservice() { - llm_native_service_port=5070 - URL="http://${ip_address}:${llm_native_service_port}/v1/chat/completions" - INPUT_DATA='{"query":"What is Deep Learning?"}' - HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") - HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') - RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g') - SERVICE_NAME="llm-native" - - # check response status - if [ "$HTTP_STATUS" -ne "200" ]; then - echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs test-comps-llm-native-server >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - else - echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." - fi - # check response body - if [[ "$RESPONSE_BODY" != *'"text":"What'* ]]; then - echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" - docker logs test-comps-llm-native-server >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - else - echo "[ $SERVICE_NAME ] Content is as expected." - fi -} - -function stop_docker() { - cid=$(docker ps -aq --filter "name=test-comps-llm-native*") - if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi -} - -function main() { - - stop_docker - build_docker_images - start_service - validate_microservice - stop_docker - - echo y | docker system prune - -} - -main diff --git a/tests/llms/test_llms_text-generation_native_llamaindex_on_intel_hpu.sh b/tests/llms/test_llms_text-generation_native_llamaindex_on_intel_hpu.sh deleted file mode 100644 index d6f1943786..0000000000 --- a/tests/llms/test_llms_text-generation_native_llamaindex_on_intel_hpu.sh +++ /dev/null @@ -1,91 +0,0 @@ -#!/bin/bash -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -set -x - -WORKPATH=$(dirname "$PWD") -LOG_PATH="$WORKPATH/tests" -ip_address=$(hostname -I | awk '{print $1}') - -function build_docker_images() { - cd $WORKPATH - docker build --no-cache \ - --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy \ - -t opea/llm-native-llamaindex:comps \ - -f comps/llms/text-generation/native/llama_index/Dockerfile . - if [ $? 
-ne 0 ]; then - echo "opea/llm-native-llamaindex built fail" - exit 1 - else - echo "opea/llm-native-llamaindex built successful" - fi -} - -function start_service() { - LLM_NATIVE_MODEL="Qwen/Qwen2-7B-Instruct" - llm_native_service_port=5070 - docker run -d \ - --name="test-comps-llm-native-server" \ - -p ${llm_native_service_port}:9000 \ - --runtime=habana \ - --cap-add=SYS_NICE \ - --ipc=host \ - -e http_proxy=${http_proxy} \ - -e https_proxy=${https_proxy} \ - -e LLM_NATIVE_MODEL=${LLM_NATIVE_MODEL} \ - -e HABANA_VISIBLE_DEVICES=all \ - -e OMPI_MCA_btl_vader_single_copy_mechanism=none \ - -e TOKENIZERS_PARALLELISM=false \ - --restart unless-stopped \ - --network bridge \ - opea/llm-native-llamaindex:comps - - sleep 3m -} - -function validate_microservice() { - llm_native_service_port=5070 - URL="http://${ip_address}:${llm_native_service_port}/v1/chat/completions" - INPUT_DATA='{"query":"What is Deep Learning?"}' - HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") - HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') - RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g') - SERVICE_NAME="llm-native" - - # check response status - if [ "$HTTP_STATUS" -ne "200" ]; then - echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs test-comps-llm-native-server >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - else - echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." - fi - # check response body - if [[ "$RESPONSE_BODY" != *'"text":"What'* ]]; then - echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" - docker logs test-comps-llm-native-server >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - else - echo "[ $SERVICE_NAME ] Content is as expected." - fi -} - -function stop_docker() { - cid=$(docker ps -aq --filter "name=test-comps-llm-native*") - if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi -} - -function main() { - - stop_docker - build_docker_images - start_service - validate_microservice - stop_docker - - echo y | docker system prune - -} - -main diff --git a/tests/llms/test_llms_text-generation_native_on_intel_hpu.sh b/tests/llms/test_llms_text-generation_native_on_intel_hpu.sh new file mode 100644 index 0000000000..0d39a86905 --- /dev/null +++ b/tests/llms/test_llms_text-generation_native_on_intel_hpu.sh @@ -0,0 +1,103 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -x + +IMAGE_REPO=${IMAGE_REPO:-"opea"} +export REGISTRY=${IMAGE_REPO} +export TAG="comps" +echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" +echo "TAG=${TAG}" + +WORKPATH=$(dirname "$PWD") +host_ip=$(hostname -I | awk '{print $1}') +LOG_PATH="$WORKPATH/tests" +service_name="textgen-native-gaudi" + +function build_docker_images() { + cd $WORKPATH + docker build --no-cache -t ${REGISTRY:-opea}/llm-textgen-gaudi:${TAG:-latest} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/src/text-generation/Dockerfile.intel_hpu . + if [ $? 
-ne 0 ]; then
+        echo "opea/llm-textgen-gaudi built fail"
+        exit 1
+    else
+        echo "opea/llm-textgen-gaudi built successful"
+    fi
+}
+
+function start_service() {
+    export TEXTGEN_PORT=10512 #10500-10599
+    export host_ip=${host_ip}
+    export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
+    export LOGFLAG=True
+    export DATA_PATH="/data2/cache"
+
+    cd $WORKPATH/comps/llms/deployment/docker_compose
+    docker compose -f compose_text-generation.yaml up ${service_name} -d > ${LOG_PATH}/start_services_with_compose.log
+
+    sleep 2m
+}
+
+function validate_services() {
+    local URL="$1"
+    local EXPECTED_RESULT="$2"
+    local SERVICE_NAME="$3"
+    local DOCKER_NAME="$4"
+    local INPUT_DATA="$5"
+
+    local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL")
+
+    echo "==========================================="
+
+    if [ "$HTTP_STATUS" -eq 200 ]; then
+        echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..."
+
+        local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log)
+
+        if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then
+            echo "[ $SERVICE_NAME ] Content is as expected."
+        else
+            echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT"
+            docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log
+            exit 1
+        fi
+    else
+        echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS"
+        docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log
+        exit 1
+    fi
+    sleep 1s
+}
+
+function validate_microservices() {
+    URL="http://${host_ip}:${TEXTGEN_PORT}/v1/chat/completions"
+
+    # textgen
+    echo "Validate textgen with string messages input..."
+    validate_services \
+        "$URL" \
+        "text" \
+        "textgen-native-gaudi" \
+        "textgen-native-gaudi" \
+        '{"model": "Intel/neural-chat-7b-v3-3", "messages": "What is Deep Learning?", "max_tokens":17, "stream":false}'
+}
+
+function stop_docker() {
+    cd $WORKPATH/comps/llms/deployment/docker_compose
+    docker compose -f compose_text-generation.yaml down ${service_name} --remove-orphans
+}
+
+function main() {
+
+    stop_docker
+    build_docker_images
+    start_service
+    validate_microservices
+    stop_docker
+
+    echo y | docker system prune
+
+}
+
+main
diff --git a/tests/llms/test_llms_text-generation_opea_tgi.sh b/tests/llms/test_llms_text-generation_opea_tgi.sh
deleted file mode 100644
index d167f1b5dc..0000000000
--- a/tests/llms/test_llms_text-generation_opea_tgi.sh
+++ /dev/null
@@ -1,104 +0,0 @@
-#!/bin/bash
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-set -x
-
-WORKPATH=$(dirname "$PWD")
-LOG_PATH="$WORKPATH/tests"
-ip_address=$(hostname -I | awk '{print $1}')
-
-function build_docker_images() {
-    cd $WORKPATH
-    docker build --no-cache --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -t opea/llm:comps -f comps/llms/src/text-generation/Dockerfile .
-    if [ $? -ne 0 ]; then
-        echo "opea/llm built fail"
-        exit 1
-    else
-        echo "opea/llm built successful"
-    fi
-}
-
-function start_service() {
-    tgi_endpoint_port=5004
-    export hf_llm_model=$1
-    # Remember to set HF_TOKEN before invoking this test!
- export HF_TOKEN=${HF_TOKEN} - docker run -d --name="test-comps-llm-tgi-endpoint" -p $tgi_endpoint_port:80 -v ~/.cache/huggingface/hub:/data --shm-size 1g -e HF_TOKEN=${HF_TOKEN} ghcr.io/huggingface/text-generation-inference:2.1.0 --model-id ${hf_llm_model} --max-input-tokens 1024 --max-total-tokens 2048 - export LLM_ENDPOINT="http://${ip_address}:${tgi_endpoint_port}" - - # check whether tgi is fully ready - n=0 - until [[ "$n" -ge 100 ]] || [[ $ready == true ]]; do - docker logs test-comps-llm-tgi-endpoint >> ${LOG_PATH}/test-comps-vllm-service.log - n=$((n+1)) - if grep -q Connected ${LOG_PATH}/test-comps-vllm-service.log; then - break - fi - sleep 5s - done - sleep 5s - - llm_port=5005 - unset http_proxy - docker run -d --name="test-comps-llm-tgi-server" -p ${llm_port}:9000 --ipc=host -e LOGFLAG=True -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e LLM_ENDPOINT=$LLM_ENDPOINT -e LLM_MODEL_ID=$hf_llm_model -e HUGGINGFACEHUB_API_TOKEN=$HF_TOKEN opea/llm:comps - sleep 20s -} - -function validate_microservice() { - llm_port=5005 - - result=$(http_proxy="" curl http://${ip_address}:${llm_port}/v1/chat/completions \ - -X POST \ - -d '{"model": "Intel/neural-chat-7b-v3-3", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens":17, "stream":false}' \ - -H 'Content-Type: application/json') - if [[ $result == *"content"* ]]; then - echo "Result correct." - else - echo "Result wrong. Received was $result" - docker logs test-comps-llm-tgi-endpoint >> ${LOG_PATH}/llm-tgi.log - docker logs test-comps-llm-tgi-server >> ${LOG_PATH}/llm-server.log - exit 1 - fi -} - -function validate_microservice_with_openai() { - llm_service_port=5005 - python3 ${WORKPATH}/tests/utils/validate_svc_with_openai.py "$ip_address" "$llm_service_port" "llm" - if [ $? -ne 0 ]; then - docker logs test-comps-llm-tgi-endpoint >> ${LOG_PATH}/llm-tgi.log - docker logs test-comps-llm-tgi-server >> ${LOG_PATH}/llm-server.log - exit 1 - fi -} - -function stop_docker() { - cid=$(docker ps -aq --filter "name=test-comps-llm-tgi*") - if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi -} - -function main() { - - stop_docker - build_docker_images - - pip install --no-cache-dir openai - - llm_models=( - Intel/neural-chat-7b-v3-3 - # meta-llama/Llama-2-7b-chat-hf - # meta-llama/Meta-Llama-3-8B-Instruct - # microsoft/Phi-3-mini-4k-instruct - ) - for model in "${llm_models[@]}"; do - start_service "${model}" - validate_microservice - validate_microservice_with_openai - stop_docker - done - - echo y | docker system prune - -} - -main diff --git a/tests/llms/test_llms_text-generation_opea_vllm_on_intel_hpu.sh b/tests/llms/test_llms_text-generation_opea_vllm_on_intel_hpu.sh deleted file mode 100644 index eb5911bb69..0000000000 --- a/tests/llms/test_llms_text-generation_opea_vllm_on_intel_hpu.sh +++ /dev/null @@ -1,144 +0,0 @@ -#!/bin/bash -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -set -x - -WORKPATH=$(dirname "$PWD") -ip_address=$(hostname -I | awk '{print $1}') - -function build_docker_images() { - ## Build VLLM docker - cd $WORKPATH - git clone https://github.com/HabanaAI/vllm-fork.git - cd vllm-fork/ - git checkout 3c39626 - docker build --no-cache -f Dockerfile.hpu -t opea/vllm-gaudi:comps --shm-size=128g . - if [ $? 
-ne 0 ]; then - echo "opea/vllm-gaudi built fail" - exit 1 - else - echo "opea/vllm-gaudi built successful" - fi - - ## Build OPEA microservice docker - cd $WORKPATH - docker build \ - --no-cache -t opea/llm:comps \ - -f comps/llms/src/text-generation/Dockerfile . - if [ $? -ne 0 ]; then - echo "opea/llm built fail" - exit 1 - else - echo "opea/llm built successful" - fi -} - -function start_service() { - export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" - port_number=5025 - docker run -d --rm \ - --runtime=habana \ - --name="test-comps-vllm-service" \ - -v $PWD/data:/data \ - -p $port_number:80 \ - -e HABANA_VISIBLE_DEVICES=all \ - -e OMPI_MCA_btl_vader_single_copy_mechanism=none \ - -e VLLM_SKIP_WARMUP=true \ - --cap-add=sys_nice \ - --ipc=host \ - -e HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN} \ - opea/vllm-gaudi:comps \ - --model $LLM_MODEL_ID --tensor-parallel-size 1 --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048 - - export LLM_ENDPOINT="http://${ip_address}:${port_number}" - - # check whether vllm ray is fully ready - n=0 - until [[ "$n" -ge 70 ]] || [[ $ready == true ]]; do - docker logs test-comps-vllm-service > ${WORKPATH}/tests/test-comps-vllm-service.log - n=$((n+1)) - if grep -q throughput ${WORKPATH}/tests/test-comps-vllm-service.log; then - break - fi - sleep 5s - done - sleep 5s - - docker run -d --rm \ - --name="test-comps-vllm-microservice" \ - -p 5030:9000 \ - --ipc=host \ - -e LLM_ENDPOINT=$LLM_ENDPOINT \ - -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN \ - -e LLM_MODEL_ID=$LLM_MODEL_ID \ - -e LOGFLAG=True \ - opea/llm:comps - sleep 20s -} - -function validate_microservice() { - result=$(http_proxy="" curl http://${ip_address}:5025/v1/completions \ - -H "Content-Type: application/json" \ - -d '{ - "model": "Intel/neural-chat-7b-v3-3", - "prompt": "What is Deep Learning?", - "max_tokens": 32, - "temperature": 0 - }') - if [[ $result == *"text"* ]]; then - echo "Result correct." - else - echo "Result wrong. Received was $result" - docker logs test-comps-vllm-service - docker logs test-comps-vllm-microservice - exit 1 - fi - - result=$(http_proxy="" curl http://${ip_address}:5030/v1/chat/completions \ - -X POST \ - -d '{"model": "Intel/neural-chat-7b-v3-3", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens":17, "stream":false}' \ - -H 'Content-Type: application/json') - if [[ $result == *"content"* ]]; then - echo "Result correct." - else - echo "Result wrong. Received was $result" - docker logs test-comps-vllm-service - docker logs test-comps-vllm-microservice - exit 1 - fi - - result=$(http_proxy="" curl http://${ip_address}:5030/v1/chat/completions \ - -X POST \ - -d '{"model": "Intel/neural-chat-7b-v3-3", "messages": "What is Deep Learning?", "max_tokens":17, "stream":false}' \ - -H 'Content-Type: application/json') - if [[ $result == *"text"* ]]; then - echo "Result correct." - else - echo "Result wrong. Received was $result" - docker logs test-comps-vllm-service - docker logs test-comps-vllm-microservice - exit 1 - fi -} - -function stop_docker() { - cid=$(docker ps -aq --filter "name=test-comps-vllm*") - if [[ ! 
-z "$cid" ]]; then docker rm $cid -f && sleep 1s; fi -} - -function main() { - - stop_docker - - build_docker_images - start_service - - validate_microservice - - stop_docker - echo y | docker system prune - -} - -main diff --git a/tests/llms/test_llms_text-generation_predictionguard.sh b/tests/llms/test_llms_text-generation_predictionguard.sh index 6cb3507283..2959e1d38a 100644 --- a/tests/llms/test_llms_text-generation_predictionguard.sh +++ b/tests/llms/test_llms_text-generation_predictionguard.sh @@ -2,55 +2,63 @@ # Copyright (C) 2024 Prediction Guard, Inc. # SPDX-License-Identifier: Apache-2.0 -set -x # Print commands and their arguments as they are executed +set -x + +IMAGE_REPO=${IMAGE_REPO:-"opea"} +export REGISTRY=${IMAGE_REPO} +export TAG="comps" +echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" +echo "TAG=${TAG}" WORKPATH=$(dirname "$PWD") -ip_address=$(hostname -I | awk '{print $1}') # Adjust to a more reliable command -if [ -z "$ip_address" ]; then - ip_address="localhost" # Default to localhost if IP address is empty +host_ip=$(hostname -I | awk '{print $1}') # Adjust to a more reliable command +if [ -z "$host_ip" ]; then + host_ip="localhost" # Default to localhost if IP address is empty fi +LOG_PATH="$WORKPATH/tests" +service_name="textgen-predictionguard" function build_docker_images() { cd $WORKPATH - echo $(pwd) - docker build --no-cache -t opea/llm-pg:comps -f comps/llms/text-generation/predictionguard/Dockerfile . + docker build --no-cache -t ${REGISTRY:-opea}/llm-textgen:${TAG:-latest} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/src/text-generation/Dockerfile . if [ $? -ne 0 ]; then - echo "opea/llm-pg built failed" + echo "opea/llm-textgen built fail" exit 1 else - echo "opea/llm-pg built successfully" + echo "opea/llm-textgen built successful" fi } function start_service() { - llm_service_port=9000 - unset http_proxy - docker run -d --name=test-comps-llm-pg-server \ - -e http_proxy= -e https_proxy= \ - -e PREDICTIONGUARD_API_KEY=${PREDICTIONGUARD_API_KEY} \ - -p 9000:9000 --ipc=host opea/llm-pg:comps + export TEXTGEN_PORT=10513 #10500-10599 + export host_ip=${host_ip} + export PREDICTIONGUARD_API_KEY=${PREDICTIONGUARD_API_KEY} + export LOGFLAG=True + + cd $WORKPATH/comps/llms/deployment/docker_compose + docker compose -f compose_text-generation.yaml up ${service_name} -d > ${LOG_PATH}/start_services_with_compose.log + sleep 60 # Sleep for 1 minute to allow the service to start } function validate_microservice() { - llm_service_port=9000 - result=$(http_proxy="" curl http://${ip_address}:${llm_service_port}/v1/chat/completions \ + result=$(http_proxy="" curl http://${host_ip}:${TEXTGEN_PORT}/v1/chat/completions \ -X POST \ - -d '{"model": "Hermes-2-Pro-Llama-3-8B", "query": "What is AI?", "stream": false, "max_tokens": 100, "temperature": 0.7, "top_p": 1.0, "top_k": 50}' \ + -d '{"model": "Hermes-2-Pro-Llama-3-8B", "messages": "What is AI?", "stream": false, "max_tokens": 100, "temperature": 0.7, "top_p": 1.0, "top_k": 50}' \ -H 'Content-Type: application/json') - if [[ $result == *"text"* ]]; then + if [[ $result == *"content"* ]]; then echo "Service response is correct." else echo "Result wrong. Received was $result" - docker logs test-comps-llm-pg-server + docker logs ${service_name} exit 1 fi } function stop_docker() { - cid=$(docker ps -aq --filter "name=test-comps-llm-pg-*") - if [[ ! 
-z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi + cd $WORKPATH/comps/llms/deployment/docker_compose + docker compose -f compose_text-generation.yaml down ${service_name} --remove-orphans } function main() { diff --git a/tests/llms/test_llms_text-generation_service_ollama.sh b/tests/llms/test_llms_text-generation_service_ollama.sh new file mode 100644 index 0000000000..d5087ce7ec --- /dev/null +++ b/tests/llms/test_llms_text-generation_service_ollama.sh @@ -0,0 +1,88 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -x + +IMAGE_REPO=${IMAGE_REPO:-"opea"} +export REGISTRY=${IMAGE_REPO} +export TAG="comps" +echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" +echo "TAG=${TAG}" + +WORKPATH=$(dirname "$PWD") +host_ip=$(hostname -I | awk '{print $1}') +LOG_PATH="$WORKPATH/tests" +service_name="textgen-service-ollama" + +function build_docker_images() { + cd $WORKPATH + docker build --no-cache -t ${REGISTRY:-opea}/llm-textgen:${TAG:-latest} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/src/text-generation/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/llm-textgen built fail" + exit 1 + else + echo "opea/llm-textgen built successful" + fi +} + +function start_service() { + export LLM_ENDPOINT_PORT=12114 # 12100-12199 + export TEXTGEN_PORT=10514 #10500-10599 + export host_ip=${host_ip} + export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}" + export LLM_MODEL_ID=$1 + export LOGFLAG=True + + cd $WORKPATH/comps/third_parties/ollama/deployment/docker_compose/ + docker compose -f compose.yaml up -d > ${LOG_PATH}/start_services_with_compose_ollama.log + + sleep 5s + docker exec ollama-server ollama pull $LLM_MODEL_ID + sleep 20s + + cd $WORKPATH/comps/llms/deployment/docker_compose + docker compose -f compose_text-generation.yaml up ${service_name} -d > ${LOG_PATH}/start_services_with_compose.log + + sleep 30s +} + +function validate_microservice() { + result=$(http_proxy="" curl http://${host_ip}:${TEXTGEN_PORT}/v1/chat/completions \ + -X POST \ + -d '{"messages": [{"role": "user", "content": "What is Deep Learning?"}]}' \ + -H 'Content-Type: application/json') + if [[ $result == *"content"* ]]; then + echo "Result correct." + else + echo "Result wrong. 
Received was $result" + docker logs ollama-server >> ${LOG_PATH}/llm-ollama.log + docker logs ${service_name} >> ${LOG_PATH}/llm-server.log + exit 1 + fi +} + +function stop_docker() { + cd $WORKPATH/comps/llms/deployment/docker_compose + docker compose -f compose_text-generation.yaml down ${service_name} --remove-orphans +} + +function main() { + + stop_docker + build_docker_images + + llm_models=( + llama3.2:1b + ) + for model in "${llm_models[@]}"; do + start_service "${model}" + validate_microservice + stop_docker + done + + echo y | docker system prune + +} + +main diff --git a/tests/llms/test_llms_text-generation_service_tgi.sh b/tests/llms/test_llms_text-generation_service_tgi.sh new file mode 100644 index 0000000000..c604470257 --- /dev/null +++ b/tests/llms/test_llms_text-generation_service_tgi.sh @@ -0,0 +1,135 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -xe + +IMAGE_REPO=${IMAGE_REPO:-"opea"} +export REGISTRY=${IMAGE_REPO} +export TAG="comps" +echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" +echo "TAG=${TAG}" + +WORKPATH=$(dirname "$PWD") +host_ip=$(hostname -I | awk '{print $1}') +LOG_PATH="$WORKPATH/tests" +service_name="textgen-service-tgi" + +function build_docker_images() { + cd $WORKPATH + docker build --no-cache -t ${REGISTRY:-opea}/llm-textgen:${TAG:-latest} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/src/text-generation/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/llm-textgen built fail" + exit 1 + else + echo "opea/llm-textgen built successful" + fi +} + +function start_service() { + export LLM_ENDPOINT_PORT=12108 # 12100-12199 + export TEXTGEN_PORT=10508 #10500-10599 + export host_ip=${host_ip} + export HF_TOKEN=${HF_TOKEN} # Remember to set HF_TOKEN before invoking this test! + export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}" + export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" + export LOGFLAG=True + + cd $WORKPATH/comps/llms/deployment/docker_compose + docker compose -f compose_text-generation.yaml up ${service_name} -d > ${LOG_PATH}/start_services_with_compose.log + + sleep 30s +} + +function validate_services() { + local URL="$1" + local EXPECTED_RESULT="$2" + local SERVICE_NAME="$3" + local DOCKER_NAME="$4" + local INPUT_DATA="$5" + + local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") + + echo "===========================================" + + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." + + local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) + + if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then + echo "[ $SERVICE_NAME ] Content is as expected." + else + echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + else + echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + sleep 1s +} + +function validate_microservices() { + URL="http://${host_ip}:${TEXTGEN_PORT}/v1/chat/completions" + + # tgi + echo "Validate tgi..." 
+ validate_services \ + "${LLM_ENDPOINT}/generate" \ + "generated_text" \ + "tgi-server" \ + "tgi-server" \ + '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' + + # textgen + echo "Validate textgen with string messages input..." + validate_services \ + "$URL" \ + "text" \ + "textgen-service-tgi" \ + "textgen-service-tgi" \ + '{"model": "Intel/neural-chat-7b-v3-3", "messages": "What is Deep Learning?", "max_tokens":17, "stream":false}' + + echo "Validate textgen with dict messages input..." + validate_services \ + "$URL" \ + "content" \ + "textgen-service-tgi" \ + "textgen-service-tgi" \ + '{"model": "Intel/neural-chat-7b-v3-3", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens":17, "stream":false}' +} + +function validate_microservice_with_openai() { + python3 ${WORKPATH}/tests/utils/validate_svc_with_openai.py "$host_ip" "$TEXTGEN_PORT" "llm" + if [ $? -ne 0 ]; then + docker logs tgi-server >> ${LOG_PATH}/llm-tgi.log + docker logs textgen-service-tgi >> ${LOG_PATH}/llm-server.log + exit 1 + fi +} + +function stop_docker() { + cd $WORKPATH/comps/llms/deployment/docker_compose + docker compose -f compose_text-generation.yaml down ${service_name} --remove-orphans +} + +function main() { + + stop_docker + + build_docker_images + pip install --no-cache-dir openai pydantic + start_service + + validate_microservices + validate_microservice_with_openai + + stop_docker + echo y | docker system prune + +} + +main diff --git a/tests/llms/test_llms_text-generation_service_tgi_on_intel_hpu.sh b/tests/llms/test_llms_text-generation_service_tgi_on_intel_hpu.sh new file mode 100644 index 0000000000..c91a51498b --- /dev/null +++ b/tests/llms/test_llms_text-generation_service_tgi_on_intel_hpu.sh @@ -0,0 +1,136 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -xe + +IMAGE_REPO=${IMAGE_REPO:-"opea"} +export REGISTRY=${IMAGE_REPO} +export TAG="comps" +echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" +echo "TAG=${TAG}" + +WORKPATH=$(dirname "$PWD") +host_ip=$(hostname -I | awk '{print $1}') +LOG_PATH="$WORKPATH/tests" +service_name="textgen-service-tgi-gaudi" + +function build_docker_images() { + cd $WORKPATH + docker build --no-cache -t ${REGISTRY:-opea}/llm-textgen:${TAG:-latest} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/src/text-generation/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/llm-textgen built fail" + exit 1 + else + echo "opea/llm-textgen built successful" + fi +} + +function start_service() { + export LLM_ENDPOINT_PORT=12109 # 12100-12199 + export TEXTGEN_PORT=10509 #10500-10599 + export host_ip=${host_ip} + export HF_TOKEN=${HF_TOKEN} # Remember to set HF_TOKEN before invoking this test! 
+ export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}" + export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" + export LOGFLAG=True + export DATA_PATH="/data2/cache" + + cd $WORKPATH/comps/llms/deployment/docker_compose + docker compose -f compose_text-generation.yaml up ${service_name} -d > ${LOG_PATH}/start_services_with_compose.log + + sleep 30s +} + +function validate_services() { + local URL="$1" + local EXPECTED_RESULT="$2" + local SERVICE_NAME="$3" + local DOCKER_NAME="$4" + local INPUT_DATA="$5" + + local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") + + echo "===========================================" + + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." + + local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) + + if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then + echo "[ $SERVICE_NAME ] Content is as expected." + else + echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + else + echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + sleep 1s +} + +function validate_microservices() { + URL="http://${host_ip}:${TEXTGEN_PORT}/v1/chat/completions" + + # tgi + echo "Validate tgi..." + validate_services \ + "${LLM_ENDPOINT}/generate" \ + "generated_text" \ + "tgi-gaudi-server" \ + "tgi-gaudi-server" \ + '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' + + # textgen + echo "Validate textgen with string messages input..." + validate_services \ + "$URL" \ + "text" \ + "textgen-service-tgi-gaudi" \ + "textgen-service-tgi-gaudi" \ + '{"model": "Intel/neural-chat-7b-v3-3", "messages": "What is Deep Learning?", "max_tokens":17, "stream":false}' + + echo "Validate textgen with dict messages input..." + validate_services \ + "$URL" \ + "content" \ + "textgen-service-tgi-gaudi" \ + "textgen-service-tgi-gaudi" \ + '{"model": "Intel/neural-chat-7b-v3-3", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens":17, "stream":false}' +} + +function validate_microservice_with_openai() { + python3 ${WORKPATH}/tests/utils/validate_svc_with_openai.py "$host_ip" "$TEXTGEN_PORT" "llm" + if [ $? 
-ne 0 ]; then
+        docker logs tgi-gaudi-server >> ${LOG_PATH}/llm-tgi-gaudi.log
+        docker logs textgen-service-tgi-gaudi >> ${LOG_PATH}/llm-server.log
+        exit 1
+    fi
+}
+
+function stop_docker() {
+    cd $WORKPATH/comps/llms/deployment/docker_compose
+    docker compose -f compose_text-generation.yaml down ${service_name} --remove-orphans
+}
+
+function main() {
+
+    stop_docker
+
+    build_docker_images
+    pip install --no-cache-dir openai pydantic
+    start_service
+
+    validate_microservices
+    validate_microservice_with_openai
+
+    stop_docker
+    echo y | docker system prune
+
+}
+
+main
diff --git a/tests/llms/test_llms_text-generation_service_vllm_on_intel_hpu.sh b/tests/llms/test_llms_text-generation_service_vllm_on_intel_hpu.sh
new file mode 100644
index 0000000000..7c32a8977b
--- /dev/null
+++ b/tests/llms/test_llms_text-generation_service_vllm_on_intel_hpu.sh
@@ -0,0 +1,150 @@
+#!/bin/bash
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+set -xe
+
+IMAGE_REPO=${IMAGE_REPO:-"opea"}
+export REGISTRY=${IMAGE_REPO}
+export TAG="comps"
+echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
+echo "TAG=${TAG}"
+
+WORKPATH=$(dirname "$PWD")
+host_ip=$(hostname -I | awk '{print $1}')
+LOG_PATH="$WORKPATH/tests"
+service_name="textgen-service-vllm-gaudi"
+
+function build_docker_images() {
+    cd $WORKPATH
+    git clone https://github.com/HabanaAI/vllm-fork.git
+    cd vllm-fork/
+    git checkout v0.6.4.post2+Gaudi-1.19.0
+    sed -i 's/triton/triton==3.1.0/g' requirements-hpu.txt
+    docker build --no-cache -f Dockerfile.hpu -t ${REGISTRY:-opea}/vllm-gaudi:${TAG:-latest} --shm-size=128g .
+    if [ $? -ne 0 ]; then
+        echo "opea/vllm-gaudi built fail"
+        exit 1
+    else
+        echo "opea/vllm-gaudi built successful"
+    fi
+
+    cd $WORKPATH
+    docker build --no-cache -t ${REGISTRY:-opea}/llm-textgen:${TAG:-latest} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/src/text-generation/Dockerfile .
+    if [ $? -ne 0 ]; then
+        echo "opea/llm-textgen built fail"
+        exit 1
+    else
+        echo "opea/llm-textgen built successful"
+    fi
+}
+
+function start_service() {
+    export LLM_ENDPOINT_PORT=12110 # 12100-12199
+    export TEXTGEN_PORT=10510 #10500-10599
+    export host_ip=${host_ip}
+    export HF_TOKEN=${HF_TOKEN} # Remember to set HF_TOKEN before invoking this test!
+    export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}"
+    export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
+    export VLLM_SKIP_WARMUP=true
+    export LOGFLAG=True
+    export DATA_PATH="/data2/cache"
+
+    cd $WORKPATH/comps/llms/deployment/docker_compose
+    docker compose -f compose_text-generation.yaml up ${service_name} -d > ${LOG_PATH}/start_services_with_compose.log
+
+    sleep 30s
+}
+
+function validate_services() {
+    local URL="$1"
+    local EXPECTED_RESULT="$2"
+    local SERVICE_NAME="$3"
+    local DOCKER_NAME="$4"
+    local INPUT_DATA="$5"
+
+    local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL")
+
+    echo "==========================================="
+
+    if [ "$HTTP_STATUS" -eq 200 ]; then
+        echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..."
+
+        local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log)
+
+        if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then
+            echo "[ $SERVICE_NAME ] Content is as expected."
+ else + echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + else + echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + sleep 1s +} + +function validate_microservices() { + URL="http://${host_ip}:${TEXTGEN_PORT}/v1/chat/completions" + + # vllm + echo "Validate vllm..." + validate_services \ + "${LLM_ENDPOINT}/v1/completions" \ + "text" \ + "vllm-gaudi-server" \ + "vllm-gaudi-server" \ + '{"model": "Intel/neural-chat-7b-v3-3", "prompt": "What is Deep Learning?", "max_tokens": 32, "temperature": 0}' + + # textgen + echo "Validate textgen with string messages input..." + validate_services \ + "$URL" \ + "text" \ + "textgen-service-vllm-gaudi" \ + "textgen-service-vllm-gaudi" \ + '{"model": "Intel/neural-chat-7b-v3-3", "messages": "What is Deep Learning?", "max_tokens":17, "stream":false}' + + echo "Validate textgen with dict messages input..." + validate_services \ + "$URL" \ + "content" \ + "textgen-service-vllm-gaudi" \ + "textgen-service-vllm-gaudi" \ + '{"model": "Intel/neural-chat-7b-v3-3", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens":17, "stream":false}' +} + +function validate_microservice_with_openai() { + python3 ${WORKPATH}/tests/utils/validate_svc_with_openai.py "$host_ip" "$TEXTGEN_PORT" "llm" + if [ $? -ne 0 ]; then + docker logs vllm-gaudi-server >> ${LOG_PATH}/llm--gaudi.log + docker logs textgen-service-vllm-gaudi >> ${LOG_PATH}/llm-server.log + exit 1 + fi +} + +function stop_docker() { + cd $WORKPATH/comps/llms/deployment/docker_compose + docker compose -f compose_text-generation.yaml down ${service_name} --remove-orphans +} + +function main() { + + stop_docker + + build_docker_images + pip install --no-cache-dir openai pydantic + start_service + + validate_microservices + validate_microservice_with_openai + + stop_docker + echo y | docker system prune + +} + +main diff --git a/tests/lvms/test_lvms_llava.sh b/tests/lvms/test_lvms_llava.sh index 4627ec6ee7..7ef8d04839 100644 --- a/tests/lvms/test_lvms_llava.sh +++ b/tests/lvms/test_lvms_llava.sh @@ -7,83 +7,121 @@ set -x WORKPATH=$(dirname "$PWD") LOG_PATH="$WORKPATH/tests" ip_address=$(hostname -I | awk '{print $1}') +export TAG=comps +export LLAVA_PORT=11500 +export LVM_PORT=11501 function build_docker_images() { cd $WORKPATH echo $(pwd) - docker build --no-cache -t opea/lvm-llava:comps -f comps/lvms/llava/dependency/Dockerfile . + docker build --no-cache --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -t opea/lvm-llava:$TAG -f comps/lvms/src/integrations/dependency/llava/Dockerfile . if [ $? -ne 0 ]; then echo "opea/lvm-llava built fail" exit 1 else echo "opea/lvm-llava built successful" fi - docker build --no-cache -t opea/lvm-llava-svc:comps -f comps/lvms/llava/Dockerfile . + docker build --no-cache --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -t opea/lvm:$TAG -f comps/lvms/src/Dockerfile . if [ $? 
-ne 0 ]; then - echo "opea/lvm-llava-svc built fail" + echo "opea/lvm built fail" exit 1 else - echo "opea/lvm-llava-svc built successful" + echo "opea/lvm built successful" fi } function start_service() { - unset http_proxy - lvm_port=5051 - docker run -d --name="test-comps-lvm-llava" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 5028:8399 --ipc=host opea/lvm-llava:comps - docker run -d --name="test-comps-lvm-llava-svc" -e LVM_ENDPOINT=http://$ip_address:5028 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p $lvm_port:9399 --ipc=host opea/lvm-llava-svc:comps - sleep 8m + + export LVM_ENDPOINT=http://$ip_address:$LLAVA_PORT + + export LVM_COMPONENT_NAME=OPEA_LLAVA_LVM + docker compose -f comps/lvms/deployment/docker_compose/compose.yaml up llava-service lvm-llava -d + sleep 15s } function validate_microservice() { - lvm_port=5051 - result=$(http_proxy="" curl http://localhost:$lvm_port/v1/lvm -XPOST -d '{"image": "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8/5+hnoEIwDiqkL4KAcT9GO0U4BxoAAAAAElFTkSuQmCC", "prompt":"What is this?"}' -H 'Content-Type: application/json') + result=$(http_proxy="" curl http://localhost:$LVM_PORT/v1/lvm -XPOST -d '{"image": "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8/5+hnoEIwDiqkL4KAcT9GO0U4BxoAAAAAElFTkSuQmCC", "prompt":"What is this?"}' -H 'Content-Type: application/json') if [[ $result == *"yellow"* ]]; then echo "Result correct." else echo "Result wrong." - docker logs test-comps-lvm-llava >> ${LOG_PATH}/llava-dependency.log - docker logs test-comps-lvm-llava-svc >> ${LOG_PATH}/llava-server.log + docker logs llava-service >> ${LOG_PATH}/llava-dependency.log + docker logs lvm-llava-service >> ${LOG_PATH}/lvm.log + exit 1 + fi + + # Test sending two images with a text prompt with one image tag in the prompt. + # The first image is green and the second image is blue. Since the default MAX_IMAGES is 1, only the blue image should be sent to the LVM. + result=$(http_proxy="" curl http://localhost:$LVM_PORT/v1/lvm -XPOST -d '{"image": ["iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mNk+M9Qz0AEYBxVSF+FAAhKDveksOjmAAAAAElFTkSuQmCC", "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mNkYPhfz0AEYBxVSF+FAP5FDvcfRYWgAAAAAElFTkSuQmCC"], "prompt":"\nWhat are in these images?"}' -H 'Content-Type: application/json') + if [[ $result == *"blue"* ]]; then + echo "Result correct." + else + echo "Result wrong." + docker logs llava-service >> ${LOG_PATH}/llava-dependency.log + docker logs lvm-llava-service >> ${LOG_PATH}/lvm.log + exit 1 + fi + + # Test sending two images with a text prompt without any image tags. + # The first image is blue and the second image is green. Since the default MAX_IMAGES is 1, only the green image should be sent to the LVM. + result=$(http_proxy="" curl http://localhost:$LVM_PORT/v1/lvm -XPOST -d '{"image": ["iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mNkYPhfz0AEYBxVSF+FAP5FDvcfRYWgAAAAAElFTkSuQmCC", "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mNk+M9Qz0AEYBxVSF+FAAhKDveksOjmAAAAAElFTkSuQmCC"], "prompt":"What are in these images?"}' -H 'Content-Type: application/json') + if [[ $result == *"green"* ]]; then + echo "Result correct." + else + echo "Result wrong." + docker logs llava-service >> ${LOG_PATH}/llava-dependency.log + docker logs lvm-llava-service >> ${LOG_PATH}/lvm.log + exit 1 + fi + + # Same test as above, except including two image tags with the prompt to ensure the number of image tags is reconciled. 
+ # The first image is blue and the second image is green. Since the default MAX_IMAGES is 1, only the green image should be sent to the LVM. + result=$(http_proxy="" curl http://localhost:$LVM_PORT/v1/lvm -XPOST -d '{"image": ["iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mNkYPhfz0AEYBxVSF+FAP5FDvcfRYWgAAAAAElFTkSuQmCC", "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mNk+M9Qz0AEYBxVSF+FAAhKDveksOjmAAAAAElFTkSuQmCC"], "prompt":"\n\nWhat are in these images?"}' -H 'Content-Type: application/json') + if [[ $result == *"green"* ]]; then + echo "Result correct." + else + echo "Result wrong." + docker logs llava-service >> ${LOG_PATH}/llava-dependency.log + docker logs lvm-llava-service >> ${LOG_PATH}/lvm.log exit 1 fi - result=$(http_proxy="" curl http://localhost:$lvm_port/v1/lvm -XPOST -d '{"retrieved_docs": [], "initial_query": "What is this?", "top_n": 1, "metadata": [{"b64_img_str": "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8/5+hnoEIwDiqkL4KAcT9GO0U4BxoAAAAAElFTkSuQmCC", "transcript_for_inference": "yellow image", "video_id": "8c7461df-b373-4a00-8696-9a2234359fe0", "time_of_frame_ms":"37000000", "source_video":"WeAreGoingOnBullrun_8c7461df-b373-4a00-8696-9a2234359fe0.mp4"}]}' -H 'Content-Type: application/json') + result=$(http_proxy="" curl http://localhost:$LVM_PORT/v1/lvm -XPOST -d '{"retrieved_docs": [], "initial_query": "What is this?", "top_n": 1, "metadata": [{"b64_img_str": "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8/5+hnoEIwDiqkL4KAcT9GO0U4BxoAAAAAElFTkSuQmCC", "transcript_for_inference": "yellow image", "video_id": "8c7461df-b373-4a00-8696-9a2234359fe0", "time_of_frame_ms":"37000000", "source_video":"WeAreGoingOnBullrun_8c7461df-b373-4a00-8696-9a2234359fe0.mp4"}]}' -H 'Content-Type: application/json') if [[ $result == *"yellow"* ]]; then echo "Result correct." else echo "Result wrong." - docker logs test-comps-lvm-llava >> ${LOG_PATH}/llava-dependency.log - docker logs test-comps-lvm-llava-svc >> ${LOG_PATH}/llava-server.log + docker logs llava-service >> ${LOG_PATH}/llava-dependency.log + docker logs lvm-llava-service >> ${LOG_PATH}/lvm.log exit 1 fi - result=$(http_proxy="" curl http://localhost:$lvm_port/v1/lvm -XPOST -d '{"retrieved_docs": [], "initial_query": "What is this?", "top_n": 1, "metadata": [{"b64_img_str": "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8/5+hnoEIwDiqkL4KAcT9GO0U4BxoAAAAAElFTkSuQmCC", "transcript_for_inference": "yellow image", "video_id": "8c7461df-b373-4a00-8696-9a2234359fe0", "time_of_frame_ms":"37000000", "source_video":"WeAreGoingOnBullrun_8c7461df-b373-4a00-8696-9a2234359fe0.mp4"}], "chat_template":"The caption of the image is: '\''{context}'\''. {question}"}' -H 'Content-Type: application/json') + result=$(http_proxy="" curl http://localhost:$LVM_PORT/v1/lvm -XPOST -d '{"retrieved_docs": [], "initial_query": "What is this?", "top_n": 1, "metadata": [{"b64_img_str": "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8/5+hnoEIwDiqkL4KAcT9GO0U4BxoAAAAAElFTkSuQmCC", "transcript_for_inference": "yellow image", "video_id": "8c7461df-b373-4a00-8696-9a2234359fe0", "time_of_frame_ms":"37000000", "source_video":"WeAreGoingOnBullrun_8c7461df-b373-4a00-8696-9a2234359fe0.mp4"}], "chat_template":"The caption of the image is: '\''{context}'\''. {question}"}' -H 'Content-Type: application/json') if [[ $result == *"yellow"* ]]; then echo "Result correct." else echo "Result wrong." 
- docker logs test-comps-lvm-llava >> ${LOG_PATH}/llava-dependency.log - docker logs test-comps-lvm-llava-svc >> ${LOG_PATH}/llava-server.log + docker logs llava-service >> ${LOG_PATH}/llava-dependency.log + docker logs lvm-llava-service >> ${LOG_PATH}/lvm.log exit 1 fi # Test the LVM with text only (no image) - result=$(http_proxy="" curl http://localhost:$lvm_port/v1/lvm -XPOST -d '{"image": "", "prompt":"What is deep learning?"}' -H 'Content-Type: application/json') + result=$(http_proxy="" curl http://localhost:$LVM_PORT/v1/lvm -XPOST -d '{"image": "", "prompt":"What is deep learning?"}' -H 'Content-Type: application/json') if [[ $result == *"Deep learning is"* ]]; then echo "Result correct." else echo "Result wrong." - docker logs test-comps-lvm-llava >> ${LOG_PATH}/llava-dependency.log - docker logs test-comps-lvm-llava-svc >> ${LOG_PATH}/llava-server.log + docker logs llava-service >> ${LOG_PATH}/llava-dependency.log + docker logs lvm-llava-service >> ${LOG_PATH}/lvm.log exit 1 fi } function stop_docker() { - cid=$(docker ps -aq --filter "name=test-comps-*") - if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi + docker ps -a --filter "name=llava-service" --filter "name=lvm-llava-service" --format "{{.Names}}" | xargs -r docker stop } function main() { diff --git a/tests/lvms/test_lvms_predictionguard.sh b/tests/lvms/test_lvms_predictionguard.sh index 77d7a7779d..3310f6fbc9 100644 --- a/tests/lvms/test_lvms_predictionguard.sh +++ b/tests/lvms/test_lvms_predictionguard.sh @@ -9,11 +9,15 @@ ip_address=$(hostname -I | awk '{print $1}') # Adjust to a more reliable comman if [ -z "$ip_address" ]; then ip_address="localhost" # Default to localhost if IP address is empty fi +export TAG=comps +export PREDICTIONGUARD_PORT=11504 +# export LVM_PORT=11505 + function build_docker_images() { cd $WORKPATH echo $(pwd) - docker build --no-cache -t opea/lvm-pg:comps -f comps/lvms/predictionguard/Dockerfile . + docker build --no-cache --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -t opea/lvm-pg:comps -f comps/lvms/src/integrations/dependency/predictionguard/Dockerfile . if [ $? -ne 0 ]; then echo "opea/lvm-pg build failed" exit 1 @@ -23,18 +27,18 @@ function build_docker_images() { } function start_service() { - lvm_service_port=9399 + unset http_proxy - docker run -d --name=test-comps-lvm-pg-server \ - -e http_proxy= -e https_proxy= \ - -e PREDICTIONGUARD_API_KEY=${PREDICTIONGUARD_API_KEY} \ - -p 9399:9399 --ipc=host opea/lvm-pg:comps + + export LVM_ENDPOINT=http://$ip_address:$PREDICTIONGUARD_PORT + export LVM_COMPONENT_NAME=OPEA_PREDICTION_GUARD_LVM + docker compose -f comps/lvms/deployment/docker_compose/compose.yaml up predictionguard-service -d + sleep 60 # Sleep for 1 minute to allow the service to start } function validate_microservice() { - lvm_service_port=9399 - result=$(http_proxy="" curl http://${ip_address}:${lvm_service_port}/v1/lvm \ + result=$(http_proxy="" curl http://${ip_address}:${PREDICTIONGUARD_PORT}/v1/lvm \ -X POST \ -d '{"image": "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8/5+hnoEIwDiqkL4KAcT9GO0U4BxoAAAAAElFTkSuQmCC", "prompt": "Describe the image.", "max_new_tokens": 100}' \ -H 'Content-Type: application/json') @@ -43,14 +47,13 @@ function validate_microservice() { echo "Service response is correct." else echo "Result wrong. 
Received was $result" - docker logs test-comps-lvm-pg-server + docker logs predictionguard-service exit 1 fi } function stop_docker() { - cid=$(docker ps -aq --filter "name=test-comps-lvm-pg-*") - if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi + docker ps -a --filter "name=predictionguard-service" --format "{{.Names}}" | xargs -r docker stop } function main() { diff --git a/tests/lvms/test_lvms_tgi-llava_on_intel_hpu.sh b/tests/lvms/test_lvms_tgi-llava_on_intel_hpu.sh deleted file mode 100644 index 1fa0155266..0000000000 --- a/tests/lvms/test_lvms_tgi-llava_on_intel_hpu.sh +++ /dev/null @@ -1,78 +0,0 @@ -#!/bin/bash -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -set -x - -WORKPATH=$(dirname "$PWD") -LOG_PATH="$WORKPATH/tests" -ip_address=$(hostname -I | awk '{print $1}') - -function build_docker_images() { - cd $WORKPATH - echo $(pwd) - - docker build --no-cache -t opea/lvm-tgi:comps -f comps/lvms/tgi-llava/Dockerfile . - if [ $? -ne 0 ]; then - echo "opea/lvm-tgi built fail" - exit 1 - else - echo "opea/lvm-tgi built successful" - fi -} - -function start_service() { - unset http_proxy - model="llava-hf/llava-v1.6-mistral-7b-hf" - lvm_port=5050 - docker run -d --name="test-comps-lvm-tgi-llava" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 5027:80 --runtime=habana -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e SKIP_TOKENIZER_IN_TGI=true -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host ghcr.io/huggingface/tgi-gaudi:2.0.5 --model-id $model --max-input-tokens 4096 --max-total-tokens 8192 - docker run -d --name="test-comps-lvm-tgi" -e LVM_ENDPOINT=http://$ip_address:5027 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p $lvm_port:9399 --ipc=host opea/lvm-tgi:comps - sleep 3m -} - -function validate_microservice() { - lvm_port=5050 - result=$(http_proxy="" curl http://localhost:$lvm_port/v1/lvm -XPOST -d '{"image": "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8/5+hnoEIwDiqkL4KAcT9GO0U4BxoAAAAAElFTkSuQmCC", "prompt":"What is this?"}' -H 'Content-Type: application/json') - if [[ $result == *"yellow"* ]]; then - echo "LVM prompt with an image - Result correct." - else - echo "LVM prompt with an image - Result wrong." - docker logs test-comps-lvm-tgi-llava >> ${LOG_PATH}/llava-dependency.log - docker logs test-comps-lvm-tgi >> ${LOG_PATH}/llava-server.log - exit 1 - fi - - result=$(http_proxy="" curl http://localhost:$lvm_port/v1/lvm --silent --write-out "HTTPSTATUS:%{http_code}" -XPOST -d '{"image": "", "prompt":"What is deep learning?"}' -H 'Content-Type: application/json') - http_status=$(echo $result | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') - if [ "$http_status" -ne "200" ]; then - - echo "LVM prompt without image - HTTP status is not 200. Received status was $http_status" - docker logs test-comps-lvm-tgi-llava >> ${LOG_PATH}/llava-dependency.log - docker logs test-comps-lvm-tgi >> ${LOG_PATH}/llava-server.log - exit 1 - else - echo "LVM prompt without image - HTTP status (successful)" - fi - -} - -function stop_docker() { - cid=$(docker ps -aq --filter "name=test-comps-lvm-tgi*") - if [[ ! 
-z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi -} - -function main() { - - stop_docker - - build_docker_images - start_service - - validate_microservice - - stop_docker - echo y | docker system prune - -} - -main diff --git a/tests/lvms/test_lvms_tgi_llava_on_intel_hpu.sh b/tests/lvms/test_lvms_tgi_llava_on_intel_hpu.sh new file mode 100644 index 0000000000..65ff57811d --- /dev/null +++ b/tests/lvms/test_lvms_tgi_llava_on_intel_hpu.sh @@ -0,0 +1,117 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -x + +WORKPATH=$(dirname "$PWD") +LOG_PATH="$WORKPATH/tests" +ip_address=$(hostname -I | awk '{print $1}') +export TAG=comps +export LLAVA_TGI_PORT=11502 +export LVM_PORT=11503 + +function build_docker_images() { + cd $WORKPATH + echo $(pwd) + + docker build --no-cache --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -t opea/lvm:$TAG -f comps/lvms/src/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/lvm built fail" + exit 1 + else + echo "opea/lvm built successful" + fi +} + +function start_service() { + + unset http_proxy + export LVM_ENDPOINT=http://$ip_address:$LLAVA_TGI_PORT + export LLM_MODEL_ID="llava-hf/llava-v1.6-mistral-7b-hf" + + export LVM_COMPONENT_NAME=OPEA_TGI_LLAVA_LVM + docker compose -f comps/lvms/deployment/docker_compose/compose.yaml up llava-tgi-service lvm-llava-tgi -d + + sleep 15s +} + +function validate_microservice() { + result=$(http_proxy="" curl http://localhost:$LVM_PORT/v1/lvm -XPOST -d '{"image": "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8/5+hnoEIwDiqkL4KAcT9GO0U4BxoAAAAAElFTkSuQmCC", "prompt":"What is this?"}' -H 'Content-Type: application/json') + if [[ $result == *"yellow"* ]]; then + echo "LVM prompt with an image - Result correct." + else + echo "LVM prompt with an image - Result wrong." + docker logs llava-tgi-service >> ${LOG_PATH}/llava-tgi.log + docker logs lvm-llava-tgi-service >> ${LOG_PATH}/lvm.log + exit 1 + fi + + result=$(http_proxy="" curl http://localhost:$LVM_PORT/v1/lvm --silent --write-out "HTTPSTATUS:%{http_code}" -XPOST -d '{"image": "", "prompt":"What is deep learning?"}' -H 'Content-Type: application/json') + http_status=$(echo $result | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') + if [ "$http_status" -ne "200" ]; then + + echo "LVM prompt without image - HTTP status is not 200. Received status was $http_status" + docker logs llava-tgi-service >> ${LOG_PATH}/llava-tgi.log + docker logs lvm-llava-tgi-service >> ${LOG_PATH}/lvm.log + exit 1 + else + echo "LVM prompt without image - HTTP status (successful)" + fi + + # Test sending two images with a text prompt with one image tag in the prompt. + # The first image is green and the second image is blue. Since the default MAX_IMAGES is 1, only the blue image should be sent to the LVM. + result=$(http_proxy="" curl http://localhost:$LVM_PORT/v1/lvm -XPOST -d '{"image": ["iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mNk+M9Qz0AEYBxVSF+FAAhKDveksOjmAAAAAElFTkSuQmCC", "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mNkYPhfz0AEYBxVSF+FAP5FDvcfRYWgAAAAAElFTkSuQmCC"], "prompt":"\nWhat are in these images?"}' -H 'Content-Type: application/json') + if [[ $result == *"blue"* ]]; then + echo "Result correct." + else + echo "Result wrong." + docker logs llava-tgi-service >> ${LOG_PATH}/llava-tgi.log + docker logs lvm-llava-tgi-service >> ${LOG_PATH}/lvm.log + exit 1 + fi + + # Test sending two images with a text prompt without any image tags. 
+ # The first image is blue and the second image is green. Since the default MAX_IMAGES is 1, only the green image should be sent to the LVM. + result=$(http_proxy="" curl http://localhost:$LVM_PORT/v1/lvm -XPOST -d '{"image": ["iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mNkYPhfz0AEYBxVSF+FAP5FDvcfRYWgAAAAAElFTkSuQmCC", "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mNk+M9Qz0AEYBxVSF+FAAhKDveksOjmAAAAAElFTkSuQmCC"], "prompt":"What are in these images?"}' -H 'Content-Type: application/json') + if [[ $result == *"green"* ]]; then + echo "Result correct." + else + echo "Result wrong." + docker logs llava-tgi-service >> ${LOG_PATH}/llava-tgi.log + docker logs lvm-llava-tgi-service >> ${LOG_PATH}/lvm.log + exit 1 + fi + + # Same test as above, except including two image tags with the prompt to ensure the number of image tags is reconciled. + # The first image is blue and the second image is green. Since the default MAX_IMAGES is 1, only the green image should be sent to the LVM. + result=$(http_proxy="" curl http://localhost:$LVM_PORT/v1/lvm -XPOST -d '{"image": ["iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mNkYPhfz0AEYBxVSF+FAP5FDvcfRYWgAAAAAElFTkSuQmCC", "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mNk+M9Qz0AEYBxVSF+FAAhKDveksOjmAAAAAElFTkSuQmCC"], "prompt":"\n\nWhat are in these images?"}' -H 'Content-Type: application/json') + if [[ $result == *"green"* ]]; then + echo "Result correct." + else + echo "Result wrong." + docker logs llava-tgi-service >> ${LOG_PATH}/llava-tgi.log + docker logs lvm-llava-tgi-service >> ${LOG_PATH}/lvm.log + exit 1 + fi +} + +function stop_docker() { + docker ps -a --filter "name=llava-tgi-service" --filter "name=lvm-llava-tgi-service" --format "{{.Names}}" | xargs -r docker stop +} + +function main() { + + stop_docker + + build_docker_images + start_service + + validate_microservice + + stop_docker + echo y | docker system prune + +} + +main diff --git a/tests/lvms/test_lvms_video-llama.sh b/tests/lvms/test_lvms_video-llama.sh deleted file mode 100644 index b488452a34..0000000000 --- a/tests/lvms/test_lvms_video-llama.sh +++ /dev/null @@ -1,117 +0,0 @@ -#!/bin/bash -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -set -xe - -WORKPATH=$(dirname "$PWD") -LOG_PATH="$WORKPATH/tests" -ip_address=$(hostname -I | awk '{print $1}') - -function build_docker_images() { - cd $WORKPATH - echo $(pwd) - docker build --no-cache -t opea/video-llama-lvm-server:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/lvms/video-llama/dependency/Dockerfile . - if $? ; then - echo "opea/video-llama-lvm-server built fail" - exit 1 - else - echo "opea/video-llama-lvm-server built successful" - fi - docker build --no-cache -t opea/lvm-video-llama:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/lvms/video-llama/Dockerfile . - if $? 
; then - echo "opea/lvm-video-llama built fail" - exit 1 - else - echo "opea/lvm-video-llama built successful" - fi - -} - -function start_service() { - cd $WORKPATH - unset http_proxy - dependency_port=5051 - server_port=5052 - export LVM_ENDPOINT=http://$ip_address:$dependency_port - - docker run -d --name="test-comps-lvm-video-llama-dependency" -p $dependency_port:9009 \ - --ipc=host \ - -e http_proxy=$http_proxy \ - -e https_proxy=$https_proxy \ - -e no_proxy=$no_proxy \ - -e llm_download="True" \ - opea/video-llama-lvm-server:comps - - docker run -d --name="test-comps-lvm-video-llama" -p $server_port:9000 \ - --ipc=host \ - -e http_proxy=$http_proxy \ - -e https_proxy=$https_proxy \ - -e no_proxy=$no_proxy \ - -e LVM_ENDPOINT=$LVM_ENDPOINT \ - opea/lvm-video-llama:comps - - echo "Waiting for the LVM service to start" - - # check whether lvm dependency is fully ready - n=0 - until [[ "$n" -ge 100 ]] || [[ $ready == true ]]; do - docker logs test-comps-lvm-video-llama-dependency &> ${LOG_PATH}/lvm-video-llama-dependency.log - n=$((n+1)) - if grep -q "Uvicorn running on" ${LOG_PATH}/lvm-video-llama-dependency.log; then - break - fi - sleep 5s - done - sleep 5s - - # check whether lvm service is fully ready - n=0 - until [[ "$n" -ge 100 ]] || [[ $ready == true ]]; do - docker logs test-comps-lvm-video-llama &> ${LOG_PATH}/lvm-video-llama.log - n=$((n+1)) - if grep -q "Uvicorn running on" ${LOG_PATH}/lvm-video-llama.log; then - break - fi - sleep 5s - done - sleep 5s -} - -function validate_microservice() { - - server_port=5052 - result=$(http_proxy="" curl http://localhost:$server_port/v1/lvm -X POST -d '{"video_url":"silence_girl.mp4","chunk_start": 0,"chunk_duration": 7,"prompt":"What is the person doing?","max_new_tokens": 50}' -H 'Content-Type: application/json') - - if [[ $result == *"silence"* ]]; then - echo "Result correct." - else - echo "Result wrong." - docker logs test-comps-lvm-video-llama-dependency &> ${LOG_PATH}/lvm-video-llama-dependency.log - docker logs test-comps-lvm-video-llama &> ${LOG_PATH}/lvm-video-llama.log - exit 1 - fi -} - -function stop_docker() { - cid=$(docker ps -aq --filter "name=test-comps-lvm-video-llama*") - if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi - if docker volume ls | grep -q video-llama-model; then docker volume rm video-llama-model; fi - -} - -function main() { - - stop_docker - - build_docker_images - start_service - - validate_microservice - - stop_docker - echo y | docker system prune - -} - -main diff --git a/tests/lvms/test_lvms_video_llama.sh b/tests/lvms/test_lvms_video_llama.sh new file mode 100644 index 0000000000..7dc1be0e70 --- /dev/null +++ b/tests/lvms/test_lvms_video_llama.sh @@ -0,0 +1,79 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -xe + +WORKPATH=$(dirname "$PWD") +LOG_PATH="$WORKPATH/tests" +ip_address=$(hostname -I | awk '{print $1}') +export TAG=comps +export VIDEO_LLAMA_PORT=11506 +export LVM_PORT=11507 + +function build_docker_images() { + cd $WORKPATH + echo $(pwd) + docker build --no-cache --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -t opea/lvm-video-llama:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/lvms/src/integrations/dependency/video-llama/Dockerfile . + if $? 
; then + echo "opea/lvm-video-llama built fail" + exit 1 + else + echo "opea/lvm-video-llama built successful" + fi + docker build --no-cache --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -t opea/lvm:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/lvms/src/Dockerfile . + if $? ; then + echo "opea/lvm built fail" + exit 1 + else + echo "opea/lvm built successful" + fi + +} + +function start_service() { + cd $WORKPATH + unset http_proxy + + export LVM_ENDPOINT=http://$ip_address:$VIDEO_LLAMA_PORT + export LVM_COMPONENT_NAME=OPEA_VIDEO_LLAMA_LVM + + docker compose -f comps/lvms/deployment/docker_compose/compose.yaml up video-llama-service lvm-video-llama -d + + sleep 15 +} + +function validate_microservice() { + + result=$(http_proxy="" curl http://localhost:$LVM_PORT/v1/lvm -X POST -d '{"video_url":"silence_girl.mp4","chunk_start": 0,"chunk_duration": 7,"prompt":"What is the person doing?","max_new_tokens": 50}' -H 'Content-Type: application/json') + + if [[ $result == *"silence"* ]]; then + echo "Result correct." + else + echo "Result wrong." + docker logs video-llama-service &> ${LOG_PATH}/video-llama-service.log + docker logs lvm-video-llama-service &> ${LOG_PATH}/lvm.log + exit 1 + fi +} + +function stop_docker() { + docker ps -a --filter "name=video-llama-service" --filter "name=lvm-video-llama-service" --format "{{.Names}}" | xargs -r docker stop + +} + +function main() { + + stop_docker + + build_docker_images + start_service + + validate_microservice + + stop_docker + echo y | docker system prune + +} + +main diff --git a/tests/prompt_registry/test_prompt_registry_mongo.sh b/tests/prompt_registry/test_prompt_registry_mongo.sh index 47b84aa6af..b63dab81b4 100644 --- a/tests/prompt_registry/test_prompt_registry_mongo.sh +++ b/tests/prompt_registry/test_prompt_registry_mongo.sh @@ -15,27 +15,28 @@ export COLLECTION_NAME=${COLLECTION_NAME:-"test"} function build_docker_images() { cd $WORKPATH echo $(pwd) - docker run -d -p 27017:27017 --name=test-comps-mongo mongo:latest - docker build --no-cache -t opea/promptregistry-server:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/prompt_registry/src/Dockerfile . + docker build --no-cache -t opea/promptregistry-mongo:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/prompt_registry/src/Dockerfile . if [ $? -ne 0 ]; then - echo "opea/promptregistry-server built fail" + echo "opea/promptregistry-mongo built fail" exit 1 else - echo "opea/promptregistry-server built successful" + echo "opea/promptregistry-mongo built successful" fi } function start_service() { - - docker run -d --name="test-comps-promptregistry-server" -p 6018:6018 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e MONGO_HOST=${MONGO_HOST} -e MONGO_PORT=${MONGO_PORT} -e DB_NAME=${DB_NAME} -e COLLECTION_NAME=${COLLECTION_NAME} opea/promptregistry-server:comps - + cd $WORKPATH + export PROMPT_REGISTRY_PORT=10600 + export TAG=comps + cd comps/prompt_registry/deployment/docker_compose/ + docker compose up -d sleep 10s } function validate_microservice() { result=$(curl -X 'POST' \ - http://$ip_address:6018/v1/prompt/create \ + http://$ip_address:${PROMPT_REGISTRY_PORT}/v1/prompt/create \ -H 'accept: application/json' \ -H 'Content-Type: application/json' \ -d '{ @@ -46,14 +47,14 @@ function validate_microservice() { echo "Correct result." else echo "Incorrect result." 
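The build checks added above in tests/lvms/test_lvms_video_llama.sh use "if $? ; then", which executes the exit status as a command instead of testing it, so a failed build is never reported as such; the proxy --build-arg flags are also passed twice. A minimal sketch of the same check in the form the other new test scripts in this change use, reusing the image name and Dockerfile path from the build command above:

    docker build --no-cache \
        --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy \
        -t opea/lvm:comps \
        -f comps/lvms/src/Dockerfile .
    if [ $? -ne 0 ]; then
        echo "opea/lvm built fail"
        exit 1
    else
        echo "opea/lvm built successful"
    fi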
- docker logs test-comps-promptregistry-server + docker logs promptregistry-mongo-server exit 1 fi } function stop_docker() { - cid=$(docker ps -aq --filter "name=test-comps*") + cid=$(docker ps -aq --filter "name=promptregistry-mongo-*") if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } diff --git a/tests/rerankings/test_rerankings_tei.sh b/tests/rerankings/test_rerankings_tei.sh index 3d58658cf5..8839f46e48 100644 --- a/tests/rerankings/test_rerankings_tei.sh +++ b/tests/rerankings/test_rerankings_tei.sh @@ -2,10 +2,11 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -set -x +set -xe WORKPATH=$(dirname "$PWD") -ip_address=$(hostname -I | awk '{print $1}') +host_ip=$(hostname -I | awk '{print $1}') +service_name="reranking-tei" function build_docker_images() { cd $WORKPATH @@ -24,24 +25,21 @@ function build_docker_images() { } function start_service() { - tei_endpoint=5006 - # Remember to set HF_TOKEN before invoking this test! - export HF_TOKEN=${HF_TOKEN} - model=BAAI/bge-reranker-base - revision=refs/pr/4 - volume=$PWD/data - docker run -d --name="test-comps-reranking-endpoint" -p $tei_endpoint:80 -v $volume:/data -e http_proxy=$http_proxy -e https_proxy=$https_proxy --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 --model-id $model - sleep 3m - export TEI_RERANKING_ENDPOINT="http://${ip_address}:${tei_endpoint}" - tei_service_port=5007 - unset http_proxy - docker run -d --name="test-comps-reranking-server" -e LOGFLAG=True -p ${tei_service_port}:8000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e TEI_RERANKING_ENDPOINT=$TEI_RERANKING_ENDPOINT -e HF_TOKEN=$HF_TOKEN -e RERANK_COMPONENT_NAME="OPEA_TEI_RERANKING" opea/reranking:comps - sleep 15 + export RERANK_MODEL_ID="BAAI/bge-reranker-base" + export TEI_RERANKING_PORT=12003 + export RERANK_PORT=10700 + export TEI_RERANKING_ENDPOINT="http://${host_ip}:${TEI_RERANKING_PORT}" + export TAG=comps + export host_ip=${host_ip} + + cd $WORKPATH/comps/rerankings/deployment/docker_compose + docker compose -f compose.yaml up ${service_name} -d > start_services_with_compose.log + sleep 1m } function validate_microservice() { - tei_service_port=5007 - local CONTENT=$(curl http://${ip_address}:${tei_service_port}/v1/reranking \ + tei_service_port=10700 + local CONTENT=$(curl http://${host_ip}:${tei_service_port}/v1/reranking \ -X POST \ -d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' \ -H 'Content-Type: application/json') @@ -57,8 +55,8 @@ function validate_microservice() { } function stop_docker() { - cid=$(docker ps -aq --filter "name=test-comps-reranking*") - if [[ ! 
-z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi + cd $WORKPATH/comps/rerankings/deployment/docker_compose + docker compose -f compose.yaml down ${service_name} --remove-orphans } function main() { diff --git a/tests/rerankings/test_rerankings_tei_on_intel_hpu.sh b/tests/rerankings/test_rerankings_tei_on_intel_hpu.sh new file mode 100644 index 0000000000..7920f455d0 --- /dev/null +++ b/tests/rerankings/test_rerankings_tei_on_intel_hpu.sh @@ -0,0 +1,76 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -xe + +WORKPATH=$(dirname "$PWD") +host_ip=$(hostname -I | awk '{print $1}') +service_name="reranking-tei-gaudi" + +function build_docker_images() { + cd $WORKPATH + docker build --no-cache \ + -t opea/reranking:comps \ + --build-arg https_proxy=$https_proxy \ + --build-arg http_proxy=$http_proxy \ + --build-arg SERVICE=tei \ + -f comps/rerankings/src/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/reranking built fail" + exit 1 + else + echo "opea/reranking built successful" + fi +} + +function start_service() { + export RERANK_MODEL_ID="BAAI/bge-reranker-base" + export TEI_RERANKING_PORT=12004 + export RERANK_PORT=10701 + export TEI_RERANKING_ENDPOINT="http://${host_ip}:${TEI_RERANKING_PORT}" + export TAG=comps + export host_ip=${host_ip} + + cd $WORKPATH/comps/rerankings/deployment/docker_compose + docker compose -f compose.yaml up ${service_name} -d > start_services_with_compose.log + sleep 1m +} + +function validate_microservice() { + tei_service_port=10701 + local CONTENT=$(curl http://${host_ip}:${tei_service_port}/v1/reranking \ + -X POST \ + -d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' \ + -H 'Content-Type: application/json') + + if echo "$CONTENT" | grep -q "documents"; then + echo "Content is as expected." 
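The failure branch just below still pulls logs from test-comps-reranking-server and test-comps-reranking-endpoint, container names left over from the docker-run based flow; with the compose startup used in start_service above, no containers with those names exist. One possible shape for the log collection, using the compose service itself (the log destination under $WORKPATH/tests is an assumption, matching the LOG_PATH convention of the other tests):

    cd $WORKPATH/comps/rerankings/deployment/docker_compose
    docker compose -f compose.yaml logs ${service_name} > $WORKPATH/tests/${service_name}.log
    exit 1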
+ else + echo "Content does not match the expected result: $CONTENT" + docker logs test-comps-reranking-server + docker logs test-comps-reranking-endpoint + exit 1 + fi +} + +function stop_docker() { + cd $WORKPATH/comps/rerankings/deployment/docker_compose + docker compose -f compose.yaml down ${service_name} --remove-orphans +} + +function main() { + + stop_docker + + build_docker_images + start_service + + validate_microservice + + stop_docker + echo y | docker system prune + +} + +main diff --git a/tests/rerankings/test_rerankings_videoqna.sh b/tests/rerankings/test_rerankings_videoqna.sh index a89dd06add..c1a3667779 100644 --- a/tests/rerankings/test_rerankings_videoqna.sh +++ b/tests/rerankings/test_rerankings_videoqna.sh @@ -6,6 +6,8 @@ set -xe WORKPATH=$(dirname "$PWD") ip_address=$(hostname -I | awk '{print $1}') +service_name="reranking-videoqna" + function build_docker_images() { cd $WORKPATH @@ -24,28 +26,24 @@ function build_docker_images() { } function start_service() { - docker run -d --name "test-comps-reranking-server" \ - -p 5037:8000 \ - --ipc=host \ - -e no_proxy=${no_proxy} \ - -e http_proxy=${http_proxy} \ - -e https_proxy=${https_proxy} \ - -e CHUNK_DURATION=${CHUNK_DURATION} \ - -e RERANK_COMPONENT_NAME="OPEA_VIDEO_RERANKING" \ - -e FILE_SERVER_ENDPOINT=${FILE_SERVER_ENDPOINT} \ - opea/reranking:comps - - - until docker logs test-comps-reranking-server 2>&1 | grep -q "Uvicorn running on"; do - sleep 2 - done + export TEI_RERANKING_PORT=12006 + export RERANK_PORT=10703 + export TEI_RERANKING_ENDPOINT="http://${host_ip}:${TEI_RERANKING_PORT}" + export TAG=comps + export host_ip=${host_ip} + + cd $WORKPATH/comps/rerankings/deployment/docker_compose + docker compose -f compose.yaml up ${service_name} -d > start_services_with_compose.log + sleep 1m } function validate_microservice() { + videoqna_service_port=10703 + result=$(\ http_proxy="" \ curl -X 'POST' \ - "http://${ip_address}:5037/v1/reranking" \ + "http://${ip_address}:$videoqna_service_port/v1/reranking" \ -H 'accept: application/json' \ -H 'Content-Type: application/json' \ -d '{ @@ -71,7 +69,7 @@ function validate_microservice() { result=$(\ http_proxy="" \ curl -X 'POST' \ - "http://${ip_address}:5037/v1/reranking" \ + "http://${ip_address}:$videoqna_service_port/v1/reranking" \ -H 'accept: application/json' \ -H 'Content-Type: application/json' \ -d '{ @@ -90,8 +88,8 @@ function validate_microservice() { } function stop_docker() { - cid=$(docker ps -aq --filter "name=test-comps-reranking*") - if [[ ! 
-z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi + cd $WORKPATH/comps/rerankings/deployment/docker_compose + docker compose -f compose.yaml down ${service_name} --remove-orphans } function main() { diff --git a/tests/retrievers/test_retrievers_elasticsearch.sh b/tests/retrievers/test_retrievers_elasticsearch.sh new file mode 100644 index 0000000000..60996a44ec --- /dev/null +++ b/tests/retrievers/test_retrievers_elasticsearch.sh @@ -0,0 +1,98 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -x + +IMAGE_REPO=${IMAGE_REPO:-"opea"} +export REGISTRY=${IMAGE_REPO} +export TAG="comps" +echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" +echo "TAG=${TAG}" + +WORKPATH=$(dirname "$PWD") +LOG_PATH="$WORKPATH/tests" +export host_ip=$(hostname -I | awk '{print $1}') +service_name="retriever-elasticsearch" + +function build_docker_images() { + cd $WORKPATH + docker build --no-cache -t ${REGISTRY:-opea}/retriever:${TAG:-latest} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/src/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/retriever built fail" + exit 1 + else + echo "opea/retriever built successful" + fi +} + +function start_service() { + export ELASTICSEARCH_PORT1=11608 # 11600-11699 + export ELASTICSEARCH_PORT2=11609 + export RETRIEVER_PORT=11610 + export HF_TOKEN=${HF_TOKEN} + export ES_CONNECTION_STRING="http://${host_ip}:${ELASTICSEARCH_PORT1}" + export INDEX_NAME="test-elasticsearch" + export LOGFLAG=True + + cd $WORKPATH/comps/retrievers/deployment/docker_compose + docker compose -f compose.yaml up ${service_name} -d > ${LOG_PATH}/start_services_with_compose.log + + sleep 15s + + cd $WORKPATH + bash ./tests/utils/wait-for-it.sh ${host_ip}:$RETRIEVER_PORT -s -t 100 -- echo "Retriever up" + RETRIEVER_UP=$? + if [ ${RETRIEVER_UP} -ne 0 ]; then + echo "Could not start Retriever." + return 1 + fi + + sleep 5s + bash ./tests/utils/wait-for-it.sh ${host_ip}:$RETRIEVER_PORT -s -t 1 -- echo "Retriever still up" + RETRIEVER_UP=$? + if [ ${RETRIEVER_UP} -ne 0 ]; then + echo "Retriever crashed." + return 1 + fi +} + +function validate_microservice() { + test_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") + + + result=$(http_proxy='' + curl http://${host_ip}:$RETRIEVER_PORT/v1/retrieval \ + -X POST \ + -d "{\"text\":\"test\",\"embedding\":${test_embedding}}" \ + -H 'Content-Type: application/json') + if [[ $result == *"retrieved_docs"* ]]; then + echo "Result correct." + else + echo "Result wrong. 
Received was $result" + docker logs elasticsearch-vector-db >> ${LOG_PATH}/vectorstore.log + docker logs ${service_name} >> ${LOG_PATH}/retriever-elasticsearch.log + exit 1 + fi +} + +function stop_docker() { + cd $WORKPATH/comps/retrievers/deployment/docker_compose + docker compose -f compose.yaml down ${service_name} --remove-orphans +} + +function main() { + + stop_docker + + build_docker_images + start_service + + validate_microservice + + stop_docker + echo y | docker system prune + +} + +main diff --git a/tests/retrievers/test_retrievers_elasticsearch_langchain.sh b/tests/retrievers/test_retrievers_elasticsearch_langchain.sh deleted file mode 100644 index 53db592b59..0000000000 --- a/tests/retrievers/test_retrievers_elasticsearch_langchain.sh +++ /dev/null @@ -1,93 +0,0 @@ -#!/bin/bash -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -set -x - -WORKPATH=$(dirname "$PWD") -LOG_PATH="$WORKPATH/tests" -ip_address=$(hostname -I | awk '{print $1}') - -function build_docker_images() { - cd $WORKPATH - docker build --no-cache -t opea/retriever-elasticsearch:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/elasticsearch/langchain/Dockerfile . - if [ $? -ne 0 ]; then - echo "opea/retriever-elasticsearch built fail" - exit 1 - else - echo "opea/retriever-elasticsearch built successful" - fi -} - -function start_service() { - # elasticsearch - elasticsearch_port=9200 - docker run -d --name "test-comps-retriever-elasticsearch-vectorstore" -e ES_JAVA_OPTS="-Xms1g -Xmx1g" -e "discovery.type=single-node" -e "xpack.security.enabled=false" -p $elasticsearch_port:9200 -p 9300:9300 docker.elastic.co/elasticsearch/elasticsearch:8.16.0 - export ES_CONNECTION_STRING="http://${ip_address}:${elasticsearch_port}" - sleep 10s - - # elasticsearch retriever - INDEX_NAME="test-elasticsearch" - retriever_port=7000 - docker run -d --name="test-comps-retriever-elasticsearch-ms" -p $retriever_port:7000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e ES_CONNECTION_STRING=$ES_CONNECTION_STRING -e INDEX_NAME=$INDEX_NAME opea/retriever-elasticsearch:comps - sleep 15s - - bash ./tests/utils/wait-for-it.sh ${ip_address}:$retriever_port -s -t 100 -- echo "Retriever up" - RETRIEVER_UP=$? - if [ ${RETRIEVER_UP} -ne 0 ]; then - echo "Could not start Retriever." - return 1 - fi - - sleep 5s - bash ./tests/utils/wait-for-it.sh ${ip_address}:$retriever_port -s -t 1 -- echo "Retriever still up" - RETRIEVER_UP=$? - if [ ${RETRIEVER_UP} -ne 0 ]; then - echo "Retriever crashed." - return 1 - fi -} - -function validate_microservice() { - retriever_port=7000 - test_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") - - - result=$(http_proxy='' - curl http://${ip_address}:$retriever_port/v1/retrieval \ - -X POST \ - -d "{\"text\":\"test\",\"embedding\":${test_embedding}}" \ - -H 'Content-Type: application/json') - if [[ $result == *"retrieved_docs"* ]]; then - echo "Result correct." - else - echo "Result wrong. Received was $result" - docker logs test-comps-retriever-elasticsearch-vectorstore >> ${LOG_PATH}/vectorstore.log - docker logs test-comps-retriever-elasticsearch-tei-endpoint >> ${LOG_PATH}/tei-endpoint.log - docker logs test-comps-retriever-elasticsearch-ms >> ${LOG_PATH}/retriever-elasticsearch.log - exit 1 - fi -} - -function stop_docker() { - cid_retrievers=$(docker ps -aq --filter "name=test-comps-retriever-elasticsearch*") - if [[ ! 
-z "$cid_retrievers" ]]; then - docker stop $cid_retrievers && docker rm $cid_retrievers && sleep 1s - fi -} - -function main() { - - stop_docker - - build_docker_images - start_service - - validate_microservice - - stop_docker - echo y | docker system prune - -} - -main diff --git a/tests/retrievers/test_retrievers_milvus.sh b/tests/retrievers/test_retrievers_milvus.sh index dae4243ea5..507f43c5af 100644 --- a/tests/retrievers/test_retrievers_milvus.sh +++ b/tests/retrievers/test_retrievers_milvus.sh @@ -4,41 +4,49 @@ set -x +IMAGE_REPO=${IMAGE_REPO:-"opea"} +export REGISTRY=${IMAGE_REPO} +export TAG="comps" +echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" +echo "TAG=${TAG}" + WORKPATH=$(dirname "$PWD") LOG_PATH="$WORKPATH/tests" -ip_address=$(hostname -I | awk '{print $1}') +export host_ip=$(hostname -I | awk '{print $1}') +service_name="retriever-milvus" function build_docker_images() { cd $WORKPATH - docker build --no-cache -t opea/retriever-milvus:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/src/Dockerfile . + docker build --no-cache -t ${REGISTRY:-opea}/retriever:${TAG:-latest} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/src/Dockerfile . if [ $? -ne 0 ]; then - echo "opea/retriever-milvus built fail" + echo "opea/retriever built fail" exit 1 else - echo "opea/retriever-milvus built successful" + echo "opea/retriever built successful" fi } function start_service() { - # start milvus vector db - cd $WORKPATH/comps/dataprep/milvus/langchain/ + export MINIO_PORT1=11611 # 11600-11699 + export MINIO_PORT2=11612 + export MILVUS_STANDALONE_PORT=11613 + export TEI_EMBEDDER_PORT=11614 + export RETRIEVER_PORT=11615 + export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" + export HF_TOKEN=${HF_TOKEN} + export LOGFLAG=True + export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:${TEI_EMBEDDER_PORT}" + export MILVUS_HOST=${host_ip} + # wget https://raw.githubusercontent.com/milvus-io/milvus/v2.4.9/configs/milvus.yaml # wget https://github.com/milvus-io/milvus/releases/download/v2.4.9/milvus-standalone-docker-compose.yml -O docker-compose.yml # sed '/- \${DOCKER_VOLUME_DIRECTORY:-\.}\/volumes\/milvus:\/var\/lib\/milvus/a \ \ \ \ \ \ - \${DOCKER_VOLUME_DIRECTORY:-\.}\/milvus.yaml:\/milvus\/configs\/milvus.yaml' -i docker-compose.yml - docker compose up -d - # tei endpoint - tei_endpoint=5014 - model="BAAI/bge-base-en-v1.5" - docker run -d --name="test-comps-retriever-milvus-tei-endpoint" -p $tei_endpoint:80 -v ./data:/data --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 --model-id $model - export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:${tei_endpoint}" + cd $WORKPATH/comps/third_parties/milvus/deployment/docker_compose/ + docker compose -f compose.yaml up -d > ${LOG_PATH}/start_services_with_compose_milvus.log - # milvus retriever - export MILVUS_HOST=${ip_address} - export HUGGINGFACEHUB_API_TOKEN=$HF_TOKEN - retriever_port=5015 - # unset http_proxy - docker run -d --name="test-comps-retriever-milvus-server" -p ${retriever_port}:7000 --ipc=host -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e MILVUS_HOST=$ip_address -e LOGFLAG=true -e RETRIEVER_COMPONENT_NAME="OPEA_RETRIEVER_MILVUS" opea/retriever-milvus:comps + cd $WORKPATH/comps/retrievers/deployment/docker_compose + docker compose -f compose.yaml up ${service_name} -d > 
${LOG_PATH}/start_services_with_compose.log sleep 1m } @@ -46,10 +54,9 @@ function start_service() { function validate_microservice() { local test_embedding="$1" - retriever_port=5015 export PATH="${HOME}/miniforge3/bin:$PATH" source activate - URL="http://${ip_address}:$retriever_port/v1/retrieval" + URL="http://${host_ip}:$RETRIEVER_PORT/v1/retrieval" HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "{\"text\":\"test\",\"embedding\":${test_embedding}}" -H 'Content-Type: application/json' "$URL") if [ "$HTTP_STATUS" -eq 200 ]; then @@ -60,23 +67,22 @@ function validate_microservice() { echo "[ retriever ] Content is as expected." else echo "[ retriever ] Content does not match the expected result: $CONTENT" - docker logs test-comps-retriever-milvus-server >> ${LOG_PATH}/retriever.log + docker logs ${service_name} >> ${LOG_PATH}/retriever.log exit 1 fi else echo "[ retriever ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs test-comps-retriever-milvus-server >> ${LOG_PATH}/retriever.log + docker logs ${service_name} >> ${LOG_PATH}/retriever.log exit 1 fi } function stop_docker() { - cid_retrievers=$(docker ps -aq --filter "name=test-comps-retriever-milvus*") - if [[ ! -z "$cid_retrievers" ]]; then - docker stop $cid_retrievers && docker rm $cid_retrievers && sleep 1s - fi - cid=$(docker ps -aq --filter "name=milvus-*") - if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi + cd $WORKPATH/comps/third_parties/milvus/deployment/docker_compose/ + docker compose -f compose.yaml down --remove-orphans + + cd $WORKPATH/comps/retrievers/deployment/docker_compose + docker compose -f compose.yaml down ${service_name} --remove-orphans } function main() { diff --git a/tests/retrievers/test_retrievers_neo4j_langchain.sh b/tests/retrievers/test_retrievers_neo4j_langchain.sh deleted file mode 100644 index 9855fe75ff..0000000000 --- a/tests/retrievers/test_retrievers_neo4j_langchain.sh +++ /dev/null @@ -1,99 +0,0 @@ -#!/bin/bash -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -set -x - -WORKPATH=$(dirname "$PWD") -LOG_PATH="$WORKPATH/tests" -ip_address=$(hostname -I | awk '{print $1}') - -function build_docker_images() { - cd $WORKPATH - docker run -d -p 7474:7474 -p 7687:7687 -v ./data:/data -v ./plugins:/plugins --name test-comps-neo4j-apoc1 -e NEO4J_AUTH=neo4j/password -e NEO4J_PLUGINS=\[\"apoc\"\] neo4j:latest - sleep 30s - - docker build --no-cache -t opea/retriever-neo4j:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/neo4j/langchain/Dockerfile . - if [ $? 
-ne 0 ]; then - echo "opea/retriever-neo4j built fail" - exit 1 - else - echo "opea/retriever-neo4j built successful" - fi -} - -function start_service() { - # tei endpoint - tei_endpoint=5434 - model="BAAI/bge-base-en-v1.5" - docker run -d --name="test-comps-retriever-neo4j-tei-endpoint" -p $tei_endpoint:80 -v ./data:/data --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 --model-id $model - sleep 30s - export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:${tei_endpoint}" - - # Neo4J retriever - export NEO4J_URI="bolt://${ip_address}:7687" - export NEO4J_USERNAME="neo4j" - export NEO4J_PASSWORD="password" - retriever_port=5435 - # unset http_proxy - export no_proxy="localhost,127.0.0.1,"${ip_address} - docker run -d --name="test-comps-retriever-neo4j-server" -p ${retriever_port}:7000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e NEO4J_URI="bolt://${ip_address}:7687" -e NEO4J_USERNAME="neo4j" -e NEO4J_PASSWORD="password" opea/retriever-neo4j:comps - - sleep 1m -} - -function validate_microservice() { - retriever_port=5435 - export PATH="${HOME}/miniforge3/bin:$PATH" - source activate - URL="http://${ip_address}:$retriever_port/v1/retrieval" - - test_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") - - HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "{\"text\":\"test\",\"embedding\":${test_embedding}}" -H 'Content-Type: application/json' "$URL") - if [ "$HTTP_STATUS" -eq 200 ]; then - echo "[ retriever ] HTTP status is 200. Checking content..." - local CONTENT=$(curl -s -X POST -d "{\"text\":\"test\",\"embedding\":${test_embedding}}" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/retriever.log) - - if echo "$CONTENT" | grep -q "retrieved_docs"; then - echo "[ retriever ] Content is as expected." - else - echo "[ retriever ] Content does not match the expected result: $CONTENT" - docker logs test-comps-retriever-neo4j-server >> ${LOG_PATH}/retriever.log - docker logs test-comps-retriever-neo4j-tei-endpoint >> ${LOG_PATH}/tei.log - exit 1 - fi - else - echo "[ retriever ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs test-comps-retriever-neo4j-server >> ${LOG_PATH}/retriever.log - docker logs test-comps-retriever-neo4j-tei-endpoint >> ${LOG_PATH}/tei.log - exit 1 - fi -} - -function stop_docker() { - cid_retrievers=$(docker ps -aq --filter "name=test-comps-retriever-neo4j*") - if [[ ! -z "$cid_retrievers" ]]; then - docker stop $cid_retrievers && docker rm $cid_retrievers && sleep 1s - fi - cid_db=$(docker ps -aq --filter "name=test-comps-neo4j-apoc1") - if [[ ! 
-z "$cid_retrievers" ]]; then - docker stop $cid_retrievers && docker rm $cid_retrievers && sleep 1s - fi -} - -function main() { - - stop_docker - - build_docker_images - start_service - - validate_microservice - - stop_docker - echo y | docker system prune - -} - -main diff --git a/tests/retrievers/test_retrievers_neo4j_llama_index_on_intel_hpu.sh b/tests/retrievers/test_retrievers_neo4j_on_intel_hpu.sh old mode 100755 new mode 100644 similarity index 61% rename from tests/retrievers/test_retrievers_neo4j_llama_index_on_intel_hpu.sh rename to tests/retrievers/test_retrievers_neo4j_on_intel_hpu.sh index 3d252ffc13..f6857f35cb --- a/tests/retrievers/test_retrievers_neo4j_llama_index_on_intel_hpu.sh +++ b/tests/retrievers/test_retrievers_neo4j_on_intel_hpu.sh @@ -4,83 +4,70 @@ set -x +IMAGE_REPO=${IMAGE_REPO:-"opea"} +export REGISTRY=${IMAGE_REPO} +export TAG="comps" +echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" +echo "TAG=${TAG}" + WORKPATH=$(dirname "$PWD") LOG_PATH="$WORKPATH/tests" -ip_address=$(hostname -I | awk '{print $1}') +export host_ip=$(hostname -I | awk '{print $1}') +service_name="retriever-neo4j" function build_docker_images() { cd $WORKPATH echo "current dir: $PWD" - docker build --no-cache -t opea/retriever-neo4j-llamaindex:comps --build-arg no_proxy=$no_proxy --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/neo4j/llama_index/Dockerfile . + docker build --no-cache -t ${REGISTRY:-opea}/retriever:${TAG:-latest} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/src/Dockerfile . if [ $? -ne 0 ]; then - echo "opea/retriever-neo4j-llamaindex built fail" + echo "opea/retriever built fail" exit 1 else - echo "opea/retriever-neo4j-llamaindex built successful" + echo "opea/retriever built successful" fi - docker build --no-cache -t opea/dataprep-neo4j-llamaindex:comps --build-arg no_proxy=$no_proxy --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/neo4j/llama_index/Dockerfile . + docker build --no-cache -t opea/dataprep-neo4j-llamaindex:comps --build-arg no_proxy=$no_proxy --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/src/Dockerfile . if [ $? 
-ne 0 ]; then echo "opea/dataprep-neo4j-llamaindex built fail" exit 1 else echo "opea/dataprep-neo4j-llamaindex built successful" fi - docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5 + + docker pull ghcr.io/huggingface/tgi-gaudi:2.3.1 docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 } function start_service() { - # neo4j-apoc - docker run -d -p 7474:7474 -p 7687:7687 --name test-comps-retrievers-neo4j-llama-index-neo4j-apoc --env NEO4J_AUTH=neo4j/neo4jtest -e NEO4J_apoc_export_file_enabled=true -e NEO4J_apoc_import_file_enabled=true -e NEO4J_apoc_import_file_use__neo4j__config=true -e NEO4J_PLUGINS=\[\"apoc\"\] neo4j:latest - - # tei endpoint - emb_model="BAAI/bge-base-en-v1.5" - docker run -d --name="test-comps-retrievers-neo4j-llama-index-tei" -p 6006:80 -v ./data:/data -e no_proxy=$no_proxy -e http_proxy=$http_proxy \ - -e https_proxy=$https_proxy -e HF_TOKEN=$HF_TOKEN --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 --model-id $emb_model - sleep 30s - export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:6006" - - # tgi gaudi endpoint - # Meta-Llama-3-8B-Instruct IS NOT GOOD ENOUGH FOR EXTRACTING HIGH QUALITY GRAPH BUT OK FOR CI TESTING - model="meta-llama/Meta-Llama-3-8B-Instruct" - docker run -d --name="test-comps-retrievers-neo4j-llama-index-tgi" -p 6005:80 -v ./data:/data --runtime=habana -e HABANA_VISIBLE_DEVICES=all \ - -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e HF_TOKEN=$HF_TOKEN -e ENABLE_HPU_GRAPH=true -e LIMIT_HPU_GRAPH=true \ - -e USE_FLASH_ATTENTION=true -e FLASH_ATTENTION_RECOMPUTE=true --cap-add=sys_nice -e no_proxy=$no_proxy -e http_proxy=$http_proxy -e https_proxy=$https_proxy \ - --ipc=host --pull always ghcr.io/huggingface/tgi-gaudi:2.0.5 --model-id $model --max-input-tokens 1024 --max-total-tokens 3000 - # extra time to load large model - echo "Waiting for tgi gaudi ready" - n=0 - until [[ "$n" -ge 300 ]] || [[ $ready == true ]]; do - docker logs test-comps-retrievers-neo4j-llama-index-tgi &> ${LOG_PATH}/tgi-gaudi-service.log - n=$((n+1)) - if grep -q Connected ${LOG_PATH}/tgi-gaudi-service.log; then - break - fi - sleep 5s - done - sleep 5s - echo "Service started successfully" - export TGI_LLM_ENDPOINT="http://${ip_address}:6005" + export NEO4J_PORT1=11631 + export NEO4J_PORT2=11632 + export TEI_EMBEDDER_PORT=11633 + export LLM_ENDPOINT_PORT=11634 + export RETRIEVER_PORT=11635 + export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" + export DATA_PATH="/data2/cache" + export MAX_INPUT_TOKENS=1024 + export MAX_TOTAL_TOKENS=3000 + export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:${TEI_EMBEDDER_PORT}" + export LLM_MODEL_ID="meta-llama/Meta-Llama-3.1-8B-Instruct" + export TGI_LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}" + export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6004" + export NEO4J_URI="bolt://${host_ip}:${NEO4J_PORT2}" + export NEO4J_USERNAME="neo4j" + export NEO4J_PASSWORD="neo4jtest" + export no_proxy="localhost,127.0.0.1,"${host_ip} + export LOGFLAG=True + + cd $WORKPATH/comps/retrievers/deployment/docker_compose + docker compose -f compose.yaml up ${service_name} -d > ${LOG_PATH}/start_services_with_compose.log + sleep 1m # dataprep neo4j # Not testing openai code path since not able to provide key for cicd - docker run -d --name="test-comps-retrievers-neo4j-llama-index-dataprep" -p 6004:6004 -v ./data:/data --ipc=host -e TGI_LLM_ENDPOINT=$TGI_LLM_ENDPOINT \ - -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e EMBEDDING_MODEL_ID=$emb_model -e LLM_MODEL_ID=$model -e host_ip=$ip_address -e 
no_proxy=$no_proxy \ - -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e NEO4J_URI="bolt://${ip_address}:7687" -e NEO4J_USERNAME="neo4j" \ - -e NEO4J_PASSWORD="neo4jtest" -e HF_TOKEN=$HF_TOKEN -e LOGFLAG=True opea/dataprep-neo4j-llamaindex:comps - sleep 30s - export DATAPREP_SERVICE_ENDPOINT="http://${ip_address}:6004" - - # Neo4J retriever - # Not testing openai code path since not able to provide key for cicd - export NEO4J_URI="bolt://${ip_address}:7687" - export NEO4J_USERNAME="neo4j" - export NEO4J_PASSWORD="neo4jtest" - export no_proxy="localhost,127.0.0.1,"${ip_address} - docker run -d --name="test-comps-retrievers-neo4j-llama-index-server" -p 6009:6009 --ipc=host -e TGI_LLM_ENDPOINT=$TGI_LLM_ENDPOINT -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT \ - -e EMBEDDING_MODEL_ID=$emb_model -e LLM_MODEL_ID=$model -e host_ip=$ip_address -e http_proxy=$http_proxy -e no_proxy=$no_proxy -e https_proxy=$https_proxy \ - -e NEO4J_URI="bolt://${ip_address}:7687" -e NEO4J_USERNAME="neo4j" -e NEO4J_PASSWORD="neo4jtest" -e LOGFLAG=True opea/retriever-neo4j-llamaindex:comps + docker run -d --name="test-comps-retrievers-neo4j-llama-index-dataprep" -p 6004:5000 -v ./data:/data --ipc=host -e TGI_LLM_ENDPOINT=$TGI_LLM_ENDPOINT \ + -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e TEI_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e EMBEDDING_MODEL_ID=$EMBEDDING_MODEL_ID -e LLM_MODEL_ID=$LLM_MODEL_ID -e host_ip=$host_ip -e no_proxy=$no_proxy \ + -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e NEO4J_URL="bolt://${host_ip}:${NEO4J_PORT2}" -e NEO4J_USERNAME="neo4j" \ + -e NEO4J_PASSWORD="neo4jtest" -e HF_TOKEN=$HF_TOKEN -e LOGFLAG=True -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_NEO4J_LLAMAINDEX" opea/dataprep-neo4j-llamaindex:comps sleep 1m @@ -131,48 +118,44 @@ function validate_service() { function validate_microservice() { # validate neo4j-apoc validate_service \ - "${ip_address}:7474" \ + "${host_ip}:${NEO4J_PORT1}" \ "200 OK" \ "neo4j-apoc" \ - "test-comps-retrievers-neo4j-llama-index-neo4j-apoc" \ + "neo4j-apoc" \ "" sleep 1m # retrieval can't curl as expected, try to wait for more time # tgi for llm service validate_service \ - "${ip_address}:6005/generate" \ + "${host_ip}:${LLM_ENDPOINT_PORT}/generate" \ "generated_text" \ "tgi-gaudi-service" \ - "test-comps-retrievers-neo4j-llama-index-tgi" \ + "tgi-gaudi-server" \ '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' # test /v1/dataprep graph extraction echo "The stock of company Chevron has declined about 10% over the past 90-days despite the fact that Q2 consensus earnings estimates have risen sharply (~25%) during that same time frame. Over the years, Chevron has kept a very strong balance sheet. FirstEnergy company posted its earnings results on Tuesday. The utilities provider reported $0.53 earnings per share for the quarter, topping the consensus estimate of $0.52 by $0.01, RTT News reports. FirstEnergy had a net margin of 10.85% and a return on equity of 17.17%. The Dáil was almost suspended on Thursday afternoon after Sinn Féin TD John Brady walked across the chamber and placed an on-call pager in front of the Minister for Housing Darragh O’Brien during a debate on retained firefighters. Darragh O’Brien said John Brady had taken part in an act of theatre that was obviously choreographed. Around 2,000 retained firefighters around the country staged a second day of industrial action on Tuesday and are due to start all out-strike action from next Tuesday. 
The mostly part-time workers, who keep the services going outside of Ireland’s larger urban centres, are taking industrial action in a dispute over pay and working conditions. Speaking in the Dáil, Sinn Féin deputy leader Pearse Doherty said firefighters had marched on Leinster House today and were very angry at the fact the Government will not intervene. Reintroduction of tax relief on mortgages needs to be considered, Darragh O’Brien says. Martin withdraws comment after saying People Before Profit would ‘put the jackboot on people’ Taoiseach ‘propagated fears’ farmers forced to rewet land due to nature restoration law – Cairns An intervention is required now. I’m asking you to make an improved offer in relation to pay for retained firefighters, Mr Doherty told the housing minister. I’m also asking you, and challenging you, to go outside after this Order of Business and meet with the firefighters because they are just fed up to the hilt in relation to what you said. Some of them have handed in their pagers to members of the Opposition and have challenged you to wear the pager for the next number of weeks, put up with an €8,600 retainer and not leave your community for the two and a half kilometres and see how you can stand over those type of pay and conditions. At this point, John Brady got up from his seat, walked across the chamber and placed the pager on the desk in front of Darragh O’Brien. Ceann Comhairle Seán Ó Fearghaíl said the Sinn Féin TD was completely out of order and told him not to carry out a charade in this House, adding it was absolutely outrageous behaviour and not to be encouraged. Darragh O’Brien said John Brady had engaged in an act of theatre here today which was obviously choreographed and was then interrupted with shouts from the Opposition benches. Mr Ó Fearghaíl said he would suspend the House if this racket continues. Darragh O’Brien later said he was confident the dispute could be resolved and he had immense regard for firefighters. The minister said he would encourage the unions to re-engage with the State’s industrial relations process while also accusing Sinn Féin of using the issue for their own political gain." > $LOG_PATH/dataprep_file.txt validate_service \ - "http://${ip_address}:6004/v1/dataprep" \ + "http://${host_ip}:6004/v1/dataprep/ingest" \ "Data preparation succeeded" \ "extract_graph_neo4j" \ "test-comps-retrievers-neo4j-llama-index-dataprep" # retrieval microservice validate_service \ - "${ip_address}:6009/v1/retrieval" \ - "Retrieval of answers from community summaries successful" \ + "${host_ip}:${RETRIEVER_PORT}/v1/retrieval" \ + "documents" \ "retriever_community_answers_neo4j" \ - "test-comps-retrievers-neo4j-llama-index-server" \ + "${service_name}" \ "{\"messages\": [{\"role\": \"user\",\"content\": \"Who is John Brady and has he had any confrontations?\"}]}" } function stop_docker() { - cid_retrievers=$(docker ps -aq --filter "name=test-comps-retrievers-neo4j*") - if [[ ! -z "$cid_retrievers" ]]; then - docker stop $cid_retrievers && docker rm $cid_retrievers && sleep 1s - fi - cid_db=$(docker ps -aq --filter "name=test-comps-retrievers-neo4j-llama-index-neo4j-apoc") - if [[ ! -z "$cid_retrievers" ]]; then - docker stop $cid_retrievers && docker rm $cid_retrievers && sleep 1s - fi + cid=$(docker ps -aq --filter "name=test-comps-*") + if [[ ! 
-z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi + cd $WORKPATH/comps/retrievers/deployment/docker_compose + docker compose -f compose.yaml down ${service_name} --remove-orphans } function main() { diff --git a/tests/retrievers/test_retrievers_opensearch.sh b/tests/retrievers/test_retrievers_opensearch.sh new file mode 100644 index 0000000000..7a5fc0aeb2 --- /dev/null +++ b/tests/retrievers/test_retrievers_opensearch.sh @@ -0,0 +1,92 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -x + +IMAGE_REPO=${IMAGE_REPO:-"opea"} +export REGISTRY=${IMAGE_REPO} +export TAG="comps" +echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" +echo "TAG=${TAG}" + +WORKPATH=$(dirname "$PWD") +LOG_PATH="$WORKPATH/tests" +export host_ip=$(hostname -I | awk '{print $1}') +service_name="opensearch-vector-db tei-embedding-serving retriever-opensearch" + +function build_docker_images() { + cd $WORKPATH + docker build --no-cache -t ${REGISTRY:-opea}/retriever:${TAG:-latest} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/src/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/retriever built fail" + exit 1 + else + echo "opea/retriever built successful" + fi +} + +function start_service() { + export OPENSEARCH_PORT1=11627 + export OPENSEARCH_PORT2=11628 + export RETRIEVER_PORT=11629 + export TEI_EMBEDDER_PORT=11630 + export OPENSEARCH_INITIAL_ADMIN_PASSWORD="StRoNgOpEa0)" + export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" + export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:${TEI_EMBEDDER_PORT}" + export OPENSEARCH_URL="http://${host_ip}:${OPENSEARCH_PORT1}" + export INDEX_NAME="file-index" + + cd $WORKPATH/comps/retrievers/deployment/docker_compose + docker compose -f compose.yaml up ${service_name} -d > ${LOG_PATH}/start_services_with_compose.log + + + sleep 2m +} + +function validate_microservice() { + export PATH="${HOME}/miniforge3/bin:$PATH" + source activate + URL="http://${host_ip}:$RETRIEVER_PORT/v1/retrieval" + + test_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") + + HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "{\"text\":\"test\",\"embedding\":${test_embedding}}" -H 'Content-Type: application/json' -k -u admin:$OPENSEARCH_INITIAL_ADMIN_PASSWORD "$URL") + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ retriever ] HTTP status is 200. Checking content..." + local CONTENT=$(curl -s -X POST -d "{\"text\":\"test\",\"embedding\":${test_embedding}}" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/retriever.log) + + if echo "$CONTENT" | grep -q "retrieved_docs"; then + echo "[ retriever ] Content is as expected." + else + echo "[ retriever ] Content does not match the expected result: $CONTENT" + docker logs ${service_name} >> ${LOG_PATH}/retriever.log + docker logs tei-embedding-serving >> ${LOG_PATH}/tei.log + exit 1 + fi + else + echo "[ retriever ] HTTP status is not 200. 
Received status was $HTTP_STATUS" + docker logs ${service_name} >> ${LOG_PATH}/retriever.log + docker logs tei-embedding-serving >> ${LOG_PATH}/tei.log + exit 1 + fi +} + +function stop_docker() { + cd $WORKPATH/comps/retrievers/deployment/docker_compose + docker compose -f compose.yaml down ${service_name} --remove-orphans +} + +function main() { + stop_service + + build_docker_images + start_service + + validate_microservice + + stop_service + echo y | docker system prune +} + +main diff --git a/tests/retrievers/test_retrievers_opensearch_langchain.sh b/tests/retrievers/test_retrievers_opensearch_langchain.sh deleted file mode 100644 index a03a28cc08..0000000000 --- a/tests/retrievers/test_retrievers_opensearch_langchain.sh +++ /dev/null @@ -1,111 +0,0 @@ -#!/bin/bash -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -set -x - -WORKPATH=$(dirname "$PWD") -LOG_PATH="$WORKPATH/tests" -ip_address=$(hostname -I | awk '{print $1}') -retriever_port="7000" -OPENSEARCH_INITIAL_ADMIN_PASSWORD="StRoNgOpEa0)" - -function build_docker_images() { - cd $WORKPATH - docker build -t opea/retriever-opensearch:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/opensearch/langchain/Dockerfile . - if [ $? -ne 0 ]; then - echo "opea/retriever-opensearch built fail" - exit 1 - else - echo "opea/retriever-opensearch built successful" - fi -} - -function start_service() { - # Start OpenSearch vector db container - docker run -d \ - --name test-comps-retriever-opensearch \ - -e cluster.name=opensearch-cluster \ - -e node.name=opensearch-vector-db \ - -e discovery.seed_hosts=opensearch-vector-db \ - -e cluster.initial_master_nodes=opensearch-vector-db \ - -e bootstrap.memory_lock=true \ - -e "OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m" \ - -e OPENSEARCH_INITIAL_ADMIN_PASSWORD=$OPENSEARCH_INITIAL_ADMIN_PASSWORD \ - --ulimit memlock=-1:-1 \ - --ulimit nofile=65536:65536 \ - -p 9200:9200 \ - -p 9600:9600 \ - opensearchproject/opensearch:latest - - # tei endpoint - tei_endpoint=6060 - model="BAAI/bge-base-en-v1.5" - docker run -d --name="test-comps-retriever-opensearch-tei-endpoint" -p $tei_endpoint:80 -v ./data:/data --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 --model-id $model - sleep 30s - export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:${tei_endpoint}" - - # Start OpenSearch retriever container - OPENSEARCH_URL="http://${ip_address}:9200" - INDEX_NAME="file-index" - docker run -d \ - --name test-comps-retriever-opensearch-server \ - -p 7000:7000 \ - -e https_proxy=$https_proxy \ - -e http_proxy=$http_proxy \ - -e OPENSEARCH_INITIAL_ADMIN_PASSWORD=$OPENSEARCH_INITIAL_ADMIN_PASSWORD \ - -e OPENSEARCH_URL=$OPENSEARCH_URL \ - -e INDEX_NAME=$INDEX_NAME \ - -e TEI_EMBEDDING_ENDPOINT=${TEI_EMBEDDING_ENDPOINT} \ - opea/retriever-opensearch:latest - - sleep 2m -} - -function validate_microservice() { - export PATH="${HOME}/miniforge3/bin:$PATH" - source activate - URL="http://${ip_address}:$retriever_port/v1/retrieval" - - test_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") - - HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "{\"text\":\"test\",\"embedding\":${test_embedding}}" -H 'Content-Type: application/json' -k -u admin:$OPENSEARCH_INITIAL_ADMIN_PASSWORD "$URL") - if [ "$HTTP_STATUS" -eq 200 ]; then - echo "[ retriever ] HTTP status is 200. Checking content..." 
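In the new tests/retrievers/test_retrievers_opensearch.sh above, service_name lists three compose services, so the "docker logs ${service_name}" calls in the failure branches expand to multiple arguments even though docker logs accepts a single container, and main() invokes stop_service while the function defined is stop_docker. A minimal sketch of a failure branch that handles the multi-service case, assuming the compose container names match the service names (as they do for tei-embedding-serving elsewhere in this change):

    for svc in ${service_name}; do
        docker logs ${svc} >> ${LOG_PATH}/${svc}.log
    done
    exit 1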
- local CONTENT=$(curl -s -X POST -d "{\"text\":\"test\",\"embedding\":${test_embedding}}" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/retriever.log) - - if echo "$CONTENT" | grep -q "retrieved_docs"; then - echo "[ retriever ] Content is as expected." - else - echo "[ retriever ] Content does not match the expected result: $CONTENT" - docker logs test-comps-retriever-opensearch-server >> ${LOG_PATH}/retriever.log - docker logs test-comps-retriever-opensearch-tei-endpoint >> ${LOG_PATH}/tei.log - exit 1 - fi - else - echo "[ retriever ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs test-comps-retriever-opensearch-server >> ${LOG_PATH}/retriever.log - docker logs test-comps-retriever-opensearch-tei-endpoint >> ${LOG_PATH}/tei.log - exit 1 - fi -} - -function stop_service() { - cid=$(docker ps -aq --filter "name=test-comps-retriever-opensearch*") - if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi - -} - -function main() { - stop_service - - build_docker_images - start_service - - validate_microservice - - stop_service - # echo y | docker system prune -} - -main diff --git a/tests/retrievers/test_retrievers_pathway.sh b/tests/retrievers/test_retrievers_pathway.sh new file mode 100644 index 0000000000..86fadaa812 --- /dev/null +++ b/tests/retrievers/test_retrievers_pathway.sh @@ -0,0 +1,88 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -x + +IMAGE_REPO=${IMAGE_REPO:-"opea"} +export REGISTRY=${IMAGE_REPO} +export TAG="comps" +echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" +echo "TAG=${TAG}" + +WORKPATH=$(dirname "$PWD") +LOG_PATH="$WORKPATH/tests" +export host_ip=$(hostname -I | awk '{print $1}') +service_name="retriever-pathway" + +function build_docker_images() { + cd $WORKPATH + + docker build --no-cache --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -t ${REGISTRY:-opea}/vectorstore-pathway:${TAG:-latest} -f comps/third_parties/pathway/src/Dockerfile . + + docker build --no-cache -t ${REGISTRY:-opea}/retriever:${TAG:-latest} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/src/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/retriever built fail" + exit 1 + else + echo "opea/retriever built successful" + fi +} + +function start_service() { + export TEI_EMBEDDER_PORT=11619 + export PATHWAY_PORT=11620 + export RETRIEVER_PORT=11621 + export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" + export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:${TEI_EMBEDDER_PORT}" + export PATHWAY_HOST_DB="0.0.0.0" + export PATHWAY_VOLUME="$WORKPATH/comps/third_parties/pathway/src/README.md" + export PATHWAY_HOST=$host_ip # needed in order to reach to vector store + + cd $WORKPATH/comps/retrievers/deployment/docker_compose + docker compose -f compose.yaml up ${service_name} -d > ${LOG_PATH}/start_services_with_compose.log + + sleep 2m +} + +function validate_microservice() { + export PATH="${HOME}/miniforge3/bin:$PATH" + + test_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") + + result=$(http_proxy='' + curl http://${host_ip}:$RETRIEVER_PORT/v1/retrieval \ + -X POST \ + -d "{\"text\":\"test\",\"embedding\":${test_embedding}}" \ + -H 'Content-Type: application/json') + if [[ $result == *"retrieved_docs"* ]]; then + echo "Result correct." + else + echo "Result wrong. 
Received was $result" + docker logs pathway-db >> ${LOG_PATH}/vectorstore-pathway.log + docker logs tei-embedding-serving >> ${LOG_PATH}/tei-endpoint.log + docker logs ${service_name} >> ${LOG_PATH}/retriever-pathway.log + exit 1 + fi +} + +function stop_docker() { + cd $WORKPATH/comps/retrievers/deployment/docker_compose + docker compose -f compose.yaml down ${service_name} --remove-orphans +} + +function main() { + + stop_docker + + build_docker_images + start_service + + validate_microservice + + stop_docker + echo y | docker system prune + +} + +main diff --git a/tests/retrievers/test_retrievers_pathway_langchain.sh b/tests/retrievers/test_retrievers_pathway_langchain.sh deleted file mode 100644 index 33d60b025f..0000000000 --- a/tests/retrievers/test_retrievers_pathway_langchain.sh +++ /dev/null @@ -1,101 +0,0 @@ -#!/bin/bash -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -set -x - -WORKPATH=$(dirname "$PWD") -LOG_PATH="$WORKPATH/tests" -ip_address=$(hostname -I | awk '{print $1}') -function build_docker_images() { - cd $WORKPATH - - docker build --no-cache --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -t opea/vectorstore-pathway:comps -f comps/vectorstores/pathway/Dockerfile . - - cd $WORKPATH - - docker build --no-cache -t opea/retriever-pathway:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/pathway/langchain/Dockerfile . - if [ $? -ne 0 ]; then - echo "opea/retriever-pathway built fail" - exit 1 - else - echo "opea/retriever-pathway built successful" - fi -} - -function start_service() { - cd $WORKPATH - - # tei endpoint - tei_endpoint=5008 - model="BAAI/bge-base-en-v1.5" - docker run -d --name="test-comps-retriever-pathway-tei-endpoint" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p $tei_endpoint:80 -v ./data:/data --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.2 --model-id $model - - sleep 30s - export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:${tei_endpoint}" - - result=$(http_proxy='' - curl $TEI_EMBEDDING_ENDPOINT -X POST -d '{"inputs":"Hey,"}' -H 'Content-Type: application/json') - - echo "embed_result:" - echo $result - - sleep 30s - - # pathway - export PATHWAY_HOST="0.0.0.0" - export PATHWAY_PORT=5433 - - docker run -d --name="test-comps-retriever-pathway-vectorstore" -e PATHWAY_HOST=${PATHWAY_HOST} -e PATHWAY_PORT=${PATHWAY_PORT} -e TEI_EMBEDDING_ENDPOINT=${TEI_EMBEDDING_ENDPOINT} -e http_proxy=$http_proxy -e https_proxy=$https_proxy -v $WORKPATH/comps/vectorstores/pathway/README.md:/app/data/README.md -p ${PATHWAY_PORT}:${PATHWAY_PORT} --network="host" opea/vectorstore-pathway:comps - - sleep 45s - - export PATHWAY_HOST=$ip_address # needed in order to reach to vector store - - docker run -d --name="test-comps-retriever-pathway-ms" -p 5009:7000 -e PATHWAY_HOST=${PATHWAY_HOST} -e PATHWAY_PORT=${PATHWAY_PORT} -e http_proxy=$http_proxy -e https_proxy=$https_proxy opea/retriever-pathway:comps - - sleep 10s -} - -function validate_microservice() { - retriever_port=5009 - export PATH="${HOME}/miniforge3/bin:$PATH" - - test_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") - - result=$(http_proxy='' - curl http://${ip_address}:$retriever_port/v1/retrieval \ - -X POST \ - -d "{\"text\":\"test\",\"embedding\":${test_embedding}}" \ - -H 'Content-Type: application/json') - if [[ $result == *"retrieved_docs"* ]]; then - echo "Result correct." - else - echo "Result wrong. 
Received was $result" - docker logs test-comps-retriever-pathway-vectorstore >> ${LOG_PATH}/vectorstore-pathway.log - docker logs test-comps-retriever-pathway-tei-endpoint >> ${LOG_PATH}/tei-endpoint.log - docker logs test-comps-retriever-pathway-ms >> ${LOG_PATH}/retriever-pathway.log - exit 1 - fi -} - -function stop_docker() { - cid=$(docker ps -aq --filter "name=test-comps-retriever-pathway*") - if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi -} - -function main() { - - stop_docker - - build_docker_images - start_service - - validate_microservice - - stop_docker - echo y | docker system prune - -} - -main diff --git a/tests/retrievers/test_retrievers_pgvector.sh b/tests/retrievers/test_retrievers_pgvector.sh new file mode 100644 index 0000000000..021d81a0c2 --- /dev/null +++ b/tests/retrievers/test_retrievers_pgvector.sh @@ -0,0 +1,83 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -x + +IMAGE_REPO=${IMAGE_REPO:-"opea"} +export REGISTRY=${IMAGE_REPO} +export TAG="comps" +echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" +echo "TAG=${TAG}" + +WORKPATH=$(dirname "$PWD") +LOG_PATH="$WORKPATH/tests" +export host_ip=$(hostname -I | awk '{print $1}') +service_name="retriever-pgvector" + +function build_docker_images() { + cd $WORKPATH + docker build --no-cache -t ${REGISTRY:-opea}/retriever:${TAG:-latest} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/src/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/retriever built fail" + exit 1 + else + echo "opea/retriever built successful" + fi +} + +function start_service() { + export PGVECTOR_PORT=11617 + export RETRIEVER_PORT=11618 + export POSTGRES_USER=testuser + export POSTGRES_PASSWORD=testpwd + export POSTGRES_DB=vectordb + export HF_TOKEN=${HF_TOKEN} + export VOLUMES_PATH=$WORKPATH/comps/third_parties/pgvector/src/init.sql + export PG_CONNECTION_STRING=postgresql+psycopg2://${POSTGRES_USER}:${POSTGRES_PASSWORD}@$host_ip:$PGVECTOR_PORT/${POSTGRES_DB} + export LOGFLAG=True + + cd $WORKPATH/comps/retrievers/deployment/docker_compose + docker compose -f compose.yaml up ${service_name} -d > ${LOG_PATH}/start_services_with_compose.log + + sleep 1m +} + +function validate_microservice() { + test_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") + + result=$(http_proxy='' + curl http://${host_ip}:$RETRIEVER_PORT/v1/retrieval \ + -X POST \ + -d "{\"text\":\"test\",\"embedding\":${test_embedding}}" \ + -H 'Content-Type: application/json') + if [[ $result == *"retrieved_docs"* ]]; then + echo "Result correct." + else + echo "Result wrong. 
Received was $result" + docker logs pgvector-db >> ${LOG_PATH}/vectorstore.log + docker logs ${service_name} >> ${LOG_PATH}/retriever-pgvector.log + exit 1 + fi +} + +function stop_docker() { + cd $WORKPATH/comps/retrievers/deployment/docker_compose + docker compose -f compose.yaml down ${service_name} --remove-orphans +} + +function main() { + + stop_docker + + build_docker_images + start_service + + validate_microservice + + stop_docker + echo y | docker system prune + +} + +main diff --git a/tests/retrievers/test_retrievers_pgvector_langchain.sh b/tests/retrievers/test_retrievers_pgvector_langchain.sh deleted file mode 100644 index 40825de6b6..0000000000 --- a/tests/retrievers/test_retrievers_pgvector_langchain.sh +++ /dev/null @@ -1,85 +0,0 @@ -#!/bin/bash -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -set -x - -WORKPATH=$(dirname "$PWD") -LOG_PATH="$WORKPATH/tests" -ip_address=$(hostname -I | awk '{print $1}') -function build_docker_images() { - cd $WORKPATH - docker build --no-cache -t opea/retriever-pgvector:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/pgvector/langchain/Dockerfile . - if [ $? -ne 0 ]; then - echo "opea/retriever-pgvector built fail" - exit 1 - else - echo "opea/retriever-pgvector built successful" - fi -} - -function start_service() { - # pgvector - export POSTGRES_USER=testuser - export POSTGRES_PASSWORD=testpwd - export POSTGRES_DB=vectordb - - pgvector_port=5079 - docker run --name test-comps-retriever-pgvector-vectorstore -e POSTGRES_USER=${POSTGRES_USER} -e POSTGRES_HOST_AUTH_METHOD=trust -e POSTGRES_DB=${POSTGRES_DB} -e POSTGRES_PASSWORD=${POSTGRES_PASSWORD} -d -v $WORKPATH/comps/vectorstores/pgvector/init.sql:/docker-entrypoint-initdb.d/init.sql -p $pgvector_port:5432 pgvector/pgvector:0.7.0-pg16 - sleep 10s - - # tei endpoint - tei_endpoint=5431 - model="BAAI/bge-base-en-v1.5" - docker run -d --name="test-comps-retriever-pgvector-tei-endpoint" -p $tei_endpoint:80 -v ./data:/data --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 --model-id $model - sleep 30s - export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:${tei_endpoint}" - - # pgvector retriever - docker run -d --name="test-comps-retriever-pgvector-ms" -p 5003:7000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e PG_CONNECTION_STRING=postgresql+psycopg2://${POSTGRES_USER}:${POSTGRES_PASSWORD}@$ip_address:$pgvector_port/${POSTGRES_DB} -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT opea/retriever-pgvector:comps - sleep 3m -} - -function validate_microservice() { - retriever_port=5003 - test_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") - - - result=$(http_proxy='' - curl http://${ip_address}:$retriever_port/v1/retrieval \ - -X POST \ - -d "{\"text\":\"test\",\"embedding\":${test_embedding}}" \ - -H 'Content-Type: application/json') - if [[ $result == *"retrieved_docs"* ]]; then - echo "Result correct." - else - echo "Result wrong. Received was $result" - docker logs test-comps-retriever-pgvector-vectorstore >> ${LOG_PATH}/vectorstore.log - docker logs test-comps-retriever-pgvector-tei-endpoint >> ${LOG_PATH}/tei-endpoint.log - docker logs test-comps-retriever-pgvector-ms >> ${LOG_PATH}/retriever-pgvector.log - exit 1 - fi -} - -function stop_docker() { - cid_retrievers=$(docker ps -aq --filter "name=test-comps-retriever-pgvector*") - if [[ ! 
-z "$cid_retrievers" ]]; then - docker stop $cid_retrievers && docker rm $cid_retrievers && sleep 1s - fi -} - -function main() { - - stop_docker - - build_docker_images - start_service - - validate_microservice - - stop_docker - echo y | docker system prune - -} - -main diff --git a/tests/retrievers/test_retrievers_pinecone.sh b/tests/retrievers/test_retrievers_pinecone.sh new file mode 100644 index 0000000000..55817e99d5 --- /dev/null +++ b/tests/retrievers/test_retrievers_pinecone.sh @@ -0,0 +1,87 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -x + +IMAGE_REPO=${IMAGE_REPO:-"opea"} +export REGISTRY=${IMAGE_REPO} +export TAG="comps" +echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" +echo "TAG=${TAG}" + +WORKPATH=$(dirname "$PWD") +LOG_PATH="$WORKPATH/tests" +export host_ip=$(hostname -I | awk '{print $1}') +service_name="retriever-pinecone" + +function build_docker_images() { + cd $WORKPATH + docker build --no-cache -t ${REGISTRY:-opea}/retriever:${TAG:-latest} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/src/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/retriever built fail" + exit 1 + else + echo "opea/retriever built successful" + fi +} + +function start_service() { + export RETRIEVER_PORT=11616 + export PINECONE_API_KEY=$PINECONE_KEY + export PINECONE_INDEX_NAME="langchain-test" + export HF_TOKEN=$HF_TOKEN + export LOGFLAG=True + + cd $WORKPATH/comps/retrievers/deployment/docker_compose + docker compose -f compose.yaml up ${service_name} -d > ${LOG_PATH}/start_services_with_compose.log + + sleep 1m +} + +function validate_microservice() { + local test_embedding="$1" + + export PATH="${HOME}/miniforge3/bin:$PATH" + source activate + URL="http://${host_ip}:$RETRIEVER_PORT/v1/retrieval" + + HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "{\"text\":\"test\",\"embedding\":${test_embedding}}" -H 'Content-Type: application/json' "$URL") + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ retriever ] HTTP status is 200. Checking content..." + local CONTENT=$(curl -s -X POST -d "{\"text\":\"test\",\"embedding\":${test_embedding}}" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/retriever.log) + + if echo "$CONTENT" | grep -q "retrieved_docs"; then + echo "[ retriever ] Content is as expected." + else + echo "[ retriever ] Content does not match the expected result: $CONTENT" + docker logs ${service_name} >> ${LOG_PATH}/retriever.log + exit 1 + fi + else + echo "[ retriever ] HTTP status is not 200. 
Received status was $HTTP_STATUS" + docker logs ${service_name} >> ${LOG_PATH}/retriever.log + exit 1 + fi +} + +function stop_docker() { + cd $WORKPATH/comps/retrievers/deployment/docker_compose + docker compose -f compose.yaml down ${service_name} --remove-orphans +} + +function main() { + + stop_docker + build_docker_images + + start_service + test_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") + validate_microservice "$test_embedding" + + stop_docker + echo y | docker system prune + +} + +main diff --git a/tests/retrievers/test_retrievers_pinecone_langchain.sh b/tests/retrievers/test_retrievers_pinecone_langchain.sh deleted file mode 100644 index a6604e0e92..0000000000 --- a/tests/retrievers/test_retrievers_pinecone_langchain.sh +++ /dev/null @@ -1,100 +0,0 @@ -#!/bin/bash -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -set -x - -WORKPATH=$(dirname "$PWD") -LOG_PATH="$WORKPATH/tests" -ip_address=$(hostname -I | awk '{print $1}') - -function build_docker_images() { - cd $WORKPATH - docker build --no-cache -t opea/retriever-pinecone:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/pinecone/langchain/Dockerfile . - if [ $? -ne 0 ]; then - echo "opea/retriever-pinecone built fail" - exit 1 - else - echo "opea/retriever-pinecone built successful" - fi -} - -function start_service() { - # tei endpoint - tei_endpoint=5053 - model="BAAI/bge-base-en-v1.5" - docker run -d --name="test-comps-retriever-pinecone-tei-endpoint" -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy -p $tei_endpoint:80 -v ./data:/data --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.2 --model-id $model - bash ./tests/utils/wait-for-it.sh ${ip_address}:${tei_endpoint} -s -t 30 -- echo "tei endpoint up" - TEI_ENDPOINT_UP=$? - if [ ${TEI_ENDPOINT_UP} -ne 0 ]; then - echo "Could not start TEI endpoint." - return 1 - fi - export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:${tei_endpoint}" - echo ${TEI_EMBEDDING_ENDPOINT} - - # pinecone retriever - export PINECONE_API_KEY=$PINECONE_KEY - export PINECONE_INDEX_NAME="langchain-test" - export HUGGINGFACEHUB_API_TOKEN=$HF_TOKEN - retriever_port=5054 - unset http_proxy - docker run -d --name="test-comps-retriever-pinecone-server" -p ${retriever_port}:7000 --ipc=host -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e PINECONE_API_KEY=$PINECONE_API_KEY -e PINECONE_INDEX_NAME=$PINECONE_INDEX_NAME -e INDEX_NAME=$PINECONE_INDEX_NAME -e LOGFLAG="DEBUG" opea/retriever-pinecone:comps - - bash ./tests/utils/wait-for-it.sh ${ip_address}:$retriever_port -s -t 100 -- echo "Retriever up" - RETRIEVER_UP=$? - if [ ${RETRIEVER_UP} -ne 0 ]; then - echo "Could not start Retriever." - return 1 - fi - - sleep 5s - bash ./tests/utils/wait-for-it.sh ${ip_address}:$retriever_port -s -t 1 -- echo "Retriever still up" - RETRIEVER_UP=$? - if [ ${RETRIEVER_UP} -ne 0 ]; then - echo "Retriever crashed." 
- return 1 - fi - -} - -function validate_microservice() { - retriever_port=5054 - test_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") - - result=$(http_proxy='' curl --noproxy $ip_address http://${ip_address}:$retriever_port/v1/retrieval \ - -X POST \ - -d "{\"text\":\"test\",\"embedding\":${test_embedding}}" \ - -H 'Content-Type: application/json') - if [[ $result == *"retrieved_docs"* ]]; then - echo "Result correct." - else - echo "Result wrong. Received was $result" - docker logs test-comps-retriever-pinecone-tei-endpoint >> ${LOG_PATH}/tei-endpoint.log - docker logs test-comps-retriever-pinecone-server >> ${LOG_PATH}/retriever-pinecone.log - exit 1 - fi -} - -function stop_docker() { - cid_retrievers=$(docker ps -aq --filter "name=test-comps-retriever-pinecone*") - if [[ ! -z "$cid_retrievers" ]]; then - docker stop $cid_retrievers && docker rm $cid_retrievers && sleep 1s - fi -} - -function main() { - - stop_docker - - build_docker_images - start_service - - validate_microservice - - stop_docker - echo y | docker system prune - -} - -main diff --git a/tests/retrievers/test_retrievers_qdrant.sh b/tests/retrievers/test_retrievers_qdrant.sh new file mode 100644 index 0000000000..da2d343ffc --- /dev/null +++ b/tests/retrievers/test_retrievers_qdrant.sh @@ -0,0 +1,78 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -x + +IMAGE_REPO=${IMAGE_REPO:-"opea"} +export REGISTRY=${IMAGE_REPO} +export TAG="comps" +echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" +echo "TAG=${TAG}" + +WORKPATH=$(dirname "$PWD") +LOG_PATH="$WORKPATH/tests" +export host_ip=$(hostname -I | awk '{print $1}') +service_name="retriever-qdrant" + +function build_docker_images() { + cd $WORKPATH + docker build --no-cache -t ${REGISTRY:-opea}/retriever:${TAG:-latest} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/src/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/retriever built fail" + exit 1 + else + echo "opea/retriever built successful" + fi +} + +function start_service() { + export QDRANT_PORT=11622 + export RETRIEVER_PORT=11623 + export QDRANT_HOST=${host_ip} + export INDEX_NAME="rag-qdrant" + export LOGFLAG=True + + cd $WORKPATH/comps/retrievers/deployment/docker_compose + docker compose -f compose.yaml up ${service_name} -d > ${LOG_PATH}/start_services_with_compose.log + + sleep 3m +} + +function validate_microservice() { + export PATH="${HOME}/miniforge3/bin:$PATH" + source activate + test_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") + result=$(http_proxy='' curl http://${host_ip}:$RETRIEVER_PORT/v1/retrieval \ + -X POST \ + -d "{\"text\":\"test\",\"embedding\":${test_embedding}}" \ + -H 'Content-Type: application/json') + if [[ $result == *"retrieved_docs"* ]]; then + echo "Result correct." + else + echo "Result wrong. 
Received was $result" + docker logs ${service_name} >> ${LOG_PATH}/retriever.log + exit 1 + fi +} + +function stop_docker() { + cd $WORKPATH/comps/retrievers/deployment/docker_compose + docker compose -f compose.yaml down ${service_name} --remove-orphans +} + +function main() { + + stop_docker + + build_docker_images + start_service + + validate_microservice + + stop_docker + echo y | docker system prune + +} + +main diff --git a/tests/retrievers/test_retrievers_qdrant_haystack.sh b/tests/retrievers/test_retrievers_qdrant_haystack.sh deleted file mode 100644 index ccc4762c6b..0000000000 --- a/tests/retrievers/test_retrievers_qdrant_haystack.sh +++ /dev/null @@ -1,85 +0,0 @@ -#!/bin/bash -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -set -x - -WORKPATH=$(dirname "$PWD") -LOG_PATH="$WORKPATH/tests" -ip_address=$(hostname -I | awk '{print $1}') -function build_docker_images() { - cd $WORKPATH - docker build --no-cache -t opea/retriever-qdrant:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/qdrant/haystack/Dockerfile . - if [ $? -ne 0 ]; then - echo "opea/retriever-qdrant built fail" - exit 1 - else - echo "opea/retriever-qdrant built successful" - fi -} - -function start_service() { - # qdrant - docker run -d --name test-comps-retriever-qdrant-vector-db -p 5056:6333 -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy qdrant/qdrant - sleep 10s - - # tei endpoint - tei_endpoint=5055 - model="BAAI/bge-base-en-v1.5" - docker run -d --name="test-comps-retriever-qdrant-tei-endpoint" -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy -p $tei_endpoint:80 -v ./data:/data --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.2 --model-id $model - sleep 30s - export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:${tei_endpoint}" - - # qdrant retriever - export QDRANT_HOST="${ip_address}" - export QDRANT_PORT=5056 - export INDEX_NAME="rag-qdrant" - retriever_port=5057 - unset http_proxy - docker run -d --name="test-comps-retriever-qdrant-server" -p ${retriever_port}:7000 --ipc=host -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e QDRANT_HOST=$QDRANT_HOST -e QDRANT_PORT=$QDRANT_PORT -e INDEX_NAME=$INDEX_NAME opea/retriever-qdrant:comps - - sleep 3m -} - -function validate_microservice() { - retriever_port=5057 - export PATH="${HOME}/miniforge3/bin:$PATH" - source activate - test_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") - result=$(http_proxy='' curl http://${ip_address}:$retriever_port/v1/retrieval \ - -X POST \ - -d "{\"text\":\"test\",\"embedding\":${test_embedding}}" \ - -H 'Content-Type: application/json') - if [[ $result == *"retrieved_docs"* ]]; then - echo "Result correct." - else - echo "Result wrong. Received was $result" - docker logs test-comps-retriever-qdrant-server >> ${LOG_PATH}/retriever.log - docker logs test-comps-retriever-qdrant-tei-endpoint >> ${LOG_PATH}/tei-endpoint.log - exit 1 - fi -} - -function stop_docker() { - cid_retrievers=$(docker ps -aq --filter "name=test-comps-retriever-qdrant*") - if [[ ! 
-z "$cid_retrievers" ]]; then - docker stop $cid_retrievers && docker rm $cid_retrievers && sleep 1s - fi - -} - -function main() { - - stop_docker - - build_docker_images - start_service - - validate_microservice - - stop_docker - echo y | docker system prune - -} - -main diff --git a/tests/retrievers/test_retrievers_redis.sh b/tests/retrievers/test_retrievers_redis.sh index b22195d6b8..0964049f98 100644 --- a/tests/retrievers/test_retrievers_redis.sh +++ b/tests/retrievers/test_retrievers_redis.sh @@ -4,66 +4,71 @@ set -x +IMAGE_REPO=${IMAGE_REPO:-"opea"} +export REGISTRY=${IMAGE_REPO} +export TAG="comps" +echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" +echo "TAG=${TAG}" + WORKPATH=$(dirname "$PWD") LOG_PATH="$WORKPATH/tests" -ip_address=$(hostname -I | awk '{print $1}') +export host_ip=$(hostname -I | awk '{print $1}') +service_name="retriever-redis" +service_name_mm="retriever-redis-multimodal" function build_docker_images() { cd $WORKPATH - docker build --no-cache -t opea/retriever-redis:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/src/Dockerfile . + docker build --no-cache -t ${REGISTRY:-opea}/retriever:${TAG:-latest} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/src/Dockerfile . if [ $? -ne 0 ]; then - echo "opea/retriever-redis built fail" + echo "opea/retriever built fail" exit 1 else - echo "opea/retriever-redis built successful" + echo "opea/retriever built successful" fi } function start_service() { - # redis - docker run -d --name test-comps-retriever-redis-vector-db -p 5010:6379 -p 5011:8001 -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy redis/redis-stack:7.2.0-v9 - sleep 10s - - # tei endpoint - tei_endpoint=5434 - model="BAAI/bge-base-en-v1.5" - docker run -d --name="test-comps-retriever-redis-tei-endpoint" -p $tei_endpoint:80 -v ./data:/data --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 --model-id $model - sleep 30s - export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:${tei_endpoint}" - - # redis retriever - export REDIS_URL="redis://${ip_address}:5010" + export REDIS_PORT1=11601 # 11600-11699 + export REDIS_PORT2=11602 + export TEI_EMBEDDER_PORT=11603 + export RETRIEVER_PORT=11604 + export HF_TOKEN=${HF_TOKEN} + export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" + export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:${TEI_EMBEDDER_PORT}" + export REDIS_URL="redis://${host_ip}:${REDIS_PORT1}" export INDEX_NAME="rag-redis" - export HUGGINGFACEHUB_API_TOKEN=$HF_TOKEN - retriever_port=5435 - # unset http_proxy - docker run -d --name="test-comps-retriever-redis-server" -p ${retriever_port}:7000 --ipc=host -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME -e LOGFLAG=true -e RETRIEVER_TYPE="redis" opea/retriever-redis:comps + export LOGFLAG=True + + cd $WORKPATH/comps/retrievers/deployment/docker_compose + docker compose -f compose.yaml up ${service_name} -d > ${LOG_PATH}/start_services_with_compose.log - sleep 3m + sleep 2m } function start_multimodal_service() { - # redis - docker run -d --name test-comps-retriever-redis-vector-db -p 5689:6379 -p 5011:8001 -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy redis/redis-stack:7.2.0-v9 - sleep 10s - - # redis retriever - export REDIS_URL="redis://${ip_address}:5689" - export INDEX_NAME="rag-redis" - retriever_port=5435 - unset http_proxy - docker run 
-d --name="test-comps-retriever-redis-server" -p ${retriever_port}:7000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME -e BRIDGE_TOWER_EMBEDDING=true -e LOGFLAG=true -e RETRIEVER_TYPE="redis" opea/retriever-redis:comps + export REDIS_PORT1=11605 # 11600-11699 + export REDIS_PORT2=11606 + export RETRIEVER_PORT=11607 + export HF_TOKEN=${HF_TOKEN} + export REDIS_URL="redis://${host_ip}:${REDIS_PORT1}" + export INDEX_NAME="mm-rag-redis" + export LOGFLAG=True + export BRIDGE_TOWER_EMBEDDING=true + export RETRIEVER_TYPE="redis" + + cd $WORKPATH/comps/retrievers/deployment/docker_compose + docker compose -f compose.yaml up ${service_name_mm} -d > ${LOG_PATH}/start_services_with_compose_multimodal.log sleep 2m } function validate_microservice() { local test_embedding="$1" + local container_name="$2" - retriever_port=5435 export PATH="${HOME}/miniforge3/bin:$PATH" source activate - URL="http://${ip_address}:$retriever_port/v1/retrieval" + URL="http://${host_ip}:$RETRIEVER_PORT/v1/retrieval" HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "{\"text\":\"test\",\"embedding\":${test_embedding}}" -H 'Content-Type: application/json' "$URL") if [ "$HTTP_STATUS" -eq 200 ]; then @@ -74,23 +79,60 @@ function validate_microservice() { echo "[ retriever ] Content is as expected." else echo "[ retriever ] Content does not match the expected result: $CONTENT" - docker logs test-comps-retriever-redis-server >> ${LOG_PATH}/retriever.log + docker logs ${container_name} >> ${LOG_PATH}/retriever.log exit 1 fi else echo "[ retriever ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs test-comps-retriever-redis-server >> ${LOG_PATH}/retriever.log + docker logs ${container_name} >> ${LOG_PATH}/retriever.log exit 1 fi } -function stop_docker() { - cid_retrievers=$(docker ps -aq --filter "name=test-comps-retriever-redis*") - if [[ ! -z "$cid_retrievers" ]]; then - docker stop $cid_retrievers && docker rm $cid_retrievers && sleep 1s +function validate_mm_microservice() { + local test_embedding="$1" + local container_name="$2" + + export PATH="${HOME}/miniforge3/bin:$PATH" + source activate + URL="http://${host_ip}:$RETRIEVER_PORT/v1/retrieval" + + # Test the retriever with a b64 image that should be passed through + HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "{\"text\":\"test\",\"embedding\":${test_embedding},\"base64_image\":\"iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8/5+hnoEIwDiqkL4KAcT9GO0U4BxoAAAAAElFTkSuQmCC\"}" -H 'Content-Type: application/json' "$URL") + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ retriever ] HTTP status is 200. Checking content..." + local CONTENT=$(curl -s -X POST -d "{\"text\":\"test\",\"embedding\":${test_embedding}}" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/retriever.log) + + if echo "$CONTENT" | grep -q "retrieved_docs"; then + echo "[ retriever ] Content has retrieved_docs as expected." + empty_search_results=$(echo "$CONTENT" | grep "\"retrieved_docs\":\[\]") + if [ -z "$empty_search_results" ]; then + # If search results are not empty, check for b64 image string + if echo "$CONTENT" | grep -q "b64_img_str"; then + echo "[ retriever ] Content has b64_img_str as expected." 
+ else + echo "[ retriever ] Content does not include the b64_img_str: $CONTENT" + docker logs ${container_name} >> ${LOG_PATH}/retriever.log + exit 1 + fi + fi + else + echo "[ retriever ] Content does not match the expected result: $CONTENT" + docker logs ${container_name} >> ${LOG_PATH}/retriever.log + exit 1 + fi + else + echo "[ retriever ] HTTP status is not 200. Received status was $HTTP_STATUS" + docker logs ${container_name} >> ${LOG_PATH}/retriever.log + exit 1 fi } +function stop_docker() { + cd $WORKPATH/comps/retrievers/deployment/docker_compose + docker compose -f compose.yaml down ${service_name} ${service_name_mm} --remove-orphans +} + function main() { stop_docker @@ -99,13 +141,14 @@ function main() { # test text retriever start_service test_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") - validate_microservice "$test_embedding" + validate_microservice "$test_embedding" "$service_name" stop_docker # test multimodal retriever start_multimodal_service test_embedding_multi=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(512)]; print(embedding)") - validate_microservice "$test_embedding_multi" + validate_microservice "$test_embedding_multi" "$service_name_mm" + validate_mm_microservice "$test_embedding_multi" "$service_name_mm" # clean env stop_docker diff --git a/tests/retrievers/test_retrievers_vdms.sh b/tests/retrievers/test_retrievers_vdms.sh new file mode 100644 index 0000000000..cd2b41b53e --- /dev/null +++ b/tests/retrievers/test_retrievers_vdms.sh @@ -0,0 +1,97 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -xe + +IMAGE_REPO=${IMAGE_REPO:-"opea"} +export REGISTRY=${IMAGE_REPO} +export TAG="comps" +echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" +echo "TAG=${TAG}" + +WORKPATH=$(dirname "$PWD") +LOG_PATH="$WORKPATH/tests" +export host_ip=$(hostname -I | awk '{print $1}') +no_proxy=$no_proxy,$host_ip +service_name="retriever-vdms" + +function build_docker_images() { + cd $WORKPATH + hf_token="dummy" + docker build --no-cache -t ${REGISTRY:-opea}/retriever:${TAG:-latest} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy --build-arg huggingfacehub_api_token=$hf_token -f comps/retrievers/src/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/retriever built fail" + exit 1 + else + echo "opea/retriever built successful" + fi + +} + +function start_service() { + export VDMS_PORT=11624 + export TEI_EMBEDDER_PORT=11625 + export RETRIEVER_PORT=11626 + export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" + export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:${TEI_EMBEDDER_PORT}" + export INDEX_NAME="rag-vdms" + export VDMS_USE_CLIP=0 #set to 1 if openai clip embedding should be used + export HF_TOKEN=${HF_TOKEN} + export LOGFLAG=True + + cd $WORKPATH/comps/retrievers/deployment/docker_compose + docker compose -f compose.yaml up ${service_name} -d > ${LOG_PATH}/start_services_with_compose.log + + sleep 3m +} + +function validate_microservice() { + URL="http://${host_ip}:$RETRIEVER_PORT/v1/retrieval" + + test_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") + + HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "{\"text\":\"test\",\"embedding\":${test_embedding}}" -H 'Content-Type: application/json' "$URL") + + echo "HTTP_STATUS = $HTTP_STATUS" + + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ retriever ] HTTP status is 200. Checking content..." 
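+ # The status-only curl above discards the response body, so the same request is sent again below to capture the JSON payload for content validation.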
+ local CONTENT=$(curl -s -X POST -d "{\"text\":\"test\",\"embedding\":${test_embedding}}" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/retriever.log) + + if echo "$CONTENT" | grep -q "retrieved_docs"; then + echo "[ retriever ] Content is as expected." + else + echo "[ retriever ] Content does not match the expected result: $CONTENT" + docker logs ${service_name} >> ${LOG_PATH}/retriever.log + exit 1 + fi + else + echo "[ retriever ] HTTP status is not 200. Received status was $HTTP_STATUS" + docker logs ${service_name} >> ${LOG_PATH}/retriever.log + exit 1 + fi + + docker logs tei-embedding-serving >> ${LOG_PATH}/tei.log +} + +function stop_docker() { + cd $WORKPATH/comps/retrievers/deployment/docker_compose + docker compose -f compose.yaml down ${service_name} --remove-orphans +} + +function main() { + + stop_docker + + build_docker_images + start_service + + validate_microservice + + stop_docker + echo y | docker system prune + +} + +main diff --git a/tests/retrievers/test_retrievers_vdms_langchain.sh b/tests/retrievers/test_retrievers_vdms_langchain.sh deleted file mode 100644 index 7e480b7d22..0000000000 --- a/tests/retrievers/test_retrievers_vdms_langchain.sh +++ /dev/null @@ -1,113 +0,0 @@ -#!/bin/bash -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -set -xe - -WORKPATH=$(dirname "$PWD") -LOG_PATH="$WORKPATH/tests" -ip_address=$(hostname -I | awk '{print $1}') -no_proxy=$no_proxy,$ip_address - -function build_docker_images() { - cd $WORKPATH - hf_token="dummy" - docker build --no-cache -t opea/retriever-vdms:comps \ - --build-arg https_proxy=$https_proxy \ - --build-arg http_proxy=$http_proxy \ - --build-arg huggingfacehub_api_token=$hf_token\ - -f comps/retrievers/vdms/langchain/Dockerfile . 
- -} - -function start_service() { - #unset http_proxy - # vdms - vdms_port=55555 - docker run -d --name test-comps-retriever-vdms-vector-db \ - -p $vdms_port:$vdms_port intellabs/vdms:latest - sleep 10s - - # tei endpoint - tei_endpoint=5058 - model="BAAI/bge-base-en-v1.5" - docker run -d --name="test-comps-retriever-vdms-tei-endpoint" \ - -p $tei_endpoint:80 -v ./data:/data \ - -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy \ - --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 \ - --model-id $model - sleep 30s - - export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:${tei_endpoint}" - - export INDEX_NAME="rag-vdms" - - # vdms retriever - unset http_proxy - use_clip=0 #set to 1 if openai clip embedding should be used - - retriever_port=5059 - docker run -d --name="test-comps-retriever-vdms-server" -p $retriever_port:7000 --ipc=host \ - -e INDEX_NAME=$INDEX_NAME -e VDMS_HOST=$ip_address \ - -e https_proxy=$https_proxy -e http_proxy=$http_proxy \ - -e VDMS_PORT=$vdms_port -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN \ - -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e USECLIP=$use_clip \ - opea/retriever-vdms:comps - sleep 3m -} - -function validate_microservice() { - - - retriever_port=5059 - URL="http://${ip_address}:$retriever_port/v1/retrieval" - #test_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") - - test_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") - - HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "{\"text\":\"test\",\"embedding\":${test_embedding}}" -H 'Content-Type: application/json' "$URL") - - #echo "HTTP_STATUS = $HTTP_STATUS" - - if [ "$HTTP_STATUS" -eq 200 ]; then - echo "[ retriever ] HTTP status is 200. Checking content..." - local CONTENT=$(curl -s -X POST -d "{\"text\":\"test\",\"embedding\":${test_embedding}}" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/retriever.log) - - if echo "$CONTENT" | grep -q "retrieved_docs"; then - echo "[ retriever ] Content is as expected." - else - echo "[ retriever ] Content does not match the expected result: $CONTENT" - docker logs test-comps-retriever-vdms-server >> ${LOG_PATH}/retriever.log - exit 1 - fi - else - echo "[ retriever ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs test-comps-retriever-vdms-server >> ${LOG_PATH}/retriever.log - exit 1 - fi - - docker logs test-comps-retriever-vdms-tei-endpoint >> ${LOG_PATH}/tei.log -} - -function stop_docker() { - cid_vdms=$(docker ps -aq --filter "name=test-comps-retriever-vdms*") - if [[ ! 
-z "$cid_vdms" ]]; then - docker stop $cid_vdms && docker rm $cid_vdms && sleep 1s - fi -} - -function main() { - - stop_docker - - build_docker_images - start_service - - validate_microservice - - stop_docker - echo y | docker system prune - -} - -main diff --git a/tests/text2image/test_text2image_native.sh b/tests/text2image/test_text2image_native.sh index 71249aa68a..e81c773523 100644 --- a/tests/text2image/test_text2image_native.sh +++ b/tests/text2image/test_text2image_native.sh @@ -6,6 +6,7 @@ set -x WORKPATH=$(dirname "$PWD") ip_address=$(hostname -I | awk '{print $1}') +service_name="text2image" function build_docker_images() { cd $WORKPATH @@ -21,7 +22,9 @@ function build_docker_images() { function start_service() { unset http_proxy - docker run -d --name="test-comps-text2image" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e MODEL=stabilityai/stable-diffusion-xl-base-1.0 -p 9379:9379 --ipc=host opea/text2image:latest + export MODEL=stabilityai/stable-diffusion-xl-base-1.0 + cd $WORKPATH/comps/text2image/deployment/docker_compose + docker compose -f compose.yaml up ${service_name} -d > start_services_with_compose.log sleep 30s } @@ -31,15 +34,15 @@ function validate_microservice() { echo "Result correct." else echo "Result wrong." - docker logs test-comps-text2image + docker logs text2image exit 1 fi } function stop_docker() { - cid=$(docker ps -aq --filter "name=test-comps-text2image*") - if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi + cd $WORKPATH/comps/text2image/deployment/docker_compose + docker compose -f compose.yaml down ${service_name} --remove-orphans } function main() { diff --git a/tests/text2image/test_text2image_native_on_intel_hpu.sh b/tests/text2image/test_text2image_native_on_intel_hpu.sh index b5f21de03b..f291b2eba6 100644 --- a/tests/text2image/test_text2image_native_on_intel_hpu.sh +++ b/tests/text2image/test_text2image_native_on_intel_hpu.sh @@ -6,6 +6,7 @@ set -x WORKPATH=$(dirname "$PWD") ip_address=$(hostname -I | awk '{print $1}') +service_name="text2image-gaudi" function build_docker_images() { cd $WORKPATH @@ -21,7 +22,9 @@ function build_docker_images() { function start_service() { unset http_proxy - docker run -d -p 9379:9379 --name="test-comps-text2image-gaudi" --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e HF_TOKEN=$HF_TOKEN -e MODEL=stabilityai/stable-diffusion-xl-base-1.0 opea/text2image-gaudi:latest + export MODEL=stabilityai/stable-diffusion-xl-base-1.0 + cd $WORKPATH/comps/text2image/deployment/docker_compose + docker compose -f compose.yaml up ${service_name} -d > start_services_with_compose.log sleep 30s } @@ -31,15 +34,15 @@ function validate_microservice() { echo "Result correct." else echo "Result wrong." - docker logs test-comps-text2image-gaudi + docker logs text2image-gaudi exit 1 fi } function stop_docker() { - cid=$(docker ps -aq --filter "name=test-comps-text2image*") - if [[ ! 
-z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi + cd $WORKPATH/comps/text2image/deployment/docker_compose + docker compose -f compose.yaml down ${service_name} --remove-orphans } function main() { diff --git a/tests/text2sql/test_text2sql.sh b/tests/text2sql/test_text2sql.sh new file mode 100644 index 0000000000..fb663b4a06 --- /dev/null +++ b/tests/text2sql/test_text2sql.sh @@ -0,0 +1,107 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -x + +WORKPATH=$(dirname "$PWD") +LOG_PATH="$WORKPATH/tests" +ip_address=$(hostname -I | awk '{print $1}') +DATA_PATH=$WORKPATH/data + +export TAG='comps' + +export TEXT2SQL_PORT=11700 +export LLM_ENDPOINT_PORT=11710 + + +export LLM_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.3" +export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +export POSTGRES_USER=postgres +export POSTGRES_PASSWORD=testpwd +export POSTGRES_DB=chinook + + +export service_name="text2sql" + +function build_docker_images() { + cd $WORKPATH + docker build --no-cache -t opea/text2sql:$TAG -f comps/text2sql/src/Dockerfile . +} + +check_tgi_connection() { + url=$1 + timeout=1200 + interval=10 + + local start_time=$(date +%s) + + while true; do + if curl --silent --head --fail "$url" > /dev/null; then + echo "Success" + return 0 + fi + echo + local current_time=$(date +%s) + + local elapsed_time=$((current_time - start_time)) + + if [ "$elapsed_time" -ge "$timeout" ]; then + echo "Timeout,$((timeout / 60))min can't connect $url" + return 1 + fi + echo "Waiting for service for $elapsed_time seconds" + sleep "$interval" + done +} + + +function start_service() { + + + export TGI_LLM_ENDPOINT="http://${ip_address}:${LLM_ENDPOINT_PORT}" + unset http_proxy + + cd $WORKPATH/comps/text2sql/deployment/docker_compose + docker compose -f compose.yaml up ${service_name} -d > ${LOG_PATH}/start_services_with_compose.log + check_tgi_connection "${TGI_LLM_ENDPOINT}/health" +} + +function validate_microservice() { + result=$(http_proxy="" curl http://${ip_address}:${TEXT2SQL_PORT}/v1/text2sql\ + -X POST \ + -d '{"input_text": "Find the total number of Albums.","conn_str": {"user": "'${POSTGRES_USER}'","password": "'${POSTGRES_PASSWORD}'","host": "'${ip_address}'", "port": "5442", "database": "'${POSTGRES_DB}'" }}' \ + -H 'Content-Type: application/json') + + if [[ $result == *"output"* ]]; then + echo $result + echo "Result correct." + else + echo "Result wrong. 
Received was $result" + docker logs text2sql-server > ${LOG_PATH}/text2sql.log + docker logs tgi-server > ${LOG_PATH}/tgi.log + exit 1 + fi + +} + +function stop_docker() { + cd $WORKPATH/comps/text2sql/deployment/docker_compose + docker compose -f compose.yaml down ${service_name} --remove-orphans +} + +function main() { + + stop_docker + + build_docker_images + start_service + + validate_microservice + + stop_docker + echo y | docker system prune + +} + +main diff --git a/tests/text2sql/test_text2sql_opea.sh b/tests/text2sql/test_text2sql_on_intel_hpu.sh similarity index 54% rename from tests/text2sql/test_text2sql_opea.sh rename to tests/text2sql/test_text2sql_on_intel_hpu.sh index 2ee0615766..143cb528e2 100644 --- a/tests/text2sql/test_text2sql_opea.sh +++ b/tests/text2sql/test_text2sql_on_intel_hpu.sh @@ -7,21 +7,28 @@ set -x WORKPATH=$(dirname "$PWD") LOG_PATH="$WORKPATH/tests" ip_address=$(hostname -I | awk '{print $1}') -tgi_port=8080 -tgi_volume=$WORKPATH/data -export model="mistralai/Mistral-7B-Instruct-v0.3" +export DATA_PATH=$WORKPATH/data + +export TAG='comps' + +export TEXT2SQL_PORT=11701 +export LLM_ENDPOINT_PORT=11711 + + +export LLM_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.3" export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} export POSTGRES_USER=postgres export POSTGRES_PASSWORD=testpwd export POSTGRES_DB=chinook +export service_name="text2sql-gaudi" + function build_docker_images() { cd $WORKPATH - docker build --no-cache -t opea/text2sql:comps -f comps/text2sql/src/Dockerfile . + docker build --no-cache -t opea/text2sql:$TAG -f comps/text2sql/src/Dockerfile . } - check_tgi_connection() { url=$1 timeout=1200 @@ -48,25 +55,19 @@ check_tgi_connection() { done } - function start_service() { - docker run --name test-text2sql-postgres --ipc=host -e POSTGRES_USER=${POSTGRES_USER} -e POSTGRES_HOST_AUTH_METHOD=trust -e POSTGRES_DB=${POSTGRES_DB} -e POSTGRES_PASSWORD=${POSTGRES_PASSWORD} -p 5442:5432 -d -v $WORKPATH/comps/text2sql/src/chinook.sql:/docker-entrypoint-initdb.d/chinook.sql postgres:latest - - docker run -d --name="test-text2sql-tgi-endpoint" --ipc=host -p $tgi_port:80 -v ./data:/data --shm-size 1g -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -e HF_TOKEN=${HF_TOKEN} -e model=${model} ghcr.io/huggingface/text-generation-inference:2.1.0 --model-id $model - export TGI_LLM_ENDPOINT="http://${ip_address}:${tgi_port}" - text2sql_port=9090 + export TGI_LLM_ENDPOINT="http://${ip_address}:${LLM_ENDPOINT_PORT}" unset http_proxy - docker run -d --name="test-text2sql-server" --ipc=host -p ${text2sql_port}:8080 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e TGI_LLM_ENDPOINT=$TGI_LLM_ENDPOINT opea/text2sql:comps - # check whether tgi is fully ready + cd $WORKPATH/comps/text2sql/deployment/docker_compose + docker compose -f compose.yaml up ${service_name} -d > ${LOG_PATH}/start_services_with_compose.log check_tgi_connection "${TGI_LLM_ENDPOINT}/health" } function validate_microservice() { - text2sql_port=9090 - result=$(http_proxy="" curl http://${ip_address}:${text2sql_port}/v1/text2sql\ + result=$(http_proxy="" curl http://${ip_address}:${TEXT2SQL_PORT}/v1/text2sql\ -X POST \ -d '{"input_text": "Find the total number of Albums.","conn_str": {"user": "'${POSTGRES_USER}'","password": "'${POSTGRES_PASSWORD}'","host": "'${ip_address}'", "port": "5442", "database": "'${POSTGRES_DB}'" }}' \ -H 'Content-Type: application/json') @@ -76,16 +77,16 @@ function validate_microservice() { echo "Result correct." else echo "Result wrong. 
Received was $result" - docker logs test-text2sql-server > ${LOG_PATH}/text2sql.log - docker logs test-text2sql-tgi-endpoint > ${LOG_PATH}/tgi.log + docker logs text2sql-gaudi-server > ${LOG_PATH}/text2sql.log + docker logs tgi-gaudi-server > ${LOG_PATH}/tgi.log exit 1 fi } function stop_docker() { - cid=$(docker ps -aq --filter "name=test-text2sql*") - if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi + cd $WORKPATH/comps/text2sql/deployment/docker_compose + docker compose -f compose.yaml down ${service_name} --remove-orphans } function main() { diff --git a/tests/vectorstores/test_vectorstores_milvus.sh b/tests/third_parties/test_third_parties_milvus.sh similarity index 97% rename from tests/vectorstores/test_vectorstores_milvus.sh rename to tests/third_parties/test_third_parties_milvus.sh index 59310127ad..e1da6e06f8 100644 --- a/tests/vectorstores/test_vectorstores_milvus.sh +++ b/tests/third_parties/test_third_parties_milvus.sh @@ -10,7 +10,7 @@ ip_address=$(hostname -I | awk '{print $1}') function start_service() { - cd $WORKPATH/comps/vectorstores/milvus + cd $WORKPATH/comps/third_parties/milvus/deployment/docker_compose rm -rf volumes/ docker compose up -d diff --git a/tests/3rd_parties/test_3rd_parties_nginx.sh b/tests/third_parties/test_third_parties_nginx.sh similarity index 100% rename from tests/3rd_parties/test_3rd_parties_nginx.sh rename to tests/third_parties/test_third_parties_nginx.sh diff --git a/tests/vectorstores/test_vectorstores_pathway.sh b/tests/third_parties/test_third_parties_pathway.sh similarity index 91% rename from tests/vectorstores/test_vectorstores_pathway.sh rename to tests/third_parties/test_third_parties_pathway.sh index 8ffbca887e..ed256f6cb7 100644 --- a/tests/vectorstores/test_vectorstores_pathway.sh +++ b/tests/third_parties/test_third_parties_pathway.sh @@ -10,7 +10,7 @@ ip_address=$(hostname -I | awk '{print $1}') function build_docker_images() { cd $WORKPATH - docker build --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -t opea/vectorstore-pathway:comps -f comps/vectorstores/pathway/Dockerfile . + docker build --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -t opea/vectorstore-pathway:comps -f comps/third_parties/pathway/src/Dockerfile . 
cd $WORKPATH @@ -45,7 +45,7 @@ function start_service() { export PATHWAY_HOST="0.0.0.0" export PATHWAY_PORT=5437 - docker run -d --name="test-comps-vectorstore-pathway-ms" -e PATHWAY_HOST=${PATHWAY_HOST} -e PATHWAY_PORT=${PATHWAY_PORT} -e TEI_EMBEDDING_ENDPOINT=${TEI_EMBEDDING_ENDPOINT} -e http_proxy=$http_proxy -e https_proxy=$https_proxy -v $WORKPATH/comps/vectorstores/pathway/README.md:/app/data/README.md -p ${PATHWAY_PORT}:${PATHWAY_PORT} --network="host" opea/vectorstore-pathway:comps + docker run -d --name="test-comps-vectorstore-pathway-ms" -e PATHWAY_HOST=${PATHWAY_HOST} -e PATHWAY_PORT=${PATHWAY_PORT} -e TEI_EMBEDDING_ENDPOINT=${TEI_EMBEDDING_ENDPOINT} -e http_proxy=$http_proxy -e https_proxy=$https_proxy -v $WORKPATH/comps/third_parties/pathway/src/README.md:/app/data/README.md -p ${PATHWAY_PORT}:${PATHWAY_PORT} --network="host" opea/vectorstore-pathway:comps sleep 70s diff --git a/tests/third_parties/test_third_parties_vllm_openvino.sh b/tests/third_parties/test_third_parties_vllm_openvino.sh new file mode 100644 index 0000000000..3d4b89c4b7 --- /dev/null +++ b/tests/third_parties/test_third_parties_vllm_openvino.sh @@ -0,0 +1,119 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -x + +IMAGE_REPO=${IMAGE_REPO:-"opea"} +export REGISTRY=${IMAGE_REPO} +export TAG="comps" +echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" +echo "TAG=${TAG}" + +WORKPATH=$(dirname "$PWD") +export host_ip=$(hostname -I | awk '{print $1}') +LOG_PATH="$WORKPATH/tests" +service_name="vllm-openvino" + +function build_container() { + cd $WORKPATH + git clone https://github.com/vllm-project/vllm.git vllm-openvino + cd ./vllm-openvino/ && git checkout v0.6.1 # something wrong with main branch image build + + docker build --no-cache -t ${REGISTRY:-opea}/vllm-openvino:${TAG:-latest} \ + -f Dockerfile.openvino \ + . \ + --build-arg https_proxy=$https_proxy \ + --build-arg http_proxy=$http_proxy + if [ $? 
-ne 0 ]; then + echo "vllm-openvino built fail" + exit 1 + else + echo "vllm-openvino built successful" + fi + cd $WORKPATH + rm -rf vllm-openvino +} + +# Function to start Docker container +start_container() { + export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" + export LLM_ENDPOINT_PORT=12205 + export HF_CACHE_DIR=$HOME/.cache/huggingface + + cd $WORKPATH/comps/third_parties/vllm/deployment/docker_compose + docker compose -f compose.yaml up ${service_name} -d > ${LOG_PATH}/start_services_with_compose.log + + # check whether service is fully ready + n=0 + until [[ "$n" -ge 300 ]]; do + docker logs $service_name > /tmp/$service_name.log 2>&1 + n=$((n+1)) + if grep -q "Uvicorn running on" /tmp/$service_name.log; then + break + fi + sleep 3s + done + +} + +# Function to test API endpoint +function test_api_endpoint { + local endpoint="$1" + local expected_status="$2" + + # Make the HTTP request + if test "$1" = "v1/completions" + then + local response=$(curl "http://$host_ip:$LLM_ENDPOINT_PORT/$endpoint" \ + -H "Content-Type: application/json" \ + -d '{ + "model": "Intel/neural-chat-7b-v3-3", + "prompt": "What is the key advantage of Openvino framework", + "max_tokens": 300, + "temperature": 0.7 + }' \ + --write-out '%{http_code}' \ + --silent \ + --output /dev/null) + else + local response=$(curl "http://$host_ip:$LLM_ENDPOINT_PORT/$endpoint" \ + --write-out '%{http_code}' \ + --silent \ + --output /dev/null) + fi + + # Assert the response status code + if [[ "$response" -eq "$expected_status" ]]; then + echo "PASS: $endpoint returned expected status code: $expected_status" + else + echo "FAIL: $endpoint returned unexpected status code: $response (expected: $expected_status)" + docker logs $service_name + exit 1 + fi +} + +function stop_docker() { + cd $WORKPATH/comps/llms/deployment/docker_compose + docker compose -f compose_faq-generation.yaml down ${service_name} --remove-orphans +} + +# Main function +main() { + + build_container + start_container + + # Sleep to allow the container to start up fully + sleep 10 + # Test the /v1/models API + test_api_endpoint "v1/models" 200 + + # Test the /v1/completions API + test_api_endpoint "v1/completions" 200 + + stop_docker +} + +# Call main function +main diff --git a/tests/third_parties/test_third_parties_vllm_openvino_on_intel_arc.sh b/tests/third_parties/test_third_parties_vllm_openvino_on_intel_arc.sh new file mode 100644 index 0000000000..c52765dcd7 --- /dev/null +++ b/tests/third_parties/test_third_parties_vllm_openvino_on_intel_arc.sh @@ -0,0 +1,112 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -x + +IMAGE_REPO=${IMAGE_REPO:-"opea"} +export REGISTRY=${IMAGE_REPO} +export TAG="comps" +echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" +echo "TAG=${TAG}" + +WORKPATH=$(dirname "$PWD") +export host_ip=$(hostname -I | awk '{print $1}') +LOG_PATH="$WORKPATH/tests" +service_name="vllm-openvino-arc" + +function build_container() { + cd $WORKPATH + docker build --no-cache -t ${REGISTRY:-opea}/vllm-arc:${TAG:-latest} -f comps/third_parties/vllm/src/Dockerfile.intel_gpu . --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy + + if [ $? 
-ne 0 ]; then + echo "vllm-arc built fail" + exit 1 + else + echo "vllm-arc built successful" + fi +} + +# Function to start Docker container +start_container() { + export LLM_ENDPOINT_PORT=12206 + export HF_CACHE_DIR=$HOME/.cache/huggingface + export RENDER_GROUP_ID=110 + export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" + + cd $WORKPATH/comps/third_parties/vllm/deployment/docker_compose + docker compose -f compose.yaml up ${service_name} -d > ${LOG_PATH}/start_services_with_compose.log + + # check whether service is fully ready + n=0 + until [[ "$n" -ge 300 ]]; do + docker logs $service_name > /tmp/$service_name.log 2>&1 + n=$((n+1)) + if grep -q "Uvicorn running on" /tmp/$service_name.log; then + break + fi + sleep 3s + done + +} + +# Function to test API endpoint +function test_api_endpoint { + local endpoint="$1" + local expected_status="$2" + + # Make the HTTP request + if test "$1" = "v1/completions" + then + local response=$(curl "http://localhost:$LLM_ENDPOINT_PORT/$endpoint" \ + -H "Content-Type: application/json" \ + -d '{ + "model": "Intel/neural-chat-7b-v3-3", + "prompt": "What is the key advantage of Openvino framework", + "max_tokens": 300, + "temperature": 0.7 + }' \ + --write-out '%{http_code}' \ + --silent \ + --output /dev/null) + else + local response=$(curl "http://localhost:$LLM_ENDPOINT_PORT/$endpoint" \ + --write-out '%{http_code}' \ + --silent \ + --output /dev/null) + fi + + # Assert the response status code + if [[ "$response" -eq "$expected_status" ]]; then + echo "PASS: $endpoint returned expected status code: $expected_status" + else + echo "FAIL: $endpoint returned unexpected status code: $response (expected: $expected_status)" + docker logs $service_name + exit 1 + fi +} + +function stop_docker() { + cd $WORKPATH/comps/llms/deployment/docker_compose + docker compose -f compose_faq-generation.yaml down ${service_name} --remove-orphans +} + +# Main function +main() { + + build_container + start_container + + # Sleep to allow the container to start up fully + sleep 10 + # Test the /v1/models API + test_api_endpoint "v1/models" 200 + + # Test the /v1/completions API + test_api_endpoint "v1/completions" 200 + + stop_docker +} + +# Call main function +main diff --git a/tests/tts/test_tts_opea_gptsovits.sh b/tests/tts/test_tts_gptsovits.sh similarity index 52% rename from tests/tts/test_tts_opea_gptsovits.sh rename to tests/tts/test_tts_gptsovits.sh index bff8f1ccf3..0a6e46b48b 100644 --- a/tests/tts/test_tts_opea_gptsovits.sh +++ b/tests/tts/test_tts_gptsovits.sh @@ -6,11 +6,14 @@ set -x WORKPATH=$(dirname "$PWD") ip_address=$(hostname -I | awk '{print $1}') +export TAG=comps +export GPT_SOVITS_PORT=11804 +export TTS_PORT=11805 function build_docker_images() { cd $WORKPATH echo $(pwd) - docker build --no-cache --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -t opea/gpt-sovits:comps -f comps/tts/src/integrations/dependency/gpt-sovits/Dockerfile . + docker build --no-cache --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -t opea/gpt-sovits:$TAG -f comps/tts/src/integrations/dependency/gpt-sovits/Dockerfile . if [ $? -ne 0 ]; then echo "opea/gpt-sovits built fail" exit 1 @@ -18,7 +21,7 @@ function build_docker_images() { echo "opea/gpt-sovits built successful" fi - docker build --no-cache --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -t opea/tts:comps -f comps/tts/src/Dockerfile . 
+ docker build --no-cache --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -t opea/tts:$TAG -f comps/tts/src/Dockerfile . if [ $? -ne 0 ]; then echo "opea/tts built fail" exit 1 @@ -30,28 +33,28 @@ function build_docker_images() { function start_service() { unset http_proxy - docker run -d --name="test-comps-tts-gpt-sovits" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 9880:9880 --ipc=host opea/gpt-sovits:comps - sleep 2m - docker run -d --name="test-comps-tts" -e TTS_ENDPOINT=http://$ip_address:9880 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e TTS_COMPONENT_NAME="OPEA_GPTSOVITS_TTS" -p 5016:9088 --ipc=host opea/tts:comps + export TTS_ENDPOINT=http://$ip_address:$GPT_SOVITS_PORT + export TTS_COMPONENT_NAME=OPEA_GPTSOVITS_TTS + + docker compose -f comps/tts/deployment/docker_compose/compose.yaml up gptsovits-service tts-gptsovits -d sleep 15 } function validate_microservice() { - http_proxy="" curl localhost:5016/v1/audio/speech -XPOST -d '{"input":"Hello, who are you? 你好。"}' -H 'Content-Type: application/json' --output speech.mp3 + http_proxy="" curl localhost:$TTS_PORT/v1/audio/speech -XPOST -d '{"input":"Hello, who are you? 你好。"}' -H 'Content-Type: application/json' --output speech.mp3 if [[ $(file speech.mp3) == *"RIFF"* ]]; then echo "Result correct." else echo "Result wrong." - docker logs test-comps-tts-gpt-sovits - docker logs test-comps-tts + docker logs gptsovits-service + docker logs tts-gptsovits-service exit 1 fi } function stop_docker() { - cid=$(docker ps -aq --filter "name=test-comps-tts*") - if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi + docker ps -a --filter "name=gptsovits-service" --filter "name=tts-gptsovits-service" --format "{{.Names}}" | xargs -r docker stop } function main() { diff --git a/tests/tts/test_tts_opea_speecht5.sh b/tests/tts/test_tts_speecht5.sh similarity index 54% rename from tests/tts/test_tts_opea_speecht5.sh rename to tests/tts/test_tts_speecht5.sh index 2282f0b2de..ab0e6ca44c 100644 --- a/tests/tts/test_tts_opea_speecht5.sh +++ b/tests/tts/test_tts_speecht5.sh @@ -6,18 +6,21 @@ set -x WORKPATH=$(dirname "$PWD") ip_address=$(hostname -I | awk '{print $1}') +export TAG=comps +export SPEECHT5_PORT=11800 +export TTS_PORT=11801 function build_docker_images() { cd $WORKPATH echo $(pwd) - docker build --no-cache --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -t opea/speecht5:comps -f comps/tts/src/integrations/dependency/speecht5/Dockerfile . + docker build --no-cache --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -t opea/speecht5:$TAG -f comps/tts/src/integrations/dependency/speecht5/Dockerfile . if [ $? -ne 0 ]; then echo "opea/speecht5 built fail" exit 1 else echo "opea/speecht5 built successful" fi - docker build --no-cache --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -t opea/tts:comps -f comps/tts/src/Dockerfile . + docker build --no-cache --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -t opea/tts:$TAG -f comps/tts/src/Dockerfile . if [ $? 
-ne 0 ]; then echo "opea/tts built fail" exit 1 @@ -28,28 +31,28 @@ function build_docker_images() { function start_service() { unset http_proxy - docker run -d --name="test-comps-tts-speecht5" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 5017:7055 --ipc=host opea/speecht5:comps - sleep 2m - docker run -d --name="test-comps-tts" -e TTS_ENDPOINT=http://$ip_address:5017 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 5016:9088 --ipc=host opea/tts:comps + export TTS_ENDPOINT=http://$ip_address:$SPEECHT5_PORT + export TTS_COMPONENT_NAME=OPEA_SPEECHT5_TTS + + docker compose -f comps/tts/deployment/docker_compose/compose.yaml up speecht5-service tts-speecht5 -d sleep 15 } function validate_microservice() { - http_proxy="" curl localhost:5016/v1/audio/speech -XPOST -d '{"input":"Hello, who are you?"}' -H 'Content-Type: application/json' --output speech.mp3 + http_proxy="" curl localhost:$TTS_PORT/v1/audio/speech -XPOST -d '{"input":"Hello, who are you?"}' -H 'Content-Type: application/json' --output speech.mp3 if [[ $(file speech.mp3) == *"RIFF"* ]]; then echo "Result correct." else echo "Result wrong." - docker logs test-comps-tts-speecht5 - docker logs test-comps-tts + docker logs speecht5-service + docker logs tts-speecht5-service exit 1 fi } function stop_docker() { - cid=$(docker ps -aq --filter "name=test-comps-tts*") - if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi + docker ps -a --filter "name=speecht5-service" --filter "name=tts-speecht5-service" --format "{{.Names}}" | xargs -r docker stop } function main() { diff --git a/tests/tts/test_tts_opea_speecht5_on_intel_hpu.sh b/tests/tts/test_tts_speecht5_on_intel_hpu.sh similarity index 50% rename from tests/tts/test_tts_opea_speecht5_on_intel_hpu.sh rename to tests/tts/test_tts_speecht5_on_intel_hpu.sh index bae801580e..44edfb0f77 100644 --- a/tests/tts/test_tts_opea_speecht5_on_intel_hpu.sh +++ b/tests/tts/test_tts_speecht5_on_intel_hpu.sh @@ -6,18 +6,21 @@ set -x WORKPATH=$(dirname "$PWD") ip_address=$(hostname -I | awk '{print $1}') +export TAG=comps +export SPEECHT5_PORT=11802 +export TTS_PORT=11803 function build_docker_images() { cd $WORKPATH echo $(pwd) - docker build --no-cache --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -t opea/speecht5-gaudi:comps -f comps/tts/src/integrations/dependency/speecht5/Dockerfile.intel_hpu . + docker build --no-cache --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -t opea/speecht5-gaudi:$TAG -f comps/tts/src/integrations/dependency/speecht5/Dockerfile.intel_hpu . if [ $? -ne 0 ]; then echo "opea/speecht5-gaudi built fail" exit 1 else echo "opea/speecht5-gaudi built successful" fi - docker build --no-cache --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -t opea/tts:comps -f comps/tts/src/Dockerfile . + docker build --no-cache --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -t opea/tts:$TAG -f comps/tts/src/Dockerfile . if [ $? 
-ne 0 ]; then echo "opea/tts built fail" exit 1 @@ -28,29 +31,30 @@ function build_docker_images() { function start_service() { unset http_proxy - docker run -d --name="test-comps-tts-speecht5" --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 5017:7055 --ipc=host opea/speecht5-gaudi:comps - sleep 3m - docker run -d --name="test-comps-tts" -e TTS_ENDPOINT=http://$ip_address:5017 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 5016:9088 --ipc=host opea/tts:comps + export TTS_ENDPOINT=http://$ip_address:$SPEECHT5_PORT + + export TTS_COMPONENT_NAME=OPEA_SPEECHT5_TTS + + docker compose -f comps/tts/deployment/docker_compose/compose.yaml up speecht5-gaudi-service tts-speecht5-gaudi -d sleep 15 } function validate_microservice() { - http_proxy="" curl localhost:5016/v1/audio/speech -XPOST -d '{"input":"Hello, who are you?"}' -H 'Content-Type: application/json' --output speech.mp3 + http_proxy="" curl localhost:$TTS_PORT/v1/audio/speech -XPOST -d '{"input":"Hello, who are you?"}' -H 'Content-Type: application/json' --output speech.mp3 if [[ $(file speech.mp3) == *"RIFF"* ]]; then echo "Result correct." else echo "Result wrong." - docker logs test-comps-tts-speecht5 - docker logs test-comps-tts + docker logs speecht5-gaudi-service + docker logs tts-speecht5-gaudi-service exit 1 fi } function stop_docker() { - cid=$(docker ps -aq --filter "name=test-comps-tts*") - if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi + docker ps -a --filter "name=speecht5-gaudi-service" --filter "name=tts-speecht5-gaudi-service" --format "{{.Names}}" | xargs -r docker stop } function main() { diff --git a/tests/web_retrievers/test_web_retrievers_google_search.sh b/tests/web_retrievers/test_web_retrievers_google_search.sh index 8569307f23..8b8877a106 100644 --- a/tests/web_retrievers/test_web_retrievers_google_search.sh +++ b/tests/web_retrievers/test_web_retrievers_google_search.sh @@ -6,9 +6,13 @@ set -x WORKPATH=$(dirname "$PWD") ip_address=$(hostname -I | awk '{print $1}') +export TAG=comps +export WEB_RETRIEVER_PORT=11900 +export TEI_PORT=11901 + function build_docker_images() { cd $WORKPATH - docker build --no-cache -t opea/web-retriever:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/web_retrievers/src/Dockerfile . + docker build --no-cache -t opea/web-retriever:$TAG --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/web_retrievers/src/Dockerfile . if [ $? 
-ne 0 ]; then echo "opea/web-retriever built fail" exit 1 @@ -18,26 +22,19 @@ function build_docker_images() { } function start_service() { + export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" + export TEI_EMBEDDING_ENDPOINT=http://${ip_address}:${TEI_PORT} + export host_ip=${ip_address} + export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN} - # tei endpoint - tei_endpoint=5018 - model="BAAI/bge-base-en-v1.5" - docker run -d --name="test-comps-web-retriever-tei-endpoint" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p $tei_endpoint:80 -v ./data:/data --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 --model-id $model - export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:${tei_endpoint}" - - # web retriever - retriever_port=5019 - unset http_proxy - docker run -d --name="test-comps-web-retriever-server" -p ${retriever_port}:7077 --ipc=host -e GOOGLE_API_KEY=$GOOGLE_API_KEY -e GOOGLE_CSE_ID=$GOOGLE_CSE_ID -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy opea/web-retriever:comps - - sleep 3m + docker compose -f comps/web_retrievers/deployment/docker_compose/compose.yaml up -d + sleep 15s } function validate_microservice() { - retriever_port=5019 export PATH="${HOME}/miniforge3/bin:$PATH" test_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") - result=$(http_proxy='' curl http://${ip_address}:$retriever_port/v1/web_retrieval \ + result=$(http_proxy='' curl http://${ip_address}:$WEB_RETRIEVER_PORT/v1/web_retrieval \ -X POST \ -d "{\"text\":\"What is OPEA?\",\"embedding\":${test_embedding}}" \ -H 'Content-Type: application/json') @@ -45,17 +42,14 @@ function validate_microservice() { echo "Result correct." else echo "Result wrong. Received status was $result" - docker logs test-comps-web-retriever-tei-endpoint - docker logs test-comps-web-retriever-server + docker logs tei-embedding-server + docker logs web-retriever-service exit 1 fi } function stop_docker() { - cid_retrievers=$(docker ps -aq --filter "name=test-comps-web*") - if [[ ! -z "$cid_retrievers" ]]; then - docker stop $cid_retrievers && docker rm $cid_retrievers && sleep 1s - fi + docker ps -a --filter "name=tei-embedding-server" --filter "name=web-retriever-service" --format "{{.Names}}" | xargs -r docker stop } function main() {