CoPilot LLM Performance Evaluation #298
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Builds the copilot test image, writes the service/LLM configuration files
# from repository secrets, runs the evaluation suite inside a container and,
# for pull requests, posts the resulting report URL as a PR comment.
#
# NOTE(review): the `push` and `workflow_dispatch` triggers only build the
# image; no container/test step below matches those events. Confirm whether
# that is intended.
name: CoPilot LLM Performance Evaluation

on:
  push:
    branches: ["build-test-ci"]
  pull_request:
    branches: ["main", "dev"]
    # Only runs when a label is added; the label name selects the tests.
    types: [labeled]
  schedule:
    - cron: '45 22 * * 6'
  workflow_dispatch:

jobs:
  build-and-test:
    runs-on: [self-hosted, dind]
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Build Docker.tests image
        uses: docker/build-push-action@v5
        with:
          context: .
          file: copilot/Dockerfile.tests
          push: false
          # Load the image into the local daemon so `docker run` can use it.
          load: true
          tags: nlqs/tests:0.1

      - name: Create db config
        # Secrets are exposed as environment variables and written to files
        # that are later bind-mounted into the test container.
        run: |
          # -p: do not fail if the directory survived a previous run.
          mkdir -p configs
          echo "$DB_CONFIG" > configs/db_config.json
          echo "$LLM_CONFIG_OPENAI_GPT4" > configs/openai_gpt4_config.json
          echo "$LLM_CONFIG_OPENAI_GPT4O" > configs/openai_gpt4o_config.json
          echo "$LLM_CONFIG_AZURE_GPT35" > configs/azure_llm_config.json
          echo "$LLM_CONFIG_OPENAI_GPT35" > configs/openai_gpt3.5-turbo_config.json
          echo "$LLM_CONFIG_GCP_TEXT_BISON" > configs/gcp_text-bison_config.json
          echo "$LLM_CONFIG_HUGGINGFACE_PHI3" > configs/huggingface_severless_endpoint_phi3_config.json
          echo "$GCP_CREDS_CONFIG" > configs/GCP_CREDS.json
          echo "$LLM_TEST_EVALUATOR" > configs/test_evaluation_model_config.json
          echo "$LLM_CONFIG_GROQ_MIXTRAL" > configs/groq_mixtral_config.json
          echo "$LLM_CONFIG_BEDROCK_CLAUDE3" > configs/bedrock_config.json
          echo "$LLM_CONFIG_HUGGINGFACE_LLAMA70B" > configs/huggingface_llama70b_config.json
          echo "$LLM_CONFIG_WATSONX_MISTRAL_LARGE" > configs/ibm_watsonx_config.json
          echo "$MILVUS_CONFIG" > configs/milvus_config.json
        env:
          DB_CONFIG: ${{ secrets.DB_CONFIG }}
          LLM_CONFIG_OPENAI_GPT4: ${{ secrets.LLM_CONFIG_OPENAI_GPT4 }}
          LLM_CONFIG_AZURE_GPT35: ${{ secrets.LLM_CONFIG_AZURE_GPT35 }}
          LLM_CONFIG_GCP_TEXT_BISON: ${{ secrets.LLM_CONFIG_GCP_TEXT_BISON }}
          LLM_CONFIG_OPENAI_GPT35: ${{ secrets.LLM_CONFIG_OPENAI_GPT35 }}
          LLM_CONFIG_GROQ_MIXTRAL: ${{ secrets.LLM_CONFIG_GROQ_MIXTRAL }}
          LLM_CONFIG_BEDROCK_CLAUDE3: ${{ secrets.LLM_CONFIG_BEDROCK_CLAUDE3 }}
          LLM_CONFIG_HUGGINGFACE_PHI3: ${{ secrets.LLM_CONFIG_HUGGINGFACE_PHI3 }}
          LLM_CONFIG_OPENAI_GPT4O: ${{ secrets.LLM_CONFIG_OPENAI_GPT4O }}
          LLM_CONFIG_HUGGINGFACE_LLAMA70B: ${{ secrets.LLM_CONFIG_HUGGINGFACE_LLAMA70B }}
          LLM_CONFIG_WATSONX_MISTRAL_LARGE: ${{ secrets.LLM_CONFIG_WATSONX_MISTRAL_LARGE }}
          GCP_CREDS_CONFIG: ${{ secrets.GCP_CREDS_CONFIG }}
          LLM_TEST_EVALUATOR: ${{ secrets.LLM_TEST_EVALUATOR }}
          MILVUS_CONFIG: ${{ secrets.MILVUS_CONFIG }}

      - name: Run Docker Container for PR
        # A bare expression is required here: `${{ ... }} == '...'` expands to
        # a non-empty string and would make the condition always true.
        if: github.event_name == 'pull_request'
        env:
          WANDB_API_KEY: ${{ secrets.WANDB_API_KEY }}
          PR_NUMBER: ${{ github.event.number }}
        run: |
          # Remove any stale container left over from an earlier run.
          docker rm -f nlqs-tests || true
          # `-e NAME` (no value) forwards the variable from this step's
          # environment, keeping the secret out of the script text.
          docker run -it -d \
            -v "$(pwd)/configs/:/code/app/configs" \
            -e GOOGLE_APPLICATION_CREDENTIALS=/code/app/configs/GCP_CREDS.json \
            -e WANDB_API_KEY \
            -e PR_NUMBER \
            --name nlqs-tests \
            nlqs/tests:0.1

      - name: Run Docker Container for Regress
        if: github.event_name == 'schedule'
        env:
          WANDB_API_KEY: ${{ secrets.WANDB_API_KEY }}
        run: |
          docker rm -f nlqs-tests || true
          docker run -it -d \
            -v "$(pwd)/configs/:/code/app/configs" \
            -e GOOGLE_APPLICATION_CREDENTIALS=/code/app/configs/GCP_CREDS.json \
            -e WANDB_API_KEY \
            -e PR_NUMBER=DailyRegression \
            --name nlqs-tests \
            nlqs/tests:0.1

      - name: Execute PR Tests
        if: github.event_name == 'pull_request'
        env:
          # Label text is user-controlled; pass it as data, never as script.
          LABEL_NAME: ${{ github.event.label.name }}
        run: |
          # The default shell runs with `-e`, so capture the exit code with
          # `||` instead of reading `$?` after the command has already
          # aborted the script.
          status=0
          docker exec nlqs-tests bash -c './run_tests.sh "$1" all' _ "$LABEL_NAME" || status=$?
          if [ "$status" -ne 0 ]; then
            echo "test failed with status $status"
            exit "$status"
          fi
          url=$(docker exec nlqs-tests bash -c "cat /code/tests/report_url.txt")
          # Export the report URL to later steps.
          echo "REPORT_URL=${url}" >> "$GITHUB_ENV"

      - name: Execute Regress Tests
        if: github.event_name == 'schedule'
        run: |
          status=0
          docker exec nlqs-tests bash -c "./run_tests.sh all all" || status=$?
          if [ "$status" -ne 0 ]; then
            echo "test failed with status $status"
            exit "$status"
          fi
          url=$(docker exec nlqs-tests bash -c "cat /code/tests/report_url.txt")
          echo "REPORT_URL=${url}" >> "$GITHUB_ENV"

      - name: Add PR Comment
        if: github.event_name == 'pull_request'
        # NOTE(review): the pinned version was lost in the scraped source;
        # `v2` is the action's major tag - confirm the exact version to pin.
        uses: mshick/add-pr-comment@v2
        with:
          message: |
            Test Report: ${{ env.REPORT_URL }}

      - name: Cleanup
        # Run even when an earlier step failed so the container and the
        # secret-bearing config files do not linger on the self-hosted runner.
        if: always()
        run: |
          docker rm -f nlqs-tests || true
          rm -rf configs || true
          echo "$REPORT_URL"