test konflux build
justjais committed Feb 17, 2025
1 parent 62c4480 commit e1f535a
Showing 9 changed files with 569 additions and 38 deletions.
24 changes: 0 additions & 24 deletions .github/workflows/eval_e2e.yaml

This file was deleted.

35 changes: 35 additions & 0 deletions .tekton/ansible-chatbot-service-pull-request.yaml
@@ -220,6 +220,41 @@ spec:
echo -n $(date -d @$(git log -1 --format=%at) "+%Y%m%d%H%M") > $(results.commit-timestamp.path)
results:
- name: commit-timestamp
- name: eval-framework-testing
runAfter:
- clone-repository
workspaces:
- name: source
workspace: workspace
taskSpec:
workspaces:
- name: source
env:
- name: OLS_API_KEY
valueFrom:
secretKeyRef:
name: ols-api-key
key: OLS_API_KEY
steps:
- name: install-deps-and-run-e2e-eval-framework
image: registry.access.redhat.com/ubi9/ubi-minimal@sha256:db8bf20cac0f250659f1ffbba929639dcd7e09863c20c99c5cb8f3ce8ded0497
script: |
#!/bin/sh
microdnf install -y python3.11 pip
pwd
cd "$(workspaces.source.path)/source"
pwd
make install-deps
pip install matplotlib
pip install rouge-score
pip install pdm
export MODEL=granite3-8b
export ARTIFACT_DIR=tests/test_results
export TEST_TAGS=response_evaluation
export PROVIDER=my_rhoai_g3
export SUITE_ID=rhoai_vllm
export PROVIDER_KEY_PATH=ols-api-key
pdm run make test-e2e
- name: prefetch-dependencies
params:
- name: input
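For reference, the new eval step boils down to the shell sketch below when run outside Tekton. It assumes a local clone of this repository and that the provider API key from the ols-api-key secret has been saved to a local file of the same name; everything else is copied from the task script above.

# Sketch: reproduce the eval-framework e2e step locally.
# Assumes the repo is checked out and the API key sits in ./ols-api-key.
make install-deps
pip install matplotlib rouge-score pdm

export OLS_API_KEY="$(cat ols-api-key)"   # the pipeline injects this from the ols-api-key secret
export MODEL=granite3-8b
export PROVIDER=my_rhoai_g3
export SUITE_ID=rhoai_vllm
export TEST_TAGS=response_evaluation
export ARTIFACT_DIR=tests/test_results
export PROVIDER_KEY_PATH=ols-api-key

pdm run make test-e2e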
486 changes: 486 additions & 0 deletions olsconfig.yaml

Large diffs are not rendered by default.

8 changes: 4 additions & 4 deletions scripts/evaluation/eval_data/question_answer_pair.json
@@ -3,31 +3,31 @@
"eval1": {
"question": "what is ansible?",
"answer": {
"rhoai_vllm+granite3-8b+with_rag": {
"my_rhoai_g3+granite3-8b+with_rag": {
"cutoff_score": 0.35,
"text": [
"Ansible is an open source IT automation engine that automates various IT processes such as provisioning, configuration management, application deployment, orchestration, and more. It is free to use and benefits from the contributions of its thousands of contributors. Ansible does not require any paid subscription for its core functionalities."
]
},
"ground_truth": {
"cutoff_score": {
"rhoai_vllm+granite3-8b+with_rag": 0.2
"my_rhoai_g3+granite3-8b+with_rag": 0.2
},
"text": [
"Ansible is an open-source automation tool used for configuration management, application deployment, and IT orchestration. It is agentless, uses YAML-based playbooks, and communicates over SSH or WinRM. Ansible ensures idempotency, meaning tasks produce consistent results. It is widely used for server provisioning, cloud automation, and DevOps workflows."
]
},
"ground_truth+with_rag": {
"cutoff_score": {
"rhoai_vllm+granite3-8b+with_rag": 0.2
"my_rhoai_g3+granite3-8b+with_rag": 0.2
},
"text": [
"Ansible is an open source IT automation engine that automates various IT processes such as provisioning, configuration management, application deployment, orchestration, and more. It is free to use and benefits from the contributions of its thousands of contributors. Ansible does not require any paid subscription for its core functionalities."
]
},
"ground_truth+without_rag": {
"cutoff_score": {
"rhoai_vllm+granite3-8b+with_rag": 0.2
"my_rhoai_g3+granite3-8b+with_rag": 0.2
},
"text": [
"Ansible is an open-source automation tool used for configuration management, application deployment, and IT orchestration. It is agentless, uses YAML-based playbooks, and communicates over SSH or WinRM. Ansible ensures idempotency, meaning tasks produce consistent results. It is widely used for server provisioning, cloud automation, and DevOps workflows."
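The answer keys in this file follow a provider+model+mode naming scheme, so the renames above keep the eval data aligned with the my_rhoai_g3 provider used by the new Tekton eval step. A quick consistency check is sketched below, assuming jq is available and the command is run from the repository root.

# Sketch: list every key in the eval data that mentions the model; any leftover
# rhoai_vllm+granite3-8b entries would point to a missed rename.
jq -r '.. | objects | keys[]' scripts/evaluation/eval_data/question_answer_pair.json | sort -u | grep 'granite3-8b'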
1 change: 1 addition & 0 deletions scripts/evaluation/utils/response.py
@@ -18,6 +18,7 @@ def get_model_response(query, provider, model, mode, api_client=None):
if mode == "ols":
response = api_client.post(
"/api/v1/ai/chat/",
# "/chat/completions",
json={
"query": query,
"provider": provider,
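The commented-out path keeps the alternative /chat/completions route visible while the client continues to post to /api/v1/ai/chat/. The same request can be exercised by hand with curl; this is only a sketch, since the hunk shows just the query and provider fields, and the base URL, bearer token, and model value are assumptions.

# Sketch: manual call to the endpoint used by get_model_response in "ols" mode.
# OLS_URL, the bearer token, and the model field are assumptions.
curl -sS -X POST "$OLS_URL/api/v1/ai/chat/" \
  -H "Authorization: Bearer $OLS_API_KEY" \
  -H "Content-Type: application/json" \
  -d '{"query": "what is ansible?", "provider": "my_rhoai_g3", "model": "granite3-8b"}'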
34 changes: 34 additions & 0 deletions tests/config/operator_install/olsconfig.crd.my_rhoai_g3.yaml
@@ -0,0 +1,34 @@
apiVersion: ols.openshift.io/v1alpha1
kind: OLSConfig
metadata:
name: cluster
labels:
app.kubernetes.io/created-by: lightspeed-operator
app.kubernetes.io/instance: olsconfig-sample
app.kubernetes.io/managed-by: kustomize
app.kubernetes.io/name: olsconfig
app.kubernetes.io/part-of: lightspeed-operator
spec:
llm:
providers:
- credentialsSecretRef:
name: llmcreds
models:
- name: granite3-8b
name: my_rhoai_g3
type: openai
url: "https://stage.ai.ansible.redhat.com"
ols:
defaultModel: granite3-8b
defaultProvider: my_rhoai_g3
deployment:
replicas: 1
disableAuth: false
logLevel: DEBUG
queryFilters:
- name: foo_filter
pattern: '\b(?:foo)\b'
replaceWith: "deployment"
- name: bar_filter
pattern: '\b(?:bar)\b'
replaceWith: "openshift"
7 changes: 3 additions & 4 deletions tests/config/operator_install/olsconfig.crd.rhoai_vllm.yaml
@@ -14,12 +14,11 @@ spec:
- credentialsSecretRef:
name: llmcreds
models:
- name: granite3-8b
- name: gpt-3.5-turbo
name: rhoai_vllm
type: openai
url: "https://granite3-8b-wisdom-model-staging.apps.stage2-west.v2dz.p1.openshiftapps.com/v1"
type: rhoai_vllm
ols:
defaultModel: granite3-8b
defaultModel: gpt-3.5-turbo
defaultProvider: rhoai_vllm
deployment:
replicas: 1
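Note that the model name changes in two places, the provider's model list and ols.defaultModel, and the two must stay in sync for the config to be usable. A quick check, sketched here assuming yq v4 is installed:

# Sketch: the default model should appear in the provider's model list.
yq '.spec.ols.defaultModel' tests/config/operator_install/olsconfig.crd.rhoai_vllm.yaml
yq '.spec.llm.providers[].models[].name' tests/config/operator_install/olsconfig.crd.rhoai_vllm.yaml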
8 changes: 4 additions & 4 deletions tests/e2e/test_api.py
@@ -530,7 +530,7 @@ def test_generated_service_certs_rotation():
def test_ca_service_certs_rotation():
"""Verify OLS responds after ca certificate rotation."""
cluster_utils.delete_resource(
resource="secret", name="signing-key", namespace="ansible-service-ca"
resource="secret", name="signing-key", namespace="openshift-service-ca"
)
response = pytest.client.post(
"/v1/query",
@@ -539,13 +539,13 @@ def test_ca_service_certs_rotation():
)
assert response.status_code == requests.codes.ok
cluster_utils.restart_deployment(
name="lightspeed-operator-controller-manager", namespace="ansible-lightspeed"
name="lightspeed-operator-controller-manager", namespace="openshift-lightspeed"
)
cluster_utils.restart_deployment(
name="lightspeed-app-server", namespace="ansible-lightspeed"
name="lightspeed-app-server", namespace="openshift-lightspeed"
)
cluster_utils.restart_deployment(
name="lightspeed-console-plugin", namespace="ansible-lightspeed"
name="lightspeed-console-plugin", namespace="openshift-lightspeed"
)
# Wait for service to become available again
time.sleep(120)
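The namespace rename points the certificate-rotation test back at the stock openshift-service-ca and openshift-lightspeed namespaces. The restarts issued by the test correspond roughly to the oc commands below, assuming cluster_utils.restart_deployment maps to oc rollout restart (the helper's implementation is not part of this diff).

# Sketch: rough oc equivalent of the three restart_deployment calls above.
oc rollout restart deployment/lightspeed-operator-controller-manager -n openshift-lightspeed
oc rollout restart deployment/lightspeed-app-server -n openshift-lightspeed
oc rollout restart deployment/lightspeed-console-plugin -n openshift-lightspeed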
4 changes: 2 additions & 2 deletions tests/scripts/utils.sh
@@ -14,7 +14,7 @@ EOF
function cleanup_ols() {
# Deletes may fail if this is the first time running against
# the cluster, so ignore failures
oc delete --wait --ignore-not-found ns ansible-lightspeed
oc delete --wait --ignore-not-found ns openshift-lightspeed
oc delete --wait --ignore-not-found clusterrole ols-sar-check
oc delete --wait --ignore-not-found clusterrolebinding ols-sar-check
oc delete --wait --ignore-not-found clusterrole ols-user
@@ -32,7 +32,7 @@ function cleanup_ols_operator() {
operator-sdk cleanup lightspeed-operator

# delete the OLS namespace
oc delete --wait --ignore-not-found ns ansible-lightspeed
oc delete --wait --ignore-not-found ns openshift-lightspeed

# delete the ImageDigestMirrorSet
oc delete --wait --ignore-not-found imagedigestmirrorset/openshift-lightspeed-prod-to-ci
