Merge branch 'main' into fix-tracing

kritinv · Nov 13, 2024 · 1a9c797 · 1a9c797
2 parents 3e7ff4e + 9a19f13
commit 1a9c797
Show file tree

Hide file tree

Showing 33 changed files with 1,013 additions and 97 deletions.
diff --git a/deepeval/__init__.py b/deepeval/__init__.py
@@ -5,26 +5,30 @@
 from ._version import __version__
 
 from deepeval.event import track
-from deepeval.monitor import monitor, send_feedback
+from deepeval.monitor import monitor, a_monitor, send_feedback, a_send_feedback
 from deepeval.evaluate import evaluate, assert_test
 from deepeval.test_run import on_test_run_end, log_hyperparameters
 from deepeval.utils import login_with_confident_api_key
 from deepeval.telemetry import *
 from deepeval.integrations import trace_langchain, trace_llama_index
 from deepeval.confident import confident_evaluate
+from deepeval.guardrails import guard
 
 __all__ = [
     "login_with_confident_api_key",
     "log_hyperparameters",
     "track",
     "monitor",
+    "a_monitor",
+    "a_send_feedback",
+    "send_feedback",
     "evaluate",
     "assert_test",
     "on_test_run_end",
-    "send_feedback",
     "trace_langchain",
     "trace_llama_index",
     "confident_evaluate",
+    "guard",
 ]
 
 

diff --git a/deepeval/_version.py b/deepeval/_version.py
@@ -1 +1 @@
-__version__: str = "1.4.9"
+__version__: str = "1.5.1"
diff --git a/deepeval/confident/api.py b/deepeval/confident/api.py
@@ -1,5 +1,5 @@
 import os
-import urllib.parse
+import aiohttp
 import requests
 from enum import Enum
 
@@ -22,6 +22,7 @@ class Endpoints(Enum):
     EVENT_ENDPOINT = "/v1/event"
     FEEDBACK_ENDPOINT = "/v1/feedback"
     EVALUATE_ENDPOINT = "/evaluate"
+    GUARD_ENDPOINT = "/guard"
 
 
 class Api:
@@ -98,7 +99,49 @@ def send_request(
         else:
             raise Exception(res.json().get("error", res.text))
 
-    @staticmethod
-    def quote_string(text: str) -> str:
-        """Encode text to be safely included in URLs."""
-        return urllib.parse.quote(text, safe="")
+    async def a_send_request(
+        self, method: HttpMethods, endpoint: Endpoints, body=None, params=None
+    ):
+        """Asynchronously send a request to the API and return its decoded body.
+
+        Args:
+            method: HTTP method; its ``value`` is handed to aiohttp.
+            endpoint: Endpoint whose ``value`` is appended to
+                ``self.base_api_url``.
+            body: Optional JSON payload. On a 409 retry it is mutated in
+                place (``overwrite`` / ``alias`` keys are set).
+            params: Optional query parameters.
+
+        Returns:
+            The parsed JSON body on HTTP 200 (the raw text if the body is
+            not JSON), or ``None`` when the user declines the 409 prompt.
+
+        Raises:
+            Exception: For any other status. The message is the ``error``
+                field of the JSON body when available, otherwise the raw
+                response text.
+        """
+        url = f"{self.base_api_url}{endpoint.value}"
+        async with aiohttp.ClientSession() as session:
+            async with session.request(
+                method=method.value,
+                url=url,
+                headers=self._headers,
+                json=body,
+                params=params,
+                ssl=True,  # SSL verification enabled
+            ) as res:
+                if res.status == 200:
+                    try:
+                        return await res.json()
+                    except aiohttp.ContentTypeError:
+                        return await res.text()
+                elif res.status == 409 and body:
+                    message = (await res.json()).get(
+                        "message", "Conflict occurred."
+                    )
+
+                    # NOTE: input() blocks the event loop while waiting for
+                    # the user; acceptable for an interactive CLI prompt.
+                    user_input = (
+                        input(
+                            f"{message} Would you like to overwrite it? [y/N] or change the alias [c]: "
+                        )
+                        .strip()
+                        .lower()
+                    )
+
+                    if user_input == "y":
+                        body["overwrite"] = True
+                        # Forward params so the retry targets the same query.
+                        return await self.a_send_request(
+                            method, endpoint, body, params
+                        )
+                    elif user_input == "c":
+                        new_alias = input("Enter a new alias: ").strip()
+                        body["alias"] = new_alias
+                        return await self.a_send_request(
+                            method, endpoint, body, params
+                        )
+                    else:
+                        print("Aborted.")
+                        return None
+                else:
+                    # res.json() is a coroutine: it must be awaited before
+                    # calling .get() on the decoded payload.
+                    error_text = await res.text()
+                    try:
+                        error_message = (await res.json()).get(
+                            "error", error_text
+                        )
+                    except (aiohttp.ContentTypeError, ValueError, AttributeError):
+                        # Error body was not a JSON object; use the raw text.
+                        error_message = error_text
+                    raise Exception(error_message)
diff --git a/deepeval/evaluate.py b/deepeval/evaluate.py
@@ -58,6 +58,7 @@
 class TestResult:
     """Returned from run_test"""
 
+    name: str
     success: bool
     metrics_data: Union[List[MetricData], None]
     conversational: bool
@@ -106,8 +107,11 @@ def create_metric_data(metric: BaseMetric) -> MetricData:
 def create_test_result(
     api_test_case: Union[LLMApiTestCase, ConversationalApiTestCase],
 ) -> TestResult:
+    name = api_test_case.name
+
     if isinstance(api_test_case, ConversationalApiTestCase):
         return TestResult(
+            name=name,
             success=api_test_case.success,
             metrics_data=api_test_case.metrics_data,
             conversational=True,
@@ -119,6 +123,7 @@ def create_test_result(
         )
         if multimodal:
             return TestResult(
+                name=name,
                 success=api_test_case.success,
                 metrics_data=api_test_case.metrics_data,
                 input=api_test_case.multimodal_input,
@@ -128,6 +133,7 @@ def create_test_result(
             )
         else:
             return TestResult(
+                name=name,
                 success=api_test_case.success,
                 metrics_data=api_test_case.metrics_data,
                 input=api_test_case.input,

diff --git a/deepeval/event/event.py b/deepeval/event/event.py
@@ -21,7 +21,6 @@ def track(
     hyperparameters: Optional[Dict[str, str]] = {},
     fail_silently: Optional[bool] = False,
     raise_expection: Optional[bool] = True,
-    run_async: Optional[bool] = True,
     trace_stack: Optional[Dict[str, Any]] = None,
     trace_provider: Optional[str] = None,
 ) -> str:
@@ -43,7 +42,6 @@ def track(
         hyperparameters=hyperparameters,
         fail_silently=fail_silently,
         raise_expection=raise_expection,
-        run_async=run_async,
         trace_stack=trace_stack,
         trace_provider=trace_provider,
     )
diff --git a/deepeval/guardrails/__init__.py b/deepeval/guardrails/__init__.py
@@ -0,0 +1,2 @@
+from .types import Guard
+from .guard import guard
diff --git a/deepeval/guardrails/api.py b/deepeval/guardrails/api.py
@@ -0,0 +1,22 @@
+from typing import Optional, List
+from pydantic import BaseModel
+
+
+class APIGuard(BaseModel):
+    """Request payload for the guard endpoint.
+
+    Built by ``guard()`` from its arguments and serialised with
+    ``model_dump(by_alias=True, exclude_none=True)``, so fields left as
+    ``None`` are omitted from the request body.
+    """
+
+    input: str
+    response: str
+    # String values of the requested ``Guard`` enum members.
+    guards: List[str]
+    purpose: Optional[str] = None
+    allowed_entities: Optional[List[str]] = None
+    system_prompt: Optional[str] = None
+    include_reason: bool
+
+
+class GuardResult(BaseModel):
+    """One per-guard entry of the guard endpoint's ``results`` list."""
+
+    guard: str
+    score: int
+    # Defaults to None, matching the optional fields on APIGuard, so a
+    # result that omits ``reason`` still validates.
+    reason: Optional[str] = None
+
+
+class GuardResponseData(BaseModel):
+    """Expected shape of the guard endpoint response; ``guard()`` uses it to validate the reply."""
+
+    results: List[GuardResult]
diff --git a/deepeval/guardrails/guard.py b/deepeval/guardrails/guard.py
@@ -0,0 +1,82 @@
+from typing import Optional, List
+
+from deepeval.guardrails.api import APIGuard, GuardResponseData
+from deepeval.confident.api import Api, HttpMethods, Endpoints
+from deepeval.telemetry import capture_guardrails
+from deepeval.guardrails.types import Guard
+from deepeval.guardrails.types import (
+    purpose_entities_dependent_guards,
+    entities_dependent_guards,
+    purpose_dependent_guards,
+)
+from deepeval.utils import is_confident
+
+
+BASE_URL = "https://internal.evals.confident-ai.com"
+
+
+def guard(
+    input: str,
+    response: str,
+    guards: List[Guard],
+    purpose: Optional[str] = None,
+    allowed_entities: Optional[List[str]] = None,
+    system_prompt: Optional[str] = None,
+    include_reason: bool = False,
+):
+    """Run the requested guards against an input/response pair via the API.
+
+    Args:
+        input: The input text to check.
+        response: The response text to check.
+        guards: Guards to run; their ``value`` strings are sent to the API.
+        purpose: Required (unless ``system_prompt`` is given) by guards in
+            ``purpose_dependent_guards`` or ``purpose_entities_dependent_guards``.
+        allowed_entities: Required (unless ``system_prompt`` is given) by
+            guards in ``entities_dependent_guards`` or
+            ``purpose_entities_dependent_guards``.
+        system_prompt: Optional system prompt; satisfies both requirements
+            above when provided.
+        include_reason: When False, the ``reason`` key is stripped from
+            every returned result.
+
+    Returns:
+        The ``results`` list from the API response (a list of dicts).
+
+    Raises:
+        ValueError: If a requested guard is missing a required parameter.
+        Exception: If not logged in, or the API response does not match
+            ``GuardResponseData``.
+    """
+    with capture_guardrails(
+        guards=guards,
+        include_reason=include_reason,
+        include_system_prompt=(system_prompt is not None),
+    ):
+        # Check for missing parameters
+        for requested_guard in guards:
+            needs_purpose = (
+                requested_guard in purpose_dependent_guards
+                or requested_guard in purpose_entities_dependent_guards
+            )
+            if needs_purpose and purpose is None and system_prompt is None:
+                raise ValueError(
+                    f"Guard {requested_guard.value} requires a purpose but none was provided."
+                )
+
+            needs_entities = (
+                requested_guard in entities_dependent_guards
+                or requested_guard in purpose_entities_dependent_guards
+            )
+            # NOTE(review): only None is rejected here; an empty list is
+            # accepted even though the message mentions it. Confirm intent.
+            if (
+                needs_entities
+                and allowed_entities is None
+                and system_prompt is None
+            ):
+                raise ValueError(
+                    f"Guard {requested_guard.value} requires allowed entities but none were provided or list was empty."
+                )
+
+        # Prepare parameters for API request
+        guard_params = APIGuard(
+            input=input,
+            response=response,
+            guards=[g.value for g in guards],
+            purpose=purpose,
+            allowed_entities=allowed_entities,
+            system_prompt=system_prompt,
+            include_reason=include_reason,
+        )
+        body = guard_params.model_dump(by_alias=True, exclude_none=True)
+
+        # API request
+        if not is_confident():
+            raise Exception("To use DeepEval guardrails, run `deepeval login`")
+
+        api = Api(base_url=BASE_URL)
+        # Kept separate from the ``response`` argument to avoid rebinding it.
+        api_response = api.send_request(
+            method=HttpMethods.POST,
+            endpoint=Endpoints.GUARD_ENDPOINT,
+            body=body,
+        )
+        try:
+            GuardResponseData(**api_response)
+        except (TypeError, ValueError) as e:
+            # TypeError: the response is not a mapping (e.g. None).
+            # ValueError: pydantic's ValidationError derives from it.
+            raise Exception(f"Incorrect result format: {e}") from e
+
+        results = api_response["results"]
+        if not include_reason:
+            for result in results:
+                # pop() tolerates results that carry no "reason" key.
+                result.pop("reason", None)
+        return results
diff --git a/deepeval/guardrails/types.py b/deepeval/guardrails/types.py
@@ -0,0 +1,77 @@
+from enum import Enum
+
+
+class Guard(Enum):
+    """Guards that can be requested from the guard endpoint.
+
+    Each member's value is the string placed in the request's ``guards``
+    list by ``guard()``.
+    """
+
+    PRIVACY = "Privacy"
+    INTELLECTUAL_PROPERTY = "Intellectual Property"
+    MISINFORMATION_DISINFORMATION = "Misinformation & Disinformation"
+    SPECIALIZED_FINANCIAL_ADVICE = "Specialized Financial Advice"
+    OFFENSIVE = "Offensive"
+    # NOTE(review): the only all-caps value here; confirm the API expects "BIAS".
+    BIAS = "BIAS"
+    PII_API_DB = "API and Database Access"
+    PII_DIRECT = "Direct PII Disclosure"
+    PII_SESSION = "Session PII Leak"
+    PII_SOCIAL = "Social Engineering PII Disclosure"
+    DATA_LEAKAGE = "Data Leakage"
+    CONTRACTS = "Contracts"
+    EXCESSIVE_AGENCY = "Excessive Agency"
+    HALLUCINATION = "Hallucination"
+    IMITATION = "Imitation"
+    POLITICS = "Political Statements"
+    OVERRELIANCE = "Overreliance"
+    DEBUG_ACCESS = "Debug Access"
+    RBAC = "Role-Based Access Control"
+    SHELL_INJECTION = "Shell Injection"
+    SQL_INJECTION = "SQL Injection"
+    PROMPT_EXTRACTION = "Prompt Extraction"
+    SSRF = "Server Side Request Forgery"
+    BOLA = "Broken Object Level Authorization"
+    BFLA = "Broken Function Level Authorization"
+    COMPETITORS = "Competitors"
+    HIJACKING = "Hijacking"
+    RELIGION = "Religion"
+    VIOLENT_CRIME = "Violent Crimes"
+    NON_VIOLENT_CRIME = "Non Violent Crimes"
+    SEX_CRIME = "Sex Crimes"
+    CHILD_EXPLOITATION = "Child Exploitation"
+    INDISCRIMINATE_WEAPONS = "Indiscriminate Weapons"
+    HATE = "Hate"
+    SELF_HARM = "Self Harm"
+    SEXUAL_CONTENT = "Sexual Content"
+    CYBERCRIME = "Cybercrime"
+    CHEMICAL_BIOLOGICAL_WEAPONS = "Chemical & Biological Weapons"
+    ILLEGAL_DRUGS = "Illegal Drugs"
+    COPYRIGHT_VIOLATIONS = "Copyright Violations"
+    HARASSMENT_BULLYING = "Harassment & Bullying"
+    ILLEGAL_ACTIVITIES = "Illegal Activities"
+    GRAPHIC_CONTENT = "Graphic Content"
+    UNSAFE_PRACTICES = "Unsafe Practices"
+    RADICALIZATION = "Radicalization"
+    PROFANITY = "Profanity"
+    INSULTS = "Insults"
+
+
+# Lists of guards that require purpose, entities, or both.
+# guard() raises ValueError when a listed guard is requested without the
+# required argument, unless a system_prompt is supplied instead.
+
+# Guards that need `purpose`.
+purpose_dependent_guards = [
+    Guard.BFLA,
+    Guard.BIAS,
+    Guard.HALLUCINATION,
+    Guard.HIJACKING,
+    Guard.OVERRELIANCE,
+    Guard.PROMPT_EXTRACTION,
+    Guard.RBAC,
+    Guard.SSRF,
+    Guard.COMPETITORS,
+    Guard.RELIGION,
+]
+
+# Guards that need `allowed_entities`.
+entities_dependent_guards = [Guard.BOLA, Guard.IMITATION]
+
+# Guards that need both `purpose` and `allowed_entities`.
+# NOTE(review): COMPETITORS and RELIGION are also listed in
+# purpose_dependent_guards; being listed here additionally makes them
+# require allowed_entities. Confirm which list they belong in.
+purpose_entities_dependent_guards = [
+    Guard.PII_API_DB,
+    Guard.PII_DIRECT,
+    Guard.PII_SESSION,
+    Guard.PII_SOCIAL,
+    Guard.COMPETITORS,
+    Guard.RELIGION,
+]
diff --git a/deepeval/integrations/llama_index/callback.py b/deepeval/integrations/llama_index/callback.py
@@ -363,7 +363,9 @@ def update_trace_instance(
             for node in nodes:
                 total_chunk_length += len(node.content)
                 if node.score:
-                    top_score = node.score if node.score > top_score else top_score
+                    top_score = (
+                        node.score if node.score > top_score else top_score
+                    )
             attributes.nodes = nodes
             attributes.top_k = len(nodes)
             attributes.average_chunk_size = total_chunk_length // len(nodes)

diff --git a/deepeval/monitor/__init__.py b/deepeval/monitor/__init__.py
@@ -1,3 +1,3 @@
-from .monitor import monitor
-from .feedback import send_feedback
+from .monitor import monitor, a_monitor
+from .feedback import send_feedback, a_send_feedback
 from .api import Link