Skip to content

Commit

Permalink
Merge branch 'main' into fix-tracing
Browse files Browse the repository at this point in the history
  • Loading branch information
penguine-ip authored Nov 13, 2024
2 parents 3e7ff4e + 9a19f13 commit 1a9c797
Show file tree
Hide file tree
Showing 33 changed files with 1,013 additions and 97 deletions.
8 changes: 6 additions & 2 deletions deepeval/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,26 +5,30 @@
from ._version import __version__

from deepeval.event import track
from deepeval.monitor import monitor, send_feedback
from deepeval.monitor import monitor, a_monitor, send_feedback, a_send_feedback
from deepeval.evaluate import evaluate, assert_test
from deepeval.test_run import on_test_run_end, log_hyperparameters
from deepeval.utils import login_with_confident_api_key
from deepeval.telemetry import *
from deepeval.integrations import trace_langchain, trace_llama_index
from deepeval.confident import confident_evaluate
from deepeval.guardrails import guard

# Public names re-exported by the package. Each name is listed exactly once
# (a second "send_feedback" entry was removed).
__all__ = [
    "login_with_confident_api_key",
    "log_hyperparameters",
    "track",
    "monitor",
    "a_monitor",
    "a_send_feedback",
    "send_feedback",
    "evaluate",
    "assert_test",
    "on_test_run_end",
    "trace_langchain",
    "trace_llama_index",
    "confident_evaluate",
    "guard",
]


Expand Down
2 changes: 1 addition & 1 deletion deepeval/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
# Package version string; imported by the package __init__ via `from ._version import __version__`.
__version__: str = "1.5.1"
53 changes: 48 additions & 5 deletions deepeval/confident/api.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import os
import urllib.parse
import aiohttp
import requests
from enum import Enum

Expand All @@ -22,6 +22,7 @@ class Endpoints(Enum):
EVENT_ENDPOINT = "/v1/event"
FEEDBACK_ENDPOINT = "/v1/feedback"
EVALUATE_ENDPOINT = "/evaluate"
GUARD_ENDPOINT = "/guard"


class Api:
Expand Down Expand Up @@ -98,7 +99,49 @@ def send_request(
else:
raise Exception(res.json().get("error", res.text))

@staticmethod
def quote_string(text: str) -> str:
    """Percent-encode ``text`` for use inside a URL.

    No character is treated as safe, so ``/`` is escaped as well.
    """
    encoded = urllib.parse.quote(text, safe="")
    return encoded
async def a_send_request(
    self, method: HttpMethods, endpoint: Endpoints, body=None, params=None
):
    """Send an HTTP request to the API asynchronously.

    Args:
        method: HTTP verb; its ``.value`` is passed to aiohttp.
        endpoint: Endpoint whose ``.value`` is appended to ``self.base_api_url``.
        body: Optional JSON-serializable payload. On a 409 it may be mutated
            in place (``overwrite`` / ``alias`` keys) before retrying.
        params: Optional query-string parameters.

    Returns:
        The decoded JSON body on HTTP 200, or the raw text if the response
        is not JSON. ``None`` if the user declines to resolve a 409 conflict.

    Raises:
        Exception: For any other status, carrying the server's ``error``
            field when available, otherwise the raw response text.
    """
    url = f"{self.base_api_url}{endpoint.value}"
    async with aiohttp.ClientSession() as session:
        async with session.request(
            method=method.value,
            url=url,
            headers=self._headers,
            json=body,
            params=params,
            ssl=True,  # SSL verification enabled
        ) as res:
            if res.status == 200:
                try:
                    return await res.json()
                except aiohttp.ContentTypeError:
                    return await res.text()
            elif res.status == 409 and body:
                message = (await res.json()).get(
                    "message", "Conflict occurred."
                )

                # NOTE(review): input() is a synchronous call, so the
                # coroutine does not yield while waiting for the user.
                user_input = (
                    input(
                        f"{message} Would you like to overwrite it? [y/N] or change the alias [c]: "
                    )
                    .strip()
                    .lower()
                )

                if user_input == "y":
                    body["overwrite"] = True
                    # Retry with the same query parameters as the first attempt.
                    return await self.a_send_request(
                        method, endpoint, body, params
                    )
                elif user_input == "c":
                    new_alias = input("Enter a new alias: ").strip()
                    body["alias"] = new_alias
                    return await self.a_send_request(
                        method, endpoint, body, params
                    )
                else:
                    print("Aborted.")
                    return None
            else:
                # Read the text first so it can serve as the fallback message,
                # then await the JSON coroutine *before* calling .get on it.
                error_text = await res.text()
                try:
                    payload = await res.json()
                except (aiohttp.ContentTypeError, ValueError):
                    # Error body is not JSON (or not valid JSON).
                    payload = None
                if isinstance(payload, dict):
                    error_message = payload.get("error", error_text)
                else:
                    error_message = error_text
                raise Exception(error_message)
6 changes: 6 additions & 0 deletions deepeval/evaluate.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
class TestResult:
"""Returned from run_test"""

name: str
success: bool
metrics_data: Union[List[MetricData], None]
conversational: bool
Expand Down Expand Up @@ -106,8 +107,11 @@ def create_metric_data(metric: BaseMetric) -> MetricData:
def create_test_result(
api_test_case: Union[LLMApiTestCase, ConversationalApiTestCase],
) -> TestResult:
name = api_test_case.name

if isinstance(api_test_case, ConversationalApiTestCase):
return TestResult(
name=name,
success=api_test_case.success,
metrics_data=api_test_case.metrics_data,
conversational=True,
Expand All @@ -119,6 +123,7 @@ def create_test_result(
)
if multimodal:
return TestResult(
name=name,
success=api_test_case.success,
metrics_data=api_test_case.metrics_data,
input=api_test_case.multimodal_input,
Expand All @@ -128,6 +133,7 @@ def create_test_result(
)
else:
return TestResult(
name=name,
success=api_test_case.success,
metrics_data=api_test_case.metrics_data,
input=api_test_case.input,
Expand Down
2 changes: 0 additions & 2 deletions deepeval/event/event.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ def track(
hyperparameters: Optional[Dict[str, str]] = {},
fail_silently: Optional[bool] = False,
raise_expection: Optional[bool] = True,
run_async: Optional[bool] = True,
trace_stack: Optional[Dict[str, Any]] = None,
trace_provider: Optional[str] = None,
) -> str:
Expand All @@ -43,7 +42,6 @@ def track(
hyperparameters=hyperparameters,
fail_silently=fail_silently,
raise_expection=raise_expection,
run_async=run_async,
trace_stack=trace_stack,
trace_provider=trace_provider,
)
2 changes: 2 additions & 0 deletions deepeval/guardrails/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
from .types import Guard
from .guard import guard
22 changes: 22 additions & 0 deletions deepeval/guardrails/api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
from typing import Optional, List
from pydantic import BaseModel


class APIGuard(BaseModel):
    """Request payload for the guard endpoint.

    Built by ``guard()`` from its arguments and serialized with
    ``model_dump(by_alias=True, exclude_none=True)``, so optional fields
    left as ``None`` are omitted from the request body.
    """

    # Input text passed to guard().
    input: str
    # Response text passed to guard().
    response: str
    # Guard names as strings; guard() fills this with the Guard enum values.
    guards: List[str]
    # Optional purpose; guard() requires it (or a system prompt) for some guards.
    purpose: Optional[str] = None
    # Optional allowed entities; guard() requires them (or a system prompt) for some guards.
    allowed_entities: Optional[List[str]] = None
    # Optional system prompt; guard() accepts it in place of purpose / allowed entities.
    system_prompt: Optional[str] = None
    # Whether the caller asked for a reason with each result.
    include_reason: bool


class GuardResult(BaseModel):
    """One per-guard entry in the guard endpoint's response."""

    # Guard name as returned by the API.
    guard: str
    # Integer score for this guard. NOTE(review): scale/meaning is defined by
    # the API and not visible here - confirm before interpreting.
    score: int
    # Explanation for the score. Defaults to None so a response that omits
    # the field still validates; without a default the field is required.
    reason: Optional[str] = None


class GuardResponseData(BaseModel):
    """Expected shape of the guard endpoint's response.

    ``guard()`` instantiates this model from the raw response purely as a
    format check before returning the raw ``results`` list.
    """

    # One GuardResult per entry in the response's "results" list.
    results: List[GuardResult]
82 changes: 82 additions & 0 deletions deepeval/guardrails/guard.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
from typing import Optional, List

from deepeval.guardrails.api import APIGuard, GuardResponseData
from deepeval.confident.api import Api, HttpMethods, Endpoints
from deepeval.telemetry import capture_guardrails
from deepeval.guardrails.types import Guard
from deepeval.guardrails.types import (
purpose_entities_dependent_guards,
entities_dependent_guards,
purpose_dependent_guards,
)
from deepeval.utils import is_confident


BASE_URL = "https://internal.evals.confident-ai.com"


def guard(
    input: str,
    response: str,
    guards: List[Guard],
    purpose: Optional[str] = None,
    allowed_entities: Optional[List[str]] = None,
    system_prompt: Optional[str] = None,
    include_reason: bool = False,
):
    """Run the selected guards against an input/response pair via the API.

    Args:
        input: Input text to check.
        response: Response text to check.
        guards: Guards to apply; their ``.value`` strings are sent to the API.
        purpose: Required for purpose-dependent guards unless
            ``system_prompt`` is given.
        allowed_entities: Required for entity-dependent guards unless
            ``system_prompt`` is given.
        system_prompt: Accepted in place of ``purpose`` and
            ``allowed_entities`` for the requirement checks.
        include_reason: When False, the ``reason`` key is stripped from
            every returned result.

    Returns:
        The ``results`` list from the API response (a list of dicts).

    Raises:
        ValueError: A selected guard is missing its required purpose or
            allowed entities.
        Exception: The API response has the wrong format, or the user is
            not logged in.
    """
    with capture_guardrails(
        guards=guards,
        include_reason=include_reason,
        include_system_prompt=(system_prompt is not None),
    ):
        # Check for missing parameters. The loop variable is deliberately not
        # called `guard`, which would shadow this function.
        for selected in guards:
            needs_purpose = (
                selected in purpose_dependent_guards
                or selected in purpose_entities_dependent_guards
            )
            if needs_purpose and purpose is None and system_prompt is None:
                raise ValueError(
                    f"Guard {selected.value} requires a purpose but none was provided."
                )

            needs_entities = (
                selected in entities_dependent_guards
                or selected in purpose_entities_dependent_guards
            )
            # NOTE(review): the message mentions an empty list, but only
            # `None` is rejected here; an empty list passes this check.
            if (
                needs_entities
                and allowed_entities is None
                and system_prompt is None
            ):
                raise ValueError(
                    f"Guard {selected.value} requires allowed entities but none were provided or list was empty."
                )

        # Prepare parameters for API request
        guard_params = APIGuard(
            input=input,
            response=response,
            guards=[g.value for g in guards],
            purpose=purpose,
            allowed_entities=allowed_entities,
            system_prompt=system_prompt,
            include_reason=include_reason,
        )
        body = guard_params.model_dump(by_alias=True, exclude_none=True)

        if not is_confident():
            raise Exception("To use DeepEval guardrails, run `deepeval login`")

        # API request. The result gets its own name instead of rebinding the
        # `response` parameter.
        api = Api(base_url=BASE_URL)
        api_response = api.send_request(
            method=HttpMethods.POST,
            endpoint=Endpoints.GUARD_ENDPOINT,
            body=body,
        )
        try:
            # Instantiated only to check the response format; the raw dicts
            # are what gets returned.
            GuardResponseData(**api_response)
        except TypeError as e:
            raise Exception("Incorrect result format:", e) from e

        results = api_response["results"]
        if not include_reason:
            for result in results:
                # pop() tolerates results that carry no "reason" key.
                result.pop("reason", None)
        return results
77 changes: 77 additions & 0 deletions deepeval/guardrails/types.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
from enum import Enum


class Guard(Enum):
    """Guards that can be requested from the guard endpoint.

    Each member's value is the string sent to the API (``guard()`` builds the
    request from ``g.value``), so the values must not be altered casually.
    """

    PRIVACY = "Privacy"
    INTELLECTUAL_PROPERTY = "Intellectual Property"
    MISINFORMATION_DISINFORMATION = "Misinformation & Disinformation"
    SPECIALIZED_FINANCIAL_ADVICE = "Specialized Financial Advice"
    OFFENSIVE = "Offensive"
    # NOTE(review): all-caps value, unlike the title-cased others - confirm
    # this matches what the API expects.
    BIAS = "BIAS"
    PII_API_DB = "API and Database Access"
    PII_DIRECT = "Direct PII Disclosure"
    PII_SESSION = "Session PII Leak"
    PII_SOCIAL = "Social Engineering PII Disclosure"
    DATA_LEAKAGE = "Data Leakage"
    CONTRACTS = "Contracts"
    EXCESSIVE_AGENCY = "Excessive Agency"
    HALLUCINATION = "Hallucination"
    IMITATION = "Imitation"
    POLITICS = "Political Statements"
    OVERRELIANCE = "Overreliance"
    DEBUG_ACCESS = "Debug Access"
    RBAC = "Role-Based Access Control"
    SHELL_INJECTION = "Shell Injection"
    SQL_INJECTION = "SQL Injection"
    PROMPT_EXTRACTION = "Prompt Extraction"
    SSRF = "Server Side Request Forgery"
    BOLA = "Broken Object Level Authorization"
    BFLA = "Broken Function Level Authorization"
    COMPETITORS = "Competitors"
    HIJACKING = "Hijacking"
    RELIGION = "Religion"
    VIOLENT_CRIME = "Violent Crimes"
    NON_VIOLENT_CRIME = "Non Violent Crimes"
    SEX_CRIME = "Sex Crimes"
    CHILD_EXPLOITATION = "Child Exploitation"
    INDISCRIMINATE_WEAPONS = "Indiscriminate Weapons"
    HATE = "Hate"
    SELF_HARM = "Self Harm"
    SEXUAL_CONTENT = "Sexual Content"
    CYBERCRIME = "Cybercrime"
    CHEMICAL_BIOLOGICAL_WEAPONS = "Chemical & Biological Weapons"
    ILLEGAL_DRUGS = "Illegal Drugs"
    COPYRIGHT_VIOLATIONS = "Copyright Violations"
    HARASSMENT_BULLYING = "Harassment & Bullying"
    ILLEGAL_ACTIVITIES = "Illegal Activities"
    GRAPHIC_CONTENT = "Graphic Content"
    UNSAFE_PRACTICES = "Unsafe Practices"
    RADICALIZATION = "Radicalization"
    PROFANITY = "Profanity"
    INSULTS = "Insults"


# Lists of guards that require purpose, entities, or both.
# guard() consults these before calling the API and raises ValueError when a
# selected guard's requirement is unmet and no system prompt was supplied.

# Guards that need a purpose.
purpose_dependent_guards = [
    Guard.BFLA,
    Guard.BIAS,
    Guard.HALLUCINATION,
    Guard.HIJACKING,
    Guard.OVERRELIANCE,
    Guard.PROMPT_EXTRACTION,
    Guard.RBAC,
    Guard.SSRF,
    Guard.COMPETITORS,
    Guard.RELIGION,
]

# Guards that need allowed entities.
entities_dependent_guards = [Guard.BOLA, Guard.IMITATION]

# Guards that need both a purpose and allowed entities.
# NOTE(review): COMPETITORS and RELIGION also appear in
# purpose_dependent_guards above - confirm the overlap is intentional.
purpose_entities_dependent_guards = [
    Guard.PII_API_DB,
    Guard.PII_DIRECT,
    Guard.PII_SESSION,
    Guard.PII_SOCIAL,
    Guard.COMPETITORS,
    Guard.RELIGION,
]
4 changes: 3 additions & 1 deletion deepeval/integrations/llama_index/callback.py
Original file line number Diff line number Diff line change
Expand Up @@ -363,7 +363,9 @@ def update_trace_instance(
for node in nodes:
total_chunk_length += len(node.content)
if node.score:
top_score = node.score if node.score > top_score else top_score
top_score = (
node.score if node.score > top_score else top_score
)
attributes.nodes = nodes
attributes.top_k = len(nodes)
attributes.average_chunk_size = total_chunk_length // len(nodes)
Expand Down
4 changes: 2 additions & 2 deletions deepeval/monitor/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
from .monitor import monitor
from .feedback import send_feedback
from .monitor import monitor, a_monitor
from .feedback import send_feedback, a_send_feedback
from .api import Link
Loading

0 comments on commit 1a9c797

Please sign in to comment.