diff --git a/docs/my-website/docs/observability/opik_integration.md b/docs/my-website/docs/observability/opik_integration.md
index d8075c70e347..b4bcef539378 100644
--- a/docs/my-website/docs/observability/opik_integration.md
+++ b/docs/my-website/docs/observability/opik_integration.md
@@ -1,3 +1,5 @@
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
 import Image from '@theme/IdealImage';
 
 # Comet Opik - Logging + Evals
@@ -21,17 +23,16 @@ Use just 4 lines of code, to instantly log your responses **across all providers
 Get your Opik API Key by signing up [here](https://www.comet.com/signup?utm_source=litelllm&utm_medium=docs&utm_content=api_key_cell)!
 
 ```python
-from litellm.integrations.opik.opik import OpikLogger
 import litellm
-
-opik_logger = OpikLogger()
-litellm.callbacks = [opik_logger]
+litellm.callbacks = ["opik"]
 ```
 
 Full examples:
 
+<Tabs>
+<TabItem value="sdk" label="SDK">
+
 ```python
-from litellm.integrations.opik.opik import OpikLogger
 import litellm
 import os
@@ -43,8 +44,7 @@ os.environ["OPIK_WORKSPACE"] = ""
 os.environ["OPENAI_API_KEY"] = ""
 
 # set "opik" as a callback, litellm will send the data to an Opik server (such as comet.com)
-opik_logger = OpikLogger()
-litellm.callbacks = [opik_logger]
+litellm.callbacks = ["opik"]
 
 # openai call
 response = litellm.completion(
@@ -55,18 +55,16 @@ response = litellm.completion(
 )
 ```
 
-If you are liteLLM within a function tracked using Opik's `@track` decorator,
+If you are using liteLLM within a function tracked using Opik's `@track` decorator,
 you will need provide the `current_span_data` field in the metadata attribute
 so that the LLM call is assigned to the correct trace:
 
 ```python
 from opik import track
 from opik.opik_context import get_current_span_data
-from litellm.integrations.opik.opik import OpikLogger
 import litellm
 
-opik_logger = OpikLogger()
-litellm.callbacks = [opik_logger]
+litellm.callbacks = ["opik"]
 
 @track()
 def streaming_function(input):
@@ -87,6 +85,126 @@ response = streaming_function("Why is tracking and evaluation of LLMs important?
 chunks = list(response)
 ```
 
+</TabItem>
+<TabItem value="proxy" label="PROXY">
+
+1. Setup config.yaml
+
+```yaml
+model_list:
+  - model_name: gpt-3.5-turbo-testing
+    litellm_params:
+      model: gpt-3.5-turbo
+      api_key: os.environ/OPENAI_API_KEY
+
+litellm_settings:
+  callbacks: ["opik"]
+
+environment_variables:
+  OPIK_API_KEY: ""
+  OPIK_WORKSPACE: ""
+```
+
+2. Run proxy
+
+```bash
+litellm --config config.yaml
+```
+
+3. Test it!
+
+```bash
+curl -L -X POST 'http://0.0.0.0:4000/v1/chat/completions' \
+-H 'Content-Type: application/json' \
+-H 'Authorization: Bearer sk-1234' \
+-d '{
+  "model": "gpt-3.5-turbo-testing",
+  "messages": [
+    {
+      "role": "user",
+      "content": "What'\''s the weather like in Boston today?"
+    }
+  ]
+}'
+```
+
+</TabItem>
+</Tabs>
+
+## Opik-Specific Parameters
+
+These can be passed inside metadata with the `opik` key.
+
+### Fields
+
+- `project_name` - Name of the Opik project to send data to.
+- `current_span_data` - The current span data to be used for tracing.
+- `tags` - Tags to be used for tracing.
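+
+For example, the `project_name` field can be attached to a single request through the same metadata block (a minimal sketch; the project name below is illustrative):
+
+```python
+import litellm
+
+litellm.callbacks = ["opik"]
+
+response = litellm.completion(
+    model="gpt-3.5-turbo",
+    messages=[{"role": "user", "content": "Hello, world!"}],
+    metadata={
+        "opik": {
+            "project_name": "litellm-demo-project",  # illustrative Opik project name
+        },
+    },
+)
+```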
+
+### Usage
+
+<Tabs>
+<TabItem value="sdk" label="SDK">
+
+```python
+from opik import track
+from opik.opik_context import get_current_span_data
+import litellm
+
+litellm.callbacks = ["opik"]
+
+@track()
+def llm_call(input):
+    messages = [{"role": "user", "content": input}]
+    response = litellm.completion(
+        model="gpt-3.5-turbo",
+        messages=messages,
+        metadata={
+            "opik": {
+                "current_span_data": get_current_span_data(),
+                "tags": ["streaming-test"],
+            },
+        },
+    )
+    return response
+```
+
+</TabItem>
+<TabItem value="proxy" label="PROXY">
+
+```bash
+curl -L -X POST 'http://0.0.0.0:4000/v1/chat/completions' \
+-H 'Content-Type: application/json' \
+-H 'Authorization: Bearer sk-1234' \
+-d '{
+  "model": "gpt-3.5-turbo-testing",
+  "messages": [
+    {
+      "role": "user",
+      "content": "What'\''s the weather like in Boston today?"
+    }
+  ],
+  "metadata": {
+    "opik": {
+      "current_span_data": "...",
+      "tags": ["streaming-test"]
+    }
+  }
+}'
+```
+
+</TabItem>
+</Tabs>
+
 ## Support & Talk to Founders
 
 - [Schedule Demo 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)
diff --git a/docs/my-website/docs/proxy/logging.md b/docs/my-website/docs/proxy/logging.md
index 1a541820ae2e..e13a4036344c 100644
--- a/docs/my-website/docs/proxy/logging.md
+++ b/docs/my-website/docs/proxy/logging.md
@@ -1,3 +1,7 @@
+import Image from '@theme/IdealImage';
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
+
 # Logging
 
 Log Proxy input, output, and exceptions using:
@@ -13,9 +17,7 @@ Log Proxy input, output, and exceptions using:
 - DynamoDB
 - etc.
 
-import Image from '@theme/IdealImage';
-import Tabs from '@theme/Tabs';
-import TabItem from '@theme/TabItem';
+
 
 ## Getting the LiteLLM Call ID
@@ -77,10 +79,13 @@ litellm_settings:
 
 ### Redact Messages, Response Content
 
-Set `litellm.turn_off_message_logging=True` This will prevent the messages and responses from being logged to your logging provider, but request metadata will still be logged.
+Set `litellm.turn_off_message_logging=True`. This prevents the messages and responses from being logged to your logging provider, but request metadata (e.g. spend) will still be tracked.
+
+<Tabs>
+<TabItem value="global" label="Global Redaction">
 
-Example config.yaml
+**1. Setup config.yaml**
 ```yaml
 model_list:
  - model_name: gpt-3.5-turbo
@@ -91,9 +96,87 @@ litellm_settings:
   turn_off_message_logging: True # 👈 Key Change
 ```
 
-If you have this feature turned on, you can override it for specific requests by
+**2. Send request**
+
+```shell
+curl --location 'http://0.0.0.0:4000/chat/completions' \
+    --header 'Content-Type: application/json' \
+    --data '{
+    "model": "gpt-3.5-turbo",
+    "messages": [
+        {
+            "role": "user",
+            "content": "what llm are you"
+        }
+    ]
+}'
+```
+
+</TabItem>
+
+<TabItem value="per_request" label="Per Request Redaction">
+
+:::info
+
+Dynamic request message redaction is in BETA.
+
+:::
+
+Pass in a request header to enable message redaction for a request.
+
+```
+x-litellm-enable-message-redaction: true
+```
+
+**1. Setup config.yaml**
+
+```yaml
+model_list:
+  - model_name: gpt-3.5-turbo
+    litellm_params:
+      model: gpt-3.5-turbo
+```
+
+**2. Setup per request header**
+
+```shell
+curl -L -X POST 'http://0.0.0.0:4000/v1/chat/completions' \
+-H 'Content-Type: application/json' \
+-H 'Authorization: Bearer sk-zV5HlSIm8ihj1F9C_ZbB1g' \
+-H 'x-litellm-enable-message-redaction: true' \
+-d '{
+  "model": "gpt-3.5-turbo",
+  "messages": [
+    {
+      "role": "user",
+      "content": "Hey, how'\''s it going 1234?"
+    }
+  ]
+}'
+```
+
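+The same header can also be sent from the OpenAI Python SDK pointed at the proxy (a minimal sketch; it assumes the proxy from step 1 is running locally on port 4000, and the API key is illustrative):
+
+```python
+import openai
+
+client = openai.OpenAI(
+    api_key="sk-1234",               # illustrative proxy key
+    base_url="http://0.0.0.0:4000",  # LiteLLM proxy from step 1
+)
+
+response = client.chat.completions.create(
+    model="gpt-3.5-turbo",
+    messages=[{"role": "user", "content": "Hey, how's it going 1234?"}],
+    # forward the per-request redaction header to the LiteLLM proxy
+    extra_headers={"x-litellm-enable-message-redaction": "true"},
+)
+```
+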
+**3. Check Logging Tool + Spend Logs**
+
+**Logging Tool**
+
+<Image img={require('../../img/message_redaction_logging.png')}/>
+
+**Spend Logs**
+
+<Image img={require('../../img/message_redaction_spend_logs.png')} />
+
+</TabItem>
+</Tabs>
+
+### Disable Message Redaction
+
+If you have `litellm.turn_off_message_logging` turned on, you can override it for specific requests by
 setting a request header `LiteLLM-Disable-Message-Redaction: true`.
+
 ```shell
 curl --location 'http://0.0.0.0:4000/chat/completions' \
     --header 'Content-Type: application/json' \
     --header 'LiteLLM-Disable-Message-Redaction: true' \
     --data '{
     "model": "gpt-3.5-turbo",
     "messages": [
         {
             "role": "user",
             "content": "what llm are you"
         }
     ]
 }'
 ```
 
-Removes any field with `user_api_key_*` from metadata.
 
 
 ### Turn off all tracking/logging
diff --git a/docs/my-website/docs/proxy/request_headers.md b/docs/my-website/docs/proxy/request_headers.md
index d3ccb544359f..133cc7351f68 100644
--- a/docs/my-website/docs/proxy/request_headers.md
+++ b/docs/my-website/docs/proxy/request_headers.md
@@ -6,6 +6,8 @@ Special headers that are supported by LiteLLM.
 
 `x-litellm-timeout` Optional[float]: The timeout for the request in seconds.
 
+`x-litellm-enable-message-redaction` Optional[bool]: Don't log the message content to logging integrations. Just track spend. [Learn More](./logging#redact-messages-response-content)
+
 ## Anthropic Headers
 
 `anthropic-version` Optional[str]: The version of the Anthropic API to use.
diff --git a/docs/my-website/img/message_redaction_logging.png b/docs/my-website/img/message_redaction_logging.png
new file mode 100644
index 000000000000..6e210ad182ef
Binary files /dev/null and b/docs/my-website/img/message_redaction_logging.png differ
diff --git a/docs/my-website/img/message_redaction_spend_logs.png b/docs/my-website/img/message_redaction_spend_logs.png
new file mode 100644
index 000000000000..eacfac2ece12
Binary files /dev/null and b/docs/my-website/img/message_redaction_spend_logs.png differ
diff --git a/litellm/integrations/opik/opik.py b/litellm/integrations/opik/opik.py
index c78c4de4e604..1f7f18f336b2 100644
--- a/litellm/integrations/opik/opik.py
+++ b/litellm/integrations/opik/opik.py
@@ -147,13 +147,11 @@ async def _submit_batch(self, url: str, headers: Dict[str, str], batch: Dict):
                     f"OpikLogger - Error: {response.status_code} - {response.text}"
                 )
             else:
-                verbose_logger.debug(
+                verbose_logger.info(
                     f"OpikLogger - {len(self.log_queue)} Opik events submitted"
                 )
         except Exception as e:
-            verbose_logger.exception(
-                f"OpikLogger failed to send batch - {str(e)}\n{traceback.format_exc()}"
-            )
+            verbose_logger.exception(f"OpikLogger failed to send batch - {str(e)}")
 
     def _create_opik_headers(self):
         headers = {}
@@ -165,7 +163,7 @@ def _create_opik_headers(self):
         return headers
 
     async def async_send_batch(self):
-        verbose_logger.exception("Calling async_send_batch")
+        verbose_logger.info("Calling async_send_batch")
         if not self.log_queue:
             return
 
@@ -177,10 +175,12 @@ async def async_send_batch(self):
             await self._submit_batch(
                 url=self.trace_url, headers=self.headers, batch={"traces": traces}
             )
+            verbose_logger.info(f"Sent {len(traces)} traces")
         if len(spans) > 0:
             await self._submit_batch(
                 url=self.span_url, headers=self.headers, batch={"spans": spans}
            )
+            verbose_logger.info(f"Sent {len(spans)} spans")
 
     def _create_opik_payload(  # noqa: PLR0915
         self, kwargs, response_obj, start_time, end_time
diff --git a/litellm/litellm_core_utils/redact_messages.py b/litellm/litellm_core_utils/redact_messages.py
index 3d0cec8d727f..50e0e0b5755a 100644
--- a/litellm/litellm_core_utils/redact_messages.py
+++ b/litellm/litellm_core_utils/redact_messages.py
@@ -73,12 +73,9 @@ def perform_redaction(model_call_details: dict, result):
         return {"text": "redacted-by-litellm"}
 
 
-def redact_message_input_output_from_logging(
-    model_call_details: dict, result, input: Optional[Any] = None
-):
+def should_redact_message_logging(model_call_details: dict) -> bool:
     """
-    Removes messages, prompts, input, response from logging. This modifies the data in-place
-    only redacts when litellm.turn_off_message_logging == True
+    Determine if message logging should be redacted.
     """
     _request_headers = (
         model_call_details.get("litellm_params", {}).get("metadata", {}) or {}
@@ -86,25 +83,48 @@ def redact_message_input_output_from_logging(
 
     request_headers = _request_headers.get("headers", {})
 
+    possible_request_headers = [
+        "litellm-enable-message-redaction",  # old header. maintain backwards compatibility
+        "x-litellm-enable-message-redaction",  # new header
+    ]
+
+    is_redaction_enabled_via_header = False
+    for header in possible_request_headers:
+        if bool(request_headers.get(header, False)):
+            is_redaction_enabled_via_header = True
+            break
+
     # check if user opted out of logging message/response to callbacks
     if (
         litellm.turn_off_message_logging is not True
-        and request_headers.get("litellm-enable-message-redaction", False) is not True
+        and is_redaction_enabled_via_header is not True
         and _get_turn_off_message_logging_from_dynamic_params(model_call_details)
         is not True
     ):
-        return result
+        return False
 
-    if request_headers and request_headers.get(
-        "litellm-disable-message-redaction", False
+    if request_headers and bool(
+        request_headers.get("litellm-disable-message-redaction", False)
     ):
-        return result
+        return False
 
     # user has OPTED OUT of message redaction
     if _get_turn_off_message_logging_from_dynamic_params(model_call_details) is False:
-        return result
+        return False
+
+    return True
 
-    return perform_redaction(model_call_details, result)
+
+def redact_message_input_output_from_logging(
+    model_call_details: dict, result, input: Optional[Any] = None
+) -> Any:
+    """
+    Removes messages, prompts, input, response from logging. This modifies the data in-place
+    only redacts when litellm.turn_off_message_logging == True
+    """
+    if should_redact_message_logging(model_call_details):
+        return perform_redaction(model_call_details, result)
+    return result
 
 
 def _get_turn_off_message_logging_from_dynamic_params(
diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json
index 03eca86e960b..987ef948a57c 100644
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@@ -8904,16 +8904,5 @@
         "supports_function_calling": true,
         "mode": "chat",
         "supports_tool_choice": true
-    },
-    "hyperbolic/deepseek-v3": {
-        "max_tokens": 20480,
-        "max_input_tokens": 131072,
-        "max_output_tokens": 20480,
-        "litellm_provider": "openai",
-        "input_cost_per_token": 0.00000025,
-        "output_cost_per_token": 0.00000025,
-        "mode": "chat",
-        "supports_function_calling": true,
-        "supports_response_schema": true
     }
 }
diff --git a/litellm/proxy/_experimental/out/onboarding.html b/litellm/proxy/_experimental/out/onboarding.html
deleted file mode 100644
index 5fefa6a45861..000000000000
--- a/litellm/proxy/_experimental/out/onboarding.html
+++ /dev/null
@@ -1 +0,0 @@
-LiteLLM Dashboard
\ No newline at end of file
diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml
index 0db6948ae0a8..2b4a64b74298 100644
--- a/litellm/proxy/_new_secret_config.yaml
+++ b/litellm/proxy/_new_secret_config.yaml
@@ -29,21 +29,4 @@ model_list:
 
 litellm_settings:
-  callbacks: ["langsmith"]
-  disable_no_log_param: true
-
-general_settings:
-  enable_jwt_auth: True
-  litellm_jwtauth:
-    object_id_jwt_field: "client_id" # can be either user / team, inferred from the role mapping
-    roles_jwt_field: "resource_access.litellm-test-client-id.roles"
-    role_mappings:
-      - role: litellm.api.consumer
-        internal_role: "team"
-    enforce_rbac: true
-  role_permissions: # default model + endpoint permissions for a role.
-    - role: team
-      models: ["anthropic-claude"]
-      routes: ["openai_routes"]
-
-
+  callbacks: ["opik"]