Litellm staging (#8270)
* fix(opik.py): cleanup

* docs(opik_integration.md): cleanup opik integration docs

* fix(redact_messages.py): fix redact messages check header logic

ensures a stringified bool value in the header (e.g. "true") is still asserted to true

allows dynamic message redaction

* feat(redact_messages.py): support `x-litellm-enable-message-redaction` request header

allows dynamic message redaction
krrishdholakia authored Feb 5, 2025
1 parent 3c813b3 commit 8d3a942
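
The header-redaction fix above comes down to treating a stringified boolean header value as a real boolean. A minimal sketch of that check (illustrative only; the helper name and signature are hypothetical, not the actual `redact_messages.py` code):

```python
def _redaction_header_enabled(request_headers: dict) -> bool:
    """Return True when the caller opts into message redaction via header.

    Header values arrive as strings ("true", "True", ...), so a plain
    truthiness check would also treat "false" as enabled; compare the
    normalized string instead.
    """
    raw_value = request_headers.get("x-litellm-enable-message-redaction")
    if raw_value is None:
        return False
    return str(raw_value).strip().lower() == "true"


# Stringified bools from HTTP headers still assert correctly:
assert _redaction_header_enabled({"x-litellm-enable-message-redaction": "true"})
assert not _redaction_header_enabled({"x-litellm-enable-message-redaction": "false"})
```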
Showing 10 changed files with 258 additions and 66 deletions.
140 changes: 129 additions & 11 deletions docs/my-website/docs/observability/opik_integration.md
@@ -1,3 +1,5 @@
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
import Image from '@theme/IdealImage';

# Comet Opik - Logging + Evals
Expand All @@ -21,17 +23,16 @@ Use just 4 lines of code, to instantly log your responses **across all providers
Get your Opik API Key by signing up [here](https://www.comet.com/signup?utm_source=litelllm&utm_medium=docs&utm_content=api_key_cell)!

```python
- from litellm.integrations.opik.opik import OpikLogger
import litellm

- opik_logger = OpikLogger()
- litellm.callbacks = [opik_logger]
+ litellm.callbacks = ["opik"]
```

Full examples:

<Tabs>
<TabItem value="sdk" label="SDK">

```python
- from litellm.integrations.opik.opik import OpikLogger
import litellm
import os

@@ -43,8 +44,7 @@ os.environ["OPIK_WORKSPACE"] = ""
os.environ["OPENAI_API_KEY"] = ""

# set "opik" as a callback, litellm will send the data to an Opik server (such as comet.com)
- opik_logger = OpikLogger()
- litellm.callbacks = [opik_logger]
+ litellm.callbacks = ["opik"]

# openai call
response = litellm.completion(
@@ -55,18 +55,16 @@ response = litellm.completion(
)
```

- If you are liteLLM within a function tracked using Opik's `@track` decorator,
+ If you are using liteLLM within a function tracked using Opik's `@track` decorator,
you will need to provide the `current_span_data` field in the metadata attribute
so that the LLM call is assigned to the correct trace:

```python
from opik import track
from opik.opik_context import get_current_span_data
- from litellm.integrations.opik.opik import OpikLogger
import litellm

- opik_logger = OpikLogger()
- litellm.callbacks = [opik_logger]
+ litellm.callbacks = ["opik"]

@track()
def streaming_function(input):
@@ -87,6 +85,126 @@ response = streaming_function("Why is tracking and evaluation of LLMs important?
chunks = list(response)
```

</TabItem>
<TabItem value="proxy" label="Proxy">

1. Setup config.yaml

```yaml
model_list:
  - model_name: gpt-3.5-turbo-testing
    litellm_params:
      model: gpt-3.5-turbo
      api_key: os.environ/OPENAI_API_KEY

litellm_settings:
  callbacks: ["opik"]

environment_variables:
  OPIK_API_KEY: ""
  OPIK_WORKSPACE: ""
```
2. Run proxy
```bash
litellm --config config.yaml
```

3. Test it!

```bash
curl -L -X POST 'http://0.0.0.0:4000/v1/chat/completions' \
-H 'Content-Type: application/json' \
-H 'Authorization: Bearer sk-1234' \
-d '{
"model": "gpt-3.5-turbo-testing",
"messages": [
{
"role": "user",
"content": "What's the weather like in Boston today?"
}
]
}'
```
</TabItem>
</Tabs>
## Opik-Specific Parameters
These can be passed inside metadata with the `opik` key.
### Fields
- `project_name` - Name of the Opik project to send data to.
- `current_span_data` - The current span data to be used for tracing.
- `tags` - Tags to be used for tracing.
### Usage
<Tabs>
<TabItem value="sdk" label="SDK">
```python
from opik import track
from opik.opik_context import get_current_span_data
import litellm

litellm.callbacks = ["opik"]

# Wrap the call in a tracked function so `current_span_data` resolves to the current trace
@track()
def llm_call(input):
    messages = [{"role": "user", "content": input}]
    response = litellm.completion(
        model="gpt-3.5-turbo",
        messages=messages,
        metadata={
            "opik": {
                "current_span_data": get_current_span_data(),
                "tags": ["streaming-test"],
            },
        },
    )
    return response
```
</TabItem>
<TabItem value="proxy" label="Proxy">
```bash
curl -L -X POST 'http://0.0.0.0:4000/v1/chat/completions' \
-H 'Content-Type: application/json' \
-H 'Authorization: Bearer sk-1234' \
-d '{
"model": "gpt-3.5-turbo-testing",
"messages": [
{
"role": "user",
"content": "What's the weather like in Boston today?"
}
],
"metadata": {
"opik": {
"current_span_data": "...",
"tags": ["streaming-test"],
},
}
}'
```

</TabItem>
</Tabs>

## Support & Talk to Founders

- [Schedule Demo 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)
97 changes: 89 additions & 8 deletions docs/my-website/docs/proxy/logging.md
@@ -1,3 +1,7 @@
import Image from '@theme/IdealImage';
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

# Logging

Log Proxy input, output, and exceptions using:
@@ -13,9 +17,7 @@ Log Proxy input, output, and exceptions using:
- DynamoDB
- etc.

- import Image from '@theme/IdealImage';
- import Tabs from '@theme/Tabs';
- import TabItem from '@theme/TabItem';


## Getting the LiteLLM Call ID

@@ -77,10 +79,13 @@ litellm_settings:
### Redact Messages, Response Content
- Set `litellm.turn_off_message_logging=True` This will prevent the messages and responses from being logged to your logging provider, but request metadata will still be logged.
+ Set `litellm.turn_off_message_logging=True`. This will prevent the messages and responses from being logged to your logging provider, but request metadata, e.g. spend, will still be tracked.

<Tabs>

<TabItem value="global" label="Global">

**1. Setup config.yaml**
```yaml
model_list:
  - model_name: gpt-3.5-turbo
@@ -91,9 +96,87 @@ litellm_settings:
  turn_off_message_logging: True # 👈 Key Change
```

- If you have this feature turned on, you can override it for specific requests by
+ **2. Send request**
```shell
curl --location 'http://0.0.0.0:4000/chat/completions' \
--header 'Content-Type: application/json' \
--data '{
"model": "gpt-3.5-turbo",
"messages": [
{
"role": "user",
"content": "what llm are you"
}
]
}'
```



</TabItem>
<TabItem value="dynamic" label="Per Request">

:::info

Dynamic request message redaction is in BETA.

:::

Pass in a request header to enable message redaction for a request.

```
x-litellm-enable-message-redaction: true
```
**1. Setup config.yaml**
```yaml
model_list:
  - model_name: gpt-3.5-turbo
    litellm_params:
      model: gpt-3.5-turbo
```

**2. Setup per request header**

```shell
curl -L -X POST 'http://0.0.0.0:4000/v1/chat/completions' \
-H 'Content-Type: application/json' \
-H 'Authorization: Bearer sk-zV5HlSIm8ihj1F9C_ZbB1g' \
-H 'x-litellm-enable-message-redaction: true' \
-d '{
"model": "gpt-3.5-turbo-testing",
"messages": [
{
"role": "user",
"content": "Hey, how'\''s it going 1234?"
}
]
}'
```

</TabItem>
</Tabs>

**3. Check Logging Tool + Spend Logs**

**Logging Tool**

<Image img={require('../../img/message_redaction_logging.png')}/>

**Spend Logs**

<Image img={require('../../img/message_redaction_spend_logs.png')} />


### Disable Message Redaction

If you have `litellm.turn_off_message_logging` turned on, you can override it for specific requests by
setting a request header `LiteLLM-Disable-Message-Redaction: true`.


```shell
curl --location 'http://0.0.0.0:4000/chat/completions' \
--header 'Content-Type: application/json' \
@@ -109,8 +192,6 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \
}'
```

- Removes any field with `user_api_key_*` from metadata.


### Turn off all tracking/logging

2 changes: 2 additions & 0 deletions docs/my-website/docs/proxy/request_headers.md
@@ -6,6 +6,8 @@ Special headers that are supported by LiteLLM.

`x-litellm-timeout` Optional[float]: The timeout for the request in seconds.

`x-litellm-enable-message-redaction` Optional[bool]: Don't log the message content to logging integrations. Just track spend. [Learn More](./logging#redact-messages-response-content)

## Anthropic Headers

`anthropic-version` Optional[str]: The version of the Anthropic API to use.
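
These special headers can also be sent from the OpenAI Python client pointed at a LiteLLM proxy. A sketch reusing the proxy URL and key from the curl examples above (`extra_headers` is the standard OpenAI SDK way to attach custom headers; the timeout value here is arbitrary):

```python
from openai import OpenAI

# LiteLLM proxy from the examples above
client = OpenAI(base_url="http://0.0.0.0:4000", api_key="sk-1234")

response = client.chat.completions.create(
    model="gpt-3.5-turbo-testing",
    messages=[{"role": "user", "content": "What's the weather like in Boston today?"}],
    extra_headers={
        "x-litellm-timeout": "30",                     # per-request timeout (seconds)
        "x-litellm-enable-message-redaction": "true",  # don't log message content
    },
)
print(response.choices[0].message.content)
```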
10 changes: 5 additions & 5 deletions litellm/integrations/opik/opik.py
@@ -147,13 +147,11 @@ async def _submit_batch(self, url: str, headers: Dict[str, str], batch: Dict):
f"OpikLogger - Error: {response.status_code} - {response.text}"
)
else:
- verbose_logger.debug(
+ verbose_logger.info(
f"OpikLogger - {len(self.log_queue)} Opik events submitted"
)
except Exception as e:
- verbose_logger.exception(
- f"OpikLogger failed to send batch - {str(e)}\n{traceback.format_exc()}"
- )
+ verbose_logger.exception(f"OpikLogger failed to send batch - {str(e)}")

def _create_opik_headers(self):
headers = {}
@@ -165,7 +163,7 @@ def _create_opik_headers(self):
return headers

async def async_send_batch(self):
verbose_logger.exception("Calling async_send_batch")
verbose_logger.info("Calling async_send_batch")
if not self.log_queue:
return

@@ -177,10 +175,12 @@
await self._submit_batch(
url=self.trace_url, headers=self.headers, batch={"traces": traces}
)
verbose_logger.info(f"Sent {len(traces)} traces")
if len(spans) > 0:
await self._submit_batch(
url=self.span_url, headers=self.headers, batch={"spans": spans}
)
verbose_logger.info(f"Sent {len(spans)} spans")

def _create_opik_payload( # noqa: PLR0915
self, kwargs, response_obj, start_time, end_time
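
Since the batch-submission messages above now log at info level, they only show up when LiteLLM's logger is verbose enough. A quick local check might look like this (a sketch; it assumes the `LITELLM_LOG` environment variable controls the log level, and the API keys are placeholders):

```python
import os

os.environ["LITELLM_LOG"] = "INFO"   # surface verbose_logger.info() output (read at import time)
os.environ["OPIK_API_KEY"] = ""      # placeholder
os.environ["OPIK_WORKSPACE"] = ""    # placeholder
os.environ["OPENAI_API_KEY"] = ""    # placeholder

import litellm

litellm.callbacks = ["opik"]

response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Why is tracking and evaluation of LLMs important?"}],
)
# Expect log lines like "OpikLogger - N Opik events submitted" once the batch flushes.
```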
