[feature] Surface apim-request-id #722

Merged 4 commits on Apr 16, 2024
app.py: 17 changes (9 additions & 8 deletions)

```diff
@@ -826,13 +826,14 @@ async def send_chat_request(request):

     try:
         azure_openai_client = init_openai_client()
-        response = await azure_openai_client.chat.completions.create(**model_args)
+        raw_response = await azure_openai_client.chat.completions.with_raw_response.create(**model_args)
+        response = raw_response.parse()
+        apim_request_id = raw_response.headers.get("apim-request-id")
     except Exception as e:
         logging.exception("Exception in send_chat_request")
         raise e

-    return response
+    return response, apim_request_id


 async def complete_chat_request(request_body):
```
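The key change is routing the call through the SDK's `.with_raw_response` accessor, which keeps the HTTP response, and therefore its headers, available next to the parsed completion. A minimal sketch of the pattern, assuming the `openai` 1.x Python SDK (the client setup and deployment name below are illustrative placeholders):

```python
from openai import AsyncAzureOpenAI

# Illustrative client setup; endpoint, key, and api_version are placeholders.
client = AsyncAzureOpenAI(
    azure_endpoint="https://example.openai.azure.com",
    api_key="<key>",
    api_version="2024-02-01",
)

async def chat_with_request_id(messages):
    # .with_raw_response wraps the same call but returns the raw HTTP
    # response, so headers set by Azure API Management stay accessible.
    raw = await client.chat.completions.with_raw_response.create(
        model="my-deployment",  # hypothetical deployment name
        messages=messages,
    )
    completion = raw.parse()  # the usual ChatCompletion object
    # APIM stamps each response with a correlation id useful for support.
    return completion, raw.headers.get("apim-request-id")
```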
```diff
@@ -843,18 +844,18 @@ async def complete_chat_request(request_body):
             response, history_metadata, PROMPTFLOW_RESPONSE_FIELD_NAME
         )
     else:
-        response = await send_chat_request(request_body)
+        response, apim_request_id = await send_chat_request(request_body)
         history_metadata = request_body.get("history_metadata", {})
-        return format_non_streaming_response(response, history_metadata)
+        return format_non_streaming_response(response, history_metadata, apim_request_id)


 async def stream_chat_request(request_body):
-    response = await send_chat_request(request_body)
+    response, apim_request_id = await send_chat_request(request_body)
     history_metadata = request_body.get("history_metadata", {})

     async def generate():
         async for completionChunk in response:
-            yield format_stream_response(completionChunk, history_metadata)
+            yield format_stream_response(completionChunk, history_metadata, apim_request_id)

     return generate()
```
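On the streaming branch, `raw_response.parse()` yields the async stream rather than a single completion, so `send_chat_request` returns the same `(response, apim_request_id)` tuple either way and every formatted chunk carries the id. A rough sketch of a consumer (the driver function here is hypothetical):

```python
import json

async def drain_stream(request_body):
    # Hypothetical helper: drains the generator returned by
    # stream_chat_request and reads the correlation id off the chunks.
    generator = await stream_chat_request(request_body)
    apim_request_id = None
    async for chunk in generator:
        # format_stream_response embeds the same "apim-request-id" in
        # every yielded dict, so any single event is enough to recover it.
        apim_request_id = chunk.get("apim-request-id", apim_request_id)
        print(json.dumps(chunk))
    return apim_request_id
```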
backend/utils.py: 7 changes (4 additions & 3 deletions)

```diff
@@ -73,14 +73,15 @@ def generateFilterString(userToken):
     return f"{AZURE_SEARCH_PERMITTED_GROUPS_COLUMN}/any(g:search.in(g, '{group_ids}'))"


-def format_non_streaming_response(chatCompletion, history_metadata, message_uuid=None):
+def format_non_streaming_response(chatCompletion, history_metadata, apim_request_id):
     response_obj = {
         "id": chatCompletion.id,
         "model": chatCompletion.model,
         "created": chatCompletion.created,
         "object": chatCompletion.object,
         "choices": [{"messages": []}],
         "history_metadata": history_metadata,
+        "apim-request-id": apim_request_id,
     }

     if len(chatCompletion.choices) > 0:
```
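With the helper updated, the JSON body returned to the frontend exposes the id at the top level; an illustrative payload shape (all values invented):

```python
# Illustrative response shape only; every value here is made up.
example_payload = {
    "id": "chatcmpl-abc123",
    "model": "gpt-35-turbo",
    "created": 1713280000,
    "object": "chat.completion",
    "choices": [{"messages": [{"role": "assistant", "content": "..."}]}],
    "history_metadata": {},
    "apim-request-id": "4bb1b8d5-0000-0000-0000-000000000000",
}
```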
```diff
@@ -103,15 +104,15 @@ def format_non_streaming_response(chatCompletion, history_metadata, message_uuid=None):

     return {}


-def format_stream_response(chatCompletionChunk, history_metadata, message_uuid=None):
+def format_stream_response(chatCompletionChunk, history_metadata, apim_request_id):
     response_obj = {
         "id": chatCompletionChunk.id,
         "model": chatCompletionChunk.model,
         "created": chatCompletionChunk.created,
         "object": chatCompletionChunk.object,
         "choices": [{"messages": []}],
         "history_metadata": history_metadata,
+        "apim-request-id": apim_request_id,
     }

     if len(chatCompletionChunk.choices) > 0:
```
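Worth noting: in both helpers the new required `apim_request_id` parameter takes the slot of the optional `message_uuid`, which the shown bodies never used, so any other call sites of `format_non_streaming_response` or `format_stream_response` would need to pass the extra argument as well.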