From 141678e1b2663cb1ed79358aaf12aa4c68566da3 Mon Sep 17 00:00:00 2001
From: "Wang, Xigui"
Date: Thu, 23 Jan 2025 20:22:23 +0800
Subject: [PATCH] Fix Doc-Sum stream output format

Workaround to keep the Doc-Sum stream output aligned with the v1.1
format: extract only the LLM tokens from the stream output instead of
forwarding the raw serialized ops.

Fixes issue: https://github.com/opea-project/GenAIInfra/issues/753

Signed-off-by: Wang, Xigui
---
 .../doc-summarization/integrations/common.py | 25 +++++++++++++++----
 1 file changed, 20 insertions(+), 5 deletions(-)

diff --git a/comps/llms/src/doc-summarization/integrations/common.py b/comps/llms/src/doc-summarization/integrations/common.py
index a866ef6be..e15109f74 100644
--- a/comps/llms/src/doc-summarization/integrations/common.py
+++ b/comps/llms/src/doc-summarization/integrations/common.py
@@ -190,15 +190,30 @@ async def generate(self, input: DocSumChatCompletionRequest, client):
         if input.stream:
 
-            async def stream_generator():
-                from langserve.serialization import WellKnownLCSerializer
+            import json
+            from langserve.serialization import WellKnownLCSerializer
+            _serializer = WellKnownLCSerializer()
+
+            def extract_llm_tokens(stream_output):
+                op_data = _serializer.dumps({"ops": stream_output.ops}).decode("utf-8")
+                parsed_data = json.loads(op_data)
+
+                tokens = []
+                for op in parsed_data.get("ops", []):
+                    if op["op"] == "add" and "/streamed_output_str" in op["path"]:
+                        tokens.append(op["value"])
+
+                return "".join(tokens)
 
-                _serializer = WellKnownLCSerializer()
+            async def stream_generator():
                 async for chunk in llm_chain.astream_log(docs):
-                    data = _serializer.dumps({"ops": chunk.ops}).decode("utf-8")
+                    data = extract_llm_tokens(chunk)
+
                     if logflag:
                         logger.info(data)
-                    yield f"data: {data}\n\n"
+                    if data != '':
+                        yield f"data: {data}\n\n"
+                yield "data: [DONE]\n\n"
 
             return StreamingResponse(stream_generator(), media_type="text/event-stream")
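
Reviewer note (not part of the patch): below is a minimal, self-contained
sketch of what the new extract_llm_tokens helper keeps from a stream chunk.
The FakeLogChunk class and the sample ops payload are hypothetical stand-ins
shaped like the RunLogPatch chunks that LangChain's astream_log() yields; the
WellKnownLCSerializer round-trip from the patch is skipped here because the
sample ops are already plain JSON.

    # Hypothetical stand-in for the RunLogPatch chunks yielded by
    # llm_chain.astream_log(); only the .ops attribute matters here.
    class FakeLogChunk:
        def __init__(self, ops):
            self.ops = ops

    def extract_llm_tokens(stream_output):
        # Keep only LLM tokens: "add" ops on a streamed_output_str path.
        tokens = []
        for op in stream_output.ops:
            if op["op"] == "add" and "/streamed_output_str" in op["path"]:
                tokens.append(op["value"])
        return "".join(tokens)

    # A sample chunk mixing bookkeeping ops with two token ops.
    chunk = FakeLogChunk(
        ops=[
            {"op": "replace", "path": "", "value": {"id": "run-1", "logs": {}}},
            {"op": "add", "path": "/logs/ChatOpenAI/streamed_output_str/-", "value": "Hello"},
            {"op": "add", "path": "/logs/ChatOpenAI/streamed_output_str/-", "value": " world"},
            {"op": "add", "path": "/streamed_output/-", "value": "ignored"},
        ]
    )

    print(extract_llm_tokens(chunk))  # -> "Hello world"

Chunks that carry no token (data == '') are dropped by the generator, so
v1.1-style clients only see "data: <tokens>" events followed by a final
"data: [DONE]".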