Skip to content

Commit

Permalink
Streaming Agent (#659)
Browse files Browse the repository at this point in the history
* fix e2e_webui

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Agent: Streaming audio

* fix

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Fix text streaming

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
AnyaCoder and pre-commit-ci[bot] authored Nov 5, 2024
1 parent ce22ce4 commit 672000d
Showing 1 changed file with 25 additions and 13 deletions.
38 changes: 25 additions & 13 deletions tools/e2e_webui.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import io
import re
import wave

import gradio as gr
import numpy as np
Expand All @@ -7,6 +9,19 @@
from .schema import ServeMessage, ServeTextPart, ServeVQPart


def wav_chunk_header(sample_rate=44100, bit_depth=16, channels=1):
buffer = io.BytesIO()

with wave.open(buffer, "wb") as wav_file:
wav_file.setnchannels(channels)
wav_file.setsampwidth(bit_depth // 8)
wav_file.setframerate(sample_rate)

wav_header_bytes = buffer.getvalue()
buffer.close()
return wav_header_bytes


class ChatState:
def __init__(self):
self.conversation = []
Expand Down Expand Up @@ -62,7 +77,7 @@ async def process_audio_input(

if isinstance(sys_audio_input, tuple):
sr, sys_audio_data = sys_audio_input
elif text_input:
else:
sr = 44100
sys_audio_data = None

Expand Down Expand Up @@ -95,21 +110,13 @@ def append_to_chat_ctx(
if event.type == FishE2EEventType.USER_CODES:
append_to_chat_ctx(ServeVQPart(codes=event.vq_codes), role="user")
elif event.type == FishE2EEventType.SPEECH_SEGMENT:
result_audio += event.frame.data
np_audio = np.frombuffer(result_audio, dtype=np.int16)
append_to_chat_ctx(ServeVQPart(codes=event.vq_codes))

yield state.get_history(), (44100, np_audio), None, None
yield state.get_history(), wav_chunk_header() + event.frame.data, None, None
elif event.type == FishE2EEventType.TEXT_SEGMENT:
append_to_chat_ctx(ServeTextPart(text=event.text))
if result_audio:
np_audio = np.frombuffer(result_audio, dtype=np.int16)
yield state.get_history(), (44100, np_audio), None, None
else:
yield state.get_history(), None, None, None
yield state.get_history(), None, None, None

np_audio = np.frombuffer(result_audio, dtype=np.int16)
yield state.get_history(), (44100, np_audio), None, None
yield state.get_history(), None, None, None


async def process_text_input(
Expand Down Expand Up @@ -179,7 +186,12 @@ def create_demo():

text_input = gr.Textbox(label="Or type your message", type="text")

output_audio = gr.Audio(label="Assistant's Voice", type="numpy")
output_audio = gr.Audio(
label="Assistant's Voice",
streaming=True,
autoplay=True,
interactive=False,
)

send_button = gr.Button("Send", variant="primary")
clear_button = gr.Button("Clear")
Expand Down

0 comments on commit 672000d

Please sign in to comment.