From a5ce4810dc109667e79b7995519ca0bf1e8931e4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mehmet=20Kesimalio=C4=9Flu?=
 <24902892+mkesim@users.noreply.github.com>
Date: Wed, 15 Jan 2025 07:09:55 +0300
Subject: [PATCH] Add Gradio speech-to-text app

---

For more details, open the [Copilot Workspace session](https://copilot-workspace.githubnext.com/alphacep/vosk-api?shareId=XXXX-XXXX-XXXX-XXXX).
---
 python/example/test_gradio.py | 36 +++++++++++++++++++++++++----------
 1 file changed, 26 insertions(+), 10 deletions(-)
 mode change 100755 => 100644 python/example/test_gradio.py

diff --git a/python/example/test_gradio.py b/python/example/test_gradio.py
old mode 100755
new mode 100644
index b6c6a9b9..a387a023
--- a/python/example/test_gradio.py
+++ b/python/example/test_gradio.py
@@ -7,7 +7,7 @@
 
 model = Model(lang="en-us")
 
-def transcribe(stream, new_chunk):
+def transcribe(stream, new_chunk, transcribe_speaker, transcribe_meeting):
 
     sample_rate, audio_data = new_chunk
     audio_data = audio_data.tobytes()
@@ -28,12 +28,28 @@ def transcribe(stream, new_chunk):
 
     return (rec, result), "\n".join(result) + "\n" + partial_result
 
-gr.Interface(
-    fn=transcribe,
-    inputs=[
-        "state", gr.Audio(sources=["microphone"], type="numpy", streaming=True),
-    ],
-    outputs=[
-        "state", "text",
-    ],
-    live=True).launch(share=True)
+def start_transcription():  # Start-button handler: takes no inputs.
+    return None, ""  # (state, text): clears the stored state and empties the textbox
+
+def stop_transcription(stream):  # Stop-button handler: returns (cleared state, full transcript).
+    if stream is None:  # no (recognizer, lines) pair stored, so nothing to flush
+        return None, ""
+    recognizer, lines = stream
+    tail = json.loads(recognizer.FinalResult())["text"]  # "text" field of the FinalResult() JSON
+    lines.append(tail)
+    return None, "\n".join(lines)
+
+with gr.Blocks() as demo:  # UI layout and event wiring for the speech-to-text demo
+    transcribe_speaker = gr.Checkbox(label="Transcribe Speaker's Voice")  # passed to transcribe() as its 3rd argument
+    transcribe_meeting = gr.Checkbox(label="Transcribe Entire Meeting")  # passed to transcribe() as its 4th argument
+    start_button = gr.Button("Start Transcription")
+    stop_button = gr.Button("Stop Transcription")
+    state = gr.State()  # holds the (recognizer, lines) pair that transcribe() returns between calls
+    audio = gr.Audio(sources=["microphone"], type="numpy", streaming=True)
+    text = gr.Textbox()
+
+    start_button.click(start_transcription, inputs=[], outputs=[state, text])
+    stop_button.click(stop_transcription, inputs=[state], outputs=[state, text])
+    audio.stream(transcribe, inputs=[state, audio, transcribe_speaker, transcribe_meeting], outputs=[state, text])  # streaming audio delivers chunks via the stream event (what Interface(live=True) used), not change
+
+demo.launch(share=True)  # share=True requests a public share link in addition to the local server