From 09f9eb735851b77c6b7eb6cd1b0c478fa2d7def7 Mon Sep 17 00:00:00 2001
From: Ian
Date: Wed, 18 Sep 2024 10:55:13 -0700
Subject: [PATCH 1/6] interrupt working

---
 recipes/llm-voice-assistant/python/main.py | 163 +++++++++++-------
 .../python/requirements.txt                |   2 +-
 2 files changed, 105 insertions(+), 60 deletions(-)

diff --git a/recipes/llm-voice-assistant/python/main.py b/recipes/llm-voice-assistant/python/main.py
index 6e75722..b9d299d 100644
--- a/recipes/llm-voice-assistant/python/main.py
+++ b/recipes/llm-voice-assistant/python/main.py
@@ -1,4 +1,5 @@
 import signal
+import concurrent.futures
 import time
 from argparse import ArgumentParser
 from collections import deque
@@ -69,6 +70,7 @@ def orca_worker(access_key: str, connection, warmup_sec: float, stream_frame_sec
     synthesize = False
     flush = False
     close = False
+    interrupt = False
 
     utterance_end_sec = 0.
     delay_sec = [-1.]
@@ -128,6 +130,16 @@ def play_buffered_pcm() -> None:
             connection.send({'done': True})
         elif close:
             break
+        elif interrupt:
+            orca_profiler.tick()
+            pcm = orca_stream.flush()
+            orca_profiler.tock(pcm)
+            connection.send({'rtf': orca_profiler.rtf(), 'delay': delay_sec[0]})
+            interrupt = False
+            pcm_deque.clear()
+            speaker.stop()
+            delay_sec[0] = -1
+            connection.send({'done': True})
         else:
             time.sleep(stream_frame_sec)
 
@@ -145,6 +157,8 @@ def play_buffered_pcm() -> None:
                 flush = True
             elif message['command'] == 'close':
                 close = True
+            elif message['command'] == 'interrupt':
+                interrupt = True
 
     speaker.delete()
     orca_stream.close()
@@ -269,10 +283,70 @@ def handler(_, __) -> None:
 
     signal.signal(signal.SIGINT, handler)
 
+    def generate_task(dialog, user_request, utterance_end_sec, main_connection):
+        short_answers_instruction = \
+            "You are a voice assistant and your answers are very short but informative"
+        dialog.add_human_request(
+            f"{short_answers_instruction}. {user_request}" if short_answers else user_request)
+
+        picollm_profiler = TPSProfiler()
+
+        stop_phrases = {
+            '</s>',  # Llama-2, Mistral, and Mixtral
+            '<end_of_turn>',  # Gemma
+            '<|endoftext|>',  # Phi-2
+            '<|eot_id|>',  # Llama-3
+        }
+
+        completion = ['']
+
+        def llm_callback(text: str) -> None:
+            picollm_profiler.tock()
+            completion[0] += text
+            if not any(x in completion[0] for x in stop_phrases):
+                main_connection.send({
+                    'command': 'synthesize',
+                    'text': text.replace('\n', ' . '),
+                    'utterance_end_sec': utterance_end_sec})
+                print(text, end='', flush=True)
+
+        print("\nLLM (say `Picovoice` to interrupt) > ", end='', flush=True)
+        res = pllm.generate(
+            prompt=dialog.prompt(),
+            completion_token_limit=picollm_completion_token_limit,
+            stop_phrases=stop_phrases,
+            presence_penalty=picollm_presence_penalty,
+            frequency_penalty=picollm_frequency_penalty,
+            temperature=picollm_temperature,
+            top_p=picollm_top_p,
+            stream_callback=llm_callback)
+
+        if res.endpoint == picollm.PicoLLMEndpoints.INTERRUPTED:
+            main_connection.send({'command': 'interrupt'})
+        else:
+            main_connection.send({'command': 'flush'})
+
+        print('\n')
+        dialog.add_llm_response(res.completion)
+
+        if profile:
+            print(f"[picoLLM TPS: {picollm_profiler.tps():.2f}]")
+
+        while not main_connection.poll():
+            time.sleep(0.01)
+        message = main_connection.recv()
+        if profile:
+            print(f"[Orca RTF: {message['rtf']:.2f}]")
+            print(f"[Delay: {message['delay']:.2f} sec]")
+        while not main_connection.poll():
+            time.sleep(0.01)
+        assert main_connection.recv()['done']
+
+        return res
+
     wake_word_detected = False
     user_request = ''
     endpoint_reached = False
-    utterance_end_sec = 0
 
     porcupine_profiler = RTFProfiler(porcupine.sample_rate)
     cheetah_profiler = RTFProfiler(cheetah.sample_rate)
@@ -304,66 +378,37 @@ def handler(_, __) -> None:
                     remaining_transcript = cheetah.flush()
                     cheetah_profiler.tock()
                     user_request += remaining_transcript
-                    print(remaining_transcript, end='\n\n')
+                    print(remaining_transcript, end='\n')
                     if profile:
                         print(f"[Cheetah RTF: {cheetah_profiler.rtf():.3f}]")
-            else:
-                short_answers_instruction = \
-                    "You are a voice assistant and your answers are very short but informative"
-                dialog.add_human_request(
-                    f"{short_answers_instruction}. {user_request}" if short_answers else user_request)
-
-                picollm_profiler = TPSProfiler()
-
-                stop_phrases = {
-                    '</s>',  # Llama-2, Mistral, and Mixtral
-                    '<end_of_turn>',  # Gemma
-                    '<|endoftext|>',  # Phi-2
-                    '<|eot_id|>',  # Llama-3
-                }
-
-                completion = ['']
-
-                def llm_callback(text: str) -> None:
-                    picollm_profiler.tock()
-                    completion[0] += text
-                    if not any(x in completion[0] for x in stop_phrases):
-                        main_connection.send({
-                            'command': 'synthesize',
-                            'text': text.replace('\n', ' . '),
-                            'utterance_end_sec': utterance_end_sec})
-                        print(text, end='', flush=True)
-
-                print("\nLLM > ", end='', flush=True)
-                res = pllm.generate(
-                    prompt=dialog.prompt(),
-                    completion_token_limit=picollm_completion_token_limit,
-                    stop_phrases=stop_phrases,
-                    presence_penalty=picollm_presence_penalty,
-                    frequency_penalty=picollm_frequency_penalty,
-                    temperature=picollm_temperature,
-                    top_p=picollm_top_p,
-                    stream_callback=llm_callback)
-                main_connection.send({'command': 'flush'})
-                print('\n')
-                dialog.add_llm_response(res.completion)
-                if profile:
-                    print(f"[picoLLM TPS: {picollm_profiler.tps():.2f}]")
-
-                while not main_connection.poll():
-                    time.sleep(0.01)
-                message = main_connection.recv()
-                if profile:
-                    print(f"[Orca RTF: {message['rtf']:.2f}]")
-                    print(f"[Delay: {message['delay']:.2f} sec]")
-                while not main_connection.poll():
-                    time.sleep(0.01)
-                assert main_connection.recv()['done']
-
-                wake_word_detected = False
-                user_request = ''
-                endpoint_reached = False
-                print(f"\n$ Say {'`Picovoice`' if keyword_model_path is None else 'the wake word'} ...")
+                    with concurrent.futures.ThreadPoolExecutor() as executor:
+                        llm_future = executor.submit(
+                            generate_task,
+                            dialog,
+                            user_request,
+                            utterance_end_sec,
+                            main_connection)
+
+                        while not llm_future.done():
+                            pcm = mic.read()
+                            porcupine_profiler.tick()
+                            wake_word_detected = porcupine.process(pcm) == 0
+                            porcupine_profiler.tock(pcm)
+                            if wake_word_detected:
+                                pllm.interrupt()
+                                break
+
+                        llm_result = llm_future.result()
+                        if llm_result.endpoint == picollm.PicoLLMEndpoints.INTERRUPTED:
+                            wake_word_detected = True
+                            print("$ Wake word detected, utter your request or question ...\n")
+                            print("User > ", end='', flush=True)
+                        else:
+                            wake_word_detected = False
+                            print(f"$ Say {'`Picovoice`' if keyword_model_path is None else 'the wake word'} ...")
+
+                        user_request = ''
+                        endpoint_reached = False
     finally:
         main_connection.send({'command': 'close'})
         mic.delete()
diff --git a/recipes/llm-voice-assistant/python/requirements.txt b/recipes/llm-voice-assistant/python/requirements.txt
index 57405c9..7b59945 100644
--- a/recipes/llm-voice-assistant/python/requirements.txt
+++ b/recipes/llm-voice-assistant/python/requirements.txt
@@ -1,4 +1,4 @@
-picollm==1.0.0
+../../../../picollm/binding/python/dist/picollm-1.1.0-py3-none-any.whl
 pvcheetah==2.0.1
 pvorca==1.0.0
 pvporcupine==3.0.2

From 7f6461dd0ac7c92fd42c38bceb02ef66b10adedb Mon Sep 17 00:00:00 2001
From: Ian
Date: Wed, 18 Sep 2024 10:56:05 -0700
Subject: [PATCH 2/6] refactor

---
 recipes/llm-voice-assistant/python/main.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/recipes/llm-voice-assistant/python/main.py b/recipes/llm-voice-assistant/python/main.py
index b9d299d..9a88180 100644
--- a/recipes/llm-voice-assistant/python/main.py
+++ b/recipes/llm-voice-assistant/python/main.py
@@ -283,7 +283,7 @@ def handler(_, __) -> None:
     signal.signal(signal.SIGINT, handler)
 
-    def generate_task(dialog, user_request, utterance_end_sec, main_connection):
+    def llm_task(dialog, user_request, utterance_end_sec, main_connection):
         short_answers_instruction = \
             "You are a voice assistant and your answers are very short but informative"
         dialog.add_human_request(
             f"{short_answers_instruction}. {user_request}" if short_answers else user_request)
@@ -383,7 +383,7 @@ def llm_callback(text: str) -> None:
                         print(f"[Cheetah RTF: {cheetah_profiler.rtf():.3f}]")
                     with concurrent.futures.ThreadPoolExecutor() as executor:
                         llm_future = executor.submit(
-                            generate_task,
+                            llm_task,
                             dialog,
                             user_request,
                             utterance_end_sec,
                             main_connection)

From f62cad2b773e54bd756762200e921443826419f1 Mon Sep 17 00:00:00 2001
From: Ian
Date: Thu, 19 Sep 2024 16:29:01 -0700
Subject: [PATCH 3/6] lower case

---
 recipes/llm-voice-assistant/python/main.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/recipes/llm-voice-assistant/python/main.py b/recipes/llm-voice-assistant/python/main.py
index 9a88180..862976f 100644
--- a/recipes/llm-voice-assistant/python/main.py
+++ b/recipes/llm-voice-assistant/python/main.py
@@ -252,24 +252,24 @@ def main() -> None:
         porcupine = pvporcupine.create(access_key=access_key, keywords=['picovoice'])
     else:
         porcupine = pvporcupine.create(access_key=access_key, keyword_paths=[keyword_model_path])
-    print(f"→ Porcupine V{porcupine.version}")
+    print(f"→ Porcupine v{porcupine.version}")
 
     cheetah = pvcheetah.create(
         access_key=access_key,
         endpoint_duration_sec=cheetah_endpoint_duration_sec,
         enable_automatic_punctuation=True)
-    print(f"→ Cheetah V{cheetah.version}")
+    print(f"→ Cheetah v{cheetah.version}")
 
     pllm = picollm.create(access_key=access_key, model_path=picollm_model_path, device=picollm_device)
     dialog = pllm.get_dialog()
-    print(f"→ picoLLM V{pllm.version} <{pllm.model}>")
+    print(f"→ picoLLM v{pllm.version} <{pllm.model}>")
 
     main_connection, orca_process_connection = Pipe()
     orca_process = Process(target=orca_worker, args=(access_key, orca_process_connection, orca_warmup_sec))
     orca_process.start()
     while not main_connection.poll():
         time.sleep(0.01)
-    print(f"→ Orca V{main_connection.recv()['version']}")
+    print(f"→ Orca v{main_connection.recv()['version']}")
 
     mic = PvRecorder(frame_length=porcupine.frame_length)
     mic.start()

From c7173f1eaf8457613a5e4d32dbeddbc5ce74c3ff Mon Sep 17 00:00:00 2001
From: Ian
Date: Thu, 19 Sep 2024 16:46:59 -0700
Subject: [PATCH 4/6] main

---
 recipes/llm-voice-assistant/python/main.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/recipes/llm-voice-assistant/python/main.py b/recipes/llm-voice-assistant/python/main.py
index 862976f..79a57e1 100644
--- a/recipes/llm-voice-assistant/python/main.py
+++ b/recipes/llm-voice-assistant/python/main.py
@@ -296,6 +296,7 @@ def llm_task(dialog, user_request, utterance_end_sec, main_connection):
             '<end_of_turn>',  # Gemma
             '<|endoftext|>',  # Phi-2
             '<|eot_id|>',  # Llama-3
+            '<|end|>', '<|user|>', '<|assistant|>',  # Phi-3
         }
 
         completion = ['']

From a0a47ec29a239e7c3e0166e7e8ff0445676ab0ae Mon Sep 17 00:00:00 2001
From: Ian Lavery
Date: Fri, 20 Sep 2024 16:15:17 -0700
Subject: [PATCH 5/6] Update main.py

---
 recipes/llm-voice-assistant/python/main.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/recipes/llm-voice-assistant/python/main.py b/recipes/llm-voice-assistant/python/main.py
index 79a57e1..d1e511e 100644
--- a/recipes/llm-voice-assistant/python/main.py
+++ b/recipes/llm-voice-assistant/python/main.py
@@ -311,7 +311,7 @@ def llm_callback(text: str) -> None:
                     'utterance_end_sec': utterance_end_sec})
                 print(text, end='', flush=True)
 
-        print("\nLLM (say `Picovoice` to interrupt) > ", end='', flush=True)
+        print(f"\nLLM (say {'`Picovoice`' if keyword_model_path is None else 'the wake word'} to interrupt) > ", end='', flush=True)
         res = pllm.generate(
             prompt=dialog.prompt(),
             completion_token_limit=picollm_completion_token_limit,

From 783307977af6709c03a6d14c2bab621eca4cfcd5 Mon Sep 17 00:00:00 2001
From: Ian
Date: Tue, 1 Oct 2024 16:14:47 -0700
Subject: [PATCH 6/6] use released version

---
 recipes/llm-voice-assistant/python/requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/recipes/llm-voice-assistant/python/requirements.txt b/recipes/llm-voice-assistant/python/requirements.txt
index 7b59945..23ab826 100644
--- a/recipes/llm-voice-assistant/python/requirements.txt
+++ b/recipes/llm-voice-assistant/python/requirements.txt
@@ -1,4 +1,4 @@
-../../../../picollm/binding/python/dist/picollm-1.1.0-py3-none-any.whl
+picollm==1.1.0
 pvcheetah==2.0.1
 pvorca==1.0.0
 pvporcupine==3.0.2