From 09f9eb735851b77c6b7eb6cd1b0c478fa2d7def7 Mon Sep 17 00:00:00 2001
From: Ian
Date: Wed, 18 Sep 2024 10:55:13 -0700
Subject: [PATCH 1/6] interrupt working

---
 recipes/llm-voice-assistant/python/main.py | 163 +++++++++++-------
 .../python/requirements.txt                |   2 +-
 2 files changed, 105 insertions(+), 60 deletions(-)

diff --git a/recipes/llm-voice-assistant/python/main.py b/recipes/llm-voice-assistant/python/main.py
index 6e75722..b9d299d 100644
--- a/recipes/llm-voice-assistant/python/main.py
+++ b/recipes/llm-voice-assistant/python/main.py
@@ -1,4 +1,5 @@
 import signal
+import concurrent.futures
 import time
 from argparse import ArgumentParser
 from collections import deque
@@ -69,6 +70,7 @@ def orca_worker(access_key: str, connection, warmup_sec: float, stream_frame_sec
     synthesize = False
     flush = False
     close = False
+    interrupt = False
 
     utterance_end_sec = 0.
     delay_sec = [-1.]
@@ -128,6 +130,16 @@ def play_buffered_pcm() -> None:
             connection.send({'done': True})
         elif close:
             break
+        elif interrupt:
+            orca_profiler.tick()
+            pcm = orca_stream.flush()
+            orca_profiler.tock(pcm)
+            connection.send({'rtf': orca_profiler.rtf(), 'delay': delay_sec[0]})
+            interrupt = False
+            pcm_deque.clear()
+            speaker.stop()
+            delay_sec[0] = -1
+            connection.send({'done': True})
         else:
             time.sleep(stream_frame_sec)
 
@@ -145,6 +157,8 @@ def play_buffered_pcm() -> None:
                 flush = True
             elif message['command'] == 'close':
                 close = True
+            elif message['command'] == 'interrupt':
+                interrupt = True
 
     speaker.delete()
     orca_stream.close()
@@ -269,10 +283,70 @@ def handler(_, __) -> None:
 
     signal.signal(signal.SIGINT, handler)
 
+    def generate_task(dialog, user_request, utterance_end_sec, main_connection):
+        short_answers_instruction = \
+            "You are a voice assistant and your answers are very short but informative"
+        dialog.add_human_request(
+            f"{short_answers_instruction}. {user_request}" if short_answers else user_request)
+
+        picollm_profiler = TPSProfiler()
+
+        stop_phrases = {
+            '</s>',  # Llama-2, Mistral, and Mixtral
+            '<end_of_turn>',  # Gemma
+            '<|endoftext|>',  # Phi-2
+            '<|eot_id|>',  # Llama-3
+        }
+
+        completion = ['']
+
+        def llm_callback(text: str) -> None:
+            picollm_profiler.tock()
+            completion[0] += text
+            if not any(x in completion[0] for x in stop_phrases):
+                main_connection.send({
+                    'command': 'synthesize',
+                    'text': text.replace('\n', ' . '),
+                    'utterance_end_sec': utterance_end_sec})
+                print(text, end='', flush=True)
+
+        print("\nLLM (say `Picovoice` to interrupt) > ", end='', flush=True)
+        res = pllm.generate(
+            prompt=dialog.prompt(),
+            completion_token_limit=picollm_completion_token_limit,
+            stop_phrases=stop_phrases,
+            presence_penalty=picollm_presence_penalty,
+            frequency_penalty=picollm_frequency_penalty,
+            temperature=picollm_temperature,
+            top_p=picollm_top_p,
+            stream_callback=llm_callback)
+
+        if res.endpoint == picollm.PicoLLMEndpoints.INTERRUPTED:
+            main_connection.send({'command': 'interrupt'})
+        else:
+            main_connection.send({'command': 'flush'})
+
+        print('\n')
+        dialog.add_llm_response(res.completion)
+
+        if profile:
+            print(f"[picoLLM TPS: {picollm_profiler.tps():.2f}]")
+
+        while not main_connection.poll():
+            time.sleep(0.01)
+        message = main_connection.recv()
+        if profile:
+            print(f"[Orca RTF: {message['rtf']:.2f}]")
+            print(f"[Delay: {message['delay']:.2f} sec]")
+        while not main_connection.poll():
+            time.sleep(0.01)
+        assert main_connection.recv()['done']
+
+        return res
+
     wake_word_detected = False
     user_request = ''
     endpoint_reached = False
-    utterance_end_sec = 0
 
     porcupine_profiler = RTFProfiler(porcupine.sample_rate)
     cheetah_profiler = RTFProfiler(cheetah.sample_rate)
@@ -304,66 +378,37 @@ def handler(_, __) -> None:
                     remaining_transcript = cheetah.flush()
                     cheetah_profiler.tock()
                     user_request += remaining_transcript
-                    print(remaining_transcript, end='\n\n')
+                    print(remaining_transcript, end='\n')
                     if profile:
                         print(f"[Cheetah RTF: {cheetah_profiler.rtf():.3f}]")
-            else:
-                short_answers_instruction = \
-                    "You are a voice assistant and your answers are very short but informative"
-                dialog.add_human_request(
-                    f"{short_answers_instruction}. {user_request}" if short_answers else user_request)
-
-                picollm_profiler = TPSProfiler()
-
-                stop_phrases = {
-                    '</s>',  # Llama-2, Mistral, and Mixtral
-                    '<end_of_turn>',  # Gemma
-                    '<|endoftext|>',  # Phi-2
-                    '<|eot_id|>',  # Llama-3
-                }
-
-                completion = ['']
-
-                def llm_callback(text: str) -> None:
-                    picollm_profiler.tock()
-                    completion[0] += text
-                    if not any(x in completion[0] for x in stop_phrases):
-                        main_connection.send({
-                            'command': 'synthesize',
-                            'text': text.replace('\n', ' . '),
-                            'utterance_end_sec': utterance_end_sec})
-                        print(text, end='', flush=True)
-
-                print("\nLLM > ", end='', flush=True)
-                res = pllm.generate(
-                    prompt=dialog.prompt(),
-                    completion_token_limit=picollm_completion_token_limit,
-                    stop_phrases=stop_phrases,
-                    presence_penalty=picollm_presence_penalty,
-                    frequency_penalty=picollm_frequency_penalty,
-                    temperature=picollm_temperature,
-                    top_p=picollm_top_p,
-                    stream_callback=llm_callback)
-                main_connection.send({'command': 'flush'})
-                print('\n')
-                dialog.add_llm_response(res.completion)
-                if profile:
-                    print(f"[picoLLM TPS: {picollm_profiler.tps():.2f}]")
-
-                while not main_connection.poll():
-                    time.sleep(0.01)
-                message = main_connection.recv()
-                if profile:
-                    print(f"[Orca RTF: {message['rtf']:.2f}]")
-                    print(f"[Delay: {message['delay']:.2f} sec]")
-                while not main_connection.poll():
-                    time.sleep(0.01)
-                assert main_connection.recv()['done']
-
-                wake_word_detected = False
-                user_request = ''
-                endpoint_reached = False
-                print(f"\n$ Say {'`Picovoice`' if keyword_model_path is None else 'the wake word'} ...")
+                    with concurrent.futures.ThreadPoolExecutor() as executor:
+                        llm_future = executor.submit(
+                            generate_task,
+                            dialog,
+                            user_request,
+                            utterance_end_sec,
+                            main_connection)
+
+                        while not llm_future.done():
+                            pcm = mic.read()
+                            porcupine_profiler.tick()
+                            wake_word_detected = porcupine.process(pcm) == 0
+                            porcupine_profiler.tock(pcm)
+                            if wake_word_detected:
+                                pllm.interrupt()
+                                break
+
+                        llm_result = llm_future.result()
+                        if llm_result.endpoint == picollm.PicoLLMEndpoints.INTERRUPTED:
+                            wake_word_detected = True
+                            print("$ Wake word detected, utter your request or question ...\n")
+                            print("User > ", end='', flush=True)
+                        else:
+                            wake_word_detected = False
+                            print(f"$ Say {'`Picovoice`' if keyword_model_path is None else 'the wake word'} ...")
+
+                        user_request = ''
+                        endpoint_reached = False
     finally:
         main_connection.send({'command': 'close'})
         mic.delete()
diff --git a/recipes/llm-voice-assistant/python/requirements.txt b/recipes/llm-voice-assistant/python/requirements.txt
index 57405c9..7b59945 100644
--- a/recipes/llm-voice-assistant/python/requirements.txt
+++ b/recipes/llm-voice-assistant/python/requirements.txt
@@ -1,4 +1,4 @@
-picollm==1.0.0
+../../../../picollm/binding/python/dist/picollm-1.1.0-py3-none-any.whl
 pvcheetah==2.0.1
 pvorca==1.0.0
 pvporcupine==3.0.2

From 7f6461dd0ac7c92fd42c38bceb02ef66b10adedb Mon Sep 17 00:00:00 2001
From: Ian
Date: Wed, 18 Sep 2024 10:56:05 -0700
Subject: [PATCH 2/6] refactor

---
 recipes/llm-voice-assistant/python/main.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/recipes/llm-voice-assistant/python/main.py b/recipes/llm-voice-assistant/python/main.py
index b9d299d..9a88180 100644
--- a/recipes/llm-voice-assistant/python/main.py
+++ b/recipes/llm-voice-assistant/python/main.py
@@ -283,7 +283,7 @@ def handler(_, __) -> None:
     signal.signal(signal.SIGINT, handler)
 
-    def generate_task(dialog, user_request, utterance_end_sec, main_connection):
+    def llm_task(dialog, user_request, utterance_end_sec, main_connection):
         short_answers_instruction = \
             "You are a voice assistant and your answers are very short but informative"
         dialog.add_human_request(
             f"{short_answers_instruction}. {user_request}" if short_answers else user_request)
@@ -383,7 +383,7 @@ def llm_callback(text: str) -> None:
                         print(f"[Cheetah RTF: {cheetah_profiler.rtf():.3f}]")
                     with concurrent.futures.ThreadPoolExecutor() as executor:
                         llm_future = executor.submit(
-                            generate_task,
+                            llm_task,
                             dialog,
                             user_request,
                             utterance_end_sec,
                             main_connection)

From f62cad2b773e54bd756762200e921443826419f1 Mon Sep 17 00:00:00 2001
From: Ian
Date: Thu, 19 Sep 2024 16:29:01 -0700
Subject: [PATCH 3/6] lower case

---
 recipes/llm-voice-assistant/python/main.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/recipes/llm-voice-assistant/python/main.py b/recipes/llm-voice-assistant/python/main.py
index 9a88180..862976f 100644
--- a/recipes/llm-voice-assistant/python/main.py
+++ b/recipes/llm-voice-assistant/python/main.py
@@ -252,24 +252,24 @@ def main() -> None:
         porcupine = pvporcupine.create(access_key=access_key, keywords=['picovoice'])
     else:
         porcupine = pvporcupine.create(access_key=access_key, keyword_paths=[keyword_model_path])
-    print(f"→ Porcupine V{porcupine.version}")
+    print(f"→ Porcupine v{porcupine.version}")
 
     cheetah = pvcheetah.create(
         access_key=access_key,
         endpoint_duration_sec=cheetah_endpoint_duration_sec,
         enable_automatic_punctuation=True)
-    print(f"→ Cheetah V{cheetah.version}")
+    print(f"→ Cheetah v{cheetah.version}")
 
     pllm = picollm.create(access_key=access_key, model_path=picollm_model_path, device=picollm_device)
     dialog = pllm.get_dialog()
-    print(f"→ picoLLM V{pllm.version} <{pllm.model}>")
+    print(f"→ picoLLM v{pllm.version} <{pllm.model}>")
 
     main_connection, orca_process_connection = Pipe()
     orca_process = Process(target=orca_worker, args=(access_key, orca_process_connection, orca_warmup_sec))
     orca_process.start()
     while not main_connection.poll():
         time.sleep(0.01)
-    print(f"→ Orca V{main_connection.recv()['version']}")
+    print(f"→ Orca v{main_connection.recv()['version']}")
 
     mic = PvRecorder(frame_length=porcupine.frame_length)
     mic.start()

From c7173f1eaf8457613a5e4d32dbeddbc5ce74c3ff Mon Sep 17 00:00:00 2001
From: Ian
Date: Thu, 19 Sep 2024 16:46:59 -0700
Subject: [PATCH 4/6] main

---
 recipes/llm-voice-assistant/python/main.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/recipes/llm-voice-assistant/python/main.py b/recipes/llm-voice-assistant/python/main.py
index 862976f..79a57e1 100644
--- a/recipes/llm-voice-assistant/python/main.py
+++ b/recipes/llm-voice-assistant/python/main.py
@@ -296,6 +296,7 @@ def llm_task(dialog, user_request, utterance_end_sec, main_connection):
             '<end_of_turn>',  # Gemma
             '<|endoftext|>',  # Phi-2
             '<|eot_id|>',  # Llama-3
+            '<|end|>', '<|user|>', '<|assistant|>',  # Phi-3
         }
 
         completion = ['']

From a0a47ec29a239e7c3e0166e7e8ff0445676ab0ae Mon Sep 17 00:00:00 2001
From: Ian Lavery
Date: Fri, 20 Sep 2024 16:15:17 -0700
Subject: [PATCH 5/6] Update main.py

---
 recipes/llm-voice-assistant/python/main.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/recipes/llm-voice-assistant/python/main.py b/recipes/llm-voice-assistant/python/main.py
index 79a57e1..d1e511e 100644
--- a/recipes/llm-voice-assistant/python/main.py
+++ b/recipes/llm-voice-assistant/python/main.py
@@ -311,7 +311,7 @@ def llm_callback(text: str) -> None:
                     'utterance_end_sec': utterance_end_sec})
                 print(text, end='', flush=True)
 
-        print("\nLLM (say `Picovoice` to interrupt) > ", end='', flush=True)
+        print(f"\nLLM (say {'`Picovoice`' if keyword_model_path is None else 'the wake word'} to interrupt) > ", end='', flush=True)
         res = pllm.generate(
             prompt=dialog.prompt(),
             completion_token_limit=picollm_completion_token_limit,

From 783307977af6709c03a6d14c2bab621eca4cfcd5 Mon Sep 17 00:00:00 2001
From: Ian
Date: Tue, 1 Oct 2024 16:14:47 -0700
Subject: [PATCH 6/6] use released version

---
 recipes/llm-voice-assistant/python/requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/recipes/llm-voice-assistant/python/requirements.txt b/recipes/llm-voice-assistant/python/requirements.txt
index 7b59945..23ab826 100644
--- a/recipes/llm-voice-assistant/python/requirements.txt
+++ b/recipes/llm-voice-assistant/python/requirements.txt
@@ -1,4 +1,4 @@
-../../../../picollm/binding/python/dist/picollm-1.1.0-py3-none-any.whl
+picollm==1.1.0
 pvcheetah==2.0.1
 pvorca==1.0.0
 pvporcupine==3.0.2