Skip to content

Commit

Permalink
Merge pull request #20 from Abandon-ht/dev
Browse files Browse the repository at this point in the history
update vad_whisper api & demo
  • Loading branch information
Forairaaaaa authored Jan 20, 2025
2 parents 947ea55 + 3d61ba7 commit 484063c
Show file tree
Hide file tree
Showing 11 changed files with 364 additions and 3 deletions.
112 changes: 112 additions & 0 deletions examples/KWS_VAD_Whisper/KWS_VAD_Whisper.ino
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
/*
* SPDX-FileCopyrightText: 2024 M5Stack Technology CO LTD
*
* SPDX-License-Identifier: MIT
*/
#include <Arduino.h>
#include <M5Unified.h>
#include <M5ModuleLLM.h>

/* Driver object for the LLM module; initialised in setup() and polled in loop() */
M5ModuleLLM module_llm;

/* Wake-up keyword handed to the KWS module. Must be capitalized */
String wake_up_keyword = "HELLO";
// String wake_up_keyword = "你好你好";
/* Work ids returned by the kws / vad / whisper setup calls in setup();
 * loop() compares incoming messages against kws_work_id and whisper_work_id */
String kws_work_id;
String vad_work_id;
String whisper_work_id;
/* Language string passed to kws.setup(); assigned in setup() */
String language;

void setup()
{
M5.begin();
M5.Display.setTextSize(2);
M5.Display.setTextScroll(true);
// M5.Display.setFont(&fonts::efontCN_12); // Support Chinese display
// M5.Display.setFont(&fonts::efontJA_12); // Support Japanese display

language = "en_US";
// language = "zh_CN";

/* Init module serial port */
// int rxd = 16, txd = 17; // Basic
// int rxd = 13, txd = 14; // Core2
// int rxd = 18, txd = 17; // CoreS3
int rxd = M5.getPin(m5::pin_name_t::port_c_rxd);
int txd = M5.getPin(m5::pin_name_t::port_c_txd);
Serial2.begin(115200, SERIAL_8N1, rxd, txd);

/* Init module */
module_llm.begin(&Serial2);

/* Make sure module is connected */
M5.Display.printf(">> Check ModuleLLM connection..\n");
while (1) {
if (module_llm.checkConnection()) {
break;
}
}

/* Reset ModuleLLM */
M5.Display.printf(">> Reset ModuleLLM..\n");
module_llm.sys.reset();

/* Setup Audio module */
M5.Display.printf(">> Setup audio..\n");
module_llm.audio.setup();

/* Setup KWS module and save returned work id */
M5.Display.printf(">> Setup kws..\n");
m5_module_llm::ApiKwsSetupConfig_t kws_config;
kws_config.kws = wake_up_keyword;
kws_work_id = module_llm.kws.setup(kws_config, "kws_setup", language);

/* Setup VAD module and save returned work id */
M5.Display.printf(">> Setup vad..\n");
m5_module_llm::ApiVadSetupConfig_t vad_config;
vad_config.input = {"sys.pcm", kws_work_id};
vad_work_id = module_llm.vad.setup(vad_config, "vad_setup");

/* Setup Whisper module and save returned work id */
M5.Display.printf(">> Setup whisper..\n");
m5_module_llm::ApiWhisperSetupConfig_t whisper_config;
whisper_config.input = {"sys.pcm", kws_work_id, vad_work_id};
whisper_config.language = "en";
// whisper_config.language = "zh";
// whisper_config.language = "ja";
whisper_work_id = module_llm.whisper.setup(whisper_config, "whisper_setup");

M5.Display.printf(">> Setup ok\n>> Say \"%s\" to wakeup\n", wake_up_keyword.c_str());
}

void loop()
{
/* Update ModuleLLM */
module_llm.update();

/* Handle module response messages */
for (auto& msg : module_llm.msg.responseMsgList) {
/* If KWS module message */
if (msg.work_id == kws_work_id) {
M5.Display.setTextColor(TFT_GREENYELLOW);
M5.Display.printf(">> Keyword detected\n");
}

/* If ASR module message */
if (msg.work_id == whisper_work_id) {
/* Check message object type */
if (msg.object == "asr.utf-8") {
/* Parse message json and get ASR result */
JsonDocument doc;
deserializeJson(doc, msg.raw_msg);
String asr_result = doc["data"].as<String>();

M5.Display.setTextColor(TFT_YELLOW);
M5.Display.printf(">> %s\n", asr_result.c_str());
}
}
}

/* Clear handled messages */
module_llm.msg.responseMsgList.clear();
}
3 changes: 3 additions & 0 deletions examples/YOLO/YOLO.ino
Original file line number Diff line number Diff line change
Expand Up @@ -122,5 +122,8 @@ void loop()
}

/* Clear handled messages */
module_llm.msg.clearMsg("yolo_setup");
module_llm.msg.responseMsgList.clear();

usleep(500000);
}
2 changes: 1 addition & 1 deletion library.properties
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
name=M5ModuleLLM
version=1.0.0
version=1.4.0
author=M5Stack
maintainer=M5Stack
sentence=M5ModuleLLM is a library for M5ModuleLLM
Expand Down
2 changes: 2 additions & 0 deletions src/M5ModuleLLM.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ bool M5ModuleLLM::begin(Stream* serialPort)
asr.init(&msg);
yolo.init(&msg);
camera.init(&msg);
vad.init(&msg);
whisper.init(&msg);
depthanything.init(&msg);
return true;
}
Expand Down
14 changes: 14 additions & 0 deletions src/M5ModuleLLM.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
#include "api/api_yolo.h"
#include "api/api_depth_anything.h"
#include "api/api_camera.h"
#include "api/api_vad.h"
#include "api/api_whisper.h"
#include "api/api_version.h"

class M5ModuleLLM {
Expand Down Expand Up @@ -105,6 +107,18 @@ class M5ModuleLLM {
*/
m5_module_llm::ApiYolo yolo;

/**
* @brief VAD module api set
*
*/
m5_module_llm::ApiVad vad;

/**
* @brief Whisper module api set
*
*/
m5_module_llm::ApiWhisper whisper;

/**
* @brief DepthAnything module api set
*
Expand Down
2 changes: 1 addition & 1 deletion src/api/api_llm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ String ApiLlm::setup(ApiLlmSetupConfig_t config, String request_id)
// Copy work id
llm_work_id = msg.work_id;
},
20000);
30000);
return llm_work_id;
}

Expand Down
64 changes: 64 additions & 0 deletions src/api/api_vad.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
/*
* SPDX-FileCopyrightText: 2024 M5Stack Technology CO LTD
*
* SPDX-License-Identifier: MIT
*/
#include "api_vad.h"

using namespace m5_module_llm;

/**
 * @brief Remember the message handler used by setup() and exit().
 *
 * @param moduleMsg Pointer stored as-is; it is not copied or owned here.
 */
void ApiVad::init(ModuleMsg* moduleMsg)
{
    this->_module_msg = moduleMsg;
}

/**
 * @brief Send a "vad.setup" request and return the work id from the response.
 *
 * Builds the JSON command from @p config (model, response_format, enoutput
 * and the input list), sends it and waits for the matching response.
 *
 * @param config     Values copied into the "data" object of the request.
 * @param request_id Request id placed in the command and used to match the
 *                   response.
 * @return String Work id copied from the response; empty if the callback was
 *                never invoked or if init() has not been called yet.
 */
String ApiVad::setup(ApiVadSetupConfig_t config, String request_id)
{
    String work_id;

    // _module_msg stays nullptr until init() runs; bail out instead of
    // dereferencing it.
    if (_module_msg == nullptr) {
        return work_id;
    }

    String cmd;
    {
        JsonDocument doc;
        doc["request_id"]              = request_id;
        doc["work_id"]                 = "vad";
        doc["action"]                  = "setup";
        doc["object"]                  = "vad.setup";
        doc["data"]["model"]           = config.model;
        doc["data"]["response_format"] = config.response_format;
        doc["data"]["enoutput"]        = config.enoutput;
        JsonArray inputArray           = doc["data"]["input"].to<JsonArray>();
        for (const String& str : config.input) {
            inputArray.add(str);
        }
        serializeJson(doc, cmd);
    }

    // NOTE(review): 30000 is presumably a timeout in milliseconds - confirm
    // against ModuleMsg::sendCmdAndWaitToTakeMsg.
    _module_msg->sendCmdAndWaitToTakeMsg(
        cmd.c_str(), request_id,
        [&work_id](ResponseMsg_t& msg) {
            // Copy work id
            work_id = msg.work_id;
        },
        30000);
    return work_id;
}

/**
 * @brief Send an "exit" action for the given VAD work id.
 *
 * @param work_id    Work id of the unit to exit; placed in the command.
 * @param request_id Request id placed in the command and used to match the
 *                   response.
 * @return String The work id from the response when the callback is invoked;
 *                otherwise the @p work_id that was passed in (also when
 *                init() has not been called yet).
 */
String ApiVad::exit(String work_id, String request_id)
{
    // _module_msg stays nullptr until init() runs; bail out instead of
    // dereferencing it.
    if (_module_msg == nullptr) {
        return work_id;
    }

    String cmd;
    {
        JsonDocument doc;
        doc["request_id"] = request_id;
        doc["work_id"]    = work_id;
        doc["action"]     = "exit";
        serializeJson(doc, cmd);
    }

    // NOTE(review): 100 is presumably a timeout in milliseconds - confirm
    // against ModuleMsg::sendCmdAndWaitToTakeMsg.
    _module_msg->sendCmdAndWaitToTakeMsg(
        cmd.c_str(), request_id,
        [&work_id](ResponseMsg_t& msg) {
            // Copy work id
            work_id = msg.work_id;
        },
        100);
    return work_id;
}
45 changes: 45 additions & 0 deletions src/api/api_vad.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
/*
* SPDX-FileCopyrightText: 2024 M5Stack Technology CO LTD
*
* SPDX-License-Identifier: MIT
*/
#pragma once
#include "../utils/msg.h"
#include <Arduino.h>

namespace m5_module_llm {

/**
 * @brief Parameters for ApiVad::setup(); each field is copied into the
 *        "data" object of the setup request under the same name.
 */
struct ApiVadSetupConfig_t {
    String model{"silero-vad"};            // data.model
    String response_format{"vad.bool"};    // data.response_format
    std::vector<String> input{"sys.pcm"};  // data.input, one array entry per element
    bool enoutput{true};                   // data.enoutput
};

class ApiVad {
public:
void init(ModuleMsg* moduleMsg);

/**
* @brief Setup module VAD, return VAD work_id
*
* @param config
* @param request_id
* @return String
*/
String setup(ApiVadSetupConfig_t config = ApiVadSetupConfig_t(), String request_id = "vad_setup");

/**
* @brief Exit module VAD, return VAD work_id
*
* @param work_id
* @param request_id
* @return String
*/
String exit(String work_id, String request_id = "vad_exit");

private:
ModuleMsg* _module_msg = nullptr;
};

} // namespace m5_module_llm
65 changes: 65 additions & 0 deletions src/api/api_whisper.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
/*
* SPDX-FileCopyrightText: 2024 M5Stack Technology CO LTD
*
* SPDX-License-Identifier: MIT
*/
#include "api_whisper.h"

using namespace m5_module_llm;

/**
 * @brief Remember the message handler used by setup() and exit().
 *
 * @param moduleMsg Pointer stored as-is; it is not copied or owned here.
 */
void ApiWhisper::init(ModuleMsg* moduleMsg)
{
    this->_module_msg = moduleMsg;
}

/**
 * @brief Send a "whisper.setup" request and return the work id from the
 *        response.
 *
 * Builds the JSON command from @p config (model, response_format, language,
 * enoutput and the input list), sends it and waits for the matching response.
 *
 * @param config     Values copied into the "data" object of the request.
 * @param request_id Request id placed in the command and used to match the
 *                   response.
 * @param language   Not used by this function; the language sent to the
 *                   module is config.language.
 *                   NOTE(review): confirm whether this parameter is meant to
 *                   feed the request.
 * @return String Work id copied from the response; empty if the callback was
 *                never invoked or if init() has not been called yet.
 */
String ApiWhisper::setup(ApiWhisperSetupConfig_t config, String request_id, String language)
{
    (void)language;  // intentionally unused, see the parameter note above

    String work_id;

    // _module_msg is only assigned by init(); bail out instead of
    // dereferencing a pointer that was never set.
    if (_module_msg == nullptr) {
        return work_id;
    }

    String cmd;
    {
        JsonDocument doc;
        doc["request_id"]              = request_id;
        doc["work_id"]                 = "whisper";
        doc["action"]                  = "setup";
        doc["object"]                  = "whisper.setup";
        doc["data"]["model"]           = config.model;
        doc["data"]["response_format"] = config.response_format;
        doc["data"]["language"]        = config.language;
        doc["data"]["enoutput"]        = config.enoutput;
        JsonArray inputArray           = doc["data"]["input"].to<JsonArray>();
        for (const String& str : config.input) {
            inputArray.add(str);
        }
        serializeJson(doc, cmd);
    }

    // NOTE(review): 10000 is presumably a timeout in milliseconds - confirm
    // against ModuleMsg::sendCmdAndWaitToTakeMsg.
    _module_msg->sendCmdAndWaitToTakeMsg(
        cmd.c_str(), request_id,
        [&work_id](ResponseMsg_t& msg) {
            // Copy work id
            work_id = msg.work_id;
        },
        10000);
    return work_id;
}

/**
 * @brief Send an "exit" action for the given Whisper work id.
 *
 * @param work_id    Work id of the unit to exit; placed in the command.
 * @param request_id Request id placed in the command and used to match the
 *                   response.
 * @return String The work id from the response when the callback is invoked;
 *                otherwise the @p work_id that was passed in (also when
 *                init() has not been called yet).
 */
String ApiWhisper::exit(String work_id, String request_id)
{
    // _module_msg is only assigned by init(); bail out instead of
    // dereferencing a pointer that was never set.
    if (_module_msg == nullptr) {
        return work_id;
    }

    String cmd;
    {
        JsonDocument doc;
        doc["request_id"] = request_id;
        doc["work_id"]    = work_id;
        doc["action"]     = "exit";
        serializeJson(doc, cmd);
    }

    // NOTE(review): 100 is presumably a timeout in milliseconds - confirm
    // against ModuleMsg::sendCmdAndWaitToTakeMsg.
    _module_msg->sendCmdAndWaitToTakeMsg(
        cmd.c_str(), request_id,
        [&work_id](ResponseMsg_t& msg) {
            // Copy work id
            work_id = msg.work_id;
        },
        100);
    return work_id;
}
Loading

0 comments on commit 484063c

Please sign in to comment.