diff --git a/c/Makefile b/c/Makefile index bc3a4de7..32e49393 100644 --- a/c/Makefile +++ b/c/Makefile @@ -1,7 +1,10 @@ CFLAGS=-I../src LDFLAGS=-L../src -lvosk -ldl -lpthread -Wl,-rpath,../src -all: test_vosk test_vosk_speaker +all: test_vosk_gpu_batch test_vosk test_vosk_speaker + +test_vosk_gpu_batch: test_vosk_gpu_batch.o + gcc $^ -o $@ $(LDFLAGS) test_vosk: test_vosk.o gcc $^ -o $@ $(LDFLAGS) @@ -13,4 +16,4 @@ test_vosk_speaker: test_vosk_speaker.o gcc $(CFLAGS) -c -o $@ $< clean: - rm -f *.o *.a test_vosk test_vosk_speaker + rm -f *.o *.a test_vosk_gpu_batch test_vosk test_vosk_speaker diff --git a/c/test_vosk_gpu_batch.c b/c/test_vosk_gpu_batch.c new file mode 100644 index 00000000..42919932 --- /dev/null +++ b/c/test_vosk_gpu_batch.c @@ -0,0 +1,35 @@ +#include <vosk_api.h> +#include <stdio.h> +#include <string.h> +#include <unistd.h> + +int main() { + FILE *wavin; + char buf[3200]; + int nread, final; + + vosk_gpu_init(); + VoskBatchModel *model = vosk_batch_model_new("model"); + VoskBatchRecognizer *recognizer = vosk_batch_recognizer_new(model, 16000.0); + + wavin = fopen("test.wav", "rb"); + fseek(wavin, 44, SEEK_SET); + while (!feof(wavin)) { + nread = fread(buf, 1, sizeof(buf), wavin); + vosk_batch_recognizer_accept_waveform(recognizer,buf,nread); + while(vosk_batch_recognizer_get_pending_chunks(recognizer)>0) usleep(1000); + + const char *result=vosk_batch_recognizer_front_result(recognizer); + if(strlen(result)) { + printf("%s\n", result); + vosk_batch_recognizer_pop(recognizer); + } else { + printf("%s\n", vosk_batch_recognizer_partial_result(recognizer)); + } + } + + fclose(wavin); + vosk_batch_recognizer_free(recognizer); + vosk_batch_model_free(model); + return 0; +} diff --git a/src/batch_recognizer.cc b/src/batch_recognizer.cc index 4f439bee..02dbca63 100644 --- a/src/batch_recognizer.cc +++ b/src/batch_recognizer.cc @@ -27,6 +27,8 @@ BatchRecognizer::BatchRecognizer(BatchModel *model, float resampler_ = new LinearResample( sample_frequency, 16000.0f, std::min(sample_frequency / 2, 16000.0f
/ 2), 6); + + partial_result_ = "{\n \"partial\" : \"\"\n}"; } BatchRecognizer::~BatchRecognizer() { @@ -114,27 +116,38 @@ void BatchRecognizer::SetNLSML(bool nlsml) void BatchRecognizer::AcceptWaveform(const char *data, int len) { + #define KALDI_BESTPATH_LOGS_ON 0 + uint64_t id = id_; if (!callbacks_set_) { - // Define the callback for results. -#if 0 - model_->cuda_pipeline_->SetBestPathCallback( + // Define the callbacks for results. + model_->cuda_pipeline_->SetBestPathCallback( id, [&, id](const std::string &str, bool partial, - bool endpoint_detected) { + bool endpoint_detected) { if (partial) { - KALDI_LOG << "id #" << id << " [partial] : " << str << ":"; + #if KALDI_BESTPATH_LOGS_ON + KALDI_LOG << "id #" << id << " [partial] : " << str << ":"; + #endif + partial_result_ = "{\n \"partial\" : \"" + str + "\"\n}"; // json-like partial result format } if (endpoint_detected) { - KALDI_LOG << "id #" << id << " [endpoint detected]"; + #if KALDI_BESTPATH_LOGS_ON + KALDI_LOG << "id #" << id << " [endpoint detected]"; + #endif + if(!partial) { + partial_result_ = "{\n \"partial\" : \"\"\n}"; // clear partial result + } } if (!partial) { - KALDI_LOG << "id #" << id << " : " << str; + #if KALDI_BESTPATH_LOGS_ON + KALDI_LOG << "id #" << id << " : " << str; + #endif } - }); -#endif + }); + model_->cuda_pipeline_->SetLatticeCallback( id, [&, id](SegmentedLatticeCallbackParams& params) { @@ -188,6 +201,11 @@ const char* BatchRecognizer::FrontResult() return results_.front().c_str(); } +const char *BatchRecognizer::PartialResult() +{ + return partial_result_.c_str(); +} + void BatchRecognizer::Pop() { if (results_.empty()) { diff --git a/src/batch_recognizer.h b/src/batch_recognizer.h index fa1d7e31..552c67aa 100644 --- a/src/batch_recognizer.h +++ b/src/batch_recognizer.h @@ -33,6 +33,7 @@ class BatchRecognizer { void AcceptWaveform(const char *data, int len); int GetNumPendingChunks(); const char *FrontResult(); + const char *PartialResult(); void Pop(); void 
FinishStream(); void SetNLSML(bool nlsml); @@ -48,6 +49,7 @@ class BatchRecognizer { bool nlsml_; float sample_frequency_; std::queue<std::string> results_; + std::string partial_result_; LinearResample *resampler_; kaldi::Vector<BaseFloat> buffer_; }; diff --git a/src/vosk_api.cc b/src/vosk_api.cc index 0c205f83..967632e7 100644 --- a/src/vosk_api.cc +++ b/src/vosk_api.cc @@ -296,6 +296,14 @@ void vosk_batch_recognizer_pop(VoskBatchRecognizer *recognizer) #endif } +const char *vosk_batch_recognizer_partial_result(VoskBatchRecognizer *recognizer) +{ +#if HAVE_CUDA + return ((BatchRecognizer *)recognizer)->PartialResult(); +#else + return NULL; +#endif +} int vosk_batch_recognizer_get_pending_chunks(VoskBatchRecognizer *recognizer) { diff --git a/src/vosk_api.h b/src/vosk_api.h index 5aee618b..db1d0d84 100644 --- a/src/vosk_api.h +++ b/src/vosk_api.h @@ -375,6 +375,9 @@ const char *vosk_batch_recognizer_front_result(VoskBatchRecognizer *recognizer); /** Release and free first retrieved result */ void vosk_batch_recognizer_pop(VoskBatchRecognizer *recognizer); +/** Return actual partial result */ +const char *vosk_batch_recognizer_partial_result(VoskBatchRecognizer *recognizer); + /** Get amount of pending chunks for more intelligent waiting */ int vosk_batch_recognizer_get_pending_chunks(VoskBatchRecognizer *recognizer);