forked from k2-fsa/sherpa-onnx
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
cc73e6a
commit 0016516
Showing
24 changed files
with
190 additions
and
29 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,5 @@ | ||
{ | ||
"dependencies": { | ||
"sherpa-onnx-node": "^1.10.22" | ||
"sherpa-onnx-node": "^1.10.23" | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
124 changes: 124 additions & 0 deletions
124
nodejs-examples/test-vad-with-non-streaming-asr-whisper.js
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,124 @@ | ||
// Copyright (c) 2023-2024 Xiaomi Corporation (authors: Fangjun Kuang) | ||
|
||
const sherpa_onnx = require('sherpa-onnx'); | ||
|
||
/**
 * Create the non-streaming (offline) recognizer used by this example.
 *
 * The configuration points at the int8 Whisper tiny.en encoder/decoder and
 * its tokens file, all expected under ./sherpa-onnx-whisper-tiny.en/.
 * Please download the test files from
 * https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
 *
 * @returns The value returned by sherpa_onnx.createOfflineRecognizer().
 */
function createRecognizer() {
  const whisper = {
    encoder: './sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx',
    decoder: './sherpa-onnx-whisper-tiny.en/tiny.en-decoder.int8.onnx',
    tailPaddings: 2000,
  };

  const modelConfig = {
    whisper,
    tokens: './sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt',
    debug: 0,
  };

  return sherpa_onnx.createOfflineRecognizer({modelConfig});
}
|
||
/**
 * Create the voice activity detector used by this example.
 *
 * The configuration references ./silero_vad.onnx. Please download it from
 * https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
 *
 * @returns The value returned by sherpa_onnx.createVad().
 */
function createVad() {
  const sileroVad = {
    model: './silero_vad.onnx',
    threshold: 0.5,
    minSpeechDuration: 0.25,
    minSilenceDuration: 0.5,
    windowSize: 512,
  };

  return sherpa_onnx.createVad({
    sileroVad,
    sampleRate: 16000,
    debug: true,
    numThreads: 1,
    bufferSizeInSeconds: 60,
  });
}
|
||
/**
 * Pop every speech segment currently queued in the VAD, run the offline
 * recognizer on it, and print "<start> -- <end>: <text>" for each segment
 * that produced non-empty text.
 *
 * Each per-segment stream is released in a `finally` block so that it is
 * freed even if decoding throws.
 *
 * @param vad Detector exposing isEmpty(), front() and pop().
 * @param recognizer Recognizer exposing createStream(), decode() and
 *     getResult().
 * @param sampleRate Sample rate of the audio that was fed to the VAD; used
 *     to convert sample offsets to seconds.
 */
function decodeQueuedSegments(vad, recognizer, sampleRate) {
  while (!vad.isEmpty()) {
    const segment = vad.front();
    vad.pop();

    const startTime = segment.start / sampleRate;
    const endTime = startTime + segment.samples.length / sampleRate;

    const stream = recognizer.createStream();
    try {
      stream.acceptWaveform(sampleRate, segment.samples);
      recognizer.decode(stream);

      const result = recognizer.getResult(stream);
      if (result.text.length > 0) {
        const text = result.text.toLowerCase().trim();
        console.log(
            `${startTime.toFixed(2)} -- ${endTime.toFixed(2)}: ${text}`);
      }
    } finally {
      stream.free();
    }
  }
}

const recognizer = createRecognizer();
const vad = createVad();

// Please download ./Obama.wav from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
const waveFilename = './Obama.wav';
const wave = sherpa_onnx.readWave(waveFilename);

const expectedSampleRate = recognizer.config.featConfig.sampleRate;
if (wave.sampleRate !== expectedSampleRate) {
  // A template literal is required here so the rates are interpolated.
  throw new Error(
      `Expected sample rate: ${expectedSampleRate}. Given: ${wave.sampleRate}`);
}

console.log('Started');
const start = Date.now();

// Feed the wave to the VAD one window at a time, decoding segments as soon
// as the VAD reports them.
const windowSize = vad.config.sileroVad.windowSize;
for (let i = 0; i < wave.samples.length; i += windowSize) {
  const thisWindow = wave.samples.subarray(i, i + windowSize);
  vad.acceptWaveform(thisWindow);
  decodeQueuedSegments(vad, recognizer, wave.sampleRate);
}

// Flush the VAD, then decode whatever segments it still holds.
vad.flush();
decodeQueuedSegments(vad, recognizer, wave.sampleRate);

const stop = Date.now();
console.log('Done');

const elapsed_seconds = (stop - start) / 1000;
const duration = wave.samples.length / wave.sampleRate;
const real_time_factor = elapsed_seconds / duration;
console.log('Wave duration', duration.toFixed(3), 'seconds');
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3));

vad.free();
recognizer.free();
Oops, something went wrong.