Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Go API for MatchaTTS models #1685

Merged
merged 1 commit into from
Jan 6, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions .github/workflows/test-go-package.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,15 @@ jobs:
go build
ls -lh

echo "Test matcha zh"
./run-matcha-zh.sh
rm -rf matcha-icefall-*

echo "Test matcha en"
./run-matcha-en.sh
rm -rf matcha-icefall-*
ls -lh *.wav

echo "Test vits-ljs"
./run-vits-ljs.sh
rm -rf vits-ljs
Expand Down Expand Up @@ -246,6 +255,15 @@ jobs:
cp -v /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/sherpa-onnx-go-windows*/lib/x86_64-pc-windows-gnu/*.dll .
ls -lh

echo "Test matcha zh"
./run-matcha-zh.sh
rm -rf matcha-icefall-*

echo "Test matcha en"
./run-matcha-en.sh
rm -rf matcha-icefall-*
ls -lh *.wav

echo "Test vits-ljs"
./run-vits-ljs.sh
rm -rf vits-ljs
Expand Down Expand Up @@ -291,6 +309,15 @@ jobs:
cp -v /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/sherpa-onnx-go-windows*/lib/i686-pc-windows-gnu/*.dll .
ls -lh

echo "Test matcha zh"
./run-matcha-zh.sh
rm -rf matcha-icefall-*

echo "Test matcha en"
./run-matcha-en.sh
rm -rf matcha-icefall-*
ls -lh *.wav

echo "Test vits-ljs"
./run-vits-ljs.sh
rm -rf vits-ljs
Expand Down
9 changes: 9 additions & 0 deletions .github/workflows/test-go.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,15 @@ jobs:
go build
ls -lh

echo "Test matcha zh"
./run-matcha-zh.sh
rm -rf matcha-icefall-*

echo "Test matcha en"
./run-matcha-en.sh
rm -rf matcha-icefall-*
ls -lh *.wav

echo "Test vits-ljs"
./run-vits-ljs.sh
rm -rf vits-ljs
Expand Down
11 changes: 11 additions & 0 deletions go-api-examples/non-streaming-tts/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,22 @@ func main() {
flag.StringVar(&config.Model.Vits.Lexicon, "vits-lexicon", "", "Path to lexicon.txt")
flag.StringVar(&config.Model.Vits.Tokens, "vits-tokens", "", "Path to tokens.txt")
flag.StringVar(&config.Model.Vits.DataDir, "vits-data-dir", "", "Path to espeak-ng-data")
// Jieba dictionary directory for Chinese VITS models.
flag.StringVar(&config.Model.Vits.DictDir, "vits-dict-dir", "", "Path to dict for jieba")

flag.Float32Var(&config.Model.Vits.NoiseScale, "vits-noise-scale", 0.667, "noise_scale for VITS")
flag.Float32Var(&config.Model.Vits.NoiseScaleW, "vits-noise-scale-w", 0.8, "noise_scale_w for VITS")
flag.Float32Var(&config.Model.Vits.LengthScale, "vits-length-scale", 1.0, "length_scale for VITS. small -> faster in speech speed; large -> slower")

flag.StringVar(&config.Model.Matcha.AcousticModel, "matcha-acoustic-model", "", "Path to the matcha acoustic model")
flag.StringVar(&config.Model.Matcha.Vocoder, "matcha-vocoder", "", "Path to the matcha vocoder model")
flag.StringVar(&config.Model.Matcha.Lexicon, "matcha-lexicon", "", "Path to lexicon.txt")
flag.StringVar(&config.Model.Matcha.Tokens, "matcha-tokens", "", "Path to tokens.txt")
flag.StringVar(&config.Model.Matcha.DataDir, "matcha-data-dir", "", "Path to espeak-ng-data")
flag.StringVar(&config.Model.Matcha.DictDir, "matcha-dict-dir", "", "Path to dict for jieba")

flag.Float32Var(&config.Model.Matcha.NoiseScale, "matcha-noise-scale", 0.667, "noise_scale for Matcha")
flag.Float32Var(&config.Model.Matcha.LengthScale, "matcha-length-scale", 1.0, "length_scale for Matcha. small -> faster in speech speed; large -> slower")

flag.IntVar(&config.Model.NumThreads, "num-threads", 1, "Number of threads for computing")
flag.IntVar(&config.Model.Debug, "debug", 0, "Whether to show debug message")
flag.StringVar(&config.Model.Provider, "provider", "cpu", "Provider to use")
Expand Down
31 changes: 31 additions & 0 deletions go-api-examples/non-streaming-tts/run-matcha-en.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#!/usr/bin/env bash
#
# Builds the non-streaming-tts example and synthesizes an English sentence
# with the matcha-icefall-en_US-ljspeech model, writing ./test-matcha-en.wav.
# The acoustic model and the vocoder are downloaded only if they are missing.

set -ex

# please visit
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
# to download more models
if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then
  # -f: abort on HTTP errors instead of saving the error page as the archive.
  curl -fSL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
  tar xf matcha-icefall-en_US-ljspeech.tar.bz2
  rm matcha-icefall-en_US-ljspeech.tar.bz2
fi

if [ ! -f ./hifigan_v2.onnx ]; then
  # -f is essential here: without it an HTTP error page would be stored as
  # hifigan_v2.onnx and the existence check above would skip later downloads.
  curl -fSL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
fi

go mod tidy
go build

./non-streaming-tts \
  --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \
  --matcha-vocoder=./hifigan_v2.onnx \
  --matcha-tokens=./matcha-icefall-en_US-ljspeech/tokens.txt \
  --matcha-data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \
  --debug=1 \
  --output-filename=./test-matcha-en.wav \
  "Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone."


31 changes: 31 additions & 0 deletions go-api-examples/non-streaming-tts/run-matcha-zh.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#!/usr/bin/env bash
#
# Builds the non-streaming-tts example and synthesizes a Chinese sentence
# with the matcha-icefall-zh-baker model, writing ./test-matcha-zh.wav.
# The acoustic model and the vocoder are downloaded only if they are missing.

set -ex

# please visit
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker
# to download more models
if [ ! -f ./matcha-icefall-zh-baker/model-steps-3.onnx ]; then
  # -f: abort on HTTP errors instead of saving the error page as the archive.
  curl -fSL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
  # Extract quietly (no -v), matching the other TTS example scripts.
  tar xf matcha-icefall-zh-baker.tar.bz2
  rm matcha-icefall-zh-baker.tar.bz2
fi

if [ ! -f ./hifigan_v2.onnx ]; then
  # -f is essential here: without it an HTTP error page would be stored as
  # hifigan_v2.onnx and the existence check above would skip later downloads.
  curl -fSL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
fi

go mod tidy
go build

./non-streaming-tts \
  --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \
  --matcha-vocoder=./hifigan_v2.onnx \
  --matcha-lexicon=./matcha-icefall-zh-baker/lexicon.txt \
  --matcha-tokens=./matcha-icefall-zh-baker/tokens.txt \
  --matcha-dict-dir=./matcha-icefall-zh-baker/dict \
  --debug=1 \
  --tts-rule-fsts=./matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst \
  --output-filename=./test-matcha-zh.wav \
  "某某银行的副行长和一些行政领导表示,他们去过长江和长白山; 经济不断增长。2024年12月31号,拨打110或者18920240511。123456块钱。"

Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ set -ex

if [ ! -d vits-piper-en_US-lessac-medium ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-lessac-medium.tar.bz2
tar xvf vits-piper-en_US-lessac-medium.tar.bz2
tar xf vits-piper-en_US-lessac-medium.tar.bz2
rm vits-piper-en_US-lessac-medium.tar.bz2
fi

Expand Down
1 change: 1 addition & 0 deletions scripts/go/_internal/non-streaming-tts/run-matcha-en.sh
1 change: 1 addition & 0 deletions scripts/go/_internal/non-streaming-tts/run-matcha-zh.sh
37 changes: 36 additions & 1 deletion scripts/go/sherpa_onnx.go
Original file line number Diff line number Diff line change
Expand Up @@ -671,8 +671,20 @@ type OfflineTtsVitsModelConfig struct {
DictDir string // Path to dict directory for jieba (used only in Chinese tts)
}

// OfflineTtsMatchaModelConfig holds the settings for a MatchaTTS model, which
// is made of an acoustic model and a separate vocoder model.
type OfflineTtsMatchaModelConfig struct {
	AcousticModel string  // Path to the acoustic model for MatchaTTS
	Vocoder       string  // Path to the vocoder model for MatchaTTS
	Lexicon       string  // Path to lexicon.txt
	Tokens        string  // Path to tokens.txt
	DataDir       string  // Path to espeak-ng-data directory
	NoiseScale    float32 // Noise scale for Matcha models. Please use 0.667 in general
	LengthScale   float32 // Please use 1.0 in general. Smaller -> Faster speech speed. Larger -> Slower speech speed
	DictDir       string  // Path to dict directory for jieba (used only in Chinese tts)
}

type OfflineTtsModelConfig struct {
Vits OfflineTtsVitsModelConfig
Vits OfflineTtsVitsModelConfig
Matcha OfflineTtsMatchaModelConfig

// Number of threads to use for neural network computation
NumThreads int
Expand Down Expand Up @@ -722,6 +734,7 @@ func NewOfflineTts(config *OfflineTtsConfig) *OfflineTts {

c.max_num_sentences = C.int(config.MaxNumSentences)

// vits
c.model.vits.model = C.CString(config.Model.Vits.Model)
defer C.free(unsafe.Pointer(c.model.vits.model))

Expand All @@ -741,6 +754,28 @@ func NewOfflineTts(config *OfflineTtsConfig) *OfflineTts {
c.model.vits.dict_dir = C.CString(config.Model.Vits.DictDir)
defer C.free(unsafe.Pointer(c.model.vits.dict_dir))

// matcha
c.model.matcha.acoustic_model = C.CString(config.Model.Matcha.AcousticModel)
defer C.free(unsafe.Pointer(c.model.matcha.acoustic_model))

c.model.matcha.vocoder = C.CString(config.Model.Matcha.Vocoder)
defer C.free(unsafe.Pointer(c.model.matcha.vocoder))

c.model.matcha.lexicon = C.CString(config.Model.Matcha.Lexicon)
defer C.free(unsafe.Pointer(c.model.matcha.lexicon))

c.model.matcha.tokens = C.CString(config.Model.Matcha.Tokens)
defer C.free(unsafe.Pointer(c.model.matcha.tokens))

c.model.matcha.data_dir = C.CString(config.Model.Matcha.DataDir)
defer C.free(unsafe.Pointer(c.model.matcha.data_dir))

c.model.matcha.noise_scale = C.float(config.Model.Matcha.NoiseScale)
c.model.matcha.length_scale = C.float(config.Model.Matcha.LengthScale)

c.model.matcha.dict_dir = C.CString(config.Model.Matcha.DictDir)
defer C.free(unsafe.Pointer(c.model.matcha.dict_dir))

c.model.num_threads = C.int(config.Model.NumThreads)
c.model.debug = C.int(config.Model.Debug)

Expand Down
Loading