Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add C# API for Moonshine models. #1483

Merged
merged 2 commits into from
Oct 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .github/scripts/test-dot-net.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@ rm -fv *.wav
rm -rfv sherpa-onnx-pyannote-*

cd ../offline-decode-files
./run-moonshine.sh
rm -rf sherpa-onnx-*

./run-sense-voice-ctc.sh
rm -rf sherpa-onnx-*

Expand Down
44 changes: 37 additions & 7 deletions dotnet-examples/offline-decode-files/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ class Options
{

[Option("sample-rate", Required = false, Default = 16000, HelpText = "Sample rate of the data used to train the model")]
public int SampleRate { get; set; } = 16000;
public int SampleRate { get; set; } = 16000;

[Option("feat-dim", Required = false, Default = 80, HelpText = "Dimension of the features used to train the model")]
public int FeatureDim { get; set; } = 80;
Expand All @@ -31,7 +31,7 @@ class Options
[Option(Required = false, Default = "", HelpText = "Path to transducer decoder.onnx. Used only for transducer models")]
public string Decoder { get; set; } = "";

[Option(Required = false, Default = "",HelpText = "Path to transducer joiner.onnx. Used only for transducer models")]
[Option(Required = false, Default = "", HelpText = "Path to transducer joiner.onnx. Used only for transducer models")]
public string Joiner { get; set; } = "";

[Option("model-type", Required = false, Default = "", HelpText = "model type")]
Expand All @@ -44,10 +44,22 @@ class Options
public string WhisperDecoder { get; set; } = "";

[Option("whisper-language", Required = false, Default = "", HelpText = "Language of the input file. Can be empty")]
public string WhisperLanguage{ get; set; } = "";
public string WhisperLanguage { get; set; } = "";

[Option("whisper-task", Required = false, Default = "transcribe", HelpText = "transcribe or translate")]
public string WhisperTask{ get; set; } = "transcribe";
public string WhisperTask { get; set; } = "transcribe";

[Option("moonshine-preprocessor", Required = false, Default = "", HelpText = "Path to preprocess.onnx. Used only for Moonshine models")]
public string MoonshinePreprocessor { get; set; } = "";

[Option("moonshine-encoder", Required = false, Default = "", HelpText = "Path to encode.onnx. Used only for Moonshine models")]
public string MoonshineEncoder { get; set; } = "";

[Option("moonshine-uncached-decoder", Required = false, Default = "", HelpText = "Path to uncached_decode.onnx. Used only for Moonshine models")]
public string MoonshineUncachedDecoder { get; set; } = "";

[Option("moonshine-cached-decoder", Required = false, Default = "", HelpText = "Path to cached_decode.onnx. Used only for Moonshine models")]
public string MoonshineCachedDecoder { get; set; } = "";

[Option("tdnn-model", Required = false, Default = "", HelpText = "Path to tdnn yesno model")]
public string TdnnModel { get; set; } = "";
Expand Down Expand Up @@ -90,7 +102,7 @@ class Options
public float HotwordsScore { get; set; } = 1.5F;

[Option("files", Required = true, HelpText = "Audio files for decoding")]
public IEnumerable<string> Files { get; set; } = new string[] {};
public IEnumerable<string> Files { get; set; } = new string[] { };
}

static void Main(string[] args)
Expand Down Expand Up @@ -236,6 +248,13 @@ private static void Run(Options options)
config.ModelConfig.SenseVoice.Model = options.SenseVoiceModel;
config.ModelConfig.SenseVoice.UseInverseTextNormalization = options.SenseVoiceUseItn;
}
else if (!String.IsNullOrEmpty(options.MoonshinePreprocessor))
{
config.ModelConfig.Moonshine.Preprocessor = options.MoonshinePreprocessor;
config.ModelConfig.Moonshine.Encoder = options.MoonshineEncoder;
config.ModelConfig.Moonshine.UncachedDecoder = options.MoonshineUncachedDecoder;
config.ModelConfig.Moonshine.CachedDecoder = options.MoonshineCachedDecoder;
}
else
{
Console.WriteLine("Please provide a model");
Expand Down Expand Up @@ -273,10 +292,21 @@ private static void Run(Options options)
// display results
for (int i = 0; i != files.Length; ++i)
{
var text = streams[i].Result.Text;
var r = streams[i].Result;
Console.WriteLine("--------------------");
Console.WriteLine(files[i]);
Console.WriteLine(text);
Console.WriteLine("Text: {0}", r.Text);
Console.WriteLine("Tokens: [{0}]", string.Join(", ", r.Tokens));
if (r.Timestamps != null && r.Timestamps.Length > 0) {
Console.Write("Timestamps: [");
var sep = "";
for (int k = 0; k != r.Timestamps.Length; ++k)
{
Console.Write("{0}{1}", sep, r.Timestamps[k].ToString("0.00"));
sep = ", ";
}
Console.WriteLine("]");
}
}
Console.WriteLine("--------------------");
}
Expand Down
18 changes: 18 additions & 0 deletions dotnet-examples/offline-decode-files/run-moonshine.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#!/usr/bin/env bash
# Fetches the int8 Moonshine tiny English model when it is not present yet,
# then decodes the test wave shipped inside the model archive with the .NET example.

set -ex

model=sherpa-onnx-moonshine-tiny-en-int8
archive=$model.tar.bz2

# tokens.txt doubles as the marker that the archive was already extracted.
if [[ ! -f ./$model/tokens.txt ]]; then
  curl -SL -O "https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/$archive"
  tar xvf "$archive"
  rm "$archive"
fi

dotnet run \
  --num-threads=2 \
  --moonshine-preprocessor="./$model/preprocess.onnx" \
  --moonshine-encoder="./$model/encode.int8.onnx" \
  --moonshine-uncached-decoder="./$model/uncached_decode.int8.onnx" \
  --moonshine-cached-decoder="./$model/cached_decode.int8.onnx" \
  --tokens="./$model/tokens.txt" \
  --files "./$model/test_wavs/0.wav"
2 changes: 2 additions & 0 deletions scripts/dotnet/OfflineModelConfig.cs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ public OfflineModelConfig()
BpeVocab = "";
TeleSpeechCtc = "";
SenseVoice = new OfflineSenseVoiceModelConfig();
Moonshine = new OfflineMoonshineModelConfig();
}
public OfflineTransducerModelConfig Transducer;
public OfflineParaformerModelConfig Paraformer;
Expand Down Expand Up @@ -54,5 +55,6 @@ public OfflineModelConfig()
public string TeleSpeechCtc;

public OfflineSenseVoiceModelConfig SenseVoice;
public OfflineMoonshineModelConfig Moonshine;
}
}
29 changes: 29 additions & 0 deletions scripts/dotnet/OfflineMoonshineModelConfig.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
/// Copyright (c) 2024 Xiaomi Corporation (authors: Fangjun Kuang)

using System.Runtime.InteropServices;

namespace SherpaOnnx
{
/// <summary>
/// Model file paths for a Moonshine offline recognizer. The struct uses a
/// sequential layout and every path is marshalled as an LPStr string, so the
/// declaration order of the fields below must not be changed.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
public struct OfflineMoonshineModelConfig
{
    /// <summary>Path to preprocess.onnx.</summary>
    [MarshalAs(UnmanagedType.LPStr)]
    public string Preprocessor;

    /// <summary>Path to encode.onnx.</summary>
    [MarshalAs(UnmanagedType.LPStr)]
    public string Encoder;

    /// <summary>Path to uncached_decode.onnx.</summary>
    [MarshalAs(UnmanagedType.LPStr)]
    public string UncachedDecoder;

    /// <summary>Path to cached_decode.onnx.</summary>
    [MarshalAs(UnmanagedType.LPStr)]
    public string CachedDecoder;

    /// <summary>
    /// Initializes every path to the empty string so that no field is left null.
    /// </summary>
    public OfflineMoonshineModelConfig()
    {
        Preprocessor = string.Empty;
        Encoder = string.Empty;
        UncachedDecoder = string.Empty;
        CachedDecoder = string.Empty;
    }
}
}
55 changes: 54 additions & 1 deletion scripts/dotnet/OfflineRecognizerResult.cs
Original file line number Diff line number Diff line change
Expand Up @@ -31,17 +31,70 @@ public OfflineRecognizerResult(IntPtr handle)
byte[] stringBuffer = new byte[length];
Marshal.Copy(impl.Text, stringBuffer, 0, length);
_text = Encoding.UTF8.GetString(stringBuffer);

_tokens = new String[impl.Count];

unsafe
{
byte* buf = (byte*)impl.Tokens;
for (int i = 0; i < impl.Count; i++)
{
length = 0;
byte* start = buf;
while (*buf != 0)
{
++buf;
length += 1;
}
++buf;

stringBuffer = new byte[length];
fixed (byte* pTarget = stringBuffer)
{
for (int k = 0; k < length; k++)
{
pTarget[k] = start[k];
}
}

_tokens[i] = Encoding.UTF8.GetString(stringBuffer);
}
}

unsafe
{
if (impl.Timestamps != IntPtr.Zero)
{
float *t = (float*)impl.Timestamps;
_timestamps = new float[impl.Count];
fixed (float* f = _timestamps)
{
for (int k = 0; k < impl.Count; k++)
{
f[k] = t[k];
}
}
}
}

}

// Sequential-layout view of the native recognition result; the visible
// constructor code reads Text, Tokens, Count and Timestamps from it.
// NOTE(review): the field order presumably has to mirror the native struct —
// confirm against the C API header before reordering or inserting fields.
[StructLayout(LayoutKind.Sequential)]
struct Impl
{
// Pointer to the UTF-8 bytes of the recognized text.
public IntPtr Text;
// Pointer to Count floats; when it is IntPtr.Zero no timestamps are copied.
public IntPtr Timestamps;
// Number of tokens, and of timestamps when Timestamps is non-zero.
public int Count;
// Pointer to Count NUL-terminated UTF-8 strings stored back to back.
public IntPtr Tokens;
}

// Recognized text, decoded from UTF-8 bytes in the constructor.
private String _text;
/// <summary>The recognized text of the decoded audio.</summary>
public String Text => _text;
}

// One UTF-8 decoded string per native token; filled in the constructor.
private String[] _tokens;
/// <summary>The decoded tokens, one array entry per token.</summary>
public String[] Tokens => _tokens;

// Copied from the native float array only when its pointer is non-zero.
private float[] _timestamps;
/// <summary>
/// Timestamps, one per token when present. In the visible constructor code
/// this is assigned only when the native timestamps pointer is non-zero, so
/// callers should check for null before using it.
/// </summary>
public float[] Timestamps => _timestamps;
}
}
Loading