From 4285ae399c77d8c486ba3d97ab28cda0623a391b Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Mon, 6 Nov 2023 19:25:57 +0800 Subject: [PATCH 1/9] add distilled whisper --- .github/workflows/export-whisper-to-onnx.yaml | 2 +- scripts/whisper/export-onnx.py | 21 +++++++++++++++++-- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/.github/workflows/export-whisper-to-onnx.yaml b/.github/workflows/export-whisper-to-onnx.yaml index 554e20655..f4785c5ee 100644 --- a/.github/workflows/export-whisper-to-onnx.yaml +++ b/.github/workflows/export-whisper-to-onnx.yaml @@ -16,7 +16,7 @@ jobs: fail-fast: false matrix: os: [macos-latest] - model: ["tiny.en", "base.en", "small.en", "medium.en", "tiny", "base", "small", "medium", "large", "large-v1", "large-v2"] + model: ["distil-medium.en", "tiny.en", "base.en", "small.en", "medium.en", "tiny", "base", "small", "medium", "large", "large-v1", "large-v2"] steps: - uses: actions/checkout@v4 diff --git a/scripts/whisper/export-onnx.py b/scripts/whisper/export-onnx.py index 46594d12f..e7ab58fa3 100755 --- a/scripts/whisper/export-onnx.py +++ b/scripts/whisper/export-onnx.py @@ -39,7 +39,9 @@ def get_args(): choices=[ "tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", - "large", "large-v1", "large-v2"], + "large", "large-v1", "large-v2", + "distil-medium.en", + ], # fmt: on ) return parser.parse_args() @@ -260,7 +262,22 @@ def main(): opset_version = 13 - model = whisper.load_model(name) + if name == "distil-medium.en": + filename = "distil-medium-en-original-model.bin" + if not Path(filename): + raise ValueError( + """ + Please go to https://huggingface.co/distil-whisper/distil-medium.en + to download original-model.bin + You can use the following command to do that: + + wget -O distil-medium-en-original-model.bin https://huggingface.co/distil-whisper/distil-medium.en/resolve/main/original-model.bin + """ + ) + model = whisper.load_model(name) + else: + model = whisper.load_model(name) + print( f"number of model parameters: {name}", sum(p.numel() for p in model.parameters()), From 2dd81b7a6140915e2090998f2c81f25cb1d6acb2 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Mon, 6 Nov 2023 19:34:00 +0800 Subject: [PATCH 2/9] small fixes --- .github/workflows/export-whisper-to-onnx.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/export-whisper-to-onnx.yaml b/.github/workflows/export-whisper-to-onnx.yaml index f4785c5ee..86630b9aa 100644 --- a/.github/workflows/export-whisper-to-onnx.yaml +++ b/.github/workflows/export-whisper-to-onnx.yaml @@ -24,7 +24,7 @@ jobs: - name: Install dependencies shell: bash run: | - python3 -m pip install openai-whisper torch onnxruntime onnx + python3 -m pip install openai-whisper==20230314 torch onnxruntime onnx - name: export ${{ matrix.model }} shell: bash From b412a5a5ce3d0831bf5995464e38971706f91acc Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Mon, 6 Nov 2023 19:39:36 +0800 Subject: [PATCH 3/9] small fixes --- .github/workflows/export-whisper-to-onnx.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/export-whisper-to-onnx.yaml b/.github/workflows/export-whisper-to-onnx.yaml index 86630b9aa..c41de590d 100644 --- a/.github/workflows/export-whisper-to-onnx.yaml +++ b/.github/workflows/export-whisper-to-onnx.yaml @@ -24,7 +24,8 @@ jobs: - name: Install dependencies shell: bash run: | - python3 -m pip install openai-whisper==20230314 torch onnxruntime onnx + python3 -m pip install torch==1.13.0 -f https://download.pytorch.org/whl/cpu/torch_stable.html + python3 -m pip install openai-whisper==20230314 onnxruntime onnx - name: export ${{ matrix.model }} shell: bash From e3c92dcab755ca2fc278a61e0cfbad39d8941f25 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Mon, 6 Nov 2023 19:41:34 +0800 Subject: [PATCH 4/9] small fixes --- .github/workflows/export-whisper-to-onnx.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/export-whisper-to-onnx.yaml b/.github/workflows/export-whisper-to-onnx.yaml index c41de590d..2e22ba97f 100644 --- a/.github/workflows/export-whisper-to-onnx.yaml +++ b/.github/workflows/export-whisper-to-onnx.yaml @@ -17,10 +17,16 @@ jobs: matrix: os: [macos-latest] model: ["distil-medium.en", "tiny.en", "base.en", "small.en", "medium.en", "tiny", "base", "small", "medium", "large", "large-v1", "large-v2"] + python-version: ["3.8"] steps: - uses: actions/checkout@v4 + - name: Setup Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies shell: bash run: | From 0a8055e428441861e426548320d73c6f60950b84 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Mon, 6 Nov 2023 19:48:33 +0800 Subject: [PATCH 5/9] small fixes --- .github/workflows/export-whisper-to-onnx.yaml | 5 +++++ scripts/whisper/export-onnx.py | 6 ++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/.github/workflows/export-whisper-to-onnx.yaml b/.github/workflows/export-whisper-to-onnx.yaml index 2e22ba97f..9cf0bf04a 100644 --- a/.github/workflows/export-whisper-to-onnx.yaml +++ b/.github/workflows/export-whisper-to-onnx.yaml @@ -37,6 +37,11 @@ jobs: shell: bash run: | cd scripts/whisper + model=${{ matrix.model }} + echo "model: $model" + if [[ $model == distil-medium.en ]]; then + wget -O distil-medium-en-original-model.bin https://huggingface.co/distil-whisper/distil-medium.en/resolve/main/original-model.bin + fi python3 ./export-onnx.py --model ${{ matrix.model }} python3 -m onnxruntime.tools.convert_onnx_models_to_ort --optimization_style=Fixed ./ diff --git a/scripts/whisper/export-onnx.py b/scripts/whisper/export-onnx.py index e7ab58fa3..945d39ccd 100755 --- a/scripts/whisper/export-onnx.py +++ b/scripts/whisper/export-onnx.py @@ -259,11 +259,13 @@ def convert_tokens(name, model): def main(): args = get_args() name = args.model + print(args) + print(name) opset_version = 13 if name == "distil-medium.en": - filename = "distil-medium-en-original-model.bin" + filename = "./distil-medium-en-original-model.bin" if not Path(filename): raise ValueError( """ @@ -274,7 +276,7 @@ def main(): wget -O distil-medium-en-original-model.bin https://huggingface.co/distil-whisper/distil-medium.en/resolve/main/original-model.bin """ ) - model = whisper.load_model(name) + model = whisper.load_model(filename) else: model = whisper.load_model(name) From fe086dcf64a69dd9549a3774a71ac2fff0b7199e Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Mon, 6 Nov 2023 19:54:27 +0800 Subject: [PATCH 6/9] small fixes --- .github/workflows/export-whisper-to-onnx.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/export-whisper-to-onnx.yaml b/.github/workflows/export-whisper-to-onnx.yaml index 9cf0bf04a..59d1bee9f 100644 --- a/.github/workflows/export-whisper-to-onnx.yaml +++ b/.github/workflows/export-whisper-to-onnx.yaml @@ -40,7 +40,7 @@ jobs: model=${{ matrix.model }} echo "model: $model" if [[ $model == distil-medium.en ]]; then - wget -O distil-medium-en-original-model.bin https://huggingface.co/distil-whisper/distil-medium.en/resolve/main/original-model.bin + wget -q -O distil-medium-en-original-model.bin https://huggingface.co/distil-whisper/distil-medium.en/resolve/main/original-model.bin fi python3 ./export-onnx.py --model ${{ matrix.model }} python3 -m onnxruntime.tools.convert_onnx_models_to_ort --optimization_style=Fixed ./ From 3c6b23f4b2e1dd5b330578f5fbd9ebf157b41d34 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Mon, 6 Nov 2023 19:54:49 +0800 Subject: [PATCH 7/9] small fixes --- .github/workflows/export-whisper-to-onnx.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/export-whisper-to-onnx.yaml b/.github/workflows/export-whisper-to-onnx.yaml index 59d1bee9f..7ccdeae12 100644 --- a/.github/workflows/export-whisper-to-onnx.yaml +++ b/.github/workflows/export-whisper-to-onnx.yaml @@ -41,6 +41,7 @@ jobs: echo "model: $model" if [[ $model == distil-medium.en ]]; then wget -q -O distil-medium-en-original-model.bin https://huggingface.co/distil-whisper/distil-medium.en/resolve/main/original-model.bin + ls -lh fi python3 ./export-onnx.py --model ${{ matrix.model }} python3 -m onnxruntime.tools.convert_onnx_models_to_ort --optimization_style=Fixed ./ From 89ed657fc8e7ebe08eb244e7169287763d784ae2 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Mon, 6 Nov 2023 20:09:38 +0800 Subject: [PATCH 8/9] small fixes --- .github/workflows/export-whisper-to-onnx.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/export-whisper-to-onnx.yaml b/.github/workflows/export-whisper-to-onnx.yaml index 7ccdeae12..58188590f 100644 --- a/.github/workflows/export-whisper-to-onnx.yaml +++ b/.github/workflows/export-whisper-to-onnx.yaml @@ -48,7 +48,9 @@ jobs: ls -lh - ls -lh ~/.cache/whisper + if [[ $model != distil-medium.en ]]; then + ls -lh ~/.cache/whisper + fi - name: Publish ${{ matrix.model }} to huggingface shell: bash From 57821c6650b515ffd120b016350243c5ee601d03 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Mon, 6 Nov 2023 20:21:42 +0800 Subject: [PATCH 9/9] small fixes --- .github/workflows/export-whisper-to-onnx.yaml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/.github/workflows/export-whisper-to-onnx.yaml b/.github/workflows/export-whisper-to-onnx.yaml index 58188590f..e1592d849 100644 --- a/.github/workflows/export-whisper-to-onnx.yaml +++ b/.github/workflows/export-whisper-to-onnx.yaml @@ -57,6 +57,8 @@ jobs: env: HF_TOKEN: ${{ secrets.HF_TOKEN }} run: | + model=${{ matrix.model }} + cd scripts/whisper git config --global user.email "csukuangfj@gmail.com" @@ -69,6 +71,18 @@ jobs: cp *tokens.txt ./huggingface cd huggingface + + if [[ $model == distil-medium.en ]]; then + mkdir test_wavs + cd test_wavs + wget -q https://huggingface.co/csukuangfj/sherpa-onnx-whisper-medium.en/resolve/main/test_wavs/0.wav + wget -q https://huggingface.co/csukuangfj/sherpa-onnx-whisper-medium.en/resolve/main/test_wavs/1.wav + wget -q https://huggingface.co/csukuangfj/sherpa-onnx-whisper-medium.en/resolve/main/test_wavs/8k.wav + wget -q https://huggingface.co/csukuangfj/sherpa-onnx-whisper-medium.en/resolve/main/test_wavs/trans.txt + git add . + cd .. + fi + git status ls -lh git lfs track "*.onnx"