From aa9f85a20504c1e77812cededf01127da433b3e0 Mon Sep 17 00:00:00 2001 From: PoTaTo <148920650+PoTaTo-Mika@users.noreply.github.com> Date: Mon, 16 Sep 2024 13:37:04 +0800 Subject: [PATCH] Add Audio Select to WebUI (#556) * Add Audio Select to WebUI * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- docs/en/inference.md | 3 +++ docs/ja/inference.md | 3 +++ docs/pt/inference.md | 3 +++ docs/zh/inference.md | 3 +++ fish_speech/i18n/locale/en_US.json | 3 ++- fish_speech/i18n/locale/es_ES.json | 3 ++- fish_speech/i18n/locale/ja_JP.json | 4 ++-- fish_speech/i18n/locale/zh_CN.json | 3 ++- tools/webui.py | 34 ++++++++++++++++++++++++++++++ 9 files changed, 54 insertions(+), 5 deletions(-) diff --git a/docs/en/inference.md b/docs/en/inference.md index 1eb3042f..e085c968 100644 --- a/docs/en/inference.md +++ b/docs/en/inference.md @@ -118,6 +118,9 @@ python -m tools.webui \ --decoder-config-name firefly_gan_vq ``` +!!! note + You can save the label file and reference audio file in advance to the examples folder in the main directory (which you need to create yourself), so that you can directly call them in the WebUI. + !!! note You can use Gradio environment variables, such as `GRADIO_SHARE`, `GRADIO_SERVER_PORT`, `GRADIO_SERVER_NAME` to configure WebUI. diff --git a/docs/ja/inference.md b/docs/ja/inference.md index 4ca55768..1499ecf6 100644 --- a/docs/ja/inference.md +++ b/docs/ja/inference.md @@ -151,6 +151,9 @@ python -m tools.webui \ --decoder-config-name firefly_gan_vq ``` +!!! note + ラベルファイルと参照音声ファイルをメインディレクトリの examples フォルダ(自分で作成する必要があります)に事前に保存しておくことで、WebUI で直接呼び出すことができます。 + !!! 
note Gradio 環境変数(`GRADIO_SHARE`、`GRADIO_SERVER_PORT`、`GRADIO_SERVER_NAME`など)を使用して WebUI を構成できます。 diff --git a/docs/pt/inference.md b/docs/pt/inference.md index 6a8ff5c6..2202fed9 100644 --- a/docs/pt/inference.md +++ b/docs/pt/inference.md @@ -147,6 +147,9 @@ python -m tools.webui \ --decoder-config-name firefly_gan_vq ``` +!!! note + Você pode salvar antecipadamente o arquivo de rótulos e o arquivo de áudio de referência na pasta examples do diretório principal (que você precisa criar), para que possa chamá-los diretamente na WebUI. + !!! note É possível usar variáveis de ambiente do Gradio, como `GRADIO_SHARE`, `GRADIO_SERVER_PORT`, `GRADIO_SERVER_NAME`, para configurar a WebUI. diff --git a/docs/zh/inference.md b/docs/zh/inference.md index f783a525..d8375b02 100644 --- a/docs/zh/inference.md +++ b/docs/zh/inference.md @@ -128,6 +128,9 @@ python -m tools.webui \ --decoder-config-name firefly_gan_vq ``` +!!! note + 你可以提前将label文件和参考音频文件保存到主目录下的examples文件夹(需要自行创建),这样你可以直接在WebUI中调用它们。 + !!! note 你可以使用 Gradio 环境变量, 如 `GRADIO_SHARE`, `GRADIO_SERVER_PORT`, `GRADIO_SERVER_NAME` 来配置 WebUI. 
diff --git a/fish_speech/i18n/locale/en_US.json b/fish_speech/i18n/locale/en_US.json index 6e280c23..d36c7743 100644 --- a/fish_speech/i18n/locale/en_US.json +++ b/fish_speech/i18n/locale/en_US.json @@ -118,5 +118,6 @@ "new": "new", "Realtime Transform Text": "Realtime Transform Text", "Normalization Result Preview (Currently Only Chinese)": "Normalization Result Preview (Currently Only Chinese)", - "Text Normalization": "Text Normalization" + "Text Normalization": "Text Normalization", + "Select Example Audio": "Select Example Audio" } diff --git a/fish_speech/i18n/locale/es_ES.json b/fish_speech/i18n/locale/es_ES.json index 3285341f..7a475796 100644 --- a/fish_speech/i18n/locale/es_ES.json +++ b/fish_speech/i18n/locale/es_ES.json @@ -118,5 +118,6 @@ "new": "nuevo", "Realtime Transform Text": "Transformación de Texto en Tiempo Real", "Normalization Result Preview (Currently Only Chinese)": "Vista Previa del Resultado de Normalización (Actualmente Solo Chino)", - "Text Normalization": "Normalización de Texto" + "Text Normalization": "Normalización de Texto", + "Select Example Audio": "Seleccionar Audio de Ejemplo" } diff --git a/fish_speech/i18n/locale/ja_JP.json b/fish_speech/i18n/locale/ja_JP.json index d30bac7b..863b8b0b 100644 --- a/fish_speech/i18n/locale/ja_JP.json +++ b/fish_speech/i18n/locale/ja_JP.json @@ -118,6 +118,6 @@ "new": "新規", "Realtime Transform Text": "リアルタイム変換テキスト", "Normalization Result Preview (Currently Only Chinese)": "正規化結果プレビュー(現在は中国語のみ)", - "Text Normalization": "テキスト正規化" - + "Text Normalization": "テキスト正規化", + "Select Example Audio": "サンプル音声を選択" } diff --git a/fish_speech/i18n/locale/zh_CN.json b/fish_speech/i18n/locale/zh_CN.json index 3dd1a5cd..9068ef0b 100644 --- a/fish_speech/i18n/locale/zh_CN.json +++ b/fish_speech/i18n/locale/zh_CN.json @@ -118,5 +118,6 @@ "new": "创建新的检查点", "Realtime Transform Text": "实时规范化文本", "Normalization Result Preview (Currently Only Chinese)": "规范化结果预览", - "Text Normalization": 
"文本规范化", + "Select Example Audio": "选择参考音频" } diff --git a/tools/webui.py b/tools/webui.py index cff155d4..7c9102f2 100644 --- a/tools/webui.py +++ b/tools/webui.py @@ -324,6 +324,20 @@ def build_app(): enable_reference_audio = gr.Checkbox( label=i18n("Enable Reference Audio"), ) + + # Add dropdown for selecting example audio files + examples_dir = Path("examples") + if not examples_dir.exists(): + examples_dir.mkdir() + example_audio_files = [ + f.name for f in examples_dir.glob("*.wav") + ] + [f.name for f in examples_dir.glob("*.mp3")] + example_audio_dropdown = gr.Dropdown( + label=i18n("Select Example Audio"), + choices=[""] + example_audio_files, + value="", + ) + reference_audio = gr.Audio( label=i18n("Reference Audio"), type="filepath", @@ -383,6 +397,26 @@ def build_app(): fn=normalize_text, inputs=[text, if_refine_text], outputs=[refined_text] ) + def select_example_audio(audio_file): + if audio_file: + audio_path = examples_dir / audio_file + lab_file = audio_path.with_suffix(".lab") + + if lab_file.exists(): + lab_content = lab_file.read_text(encoding="utf-8").strip() + else: + lab_content = "" + + return str(audio_path), lab_content, True + return None, "", False + + # Connect the dropdown to update reference audio and text + example_audio_dropdown.change( + fn=select_example_audio, + inputs=[example_audio_dropdown], + outputs=[reference_audio, reference_text, enable_reference_audio], + ) + # # Submit generate.click( inference_wrapper,