diff --git a/fish_speech/i18n/locale/en_US.json b/fish_speech/i18n/locale/en_US.json
index a5510e62..cd00d673 100644
--- a/fish_speech/i18n/locale/en_US.json
+++ b/fish_speech/i18n/locale/en_US.json
@@ -1,25 +1,103 @@
 {
     "5 to 10 seconds of reference audio, useful for specifying speaker.": "5 to 10 seconds of reference audio, useful for specifying speaker.",
     "A text-to-speech model based on VQ-GAN and Llama developed by [Fish Audio](https://fish.audio).": "A text-to-speech model based on VQ-GAN and Llama developed by [Fish Audio](https://fish.audio).",
+    "Accumulate Gradient Batches": "Accumulate Gradient Batches",
+    "Add to Processing Area": "Add to Processing Area",
+    "Added path successfully!": "Added path successfully!",
     "Advanced Config": "Advanced Config",
+    "Base LLAMA Model": "Base LLAMA Model",
+    "Batch Size": "Batch Size",
+    "Chinese": "Chinese",
+    "Compile Model": "Compile Model",
+    "Compile the model can significantly reduce the inference time, but will increase cold start time": "Compile the model can significantly reduce the inference time, but will increase cold start time",
+    "Copy": "Copy",
+    "Data Preprocessing": "Data Preprocessing",
+    "Data Preprocessing Path": "Data Preprocessing Path",
+    "Data Source": "Data Source",
+    "Disabled": "Disabled",
     "Enable Reference Audio": "Enable Reference Audio",
+    "English": "English",
     "Error Message": "Error Message",
+    "File Preprocessing": "File Preprocessing",
     "Generate": "Generate",
     "Generated Audio": "Generated Audio",
+    "If there is no corresponding text for the audio, apply ASR for assistance, support .txt or .lab format": "If there is no corresponding text for the audio, apply ASR for assistance, support .txt or .lab format",
     "Infer interface is closed": "Infer interface is closed",
+    "Inference Configuration": "Inference Configuration",
+    "Inference Server Configuration": "Inference Server Configuration",
+    "Inference Server Error": "Inference Server Error",
     "Inferring interface is launched at {}": "Inferring interface is launched at {}",
+    "Initial Learning Rate": "Initial Learning Rate",
+    "Input Audio & Source Path for Transcription": "Input Audio & Source Path for Transcription",
     "Input Text": "Input Text",
+    "Invalid path: {}": "Invalid path: {}",
+    "It is recommended to use CUDA, if you have low configuration, use CPU": "It is recommended to use CUDA, if you have low configuration, use CPU",
     "Iterative Prompt Length, 0 means off": "Iterative Prompt Length, 0 means off",
+    "Japanese": "Japanese",
+    "LLAMA Configuration": "LLAMA Configuration",
+    "LLAMA Model Config": "LLAMA Model Config",
+    "LLAMA Model Path": "LLAMA Model Path",
+    "Labeling Device": "Labeling Device",
+    "LoRA Model to be merged": "LoRA Model to be merged",
+    "Maximum Length per Sample": "Maximum Length per Sample",
+    "Maximum Training Steps": "Maximum Training Steps",
     "Maximum tokens per batch, 0 means no limit": "Maximum tokens per batch, 0 means no limit",
+    "Merge": "Merge",
+    "Merge LoRA": "Merge LoRA",
+    "Merge successfully": "Merge successfully",
+    "Model Output Path": "Model Output Path",
+    "Model Size": "Model Size",
+    "Move": "Move",
+    "Move files successfully": "Move files successfully",
+    "No selected options": "No selected options",
+    "Number of Workers": "Number of Workers",
+    "Open Inference Server": "Open Inference Server",
+    "Open Labeler WebUI": "Open Labeler WebUI",
+    "Open Tensorboard": "Open Tensorboard",
     "Opened labeler in browser": "Opened labeler in browser",
+    "Optional Label Language": "Optional Label Language",
+    "Output Path": "Output Path",
"Output Path", + "Path error, please check the model file exists in the corresponding path": "Path error, please check the model file exists in the corresponding path", + "Precision": "Precision", + "Probability of applying Speaker Condition": "Probability of applying Speaker Condition", "Put your text here.": "Put your text here.", "Reference Audio": "Reference Audio", "Reference Text": "Reference Text", "Related code are released under BSD-3-Clause License, and weights are released under CC BY-NC-SA 4.0 License.": "Related code are released under BSD-3-Clause License, and weights are released under CC BY-NC-SA 4.0 License.", + "Remove Selected Data": "Remove Selected Data", + "Removed path successfully!": "Removed path successfully!", "Repetition Penalty": "Repetition Penalty", + "Save model every n steps": "Save model every n steps", + "Select source file processing method": "Select source file processing method", + "Select the model to be trained": "Select the model to be trained", + "Selected: {}": "Selected: {}", "Speaker": "Speaker", + "Speaker is identified by the folder name": "Speaker is identified by the folder name", + "Start Training": "Start Training", + "Tensorboard Host": "Tensorboard Host", + "Tensorboard Log Path": "Tensorboard Log Path", + "Tensorboard Port": "Tensorboard Port", + "Tensorboard interface is closed": "Tensorboard interface is closed", + "Tensorboard interface is launched at {}": "Tensorboard interface is launched at {}", "Text is too long, please keep it under {} characters.": "Text is too long, please keep it under {} characters.", + "The path of the input folder on the left or the filelist. Whether checked or not, it will be used for subsequent training in this list.": "The path of the input folder on the left or the filelist. Whether checked or not, it will be used for subsequent training in this list.", + "Training Configuration": "Training Configuration", + "Training Error": "Training Error", + "Training stopped": "Training stopped", "Type name of the speaker": "Type name of the speaker", + "Type the path or select from the dropdown": "Type the path or select from the dropdown", + "Use LoRA": "Use LoRA", + "Use LoRA can save GPU memory, but may reduce the quality of the model": "Use LoRA can save GPU memory, but may reduce the quality of the model", + "Use filelist": "Use filelist", + "Use large for 10G+ GPU, medium for 5G, small for 2G": "Use large for 10G+ GPU, medium for 5G, small for 2G", + "VQGAN Configuration": "VQGAN Configuration", + "VQGAN Model Path": "VQGAN Model Path", + "Validation Batch Size": "Validation Batch Size", + "View the status of the preprocessing folder (use the slider to control the depth of the tree)": "View the status of the preprocessing folder (use the slider to control the depth of the tree)", "We are not responsible for any misuse of the model, please consider your local laws and regulations before using it.": "We are not responsible for any misuse of the model, please consider your local laws and regulations before using it.", - "You can find the source code [here](https://github.com/fishaudio/fish-speech) and models [here](https://huggingface.co/fishaudio/fish-speech-1).": "You can find the source code [here](https://github.com/fishaudio/fish-speech) and models [here](https://huggingface.co/fishaudio/fish-speech-1)." 
+ "WebUI Host": "WebUI Host", + "WebUI Port": "WebUI Port", + "Whisper Model": "Whisper Model", + "You can find the source code [here](https://github.com/fishaudio/fish-speech) and models [here](https://huggingface.co/fishaudio/fish-speech-1).": "You can find the source code [here](https://github.com/fishaudio/fish-speech) and models [here](https://huggingface.co/fishaudio/fish-speech-1).", + "bf16-true is recommended for 30+ series GPU, 16-mixed is recommended for 10+ series GPU": "bf16-true is recommended for 30+ series GPU, 16-mixed is recommended for 10+ series GPU" } diff --git a/fish_speech/i18n/locale/ja_JP.json b/fish_speech/i18n/locale/ja_JP.json new file mode 100644 index 00000000..8d9594c9 --- /dev/null +++ b/fish_speech/i18n/locale/ja_JP.json @@ -0,0 +1,103 @@ +{ + "5 to 10 seconds of reference audio, useful for specifying speaker.": "話者を指定するのに役立つ、5~10秒のリファレンスオーディオ。", + "A text-to-speech model based on VQ-GAN and Llama developed by [Fish Audio](https://fish.audio).": "[Fish Audio](https://fish.audio)が開発したVQ-GANとLlamaに基づくテキスト音声合成モデル。", + "Accumulate Gradient Batches": "勾配バッチの累積", + "Add to Processing Area": "処理エリアに追加", + "Added path successfully!": "パスの追加に成功しました!", + "Advanced Config": "詳細設定", + "Base LLAMA Model": "基本LLAMAモデル", + "Batch Size": "バッチサイズ", + "Chinese": "中国語", + "Compile Model": "モデルのコンパイル", + "Compile the model can significantly reduce the inference time, but will increase cold start time": "モデルをコンパイルすると推論時間を大幅に短縮できますが、コールドスタート時間が長くなります", + "Copy": "コピー", + "Data Preprocessing": "データ前処理", + "Data Preprocessing Path": "データ前処理パス", + "Data Source": "データソース", + "Disabled": "無効", + "Enable Reference Audio": "リファレンスオーディオを有効にする", + "English": "英語", + "Error Message": "エラーメッセージ", + "File Preprocessing": "ファイル前処理", + "Generate": "生成", + "Generated Audio": "生成されたオーディオ", + "If there is no corresponding text for the audio, apply ASR for assistance, support .txt or .lab format": "音声に対応するテキストがない場合は、ASRを適用してサポートします。.txtまたは.lab形式をサポートしています", + "Infer interface is closed": "推論インターフェースが閉じられています", + "Inference Configuration": "推論設定", + "Inference Server Configuration": "推論サーバー設定", + "Inference Server Error": "推論サーバーエラー", + "Inferring interface is launched at {}": "推論インターフェースが{}で起動しました", + "Initial Learning Rate": "初期学習率", + "Input Audio & Source Path for Transcription": "入力オーディオと文字起こしのソースパス", + "Input Text": "入力テキスト", + "Invalid path: {}": "無効なパス: {}", + "It is recommended to use CUDA, if you have low configuration, use CPU": "CUDAの使用をお勧めします。低い構成の場合はCPUを使用してください", + "Iterative Prompt Length, 0 means off": "反復プロンプト長。0はオフを意味します", + "Japanese": "日本語", + "LLAMA Configuration": "LLAMA設定", + "LLAMA Model Config": "LLAMAモデル設定", + "LLAMA Model Path": "LLAMAモデルパス", + "Labeling Device": "ラベリングデバイス", + "LoRA Model to be merged": "マージするLoRAモデル", + "Maximum Length per Sample": "サンプルあたりの最大長", + "Maximum Training Steps": "最大トレーニングステップ数", + "Maximum tokens per batch, 0 means no limit": "バッチあたりの最大トークン数。0は制限なしを意味します", + "Merge": "マージ", + "Merge LoRA": "LoRAのマージ", + "Merge successfully": "マージに成功しました", + "Model Output Path": "モデル出力パス", + "Model Size": "モデルサイズ", + "Move": "移動", + "Move files successfully": "ファイルの移動に成功しました", + "No selected options": "選択されたオプションはありません", + "Number of Workers": "ワーカー数", + "Open Inference Server": "推論サーバーを開く", + "Open Labeler WebUI": "ラベラーWebUIを開く", + "Open Tensorboard": "Tensorboardを開く", + "Opened labeler in browser": "ブラウザでラベラーを開きました", + "Optional Label Language": "オプションのラベル言語", + "Output Path": "出力パス", + "Path error, please check the model file exists in the corresponding 
path": "パスエラー。対応するパスにモデルファイルが存在するか確認してください", + "Precision": "精度", + "Probability of applying Speaker Condition": "話者条件を適用する確率", + "Put your text here.": "ここにテキストを入力してください。", + "Reference Audio": "リファレンスオーディオ", + "Reference Text": "リファレンステキスト", + "Related code are released under BSD-3-Clause License, and weights are released under CC BY-NC-SA 4.0 License.": "関連コードはBSD-3-Clauseライセンスの下でリリースされ、重みはCC BY-NC-SA 4.0ライセンスの下でリリースされます。", + "Remove Selected Data": "選択したデータを削除", + "Removed path successfully!": "パスの削除に成功しました!", + "Repetition Penalty": "反復ペナルティ", + "Save model every n steps": "nステップごとにモデルを保存", + "Select source file processing method": "ソースファイルの処理方法を選択", + "Select the model to be trained": "トレーニングするモデルを選択", + "Selected: {}": "選択済み: {}", + "Speaker": "話者", + "Speaker is identified by the folder name": "話者はフォルダ名で識別されます", + "Start Training": "トレーニング開始", + "Tensorboard Host": "Tensorboardホスト", + "Tensorboard Log Path": "Tensorboardログパス", + "Tensorboard Port": "Tensorboardポート", + "Tensorboard interface is closed": "Tensorboardインターフェースが閉じられています", + "Tensorboard interface is launched at {}": "Tensorboardインターフェースが{}で起動されました", + "Text is too long, please keep it under {} characters.": "テキストが長すぎます。{}文字以内に抑えてください。", + "The path of the input folder on the left or the filelist. Whether checked or not, it will be used for subsequent training in this list.": "左側の入力フォルダまたはファイルリストのパス。チェックの有無にかかわらず、このリストの後続のトレーニングに使用されます。", + "Training Configuration": "トレーニング設定", + "Training Error": "トレーニングエラー", + "Training stopped": "トレーニングが停止しました", + "Type name of the speaker": "話者の名前を入力", + "Type the path or select from the dropdown": "パスを入力するか、ドロップダウンから選択してください", + "Use LoRA": "LoRAを使用", + "Use LoRA can save GPU memory, but may reduce the quality of the model": "LoRAを使用するとGPUメモリを節約できますが、モデルの品質が低下する可能性があります", + "Use filelist": "ファイルリストを使用", + "Use large for 10G+ GPU, medium for 5G, small for 2G": "10G以上のGPUには大、5Gには中、2Gには小を使用してください", + "VQGAN Configuration": "VQGAN設定", + "VQGAN Model Path": "VQGANモデルパス", + "Validation Batch Size": "検証バッチサイズ", + "View the status of the preprocessing folder (use the slider to control the depth of the tree)": "前処理フォルダの状態を表示(スライダーを使用してツリーの深さを制御)", + "We are not responsible for any misuse of the model, please consider your local laws and regulations before using it.": "モデルの誤用については一切責任を負いません。使用する前に、現地の法律と規制を考慮してください。", + "WebUI Host": "WebUIホスト", + "WebUI Port": "WebUIポート", + "Whisper Model": "Whisperモデル", + "You can find the source code [here](https://github.com/fishaudio/fish-speech) and models [here](https://huggingface.co/fishaudio/fish-speech-1).": "ソースコードは[こちら](https://github.com/fishaudio/fish-speech)、モデルは[こちら](https://huggingface.co/fishaudio/fish-speech-1)にあります。", + "bf16-true is recommended for 30+ series GPU, 16-mixed is recommended for 10+ series GPU": "30シリーズ以降のGPUにはbf16-trueを、10シリーズ以降のGPUには16-mixedをお勧めします" +} diff --git a/fish_speech/i18n/locale/zh_CN.json b/fish_speech/i18n/locale/zh_CN.json index 034b45c5..99fed9c3 100644 --- a/fish_speech/i18n/locale/zh_CN.json +++ b/fish_speech/i18n/locale/zh_CN.json @@ -1,25 +1,103 @@ { "5 to 10 seconds of reference audio, useful for specifying speaker.": "5 到 10 秒的参考音频,适用于指定音色。", "A text-to-speech model based on VQ-GAN and Llama developed by [Fish Audio](https://fish.audio).": "由 [Fish Audio](https://fish.audio) 研发的基于 VQ-GAN 和 Llama 的多语种语音合成.", + "Accumulate Gradient Batches": "梯度累积批次", + "Add to Processing Area": "加入处理区", + "Added path successfully!": "添加路径成功!", "Advanced Config": "高级参数", + "Base LLAMA Model": "基础 LLAMA 模型", + "Batch 
Size": "批次大小", + "Chinese": "中文", + "Compile Model": "编译模型", + "Compile the model can significantly reduce the inference time, but will increase cold start time": "编译模型可以显著减少推理时间,但会增加冷启动时间", + "Copy": "复制", + "Data Preprocessing": "数据预处理", + "Data Preprocessing Path": "数据预处理路径", + "Data Source": "数据源", + "Disabled": "禁用", "Enable Reference Audio": "启用参考音频", + "English": "英文", "Error Message": "错误信息", + "File Preprocessing": "文件预处理", "Generate": "生成", "Generated Audio": "音频", + "If there is no corresponding text for the audio, apply ASR for assistance, support .txt or .lab format": "如果音频没有对应的文本,可以应用 ASR 辅助,支持 .txt 或 .lab 格式", "Infer interface is closed": "推理界面已关闭", + "Inference Configuration": "推理配置", + "Inference Server Configuration": "推理服务器配置", + "Inference Server Error": "推理服务器错误", "Inferring interface is launched at {}": "推理界面已在 {} 上启动", + "Initial Learning Rate": "初始学习率", + "Input Audio & Source Path for Transcription": "输入音频和转录源路径", "Input Text": "输入文本", + "Invalid path: {}": "无效路径: {}", + "It is recommended to use CUDA, if you have low configuration, use CPU": "建议使用 CUDA,如果配置较低,使用 CPU", "Iterative Prompt Length, 0 means off": "迭代提示长度,0 表示关闭", + "Japanese": "日文", + "LLAMA Configuration": "LLAMA 配置", + "LLAMA Model Config": "LLAMA 模型配置", + "LLAMA Model Path": "LLAMA 模型路径", + "Labeling Device": "标注加速设备", + "LoRA Model to be merged": "要合并的 LoRA 模型", + "Maximum Length per Sample": "每个样本的最大长度", + "Maximum Training Steps": "最大训练步数", "Maximum tokens per batch, 0 means no limit": "每批最大令牌数,0 表示无限制", + "Merge": "合并", + "Merge LoRA": "合并 LoRA", + "Merge successfully": "合并成功", + "Model Output Path": "模型输出路径", + "Model Size": "模型规模", + "Move": "移动", + "Move files successfully": "移动文件成功", + "No selected options": "没有选择的选项", + "Number of Workers": "数据加载进程数", + "Open Inference Server": "打开推理服务器", + "Open Labeler WebUI": "打开标注工具", + "Open Tensorboard": "打开 Tensorboard", "Opened labeler in browser": "在浏览器中打开标注工具", + "Optional Label Language": "[可选] 标注语言", + "Output Path": "输出路径", + "Path error, please check the model file exists in the corresponding path": "路径错误,请检查模型文件是否存在于相应路径", + "Precision": "精度", + "Probability of applying Speaker Condition": "应用说话人条件的概率", "Put your text here.": "在此处输入文本.", "Reference Audio": "参考音频", "Reference Text": "参考文本", "Related code are released under BSD-3-Clause License, and weights are released under CC BY-NC-SA 4.0 License.": "相关代码使用 BSD-3-Clause 许可证发布,权重使用 CC BY-NC-SA 4.0 许可证发布.", + "Remove Selected Data": "移除选中数据", + "Removed path successfully!": "移除路径成功!", "Repetition Penalty": "重复惩罚", + "Save model every n steps": "每 n 步保存模型", + "Select source file processing method": "选择源文件处理方法", + "Select the model to be trained": "选择要训练的模型", + "Selected: {}": "已选择: {}", "Speaker": "说话人", + "Speaker is identified by the folder name": "自动根据父目录名称识别说话人", + "Start Training": "开始训练", + "Tensorboard Host": "Tensorboard 监听地址", + "Tensorboard Log Path": "Tensorboard 日志路径", + "Tensorboard Port": "Tensorboard 端口", + "Tensorboard interface is closed": "Tensorboard 界面已关闭", + "Tensorboard interface is launched at {}": "Tensorboard 界面已在 {} 上启动", "Text is too long, please keep it under {} characters.": "文本太长,请保持在 {} 个字符以内.", + "The path of the input folder on the left or the filelist. 
+    "The path of the input folder on the left or the filelist. Whether checked or not, it will be used for subsequent training in this list.": "左侧输入文件夹的路径或文件列表。无论是否选中,都将在此列表中用于后续训练.",
+    "Training Configuration": "训练配置",
+    "Training Error": "训练错误",
+    "Training stopped": "训练已停止",
     "Type name of the speaker": "输入说话人的名称",
+    "Type the path or select from the dropdown": "输入路径或从下拉菜单中选择",
+    "Use LoRA": "使用 LoRA",
+    "Use LoRA can save GPU memory, but may reduce the quality of the model": "使用 LoRA 可以节省 GPU 内存,但可能会降低模型质量",
+    "Use filelist": "使用文件列表",
+    "Use large for 10G+ GPU, medium for 5G, small for 2G": "10G+ GPU 使用 large, 5G 使用 medium, 2G 使用 small",
+    "VQGAN Configuration": "VQGAN 配置",
+    "VQGAN Model Path": "VQGAN 模型路径",
+    "Validation Batch Size": "验证批次大小",
+    "View the status of the preprocessing folder (use the slider to control the depth of the tree)": "查看预处理文件夹的状态 (使用滑块控制树的深度)",
     "We are not responsible for any misuse of the model, please consider your local laws and regulations before using it.": "我们不对模型的任何滥用负责,请在使用之前考虑您当地的法律法规.",
-    "You can find the source code [here](https://github.com/fishaudio/fish-speech) and models [here](https://huggingface.co/fishaudio/fish-speech-1).": "你可以在 [这里](https://github.com/fishaudio/fish-speech) 找到源代码和 [这里](https://huggingface.co/fishaudio/fish-speech-1) 找到模型."
+    "WebUI Host": "WebUI 监听地址",
+    "WebUI Port": "WebUI 端口",
+    "Whisper Model": "Whisper 模型",
+    "You can find the source code [here](https://github.com/fishaudio/fish-speech) and models [here](https://huggingface.co/fishaudio/fish-speech-1).": "你可以在 [这里](https://github.com/fishaudio/fish-speech) 找到源代码和 [这里](https://huggingface.co/fishaudio/fish-speech-1) 找到模型.",
+    "bf16-true is recommended for 30+ series GPU, 16-mixed is recommended for 10+ series GPU": "30+ 系列 GPU 建议使用 bf16-true, 10+ 系列 GPU 建议使用 16-mixed"
 }
diff --git a/fish_speech/webui/js/animate.js b/fish_speech/webui/js/animate.js
index 412eb366..0637a541 100644
--- a/fish_speech/webui/js/animate.js
+++ b/fish_speech/webui/js/animate.js
@@ -12,30 +12,28 @@ function createGradioAnimation() {
         document.documentElement.style.setProperty('--my-200', '#80eeee');
         document.documentElement.style.setProperty('--my-50', '#ecfdf5');
 
-        gradioApp.style.position = 'relative';
-        gradioApp.style.backgroundSize = '200% 200%';
-        gradioApp.style.animation = 'moveJellyBackground 10s ease infinite';
-        gradioApp.style.backgroundImage = 'radial-gradient(circle at 0% 50%, var(--my-200), var(--my-50) 50%)';
-        gradioApp.style.display = 'flex';
-        gradioApp.style.justifyContent = 'flex-start';
-        gradioApp.style.flexWrap = 'nowrap';
-        gradioApp.style.overflowX = 'auto';
-
-        for (let i = 0; i < 6; i++) {
-            var quan = document.createElement('div');
-            quan.className = 'quan';
-            gradioApp.insertBefore(quan, gradioApp.firstChild);
-            quan.id = 'quan' + i.toString();
-            quan.style.left = 'calc(var(--water-width) * ' + i.toString() + ')';
-            var quanContainer = document.querySelector('.quan');
-            if (quanContainer) {
-                var shui = document.createElement('div');
-                shui.className = 'shui';
-                quanContainer.insertBefore(shui, quanContainer.firstChild)
-            }
-        }
-
+        // gradioApp.style.position = 'relative';
+        // gradioApp.style.backgroundSize = '200% 200%';
+        // gradioApp.style.animation = 'moveJellyBackground 10s ease infinite';
+        // gradioApp.style.backgroundImage = 'radial-gradient(circle at 0% 50%, var(--my-200), var(--my-50) 50%)';
+        // gradioApp.style.display = 'flex';
+        // gradioApp.style.justifyContent = 'flex-start';
+        // gradioApp.style.flexWrap = 'nowrap';
+        // gradioApp.style.overflowX = 'auto';
+        // for (let i = 0; i < 6; i++) {
+        //     var quan = document.createElement('div');
+        //     quan.className = 'quan';
+        //     gradioApp.insertBefore(quan, gradioApp.firstChild);
+        //     quan.id = 'quan' + i.toString();
+        //     quan.style.left = 'calc(var(--water-width) * ' + i.toString() + ')';
+        //     var quanContainer = document.querySelector('.quan');
+        //     if (quanContainer) {
+        //         var shui = document.createElement('div');
+        //         shui.className = 'shui';
+        //         quanContainer.insertBefore(shui, quanContainer.firstChild)
+        //     }
+        // }
     }
 
     var container = document.createElement('div');
diff --git a/fish_speech/webui/manage.py b/fish_speech/webui/manage.py
index 051c393c..263791b2 100644
--- a/fish_speech/webui/manage.py
+++ b/fish_speech/webui/manage.py
@@ -220,12 +220,14 @@ def add_item(folder: str, method: str, label_lang: str):
     elif folder:
         err = folder
     return gr.Checkboxgroup(choices=items), build_html_error_message(
-        f"添加文件夹路径无效: {err}"
+        i18n("Invalid path: {}").format(err)
     )
 
     formatted_data = json.dumps(dict_items, ensure_ascii=False, indent=4)
     logger.info(formatted_data)
-    return gr.Checkboxgroup(choices=items), build_html_ok_message("添加文件(夹)路径成功!")
+    return gr.Checkboxgroup(choices=items), build_html_ok_message(
+        i18n("Added path successfully!")
+    )
 
 
 def remove_items(selected_items):
@@ -237,13 +239,17 @@ def remove_items(selected_items):
     formatted_data = json.dumps(dict_items, ensure_ascii=False, indent=4)
     logger.info(formatted_data)
     return gr.Checkboxgroup(choices=items, value=[]), build_html_ok_message(
-        "删除文件(夹)路径成功!"
+        i18n("Removed path successfully!")
    )
 
 
 def show_selected(options):
     selected_options = ", ".join(options)
-    return f"你选中了: {selected_options}" if options else "你没有选中任何选项"
+
+    if options:
+        return i18n("Selected: {}").format(selected_options)
+    else:
+        return i18n("No selected options")
 
 
 def list_copy(list_file_path, method):
@@ -260,7 +266,7 @@ def list_copy(list_file_path, method):
             if target_wav_path.is_file():
                 continue
             target_wav_path.parent.mkdir(parents=True, exist_ok=True)
-            if method == "复制一份":
+            if method == i18n("Copy"):
                 shutil.copy(original_wav_path, target_wav_path)
             else:
                 shutil.move(original_wav_path, target_wav_path.parent)
@@ -273,22 +279,20 @@ def list_copy(list_file_path, method):
             )
             if target_lab_path.is_file():
                 continue
-            if method == "复制一份":
+            if method == i18n("Copy"):
                 shutil.copy(original_lab_path, target_lab_path)
             else:
                 shutil.move(original_lab_path, target_lab_path.parent)
 
-    if method == "直接移动":
+    if method == i18n("Move"):
        with list_file_path.open("w", encoding="utf-8") as file:
            file.writelines("\n".join(lst))
 
     del lst
-    return build_html_ok_message("使用filelist")
+    return build_html_ok_message(i18n("Use filelist"))
 
 
 def check_files(data_path: str, max_depth: int, label_model: str, label_device: str):
-    dict_to_language = {"中文": "ZH", "英文": "EN", "日文": "JP", "不打标": "WTF"}
-
     global dict_items
     data_path = Path(data_path)
     for item, content in dict_items.items():
@@ -296,8 +300,8 @@ def check_files(data_path: str, max_depth: int, label_model: str, label_device:
        tar_path = data_path / item_path.name
 
        if content["type"] == "folder" and item_path.is_dir():
-            cur_lang = dict_to_language[content["label_lang"]]
-            if cur_lang != "WTF":
+            cur_lang = content["label_lang"]
+            if cur_lang != "IGNORE":
                try:
                    subprocess.run(
                        [
@@ -319,7 +323,7 @@ def check_files(data_path: str, max_depth: int, label_model: str, label_device:
                except Exception:
                    print("Transcription error occurred")
 
-            if content["method"] == "复制一份":
+            if content["method"] == i18n("Copy"):
                os.makedirs(tar_path, exist_ok=True)
                shutil.copytree(
                    src=str(item_path), dst=str(tar_path), dirs_exist_ok=True
@@ -330,7 +334,9 @@ def check_files(data_path: str, max_depth: int, label_model: str, label_device:
        elif content["type"] == "file" and item_path.is_file():
            list_copy(item_path, content["method"])
 
-    return build_html_ok_message("文件移动完毕"), new_explorer(data_path, max_depth=max_depth)
+    return build_html_ok_message(i18n("Move files successfully")), new_explorer(
+        data_path, max_depth=max_depth
+    )
 
 
 def train_process(
@@ -457,7 +463,7 @@ def generate_folder_name():
     logger.info(train_cmd)
     subprocess.run(train_cmd)
 
-    return build_html_ok_message("训练终止")
+    return build_html_ok_message(i18n("Training stopped"))
 
 
 def tensorboard_process(
@@ -468,13 +474,17 @@
 ):
     global p_tensorboard
     if if_tensorboard == True and p_tensorboard == None:
-        yield build_html_ok_message(f"Tensorboard界面已开启, 访问 http://{host}:{port}")
+        url = f"http://{host}:{port}"
+        yield build_html_ok_message(
+            i18n("Tensorboard interface is launched at {}").format(url)
+        )
+
+        prefix = ["tensorboard"]
+        if Path("fishenv").exists():
+            prefix = ["fishenv/python.exe", "fishenv/Scripts/tensorboard.exe"]
 
         p_tensorboard = subprocess.Popen(
-            [
-                "fishenv/python.exe",
-                "fishenv/Scripts/tensorboard.exe"
-                if Path("fishenv").exists()
-                else "tensorboard",
+            prefix
+            + [
                "--logdir",
                tensorboard_dir,
                "--host",
@@ -488,7 +498,7 @@
     elif if_tensorboard == False and p_tensorboard != None:
         kill_process(p_tensorboard.pid)
         p_tensorboard = None
-        yield build_html_error_message("Tensorboard界面已关闭")
+        yield build_html_error_message(i18n("Tensorboard interface is closed"))
 
 
 def fresh_tb_dir():
@@ -517,7 +527,11 @@ def llama_lora_merge(llama_weight, lora_weight, llama_lora_output):
         or not Path(lora_weight).exists()
         or not Path(llama_weight).exists()
     ):
-        return build_html_error_message("路径错误,请检查模型文件是否存在于对应路径")
+        return build_html_error_message(
+            i18n(
+                "Path error, please check the model file exists in the corresponding path"
+            )
+        )
 
     merge_cmd = [
         PYTHON,
@@ -535,7 +549,7 @@ def llama_lora_merge(llama_weight, lora_weight, llama_lora_output):
     ]
     logger.info(merge_cmd)
     subprocess.run(merge_cmd)
-    return build_html_ok_message("融合终止")
+    return build_html_ok_message(i18n("Merge successfully"))
 
 
 init_vqgan_yml = load_yaml_data_in_fact(vqgan_yml_path)
@@ -546,70 +560,88 @@ def llama_lora_merge(llama_weight, lora_weight, llama_lora_output):
     js=js,
     theme=seafoam,
     analytics_enabled=False,
-    title="Fish-Speech 鱼语",
+    title="Fish Speech",
 ) as demo:
     with gr.Row():
         with gr.Column():
-            with gr.Tab("\U0001F4D6 数据集准备"):
+            with gr.Tab("\U0001F4D6 " + i18n("Data Preprocessing")):
                 with gr.Row():
                     textbox = gr.Textbox(
-                        label="\U0000270F 输入音频&转写源文件夹路径",
-                        info="音频装在一个以说话人命名的文件夹内作为区分",
+                        label="\U0000270F "
+                        + i18n("Input Audio & Source Path for Transcription"),
+                        info=i18n("Speaker is identified by the folder name"),
                         interactive=True,
                     )
                 with gr.Row(equal_height=False):
                     with gr.Column():
                         output_radio = gr.Radio(
-                            label="\U0001F4C1 选择源文件(夹)处理方式",
-                            choices=["复制一份", "直接移动"],
-                            value="复制一份",
+                            label="\U0001F4C1 "
+                            + i18n("Select source file processing method"),
+                            choices=[i18n("Copy"), i18n("Move")],
+                            value=i18n("Copy"),
                             interactive=True,
                         )
                     with gr.Column():
-                        error = gr.HTML(label="错误信息")
+                        error = gr.HTML(label=i18n("Error Message"))
                         if_label = gr.Checkbox(
-                            label="是否开启打标WebUI", scale=0, show_label=True
+                            label=i18n("Open Labeler WebUI"), scale=0, show_label=True
                        )
                 with gr.Row():
-                    add_button = gr.Button("\U000027A1提交到处理区", variant="primary")
-                    remove_button = gr.Button("\U000026D4 取消所选内容")
+                    add_button = gr.Button(
+                        "\U000027A1 " + i18n("Add to Processing Area"),
+                        variant="primary",
+                    )
+                    remove_button = gr.Button(
+                        "\U000026D4 " + i18n("Remove Selected Data")
+                    )
                 with gr.Row():
                     label_device = gr.Dropdown(
-                        label="打标设备",
-                        info="建议使用cuda, 实在是低配置再用cpu",
+                        label=i18n("Labeling Device"),
+                        info=i18n(
+                            "It is recommended to use CUDA, if you have low configuration, use CPU"
+                        ),
                         choices=["cpu", "cuda"],
                         value="cuda",
                         interactive=True,
                     )
                     label_model = gr.Dropdown(
-                        label="打标模型大小",
-                        info="显存10G以上用large, 5G用medium, 2G用small",
+                        label=i18n("Whisper Model"),
+                        info=i18n(
+                            "Use large for 10G+ GPU, medium for 5G, small for 2G"
+                        ),
                         choices=["large", "medium", "small"],
                         value="small",
                         interactive=True,
                     )
                     label_radio = gr.Dropdown(
-                        label="(可选)打标语言",
-                        info="如果没有音频对应的文本,则进行辅助打标, 支持.txt或.lab格式",
-                        choices=["中文", "日文", "英文", "不打标"],
-                        value="不打标",
+                        label=i18n("Optional Label Language"),
+                        info=i18n(
+                            "If there is no corresponding text for the audio, apply ASR for assistance, support .txt or .lab format"
+                        ),
+                        choices=[
+                            (i18n("Chinese"), "ZH"),
+                            (i18n("English"), "EN"),
+                            (i18n("Japanese"), "JA"),
+                            (i18n("Disabled"), "IGNORE"),
+                        ],
+                        value="IGNORE",
                         interactive=True,
                     )
-            with gr.Tab("\U0001F6E0 训练配置项"):  # hammer
+            with gr.Tab("\U0001F6E0 " + i18n("Training Configuration")):
                 with gr.Row():
                     model_type_radio = gr.Radio(
-                        label="选择要训练的模型类型",
+                        label=i18n("Select the model to be trained"),
                         interactive=True,
                         choices=["VQGAN", "LLAMA", "all"],
                         value="all",
                     )
                 with gr.Row():
-                    with gr.Tab(label="VQGAN配置项"):
+                    with gr.Tab(label=i18n("VQGAN Configuration")):
                         with gr.Row(equal_height=False):
                             vqgan_lr_slider = gr.Slider(
-                                label="初始学习率",
+                                label=i18n("Initial Learning Rate"),
                                 interactive=True,
                                 minimum=1e-5,
                                 maximum=1e-4,
@@ -617,7 +649,7 @@ def llama_lora_merge(llama_weight, lora_weight, llama_lora_output):
                                 value=init_vqgan_yml["model"]["optimizer"]["lr"],
                             )
                             vqgan_maxsteps_slider = gr.Slider(
-                                label="训练最大步数",
+                                label=i18n("Maximum Training Steps"),
                                 interactive=True,
                                 minimum=1000,
                                 maximum=100000,
@@ -627,7 +659,7 @@ def llama_lora_merge(llama_weight, lora_weight, llama_lora_output):
                         with gr.Row(equal_height=False):
                             vqgan_data_num_workers_slider = gr.Slider(
-                                label="num_workers",
+                                label=i18n("Number of Workers"),
                                 interactive=True,
                                 minimum=1,
                                 maximum=16,
@@ -636,7 +668,7 @@ def llama_lora_merge(llama_weight, lora_weight, llama_lora_output):
                             )
                             vqgan_data_batch_size_slider = gr.Slider(
-                                label="batch_size",
+                                label=i18n("Batch Size"),
                                 interactive=True,
                                 minimum=1,
                                 maximum=32,
@@ -645,7 +677,7 @@ def llama_lora_merge(llama_weight, lora_weight, llama_lora_output):
                         with gr.Row(equal_height=False):
                             vqgan_data_val_batch_size_slider = gr.Slider(
-                                label="val_batch_size",
+                                label=i18n("Validation Batch Size"),
                                 interactive=True,
                                 minimum=1,
                                 maximum=32,
@@ -653,14 +685,17 @@ def llama_lora_merge(llama_weight, lora_weight, llama_lora_output):
                                 value=init_vqgan_yml["data"]["val_batch_size"],
                             )
                             vqgan_precision_dropdown = gr.Dropdown(
-                                label="训练精度",
+                                label=i18n("Precision"),
                                 interactive=True,
                                 choices=["32", "bf16-true", "bf16-mixed"],
+                                info=i18n(
+                                    "bf16-true is recommended for 30+ series GPU, 16-mixed is recommended for 10+ series GPU"
+                                ),
                                 value=str(init_vqgan_yml["trainer"]["precision"]),
                             )
                         with gr.Row(equal_height=False):
                             vqgan_check_interval_slider = gr.Slider(
-                                label="每n步保存一个模型",
+                                label=i18n("Save model every n steps"),
                                 interactive=True,
                                 minimum=500,
                                 maximum=10000,
value=init_vqgan_yml["trainer"]["val_check_interval"], ) - with gr.Tab(label="LLAMA配置项"): + with gr.Tab(label=i18n("LLAMA Configuration")): with gr.Row(equal_height=False): llama_use_lora = gr.Checkbox( - label="使用lora训练?", + label=i18n("Use LoRA"), + info=i18n( + "Use LoRA can save GPU memory, but may reduce the quality of the model" + ), value=True, ) with gr.Row(equal_height=False): llama_lr_slider = gr.Slider( - label="初始学习率", + label=i18n("Initial Learning Rate"), interactive=True, minimum=1e-5, maximum=1e-4, @@ -684,7 +722,7 @@ def llama_lora_merge(llama_weight, lora_weight, llama_lora_output): value=init_llama_yml["model"]["optimizer"]["lr"], ) llama_maxsteps_slider = gr.Slider( - label="训练最大步数", + label=i18n("Maximum Training Steps"), interactive=True, minimum=1000, maximum=100000, @@ -693,7 +731,7 @@ def llama_lora_merge(llama_weight, lora_weight, llama_lora_output): ) with gr.Row(equal_height=False): llama_base_config = gr.Dropdown( - label="模型基础属性", + label=i18n("Model Size"), choices=[ "dual_ar_2_codebook_large", "dual_ar_2_codebook_medium", @@ -701,7 +739,7 @@ def llama_lora_merge(llama_weight, lora_weight, llama_lora_output): value="dual_ar_2_codebook_large", ) llama_data_num_workers_slider = gr.Slider( - label="num_workers", + label=i18n("Number of Workers"), minimum=0, maximum=16, step=1, @@ -711,7 +749,7 @@ def llama_lora_merge(llama_weight, lora_weight, llama_lora_output): ) with gr.Row(equal_height=False): llama_data_batch_size_slider = gr.Slider( - label="batch_size", + label=i18n("Batch Size"), interactive=True, minimum=1, maximum=32, @@ -719,7 +757,7 @@ def llama_lora_merge(llama_weight, lora_weight, llama_lora_output): value=init_llama_yml["data"]["batch_size"], ) llama_data_max_length_slider = gr.Slider( - label="max_length", + label=i18n("Maximum Length per Sample"), interactive=True, minimum=1024, maximum=4096, @@ -728,13 +766,16 @@ def llama_lora_merge(llama_weight, lora_weight, llama_lora_output): ) with gr.Row(equal_height=False): llama_precision_dropdown = gr.Dropdown( - label="训练精度", + label=i18n("Precision"), + info=i18n( + "bf16-true is recommended for 30+ series GPU, 16-mixed is recommended for 10+ series GPU" + ), interactive=True, choices=["32", "bf16-true", "16-mixed"], value="bf16-true", ) llama_check_interval_slider = gr.Slider( - label="每n步保存一个模型", + label=i18n("Save model every n steps"), interactive=True, minimum=500, maximum=10000, @@ -743,7 +784,7 @@ def llama_lora_merge(llama_weight, lora_weight, llama_lora_output): ) with gr.Row(equal_height=False): llama_grad_batches = gr.Slider( - label="accumulate_grad_batches", + label=i18n("Accumulate Gradient Batches"), interactive=True, minimum=1, maximum=20, @@ -753,7 +794,7 @@ def llama_lora_merge(llama_weight, lora_weight, llama_lora_output): ], ) llama_use_speaker = gr.Slider( - label="use_speaker_ratio", + label=i18n("Probability of applying Speaker Condition"), interactive=True, minimum=0.1, maximum=1.0, @@ -761,11 +802,11 @@ def llama_lora_merge(llama_weight, lora_weight, llama_lora_output): value=init_llama_yml["train_dataset"]["use_speaker"], ) - with gr.Tab(label="LLAMA_lora融合"): + with gr.Tab(label=i18n("Merge LoRA")): with gr.Row(equal_height=False): llama_weight = gr.Dropdown( - label="要融入的原模型", - info="输入路径,或者下拉选择", + label=i18n("Base LLAMA Model"), + info=i18n("Type the path or select from the dropdown"), choices=[init_llama_yml["ckpt_path"]], value=init_llama_yml["ckpt_path"], allow_custom_value=True, @@ -773,8 +814,8 @@ def llama_lora_merge(llama_weight, lora_weight, llama_lora_output): 
@@ -773,8 +814,8 @@ def llama_lora_merge(llama_weight, lora_weight, llama_lora_output):
                             )
                         with gr.Row(equal_height=False):
                             lora_weight = gr.Dropdown(
-                                label="要融入的lora模型",
-                                info="输入路径,或者下拉选择",
+                                label=i18n("LoRA Model to be merged"),
+                                info=i18n("Type the path or select from the dropdown"),
                                 choices=[
                                     str(p)
                                     for p in Path("results").glob("text2*ar/**/*.ckpt")
@@ -784,8 +825,8 @@ def llama_lora_merge(llama_weight, lora_weight, llama_lora_output):
                             )
                         with gr.Row(equal_height=False):
                             llama_lora_output = gr.Dropdown(
-                                label="输出的lora模型",
-                                info="输出路径",
+                                label=i18n("Output Path"),
+                                info=i18n("Type the path or select from the dropdown"),
                                 value="checkpoints/merged.ckpt",
                                 choices=["checkpoints/merged.ckpt"],
                                 allow_custom_value=True,
@@ -793,20 +834,20 @@ def llama_lora_merge(llama_weight, lora_weight, llama_lora_output):
                             )
                         with gr.Row(equal_height=False):
                             llama_lora_merge_btn = gr.Button(
-                                value="开始融合", variant="primary"
+                                value=i18n("Merge"), variant="primary"
                             )
 
                 with gr.Tab(label="Tensorboard"):
                     with gr.Row(equal_height=False):
                         tb_host = gr.Textbox(
-                            label="Tensorboard Host", value="127.0.0.1"
+                            label=i18n("Tensorboard Host"), value="127.0.0.1"
                         )
                         tb_port = gr.Textbox(
-                            label="Tensorboard Port", value="11451"
+                            label=i18n("Tensorboard Port"), value="11451"
                         )
                     with gr.Row(equal_height=False):
                         tb_dir = gr.Dropdown(
-                            label="Tensorboard 日志文件夹",
+                            label=i18n("Tensorboard Log Path"),
                             allow_custom_value=True,
                             choices=[
                                 str(p)
@@ -817,24 +858,30 @@ def llama_lora_merge(llama_weight, lora_weight, llama_lora_output):
                         )
                     with gr.Row(equal_height=False):
                         if_tb = gr.Checkbox(
-                            label="是否打开tensorboard?",
+                            label=i18n("Open Tensorboard"),
                         )
-            with gr.Tab("\U0001F9E0 进入推理界面"):
+            with gr.Tab("\U0001F9E0 " + i18n("Inference Configuration")):
                 with gr.Column():
                     with gr.Row():
-                        with gr.Accordion(label="\U0001F5A5 推理服务器配置", open=False):
+                        with gr.Accordion(
+                            label="\U0001F5A5 "
+                            + i18n("Inference Server Configuration"),
+                            open=False,
+                        ):
                             with gr.Row():
                                 infer_host_textbox = gr.Textbox(
-                                    label="Webui启动服务器地址", value="127.0.0.1"
+                                    label=i18n("WebUI Host"), value="127.0.0.1"
                                 )
                                 infer_port_textbox = gr.Textbox(
-                                    label="Webui启动服务器端口", value="7862"
+                                    label=i18n("WebUI Port"), value="7862"
                                 )
                             with gr.Row():
                                 infer_vqgan_model = gr.Dropdown(
-                                    label="VQGAN模型位置",
-                                    info="填写pth/ckpt文件路径",
+                                    label=i18n("VQGAN Model Path"),
+                                    info=i18n(
+                                        "Type the path or select from the dropdown"
+                                    ),
                                     value=init_vqgan_yml["ckpt_path"],
                                     choices=[init_vqgan_yml["ckpt_path"]]
                                     + [
@@ -847,8 +894,10 @@ def llama_lora_merge(llama_weight, lora_weight, llama_lora_output):
                                 )
                             with gr.Row():
                                 infer_llama_model = gr.Dropdown(
-                                    label="LLAMA模型位置",
-                                    info="填写pth/ckpt文件路径",
+                                    label=i18n("LLAMA Model Path"),
+                                    info=i18n(
+                                        "Type the path or select from the dropdown"
+                                    ),
                                     value=init_llama_yml["ckpt_path"],
                                     choices=[init_llama_yml["ckpt_path"]]
                                     + [
@@ -861,10 +910,15 @@ def llama_lora_merge(llama_weight, lora_weight, llama_lora_output):
                                 )
                             with gr.Row():
                                 infer_compile = gr.Radio(
-                                    label="是否编译模型?", choices=["Yes", "No"], value="Yes"
+                                    label=i18n("Compile Model"),
+                                    info=i18n(
+                                        "Compile the model can significantly reduce the inference time, but will increase cold start time"
+                                    ),
+                                    choices=["Yes", "No"],
+                                    value="Yes",
                                 )
                                 infer_llama_config = gr.Dropdown(
-                                    label="LLAMA模型基础属性",
+                                    label=i18n("LLAMA Model Config"),
                                     choices=[
                                         "dual_ar_2_codebook_large",
                                         "dual_ar_2_codebook_medium",
@@ -874,27 +928,35 @@ def llama_lora_merge(llama_weight, lora_weight, llama_lora_output):
                                     value="dual_ar_2_codebook_large",
                                 )
 
                     with gr.Row():
-                        infer_checkbox = gr.Checkbox(label="是否打开推理界面")
-                        infer_error = gr.HTML(label="推理界面错误信息")
+                        infer_checkbox = gr.Checkbox(
+                            label=i18n("Open Inference Server")
+                        )
+                        infer_error = gr.HTML(label=i18n("Inference Server Error"))
 
         with gr.Column():
-            train_error = gr.HTML(label="训练时的报错信息")
+            train_error = gr.HTML(label=i18n("Training Error"))
             checkbox_group = gr.CheckboxGroup(
-                label="\U0001F4CA 数据源列表",
-                info="左侧输入文件夹所在路径或filelist。无论是否勾选,在此列表中都会被用以后续训练。",
+                label="\U0001F4CA " + i18n("Data Source"),
+                info=i18n(
+                    "The path of the input folder on the left or the filelist. Whether checked or not, it will be used for subsequent training in this list."
+                ),
                 elem_classes=["data_src"],
             )
             train_box = gr.Textbox(
-                label="数据预处理文件夹路径", value=str(data_pre_output), interactive=False
+                label=i18n("Data Preprocessing Path"),
+                value=str(data_pre_output),
+                interactive=False,
             )
             model_box = gr.Textbox(
-                label="\U0001F4BE 模型输出路径",
+                label="\U0001F4BE " + i18n("Model Output Path"),
                 value=str(default_model_output),
                 interactive=False,
             )
 
             with gr.Accordion(
-                "查看预处理文件夹状态 (滑块为显示深度大小)",
+                i18n(
+                    "View the status of the preprocessing folder (use the slider to control the depth of the tree)"
+                ),
                 elem_classes=["scrollable-component"],
                 elem_id="file_accordion",
             ):
@@ -909,11 +971,14 @@ def llama_lora_merge(llama_weight, lora_weight, llama_lora_output):
                 file_markdown = new_explorer(str(data_pre_output), 0)
             with gr.Row(equal_height=False):
                 admit_btn = gr.Button(
-                    "\U00002705 文件预处理", scale=0, min_width=160, variant="primary"
+                    "\U00002705 " + i18n("File Preprocessing"),
+                    scale=0,
+                    min_width=160,
+                    variant="primary",
                 )
                 fresh_btn = gr.Button("\U0001F503", scale=0, min_width=80)
                 help_button = gr.Button("\U00002753", scale=0, min_width=80)  # question
-                train_btn = gr.Button("训练启动!", variant="primary")
+                train_btn = gr.Button(i18n("Start Training"), variant="primary")
 
 footer = load_data_in_raw("fish_speech/webui/html/footer.html")
 footer = footer.format(
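
Note on the i18n() helper used throughout manage.py above: its definition lives elsewhere in fish_speech/i18n and is not part of this diff. The sketch below only illustrates the lookup behavior the hunks assume -- locale files under fish_speech/i18n/locale/ keyed by the English source string, with the key itself as the fallback (which is why en_US.json maps every string to itself). The names LOCALE_DIR, _load_translations, and _TRANSLATIONS are hypothetical, not the repository's actual implementation.

    # Illustrative sketch only; the real helper is defined in fish_speech/i18n.
    import json
    import locale
    from pathlib import Path

    LOCALE_DIR = Path("fish_speech/i18n/locale")  # layout introduced by this diff


    def _load_translations() -> dict:
        """Load the locale file matching the system language, else en_US."""
        lang = locale.getdefaultlocale()[0] or "en_US"
        path = LOCALE_DIR / f"{lang}.json"
        if not path.exists():
            path = LOCALE_DIR / "en_US.json"
        with path.open(encoding="utf-8") as f:
            return json.load(f)


    _TRANSLATIONS = _load_translations()


    def i18n(key: str) -> str:
        # Keys are the English source strings; fall back to the key itself when
        # a translation is missing, so an untranslated UI degrades to English.
        return _TRANSLATIONS.get(key, key)

Placeholders are filled in after lookup with str.format, as the hunks above do, e.g. i18n("Selected: {}").format(selected_options).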