From 7413800408642e6e992d3e70b8d65e80d8c03b4f Mon Sep 17 00:00:00 2001
From: Blaise <blaise@applio.org>
Date: Sun, 22 Dec 2024 22:24:29 +0100
Subject: [PATCH] Use a single CPU/GPU selector for all training stages

---
 assets/i18n/languages/en_US.json |   3 +-
 tabs/train/train.py              | 273 +++++++++++++------------------
 2 files changed, 119 insertions(+), 157 deletions(-)

diff --git a/assets/i18n/languages/en_US.json b/assets/i18n/languages/en_US.json
index 85e7efad..8cb9af6f 100644
--- a/assets/i18n/languages/en_US.json
+++ b/assets/i18n/languages/en_US.json
@@ -78,8 +78,7 @@
   "By employing pitch guidance, it becomes feasible to mirror the intonation of the original voice, including its pitch. This feature is particularly valuable for singing and other scenarios where preserving the original melody or pitch pattern is essential.": "By employing pitch guidance, it becomes feasible to mirror the intonation of the original voice, including its pitch. This feature is particularly valuable for singing and other scenarios where preserving the original melody or pitch pattern is essential.",
   "Utilize pretrained models when training your own. This approach reduces training duration and enhances overall quality.": "Utilize pretrained models when training your own. This approach reduces training duration and enhances overall quality.",
   "Extract Features": "Extract Features",
-  "We prioritize running the model extraction on the GPU for faster performance. If you prefer to use the CPU, simply leave the GPU field blank.": "We prioritize running the model extraction on the GPU for faster performance. If you prefer to use the CPU, simply leave the GPU field blank.",
-  "We prioritize running the model preprocessing on the GPU for faster performance. If you prefer to use the CPU, simply leave the GPU field blank.": "We prioritize running the model preprocessing on the GPU for faster performance. If you prefer to use the CPU, simply leave the GPU field blank.",
+  "Configure GPU and CPU settings.": "Configure GPU and CPU settings.",
   "Cache Dataset in GPU": "Cache Dataset in GPU",
   "Cache the dataset in GPU memory to speed up the training process.": "Cache the dataset in GPU memory to speed up the training process.",
   "Index Algorithm": "Index Algorithm",
diff --git a/tabs/train/train.py b/tabs/train/train.py
index a67d5178..42a38794 100644
--- a/tabs/train/train.py
+++ b/tabs/train/train.py
@@ -302,6 +302,7 @@ def upload_file(file_path):
 
 # Train Tab
 def train_tab():
+    # Model settings section
     with gr.Accordion(i18n("Model Settings")):
         with gr.Row():
             with gr.Column():
@@ -323,22 +324,22 @@ def train_tab():
 
             with gr.Column():
                 sampling_rate = gr.Radio(
-                        label=i18n("Sampling Rate"),
-                        info=i18n("The sampling rate of the audio files."),
-                        choices=["32000", "40000", "48000"],
-                        value="40000",
-                        interactive=True,
-                    )
+                    label=i18n("Sampling Rate"),
+                    info=i18n("The sampling rate of the audio files."),
+                    choices=["32000", "40000", "48000"],
+                    value="40000",
+                    interactive=True,
+                )
                 vocoder = gr.Radio(
-                        label=i18n("Vocoder"),
-                        info=i18n(
-                            "Vocoder for audio synthesis: HiFi-GAN (default, available for all clients), MRF HiFi-GAN (higher fidelity, Applio-only), or RefineGAN (offering superior audio quality, Applio-only, with slightly slower performance)."
-                        ),
-                        choices=["HiFi-GAN", "MRF HiFi-GAN", "RefineGAN"],
-                        value="HiFi-GAN",
-                        interactive=True,
-                        visible=False,
-                    )
+                    label=i18n("Vocoder"),
+                    info=i18n(
+                        "Vocoder for audio synthesis: HiFi-GAN (default, available for all clients), MRF HiFi-GAN (higher fidelity, Applio-only), or RefineGAN (offering superior audio quality, Applio-only, with slightly slower performance)."
+                    ),
+                    choices=["HiFi-GAN", "MRF HiFi-GAN", "RefineGAN"],
+                    value="HiFi-GAN",
+                    interactive=True,
+                    visible=False,
+                )
                 rvc_version = gr.Radio(
                     label=i18n("Model Architecture"),
                     info=i18n("Version of the model architecture."),
@@ -347,7 +348,41 @@ def train_tab():
                     interactive=True,
                     visible=False,
                 )
+        with gr.Accordion(
+            i18n("Advanced Settings"),
+            open=False,
+        ):
+            with gr.Row():
+                with gr.Column():
+                    cpu_cores = gr.Slider(
+                        1,
+                        min(cpu_count(), 32),  # max 32 parallel processes
+                        min(cpu_count(), 32),
+                        step=1,
+                        label=i18n("CPU Cores"),
+                        info=i18n(
+                            "The number of CPU cores to use in the extraction process. The default setting are your cpu cores, which is recommended for most cases."
+                        ),
+                        interactive=True,
+                    )
 
+                with gr.Column():
+                    gpu = gr.Textbox(
+                        label=i18n("GPU Number"),
+                        info=i18n(
+                            "Specify the number of GPUs you wish to utilize for extracting by entering them separated by hyphens (-)."
+                        ),
+                        placeholder=i18n("0 to ∞ separated by -"),
+                        value=str(get_number_of_gpus()),
+                        interactive=True,
+                    )
+                    gr.Textbox(
+                        label=i18n("GPU Information"),
+                        info=i18n("The GPU information will be displayed here."),
+                        value=get_gpu_info(),
+                        interactive=False,
+                    )
+    # Preprocess section
     with gr.Accordion(i18n("Preprocess")):
         dataset_path = gr.Dropdown(
             label=i18n("Dataset Path"),
@@ -379,26 +414,15 @@ def train_tab():
         refresh = gr.Button(i18n("Refresh"))
 
         with gr.Accordion(i18n("Advanced Settings"), open=False):
-            cpu_cores_preprocess = gr.Slider(
-                1,
-                min(cpu_count(), 32),  # max 32 parallel processes
-                min(cpu_count(), 32),
-                step=1,
-                label=i18n("CPU Cores"),
+            cut_preprocess = gr.Radio(
+                label=i18n("Audio cutting"),
                 info=i18n(
-                    "The number of CPU cores to use in the preprocess. The default setting are your cpu cores, which is recommended for most cases."
+                    "Audio file slicing method: Select 'Skip' if the files are already pre-sliced, 'Simple' if excessive silence has already been removed from the files, or 'Automatic' for automatic silence detection and slicing around it."
                 ),
+                choices=["Skip", "Simple", "Automatic"],
+                value="Automatic",
                 interactive=True,
             )
-            cut_preprocess = gr.Radio(
-                    label=i18n("Audio cutting"),
-                    info=i18n(
-                        "Audio file slicing method: Select 'Skip' if the files are already pre-sliced, 'Simple' if excessive silence has already been removed from the files, or 'Automatic' for automatic silence detection and slicing around it."
-                    ),
-                    choices=["Skip", "Simple", "Automatic"],
-                    value="Automatic",
-                    interactive=True,
-                )
             with gr.Row():
                 chunk_len = gr.Slider(
                     0.5,
@@ -415,10 +439,12 @@ def train_tab():
                     0.3,
                     step=0.1,
                     label=i18n("Overlap length (sec)"),
-                    info=i18n("Length of the overlap between slices for 'Simple' method."),
+                    info=i18n(
+                        "Length of the overlap between slices for 'Simple' method."
+                    ),
                     interactive=True,
                 )
-                
+
             with gr.Row():
                 process_effects = gr.Checkbox(
                     label=i18n("Process effects"),
@@ -439,16 +465,16 @@ def train_tab():
                     visible=True,
                 )
             clean_strength = gr.Slider(
-                    minimum=0,
-                    maximum=1,
-                    label=i18n("Noise Reduction Strength"),
-                    info=i18n(
-                        "Set the clean-up level to the audio you want, the more you increase it the more it will clean up, but it is possible that the audio will be more compressed."
-                    ),
-                    visible=False,
-                    value=0.5,
-                    interactive=True,
-                )
+                minimum=0,
+                maximum=1,
+                label=i18n("Noise Reduction Strength"),
+                info=i18n(
+                    "Set the clean-up level to the audio you want, the more you increase it the more it will clean up, but it is possible that the audio will be more compressed."
+                ),
+                visible=False,
+                value=0.5,
+                interactive=True,
+            )
         preprocess_output_info = gr.Textbox(
             label=i18n("Output Information"),
             info=i18n("The output information will be displayed here."),
@@ -465,7 +491,7 @@ def train_tab():
                     model_name,
                     dataset_path,
                     sampling_rate,
-                    cpu_cores_preprocess,
+                    cpu_cores,
                     cut_preprocess,
                     process_effects,
                     noise_reduction,
@@ -476,6 +502,7 @@ def train_tab():
                 outputs=[preprocess_output_info],
             )
 
+    # Extract section
     with gr.Accordion(i18n("Extract")):
         with gr.Row():
             f0_method = gr.Radio(
@@ -509,7 +536,7 @@ def train_tab():
             label=i18n("Silent training files"),
             info=i18n(
                 "Adding several silent files to the training set enables the model to handle pure silence in inferred audio files. Select 0 if your dataset is clean and already contains segments of pure silence."
-                ),
+            ),
             value=True,
             interactive=True,
         )
@@ -545,43 +572,6 @@ def train_tab():
                     )
                 move_files_button = gr.Button("Move files to custom embedder folder")
 
-        with gr.Accordion(
-            i18n(
-                "We prioritize running the model extraction on the GPU for faster performance. If you prefer to use the CPU, simply leave the GPU field blank."
-            ),
-            open=False,
-        ):
-            with gr.Row():
-                with gr.Column():
-                    cpu_cores_extract = gr.Slider(
-                        1,
-                        min(cpu_count(), 32),  # max 32 parallel processes
-                        min(cpu_count(), 32),
-                        step=1,
-                        label=i18n("CPU Cores"),
-                        info=i18n(
-                            "The number of CPU cores to use in the extraction process. The default setting are your cpu cores, which is recommended for most cases."
-                        ),
-                        interactive=True,
-                    )
-
-                with gr.Column():
-                    gpu_extract = gr.Textbox(
-                        label=i18n("GPU Number"),
-                        info=i18n(
-                            "Specify the number of GPUs you wish to utilize for extracting by entering them separated by hyphens (-)."
-                        ),
-                        placeholder=i18n("0 to ∞ separated by -"),
-                        value=str(get_number_of_gpus()),
-                        interactive=True,
-                    )
-                    gr.Textbox(
-                        label=i18n("GPU Information"),
-                        info=i18n("The GPU information will be displayed here."),
-                        value=get_gpu_info(),
-                        interactive=False,
-                    )
-
         extract_output_info = gr.Textbox(
             label=i18n("Output Information"),
             info=i18n("The output information will be displayed here."),
@@ -597,16 +587,17 @@ def train_tab():
                 rvc_version,
                 f0_method,
                 hop_length,
-                cpu_cores_extract,
-                gpu_extract,
+                cpu_cores,
+                gpu,
                 sampling_rate,
                 embedder_model,
                 embedder_model_custom,
-                include_mutes
+                include_mutes,
             ],
             outputs=[extract_output_info],
         )
 
+    # Training section
     with gr.Accordion(i18n("Training")):
         with gr.Row():
             batch_size = gr.Slider(
@@ -686,19 +677,22 @@ def train_tab():
                     )
                     checkpointing = gr.Checkbox(
                         label=i18n("Checkpointing"),
-                        info=i18n("Enables memory-efficient training. This reduces VRAM usage at the cost of slower training speed. It is useful for GPUs with limited memory (e.g., <6GB VRAM) or when training with a batch size larger than what your GPU can normally accommodate."),
+                        info=i18n(
+                            "Enables memory-efficient training. This reduces VRAM usage at the cost of slower training speed. It is useful for GPUs with limited memory (e.g., <6GB VRAM) or when training with a batch size larger than what your GPU can normally accommodate."
+                        ),
+                        value=False,
+                        interactive=True,
+                    )
+            with gr.Row():
+                with gr.Column():
+                    custom_pretrained = gr.Checkbox(
+                        label=i18n("Custom Pretrained"),
+                        info=i18n(
+                            "Utilizing custom pretrained models can lead to superior results, as selecting the most suitable pretrained models tailored to the specific use case can significantly enhance performance."
+                        ),
                         value=False,
                         interactive=True,
                     )
-            with gr.Column():
-                custom_pretrained = gr.Checkbox(
-                    label=i18n("Custom Pretrained"),
-                    info=i18n(
-                        "Utilizing custom pretrained models can lead to superior results, as selecting the most suitable pretrained models tailored to the specific use case can significantly enhance performance."
-                    ),
-                    value=False,
-                    interactive=True,
-                )
                 with gr.Column(visible=False) as pretrained_custom_settings:
                     with gr.Accordion(i18n("Pretrained Custom Settings")):
                         upload_pretrained = gr.File(
@@ -727,33 +721,6 @@ def train_tab():
                             interactive=True,
                             allow_custom_value=True,
                         )
-                multiple_gpu = gr.Checkbox(
-                    label=i18n("GPU Settings"),
-                    info=(
-                        i18n(
-                            "Sets advanced GPU settings, recommended for users with better GPU architecture."
-                        )
-                    ),
-                    value=False,
-                    interactive=True,
-                )
-                with gr.Column(visible=False) as gpu_custom_settings:
-                    with gr.Accordion(i18n("GPU Settings")):
-                        gpu = gr.Textbox(
-                            label=i18n("GPU Number"),
-                            info=i18n(
-                                "Specify the number of GPUs you wish to utilize for training by entering them separated by hyphens (-)."
-                            ),
-                            placeholder=i18n("0 to ∞ separated by -"),
-                            value=str(get_number_of_gpus()),
-                            interactive=True,
-                        )
-                        gr.Textbox(
-                            label=i18n("GPU Information"),
-                            info=i18n("The GPU information will be displayed here."),
-                            value=get_gpu_info(),
-                            interactive=False,
-                        )
                 overtraining_detector = gr.Checkbox(
                     label=i18n("Overtraining Detector"),
                     info=i18n(
@@ -775,15 +742,15 @@ def train_tab():
                             ),
                             interactive=True,
                         )
-                index_algorithm = gr.Radio(
-                    label=i18n("Index Algorithm"),
-                    info=i18n(
-                        "KMeans is a clustering algorithm that divides the dataset into K clusters. This setting is particularly useful for large datasets."
-                    ),
-                    choices=["Auto", "Faiss", "KMeans"],
-                    value="Auto",
-                    interactive=True,
-                )
+            index_algorithm = gr.Radio(
+                label=i18n("Index Algorithm"),
+                info=i18n(
+                    "KMeans is a clustering algorithm that divides the dataset into K clusters. This setting is particularly useful for large datasets."
+                ),
+                choices=["Auto", "Faiss", "KMeans"],
+                value="Auto",
+                interactive=True,
+            )
 
         def enforce_terms(terms_accepted, *args):
             if not terms_accepted:
@@ -833,7 +800,7 @@ def enforce_terms(terms_accepted, *args):
                     g_pretrained_path,
                     d_pretrained_path,
                     vocoder,
-                    checkpointing
+                    checkpointing,
                 ],
                 outputs=[train_output_info],
             )
@@ -852,6 +819,7 @@ def enforce_terms(terms_accepted, *args):
                 outputs=[train_output_info],
             )
 
+    # Export Model section
     with gr.Accordion(i18n("Export Model"), open=False):
         if not os.name == "nt":
             gr.Markdown(
@@ -936,28 +904,28 @@ def disable_stop_train_button():
             def download_prerequisites(version):
                 if version == "v1":
                     gr.Info(
-                            "Checking for v1 prerequisites with pitch guidance... Missing files will be downloaded. If you already have them, this step will be skipped."
-                        )
+                        "Checking for v1 prerequisites with pitch guidance... Missing files will be downloaded. If you already have them, this step will be skipped."
+                    )
                     run_prerequisites_script(
-                            pretraineds_v1_f0=True,
-                            pretraineds_v1_nof0=False,
-                            pretraineds_v2_f0=False,
-                            pretraineds_v2_nof0=False,
-                            models=False,
-                            exe=False,
-                        )
+                        pretraineds_v1_f0=True,
+                        pretraineds_v1_nof0=False,
+                        pretraineds_v2_f0=False,
+                        pretraineds_v2_nof0=False,
+                        models=False,
+                        exe=False,
+                    )
                 elif version == "v2":
                     gr.Info(
-                            "Checking for v2 prerequisites with pitch guidance... Missing files will be downloaded. If you already have them, this step will be skipped."
-                        )
+                        "Checking for v2 prerequisites with pitch guidance... Missing files will be downloaded. If you already have them, this step will be skipped."
+                    )
                     run_prerequisites_script(
-                            pretraineds_v1_f0=False,
-                            pretraineds_v1_nof0=False,
-                            pretraineds_v2_f0=True,
-                            pretraineds_v2_nof0=False,
-                            models=False,
-                            exe=False,
-                        )
+                        pretraineds_v1_f0=False,
+                        pretraineds_v1_nof0=False,
+                        pretraineds_v2_f0=True,
+                        pretraineds_v2_nof0=False,
+                        models=False,
+                        exe=False,
+                    )
 
                 gr.Info(
                     "Prerequisites check complete. Missing files were downloaded, and you may now start preprocessing."
@@ -1060,11 +1028,6 @@ def update_slider_visibility(noise_reduction):
                 inputs=[overtraining_detector],
                 outputs=[overtraining_settings],
             )
-            multiple_gpu.change(
-                fn=toggle_visible,
-                inputs=[multiple_gpu],
-                outputs=[gpu_custom_settings],
-            )
             train_button.click(
                 fn=enable_stop_train_button,
                 inputs=[],