Merge pull request #1 from TheNeodev/formatter/main

chore(format): run black on main
TheNeodev · Dec 17, 2024 · ac2c2e0 · ac2c2e0
2 parents f44f053 + 2ef0a35
commit ac2c2e0
Show file tree

Hide file tree

Showing 10 changed files with 1,031 additions and 533 deletions.
diff --git a/rvc_inferpy/cli.py b/rvc_inferpy/cli.py
@@ -6,40 +6,88 @@
 import gc
 from rvc_inferpy.modules import VC
 from rvc_inferpy.infer import Configs, get_model
-from rvc_inferpy.split_audio import split_silence_nonsilent, adjust_audio_lengths, combine_silence_nonsilent
+from rvc_inferpy.split_audio import (
+    split_silence_nonsilent,
+    adjust_audio_lengths,
+    combine_silence_nonsilent,
+)
+
 
 def infer_audio_cli():
     parser = argparse.ArgumentParser(description="RVC INFERPY CLI VER.")
     parser.add_argument("--model_name", type=str, help="Name of the model.")
     parser.add_argument("--audio_path", type=str, help="Path to the input audio file.")
-    parser.add_argument("--f0_change", type=float, default=0, help="Pitch change factor.")
-    parser.add_argument("--f0_method", type=str, default="rmvpe+", help="Method for F0 estimation.")
-    parser.add_argument("--min_pitch", type=str, default="50", help="Minimum pitch value.")
-    parser.add_argument("--max_pitch", type=str, default="1100", help="Maximum pitch value.")
-    parser.add_argument("--crepe_hop_length", type=int, default=128, help="Crepe hop length.")
+    parser.add_argument(
+        "--f0_change", type=float, default=0, help="Pitch change factor."
+    )
+    parser.add_argument(
+        "--f0_method", type=str, default="rmvpe+", help="Method for F0 estimation."
+    )
+    parser.add_argument(
+        "--min_pitch", type=str, default="50", help="Minimum pitch value."
+    )
+    parser.add_argument(
+        "--max_pitch", type=str, default="1100", help="Maximum pitch value."
+    )
+    parser.add_argument(
+        "--crepe_hop_length", type=int, default=128, help="Crepe hop length."
+    )
     parser.add_argument("--index_rate", type=float, default=0.75, help="Index rate.")
     parser.add_argument("--filter_radius", type=int, default=3, help="Filter radius.")
-    parser.add_argument("--rms_mix_rate", type=float, default=0.25, help="RMS mix rate.")
+    parser.add_argument(
+        "--rms_mix_rate", type=float, default=0.25, help="RMS mix rate."
+    )
     parser.add_argument("--protect", type=float, default=0.33, help="Protect factor.")
-    parser.add_argument("--split_infer", action="store_true", help="Enable split inference.")
-    parser.add_argument("--min_silence", type=int, default=500, help="Minimum silence duration.")
-    parser.add_argument("--silence_threshold", type=float, default=-50, help="Silence threshold (dB).")
-    parser.add_argument("--seek_step", type=int, default=1, help="Seek step for silence detection.")
-    parser.add_argument("--keep_silence", type=int, default=100, help="Silence retention duration.")
-    parser.add_argument("--do_formant", action="store_true", help="Enable formant processing.")
-    parser.add_argument("--quefrency", type=float, default=0, help="Quefrency adjustment value.")
-    parser.add_argument("--timbre", type=float, default=1, help="Timbre adjustment factor.")
-    parser.add_argument("--f0_autotune", action="store_true", help="Enable F0 autotuning.")
-    parser.add_argument("--audio_format", type=str, default="wav", help="Output audio format.")
-    parser.add_argument("--resample_sr", type=int, default=0, help="Resample sample rate.")
-    parser.add_argument("--hubert_model_path", type=str, default="hubert_base.pt", help="Path to Hubert model.")
-    parser.add_argument("--rmvpe_model_path", type=str, default="rmvpe.pt", help="Path to RMVPE model.")
-    parser.add_argument("--fcpe_model_path", type=str, default="fcpe.pt", help="Path to FCPE model.")
+    parser.add_argument(
+        "--split_infer", action="store_true", help="Enable split inference."
+    )
+    parser.add_argument(
+        "--min_silence", type=int, default=500, help="Minimum silence duration."
+    )
+    parser.add_argument(
+        "--silence_threshold", type=float, default=-50, help="Silence threshold (dB)."
+    )
+    parser.add_argument(
+        "--seek_step", type=int, default=1, help="Seek step for silence detection."
+    )
+    parser.add_argument(
+        "--keep_silence", type=int, default=100, help="Silence retention duration."
+    )
+    parser.add_argument(
+        "--do_formant", action="store_true", help="Enable formant processing."
+    )
+    parser.add_argument(
+        "--quefrency", type=float, default=0, help="Quefrency adjustment value."
+    )
+    parser.add_argument(
+        "--timbre", type=float, default=1, help="Timbre adjustment factor."
+    )
+    parser.add_argument(
+        "--f0_autotune", action="store_true", help="Enable F0 autotuning."
+    )
+    parser.add_argument(
+        "--audio_format", type=str, default="wav", help="Output audio format."
+    )
+    parser.add_argument(
+        "--resample_sr", type=int, default=0, help="Resample sample rate."
+    )
+    parser.add_argument(
+        "--hubert_model_path",
+        type=str,
+        default="hubert_base.pt",
+        help="Path to Hubert model.",
+    )
+    parser.add_argument(
+        "--rmvpe_model_path", type=str, default="rmvpe.pt", help="Path to RMVPE model."
+    )
+    parser.add_argument(
+        "--fcpe_model_path", type=str, default="fcpe.pt", help="Path to FCPE model."
+    )
     args = parser.parse_args()
 
     os.environ["rmvpe_model_path"] = args.rmvpe_model_path
     os.environ["fcpe_model_path"] = args.fcpe_model_path
-    configs = Configs('cuda:0', True)
+    configs = Configs("cuda:0", True)
     vc = VC(configs)
     pth_path, index_path = get_model(args.model_name)
     vc_data = vc.get_vc(pth_path, args.protect, 0.5)
@@ -50,7 +98,11 @@ def infer_audio_cli():
         os.makedirs(temp_dir, exist_ok=True)
         print("Splitting audio into silence and nonsilent segments.")
         silence_files, nonsilent_files = split_silence_nonsilent(
-            args.audio_path, args.min_silence, args.silence_threshold, args.seek_step, args.keep_silence
+            args.audio_path,
+            args.min_silence,
+            args.silence_threshold,
+            args.seek_step,
+            args.keep_silence,
         )
         for i, nonsilent_file in enumerate(nonsilent_files):
             print(f"Processing nonsilent audio {i+1}/{len(nonsilent_files)}")
@@ -74,7 +126,7 @@ def infer_audio_cli():
                 args.min_pitch,
                 args.max_pitch,
                 args.f0_autotune,
-                args.hubert_model_path
+                args.hubert_model_path,
             )
             if inference_info[0] == "Success.":
                 print("Inference ran successfully.")
@@ -85,7 +137,9 @@ def infer_audio_cli():
             inferred_files.append(output_path)
 
         adjusted_inferred_files = adjust_audio_lengths(nonsilent_files, inferred_files)
-        output_path = combine_silence_nonsilent(silence_files, adjusted_inferred_files, args.keep_silence, output_path)
+        output_path = combine_silence_nonsilent(
+            silence_files, adjusted_inferred_files, args.keep_silence, output_path
+        )
         shutil.rmtree(temp_dir)
     else:
         inference_info, audio_data, output_path = vc.vc_single(
@@ -108,7 +162,7 @@ def infer_audio_cli():
             args.min_pitch,
             args.max_pitch,
             args.f0_autotune,
-            args.hubert_model_path
+            args.hubert_model_path,
         )
         if inference_info[0] == "Success.":
             print("Inference ran successfully.")
@@ -121,5 +175,6 @@ def infer_audio_cli():
     gc.collect()
     print(f"Output saved to: {output_path}")
 
+
 if __name__ == "__main__":
     infer_audio_cli()
diff --git a/rvc_inferpy/infer.py b/rvc_inferpy/infer.py
@@ -4,12 +4,15 @@
 import torch
 from multiprocessing import cpu_count
 from rvc_inferpy.modules import VC
-from rvc_inferpy.split_audio import split_silence_nonsilent, adjust_audio_lengths, combine_silence_nonsilent
+from rvc_inferpy.split_audio import (
+    split_silence_nonsilent,
+    adjust_audio_lengths,
+    combine_silence_nonsilent,
+)
 from pathlib import Path
 import requests
 
 
-
 class Configs:
     def __init__(self, device, is_half):
         self.device = device
@@ -54,25 +57,24 @@ def device_config(self) -> tuple:
 
         return x_pad, x_query, x_center, x_max
 
+
 def get_model(voice_model):
     model_dir = os.path.join(os.getcwd(), "models", voice_model)
     model_filename, index_filename = None, None
     for file in os.listdir(model_dir):
         ext = os.path.splitext(file)[1]
-        if ext == '.pth':
+        if ext == ".pth":
             model_filename = file
-        if ext == '.index':
+        if ext == ".index":
             index_filename = file
 
     if model_filename is None:
-        print(f'No model file exists in {models_dir}.')
+        print(f"No model file exists in {model_dir}.")
         return None, None
 
-    return os.path.join(model_dir, model_filename), os.path.join(model_dir, index_filename) if index_filename else ''
-
-
-
-
+    return os.path.join(model_dir, model_filename), (
+        os.path.join(model_dir, index_filename) if index_filename else ""
+    )
 
 
 BASE_DIR = Path(os.getcwd())  # Use Path for better path handling
@@ -83,21 +85,25 @@ def get_model(voice_model):
 # Check for missing files
 missing_files = [file for file in files_to_check if not (BASE_DIR / file).exists()]
 
+
 # Define the download function
 def dl_model(link, model_name, dir_name):
     url = f"{link}/{model_name}"
     response = requests.get(url, stream=True)
     response.raise_for_status()
 
     target_path = dir_name / model_name
-    target_path.parent.mkdir(parents=True, exist_ok=True)  # Create the directory if it doesn't exist
+    target_path.parent.mkdir(
+        parents=True, exist_ok=True
+    )  # Create the directory if it doesn't exist
 
     with open(target_path, "wb") as f:
         for chunk in response.iter_content(chunk_size=8192):
             f.write(chunk)
 
     print(f"{model_name} downloaded successfully!")
 
+
 # Download missing files if any
 if missing_files:
     RVC_DOWNLOAD_LINK = "https://huggingface.co/theNeofr/rvc-base/resolve/main"  # Replace with the actual download link
@@ -111,9 +117,6 @@ def dl_model(link, model_name, dir_name):
     print("All required files are already present.")
 
 
-
-
-
 def infer_audio(
     model_name,
     audio_path,
@@ -139,52 +142,57 @@ def infer_audio(
     resample_sr=0,
     hubert_model_path="hubert_base.pt",
     rmvpe_model_path="rmvpe.pt",
-    fcpe_model_path="fcpe.pt"
-    ):
+    fcpe_model_path="fcpe.pt",
+):
     os.environ["rmvpe_model_path"] = rmvpe_model_path
     os.environ["fcpe_model_path"] = fcpe_model_path
-    configs = Configs('cuda:0', True)
+    configs = Configs("cuda:0", True)
     vc = VC(configs)
     pth_path, index_path = get_model(model_name)
     vc_data = vc.get_vc(pth_path, protect, 0.5)
 
-
-
     if split_infer:
         inferred_files = []
         temp_dir = os.path.join(os.getcwd(), "seperate", "temp")
         os.makedirs(temp_dir, exist_ok=True)
         print("Splitting audio to silence and nonsilent segments.")
-        silence_files, nonsilent_files = split_silence_nonsilent(audio_path, min_silence, silence_threshold, seek_step, keep_silence)
-        print(f"Total silence segments: {len(silence_files)}.\nTotal nonsilent segments: {len(nonsilent_files)}.")
+        silence_files, nonsilent_files = split_silence_nonsilent(
+            audio_path, min_silence, silence_threshold, seek_step, keep_silence
+        )
+        print(
+            f"Total silence segments: {len(silence_files)}.\nTotal nonsilent segments: {len(nonsilent_files)}."
+        )
         for i, nonsilent_file in enumerate(nonsilent_files):
             print(f"Inferring nonsilent audio {i+1}")
             inference_info, audio_data, output_path = vc.vc_single(
-            0,
-            nonsilent_file,
-            f0_change,
-            f0_method,
-            index_path,
-            index_path,
-            index_rate,
-            filter_radius,
-            resample_sr,
-            rms_mix_rate,
-            protect,
-            audio_format,
-            crepe_hop_length,
-            do_formant,
-            quefrency,
-            timbre,
-            min_pitch,
-            max_pitch,
-            f0_autotune,
-            hubert_model_path
+                0,
+                nonsilent_file,
+                f0_change,
+                f0_method,
+                index_path,
+                index_path,
+                index_rate,
+                filter_radius,
+                resample_sr,
+                rms_mix_rate,
+                protect,
+                audio_format,
+                crepe_hop_length,
+                do_formant,
+                quefrency,
+                timbre,
+                min_pitch,
+                max_pitch,
+                f0_autotune,
+                hubert_model_path,
             )
             if inference_info[0] == "Success.":
                 print("Inference ran successfully.")
                 print(inference_info[1])
-                print("Times:\nnpy: %.2fs f0: %.2fs infer: %.2fs\nTotal time: %.2fs" % (*inference_info[2],))
+                print(
+                    "Times:\nnpy: %.2fs f0: %.2fs infer: %.2fs\nTotal time: %.2fs"
+                    % (*inference_info[2],)
+                )
             else:
                 print(f"An error occurred while processing.\n{inference_info[0]}")
                 return None
@@ -194,11 +202,17 @@ def infer_audio(
         print("Combining silence and inferred audios.")
         output_count = 1
         while True:
-            output_path = os.path.join(os.getcwd(), "output", f"{os.path.splitext(os.path.basename(audio_path))[0]}{model_name}{f0_method.capitalize()}_{output_count}.{audio_format}")
+            output_path = os.path.join(
+                os.getcwd(),
+                "output",
+                f"{os.path.splitext(os.path.basename(audio_path))[0]}{model_name}{f0_method.capitalize()}_{output_count}.{audio_format}",
+            )
             if not os.path.exists(output_path):
                 break
             output_count += 1
-        output_path = combine_silence_nonsilent(silence_files, adjusted_inferred_files, keep_silence, output_path)
+        output_path = combine_silence_nonsilent(
+            silence_files, adjusted_inferred_files, keep_silence, output_path
+        )
         [shutil.move(inferred_file, temp_dir) for inferred_file in inferred_files]
         shutil.rmtree(temp_dir)
     else:
@@ -222,18 +236,21 @@ def infer_audio(
             min_pitch,
             max_pitch,
             f0_autotune,
-            hubert_model_path
+            hubert_model_path,
         )
         if inference_info[0] == "Success.":
             print("Inference ran successfully.")
             print(inference_info[1])
-            print("Times:\nnpy: %.2fs f0: %.2fs infer: %.2fs\nTotal time: %.2fs" % (*inference_info[2],))
+            print(
+                "Times:\nnpy: %.2fs f0: %.2fs infer: %.2fs\nTotal time: %.2fs"
+                % (*inference_info[2],)
+            )
         else:
             print(f"An error occurred while processing.\n{inference_info[0]}")
             del configs, vc
             gc.collect()
             return inference_info[0]
-    
+
     del configs, vc
     gc.collect()
     return output_path