Merge pull request #657 from IAHispano/formatter/main
chore(format): run black on main
blaisewf authored Sep 1, 2024
2 parents 029665a + cb5edde commit 1314b82
Showing 2 changed files with 54 additions and 15 deletions.
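Context: Black is Python's opinionated autoformatter; running it (typically `black .` from the repository root) rewrites layout without changing behavior, which is why every hunk below is a pure reformat. As a minimal sketch, Black's Python API can be fed one of the long lines from this diff (black.Mode() uses the default 88-column limit):

import black  # pip install black

src = (
    'wavfile.write(os.path.join(self.gt_wavs_dir, f"{idx0}_{idx1}.wav"),'
    " self.sr, normalized_audio.astype(np.float32))\n"
)
# splits the call across lines with a trailing comma, as in the hunks below
print(black.format_str(src, mode=black.Mode()))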
22 changes: 16 additions & 6 deletions rvc/train/extract/extract.py
@@ -3,6 +3,7 @@
 import time
 import tqdm
 import torch
+
 # Zluda
 if torch.cuda.is_available() and torch.cuda.get_device_name().endswith("[ZLUDA]"):
     torch.backends.cudnn.enabled = False
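A note on the hunk above: ZLUDA is a translation layer that runs CUDA workloads on non-NVIDIA GPUs, and devices running under it report a name suffixed with "[ZLUDA]"; cuDNN kernels aren't available there, so the code switches them off. The same guard as a self-contained sketch (the helper name is illustrative, not from the repo):

import torch

def disable_cudnn_under_zluda() -> None:
    # ZLUDA devices advertise themselves via the "[ZLUDA]" name suffix;
    # falling back to PyTorch's native kernels avoids unsupported cuDNN calls
    if torch.cuda.is_available() and torch.cuda.get_device_name().endswith("[ZLUDA]"):
        torch.backends.cudnn.enabled = False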
@@ -24,6 +25,7 @@
 # Load config
 config = Config()

+
 class FeatureInput:
     """Class for F0 extraction."""

@@ -95,7 +97,7 @@ def coarse_f0(self, f0):
     def process_file(self, file_info, f0_method, hop_length):
         """Process a single audio file for F0 extraction."""
         inp_path, opt_path1, opt_path2, _ = file_info
-        #print(f"Process file {inp_path}. Class on {self.device}, model is on {self.model_rmvpe.device}")
+        # print(f"Process file {inp_path}. Class on {self.device}, model is on {self.model_rmvpe.device}")

         if os.path.exists(opt_path1) and os.path.exists(opt_path2):
             return
@@ -107,14 +109,17 @@ def process_file(self, file_info, f0_method, hop_length):
                 coarse_pit = self.coarse_f0(feature_pit)
                 np.save(opt_path1, coarse_pit, allow_pickle=False)
             except Exception as error:
-                print(f"An error occurred extracting file {inp_path} on {self.device}: {error}")
+                print(
+                    f"An error occurred extracting file {inp_path} on {self.device}: {error}"
+                )

     def process_files(self, files, f0_method, hop_length, pbar):
         """Process multiple files."""
         for file_info in files:
             self.process_file(file_info, f0_method, hop_length)
             pbar.update(1)

+
 def run_pitch_extraction(files, devices, f0_method, hop_length, num_processes):
     print(f"Starting pitch extraction with {num_processes} cores and {f0_method}...")
     start_time = time.time()
@@ -128,7 +133,7 @@ def run_pitch_extraction(files, devices, f0_method, hop_length, num_processes):
         part_paths = files[idx::num_gpus]
         process_partials.append((feature_input, part_paths))

-    with concurrent.futures.ThreadPoolExecutor(max_workers = num_processes) as executor:
+    with concurrent.futures.ThreadPoolExecutor(max_workers=num_processes) as executor:
         futures = [
             executor.submit(
                 FeatureInput.process_files,
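The scheduling pattern in this function: the file list is sharded across devices with strided slicing (files[idx::num_gpus]), and each shard is submitted as one job to a thread pool. A standalone sketch of that idea (all names here are illustrative):

import concurrent.futures

def process_shard(worker_id: int, paths: list[str]) -> None:
    for path in paths:
        print(f"worker {worker_id}: {path}")  # real code would extract F0 here

files = [f"{i}.wav" for i in range(10)]
num_workers = 3
shards = [files[i::num_workers] for i in range(num_workers)]  # round-robin split

with concurrent.futures.ThreadPoolExecutor(max_workers=num_workers) as executor:
    futures = [
        executor.submit(process_shard, i, shard) for i, shard in enumerate(shards)
    ]
    concurrent.futures.wait(futures)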
@@ -147,6 +152,7 @@ def run_pitch_extraction(files, devices, f0_method, hop_length, num_processes):
     elapsed_time = time.time() - start_time
     print(f"Pitch extraction completed in {elapsed_time:.2f} seconds.")

+
 def process_file_embedding(file_info, model, device):
     """Process a single audio file for embedding extraction."""
     wav_file_path, _, _, out_file_path = file_info
@@ -168,6 +174,7 @@ def process_file_embedding(file_info, model, device):
     else:
         print(f"{file} contains NaN values and will be skipped.")

+
 def run_embedding_extraction(files, devices, embedder_model, embedder_model_custom):
     """Main function to orchestrate the embedding extraction process."""
     print("Starting embedding extraction...")
@@ -178,7 +185,7 @@ def run_embedding_extraction(files, devices, embedder_model, embedder_model_custom):

     # add multi-threading here?
     for i, file_info in enumerate(files):
-        device = devices[i%len(devices)]
+        device = devices[i % len(devices)]
         try:
             process_file_embedding(file_info, model, device)
         except Exception as error:
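The i % len(devices) indexing assigns files to devices round-robin, the modulo counterpart of the strided files[idx::num_gpus] split used for pitch extraction above:

devices = ["cuda:0", "cuda:1"]  # hypothetical device list
assignments = [devices[i % len(devices)] for i in range(5)]
# -> ["cuda:0", "cuda:1", "cuda:0", "cuda:1", "cuda:0"]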
@@ -189,6 +196,7 @@ def run_embedding_extraction(files, devices, embedder_model, embedder_model_custom):
     elapsed_time = time.time() - start_time
     print(f"Embedding extraction completed in {elapsed_time:.2f} seconds.")

+
 if __name__ == "__main__":

     exp_dir = sys.argv[1]
@@ -212,10 +220,12 @@ def run_embedding_extraction(files, devices, embedder_model, embedder_model_custom):
     for file in glob.glob(os.path.join(wav_path, "*.wav")):
         file_name = os.path.basename(file)
         file_info = [
-            file, # full path to sliced 16k wav
+            file,  # full path to sliced 16k wav
             os.path.join(exp_dir, "f0", file_name + ".npy"),
             os.path.join(exp_dir, "f0_voiced", file_name + ".npy"),
-            os.path.join(exp_dir, version + "_extracted", file_name.replace("wav", "npy"))
+            os.path.join(
+                exp_dir, version + "_extracted", file_name.replace("wav", "npy")
+            ),
         ]
         files.append(file_info)

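One detail worth knowing about that last entry: str.replace substitutes every occurrence of the substring, not just the extension, so a stem that itself contains "wav" is rewritten too. A quick illustration (the file name is hypothetical):

import os

name = "voice_wav.wav"
name.replace("wav", "npy")          # -> "voice_npy.npy" (both occurrences)
os.path.splitext(name)[0] + ".npy"  # -> "voice_wav.npy" (extension only)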
47 changes: 38 additions & 9 deletions rvc/train/preprocess/preprocess.py
@@ -43,7 +43,9 @@ def __init__(self, sr: int, exp_dir: str, per: float):
             max_sil_kept=500,
         )
         self.sr = sr
-        self.b_high, self.a_high = signal.butter(N=5, Wn=HIGH_PASS_CUTOFF, btype="high", fs=self.sr)
+        self.b_high, self.a_high = signal.butter(
+            N=5, Wn=HIGH_PASS_CUTOFF, btype="high", fs=self.sr
+        )
         self.per = per
         self.exp_dir = exp_dir
         self.device = "cpu"
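For reference, signal.butter with fs= designs the filter directly in Hz and returns transfer-function coefficients (b, a) that are applied to the audio later. A minimal sketch with assumed values, since the module's constants aren't visible in this hunk:

import numpy as np
from scipy import signal

sr = 40000             # assumed sample rate
HIGH_PASS_CUTOFF = 48  # Hz; assumed value of the module constant

# 5th-order Butterworth high-pass: strips DC offset and sub-cutoff rumble
b_high, a_high = signal.butter(N=5, Wn=HIGH_PASS_CUTOFF, btype="high", fs=sr)

audio = np.random.randn(sr).astype(np.float32)    # one second of noise
filtered = signal.lfilter(b_high, a_high, audio)  # one common way to apply (b, a)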
@@ -65,13 +67,25 @@ def process_audio_segment(
         idx1: int,
         process_effects: bool,
     ):
-        normalized_audio = self._normalize_audio(audio_segment) if process_effects else audio_segment
+        normalized_audio = (
+            self._normalize_audio(audio_segment) if process_effects else audio_segment
+        )
         if normalized_audio is None:
             print(f"{idx0}-{idx1}-filtered")
             return
-        wavfile.write(os.path.join(self.gt_wavs_dir, f"{idx0}_{idx1}.wav"), self.sr, normalized_audio.astype(np.float32))
-        audio_16k = librosa.resample(normalized_audio, orig_sr=self.sr, target_sr=SAMPLE_RATE_16K)
-        wavfile.write(os.path.join(self.wavs16k_dir, f"{idx0}_{idx1}.wav"), SAMPLE_RATE_16K, audio_16k.astype(np.float32))
+        wavfile.write(
+            os.path.join(self.gt_wavs_dir, f"{idx0}_{idx1}.wav"),
+            self.sr,
+            normalized_audio.astype(np.float32),
+        )
+        audio_16k = librosa.resample(
+            normalized_audio, orig_sr=self.sr, target_sr=SAMPLE_RATE_16K
+        )
+        wavfile.write(
+            os.path.join(self.wavs16k_dir, f"{idx0}_{idx1}.wav"),
+            SAMPLE_RATE_16K,
+            audio_16k.astype(np.float32),
+        )

     def process_audio(
         self,
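The method above writes each segment twice: a full-rate copy into gt_wavs_dir (the training targets) and a 16 kHz copy into wavs16k_dir (the rate the feature/embedding extractors expect). A self-contained sketch of the two writes, assuming SAMPLE_RATE_16K is 16000 and a 40 kHz model rate:

import numpy as np
import librosa
from scipy.io import wavfile

SAMPLE_RATE_16K = 16000
sr = 40000                                            # assumed model sample rate
segment = np.random.randn(2 * sr).astype(np.float32)  # hypothetical 2 s segment

wavfile.write("0_0_full.wav", sr, segment)            # full-rate training copy
audio_16k = librosa.resample(segment, orig_sr=sr, target_sr=SAMPLE_RATE_16K)
wavfile.write("0_0_16k.wav", SAMPLE_RATE_16K, audio_16k.astype(np.float32))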
@@ -94,12 +108,18 @@ def process_audio(
                         start = int(self.sr * (self.per - OVERLAP) * i)
                         i += 1
                         if len(audio_segment[start:]) > (self.per + OVERLAP) * self.sr:
-                            tmp_audio = audio_segment[start : start + int(self.per * self.sr)]
-                            self.process_audio_segment(tmp_audio, idx0, idx1, process_effects)
+                            tmp_audio = audio_segment[
+                                start : start + int(self.per * self.sr)
+                            ]
+                            self.process_audio_segment(
+                                tmp_audio, idx0, idx1, process_effects
+                            )
                             idx1 += 1
                         else:
                             tmp_audio = audio_segment[start:]
-                            self.process_audio_segment(tmp_audio, idx0, idx1, process_effects)
+                            self.process_audio_segment(
+                                tmp_audio, idx0, idx1, process_effects
+                            )
                             idx1 += 1
                             break
             else:
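The loop above walks each sliced region in windows of self.per seconds whose starts are (per - OVERLAP) seconds apart, so consecutive windows share OVERLAP seconds of audio; whatever remains at the end is emitted as a shorter tail and the loop breaks. The arithmetic in isolation, with assumed values for per and OVERLAP (neither is visible in this hunk):

sr, per, OVERLAP = 16000, 3.0, 0.3  # assumed values
total = 10 * sr                     # a pretend 10-second slice

i = 0
while True:
    start = int(sr * (per - OVERLAP) * i)
    i += 1
    if total - start > (per + OVERLAP) * sr:
        print(f"window {i - 1}: samples [{start}, {start + int(per * sr)})")
    else:
        print(f"window {i - 1}: samples [{start}, {total}) (tail)")
        break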
@@ -108,6 +128,7 @@ def process_audio(
             print(f"Error processing audio: {error}")
         return audio_length

+
 def format_duration(seconds):
     hours = int(seconds // 3600)
     minutes = int((seconds % 3600) // 60)
@@ -159,7 +180,15 @@ def preprocess_training_set(
     ]
     # print(f"Number of files: {len(files)}")
     with concurrent.futures.ThreadPoolExecutor(max_workers=num_processes) as executor:
-        audio_length = list(tqdm(executor.map(process_audio_wrapper, [(pp, file, cut_preprocess, process_effects) for file in files]), total=len(files)))
+        audio_length = list(
+            tqdm(
+                executor.map(
+                    process_audio_wrapper,
+                    [(pp, file, cut_preprocess, process_effects) for file in files],
+                ),
+                total=len(files),
+            )
+        )
     audio_length = sum(audio_length)
     save_dataset_duration(
         os.path.join(exp_dir, "model_info.json"), dataset_duration=audio_length
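The pattern here is the standard way to get a progress bar over a thread pool: executor.map yields results lazily and has no __len__, so tqdm needs an explicit total=. A reduced sketch (job stands in for process_audio_wrapper):

import concurrent.futures
from tqdm import tqdm

def job(args):
    path, scale = args
    return scale  # real code returns the processed file's duration

inputs = [(f"{i}.wav", 1.5) for i in range(100)]
with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
    durations = list(tqdm(executor.map(job, inputs), total=len(inputs)))
print(f"total duration: {sum(durations):.1f}s")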