From e96a659966ba595bac1db115e13ea69136211d58 Mon Sep 17 00:00:00 2001 From: Blaise Date: Sun, 22 Dec 2024 23:39:51 +0100 Subject: [PATCH] revert preprocess changes --- rvc/train/preprocess/preprocess.py | 106 ++++++++++++++++------------- 1 file changed, 59 insertions(+), 47 deletions(-) diff --git a/rvc/train/preprocess/preprocess.py b/rvc/train/preprocess/preprocess.py index 71c5c899..edc02f87 100644 --- a/rvc/train/preprocess/preprocess.py +++ b/rvc/train/preprocess/preprocess.py @@ -35,15 +35,6 @@ class PreProcess: def __init__(self, sr: int, exp_dir: str, per: float): - self.sr = sr - self.per = per - self.exp_dir = exp_dir - self.device = "cpu" - self.gt_wavs_dir = os.path.join(exp_dir, "sliced_audios") - self.wavs16k_dir = os.path.join(exp_dir, "sliced_audios_16k") - os.makedirs(self.gt_wavs_dir, exist_ok=True) - os.makedirs(self.wavs16k_dir, exist_ok=True) - self.slicer = Slicer( sr=sr, threshold=-42, @@ -52,9 +43,17 @@ def __init__(self, sr: int, exp_dir: str, per: float): hop_size=15, max_sil_kept=500, ) + self.sr = sr self.b_high, self.a_high = signal.butter( N=5, Wn=HIGH_PASS_CUTOFF, btype="high", fs=self.sr ) + self.per = per + self.exp_dir = exp_dir + self.device = "cpu" + self.gt_wavs_dir = os.path.join(exp_dir, "sliced_audios") + self.wavs16k_dir = os.path.join(exp_dir, "sliced_audios_16k") + os.makedirs(self.gt_wavs_dir, exist_ok=True) + os.makedirs(self.wavs16k_dir, exist_ok=True) def _normalize_audio(self, audio: np.ndarray): tmp_max = np.abs(audio).max() @@ -97,19 +96,18 @@ def simple_cut( chunk_len: float, overlap_len: float, ): - chunk_samples = int(self.sr * chunk_len) - overlap_samples = int(self.sr * overlap_len) - step = chunk_samples - overlap_samples - num_chunks = (len(audio) - chunk_samples) // step + 1 - for i in range(num_chunks): - start = i * step - end = start + chunk_samples - if end <= len(audio): - chunk = audio[start:end] - file_index = i + chunk_length = int(self.sr * chunk_len) + overlap_length = int(self.sr * overlap_len) + i = 0 + while i < len(audio): + chunk = audio[i : i + chunk_length] + if len(chunk) == chunk_length: # full SR for training wavfile.write( - os.path.join(self.gt_wavs_dir, f"{sid}_{idx0}_{file_index}.wav"), + os.path.join( + self.gt_wavs_dir, + f"{sid}_{idx0}_{i // (chunk_length - overlap_length)}.wav", + ), self.sr, chunk.astype(np.float32), ) @@ -118,10 +116,14 @@ def simple_cut( chunk, orig_sr=self.sr, target_sr=SAMPLE_RATE_16K, res_type=RES_TYPE ) wavfile.write( - os.path.join(self.wavs16k_dir, f"{sid}_{idx0}_{file_index}.wav"), + os.path.join( + self.wavs16k_dir, + f"{sid}_{idx0}_{i // (chunk_length - overlap_length)}.wav", + ), SAMPLE_RATE_16K, chunk_16k.astype(np.float32), ) + i += chunk_length - overlap_length def process_audio( self, @@ -145,37 +147,46 @@ def process_audio( audio = self._normalize_audio(audio) if noise_reduction: audio = nr.reduce_noise( - y=audio, - sr=self.sr, - prop_decrease=reduction_strength, - n_fft=2048, - hop_length=512, + y=audio, sr=self.sr, prop_decrease=reduction_strength ) if cut_preprocess == "Skip": - self.process_audio_segment(audio, sid, idx0, 0) + # no cutting + self.process_audio_segment( + audio, + sid, + idx0, + 0, + ) elif cut_preprocess == "Simple": + # simple self.simple_cut(audio, sid, idx0, chunk_len, overlap_len) elif cut_preprocess == "Automatic": - segments = self.slicer.slice(audio) idx1 = 0 - for audio_segment in segments: - segment_length = len(audio_segment) - per_samples = int(self.sr * self.per) - overlap_samples_segment = int(self.sr * OVERLAP) - step = per_samples - overlap_samples_segment - - num_sub_segments = (segment_length - per_samples + step - 1) // step - - for i in range(num_sub_segments): - start = i * step - end = start + per_samples - if end <= segment_length: - tmp_audio = audio_segment[start:end] - self.process_audio_segment(tmp_audio, sid, idx0, idx1) + # legacy + for audio_segment in self.slicer.slice(audio): + i = 0 + while True: + start = int(self.sr * (self.per - OVERLAP) * i) + i += 1 + if len(audio_segment[start:]) > (self.per + OVERLAP) * self.sr: + tmp_audio = audio_segment[ + start : start + int(self.per * self.sr) + ] + self.process_audio_segment( + tmp_audio, + sid, + idx0, + idx1, + ) idx1 += 1 - elif start < segment_length: + else: tmp_audio = audio_segment[start:] - self.process_audio_segment(tmp_audio, sid, idx0, idx1) + self.process_audio_segment( + tmp_audio, + sid, + idx0, + idx1, + ) idx1 += 1 break @@ -266,6 +277,7 @@ def preprocess_training_set( f'Speaker ID folder is expected to be integer, got "{os.path.basename(root)}" instead.' ) + # print(f"Number of files: {len(files)}") audio_length = [] with tqdm(total=len(files)) as pbar: with concurrent.futures.ProcessPoolExecutor( @@ -291,13 +303,13 @@ def preprocess_training_set( audio_length.append(future.result()) pbar.update(1) - total_audio_length = sum(audio_length) + audio_length = sum(audio_length) save_dataset_duration( - os.path.join(exp_dir, "model_info.json"), dataset_duration=total_audio_length + os.path.join(exp_dir, "model_info.json"), dataset_duration=audio_length ) elapsed_time = time.time() - start_time print( - f"Preprocess completed in {elapsed_time:.2f} seconds on {format_duration(total_audio_length)} seconds of audio." + f"Preprocess completed in {elapsed_time:.2f} seconds on {format_duration(audio_length)} seconds of audio." )