Skip to content

Commit

Permalink
revert preprocess changes
Browse files: browse the repository at this point in the history
  • Loading branch information
blaisewf committed Dec 22, 2024
1 parent d535029 commit e96a659
Showing 1 changed file with 59 additions and 47 deletions.
106 changes: 59 additions & 47 deletions rvc/train/preprocess/preprocess.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -35,15 +35,6 @@

class PreProcess:
def __init__(self, sr: int, exp_dir: str, per: float):
self.sr = sr
self.per = per
self.exp_dir = exp_dir
self.device = "cpu"
self.gt_wavs_dir = os.path.join(exp_dir, "sliced_audios")
self.wavs16k_dir = os.path.join(exp_dir, "sliced_audios_16k")
os.makedirs(self.gt_wavs_dir, exist_ok=True)
os.makedirs(self.wavs16k_dir, exist_ok=True)

self.slicer = Slicer(
sr=sr,
threshold=-42,
Expand All @@ -52,9 +43,17 @@ def __init__(self, sr: int, exp_dir: str, per: float):
hop_size=15,
max_sil_kept=500,
)
self.sr = sr
self.b_high, self.a_high = signal.butter(
N=5, Wn=HIGH_PASS_CUTOFF, btype="high", fs=self.sr
)
self.per = per
self.exp_dir = exp_dir
self.device = "cpu"
self.gt_wavs_dir = os.path.join(exp_dir, "sliced_audios")
self.wavs16k_dir = os.path.join(exp_dir, "sliced_audios_16k")
os.makedirs(self.gt_wavs_dir, exist_ok=True)
os.makedirs(self.wavs16k_dir, exist_ok=True)

def _normalize_audio(self, audio: np.ndarray):
tmp_max = np.abs(audio).max()
Expand Down Expand Up @@ -97,19 +96,18 @@ def simple_cut(
chunk_len: float,
overlap_len: float,
):
chunk_samples = int(self.sr * chunk_len)
overlap_samples = int(self.sr * overlap_len)
step = chunk_samples - overlap_samples
num_chunks = (len(audio) - chunk_samples) // step + 1
for i in range(num_chunks):
start = i * step
end = start + chunk_samples
if end <= len(audio):
chunk = audio[start:end]
file_index = i
chunk_length = int(self.sr * chunk_len)
overlap_length = int(self.sr * overlap_len)
i = 0
while i < len(audio):
chunk = audio[i : i + chunk_length]
if len(chunk) == chunk_length:
# full SR for training
wavfile.write(
os.path.join(self.gt_wavs_dir, f"{sid}_{idx0}_{file_index}.wav"),
os.path.join(
self.gt_wavs_dir,
f"{sid}_{idx0}_{i // (chunk_length - overlap_length)}.wav",
),
self.sr,
chunk.astype(np.float32),
)
Expand All @@ -118,10 +116,14 @@ def simple_cut(
chunk, orig_sr=self.sr, target_sr=SAMPLE_RATE_16K, res_type=RES_TYPE
)
wavfile.write(
os.path.join(self.wavs16k_dir, f"{sid}_{idx0}_{file_index}.wav"),
os.path.join(
self.wavs16k_dir,
f"{sid}_{idx0}_{i // (chunk_length - overlap_length)}.wav",
),
SAMPLE_RATE_16K,
chunk_16k.astype(np.float32),
)
i += chunk_length - overlap_length

def process_audio(
self,
Expand All @@ -145,37 +147,46 @@ def process_audio(
audio = self._normalize_audio(audio)
if noise_reduction:
audio = nr.reduce_noise(
y=audio,
sr=self.sr,
prop_decrease=reduction_strength,
n_fft=2048,
hop_length=512,
y=audio, sr=self.sr, prop_decrease=reduction_strength
)
if cut_preprocess == "Skip":
self.process_audio_segment(audio, sid, idx0, 0)
# no cutting
self.process_audio_segment(
audio,
sid,
idx0,
0,
)
elif cut_preprocess == "Simple":
# simple
self.simple_cut(audio, sid, idx0, chunk_len, overlap_len)
elif cut_preprocess == "Automatic":
segments = self.slicer.slice(audio)
idx1 = 0
for audio_segment in segments:
segment_length = len(audio_segment)
per_samples = int(self.sr * self.per)
overlap_samples_segment = int(self.sr * OVERLAP)
step = per_samples - overlap_samples_segment

num_sub_segments = (segment_length - per_samples + step - 1) // step

for i in range(num_sub_segments):
start = i * step
end = start + per_samples
if end <= segment_length:
tmp_audio = audio_segment[start:end]
self.process_audio_segment(tmp_audio, sid, idx0, idx1)
# legacy
for audio_segment in self.slicer.slice(audio):
i = 0
while True:
start = int(self.sr * (self.per - OVERLAP) * i)
i += 1
if len(audio_segment[start:]) > (self.per + OVERLAP) * self.sr:
tmp_audio = audio_segment[
start : start + int(self.per * self.sr)
]
self.process_audio_segment(
tmp_audio,
sid,
idx0,
idx1,
)
idx1 += 1
elif start < segment_length:
else:
tmp_audio = audio_segment[start:]
self.process_audio_segment(tmp_audio, sid, idx0, idx1)
self.process_audio_segment(
tmp_audio,
sid,
idx0,
idx1,
)
idx1 += 1
break

Expand Down Expand Up @@ -266,6 +277,7 @@ def preprocess_training_set(
f'Speaker ID folder is expected to be integer, got "{os.path.basename(root)}" instead.'
)

# print(f"Number of files: {len(files)}")
audio_length = []
with tqdm(total=len(files)) as pbar:
with concurrent.futures.ProcessPoolExecutor(
Expand All @@ -291,13 +303,13 @@ def preprocess_training_set(
audio_length.append(future.result())
pbar.update(1)

total_audio_length = sum(audio_length)
audio_length = sum(audio_length)
save_dataset_duration(
os.path.join(exp_dir, "model_info.json"), dataset_duration=total_audio_length
os.path.join(exp_dir, "model_info.json"), dataset_duration=audio_length
)
elapsed_time = time.time() - start_time
print(
f"Preprocess completed in {elapsed_time:.2f} seconds on {format_duration(total_audio_length)} seconds of audio."
f"Preprocess completed in {elapsed_time:.2f} seconds on {format_duration(audio_length)} seconds of audio."
)


Expand Down

0 comments on commit e96a659

Please sign in to comment.