Merge pull request #657 from IAHispano/formatter/main
chore(format): run black on main
blaisewf authored Sep 1, 2024
2 parents 029665a + cb5edde commit 1314b82
Showing 2 changed files with 54 additions and 15 deletions.
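Context: Black is Python's opinionated autoformatter; running it (typically `black .` from the repository root) rewrites layout without changing behavior, which is why every hunk below is a pure reformat. As a minimal sketch, Black's Python API can be fed one of the long lines from this diff (black.Mode() uses the default 88-column limit):

import black  # pip install black

src = (
    'wavfile.write(os.path.join(self.gt_wavs_dir, f"{idx0}_{idx1}.wav"),'
    " self.sr, normalized_audio.astype(np.float32))\n"
)
# splits the call across lines with a trailing comma, as in the hunks below
print(black.format_str(src, mode=black.Mode()))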
22 changes: 16 additions & 6 deletions rvc/train/extract/extract.py
@@ -3,6 +3,7 @@
 import time
 import tqdm
 import torch
+
 # Zluda
 if torch.cuda.is_available() and torch.cuda.get_device_name().endswith("[ZLUDA]"):
     torch.backends.cudnn.enabled = False
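A note on the hunk above: ZLUDA is a translation layer that runs CUDA workloads on non-NVIDIA GPUs, and devices running under it report a name suffixed with "[ZLUDA]"; cuDNN kernels aren't available there, so the code switches them off. The same guard as a self-contained sketch (the helper name is illustrative, not from the repo):

import torch

def disable_cudnn_under_zluda() -> None:
    # ZLUDA devices advertise themselves via the "[ZLUDA]" name suffix;
    # falling back to PyTorch's native kernels avoids unsupported cuDNN calls
    if torch.cuda.is_available() and torch.cuda.get_device_name().endswith("[ZLUDA]"):
        torch.backends.cudnn.enabled = False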
@@ -24,6 +25,7 @@
 # Load config
 config = Config()

+
 class FeatureInput:
     """Class for F0 extraction."""

@@ -95,7 +97,7 @@ def coarse_f0(self, f0):
     def process_file(self, file_info, f0_method, hop_length):
         """Process a single audio file for F0 extraction."""
         inp_path, opt_path1, opt_path2, _ = file_info
-        #print(f"Process file {inp_path}. Class on {self.device}, model is on {self.model_rmvpe.device}")
+        # print(f"Process file {inp_path}. Class on {self.device}, model is on {self.model_rmvpe.device}")

         if os.path.exists(opt_path1) and os.path.exists(opt_path2):
             return
@@ -107,14 +109,17 @@ def process_file(self, file_info, f0_method, hop_length):
                 coarse_pit = self.coarse_f0(feature_pit)
                 np.save(opt_path1, coarse_pit, allow_pickle=False)
             except Exception as error:
-                print(f"An error occurred extracting file {inp_path} on {self.device}: {error}")
+                print(
+                    f"An error occurred extracting file {inp_path} on {self.device}: {error}"
+                )

     def process_files(self, files, f0_method, hop_length, pbar):
         """Process multiple files."""
         for file_info in files:
             self.process_file(file_info, f0_method, hop_length)
             pbar.update(1)

+
 def run_pitch_extraction(files, devices, f0_method, hop_length, num_processes):
     print(f"Starting pitch extraction with {num_processes} cores and {f0_method}...")
     start_time = time.time()
@@ -128,7 +133,7 @@ def run_pitch_extraction(files, devices, f0_method, hop_length, num_processes):
         part_paths = files[idx::num_gpus]
         process_partials.append((feature_input, part_paths))

-    with concurrent.futures.ThreadPoolExecutor(max_workers = num_processes) as executor:
+    with concurrent.futures.ThreadPoolExecutor(max_workers=num_processes) as executor:
         futures = [
             executor.submit(
                 FeatureInput.process_files,
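The scheduling pattern in this function: the file list is sharded across devices with strided slicing (files[idx::num_gpus]), and each shard is submitted as one job to a thread pool. A standalone sketch of that idea (all names here are illustrative):

import concurrent.futures

def process_shard(worker_id: int, paths: list[str]) -> None:
    for path in paths:
        print(f"worker {worker_id}: {path}")  # real code would extract F0 here

files = [f"{i}.wav" for i in range(10)]
num_workers = 3
shards = [files[i::num_workers] for i in range(num_workers)]  # round-robin split

with concurrent.futures.ThreadPoolExecutor(max_workers=num_workers) as executor:
    futures = [
        executor.submit(process_shard, i, shard) for i, shard in enumerate(shards)
    ]
    concurrent.futures.wait(futures)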
@@ -147,6 +152,7 @@ def run_pitch_extraction(files, devices, f0_method, hop_length, num_processes):
     elapsed_time = time.time() - start_time
     print(f"Pitch extraction completed in {elapsed_time:.2f} seconds.")

+
 def process_file_embedding(file_info, model, device):
     """Process a single audio file for embedding extraction."""
     wav_file_path, _, _, out_file_path = file_info
@@ -168,6 +174,7 @@ def process_file_embedding(file_info, model, device):
     else:
         print(f"{file} contains NaN values and will be skipped.")

+
 def run_embedding_extraction(files, devices, embedder_model, embedder_model_custom):
     """Main function to orchestrate the embedding extraction process."""
     print("Starting embedding extraction...")
@@ -178,7 +185,7 @@ def run_embedding_extraction(files, devices, embedder_model, embedder_model_custom):

     # add multi-threading here?
     for i, file_info in enumerate(files):
-        device = devices[i%len(devices)]
+        device = devices[i % len(devices)]
         try:
             process_file_embedding(file_info, model, device)
         except Exception as error:
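The i % len(devices) indexing assigns files to devices round-robin, the modulo counterpart of the strided files[idx::num_gpus] split used for pitch extraction above:

devices = ["cuda:0", "cuda:1"]  # hypothetical device list
assignments = [devices[i % len(devices)] for i in range(5)]
# -> ["cuda:0", "cuda:1", "cuda:0", "cuda:1", "cuda:0"]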
@@ -189,6 +196,7 @@ def run_embedding_extraction(files, devices, embedder_model, embedder_model_custom):
     elapsed_time = time.time() - start_time
     print(f"Embedding extraction completed in {elapsed_time:.2f} seconds.")

+
 if __name__ == "__main__":

     exp_dir = sys.argv[1]
@@ -212,10 +220,12 @@ def run_embedding_extraction(files, devices, embedder_model, embedder_model_custom):
     for file in glob.glob(os.path.join(wav_path, "*.wav")):
         file_name = os.path.basename(file)
         file_info = [
-            file, # full path to sliced 16k wav
+            file,  # full path to sliced 16k wav
             os.path.join(exp_dir, "f0", file_name + ".npy"),
             os.path.join(exp_dir, "f0_voiced", file_name + ".npy"),
-            os.path.join(exp_dir, version + "_extracted", file_name.replace("wav", "npy"))
+            os.path.join(
+                exp_dir, version + "_extracted", file_name.replace("wav", "npy")
+            ),
         ]
         files.append(file_info)

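One detail worth knowing about that last entry: str.replace substitutes every occurrence of the substring, not just the extension, so a stem that itself contains "wav" is rewritten too. A quick illustration (the file name is hypothetical):

import os

name = "voice_wav.wav"
name.replace("wav", "npy")          # -> "voice_npy.npy" (both occurrences)
os.path.splitext(name)[0] + ".npy"  # -> "voice_wav.npy" (extension only)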
47 changes: 38 additions & 9 deletions rvc/train/preprocess/preprocess.py
@@ -43,7 +43,9 @@ def __init__(self, sr: int, exp_dir: str, per: float):
             max_sil_kept=500,
         )
         self.sr = sr
-        self.b_high, self.a_high = signal.butter(N=5, Wn=HIGH_PASS_CUTOFF, btype="high", fs=self.sr)
+        self.b_high, self.a_high = signal.butter(
+            N=5, Wn=HIGH_PASS_CUTOFF, btype="high", fs=self.sr
+        )
         self.per = per
         self.exp_dir = exp_dir
         self.device = "cpu"
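For reference, signal.butter with fs= designs the filter directly in Hz and returns transfer-function coefficients (b, a) that are applied to the audio later. A minimal sketch with assumed values, since the module's constants aren't visible in this hunk:

import numpy as np
from scipy import signal

sr = 40000             # assumed sample rate
HIGH_PASS_CUTOFF = 48  # Hz; assumed value of the module constant

# 5th-order Butterworth high-pass: strips DC offset and sub-cutoff rumble
b_high, a_high = signal.butter(N=5, Wn=HIGH_PASS_CUTOFF, btype="high", fs=sr)

audio = np.random.randn(sr).astype(np.float32)    # one second of noise
filtered = signal.lfilter(b_high, a_high, audio)  # one common way to apply (b, a)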
@@ -65,13 +67,25 @@ def process_audio_segment(
         idx1: int,
         process_effects: bool,
     ):
-        normalized_audio = self._normalize_audio(audio_segment) if process_effects else audio_segment
+        normalized_audio = (
+            self._normalize_audio(audio_segment) if process_effects else audio_segment
+        )
         if normalized_audio is None:
             print(f"{idx0}-{idx1}-filtered")
             return
-        wavfile.write(os.path.join(self.gt_wavs_dir, f"{idx0}_{idx1}.wav"), self.sr, normalized_audio.astype(np.float32))
-        audio_16k = librosa.resample(normalized_audio, orig_sr=self.sr, target_sr=SAMPLE_RATE_16K)
-        wavfile.write(os.path.join(self.wavs16k_dir, f"{idx0}_{idx1}.wav"), SAMPLE_RATE_16K, audio_16k.astype(np.float32))
+        wavfile.write(
+            os.path.join(self.gt_wavs_dir, f"{idx0}_{idx1}.wav"),
+            self.sr,
+            normalized_audio.astype(np.float32),
+        )
+        audio_16k = librosa.resample(
+            normalized_audio, orig_sr=self.sr, target_sr=SAMPLE_RATE_16K
+        )
+        wavfile.write(
+            os.path.join(self.wavs16k_dir, f"{idx0}_{idx1}.wav"),
+            SAMPLE_RATE_16K,
+            audio_16k.astype(np.float32),
+        )

     def process_audio(
         self,
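The method above writes each segment twice: a full-rate copy into gt_wavs_dir (the training targets) and a 16 kHz copy into wavs16k_dir (the rate the feature/embedding extractors expect). A self-contained sketch of the two writes, assuming SAMPLE_RATE_16K is 16000 and a 40 kHz model rate:

import numpy as np
import librosa
from scipy.io import wavfile

SAMPLE_RATE_16K = 16000
sr = 40000                                            # assumed model sample rate
segment = np.random.randn(2 * sr).astype(np.float32)  # hypothetical 2 s segment

wavfile.write("0_0_full.wav", sr, segment)            # full-rate training copy
audio_16k = librosa.resample(segment, orig_sr=sr, target_sr=SAMPLE_RATE_16K)
wavfile.write("0_0_16k.wav", SAMPLE_RATE_16K, audio_16k.astype(np.float32))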
@@ -94,12 +108,18 @@ def process_audio(
                         start = int(self.sr * (self.per - OVERLAP) * i)
                         i += 1
                         if len(audio_segment[start:]) > (self.per + OVERLAP) * self.sr:
-                            tmp_audio = audio_segment[start : start + int(self.per * self.sr)]
-                            self.process_audio_segment(tmp_audio, idx0, idx1, process_effects)
+                            tmp_audio = audio_segment[
+                                start : start + int(self.per * self.sr)
+                            ]
+                            self.process_audio_segment(
+                                tmp_audio, idx0, idx1, process_effects
+                            )
                             idx1 += 1
                         else:
                             tmp_audio = audio_segment[start:]
-                            self.process_audio_segment(tmp_audio, idx0, idx1, process_effects)
+                            self.process_audio_segment(
+                                tmp_audio, idx0, idx1, process_effects
+                            )
                             idx1 += 1
                             break
             else:
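The loop above walks each sliced region in windows of self.per seconds whose starts are (per - OVERLAP) seconds apart, so consecutive windows share OVERLAP seconds of audio; whatever remains at the end is emitted as a shorter tail and the loop breaks. The arithmetic in isolation, with assumed values for per and OVERLAP (neither is visible in this hunk):

sr, per, OVERLAP = 16000, 3.0, 0.3  # assumed values
total = 10 * sr                     # a pretend 10-second slice

i = 0
while True:
    start = int(sr * (per - OVERLAP) * i)
    i += 1
    if total - start > (per + OVERLAP) * sr:
        print(f"window {i - 1}: samples [{start}, {start + int(per * sr)})")
    else:
        print(f"window {i - 1}: samples [{start}, {total}) (tail)")
        break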
@@ -108,6 +128,7 @@ def process_audio(
             print(f"Error processing audio: {error}")
         return audio_length

+
 def format_duration(seconds):
     hours = int(seconds // 3600)
     minutes = int((seconds % 3600) // 60)
@@ -159,7 +180,15 @@ def preprocess_training_set(
     ]
     # print(f"Number of files: {len(files)}")
     with concurrent.futures.ThreadPoolExecutor(max_workers=num_processes) as executor:
-        audio_length = list(tqdm(executor.map(process_audio_wrapper, [(pp, file, cut_preprocess, process_effects) for file in files]), total=len(files)))
+        audio_length = list(
+            tqdm(
+                executor.map(
+                    process_audio_wrapper,
+                    [(pp, file, cut_preprocess, process_effects) for file in files],
+                ),
+                total=len(files),
+            )
+        )
     audio_length = sum(audio_length)
     save_dataset_duration(
         os.path.join(exp_dir, "model_info.json"), dataset_duration=audio_length
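The pattern here is the standard way to get a progress bar over a thread pool: executor.map yields results lazily and has no __len__, so tqdm needs an explicit total=. A reduced sketch (job stands in for process_audio_wrapper):

import concurrent.futures
from tqdm import tqdm

def job(args):
    path, scale = args
    return scale  # real code returns the processed file's duration

inputs = [(f"{i}.wav", 1.5) for i in range(100)]
with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
    durations = list(tqdm(executor.map(job, inputs), total=len(inputs)))
print(f"total duration: {sum(durations):.1f}s")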