Skip to content

Commit

Permalink
Merge pull request #1 from TheNeodev/formatter/main
Browse files Browse the repository at this point in the history
chore(format): run black on main
  • Loading branch information
TheNeodev authored Dec 17, 2024
2 parents f44f053 + 2ef0a35 commit ac2c2e0
Show file tree
Hide file tree
Showing 10 changed files with 1,031 additions and 533 deletions.
107 changes: 81 additions & 26 deletions rvc_inferpy/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,40 +6,88 @@
import gc
from rvc_inferpy.modules import VC
from rvc_inferpy.infer import Configs, get_model
from rvc_inferpy.split_audio import split_silence_nonsilent, adjust_audio_lengths, combine_silence_nonsilent
from rvc_inferpy.split_audio import (
split_silence_nonsilent,
adjust_audio_lengths,
combine_silence_nonsilent,
)


def infer_audio_cli():
parser = argparse.ArgumentParser(description="RVC INFERPY CLI VER.")
parser.add_argument("--model_name", type=str, help="Name of the model.")
parser.add_argument("--audio_path", type=str, help="Path to the input audio file.")
parser.add_argument("--f0_change", type=float, default=0, help="Pitch change factor.")
parser.add_argument("--f0_method", type=str, default="rmvpe+", help="Method for F0 estimation.")
parser.add_argument("--min_pitch", type=str, default="50", help="Minimum pitch value.")
parser.add_argument("--max_pitch", type=str, default="1100", help="Maximum pitch value.")
parser.add_argument("--crepe_hop_length", type=int, default=128, help="Crepe hop length.")
parser.add_argument(
"--f0_change", type=float, default=0, help="Pitch change factor."
)
parser.add_argument(
"--f0_method", type=str, default="rmvpe+", help="Method for F0 estimation."
)
parser.add_argument(
"--min_pitch", type=str, default="50", help="Minimum pitch value."
)
parser.add_argument(
"--max_pitch", type=str, default="1100", help="Maximum pitch value."
)
parser.add_argument(
"--crepe_hop_length", type=int, default=128, help="Crepe hop length."
)
parser.add_argument("--index_rate", type=float, default=0.75, help="Index rate.")
parser.add_argument("--filter_radius", type=int, default=3, help="Filter radius.")
parser.add_argument("--rms_mix_rate", type=float, default=0.25, help="RMS mix rate.")
parser.add_argument(
"--rms_mix_rate", type=float, default=0.25, help="RMS mix rate."
)
parser.add_argument("--protect", type=float, default=0.33, help="Protect factor.")
parser.add_argument("--split_infer", action="store_true", help="Enable split inference.")
parser.add_argument("--min_silence", type=int, default=500, help="Minimum silence duration.")
parser.add_argument("--silence_threshold", type=float, default=-50, help="Silence threshold (dB).")
parser.add_argument("--seek_step", type=int, default=1, help="Seek step for silence detection.")
parser.add_argument("--keep_silence", type=int, default=100, help="Silence retention duration.")
parser.add_argument("--do_formant", action="store_true", help="Enable formant processing.")
parser.add_argument("--quefrency", type=float, default=0, help="Quefrency adjustment value.")
parser.add_argument("--timbre", type=float, default=1, help="Timbre adjustment factor.")
parser.add_argument("--f0_autotune", action="store_true", help="Enable F0 autotuning.")
parser.add_argument("--audio_format", type=str, default="wav", help="Output audio format.")
parser.add_argument("--resample_sr", type=int, default=0, help="Resample sample rate.")
parser.add_argument("--hubert_model_path", type=str, default="hubert_base.pt", help="Path to Hubert model.")
parser.add_argument("--rmvpe_model_path", type=str, default="rmvpe.pt", help="Path to RMVPE model.")
parser.add_argument("--fcpe_model_path", type=str, default="fcpe.pt", help="Path to FCPE model.")
parser.add_argument(
"--split_infer", action="store_true", help="Enable split inference."
)
parser.add_argument(
"--min_silence", type=int, default=500, help="Minimum silence duration."
)
parser.add_argument(
"--silence_threshold", type=float, default=-50, help="Silence threshold (dB)."
)
parser.add_argument(
"--seek_step", type=int, default=1, help="Seek step for silence detection."
)
parser.add_argument(
"--keep_silence", type=int, default=100, help="Silence retention duration."
)
parser.add_argument(
"--do_formant", action="store_true", help="Enable formant processing."
)
parser.add_argument(
"--quefrency", type=float, default=0, help="Quefrency adjustment value."
)
parser.add_argument(
"--timbre", type=float, default=1, help="Timbre adjustment factor."
)
parser.add_argument(
"--f0_autotune", action="store_true", help="Enable F0 autotuning."
)
parser.add_argument(
"--audio_format", type=str, default="wav", help="Output audio format."
)
parser.add_argument(
"--resample_sr", type=int, default=0, help="Resample sample rate."
)
parser.add_argument(
"--hubert_model_path",
type=str,
default="hubert_base.pt",
help="Path to Hubert model.",
)
parser.add_argument(
"--rmvpe_model_path", type=str, default="rmvpe.pt", help="Path to RMVPE model."
)
parser.add_argument(
"--fcpe_model_path", type=str, default="fcpe.pt", help="Path to FCPE model."
)
args = parser.parse_args()

os.environ["rmvpe_model_path"] = args.rmvpe_model_path
os.environ["fcpe_model_path"] = args.fcpe_model_path
configs = Configs('cuda:0', True)
configs = Configs("cuda:0", True)
vc = VC(configs)
pth_path, index_path = get_model(args.model_name)
vc_data = vc.get_vc(pth_path, args.protect, 0.5)
Expand All @@ -50,7 +98,11 @@ def infer_audio_cli():
os.makedirs(temp_dir, exist_ok=True)
print("Splitting audio into silence and nonsilent segments.")
silence_files, nonsilent_files = split_silence_nonsilent(
args.audio_path, args.min_silence, args.silence_threshold, args.seek_step, args.keep_silence
args.audio_path,
args.min_silence,
args.silence_threshold,
args.seek_step,
args.keep_silence,
)
for i, nonsilent_file in enumerate(nonsilent_files):
print(f"Processing nonsilent audio {i+1}/{len(nonsilent_files)}")
Expand All @@ -74,7 +126,7 @@ def infer_audio_cli():
args.min_pitch,
args.max_pitch,
args.f0_autotune,
args.hubert_model_path
args.hubert_model_path,
)
if inference_info[0] == "Success.":
print("Inference ran successfully.")
Expand All @@ -85,7 +137,9 @@ def infer_audio_cli():
inferred_files.append(output_path)

adjusted_inferred_files = adjust_audio_lengths(nonsilent_files, inferred_files)
output_path = combine_silence_nonsilent(silence_files, adjusted_inferred_files, args.keep_silence, output_path)
output_path = combine_silence_nonsilent(
silence_files, adjusted_inferred_files, args.keep_silence, output_path
)
shutil.rmtree(temp_dir)
else:
inference_info, audio_data, output_path = vc.vc_single(
Expand All @@ -108,7 +162,7 @@ def infer_audio_cli():
args.min_pitch,
args.max_pitch,
args.f0_autotune,
args.hubert_model_path
args.hubert_model_path,
)
if inference_info[0] == "Success.":
print("Inference ran successfully.")
Expand All @@ -121,5 +175,6 @@ def infer_audio_cli():
gc.collect()
print(f"Output saved to: {output_path}")


if __name__ == "__main__":
infer_audio_cli()
111 changes: 64 additions & 47 deletions rvc_inferpy/infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,15 @@
import torch
from multiprocessing import cpu_count
from rvc_inferpy.modules import VC
from rvc_inferpy.split_audio import split_silence_nonsilent, adjust_audio_lengths, combine_silence_nonsilent
from rvc_inferpy.split_audio import (
split_silence_nonsilent,
adjust_audio_lengths,
combine_silence_nonsilent,
)
from pathlib import Path
import requests



class Configs:
def __init__(self, device, is_half):
self.device = device
Expand Down Expand Up @@ -54,25 +57,24 @@ def device_config(self) -> tuple:

return x_pad, x_query, x_center, x_max


def get_model(voice_model):
    """Locate the weight and index files of a voice model.

    The model is looked up in ``<current working directory>/models/<voice_model>``.
    A file ending in ``.pth`` is taken as the model weights and a file ending
    in ``.index`` as the optional index. When several files share an
    extension, the one listed last by ``os.listdir`` is used.

    Args:
        voice_model: Name of the sub-directory under ``models``.

    Returns:
        A ``(pth_path, index_path)`` tuple. ``index_path`` is ``""`` when the
        directory holds no ``.index`` file. ``(None, None)`` is returned when
        the directory holds no ``.pth`` file.

    Raises:
        FileNotFoundError: If the model directory does not exist (raised by
            ``os.listdir``).
    """
    model_dir = os.path.join(os.getcwd(), "models", voice_model)
    model_filename, index_filename = None, None
    for file in os.listdir(model_dir):
        ext = os.path.splitext(file)[1]
        if ext == ".pth":
            model_filename = file
        elif ext == ".index":
            index_filename = file

    if model_filename is None:
        # The message previously formatted the undefined name `models_dir`,
        # which raised NameError instead of reporting the missing model.
        print(f"No model file exists in {model_dir}.")
        return None, None

    index_path = os.path.join(model_dir, index_filename) if index_filename else ""
    return os.path.join(model_dir, model_filename), index_path


BASE_DIR = Path(os.getcwd()) # Use Path for better path handling
Expand All @@ -83,21 +85,25 @@ def get_model(voice_model):
# Check for missing files
missing_files = [file for file in files_to_check if not (BASE_DIR / file).exists()]


# Define the download function
def dl_model(link, model_name, dir_name):
    """Download ``<link>/<model_name>`` and save it as ``dir_name/model_name``.

    The body is streamed to disk in 8 KiB chunks so the whole file is never
    held in memory.

    Args:
        link: Base URL, without a trailing slash.
        model_name: File name appended to ``link`` and used as the local name.
        dir_name: Destination directory; a ``Path`` or a path string.

    Raises:
        requests.HTTPError: If the server answers with an error status
            (raised by ``raise_for_status``).
    """
    url = f"{link}/{model_name}"
    # Accept plain strings as well as Path objects for the destination.
    target_path = Path(dir_name) / model_name

    # Using the response as a context manager releases the streamed
    # connection even when raise_for_status() or the file write fails.
    with requests.get(url, stream=True) as response:
        response.raise_for_status()

        # Create the directory if it doesn't exist
        target_path.parent.mkdir(parents=True, exist_ok=True)

        with open(target_path, "wb") as f:
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:  # skip empty keep-alive chunks
                    f.write(chunk)

    print(f"{model_name} downloaded successfully!")


# Download missing files if any
if missing_files:
RVC_DOWNLOAD_LINK = "https://huggingface.co/theNeofr/rvc-base/resolve/main" # Replace with the actual download link
Expand All @@ -111,9 +117,6 @@ def dl_model(link, model_name, dir_name):
print("All required files are already present.")





def infer_audio(
model_name,
audio_path,
Expand All @@ -139,52 +142,57 @@ def infer_audio(
resample_sr=0,
hubert_model_path="hubert_base.pt",
rmvpe_model_path="rmvpe.pt",
fcpe_model_path="fcpe.pt"
):
fcpe_model_path="fcpe.pt",
):
os.environ["rmvpe_model_path"] = rmvpe_model_path
os.environ["fcpe_model_path"] = fcpe_model_path
configs = Configs('cuda:0', True)
configs = Configs("cuda:0", True)
vc = VC(configs)
pth_path, index_path = get_model(model_name)
vc_data = vc.get_vc(pth_path, protect, 0.5)



if split_infer:
inferred_files = []
temp_dir = os.path.join(os.getcwd(), "seperate", "temp")
os.makedirs(temp_dir, exist_ok=True)
print("Splitting audio to silence and nonsilent segments.")
silence_files, nonsilent_files = split_silence_nonsilent(audio_path, min_silence, silence_threshold, seek_step, keep_silence)
print(f"Total silence segments: {len(silence_files)}.\nTotal nonsilent segments: {len(nonsilent_files)}.")
silence_files, nonsilent_files = split_silence_nonsilent(
audio_path, min_silence, silence_threshold, seek_step, keep_silence
)
print(
f"Total silence segments: {len(silence_files)}.\nTotal nonsilent segments: {len(nonsilent_files)}."
)
for i, nonsilent_file in enumerate(nonsilent_files):
print(f"Inferring nonsilent audio {i+1}")
inference_info, audio_data, output_path = vc.vc_single(
0,
nonsilent_file,
f0_change,
f0_method,
index_path,
index_path,
index_rate,
filter_radius,
resample_sr,
rms_mix_rate,
protect,
audio_format,
crepe_hop_length,
do_formant,
quefrency,
timbre,
min_pitch,
max_pitch,
f0_autotune,
hubert_model_path
0,
nonsilent_file,
f0_change,
f0_method,
index_path,
index_path,
index_rate,
filter_radius,
resample_sr,
rms_mix_rate,
protect,
audio_format,
crepe_hop_length,
do_formant,
quefrency,
timbre,
min_pitch,
max_pitch,
f0_autotune,
hubert_model_path,
)
if inference_info[0] == "Success.":
print("Inference ran successfully.")
print(inference_info[1])
print("Times:\nnpy: %.2fs f0: %.2fs infer: %.2fs\nTotal time: %.2fs" % (*inference_info[2],))
print(
"Times:\nnpy: %.2fs f0: %.2fs infer: %.2fs\nTotal time: %.2fs"
% (*inference_info[2],)
)
else:
print(f"An error occurred while processing.\n{inference_info[0]}")
return None
Expand All @@ -194,11 +202,17 @@ def infer_audio(
print("Combining silence and inferred audios.")
output_count = 1
while True:
output_path = os.path.join(os.getcwd(), "output", f"{os.path.splitext(os.path.basename(audio_path))[0]}{model_name}{f0_method.capitalize()}_{output_count}.{audio_format}")
output_path = os.path.join(
os.getcwd(),
"output",
f"{os.path.splitext(os.path.basename(audio_path))[0]}{model_name}{f0_method.capitalize()}_{output_count}.{audio_format}",
)
if not os.path.exists(output_path):
break
output_count += 1
output_path = combine_silence_nonsilent(silence_files, adjusted_inferred_files, keep_silence, output_path)
output_path = combine_silence_nonsilent(
silence_files, adjusted_inferred_files, keep_silence, output_path
)
[shutil.move(inferred_file, temp_dir) for inferred_file in inferred_files]
shutil.rmtree(temp_dir)
else:
Expand All @@ -222,18 +236,21 @@ def infer_audio(
min_pitch,
max_pitch,
f0_autotune,
hubert_model_path
hubert_model_path,
)
if inference_info[0] == "Success.":
print("Inference ran successfully.")
print(inference_info[1])
print("Times:\nnpy: %.2fs f0: %.2fs infer: %.2fs\nTotal time: %.2fs" % (*inference_info[2],))
print(
"Times:\nnpy: %.2fs f0: %.2fs infer: %.2fs\nTotal time: %.2fs"
% (*inference_info[2],)
)
else:
print(f"An error occurred while processing.\n{inference_info[0]}")
del configs, vc
gc.collect()
return inference_info[0]

del configs, vc
gc.collect()
return output_path
Loading

0 comments on commit ac2c2e0

Please sign in to comment.