diff --git a/.gitignore b/.gitignore
index 44c640c71..33152a49f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -18,5 +18,4 @@ logs
 env
 venv
-.venv
-rvc/models
\ No newline at end of file
+.venv
\ No newline at end of file
diff --git a/app.py b/app.py
index 4407df20a..e1330952f 100644
--- a/app.py
+++ b/app.py
@@ -32,8 +32,6 @@ from core import run_prerequisites_script
 run_prerequisites_script(
-    pretraineds_v1_f0=False,
-    pretraineds_v1_nof0=False,
     pretraineds_v2_f0=True,
     pretraineds_v2_nof0=False,
     models=True,
diff --git a/assets/Applio_NoUI.ipynb b/assets/Applio_NoUI.ipynb
index f055746ab..6748d5956 100644
--- a/assets/Applio_NoUI.ipynb
+++ b/assets/Applio_NoUI.ipynb
@@ -447,7 +447,6 @@
    "outputs": [],
    "source": [
     "# @title Extract Features\n",
-    "rvc_version = \"v2\" # @param [\"v2\", \"v1\"] {allow-input: false}\n",
     "f0_method = \"rmvpe\" # @param [\"crepe\", \"crepe-tiny\", \"rmvpe\"] {allow-input: false}\n",
     "hop_length = 128 # @param {type:\"slider\", min:1, max:512, step:0}\n",
     "\n",
@@ -456,7 +455,7 @@
     "embedder_model = \"contentvec\" # @param [\"contentvec\", \"chinese-hubert-base\", \"japanese-hubert-base\", \"korean-hubert-base\", \"custom\"] {allow-input: false}\n",
     "embedder_model_custom = \"\" # @param {type:\"string\"}\n",
     "\n",
-    "!python core.py extract --model_name \"{model_name}\" --rvc_version \"{rvc_version}\" --f0_method \"{f0_method}\" --hop_length \"{hop_length}\" --sample_rate \"{sr}\" --cpu_cores \"{cpu_cores}\" --gpu \"0\" --embedder_model \"{embedder_model}\" --embedder_model_custom \"{embedder_model_custom}\""
+    "!python core.py extract --model_name \"{model_name}\" --f0_method \"{f0_method}\" --hop_length \"{hop_length}\" --sample_rate \"{sr}\" --cpu_cores \"{cpu_cores}\" --gpu \"0\" --embedder_model \"{embedder_model}\" --embedder_model_custom \"{embedder_model_custom}\""
    ]
   },
  {
@@ -636,7 +635,7 @@
     "    if tensorboard == True:\n",
     "        %load_ext tensorboard\n",
     "        %tensorboard --logdir /content/Applio/logs/\n",
-    "    !python core.py train --model_name \"{model_name}\" --rvc_version \"{rvc_version}\" --save_every_epoch \"{save_every_epoch}\" --save_only_latest \"{save_only_latest}\" --save_every_weights \"{save_every_weights}\" --total_epoch \"{total_epoch}\" --sample_rate \"{sr}\" --batch_size \"{batch_size}\" --gpu \"{gpu}\" --pretrained \"{pretrained}\" --custom_pretrained \"{custom_pretrained}\" --g_pretrained_path \"{g_pretrained_path}\" --d_pretrained_path \"{d_pretrained_path}\" --overtraining_detector \"{overtraining_detector}\" --overtraining_threshold \"{overtraining_threshold}\" --cleanup \"{cleanup}\" --cache_data_in_gpu \"{cache_data_in_gpu}\"\n",
+    "    !python core.py train --model_name \"{model_name}\" --save_every_epoch \"{save_every_epoch}\" --save_only_latest \"{save_only_latest}\" --save_every_weights \"{save_every_weights}\" --total_epoch \"{total_epoch}\" --sample_rate \"{sr}\" --batch_size \"{batch_size}\" --gpu \"{gpu}\" --pretrained \"{pretrained}\" --custom_pretrained \"{custom_pretrained}\" --g_pretrained_path \"{g_pretrained_path}\" --d_pretrained_path \"{d_pretrained_path}\" --overtraining_detector \"{overtraining_detector}\" --overtraining_threshold \"{overtraining_threshold}\" --cleanup \"{cleanup}\" --cache_data_in_gpu \"{cache_data_in_gpu}\"\n",
     "\n",
     "\n",
     "server_thread = threading.Thread(target=start_train)\n",
@@ -660,7 +659,7 @@
    "source": [
     "# @title Generate index file\n",
     "index_algorithm = \"Auto\" # @param [\"Auto\", \"Faiss\", \"KMeans\"] {allow-input: false}\n",
-    "!python core.py index --model_name \"{model_name}\" --rvc_version \"{rvc_version}\" --index_algorithm \"{index_algorithm}\""
+    "!python core.py index --model_name \"{model_name}\" --index_algorithm \"{index_algorithm}\""
    ]
   },
  {
@@ -789,7 +788,6 @@
     "# @markdown ### ➡️ Use the same as you did previously\n",
     "model_name = \"Darwin\" # @param {type:\"string\"}\n",
     "sample_rate = \"40k\" # @param [\"32k\", \"40k\", \"48k\"] {allow-input: false}\n",
-    "rvc_version = \"v2\" # @param [\"v2\", \"v1\"] {allow-input: false}\n",
     "f0_method = \"rmvpe\" # @param [\"crepe\", \"crepe-tiny\", \"rmvpe\"] {allow-input: false}\n",
     "hop_length = 128 # @param {type:\"slider\", min:1, max:512, step:0}\n",
     "sr = int(sample_rate.rstrip(\"k\")) * 1000"
diff --git a/core.py b/core.py
index 0c4c7cf9e..5f92460a4 100644
--- a/core.py
+++ b/core.py
@@ -454,7 +454,6 @@ def run_preprocess_script(
 # Extract
 def run_extract_script(
     model_name: str,
-    rvc_version: str,
     f0_method: str,
     hop_length: int,
     cpu_cores: int,
@@ -479,7 +478,6 @@
         hop_length,
         cpu_cores,
         gpu,
-        rvc_version,
         sample_rate,
         embedder_model,
         embedder_model_custom,
@@ -496,7 +494,6 @@
 # Train
 def run_train_script(
     model_name: str,
-    rvc_version: str,
     save_every_epoch: int,
     save_only_latest: bool,
     save_every_weights: bool,
@@ -522,7 +519,7 @@
     if custom_pretrained == False:
         pg, pd = pretrained_selector(
-            str(rvc_version), str(vocoder), True, int(sample_rate)
+            str(vocoder), True, int(sample_rate)
         )
     else:
         if g_pretrained_path is None or d_pretrained_path is None:
@@ -545,7 +542,6 @@
         total_epoch,
         pg,
         pd,
-        rvc_version,
         gpu,
         batch_size,
         sample_rate,
@@ -561,18 +557,17 @@
         ),
     ]
     subprocess.run(command)
-    run_index_script(model_name, rvc_version, index_algorithm)
+    run_index_script(model_name, index_algorithm)
     return f"Model {model_name} trained successfully."

 # Index
-def run_index_script(model_name: str, rvc_version: str, index_algorithm: str):
+def run_index_script(model_name: str, index_algorithm: str):
     index_script_path = os.path.join("rvc", "train", "process", "extract_index.py")
     command = [
         python,
         index_script_path,
         os.path.join(logs_path, model_name),
-        rvc_version,
         index_algorithm,
     ]
@@ -607,16 +602,12 @@
 # Prerequisites
 def run_prerequisites_script(
-    pretraineds_v1_f0: bool,
-    pretraineds_v1_nof0: bool,
     pretraineds_v2_f0: bool,
     pretraineds_v2_nof0: bool,
     models: bool,
     exe: bool,
 ):
     prequisites_download_pipeline(
-        pretraineds_v1_f0,
-        pretraineds_v1_nof0,
         pretraineds_v2_f0,
         pretraineds_v2_nof0,
         models,
@@ -1893,13 +1884,6 @@ def parse_arguments():
     extract_parser.add_argument(
         "--model_name", type=str, help="Name of the model.", required=True
     )
-    extract_parser.add_argument(
-        "--rvc_version",
-        type=str,
-        help="Version of the RVC model ('v1' or 'v2').",
-        choices=["v1", "v2"],
-        default="v2",
-    )
     extract_parser.add_argument(
         "--f0_method",
         type=str,
@@ -1971,13 +1955,6 @@
     train_parser.add_argument(
         "--model_name", type=str, help="Name of the model to be trained.", required=True
     )
-    train_parser.add_argument(
-        "--rvc_version",
-        type=str,
-        help="Version of the RVC model to train ('v1' or 'v2').",
-        choices=["v1", "v2"],
-        default="v2",
-    )
     train_parser.add_argument(
         "--vocoder",
         type=str,
@@ -2113,13 +2090,6 @@
     index_parser.add_argument(
         "--model_name", type=str, help="Name of the model.", required=True
     )
-    index_parser.add_argument(
-        "--rvc_version",
-        type=str,
-        help="Version of the RVC model ('v1' or 'v2').",
-        choices=["v1", "v2"],
-        default="v2",
-    )
     index_parser.add_argument(
         "--index_algorithm",
         type=str,
@@ -2181,13 +2151,6 @@
     prerequisites_parser = subparsers.add_parser(
         "prerequisites", help="Install prerequisites for RVC."
     )
-    prerequisites_parser.add_argument(
-        "--pretraineds_v1_f0",
-        type=lambda x: bool(strtobool(x)),
-        choices=[True, False],
-        default=False,
-        help="Download pretrained models for RVC v1.",
-    )
     prerequisites_parser.add_argument(
         "--pretraineds_v2_f0",
         type=lambda x: bool(strtobool(x)),
@@ -2195,13 +2158,6 @@
         default=True,
         help="Download pretrained models for RVC v2.",
     )
-    prerequisites_parser.add_argument(
-        "--pretraineds_v1_nof0",
-        type=lambda x: bool(strtobool(x)),
-        choices=[True, False],
-        default=False,
-        help="Download non f0 pretrained models for RVC v1.",
-    )
     prerequisites_parser.add_argument(
         "--pretraineds_v2_nof0",
         type=lambda x: bool(strtobool(x)),
@@ -2412,7 +2368,6 @@ def main():
     elif args.mode == "extract":
         run_extract_script(
             model_name=args.model_name,
-            rvc_version=args.rvc_version,
             f0_method=args.f0_method,
             hop_length=args.hop_length,
             cpu_cores=args.cpu_cores,
@@ -2425,7 +2380,6 @@
     elif args.mode == "train":
         run_train_script(
             model_name=args.model_name,
-            rvc_version=args.rvc_version,
             save_every_epoch=args.save_every_epoch,
             save_only_latest=args.save_only_latest,
             save_every_weights=args.save_every_weights,
@@ -2448,7 +2402,6 @@
     elif args.mode == "index":
         run_index_script(
             model_name=args.model_name,
-            rvc_version=args.rvc_version,
             index_algorithm=args.index_algorithm,
         )
     elif args.mode == "model_information":
@@ -2470,8 +2423,6 @@
         )
     elif args.mode == "prerequisites":
         run_prerequisites_script(
-            pretraineds_v1_f0=args.pretraineds_v1_f0,
-            pretraineds_v1_nof0=args.pretraineds_v1_nof0,
             pretraineds_v2_f0=args.pretraineds_v2_f0,
             pretraineds_v2_nof0=args.pretraineds_v2_nof0,
             models=args.models,
diff --git a/logs/mute/v1_extracted/mute.npy b/logs/mute/v1_extracted/mute.npy
deleted file mode 100644
index ffe35e784..000000000
Binary files a/logs/mute/v1_extracted/mute.npy and /dev/null differ
diff --git a/logs/mute/v2_extracted/mute.npy b/logs/mute/v2_extracted/mute.npy
deleted file mode 100644
index b14cfb83e..000000000
Binary files a/logs/mute/v2_extracted/mute.npy and /dev/null differ
diff --git a/rvc/configs/v2/32000.json b/rvc/configs/32000.json
similarity index 100%
rename from rvc/configs/v2/32000.json
rename to rvc/configs/32000.json
diff --git a/rvc/configs/v2/40000.json b/rvc/configs/40000.json
similarity index 100%
rename from rvc/configs/v2/40000.json
rename to rvc/configs/40000.json
diff --git a/rvc/configs/v2/44100.json b/rvc/configs/44100.json
similarity index 100%
rename from rvc/configs/v2/44100.json
rename to rvc/configs/44100.json
diff --git a/rvc/configs/v2/48000.json b/rvc/configs/48000.json
similarity index 100%
rename from rvc/configs/v2/48000.json
rename to rvc/configs/48000.json
diff --git a/rvc/configs/config.py b/rvc/configs/config.py
index 942326cc4..171324753 100644
--- a/rvc/configs/config.py
+++ b/rvc/configs/config.py
@@ -3,14 +3,10 @@ import os
 version_config_paths = [
-    os.path.join("v1", "32000.json"),
-    os.path.join("v1", "40000.json"),
-    os.path.join("v1", "44100.json"),
-    os.path.join("v1", "48000.json"),
-    os.path.join("v2", "48000.json"),
-    os.path.join("v2", "40000.json"),
-    os.path.join("v2", "44100.json"),
-    os.path.join("v2", "32000.json"),
+    os.path.join("48000.json"),
+    os.path.join("40000.json"),
+    os.path.join("44100.json"),
+    os.path.join("32000.json"),
 ]
diff --git a/rvc/configs/v1/32000.json b/rvc/configs/v1/32000.json
deleted file mode 100644
index 2f28f4f68..000000000
--- a/rvc/configs/v1/32000.json
+++ /dev/null
@@ -1,47 +0,0 @@
-{
-  "train": {
-    "log_interval": 200,
-    "seed": 1234,
-    "epochs": 20000,
-    "learning_rate": 1e-4,
-    "betas": [0.8, 0.99],
-    "eps": 1e-9,
-    "batch_size": 4,
-    "fp16_run": true,
-    "lr_decay": 0.999875,
-    "segment_size": 12800,
-    "init_lr_ratio": 1,
-    "warmup_epochs": 0,
-    "c_mel": 45,
-    "c_kl": 1.0
-  },
-  "data": {
-    "max_wav_value": 32768.0,
-    "sample_rate": 32000,
-    "filter_length": 1024,
-    "hop_length": 320,
-    "win_length": 1024,
-    "n_mel_channels": 80,
-    "mel_fmin": 0.0,
-    "mel_fmax": null
-  },
-  "model": {
-    "inter_channels": 192,
-    "hidden_channels": 192,
-    "filter_channels": 768,
-    "text_enc_hidden_dim": 256,
-    "n_heads": 2,
-    "n_layers": 6,
-    "kernel_size": 3,
-    "p_dropout": 0,
-    "resblock": "1",
-    "resblock_kernel_sizes": [3,7,11],
-    "resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]],
-    "upsample_rates": [10,4,2,2,2],
-    "upsample_initial_channel": 512,
-    "upsample_kernel_sizes": [16,16,4,4,4],
-    "use_spectral_norm": false,
-    "gin_channels": 256,
-    "spk_embed_dim": 109
-  }
-}
diff --git a/rvc/configs/v1/40000.json b/rvc/configs/v1/40000.json
deleted file mode 100644
index 3961ddb64..000000000
--- a/rvc/configs/v1/40000.json
+++ /dev/null
@@ -1,47 +0,0 @@
-{
-  "train": {
-    "log_interval": 200,
-    "seed": 1234,
-    "epochs": 20000,
-    "learning_rate": 1e-4,
-    "betas": [0.8, 0.99],
-    "eps": 1e-9,
-    "batch_size": 4,
-    "fp16_run": true,
-    "lr_decay": 0.999875,
-    "segment_size": 12800,
-    "init_lr_ratio": 1,
-    "warmup_epochs": 0,
-    "c_mel": 45,
-    "c_kl": 1.0
-  },
-  "data": {
-    "max_wav_value": 32768.0,
-    "sample_rate": 40000,
-    "filter_length": 2048,
-    "hop_length": 400,
-    "win_length": 2048,
-    "n_mel_channels": 125,
-    "mel_fmin": 0.0,
-    "mel_fmax": null
-  },
-  "model": {
-    "inter_channels": 192,
-    "hidden_channels": 192,
-    "filter_channels": 768,
-    "text_enc_hidden_dim": 256,
-    "n_heads": 2,
-    "n_layers": 6,
-    "kernel_size": 3,
-    "p_dropout": 0,
-    "resblock": "1",
-    "resblock_kernel_sizes": [3,7,11],
-    "resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]],
-    "upsample_rates": [10,10,2,2],
-    "upsample_initial_channel": 512,
-    "upsample_kernel_sizes": [16,16,4,4],
-    "use_spectral_norm": false,
-    "gin_channels": 256,
-    "spk_embed_dim": 109
-  }
-}
diff --git a/rvc/configs/v1/44100.json b/rvc/configs/v1/44100.json
deleted file mode 100644
index 39246c326..000000000
--- a/rvc/configs/v1/44100.json
+++ /dev/null
@@ -1,43 +0,0 @@
-{
-  "train": {
-    "log_interval": 200,
-    "seed": 1234,
-    "learning_rate": 0.0001,
-    "betas": [0.8, 0.99],
-    "eps": 1e-09,
-    "fp16_run": true,
-    "lr_decay": 0.999875,
-    "segment_size": 15876,
-    "c_mel": 45,
-    "c_kl": 1.0
-  },
-  "data": {
-    "max_wav_value": 32768.0,
-    "sample_rate": 44100,
-    "filter_length": 2048,
-    "hop_length": 441,
-    "win_length": 2048,
-    "n_mel_channels": 160,
-    "mel_fmin": 0.0,
-    "mel_fmax": null
-  },
-  "model": {
-    "inter_channels": 192,
-    "hidden_channels": 192,
-    "filter_channels": 768,
-    "text_enc_hidden_dim": 256,
-    "n_heads": 2,
-    "n_layers": 6,
-    "kernel_size": 3,
-    "p_dropout": 0,
-    "resblock": "1",
-    "resblock_kernel_sizes": [3,7,11],
-    "resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]],
-    "upsample_rates": [7,7,3,3],
-    "upsample_initial_channel": 512,
-    "upsample_kernel_sizes": [14,14,6,6],
-    "use_spectral_norm": false,
-    "gin_channels": 256,
-    "spk_embed_dim": 109
-  }
-}
\ No newline at end of file
diff --git a/rvc/configs/v1/48000.json b/rvc/configs/v1/48000.json
deleted file mode 100644
index 41ea3b62f..000000000
--- a/rvc/configs/v1/48000.json
+++ /dev/null
@@ -1,47 +0,0 @@
-{
-  "train": {
-    "log_interval": 200,
-    "seed": 1234,
-    "epochs": 20000,
-    "learning_rate": 1e-4,
-    "betas": [0.8, 0.99],
-    "eps": 1e-9,
-    "batch_size": 4,
-    "fp16_run": true,
-    "lr_decay": 0.999875,
-    "segment_size": 11520,
-    "init_lr_ratio": 1,
-    "warmup_epochs": 0,
-    "c_mel": 45,
-    "c_kl": 1.0
-  },
-  "data": {
-    "max_wav_value": 32768.0,
-    "sample_rate": 48000,
-    "filter_length": 2048,
-    "hop_length": 480,
-    "win_length": 2048,
-    "n_mel_channels": 128,
-    "mel_fmin": 0.0,
-    "mel_fmax": null
-  },
-  "model": {
-    "inter_channels": 192,
-    "hidden_channels": 192,
-    "filter_channels": 768,
-    "text_enc_hidden_dim": 256,
-    "n_heads": 2,
-    "n_layers": 6,
-    "kernel_size": 3,
-    "p_dropout": 0,
-    "resblock": "1",
-    "resblock_kernel_sizes": [3,7,11],
-    "resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]],
-    "upsample_rates": [10,6,2,2,2],
-    "upsample_initial_channel": 512,
-    "upsample_kernel_sizes": [16,16,4,4,4],
-    "use_spectral_norm": false,
-    "gin_channels": 256,
-    "spk_embed_dim": 109
-  }
-}
diff --git a/rvc/infer/infer.py b/rvc/infer/infer.py
index 0d6892d7a..1072a21ca 100644
--- a/rvc/infer/infer.py
+++ b/rvc/infer/infer.py
@@ -47,7 +47,7 @@ def __init__(self):
         """
         Initializes the VoiceConverter with default configuration, and sets up models and parameters.
         """
-        self.config = Config()  # Load RVC configuration
+        self.config = Config()  # Load configuration
        self.hubert_model = (
            None  # Initialize the Hubert model (for embedding extraction)
        )
diff --git a/rvc/infer/pipeline.py b/rvc/infer/pipeline.py
index 6e73efcbc..2ff06ea88 100644
--- a/rvc/infer/pipeline.py
+++ b/rvc/infer/pipeline.py
@@ -434,7 +434,7 @@ def voice_conversion(
             index: FAISS index for speaker embedding retrieval.
             big_npy: Speaker embeddings stored in a NumPy array.
             index_rate: Blending rate for speaker embedding retrieval.
-            version: Model version ("v1" or "v2").
+            version: Model version (kept to support older models).
             protect: Protection level for preserving the original pitch.
         """
         with torch.no_grad():
diff --git a/rvc/lib/algorithm/discriminators.py b/rvc/lib/algorithm/discriminators.py
index 107ddcb12..ecd968ca9 100644
--- a/rvc/lib/algorithm/discriminators.py
+++ b/rvc/lib/algorithm/discriminators.py
@@ -16,18 +16,15 @@ class MultiPeriodDiscriminator(torch.nn.Module):
     the input signal at different periods.

     Args:
-        periods (str): Periods of the discriminator. V1 = [2, 3, 5, 7, 11, 17], V2 = [2, 3, 5, 7, 11, 17, 23, 37].
         use_spectral_norm (bool): Whether to use spectral normalization. Defaults to False.
     """

     def __init__(
-        self, version: str, use_spectral_norm: bool = False, checkpointing: bool = False
+        self, use_spectral_norm: bool = False, checkpointing: bool = False
     ):
         super(MultiPeriodDiscriminator, self).__init__()
-        periods = (
-            [2, 3, 5, 7, 11, 17] if version == "v1" else [2, 3, 5, 7, 11, 17, 23, 37]
-        )
+        periods = [2, 3, 5, 7, 11, 17, 23, 37]
         self.checkpointing = checkpointing
         self.discriminators = torch.nn.ModuleList(
             [
diff --git a/rvc/lib/tools/prerequisites_download.py b/rvc/lib/tools/prerequisites_download.py
index 7ef7bcce1..cafa261b9 100644
--- a/rvc/lib/tools/prerequisites_download.py
+++ b/rvc/lib/tools/prerequisites_download.py
@@ -5,26 +5,7 @@
 url_base = "https://huggingface.co/IAHispano/Applio/resolve/main/Resources"

-pretraineds_v1_list = [
-    (
-        "pretrained_v1/",
-        [
-            "D32k.pth",
-            "D40k.pth",
-            "D48k.pth",
-            "G32k.pth",
-            "G40k.pth",
-            "G48k.pth",
-            "f0D32k.pth",
-            "f0D40k.pth",
-            "f0D48k.pth",
-            "f0G32k.pth",
-            "f0G40k.pth",
-            "f0G48k.pth",
-        ],
-    )
-]
-pretraineds_v2_list = [
+pretraineds_hifigan_list = [
     (
         "pretrained_v2/",
         [
@@ -50,8 +31,7 @@
 ]

 folder_mapping_list = {
-    "pretrained_v1/": "rvc/models/pretraineds/pretrained_v1/",
-    "pretrained_v2/": "rvc/models/pretraineds/pretrained_v2/",
+    "pretrained_v2/": "rvc/models/pretraineds/hifi-gan/",
     "embedders/contentvec/": "rvc/models/embedders/contentvec/",
     "predictors/": "rvc/models/predictors/",
     "formant/": "rvc/models/formant/",
@@ -126,17 +106,12 @@ def split_pretraineds(pretrained_list):
     return f0_list, non_f0_list

-pretraineds_v1_f0_list, pretraineds_v1_nof0_list = split_pretraineds(
-    pretraineds_v1_list
-)
 pretraineds_v2_f0_list, pretraineds_v2_nof0_list = split_pretraineds(
-    pretraineds_v2_list
+    pretraineds_hifigan_list
 )

 def calculate_total_size(
-    pretraineds_v1_f0,
-    pretraineds_v1_nof0,
     pretraineds_v2_f0,
     pretraineds_v2_nof0,
     models,
@@ -151,16 +126,12 @@
         total_size += get_file_size_if_missing(embedders_list)
     if exe and os.name == "nt":
         total_size += get_file_size_if_missing(executables_list)
-    total_size += get_file_size_if_missing(pretraineds_v1_f0)
-    total_size += get_file_size_if_missing(pretraineds_v1_nof0)
     total_size += get_file_size_if_missing(pretraineds_v2_f0)
     total_size += get_file_size_if_missing(pretraineds_v2_nof0)
     return total_size

 def prequisites_download_pipeline(
-    pretraineds_v1_f0,
-    pretraineds_v1_nof0,
     pretraineds_v2_f0,
     pretraineds_v2_nof0,
     models,
@@ -170,8 +141,6 @@
     Manage the download pipeline for different categories of files.
     """
     total_size = calculate_total_size(
-        pretraineds_v1_f0_list if pretraineds_v1_f0 else [],
-        pretraineds_v1_nof0_list if pretraineds_v1_nof0 else [],
         pretraineds_v2_f0_list if pretraineds_v2_f0 else [],
         pretraineds_v2_nof0_list if pretraineds_v2_nof0 else [],
         models,
@@ -190,10 +159,6 @@
             download_mapping_files(executables_list, global_bar)
         else:
             print("No executables needed")
-        if pretraineds_v1_f0:
-            download_mapping_files(pretraineds_v1_f0_list, global_bar)
-        if pretraineds_v1_nof0:
-            download_mapping_files(pretraineds_v1_nof0_list, global_bar)
         if pretraineds_v2_f0:
             download_mapping_files(pretraineds_v2_f0_list, global_bar)
         if pretraineds_v2_nof0:
diff --git a/rvc/lib/tools/pretrained_selector.py b/rvc/lib/tools/pretrained_selector.py
index bcf0489e7..d4062a240 100644
--- a/rvc/lib/tools/pretrained_selector.py
+++ b/rvc/lib/tools/pretrained_selector.py
@@ -1,21 +1,12 @@
 import os

-def pretrained_selector(version, vocoder, pitch_guidance, sample_rate):
-    base_path = os.path.join("rvc", "models", "pretraineds", f"pretrained_{version}")
+def pretrained_selector(vocoder, pitch_guidance, sample_rate):
+    base_path = os.path.join("rvc", "models", "pretraineds", vocoder.lower())
     f0 = "f0" if pitch_guidance else ""

-    if vocoder == "HiFi-GAN":
-        vocoder_path = ""
-    elif vocoder == "MRF HiFi-GAN":
-        vocoder_path = "HiFiGAN_"
-    elif vocoder == "RefineGAN":
-        vocoder_path = "RefineGAN_"
-    else:
-        vocoder_path = ""
-
-    path_g = os.path.join(base_path, f"{vocoder_path}{f0}G{str(sample_rate)[:2]}k.pth")
-    path_d = os.path.join(base_path, f"{vocoder_path}{f0}D{str(sample_rate)[:2]}k.pth")
+    path_g = os.path.join(base_path, f"{f0}G{str(sample_rate)[:2]}k.pth")
+    path_d = os.path.join(base_path, f"{f0}D{str(sample_rate)[:2]}k.pth")

     if os.path.exists(path_g) and os.path.exists(path_d):
         return path_g, path_d
diff --git a/rvc/models/pretraineds/pretraineds_custom/.gitkeep b/rvc/models/pretraineds/custom/.gitkeep
similarity index 100%
rename from rvc/models/pretraineds/pretraineds_custom/.gitkeep
rename to rvc/models/pretraineds/custom/.gitkeep
diff --git a/rvc/models/pretraineds/pretrained_v1/.gitkeep b/rvc/models/pretraineds/hifi-gan/.gitkeep
similarity index 100%
rename from rvc/models/pretraineds/pretrained_v1/.gitkeep
rename to rvc/models/pretraineds/hifi-gan/.gitkeep
diff --git a/rvc/models/pretraineds/pretrained_v2/.gitkeep b/rvc/models/pretraineds/pretrained_v2/.gitkeep
deleted file mode 100644
index e69de29bb..000000000
diff --git a/rvc/train/extract/extract.py b/rvc/train/extract/extract.py
index 9b7bd21a2..c24857f5c 100644
--- a/rvc/train/extract/extract.py
+++ b/rvc/train/extract/extract.py
@@ -144,7 +144,7 @@ def run_pitch_extraction(files, devices, f0_method, hop_length, threads):

 def process_file_embedding(
-    files, version, embedder_model, embedder_model_custom, device_num, device, n_threads
+    files, embedder_model, embedder_model_custom, device_num, device, n_threads
 ):
     dtype = torch.float16 if (config.is_half and "cuda" in device) else torch.float32
     model = load_embedding(embedder_model, embedder_model_custom).to(dtype).to(device)
@@ -158,8 +158,6 @@ def worker(file_info):
             feats = feats.view(1, -1)
             with torch.no_grad():
                 result = model(feats)["last_hidden_state"]
-                if version == "v1":
-                    result = model.final_proj(result[0]).unsqueeze(0)
             feats_out = result.squeeze(0).float().cpu().numpy()
             if not np.isnan(feats_out).any():
                 np.save(out_file_path, feats_out, allow_pickle=False)
@@ -174,7 +172,7 @@ def run_embedding_extraction(
-    files, devices, version, embedder_model, embedder_model_custom, threads
+    files, devices, embedder_model, embedder_model_custom, threads
 ):
     devices_str = ", ".join(devices)
     print(
@@ -186,7 +184,6 @@
                 executor.submit(
                     process_file_embedding,
                     files[i :: len(devices)],
-                    version,
                     embedder_model,
                     embedder_model_custom,
                     i,
@@ -206,16 +203,15 @@
     hop_length = int(sys.argv[3])
     num_processes = int(sys.argv[4])
     gpus = sys.argv[5]
-    version = sys.argv[6]
-    sample_rate = sys.argv[7]
-    embedder_model = sys.argv[8]
-    embedder_model_custom = sys.argv[9] if len(sys.argv) > 9 else None
-    include_mutes = int(sys.argv[10]) if len(sys.argv) > 10 else 2
+    sample_rate = sys.argv[6]
+    embedder_model = sys.argv[7]
+    embedder_model_custom = sys.argv[8] if len(sys.argv) > 8 else None
+    include_mutes = int(sys.argv[9]) if len(sys.argv) > 9 else 2

     wav_path = os.path.join(exp_dir, "sliced_audios_16k")
     os.makedirs(os.path.join(exp_dir, "f0"), exist_ok=True)
     os.makedirs(os.path.join(exp_dir, "f0_voiced"), exist_ok=True)
-    os.makedirs(os.path.join(exp_dir, version + "_extracted"), exist_ok=True)
+    os.makedirs(os.path.join(exp_dir, "extracted"), exist_ok=True)

     chosen_embedder_model = (
         embedder_model_custom if embedder_model == "custom" else embedder_model
@@ -238,7 +234,7 @@
             os.path.join(exp_dir, "f0", file_name + ".npy"),
             os.path.join(exp_dir, "f0_voiced", file_name + ".npy"),
             os.path.join(
-                exp_dir, version + "_extracted", file_name.replace("wav", "npy")
+                exp_dir, "extracted", file_name.replace("wav", "npy")
             ),
         ]
         files.append(file_info)
@@ -248,8 +244,8 @@
     run_pitch_extraction(files, devices, f0_method, hop_length, num_processes)

     run_embedding_extraction(
-        files, devices, version, embedder_model, embedder_model_custom, num_processes
+        files, devices, embedder_model, embedder_model_custom, num_processes
     )

-    generate_config(version, sample_rate, exp_dir)
-    generate_filelist(exp_dir, version, sample_rate, include_mutes)
+    generate_config(sample_rate, exp_dir)
+    generate_filelist(exp_dir, sample_rate, include_mutes)
diff --git a/rvc/train/extract/preparing_files.py b/rvc/train/extract/preparing_files.py
index a2ce3ddf8..e5df05baf 100644
--- a/rvc/train/extract/preparing_files.py
+++ b/rvc/train/extract/preparing_files.py
@@ -8,18 +8,18 @@
 current_directory = os.getcwd()

-def generate_config(rvc_version: str, sample_rate: int, model_path: str):
-    config_path = os.path.join("rvc", "configs", rvc_version, f"{sample_rate}.json")
+def generate_config(sample_rate: int, model_path: str):
+    config_path = os.path.join("rvc", "configs", f"{sample_rate}.json")
     config_save_path = os.path.join(model_path, "config.json")
     if not os.path.exists(config_save_path):
         shutil.copyfile(config_path, config_save_path)

 def generate_filelist(
-    model_path: str, rvc_version: str, sample_rate: int, include_mutes: int = 2
+    model_path: str, sample_rate: int, include_mutes: int = 2
 ):
     gt_wavs_dir = os.path.join(model_path, "sliced_audios")
-    feature_dir = os.path.join(model_path, f"{rvc_version}_extracted")
+    feature_dir = os.path.join(model_path, "extracted")

     f0_dir, f0nsf_dir = None, None
     f0_dir = os.path.join(model_path, "f0")
@@ -40,7 +40,7 @@
             if sid not in sids:
                 sids.append(sid)
             options.append(
-                f"{gt_wavs_dir}/{name}.wav|{feature_dir}/{name}.npy|{f0_dir}/{name}.wav.npy|{f0nsf_dir}/{name}.wav.npy|{sid}"
+                f"{os.path.join(gt_wavs_dir, name)}.wav|{os.path.join(feature_dir, name)}.npy|{os.path.join(f0_dir, name)}.wav.npy|{os.path.join(f0nsf_dir, name)}.wav.npy|{sid}"
             )

     if include_mutes > 0:
@@ -48,7 +48,7 @@
             mute_base_path, "sliced_audios", f"mute{sample_rate}.wav"
         )
         mute_feature_path = os.path.join(
-            mute_base_path, f"{rvc_version}_extracted", "mute.npy"
+            mute_base_path, "extracted", "mute.npy"
         )
         mute_f0_path = os.path.join(mute_base_path, "f0", "mute.wav.npy")
         mute_f0nsf_path = os.path.join(mute_base_path, "f0_voiced", "mute.wav.npy")
diff --git a/rvc/train/process/extract_index.py b/rvc/train/process/extract_index.py
index fba16d6a4..3613ded6d 100644
--- a/rvc/train/process/extract_index.py
+++ b/rvc/train/process/extract_index.py
@@ -7,19 +7,15 @@

 # Parse command line arguments
 exp_dir = str(sys.argv[1])
-version = str(sys.argv[2])
-index_algorithm = str(sys.argv[3])
+index_algorithm = str(sys.argv[2])

 try:
-    feature_dir = os.path.join(exp_dir, f"{version}_extracted")
+    feature_dir = os.path.join(exp_dir, "extracted")
     model_name = os.path.basename(exp_dir)

-    index_filename_added = f"added_{model_name}_{version}.index"
+    index_filename_added = f"{model_name}.index"
     index_filepath_added = os.path.join(exp_dir, index_filename_added)

-    # index_filename_trained = f"trained_{model_name}_{version}.index"
-    # index_filepath_trained = os.path.join(exp_dir, index_filename_trained)
-
     if os.path.exists(index_filepath_added):
         pass
     else:
@@ -52,26 +48,11 @@
             .cluster_centers_
         )

-        # np.save(os.path.join(exp_dir, "total_fea.npy"), big_npy)
         n_ivf = min(int(16 * np.sqrt(big_npy.shape[0])), big_npy.shape[0] // 39)

-        """
-        # index_trained
-        index_trained = faiss.index_factory(
-            256 if version == "v1" else 768, f"IVF{n_ivf},Flat"
-        )
-        index_ivf_trained = faiss.extract_index_ivf(index_trained)
-        index_ivf_trained.nprobe = 1
-        index_trained.train(big_npy)
-
-        faiss.write_index(index_trained, index_filepath_trained)
-        """
-
-        # index_added
-        index_added = faiss.index_factory(
-            256 if version == "v1" else 768, f"IVF{n_ivf},Flat"
-        )
+        index_added = faiss.index_factory(768, f"IVF{n_ivf},Flat")
         index_ivf_added = faiss.extract_index_ivf(index_added)
         index_ivf_added.nprobe = 1
         index_added.train(big_npy)
diff --git a/rvc/train/process/extract_model.py b/rvc/train/process/extract_model.py
index 5d3d1170d..14d97d22b 100644
--- a/rvc/train/process/extract_model.py
+++ b/rvc/train/process/extract_model.py
@@ -32,10 +32,10 @@ def extract_model(
     model_path,
     epoch,
     step,
-    version,
     hps,
     overtrain_info,
     vocoder,
+    version="v2",
 ):
     try:
         model_dir = os.path.dirname(model_path)
diff --git a/rvc/train/train.py b/rvc/train/train.py
index 8599b7bd9..72917d2a3 100644
--- a/rvc/train/train.py
+++ b/rvc/train/train.py
@@ -59,18 +59,17 @@
 total_epoch = int(sys.argv[3])
 pretrainG = sys.argv[4]
 pretrainD = sys.argv[5]
-version = sys.argv[6]
-gpus = sys.argv[7]
-batch_size = int(sys.argv[8])
-sample_rate = int(sys.argv[9])
-save_only_latest = strtobool(sys.argv[10])
-save_every_weights = strtobool(sys.argv[11])
-cache_data_in_gpu = strtobool(sys.argv[12])
-overtraining_detector = strtobool(sys.argv[13])
-overtraining_threshold = int(sys.argv[14])
-cleanup = strtobool(sys.argv[15])
-vocoder = sys.argv[16]
-checkpointing = strtobool(sys.argv[17])
+gpus = sys.argv[6]
+batch_size = int(sys.argv[7])
+sample_rate = int(sys.argv[8])
+save_only_latest = strtobool(sys.argv[9])
+save_every_weights = strtobool(sys.argv[10])
+cache_data_in_gpu = strtobool(sys.argv[11])
+overtraining_detector = strtobool(sys.argv[12])
+overtraining_threshold = int(sys.argv[13])
+cleanup = strtobool(sys.argv[14])
+vocoder = sys.argv[15]
+checkpointing = strtobool(sys.argv[16])

 randomized = True
 optimizer = "RAdam"  # "AdamW"
@@ -371,19 +370,31 @@ def run(
     # Validations
     if len(train_loader) < 3:
-        print("Not enough data present in the training set. Perhaps you forgot to slice the audio files in preprocess?")
+        print(
+            "Not enough data present in the training set. Perhaps you forgot to slice the audio files in preprocess?"
+        )
         os._exit(2333333)
     else:
         g_file = latest_checkpoint_path(experiment_dir, "G_*.pth")
         if g_file != None:
-            print('Checking saved weights...')
-            g = torch.load(g_file, map_location = "cpu")
-            if optimizer == "RAdam" and "amsgrad" in g["optimizer"]["param_groups"][0].keys():
+            print("Checking saved weights...")
+            g = torch.load(g_file, map_location="cpu")
+            if (
+                optimizer == "RAdam"
+                and "amsgrad" in g["optimizer"]["param_groups"][0].keys()
+            ):
                 optimizer = "AdamW"
-                print(f"Optimizer choice has been reverted to {optimizer} to match the saved D/G weights.")
-            elif optimizer == "AdamW" and "decoupled_weight_decay" in g["optimizer"]["param_groups"][0].keys():
+                print(
+                    f"Optimizer choice has been reverted to {optimizer} to match the saved D/G weights."
+                )
+            elif (
+                optimizer == "AdamW"
+                and "decoupled_weight_decay" in g["optimizer"]["param_groups"][0].keys()
+            ):
                 optimizer = "RAdam"
-                print(f"Optimizer choice has been reverted to {optimizer} to match the saved D/G weights.")
+                print(
+                    f"Optimizer choice has been reverted to {optimizer} to match the saved D/G weights."
+                )
             del g

     # Initialize models and optimizers
@@ -403,7 +414,7 @@
     )
     net_d = MultiPeriodDiscriminator(
-        version, config.model.use_spectral_norm, checkpointing=checkpointing
+        config.model.use_spectral_norm, checkpointing=checkpointing
     )

     if torch.cuda.is_available():
@@ -984,7 +995,7 @@ def train_and_evaluate(
             )
             for m in model_add:
                 if os.path.exists(m):
-                    print(f'{m} already exists. Overwriting.')
+                    print(f"{m} already exists. Overwriting.")
                 extract_model(
                     ckpt=ckpt,
                     sr=sample_rate,
@@ -993,7 +1004,6 @@
                     model_path=m,
                     epoch=epoch,
                     step=global_step,
-                    version=version,
                     hps=hps,
                     overtrain_info=overtrain_info,
                     vocoder=vocoder,
diff --git a/tabs/train/train.py b/tabs/train/train.py
index 5b0ff6f74..87bd03ddc 100644
--- a/tabs/train/train.py
+++ b/tabs/train/train.py
@@ -21,29 +21,6 @@
 now_dir = os.getcwd()
 sys.path.append(now_dir)

-pretraineds_v1 = [
-    (
-        "pretrained_v1/",
-        [
-            "D32k.pth",
-            "D40k.pth",
-            "D48k.pth",
-            "G32k.pth",
-            "G40k.pth",
-            "G48k.pth",
-            "f0D32k.pth",
-            "f0D40k.pth",
-            "f0D48k.pth",
-            "f0G32k.pth",
-            "f0G40k.pth",
-            "f0G48k.pth",
-        ],
-    ),
-]
-
-folder_mapping = {
-    "pretrained_v1/": "rvc/models/pretraineds/pretrained_v1/",
-}

 sup_audioext = {
     "wav",
@@ -342,14 +319,6 @@ def train_tab():
                 interactive=False,
                 visible=True,
             )
-            rvc_version = gr.Radio(
-                label=i18n("Model Architecture"),
-                info=i18n("Version of the model architecture."),
-                choices=["v1", "v2"],
-                value="v2",
-                interactive=True,
-                visible=False,
-            )
         with gr.Accordion(
             i18n("Advanced Settings"),
             open=False,
@@ -586,7 +555,6 @@
             fn=run_extract_script,
             inputs=[
                 model_name,
-                rvc_version,
                 f0_method,
                 hop_length,
                 cpu_cores,
@@ -785,7 +753,6 @@ def enforce_terms(terms_accepted, *args):
             inputs=[
                 terms_checkbox,
                 model_name,
-                rvc_version,
                 save_every_epoch,
                 save_only_latest,
                 save_every_weights,
@@ -818,7 +785,7 @@
             index_button = gr.Button(i18n("Generate Index"))
             index_button.click(
                 fn=run_index_script,
-                inputs=[model_name, rvc_version, index_algorithm],
+                inputs=[model_name, index_algorithm],
                 outputs=[train_output_info],
             )
@@ -904,36 +871,20 @@ def disable_stop_train_button():
         "__type__": "update",
     }

-    def download_prerequisites(version):
-        if version == "v1":
-            gr.Info(
-                "Checking for v1 prerequisites with pitch guidance... Missing files will be downloaded. If you already have them, this step will be skipped."
-            )
-            run_prerequisites_script(
-                pretraineds_v1_f0=True,
-                pretraineds_v1_nof0=False,
-                pretraineds_v2_f0=False,
-                pretraineds_v2_nof0=False,
-                models=False,
-                exe=False,
-            )
-        elif version == "v2":
-            gr.Info(
-                "Checking for v2 prerequisites with pitch guidance... Missing files will be downloaded. If you already have them, this step will be skipped."
-            )
-            run_prerequisites_script(
-                pretraineds_v1_f0=False,
-                pretraineds_v1_nof0=False,
-                pretraineds_v2_f0=True,
-                pretraineds_v2_nof0=False,
-                models=False,
-                exe=False,
-            )
+    def download_prerequisites():
+        gr.Info(
+            "Checking for prerequisites with pitch guidance... Missing files will be downloaded. If you already have them, this step will be skipped."
+        )
+        run_prerequisites_script(
+            pretraineds_v2_f0=True,
+            pretraineds_v2_nof0=False,
+            models=False,
+            exe=False,
+        )
         gr.Info(
             "Prerequisites check complete. Missing files were downloaded, and you may now start preprocessing."
         )

     def toggle_visible_embedder_custom(embedder_model):
         if embedder_model == "custom":
             return {"visible": True, "__type__": "update"}
@@ -963,11 +914,6 @@ def update_slider_visibility(noise_reduction):
         inputs=noise_reduction,
         outputs=clean_strength,
     )
-    rvc_version.change(
-        fn=download_prerequisites,
-        inputs=[rvc_version],
-        outputs=[],
-    )
     architecture.change(
         fn=toggle_architecture,
         inputs=[architecture],