Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🦄 Refactor #8

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -170,4 +170,6 @@ pretrain/nsf_hifigan/model
data/*
__pycache__
exp/*
dataset_raw/*
dataset_raw/*

ReFlowVaeSVC/cache/*
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,21 +6,21 @@
(1)预处理:

```bash
python preprocess.py -c configs/reflow-vae-wavenet.yaml
python -m ReFlowVaeSVC.preprocess -c configs/reflow-vae-wavenet.yaml
```

(2)训练(无底模):

```bash
python train.py -c configs/reflow-vae-wavenet.yaml
python -m ReFlowVaeSVC.train -c configs/reflow-vae-wavenet.yaml
```
Beta版底模可以在这里下载:https://huggingface.co/OOPPEENN/pretrained_model

(3)非实时推理:

```bash
# 普通模式, 需要语义编码器, 比如 contentvec
python main.py -i <input.wav> -m <model_ckpt.pt> -o <output.wav> -k <keychange (semitones)> -tid <target_speaker_id> -step <infer_step> -method <method>
python -m ReFlowVaeSVC.main -i <input.wav> -m <model_ckpt.pt> -o <output.wav> -k <keychange (semitones)> -tid <target_speaker_id> -step <infer_step> -method <method>
# VAE 模式, 无需语义编码器, 特化 sid 到 tid 的变声(或者音高编辑,如果sid == tid)
python main.py -i <input.wav> -m <model_ckpt.pt> -o <output.wav> -k <keychange (semitones)> -sid <source_speaker_id> -tid <target_speaker_id> -step <infer_step> -method <method>
python -m ReFlowVaeSVC.main -i <input.wav> -m <model_ckpt.pt> -o <output.wav> -k <keychange (semitones)> -sid <source_speaker_id> -tid <target_speaker_id> -step <infer_step> -method <method>
```
File renamed without changes.
File renamed without changes.
1 change: 1 addition & 0 deletions ReFlowVaeSVC/encoder/rmvpe/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .constants import *
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
import numpy as np
import torch
import torch.nn.functional as F
from torchaudio.transforms import Resample
from .constants import *
from .model import E2E0, E2E
from .model import E2E0
from .spec import MelSpectrogram
from .utils import to_local_average_f0, to_viterbi_f0

Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
import torch
from torch import nn
from .deepunet import DeepUnet, DeepUnet0
from .constants import *
from .spec import MelSpectrogram
from .seq import BiGRU


Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
1 change: 0 additions & 1 deletion logger/saver.py → ReFlowVaeSVC/logger/saver.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
'''

import os
import json
import time
import yaml
import datetime
Expand Down
1 change: 0 additions & 1 deletion logger/utils.py → ReFlowVaeSVC/logger/utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import os
import yaml
import json
import pickle
import torch
try:
import torch_musa
Expand Down
8 changes: 3 additions & 5 deletions main.py → ReFlowVaeSVC/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,12 @@
import argparse
import numpy as np
import soundfile as sf
import pyworld as pw
import parselmouth
import hashlib
import torch.nn.functional as F
from ast import literal_eval
from slicer import Slicer
from reflow.extractors import F0_Extractor, Volume_Extractor, Units_Encoder
from reflow.vocoder import load_model_vocoder
from ReFlowVaeSVC.slicer import Slicer
from ReFlowVaeSVC.reflow.extractors import F0_Extractor, Volume_Extractor, Units_Encoder
from ReFlowVaeSVC.reflow.vocoder import load_model_vocoder
from tqdm import tqdm


Expand Down
File renamed without changes.
File renamed without changes.
4 changes: 0 additions & 4 deletions nsf_hifigan/nvSTFT.py → ReFlowVaeSVC/nsf_hifigan/nvSTFT.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
import math
import os
os.environ["LRU_CACHE_CAPACITY"] = "3"
import random
import torch
try:
import torch_musa
Expand All @@ -10,9 +8,7 @@
import torch.utils.data
import numpy as np
import librosa
from librosa.util import normalize
from librosa.filters import mel as librosa_mel_fn
from scipy.io.wavfile import read
import soundfile as sf
import torch.nn.functional as F

Expand Down
File renamed without changes.
11 changes: 4 additions & 7 deletions preprocess.py → ReFlowVaeSVC/preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,13 @@
use_torch_musa = True
except ImportError:
use_torch_musa = False
import pyworld as pw
import parselmouth
import argparse
import shutil
from logger import utils
from ReFlowVaeSVC.logger import utils
from tqdm import tqdm
from reflow.extractors import F0_Extractor, Volume_Extractor, Units_Encoder
from reflow.vocoder import Vocoder
from logger.utils import traverse_dir
import concurrent.futures
from ReFlowVaeSVC.reflow.extractors import F0_Extractor, Volume_Extractor, Units_Encoder
from ReFlowVaeSVC.reflow.vocoder import Vocoder
from ReFlowVaeSVC.logger.utils import traverse_dir

def parse_args(args=None, namespace=None):
"""Parse command-line arguments."""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
import torch_musa
except ImportError:
pass
import random
from tqdm import tqdm
from torch.utils.data import Dataset

Expand Down
8 changes: 2 additions & 6 deletions reflow/extractors.py → ReFlowVaeSVC/reflow/extractors.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
import os
import numpy as np
import yaml
import torch
try:
import torch_musa
Expand All @@ -11,13 +9,11 @@
import pyworld as pw
import parselmouth
import torchcrepe
import resampy
from transformers import HubertModel, Wav2Vec2FeatureExtractor
from fairseq import checkpoint_utils
from encoder.hubert.model import HubertSoft
from ReFlowVaeSVC.encoder.hubert.model import HubertSoft
from torch.nn.modules.utils import consume_prefix_in_state_dict_if_present
from torchaudio.transforms import Resample
import time

CREPE_RESAMPLE_KERNEL = {}
F0_KERNEL = {}
Expand Down Expand Up @@ -78,7 +74,7 @@ def __init__(self, f0_extractor, sample_rate = 44100, hop_size = 512, f0_min = 6
self.resample_kernel = CREPE_RESAMPLE_KERNEL[key_str]
if f0_extractor == 'rmvpe':
if 'rmvpe' not in F0_KERNEL :
from encoder.rmvpe import RMVPE
from ReFlowVaeSVC.encoder.rmvpe.inference import RMVPE
F0_KERNEL['rmvpe'] = RMVPE('pretrain/rmvpe/model.pt', hop_length=160)
self.rmvpe = F0_KERNEL['rmvpe']
if f0_extractor == 'fcpe':
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import math
from typing import Optional

import torch
try:
Expand Down
1 change: 0 additions & 1 deletion reflow/reflow.py → ReFlowVaeSVC/reflow/reflow.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import numpy as np
import torch
try:
import torch_musa
Expand Down
4 changes: 2 additions & 2 deletions reflow/solver.py → ReFlowVaeSVC/reflow/solver.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@
except ImportError:
use_torch_musa = False
import librosa
from logger.saver import Saver
from logger import utils
from ReFlowVaeSVC.logger.saver import Saver
from ReFlowVaeSVC.logger import utils
from torch import autocast
# from torch.cuda.amp import GradScaler

Expand Down
5 changes: 2 additions & 3 deletions reflow/vocoder.py → ReFlowVaeSVC/reflow/vocoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,9 @@
except ImportError:
use_torch_musa = False
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from nsf_hifigan.nvSTFT import STFT
from nsf_hifigan.models import load_model,load_config
from ReFlowVaeSVC.nsf_hifigan.nvSTFT import STFT
from ReFlowVaeSVC.nsf_hifigan.models import load_model,load_config
from torchaudio.transforms import Resample
from .reflow import Bi_RectifiedFlow
from .naive_v2_diff import NaiveV2Diff
Expand Down
File renamed without changes.
File renamed without changes.
9 changes: 4 additions & 5 deletions train.py → ReFlowVaeSVC/train.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
import os
import argparse
import torch
try:
import torch_musa
except ImportError:
pass
from torch.optim import lr_scheduler
from logger import utils
from reflow.data_loaders import get_data_loaders
from reflow.vocoder import Vocoder, Unit2Wav_VAE
from reflow.solver import train
from ReFlowVaeSVC.logger import utils
from ReFlowVaeSVC.reflow.data_loaders import get_data_loaders
from ReFlowVaeSVC.reflow.vocoder import Vocoder, Unit2Wav_VAE
from ReFlowVaeSVC.reflow.solver import train

def parse_args(args=None, namespace=None):
"""Parse command-line arguments."""
Expand Down
5 changes: 0 additions & 5 deletions encoder/rmvpe/__init__.py

This file was deleted.

2 changes: 0 additions & 2 deletions pretrain/contentvec/.gitignore

This file was deleted.

2 changes: 0 additions & 2 deletions pretrain/nsf_hifigan/.gitignore

This file was deleted.

2 changes: 0 additions & 2 deletions pretrain/rmvpe/.gitignore

This file was deleted.