yxlllc · HuanLinOTO · May 3, 2024 · May 3, 2024 · May 3, 2024 · May 3, 2024
diff --git a/.gitignore b/.gitignore
@@ -170,4 +170,6 @@ pretrain/nsf_hifigan/model
 data/*
 __pycache__
 exp/*
-dataset_raw/*
+dataset_raw/*
+
+ReFlowVaeSVC/cache/*
diff --git a/README.md b/README.md
@@ -6,21 +6,21 @@
 （1）预处理：
 
 ```bash
-python preprocess.py -c configs/reflow-vae-wavenet.yaml
+python -m ReFlowVaeSVC.preprocess -c configs/reflow-vae-wavenet.yaml
 ```
 
 （2）训练（无底模）：
 
 ```bash
-python train.py -c configs/reflow-vae-wavenet.yaml
+python -m ReFlowVaeSVC.train -c configs/reflow-vae-wavenet.yaml
 ```
 Beta版底模可以在这里下载：https://huggingface.co/OOPPEENN/pretrained_model
 
 （3）非实时推理：
 
 ```bash
 # 普通模式, 需要语义编码器, 比如 contentvec
-python main.py -i <input.wav> -m <model_ckpt.pt> -o <output.wav> -k <keychange (semitones)> -tid <target_speaker_id> -step <infer_step> -method <method>
+python -m ReFlowVaeSVC.main -i <input.wav> -m <model_ckpt.pt> -o <output.wav> -k <keychange (semitones)> -tid <target_speaker_id> -step <infer_step> -method <method>
 # VAE 模式, 无需语义编码器, 特化 sid 到 tid 的变声（或者音高编辑，如果sid == tid）
-python main.py -i <input.wav> -m <model_ckpt.pt> -o <output.wav> -k <keychange (semitones)> -sid <source_speaker_id> -tid <target_speaker_id> -step <infer_step> -method <method>
+python -m ReFlowVaeSVC.main -i <input.wav> -m <model_ckpt.pt> -o <output.wav> -k <keychange (semitones)> -sid <source_speaker_id> -tid <target_speaker_id> -step <infer_step> -method <method>
 ```
diff --git a/draw.py → ReFlowVaeSVC/draw.py b/draw.py → ReFlowVaeSVC/draw.py
diff --git a/encoder/hubert/model.py → ReFlowVaeSVC/encoder/hubert/model.py b/encoder/hubert/model.py → ReFlowVaeSVC/encoder/hubert/model.py
diff --git a/ReFlowVaeSVC/encoder/rmvpe/__init__.py b/ReFlowVaeSVC/encoder/rmvpe/__init__.py
@@ -0,0 +1 @@
+from .constants import *
diff --git a/encoder/rmvpe/constants.py → ReFlowVaeSVC/encoder/rmvpe/constants.py b/encoder/rmvpe/constants.py → ReFlowVaeSVC/encoder/rmvpe/constants.py
diff --git a/encoder/rmvpe/deepunet.py → ReFlowVaeSVC/encoder/rmvpe/deepunet.py b/encoder/rmvpe/deepunet.py → ReFlowVaeSVC/encoder/rmvpe/deepunet.py
diff --git a/encoder/rmvpe/inference.py → ReFlowVaeSVC/encoder/rmvpe/inference.py b/encoder/rmvpe/inference.py → ReFlowVaeSVC/encoder/rmvpe/inference.py
@@ -1,9 +1,8 @@
-import numpy as np
 import torch
 import torch.nn.functional as F
 from torchaudio.transforms import Resample
 from .constants import *
-from .model import E2E0, E2E
+from .model import E2E0
 from .spec import MelSpectrogram 
 from .utils import to_local_average_f0, to_viterbi_f0
 

diff --git a/encoder/rmvpe/model.py → ReFlowVaeSVC/encoder/rmvpe/model.py b/encoder/rmvpe/model.py → ReFlowVaeSVC/encoder/rmvpe/model.py
@@ -1,8 +1,6 @@
-import torch
 from torch import nn
 from .deepunet import DeepUnet, DeepUnet0
 from .constants import *
-from .spec import MelSpectrogram
 from .seq import BiGRU
 
 

diff --git a/encoder/rmvpe/seq.py → ReFlowVaeSVC/encoder/rmvpe/seq.py b/encoder/rmvpe/seq.py → ReFlowVaeSVC/encoder/rmvpe/seq.py
diff --git a/encoder/rmvpe/spec.py → ReFlowVaeSVC/encoder/rmvpe/spec.py b/encoder/rmvpe/spec.py → ReFlowVaeSVC/encoder/rmvpe/spec.py
diff --git a/encoder/rmvpe/utils.py → ReFlowVaeSVC/encoder/rmvpe/utils.py b/encoder/rmvpe/utils.py → ReFlowVaeSVC/encoder/rmvpe/utils.py
diff --git a/logger/__init__.py → ReFlowVaeSVC/logger/__init__.py b/logger/__init__.py → ReFlowVaeSVC/logger/__init__.py
diff --git a/logger/saver.py → ReFlowVaeSVC/logger/saver.py b/logger/saver.py → ReFlowVaeSVC/logger/saver.py
@@ -3,7 +3,6 @@
 '''
 
 import os
-import json
 import time
 import yaml
 import datetime

diff --git a/logger/utils.py → ReFlowVaeSVC/logger/utils.py b/logger/utils.py → ReFlowVaeSVC/logger/utils.py
@@ -1,7 +1,6 @@
 import os
 import yaml
 import json
-import pickle
 import torch
 try:
     import torch_musa

diff --git a/main.py → ReFlowVaeSVC/main.py b/main.py → ReFlowVaeSVC/main.py
@@ -9,14 +9,12 @@
 import argparse
 import numpy as np
 import soundfile as sf
-import pyworld as pw
-import parselmouth
 import hashlib
 import torch.nn.functional as F
 from ast import literal_eval
-from slicer import Slicer
-from reflow.extractors import F0_Extractor, Volume_Extractor, Units_Encoder
-from reflow.vocoder import load_model_vocoder
+from ReFlowVaeSVC.slicer import Slicer
+from ReFlowVaeSVC.reflow.extractors import F0_Extractor, Volume_Extractor, Units_Encoder
+from ReFlowVaeSVC.reflow.vocoder import load_model_vocoder
 from tqdm import tqdm
 
 

diff --git a/nsf_hifigan/env.py → ReFlowVaeSVC/nsf_hifigan/env.py b/nsf_hifigan/env.py → ReFlowVaeSVC/nsf_hifigan/env.py
diff --git a/nsf_hifigan/models.py → ReFlowVaeSVC/nsf_hifigan/models.py b/nsf_hifigan/models.py → ReFlowVaeSVC/nsf_hifigan/models.py
diff --git a/nsf_hifigan/nvSTFT.py → ReFlowVaeSVC/nsf_hifigan/nvSTFT.py b/nsf_hifigan/nvSTFT.py → ReFlowVaeSVC/nsf_hifigan/nvSTFT.py
@@ -1,7 +1,5 @@
-import math
 import os
 os.environ["LRU_CACHE_CAPACITY"] = "3"
-import random
 import torch
 try:
     import torch_musa
@@ -10,9 +8,7 @@
 import torch.utils.data
 import numpy as np
 import librosa
-from librosa.util import normalize
 from librosa.filters import mel as librosa_mel_fn
-from scipy.io.wavfile import read
 import soundfile as sf
 import torch.nn.functional as F
 

diff --git a/nsf_hifigan/utils.py → ReFlowVaeSVC/nsf_hifigan/utils.py b/nsf_hifigan/utils.py → ReFlowVaeSVC/nsf_hifigan/utils.py
diff --git a/preprocess.py → ReFlowVaeSVC/preprocess.py b/preprocess.py → ReFlowVaeSVC/preprocess.py
@@ -8,16 +8,13 @@
     use_torch_musa = True
 except ImportError:
     use_torch_musa = False
-import pyworld as pw
-import parselmouth
 import argparse
 import shutil
-from logger import utils
+from ReFlowVaeSVC.logger import utils
 from tqdm import tqdm
-from reflow.extractors import F0_Extractor, Volume_Extractor, Units_Encoder
-from reflow.vocoder import Vocoder
-from logger.utils import traverse_dir
-import concurrent.futures
+from ReFlowVaeSVC.reflow.extractors import F0_Extractor, Volume_Extractor, Units_Encoder
+from ReFlowVaeSVC.reflow.vocoder import Vocoder
+from ReFlowVaeSVC.logger.utils import traverse_dir
 
 def parse_args(args=None, namespace=None):
     """Parse command-line arguments."""

diff --git a/reflow/data_loaders.py → ReFlowVaeSVC/reflow/data_loaders.py b/reflow/data_loaders.py → ReFlowVaeSVC/reflow/data_loaders.py
@@ -8,7 +8,6 @@
     import torch_musa
 except ImportError:
     pass
-import random
 from tqdm import tqdm
 from torch.utils.data import Dataset
 

diff --git a/reflow/extractors.py → ReFlowVaeSVC/reflow/extractors.py b/reflow/extractors.py → ReFlowVaeSVC/reflow/extractors.py
@@ -1,6 +1,4 @@
-import os
 import numpy as np
-import yaml
 import torch
 try:
     import torch_musa
@@ -11,13 +9,11 @@
 import pyworld as pw
 import parselmouth
 import torchcrepe
-import resampy
 from transformers import HubertModel, Wav2Vec2FeatureExtractor
 from fairseq import checkpoint_utils
-from encoder.hubert.model import HubertSoft
+from ReFlowVaeSVC.encoder.hubert.model import HubertSoft
 from torch.nn.modules.utils import consume_prefix_in_state_dict_if_present
 from torchaudio.transforms import Resample
-import time
 
 CREPE_RESAMPLE_KERNEL = {}
 F0_KERNEL = {}
@@ -78,7 +74,7 @@ def __init__(self, f0_extractor, sample_rate = 44100, hop_size = 512, f0_min = 6
             self.resample_kernel = CREPE_RESAMPLE_KERNEL[key_str]
         if f0_extractor == 'rmvpe':
             if 'rmvpe' not in F0_KERNEL :
-                from encoder.rmvpe import RMVPE
+                from ReFlowVaeSVC.encoder.rmvpe.inference import RMVPE
                 F0_KERNEL['rmvpe'] = RMVPE('pretrain/rmvpe/model.pt', hop_length=160)
             self.rmvpe = F0_KERNEL['rmvpe']
         if f0_extractor == 'fcpe':

diff --git a/reflow/model_conformer_naive.py → ReFlowVaeSVC/reflow/model_conformer_naive.py b/reflow/model_conformer_naive.py → ReFlowVaeSVC/reflow/model_conformer_naive.py
diff --git a/reflow/naive_v2_diff.py → ReFlowVaeSVC/reflow/naive_v2_diff.py b/reflow/naive_v2_diff.py → ReFlowVaeSVC/reflow/naive_v2_diff.py
@@ -1,5 +1,4 @@
 import math
-from typing import Optional
 
 import torch
 try:

diff --git a/reflow/reflow.py → ReFlowVaeSVC/reflow/reflow.py b/reflow/reflow.py → ReFlowVaeSVC/reflow/reflow.py
@@ -1,4 +1,3 @@
-import numpy as np
 import torch
 try:
     import torch_musa

diff --git a/reflow/solver.py → ReFlowVaeSVC/reflow/solver.py b/reflow/solver.py → ReFlowVaeSVC/reflow/solver.py
@@ -8,8 +8,8 @@
 except ImportError:
     use_torch_musa = False
 import librosa
-from logger.saver import Saver
-from logger import utils
+from ReFlowVaeSVC.logger.saver import Saver
+from ReFlowVaeSVC.logger import utils
 from torch import autocast
 # from torch.cuda.amp import GradScaler
 

diff --git a/reflow/vocoder.py → ReFlowVaeSVC/reflow/vocoder.py b/reflow/vocoder.py → ReFlowVaeSVC/reflow/vocoder.py
@@ -7,10 +7,9 @@
 except ImportError:
     use_torch_musa = False
 import torch.nn as nn
-import torch.nn.functional as F
 import numpy as np
-from nsf_hifigan.nvSTFT import STFT
-from nsf_hifigan.models import load_model,load_config
+from ReFlowVaeSVC.nsf_hifigan.nvSTFT import STFT
+from ReFlowVaeSVC.nsf_hifigan.models import load_model,load_config
 from torchaudio.transforms import Resample
 from .reflow import Bi_RectifiedFlow
 from .naive_v2_diff import NaiveV2Diff

diff --git a/reflow/wavenet.py → ReFlowVaeSVC/reflow/wavenet.py b/reflow/wavenet.py → ReFlowVaeSVC/reflow/wavenet.py
diff --git a/slicer.py → ReFlowVaeSVC/slicer.py b/slicer.py → ReFlowVaeSVC/slicer.py
diff --git a/train.py → ReFlowVaeSVC/train.py b/train.py → ReFlowVaeSVC/train.py
@@ -1,15 +1,14 @@
-import os
 import argparse
 import torch
 try:
     import torch_musa
 except ImportError:
     pass
 from torch.optim import lr_scheduler
-from logger import utils
-from reflow.data_loaders import get_data_loaders
-from reflow.vocoder import Vocoder, Unit2Wav_VAE
-from reflow.solver import train
+from ReFlowVaeSVC.logger import utils
+from ReFlowVaeSVC.reflow.data_loaders import get_data_loaders
+from ReFlowVaeSVC.reflow.vocoder import Vocoder, Unit2Wav_VAE
+from ReFlowVaeSVC.reflow.solver import train
 
 def parse_args(args=None, namespace=None):
     """Parse command-line arguments."""

diff --git a/encoder/rmvpe/__init__.py b/encoder/rmvpe/__init__.py
diff --git a/pretrain/contentvec/.gitignore b/pretrain/contentvec/.gitignore
diff --git a/pretrain/nsf_hifigan/.gitignore b/pretrain/nsf_hifigan/.gitignore
diff --git a/pretrain/rmvpe/.gitignore b/pretrain/rmvpe/.gitignore