Merge pull request #205 from sonphantrung/various-fixes
Various fixes.
blaisewf authored Jan 20, 2024
2 parents 237506f + 9664f8d commit 46d8aa3
Showing 6 changed files with 42 additions and 77 deletions.
36 changes: 36 additions & 0 deletions .github/workflows/unittest.yml
@@ -0,0 +1,36 @@
name: Unit Test
on: [ push, pull_request ]
jobs:
  build:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        python-version: ["3.9", "3.10"]
        os: [ubuntu-latest]
      fail-fast: true

    steps:
      - uses: actions/checkout@master
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        run: |
          sudo apt update
          sudo apt -y install ffmpeg
          sudo apt -y install -qq aria2
          aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/hubert_base.pt -d ./ -o hubert_base.pt
          python -m pip install --upgrade pip
          python -m pip install --upgrade setuptools
          python -m pip install --upgrade wheel
          pip install torch torchvision torchaudio
          pip install -r requirements.txt
      - name: Test step 1 & 2
        run: |
          mkdir -p logs/mi-test
          touch logs/mi-test/preprocess.log
          python rvc/train/preprocess/preprocess.py logs/mi-test logs/mute/0_gt_wavs 48000 8 3.7
          touch logs/mi-test/extract_f0_feature.log
          python rvc/train/extract/extract_f0_print.py logs/mi-test pm 64
          python rvc/train/extract/extract_feature_print.py cpu 1 0 0 logs/mi-test v1 True
4 changes: 2 additions & 2 deletions rvc/train/extract/extract_feature_print.py
@@ -13,11 +13,11 @@
i_part = int(sys.argv[3])

if len(sys.argv) == 7:
    exp_dir, version, is_half = sys.argv[4], sys.argv[5], sys.argv[6]
    exp_dir, version, is_half = sys.argv[4], sys.argv[5], bool(sys.argv[6])
else:
    i_gpu, exp_dir = sys.argv[4], sys.argv[5]
    os.environ["CUDA_VISIBLE_DEVICES"] = str(i_gpu)
    version, is_half = sys.argv[6], sys.argv[7]
    version, is_half = sys.argv[6], bool(sys.argv[7])


def forward_dml(ctx, x, scale):
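Note on the change above: is_half now comes from bool(sys.argv[...]), where the CLI passes the literal string "True" (see the workflow step). A minimal, hypothetical sketch of parsing such a string flag, not code from this repository:

import sys


def parse_bool_flag(arg: str) -> bool:
    # Hypothetical helper: treat "1"/"true"/"yes" (any case) as True and
    # anything else as False; plain bool() returns True for any non-empty string.
    return arg.strip().lower() in ("1", "true", "yes")


if __name__ == "__main__":
    # e.g. python parse_flag.py True
    is_half = parse_bool_flag(sys.argv[1]) if len(sys.argv) > 1 else False
    print(is_half)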
72 changes: 0 additions & 72 deletions rvc/train/slicer.py
@@ -1,10 +1,8 @@
import os
from argparse import ArgumentParser
import librosa
import soundfile
import numpy as np


class Slicer:
    def __init__(
        self,
@@ -144,7 +142,6 @@ def slice(self, waveform):

        return chunks


def get_rms(
    y,
    frame_length=2048,
@@ -173,72 +170,3 @@ def get_rms(

    power = np.mean(np.abs(x) ** 2, axis=-2, keepdims=True)
    return np.sqrt(power)


def main():
    parser = ArgumentParser()
    parser.add_argument("audio", type=str, help="The audio to be sliced")
    parser.add_argument(
        "--out", type=str, help="Output directory of the sliced audio clips"
    )
    parser.add_argument(
        "--db_thresh",
        type=float,
        default=-40,
        help="The dB threshold for silence detection",
    )
    parser.add_argument(
        "--min_length",
        type=int,
        default=5000,
        help="The minimum milliseconds required for each sliced audio clip",
    )
    parser.add_argument(
        "--min_interval",
        type=int,
        default=300,
        help="The minimum milliseconds for a silence part to be sliced",
    )
    parser.add_argument(
        "--hop_size", type=int, default=10, help="Frame length in milliseconds"
    )
    parser.add_argument(
        "--max_sil_kept",
        type=int,
        default=500,
        help="The maximum silence length kept around the sliced clip, presented in milliseconds",
    )
    args = parser.parse_args()

    out = args.out or os.path.dirname(os.path.abspath(args.audio))
    audio, sr = librosa.load(args.audio, sr=None, mono=False)

    slicer = Slicer(
        sr=sr,
        threshold=args.db_thresh,
        min_length=args.min_length,
        min_interval=args.min_interval,
        hop_size=args.hop_size,
        max_sil_kept=args.max_sil_kept,
    )

    chunks = slicer.slice(audio)

    if not os.path.exists(out):
        os.makedirs(out)

    for i, chunk in enumerate(chunks):
        if len(chunk.shape) > 1:
            chunk = chunk.T
        soundfile.write(
            os.path.join(
                out,
                f"{os.path.basename(args.audio).rsplit('.', maxsplit=1)[0]}_{i}.wav",
            ),
            chunk,
            sr,
        )


if __name__ == "__main__":
    main()
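With main() and its argparse CLI removed, slicer.py keeps only the Slicer class and get_rms. A minimal sketch of slicing a file programmatically, reusing the parameter names from the deleted main() above (the import path and the default values are assumptions, not the project's own example):

import librosa
import soundfile

from rvc.train.slicer import Slicer  # assumed import path, based on the file location above

# Load without resampling or downmixing, as the deleted CLI did.
audio, sr = librosa.load("input.wav", sr=None, mono=False)

slicer = Slicer(
    sr=sr,
    threshold=-40,      # dB threshold for silence detection
    min_length=5000,    # minimum clip length, milliseconds
    min_interval=300,   # minimum silence length to slice on, milliseconds
    hop_size=10,        # frame length, milliseconds
    max_sil_kept=500,   # silence kept around each clip, milliseconds
)

for i, chunk in enumerate(slicer.slice(audio)):
    if chunk.ndim > 1:
        chunk = chunk.T  # soundfile expects (frames, channels)
    soundfile.write(f"chunk_{i}.wav", chunk, sr)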
4 changes: 2 additions & 2 deletions rvc/train/train.py
@@ -43,7 +43,7 @@
    kl_loss,
)
from mel_processing import mel_spectrogram_torch, spec_to_mel_torch
from process_ckpt import save_final
from process_ckpt import save_final, extract_small_model

from rvc.lib.infer_pack import commons

@@ -560,7 +560,7 @@ def train_and_evaluate(rank, epoch, hps, nets, optims, scaler, loaders, writers,
% (
    hps.name,
    epoch,
    save_final(
    extract_small_model(
        ckpt,
        hps.sample_rate,
        hps.if_f0,
2 changes: 1 addition & 1 deletion tabs/inference/inference.py
@@ -1,6 +1,6 @@
import os, sys
import gradio as gr
import regex as re
import re
import shutil
import datetime
import random
1 change: 1 addition & 0 deletions tabs/train/train.py
@@ -69,6 +69,7 @@ def train_tab():
        model_name = gr.Textbox(
            label=i18n("Model Name"),
            placeholder=i18n("Enter model name"),
            value="my-project",
            interactive=True,
        )
        dataset_path = gr.Textbox(
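The last change above pre-fills the model-name field. A standalone sketch of the same Gradio pattern, outside the project's i18n wiring (a demo, not the repository's code):

import gradio as gr

# Standalone sketch: a Textbox that starts at "my-project" but remains
# editable because interactive=True, mirroring the change above.
with gr.Blocks() as demo:
    model_name = gr.Textbox(
        label="Model Name",
        placeholder="Enter model name",
        value="my-project",
        interactive=True,
    )

if __name__ == "__main__":
    demo.launch()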
