test(zoo): move big model tests into separate files
Makes it easier to run them locally: make test_zoo_big
eginhard committed Dec 16, 2024
1 parent 2370b7b commit b71669a
Showing 3 changed files with 196 additions and 175 deletions.
3 changes: 3 additions & 0 deletions Makefile
@@ -28,6 +28,9 @@ test_aux: ## run aux tests.
test_zoo: ## run zoo tests.
	coverage run -m pytest -x -v --durations=0 tests/zoo_tests/test_models.py

test_zoo_big: ## run tests for models that are too big for CI.
	coverage run -m pytest -x -v --durations=0 tests/zoo_tests/test_big_models.py

inference_tests: ## run inference tests.
	coverage run -m pytest -x -v --durations=0 tests/inference_tests

193 changes: 193 additions & 0 deletions tests/zoo_tests/test_big_models.py
@@ -0,0 +1,193 @@
"""These tests should be run locally because the models are too big for CI."""

import os

import pytest
import torch

from tests import get_tests_data_path, run_main
from TTS.bin.synthesize import main
from TTS.utils.manage import ModelManager

GITHUB_ACTIONS = os.getenv("GITHUB_ACTIONS") == "true"  # GitHub Actions always sets this in its runners


@pytest.fixture(scope="session", autouse=True)
def set_env():
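    # Pre-accept the Coqui model terms of service so downloads don't prompt for agreement.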
    os.environ["COQUI_TOS_AGREED"] = "1"


@pytest.fixture
def manager():
"""Set up model manager."""
return ModelManager(progress_bar=False)


@pytest.mark.skipif(GITHUB_ACTIONS, reason="Model too big for CI")
def test_xtts(tmp_path):
"""XTTS is too big to run on github actions. We need to test it locally"""
args = [
"--model_name",
"tts_models/multilingual/multi-dataset/xtts_v1.1",
"--text",
"C'est un exemple.",
"--language_idx",
"fr",
"--out_path",
str(tmp_path / "output.wav"),
"--no-progress_bar",
"--speaker_wav",
os.path.join(get_tests_data_path(), "ljspeech", "wavs", "LJ001-0001.wav"),
]
if torch.cuda.is_available():
args.append("--use_cuda")
run_main(main, args)
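For local experimentation, the same model can also be driven through the high-level Python API rather than the CLI entry point. A minimal sketch, assuming the `TTS.api.TTS` wrapper (imported elsewhere in this test suite) and an illustrative local reference wav:

```python
import torch

from TTS.api import TTS

# Minimal sketch of the equivalent synthesis through the Python API.
# The model name matches the CLI test above; the wav paths are illustrative.
device = "cuda" if torch.cuda.is_available() else "cpu"
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v1.1").to(device)
tts.tts_to_file(
    text="C'est un exemple.",
    language="fr",
    speaker_wav="tests/data/ljspeech/wavs/LJ001-0001.wav",
    file_path="output.wav",
)
```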


@pytest.mark.skipif(GITHUB_ACTIONS, reason="Model too big for CI")
def test_xtts_streaming(manager):
"""Testing the new inference_stream method"""
from TTS.tts.configs.xtts_config import XttsConfig
from TTS.tts.models.xtts import Xtts

speaker_wav = [os.path.join(get_tests_data_path(), "ljspeech", "wavs", "LJ001-0001.wav")]
speaker_wav_2 = os.path.join(get_tests_data_path(), "ljspeech", "wavs", "LJ001-0002.wav")
speaker_wav.append(speaker_wav_2)
model_path, _, _ = manager.download_model("tts_models/multilingual/multi-dataset/xtts_v1.1")
config = XttsConfig()
config.load_json(model_path / "config.json")
model = Xtts.init_from_config(config)
model.load_checkpoint(config, checkpoint_dir=str(model_path))
model.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))

print("Computing speaker latents...")
gpt_cond_latent, speaker_embedding = model.get_conditioning_latents(audio_path=speaker_wav)

print("Inference...")
chunks = model.inference_stream(
"It took me quite a long time to develop a voice and now that I have it I am not going to be silent.",
"en",
gpt_cond_latent,
speaker_embedding,
)
wav_chunks = []
for i, chunk in enumerate(chunks):
if i == 0:
assert chunk.shape[-1] > 5000
wav_chunks.append(chunk)
assert len(wav_chunks) > 1
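Each yielded chunk is a 1-D audio tensor, so a local run can stitch the stream together and listen to the result. A minimal sketch, assuming `torchaudio` is installed and XTTS's usual 24 kHz output rate (an assumption, not something this test asserts):

```python
import torch
import torchaudio

# Stitch the streamed chunks into one waveform and write it to disk.
# Assumes wav_chunks as collected above; 24000 Hz is XTTS's usual output
# rate, taken here as an assumption rather than read from the config.
wav = torch.cat(wav_chunks, dim=0)
torchaudio.save("xtts_stream.wav", wav.unsqueeze(0).cpu(), 24000)
```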


@pytest.mark.skipif(GITHUB_ACTIONS, reason="Model too big for CI")
def test_xtts_v2(tmp_path):
"""XTTS is too big to run on github actions. We need to test it locally"""
args = [
"--model_name",
"tts_models/multilingual/multi-dataset/xtts_v2",
"--text",
"C'est un exemple.",
"--language_idx",
"fr",
"--out_path",
str(tmp_path / "output.wav"),
"--no-progress_bar",
"--speaker_wav",
os.path.join(get_tests_data_path(), "ljspeech", "wavs", "LJ001-0001.wav"),
os.path.join(get_tests_data_path(), "ljspeech", "wavs", "LJ001-0002.wav"),
]
if torch.cuda.is_available():
args.append("--use_cuda")
run_main(main, args)


@pytest.mark.skipif(GITHUB_ACTIONS, reason="Model too big for CI")
def test_xtts_v2_streaming(manager):
"""Testing the new inference_stream method"""
from TTS.tts.configs.xtts_config import XttsConfig
from TTS.tts.models.xtts import Xtts

speaker_wav = [os.path.join(get_tests_data_path(), "ljspeech", "wavs", "LJ001-0001.wav")]
model_path, _, _ = manager.download_model("tts_models/multilingual/multi-dataset/xtts_v2")
config = XttsConfig()
config.load_json(model_path / "config.json")
model = Xtts.init_from_config(config)
model.load_checkpoint(config, checkpoint_dir=str(model_path))
model.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))

print("Computing speaker latents...")
gpt_cond_latent, speaker_embedding = model.get_conditioning_latents(audio_path=speaker_wav)

print("Inference...")
chunks = model.inference_stream(
"It took me quite a long time to develop a voice and now that I have it I am not going to be silent.",
"en",
gpt_cond_latent,
speaker_embedding,
)
wav_chunks = []
for i, chunk in enumerate(chunks):
if i == 0:
assert chunk.shape[-1] > 5000
wav_chunks.append(chunk)
assert len(wav_chunks) > 1
normal_len = sum([len(chunk) for chunk in wav_chunks])

chunks = model.inference_stream(
"It took me quite a long time to develop a voice and now that I have it I am not going to be silent.",
"en",
gpt_cond_latent,
speaker_embedding,
speed=1.5,
)
wav_chunks = []
for i, chunk in enumerate(chunks):
wav_chunks.append(chunk)
fast_len = sum([len(chunk) for chunk in wav_chunks])

chunks = model.inference_stream(
"It took me quite a long time to develop a voice and now that I have it I am not going to be silent.",
"en",
gpt_cond_latent,
speaker_embedding,
speed=0.66,
)
wav_chunks = []
for i, chunk in enumerate(chunks):
wav_chunks.append(chunk)
slow_len = sum([len(chunk) for chunk in wav_chunks])

assert slow_len > normal_len
assert normal_len > fast_len
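These assertions hold because `speed` changes the playback rate while the text stays fixed, so the total sample count scales roughly inversely with the speed factor. A rough way to eyeball the effect in seconds, assuming the same 24 kHz rate as above:

```python
# Rough duration comparison in seconds (24 kHz is an assumed output rate).
for name, n_samples in [("fast", fast_len), ("normal", normal_len), ("slow", slow_len)]:
    print(f"{name}: {n_samples / 24000:.2f}s")
```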


@pytest.mark.skipif(GITHUB_ACTIONS, reason="Model too big for CI")
def test_tortoise(tmp_path):
    args = [
        "--model_name",
        "tts_models/en/multi-dataset/tortoise-v2",
        "--text",
        "This is an example.",
        "--out_path",
        str(tmp_path / "output.wav"),
        "--no-progress_bar",
    ]
    if torch.cuda.is_available():
        args.append("--use_cuda")
    run_main(main, args)


@pytest.mark.skipif(GITHUB_ACTIONS, reason="Model too big for CI")
def test_bark(tmp_path):
"""Bark is too big to run on github actions. We need to test it locally"""
args = [
"--model_name",
"tts_models/multilingual/multi-dataset/bark",
"--text",
"This is an example.",
"--out_path",
str(tmp_path / "output.wav"),
"--no-progress_bar",
]
if torch.cuda.is_available():
args.append("--use_cuda")
run_main(main, args)
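Besides `make test_zoo_big`, individual tests can be picked out by pytest node ID while iterating locally; a sketch of doing that programmatically:

```python
import sys

import pytest

# Run a single big-model test; equivalent to
#   pytest -x -v tests/zoo_tests/test_big_models.py::test_xtts_v2
sys.exit(pytest.main(["-x", "-v", "tests/zoo_tests/test_big_models.py::test_xtts_v2"]))
```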
175 changes: 0 additions & 175 deletions tests/zoo_tests/test_models.py
@@ -3,8 +3,6 @@
import shutil

import pytest
import torch
from trainer.io import get_user_data_dir

from tests import get_tests_data_path, run_main
from TTS.api import TTS
@@ -13,8 +11,6 @@
from TTS.tts.utils.speakers import SpeakerManager
from TTS.utils.manage import ModelManager

GITHUB_ACTIONS = os.getenv("GITHUB_ACTIONS") == "true"

MODELS_WITH_SEP_TESTS = [
"tts_models/multilingual/multi-dataset/bark",
"tts_models/en/multi-dataset/tortoise-v2",
@@ -83,177 +79,6 @@ def test_models(tmp_path, model_name, manager):
    print(f" | > OK: {model_name}")


@pytest.mark.skipif(GITHUB_ACTIONS, reason="Model too big for CI")
def test_xtts(tmp_path, manager):
"""XTTS is too big to run on github actions. We need to test it locally"""
model_name = "tts_models/multilingual/multi-dataset/xtts_v1.1"
model_path, _, _ = manager.download_model(model_name)
(model_path / "tos_agreed.txt").touch()
args = [
"--model_name",
model_name,
"--text",
"C'est un exemple.",
"--language_idx",
"fr",
"--out_path",
str(tmp_path / "output.wav"),
"--no-progress_bar",
"--speaker_wav",
os.path.join(get_tests_data_path(), "ljspeech", "wavs", "LJ001-0001.wav"),
"--use_cuda" if torch.cuda.is_available() else "",
]
run_main(main, args)


@pytest.mark.skipif(GITHUB_ACTIONS, reason="Model too big for CI")
def test_xtts_streaming():
"""Testing the new inference_stream method"""
from TTS.tts.configs.xtts_config import XttsConfig
from TTS.tts.models.xtts import Xtts

speaker_wav = [os.path.join(get_tests_data_path(), "ljspeech", "wavs", "LJ001-0001.wav")]
speaker_wav_2 = os.path.join(get_tests_data_path(), "ljspeech", "wavs", "LJ001-0002.wav")
speaker_wav.append(speaker_wav_2)
model_path = os.path.join(get_user_data_dir("tts"), "tts_models--multilingual--multi-dataset--xtts_v1.1")
config = XttsConfig()
config.load_json(os.path.join(model_path, "config.json"))
model = Xtts.init_from_config(config)
model.load_checkpoint(config, checkpoint_dir=model_path)
model.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))

print("Computing speaker latents...")
gpt_cond_latent, speaker_embedding = model.get_conditioning_latents(audio_path=speaker_wav)

print("Inference...")
chunks = model.inference_stream(
"It took me quite a long time to develop a voice and now that I have it I am not going to be silent.",
"en",
gpt_cond_latent,
speaker_embedding,
)
wav_chuncks = []
for i, chunk in enumerate(chunks):
if i == 0:
assert chunk.shape[-1] > 5000
wav_chuncks.append(chunk)
assert len(wav_chuncks) > 1


@pytest.mark.skipif(GITHUB_ACTIONS, reason="Model too big for CI")
def test_xtts_v2(tmp_path):
"""XTTS is too big to run on github actions. We need to test it locally"""
args = [
"--model_name",
"tts_models/multilingual/multi-dataset/xtts_v2",
"--text",
"C'est un exemple.",
"--language_idx",
"fr",
"--out_path",
str(tmp_path / "output.wav"),
"--no-progress_bar",
"--speaker_wav",
os.path.join(get_tests_data_path(), "ljspeech", "wavs", "LJ001-0001.wav"),
os.path.join(get_tests_data_path(), "ljspeech", "wavs", "LJ001-0002.wav"),
"--use_cuda" if torch.cuda.is_available() else "",
]
run_main(main, args)


@pytest.mark.skipif(GITHUB_ACTIONS, reason="Model too big for CI")
def test_xtts_v2_streaming():
"""Testing the new inference_stream method"""
from TTS.tts.configs.xtts_config import XttsConfig
from TTS.tts.models.xtts import Xtts

speaker_wav = [os.path.join(get_tests_data_path(), "ljspeech", "wavs", "LJ001-0001.wav")]
model_path = os.path.join(get_user_data_dir("tts"), "tts_models--multilingual--multi-dataset--xtts_v2")
config = XttsConfig()
config.load_json(os.path.join(model_path, "config.json"))
model = Xtts.init_from_config(config)
model.load_checkpoint(config, checkpoint_dir=model_path)
model.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))

print("Computing speaker latents...")
gpt_cond_latent, speaker_embedding = model.get_conditioning_latents(audio_path=speaker_wav)

print("Inference...")
chunks = model.inference_stream(
"It took me quite a long time to develop a voice and now that I have it I am not going to be silent.",
"en",
gpt_cond_latent,
speaker_embedding,
)
wav_chuncks = []
for i, chunk in enumerate(chunks):
if i == 0:
assert chunk.shape[-1] > 5000
wav_chuncks.append(chunk)
assert len(wav_chuncks) > 1
normal_len = sum([len(chunk) for chunk in wav_chuncks])

chunks = model.inference_stream(
"It took me quite a long time to develop a voice and now that I have it I am not going to be silent.",
"en",
gpt_cond_latent,
speaker_embedding,
speed=1.5,
)
wav_chuncks = []
for i, chunk in enumerate(chunks):
wav_chuncks.append(chunk)
fast_len = sum([len(chunk) for chunk in wav_chuncks])

chunks = model.inference_stream(
"It took me quite a long time to develop a voice and now that I have it I am not going to be silent.",
"en",
gpt_cond_latent,
speaker_embedding,
speed=0.66,
)
wav_chuncks = []
for i, chunk in enumerate(chunks):
wav_chuncks.append(chunk)
slow_len = sum([len(chunk) for chunk in wav_chuncks])

assert slow_len > normal_len
assert normal_len > fast_len


@pytest.mark.skipif(GITHUB_ACTIONS, reason="Model too big for CI")
def test_tortoise(tmp_path):
    args = [
        "--model_name",
        "tts_models/en/multi-dataset/tortoise-v2",
        "--text",
        "This is an example.",
        "--out_path",
        str(tmp_path / "output.wav"),
        "--no-progress_bar",
        "--use_cuda" if torch.cuda.is_available() else "",
    ]
    run_main(main, args)


@pytest.mark.skipif(GITHUB_ACTIONS, reason="Model too big for CI")
def test_bark(tmp_path):
"""Bark is too big to run on github actions. We need to test it locally"""
args = [
"--model_name",
"tts_models/multilingual/multi-dataset/bark",
"tts_models/en/multi-dataset/tortoise-v2",
"--text",
"This is an example.",
"--out_path",
str(tmp_path / "output.wav"),
"--no-progress_bar",
"--use_cuda" if torch.cuda.is_available() else "",
]
run_main(main, args)
output_path = tmp_path / "output.wav"


def test_voice_conversion(tmp_path):
    print(" > Run voice conversion inference using YourTTS model.")
    args = [
