From 23bedf16450f2d879911f2b7087ea32c1e5d3ba2 Mon Sep 17 00:00:00 2001
From: Enno Hermann <enno.hermann@idiap.ch>
Date: Fri, 13 Dec 2024 15:56:10 +0100
Subject: [PATCH] test(zoo): use pytest.parametrize to parallelize zoo tests

Also run the zoo tests only on the latest supported Python version (3.12).
---
 .github/workflows/tests.yml    |  49 ++++++++++++--
 Makefile                       |   9 +--
 tests/zoo_tests/test_models.py | 120 +++++++++++++++------------------
 3 files changed, 101 insertions(+), 77 deletions(-)

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 8d639d5dee..829a638bc7 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -37,7 +37,6 @@ jobs:
           sudo apt-get install espeak espeak-ng
       - name: Install dependencies
         run: |
-          sudo apt-get update
           sudo apt-get install -y --no-install-recommends git make gcc
           make system-deps
       - name: Install custom Trainer and/or Coqpit if requested
@@ -68,7 +67,7 @@ jobs:
       fail-fast: false
       matrix:
         python-version: ["3.9", "3.12"]
-        subset: ["test_tts", "test_tts2", "test_vocoder", "test_xtts", "test_zoo0", "test_zoo1", "test_zoo2"]
+        subset: ["test_tts", "test_tts2", "test_vocoder", "test_xtts"]
     steps:
       - uses: actions/checkout@v4
       - name: Setup uv
@@ -76,13 +75,12 @@ jobs:
       - name: Set up Python ${{ matrix.python-version }}
         run: uv python install ${{ matrix.python-version }}
       - name: Install Espeak
-        if: contains(fromJSON('["test_tts", "test_tts2", "test_xtts", "test_zoo0", "test_zoo1", "test_zoo2"]'), matrix.subset)
+        if: contains(fromJSON('["test_tts", "test_tts2", "test_xtts"]'), matrix.subset)
         run: |
           sudo apt-get update
           sudo apt-get install espeak espeak-ng
       - name: Install dependencies
         run: |
-          sudo apt-get update
           sudo apt-get install -y --no-install-recommends git make gcc
           make system-deps
       - name: Install custom Trainer and/or Coqpit if requested
@@ -107,9 +105,50 @@ jobs:
           name: coverage-data-${{ matrix.subset }}-${{ matrix.python-version }}
           path: .coverage.*
           if-no-files-found: ignore
+  zoo:
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version: ["3.12"]
+        partition: ["0", "1", "2"]  # one CI job per partition; keep in sync with NUM_PARTITIONS below
+    steps:
+      - uses: actions/checkout@v4
+      - name: Setup uv
+        uses: ./.github/actions/setup-uv
+      - name: Set up Python ${{ matrix.python-version }}
+        run: uv python install ${{ matrix.python-version }}
+      - name: Install Espeak
+        run: |
+          sudo apt-get update
+          sudo apt-get install espeak espeak-ng
+      - name: Install dependencies
+        run: |
+          sudo apt-get install -y --no-install-recommends git make gcc
+          make system-deps
+      - name: Install custom Trainer and/or Coqpit if requested
+        run: |
+          if [[ -n "${{ github.event.inputs.trainer_branch }}" ]]; then
+            uv add git+https://github.com/idiap/coqui-ai-Trainer --branch ${{ github.event.inputs.trainer_branch }}
+          fi
+          if [[ -n "${{ github.event.inputs.coqpit_branch }}" ]]; then
+            uv add git+https://github.com/idiap/coqui-ai-coqpit --branch ${{ github.event.inputs.coqpit_branch }}
+          fi
+      - name: Zoo tests
+        run: uv run --extra server --extra languages make test_zoo
+        env:
+          NUM_PARTITIONS: 3  # must equal the number of entries in matrix.partition
+          TEST_PARTITION: ${{ matrix.partition }}
+      - name: Upload coverage data
+        uses: actions/upload-artifact@v4
+        with:
+          include-hidden-files: true
+          name: coverage-data-zoo-${{ matrix.partition }}
+          path: .coverage.*
+          if-no-files-found: ignore
   coverage:
     if: always()
-    needs: [unit, integration]
+    needs: [unit, integration, zoo]
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v4
diff --git a/Makefile b/Makefile
index c451fe9f7b..e8a941fb93 100644
--- a/Makefile
+++ b/Makefile
@@ -25,13 +25,8 @@ test_aux:	## run aux tests.
 	coverage run -m pytest -x -v --durations=0 tests/aux_tests
 	./run_bash_tests.sh
 
-test_zoo0:	## run zoo tests.
-	coverage run -m pytest -x -v --durations=0 tests/zoo_tests/test_models.py \
-	-k "test_models_offset_0_step_3 or test_voice_conversion"
-test_zoo1:	## run zoo tests.
-	coverage run -m pytest -x -v --durations=0 tests/zoo_tests/test_models.py -k test_models_offset_1_step_3
-test_zoo2:	## run zoo tests.
-	coverage run -m pytest -x -v --durations=0 tests/zoo_tests/test_models.py -k test_models_offset_2_step_3
+test_zoo:	## run zoo tests.
+	coverage run -m pytest -x -v --durations=0 tests/zoo_tests/test_models.py
 
 inference_tests: ## run inference tests.
 	coverage run -m pytest -x -v --durations=0 tests/inference_tests
diff --git a/tests/zoo_tests/test_models.py b/tests/zoo_tests/test_models.py
index 9276f129f6..9a0d0bcb37 100644
--- a/tests/zoo_tests/test_models.py
+++ b/tests/zoo_tests/test_models.py
@@ -8,6 +8,7 @@
 from trainer.io import get_user_data_dir
 
 from tests import get_tests_data_path, run_cli
+from TTS.api import TTS
 from TTS.tts.utils.languages import LanguageManager
 from TTS.tts.utils.speakers import SpeakerManager
 from TTS.utils.manage import ModelManager
@@ -30,55 +31,61 @@ def run_around_tests(tmp_path):
     shutil.rmtree(tmp_path)
 
 
-def run_models(tmp_path, offset=0, step=1):
-    """Check if all the models are downloadable and tts models run correctly."""
-    print(" > Run synthesizer with all the models.")
+@pytest.fixture
+def manager(tmp_path):
+    """Return a ModelManager that uses ``tmp_path`` as its output prefix, with the progress bar disabled."""
+    return ModelManager(output_prefix=tmp_path, progress_bar=False)
+
+
+# Round-robin split across CI jobs: job TEST_PARTITION (0-based) runs every NUM_PARTITIONS-th model.
+num_partitions = int(os.getenv("NUM_PARTITIONS", "1"))
+partition = int(os.getenv("TEST_PARTITION", "0"))
+model_names = [name for name in TTS.list_models() if name not in MODELS_WITH_SEP_TESTS]
+model_names = [name for i, name in enumerate(model_names) if i % num_partitions == partition]
+
+
+@pytest.mark.parametrize("model_name", model_names)
+def test_models(tmp_path, model_name, manager):
+    print(f"\n > Run - {model_name}")
     output_path = tmp_path / "output.wav"
-    manager = ModelManager(output_prefix=tmp_path, progress_bar=False)
-    model_names = [name for name in manager.list_models() if name not in MODELS_WITH_SEP_TESTS]
-    print("Model names:", model_names)
-    for model_name in model_names[offset::step]:
-        print(f"\n > Run - {model_name}")
-        model_path, _, _ = manager.download_model(model_name)
-        if "tts_models" in model_name:
-            local_download_dir = os.path.dirname(model_path)
-            # download and run the model
-            speaker_files = glob.glob(local_download_dir + "/speaker*")
-            language_files = glob.glob(local_download_dir + "/language*")
-            speaker_arg = ""
-            language_arg = ""
-            if len(speaker_files) > 0:
-                # multi-speaker model
-                if "speaker_ids" in speaker_files[0]:
-                    speaker_manager = SpeakerManager(speaker_id_file_path=speaker_files[0])
-                elif "speakers" in speaker_files[0]:
-                    speaker_manager = SpeakerManager(d_vectors_file_path=speaker_files[0])
-                speakers = list(speaker_manager.name_to_id.keys())
-                if len(speakers) > 1:
-                    speaker_arg = f'--speaker_idx "{speakers[0]}"'
-            if len(language_files) > 0 and "language_ids" in language_files[0]:
-                # multi-lingual model
-                language_manager = LanguageManager(language_ids_file_path=language_files[0])
-                languages = language_manager.language_names
-                if len(languages) > 1:
-                    language_arg = f'--language_idx "{languages[0]}"'
-            run_cli(
-                f'tts --model_name  {model_name} --text "This is an example." '
-                f'--out_path "{output_path}" {speaker_arg} {language_arg} --no-progress_bar'
-            )
-        elif "voice_conversion_models" in model_name:
-            speaker_wav = os.path.join(get_tests_data_path(), "ljspeech", "wavs", "LJ001-0001.wav")
-            reference_wav = os.path.join(get_tests_data_path(), "ljspeech", "wavs", "LJ001-0032.wav")
-            run_cli(
-                f"tts --model_name  {model_name} "
-                f'--out_path "{output_path}" --source_wav "{speaker_wav}" --target_wav "{reference_wav}" --no-progress_bar'
-            )
-        else:
-            # only download the model
-            manager.download_model(model_name)
-        # remove downloaded models
-        shutil.rmtree(get_user_data_dir("tts"))
-        print(f" | > OK: {model_name}")
+    model_path, _, _ = manager.download_model(model_name)
+    if "tts_models" in model_name:
+        local_download_dir = os.path.dirname(model_path)
+        # look for speaker/language files located next to the downloaded model
+        speaker_files = glob.glob(local_download_dir + "/speaker*")
+        language_files = glob.glob(local_download_dir + "/language*")
+        speaker_arg = ""
+        language_arg = ""
+        if len(speaker_files) > 0:
+            # multi-speaker model
+            if "speaker_ids" in speaker_files[0]:
+                speaker_manager = SpeakerManager(speaker_id_file_path=speaker_files[0])
+            elif "speakers" in speaker_files[0]:
+                speaker_manager = SpeakerManager(d_vectors_file_path=speaker_files[0])
+            speakers = list(speaker_manager.name_to_id.keys())
+            if len(speakers) > 1:
+                speaker_arg = f'--speaker_idx "{speakers[0]}"'
+        if len(language_files) > 0 and "language_ids" in language_files[0]:
+            # multi-lingual model
+            language_manager = LanguageManager(language_ids_file_path=language_files[0])
+            languages = language_manager.language_names
+            if len(languages) > 1:
+                language_arg = f'--language_idx "{languages[0]}"'
+        run_cli(
+            f'tts --model_name  {model_name} --text "This is an example." '
+            f'--out_path "{output_path}" {speaker_arg} {language_arg} --no-progress_bar'
+        )
+    elif "voice_conversion_models" in model_name:
+        speaker_wav = os.path.join(get_tests_data_path(), "ljspeech", "wavs", "LJ001-0001.wav")
+        reference_wav = os.path.join(get_tests_data_path(), "ljspeech", "wavs", "LJ001-0032.wav")
+        run_cli(
+            f"tts --model_name  {model_name} "
+            f'--out_path "{output_path}" --source_wav "{speaker_wav}" --target_wav "{reference_wav}" --no-progress_bar'
+        )
+    else:
+        # any other model type is only downloaded, not run (download_model is called a second time here)
+        manager.download_model(model_name)
+    print(f" | > OK: {model_name}")
 
 
 @pytest.mark.skipif(GITHUB_ACTIONS, reason="Model too big for CI")
@@ -264,20 +271,3 @@ def test_voice_conversion(tmp_path):
         f"tts --model_name  {model_name}"
         f" --out_path {output_path} --speaker_wav {speaker_wav} --reference_wav {reference_wav} --language_idx {language_id} --no-progress_bar"
     )
-
-
-"""
-These are used to split tests into different actions on Github.
-"""
-
-
-def test_models_offset_0_step_3(tmp_path):
-    run_models(tmp_path, offset=0, step=3)
-
-
-def test_models_offset_1_step_3(tmp_path):
-    run_models(tmp_path, offset=1, step=3)
-
-
-def test_models_offset_2_step_3(tmp_path):
-    run_models(tmp_path, offset=2, step=3)