style: run ruff format

idiap · Jan 11, 2025 · e49d3d9 · e49d3d9
1 parent 5b3e40a
commit e49d3d9
Show file tree

Hide file tree

Showing 68 changed files with 333 additions and 381 deletions.
diff --git a/TTS/encoder/configs/base_encoder_config.py b/TTS/encoder/configs/base_encoder_config.py
@@ -55,6 +55,6 @@ class BaseEncoderConfig(BaseTrainingConfig):
     def check_values(self):
         super().check_values()
         c = asdict(self)
-        assert (
-            c["model_params"]["input_dim"] == self.audio.num_mels
-        ), " [!] model input dimendion must be equal to melspectrogram dimension."
+        assert c["model_params"]["input_dim"] == self.audio.num_mels, (
+            " [!] model input dimendion must be equal to melspectrogram dimension."
+        )
diff --git a/TTS/encoder/utils/prepare_voxceleb.py b/TTS/encoder/utils/prepare_voxceleb.py
@@ -16,7 +16,7 @@
 # Only support eager mode and TF>=2.0.0
 # pylint: disable=no-member, invalid-name, relative-beyond-top-level
 # pylint: disable=too-many-locals, too-many-statements, too-many-arguments, too-many-instance-attributes
-""" voxceleb 1 & 2 """
+"""voxceleb 1 & 2"""
 
 import csv
 import hashlib

diff --git a/TTS/tts/configs/neuralhmm_tts_config.py b/TTS/tts/configs/neuralhmm_tts_config.py
@@ -161,9 +161,9 @@ def check_values(self):
             AssertionError: transition probability is not between 0 and 1
         """
         assert self.ar_order > 0, "AR order must be greater than 0 it is an autoregressive model."
-        assert (
-            len(self.outputnet_size) >= 1
-        ), f"Parameter Network must have atleast one layer check the config file for parameter network. Provided: {self.parameternetwork}"
-        assert (
-            0 < self.flat_start_params["transition_p"] < 1
-        ), f"Transition probability must be between 0 and 1. Provided: {self.flat_start_params['transition_p']}"
+        assert len(self.outputnet_size) >= 1, (
+            f"Parameter Network must have atleast one layer check the config file for parameter network. Provided: {self.parameternetwork}"
+        )
+        assert 0 < self.flat_start_params["transition_p"] < 1, (
+            f"Transition probability must be between 0 and 1. Provided: {self.flat_start_params['transition_p']}"
+        )
diff --git a/TTS/tts/configs/overflow_config.py b/TTS/tts/configs/overflow_config.py
@@ -192,9 +192,9 @@ def check_values(self):
             AssertionError: transition probability is not between 0 and 1
         """
         assert self.ar_order > 0, "AR order must be greater than 0 it is an autoregressive model."
-        assert (
-            len(self.outputnet_size) >= 1
-        ), f"Parameter Network must have atleast one layer check the config file for parameter network. Provided: {self.parameternetwork}"
-        assert (
-            0 < self.flat_start_params["transition_p"] < 1
-        ), f"Transition probability must be between 0 and 1. Provided: {self.flat_start_params['transition_p']}"
+        assert len(self.outputnet_size) >= 1, (
+            f"Parameter Network must have atleast one layer check the config file for parameter network. Provided: {self.parameternetwork}"
+        )
+        assert 0 < self.flat_start_params["transition_p"] < 1, (
+            f"Transition probability must be between 0 and 1. Provided: {self.flat_start_params['transition_p']}"
+        )
diff --git a/TTS/tts/configs/tacotron_config.py b/TTS/tts/configs/tacotron_config.py
@@ -223,12 +223,12 @@ class TacotronConfig(BaseTTSConfig):
 
     def check_values(self):
         if self.gradual_training:
-            assert (
-                self.gradual_training[0][1] == self.r
-            ), f"[!] the first scheduled gradual training `r` must be equal to the model's `r` value. {self.gradual_training[0][1]} vs {self.r}"
+            assert self.gradual_training[0][1] == self.r, (
+                f"[!] the first scheduled gradual training `r` must be equal to the model's `r` value. {self.gradual_training[0][1]} vs {self.r}"
+            )
         if self.model == "tacotron" and self.audio is not None:
-            assert self.out_channels == (
-                self.audio.fft_size // 2 + 1
-            ), f"{self.out_channels} vs {self.audio.fft_size // 2 + 1}"
+            assert self.out_channels == (self.audio.fft_size // 2 + 1), (
+                f"{self.out_channels} vs {self.audio.fft_size // 2 + 1}"
+            )
         if self.model == "tacotron2" and self.audio is not None:
             assert self.out_channels == self.audio.num_mels
diff --git a/TTS/tts/datasets/__init__.py b/TTS/tts/datasets/__init__.py
@@ -37,9 +37,9 @@ def split_dataset(items, eval_split_max_size=None, eval_split_size=0.01):
         else:
             eval_split_size = int(len(items) * eval_split_size)
 
-    assert (
-        eval_split_size > 0
-    ), f" [!] You do not have enough samples for the evaluation set. You can work around this setting the 'eval_split_size' parameter to a minimum of {1 / len(items)}"
+    assert eval_split_size > 0, (
+        f" [!] You do not have enough samples for the evaluation set. You can work around this setting the 'eval_split_size' parameter to a minimum of {1 / len(items)}"
+    )
     np.random.seed(0)
     np.random.shuffle(items)
     if is_multi_speaker:

diff --git a/TTS/tts/datasets/formatters.py b/TTS/tts/datasets/formatters.py
@@ -424,7 +424,7 @@ def vctk(root_path, meta_files=None, wavs_path="wav48_silence_trimmed", mic="mic
     """
     file_ext = "flac"
     items = []
-    meta_files = glob(f"{os.path.join(root_path,'txt')}/**/*.txt", recursive=True)
+    meta_files = glob(f"{os.path.join(root_path, 'txt')}/**/*.txt", recursive=True)
     for meta_file in meta_files:
         _, speaker_id, txt_file = os.path.relpath(meta_file, root_path).split(os.sep)
         file_id = txt_file.split(".")[0]
@@ -451,7 +451,7 @@ def vctk(root_path, meta_files=None, wavs_path="wav48_silence_trimmed", mic="mic
 def vctk_old(root_path, meta_files=None, wavs_path="wav48", ignored_speakers=None):
     """homepages.inf.ed.ac.uk/jyamagis/release/VCTK-Corpus.tar.gz"""
     items = []
-    meta_files = glob(f"{os.path.join(root_path,'txt')}/**/*.txt", recursive=True)
+    meta_files = glob(f"{os.path.join(root_path, 'txt')}/**/*.txt", recursive=True)
     for meta_file in meta_files:
         _, speaker_id, txt_file = os.path.relpath(meta_file, root_path).split(os.sep)
         file_id = txt_file.split(".")[0]

diff --git a/TTS/tts/layers/bark/hubert/kmeans_hubert.py b/TTS/tts/layers/bark/hubert/kmeans_hubert.py
@@ -7,7 +7,6 @@
 
 # Modified code from https://github.com/lucidrains/audiolm-pytorch/blob/main/audiolm_pytorch/hubert_kmeans.py
 
-
 import torch
 from einops import pack, unpack
 from torch import nn

diff --git a/TTS/tts/layers/bark/inference_funcs.py b/TTS/tts/layers/bark/inference_funcs.py
@@ -58,9 +58,7 @@ def load_npz(npz_file: str) -> tuple[npt.NDArray[np.int64], npt.NDArray[np.int64
 
 def load_voice(
     model, voice: str, extra_voice_dirs: list[str] = []
-) -> tuple[
-    npt.NDArray[np.int64] | None, npt.NDArray[np.int64] | None, npt.NDArray[np.int64] | None
-]:  # pylint: disable=dangerous-default-value
+) -> tuple[npt.NDArray[np.int64] | None, npt.NDArray[np.int64] | None, npt.NDArray[np.int64] | None]:  # pylint: disable=dangerous-default-value
     if voice == "random":
         return None, None, None
 

diff --git a/TTS/tts/layers/bark/model.py b/TTS/tts/layers/bark/model.py
@@ -175,9 +175,9 @@ def forward(self, idx, merge_context=False, past_kv=None, position_ids=None, use
                 assert idx.shape[1] >= 256 + 256 + 1
                 t = idx.shape[1] - 256
             else:
-                assert (
-                    t <= self.config.block_size
-                ), f"Cannot forward sequence of length {t}, block size is only {self.config.block_size}"
+                assert t <= self.config.block_size, (
+                    f"Cannot forward sequence of length {t}, block size is only {self.config.block_size}"
+                )
 
             # forward the GPT model itself
             if merge_context:

diff --git a/TTS/tts/layers/bark/model_fine.py b/TTS/tts/layers/bark/model_fine.py
@@ -101,9 +101,9 @@ def __init__(self, config):
     def forward(self, pred_idx, idx):
         device = idx.device
         b, t, codes = idx.size()
-        assert (
-            t <= self.config.block_size
-        ), f"Cannot forward sequence of length {t}, block size is only {self.config.block_size}"
+        assert t <= self.config.block_size, (
+            f"Cannot forward sequence of length {t}, block size is only {self.config.block_size}"
+        )
         assert pred_idx > 0, "cannot predict 0th codebook"
         assert codes == self.n_codes_total, (b, t, codes)
         pos = torch.arange(0, t, dtype=torch.long, device=device).unsqueeze(0)  # shape (1, t)

diff --git a/TTS/tts/layers/feed_forward/encoder.py b/TTS/tts/layers/feed_forward/encoder.py
@@ -143,9 +143,9 @@ def __init__(
         elif encoder_type.lower() == "residual_conv_bn":
             self.encoder = ResidualConv1dBNEncoder(in_hidden_channels, out_channels, in_hidden_channels, encoder_params)
         elif encoder_type.lower() == "fftransformer":
-            assert (
-                in_hidden_channels == out_channels
-            ), "[!] must be `in_channels` == `out_channels` when encoder type is 'fftransformer'"
+            assert in_hidden_channels == out_channels, (
+                "[!] must be `in_channels` == `out_channels` when encoder type is 'fftransformer'"
+            )
             # pylint: disable=unexpected-keyword-arg
             self.encoder = FFTransformerBlock(in_hidden_channels, **encoder_params)
         else:

diff --git a/TTS/tts/layers/generic/pos_encoding.py b/TTS/tts/layers/generic/pos_encoding.py
@@ -18,7 +18,7 @@ class PositionalEncoding(nn.Module):
     def __init__(self, channels, dropout_p=0.0, max_len=5000, use_scale=False):
         super().__init__()
         if channels % 2 != 0:
-            raise ValueError("Cannot use sin/cos positional encoding with " f"odd channels (got channels={channels:d})")
+            raise ValueError(f"Cannot use sin/cos positional encoding with odd channels (got channels={channels:d})")
         self.use_scale = use_scale
         if use_scale:
             self.scale = torch.nn.Parameter(torch.ones(1))

diff --git a/TTS/tts/layers/generic/transformer.py b/TTS/tts/layers/generic/transformer.py
@@ -70,9 +70,7 @@ def forward(self, x, mask=None, g=None):  # pylint: disable=unused-argument
 
 
 class FFTDurationPredictor:
-    def __init__(
-        self, in_channels, hidden_channels, num_heads, num_layers, dropout_p=0.1, cond_channels=None
-    ):  # pylint: disable=unused-argument
+    def __init__(self, in_channels, hidden_channels, num_heads, num_layers, dropout_p=0.1, cond_channels=None):  # pylint: disable=unused-argument
         self.fft = FFTransformerBlock(in_channels, num_heads, hidden_channels, num_layers, dropout_p)
         self.proj = nn.Linear(in_channels, 1)
 

diff --git a/TTS/tts/layers/tortoise/arch_utils.py b/TTS/tts/layers/tortoise/arch_utils.py
@@ -101,9 +101,9 @@ def __init__(
         if num_head_channels == -1:
             self.num_heads = num_heads
         else:
-            assert (
-                channels % num_head_channels == 0
-            ), f"q,k,v channels {channels} is not divisible by num_head_channels {num_head_channels}"
+            assert channels % num_head_channels == 0, (
+                f"q,k,v channels {channels} is not divisible by num_head_channels {num_head_channels}"
+            )
             self.num_heads = channels // num_head_channels
         self.norm = normalization(channels)
         self.qkv = nn.Conv1d(channels, channels * 3, 1)

diff --git a/TTS/tts/layers/tortoise/audio_utils.py b/TTS/tts/layers/tortoise/audio_utils.py
@@ -125,14 +125,14 @@ def load_voices(voices: list[str], extra_voice_dirs: list[str] = []):
             return None, None
         clip, latent = load_voice(voice, extra_voice_dirs)
         if latent is None:
-            assert (
-                len(latents) == 0
-            ), "Can only combine raw audio voices or latent voices, not both. Do it yourself if you want this."
+            assert len(latents) == 0, (
+                "Can only combine raw audio voices or latent voices, not both. Do it yourself if you want this."
+            )
             clips.extend(clip)
         elif clip is None:
-            assert (
-                len(clips) == 0
-            ), "Can only combine raw audio voices or latent voices, not both. Do it yourself if you want this."
+            assert len(clips) == 0, (
+                "Can only combine raw audio voices or latent voices, not both. Do it yourself if you want this."
+            )
             latents.append(latent)
     if len(latents) == 0:
         return clips, None

diff --git a/TTS/tts/layers/tortoise/autoregressive.py b/TTS/tts/layers/tortoise/autoregressive.py
@@ -608,9 +608,9 @@ def inference_speech(
         if input_tokens is None:
             inputs = fake_inputs
         else:
-            assert (
-                num_return_sequences % input_tokens.shape[0] == 0
-            ), "The number of return sequences must be divisible by the number of input sequences"
+            assert num_return_sequences % input_tokens.shape[0] == 0, (
+                "The number of return sequences must be divisible by the number of input sequences"
+            )
             fake_inputs = fake_inputs.repeat(num_return_sequences, 1)
             input_tokens = input_tokens.repeat(num_return_sequences // input_tokens.shape[0], 1)
             inputs = torch.cat([fake_inputs, input_tokens], dim=1)

diff --git a/TTS/tts/layers/tortoise/dpm_solver.py b/TTS/tts/layers/tortoise/dpm_solver.py
@@ -563,57 +563,29 @@ def get_orders_and_timesteps_for_singlestep_solver(self, steps, order, skip_type
         if order == 3:
             K = steps // 3 + 1
             if steps % 3 == 0:
-                orders = [
-                    3,
-                ] * (
-                    K - 2
-                ) + [2, 1]
+                orders = [3] * (K - 2) + [2, 1]
             elif steps % 3 == 1:
-                orders = [
-                    3,
-                ] * (
-                    K - 1
-                ) + [1]
+                orders = [3] * (K - 1) + [1]
             else:
-                orders = [
-                    3,
-                ] * (
-                    K - 1
-                ) + [2]
+                orders = [3] * (K - 1) + [2]
         elif order == 2:
             if steps % 2 == 0:
                 K = steps // 2
-                orders = [
-                    2,
-                ] * K
+                orders = [2] * K
             else:
                 K = steps // 2 + 1
-                orders = [
-                    2,
-                ] * (
-                    K - 1
-                ) + [1]
+                orders = [2] * (K - 1) + [1]
         elif order == 1:
             K = 1
-            orders = [
-                1,
-            ] * steps
+            orders = [1] * steps
         else:
             raise ValueError("'order' must be '1' or '2' or '3'.")
         if skip_type == "logSNR":
             # To reproduce the results in DPM-Solver paper
             timesteps_outer = self.get_time_steps(skip_type, t_T, t_0, K, device)
         else:
             timesteps_outer = self.get_time_steps(skip_type, t_T, t_0, steps, device)[
-                torch.cumsum(
-                    torch.tensor(
-                        [
-                            0,
-                        ]
-                        + orders
-                    ),
-                    0,
-                ).to(device)
+                torch.cumsum(torch.tensor([0] + orders), 0).to(device)
             ]
         return timesteps_outer, orders
 
@@ -1217,9 +1189,9 @@ def inverse(
         """
         t_0 = 1.0 / self.noise_schedule.total_N if t_start is None else t_start
         t_T = self.noise_schedule.T if t_end is None else t_end
-        assert (
-            t_0 > 0 and t_T > 0
-        ), "Time range needs to be greater than 0. For discrete-time DPMs, it needs to be in [1 / N, 1], where N is the length of betas array"
+        assert t_0 > 0 and t_T > 0, (
+            "Time range needs to be greater than 0. For discrete-time DPMs, it needs to be in [1 / N, 1], where N is the length of betas array"
+        )
         return self.sample(
             x,
             steps=steps,
@@ -1362,9 +1334,9 @@ def sample(
         """
         t_0 = 1.0 / self.noise_schedule.total_N if t_end is None else t_end
         t_T = self.noise_schedule.T if t_start is None else t_start
-        assert (
-            t_0 > 0 and t_T > 0
-        ), "Time range needs to be greater than 0. For discrete-time DPMs, it needs to be in [1 / N, 1], where N is the length of betas array"
+        assert t_0 > 0 and t_T > 0, (
+            "Time range needs to be greater than 0. For discrete-time DPMs, it needs to be in [1 / N, 1], where N is the length of betas array"
+        )
         if return_intermediate:
             assert method in [
                 "multistep",

diff --git a/TTS/tts/layers/tortoise/transformer.py b/TTS/tts/layers/tortoise/transformer.py
@@ -43,9 +43,9 @@ def route_args(router, args, depth):
 class SequentialSequence(nn.Module):
     def __init__(self, layers, args_route={}, layer_dropout=0.0):
         super().__init__()
-        assert all(
-            len(route) == len(layers) for route in args_route.values()
-        ), "each argument route map must have the same depth as the number of sequential layers"
+        assert all(len(route) == len(layers) for route in args_route.values()), (
+            "each argument route map must have the same depth as the number of sequential layers"
+        )
         self.layers = layers
         self.args_route = args_route
         self.layer_dropout = layer_dropout

diff --git a/TTS/tts/layers/tortoise/xtransformers.py b/TTS/tts/layers/tortoise/xtransformers.py
@@ -560,9 +560,9 @@ def __init__(
 
         self.rel_pos_bias = rel_pos_bias
         if rel_pos_bias:
-            assert (
-                rel_pos_num_buckets <= rel_pos_max_distance
-            ), "number of relative position buckets must be less than the relative position max distance"
+            assert rel_pos_num_buckets <= rel_pos_max_distance, (
+                "number of relative position buckets must be less than the relative position max distance"
+            )
             self.rel_pos = RelativePositionBias(
                 scale=dim_head**0.5,
                 causal=causal,
@@ -680,9 +680,9 @@ def forward(
             del input_mask
 
         if exists(attn_mask):
-            assert (
-                2 <= attn_mask.ndim <= 4
-            ), "attention mask must have greater than 2 dimensions but less than or equal to 4"
+            assert 2 <= attn_mask.ndim <= 4, (
+                "attention mask must have greater than 2 dimensions but less than or equal to 4"
+            )
             if attn_mask.ndim == 2:
                 attn_mask = rearrange(attn_mask, "i j -> () () i j")
             elif attn_mask.ndim == 3:
@@ -790,9 +790,9 @@ def __init__(
         rotary_emb_dim = max(default(rotary_emb_dim, dim_head // 2), 32)
         self.rotary_pos_emb = RotaryEmbedding(rotary_emb_dim) if rotary_pos_emb else None
 
-        assert not (
-            alibi_pos_bias and rel_pos_bias
-        ), "you can only choose Alibi positional bias or T5 relative positional bias, not both"
+        assert not (alibi_pos_bias and rel_pos_bias), (
+            "you can only choose Alibi positional bias or T5 relative positional bias, not both"
+        )
 
         if alibi_pos_bias:
             alibi_num_heads = default(alibi_num_heads, heads)
@@ -922,9 +922,9 @@ def forward(
         past_key_values=None,
         expected_seq_len=None,
     ):
-        assert not (
-            self.cross_attend ^ (exists(context) or exists(full_context))
-        ), "context must be passed in if cross_attend is set to True"
+        assert not (self.cross_attend ^ (exists(context) or exists(full_context))), (
+            "context must be passed in if cross_attend is set to True"
+        )
         assert context is None or full_context is None, "only one of full_context or context can be provided"
 
         hiddens = []
@@ -940,9 +940,9 @@ def forward(
         rotary_pos_emb = None
         if exists(self.rotary_pos_emb):
             if not self.training and self.causal:
-                assert (
-                    expected_seq_len is not None
-                ), "To decode a transformer with rotary embeddings, you must specify an `expected_seq_len`"
+                assert expected_seq_len is not None, (
+                    "To decode a transformer with rotary embeddings, you must specify an `expected_seq_len`"
+                )
             elif expected_seq_len is None:
                 expected_seq_len = 0
             seq_len = x.shape[1]