diff --git a/TTS/encoder/configs/base_encoder_config.py b/TTS/encoder/configs/base_encoder_config.py
index 97cbf47893..d2d0ef580d 100644
--- a/TTS/encoder/configs/base_encoder_config.py
+++ b/TTS/encoder/configs/base_encoder_config.py
@@ -55,6 +55,6 @@ class BaseEncoderConfig(BaseTrainingConfig):
     def check_values(self):
         super().check_values()
         c = asdict(self)
-        assert (
-            c["model_params"]["input_dim"] == self.audio.num_mels
-        ), " [!] model input dimendion must be equal to melspectrogram dimension."
+        assert c["model_params"]["input_dim"] == self.audio.num_mels, (
+            " [!] model input dimendion must be equal to melspectrogram dimension."
+        )
diff --git a/TTS/encoder/utils/prepare_voxceleb.py b/TTS/encoder/utils/prepare_voxceleb.py
index fe57874a99..8d50ffd5f5 100644
--- a/TTS/encoder/utils/prepare_voxceleb.py
+++ b/TTS/encoder/utils/prepare_voxceleb.py
@@ -16,7 +16,7 @@
 # Only support eager mode and TF>=2.0.0
 # pylint: disable=no-member, invalid-name, relative-beyond-top-level
 # pylint: disable=too-many-locals, too-many-statements, too-many-arguments, too-many-instance-attributes
-""" voxceleb 1 & 2 """
+"""voxceleb 1 & 2"""
 
 import csv
 import hashlib
diff --git a/TTS/tts/configs/neuralhmm_tts_config.py b/TTS/tts/configs/neuralhmm_tts_config.py
index be7a81fa89..bd1736c880 100644
--- a/TTS/tts/configs/neuralhmm_tts_config.py
+++ b/TTS/tts/configs/neuralhmm_tts_config.py
@@ -161,9 +161,9 @@ def check_values(self):
             AssertionError: transition probability is not between 0 and 1
         """
         assert self.ar_order > 0, "AR order must be greater than 0 it is an autoregressive model."
-        assert (
-            len(self.outputnet_size) >= 1
-        ), f"Parameter Network must have atleast one layer check the config file for parameter network. Provided: {self.parameternetwork}"
-        assert (
-            0 < self.flat_start_params["transition_p"] < 1
-        ), f"Transition probability must be between 0 and 1. Provided: {self.flat_start_params['transition_p']}"
+        assert len(self.outputnet_size) >= 1, (
+            f"Parameter Network must have atleast one layer check the config file for parameter network. Provided: {self.parameternetwork}"
+        )
+        assert 0 < self.flat_start_params["transition_p"] < 1, (
+            f"Transition probability must be between 0 and 1. Provided: {self.flat_start_params['transition_p']}"
+        )
diff --git a/TTS/tts/configs/overflow_config.py b/TTS/tts/configs/overflow_config.py
index 8a113f1f33..93a6a9e377 100644
--- a/TTS/tts/configs/overflow_config.py
+++ b/TTS/tts/configs/overflow_config.py
@@ -192,9 +192,9 @@ def check_values(self):
             AssertionError: transition probability is not between 0 and 1
         """
         assert self.ar_order > 0, "AR order must be greater than 0 it is an autoregressive model."
-        assert (
-            len(self.outputnet_size) >= 1
-        ), f"Parameter Network must have atleast one layer check the config file for parameter network. Provided: {self.parameternetwork}"
-        assert (
-            0 < self.flat_start_params["transition_p"] < 1
-        ), f"Transition probability must be between 0 and 1. Provided: {self.flat_start_params['transition_p']}"
+        assert len(self.outputnet_size) >= 1, (
+            f"Parameter Network must have atleast one layer check the config file for parameter network. Provided: {self.parameternetwork}"
+        )
+        assert 0 < self.flat_start_params["transition_p"] < 1, (
+            f"Transition probability must be between 0 and 1. Provided: {self.flat_start_params['transition_p']}"
+        )
Provided: {self.flat_start_params['transition_p']}" + ) diff --git a/TTS/tts/configs/tacotron_config.py b/TTS/tts/configs/tacotron_config.py index 7badbfac59..e4b419d1fa 100644 --- a/TTS/tts/configs/tacotron_config.py +++ b/TTS/tts/configs/tacotron_config.py @@ -223,12 +223,12 @@ class TacotronConfig(BaseTTSConfig): def check_values(self): if self.gradual_training: - assert ( - self.gradual_training[0][1] == self.r - ), f"[!] the first scheduled gradual training `r` must be equal to the model's `r` value. {self.gradual_training[0][1]} vs {self.r}" + assert self.gradual_training[0][1] == self.r, ( + f"[!] the first scheduled gradual training `r` must be equal to the model's `r` value. {self.gradual_training[0][1]} vs {self.r}" + ) if self.model == "tacotron" and self.audio is not None: - assert self.out_channels == ( - self.audio.fft_size // 2 + 1 - ), f"{self.out_channels} vs {self.audio.fft_size // 2 + 1}" + assert self.out_channels == (self.audio.fft_size // 2 + 1), ( + f"{self.out_channels} vs {self.audio.fft_size // 2 + 1}" + ) if self.model == "tacotron2" and self.audio is not None: assert self.out_channels == self.audio.num_mels diff --git a/TTS/tts/datasets/__init__.py b/TTS/tts/datasets/__init__.py index 2f5357c642..d83abce00a 100644 --- a/TTS/tts/datasets/__init__.py +++ b/TTS/tts/datasets/__init__.py @@ -37,9 +37,9 @@ def split_dataset(items, eval_split_max_size=None, eval_split_size=0.01): else: eval_split_size = int(len(items) * eval_split_size) - assert ( - eval_split_size > 0 - ), f" [!] You do not have enough samples for the evaluation set. You can work around this setting the 'eval_split_size' parameter to a minimum of {1 / len(items)}" + assert eval_split_size > 0, ( + f" [!] You do not have enough samples for the evaluation set. You can work around this setting the 'eval_split_size' parameter to a minimum of {1 / len(items)}" + ) np.random.seed(0) np.random.shuffle(items) if is_multi_speaker: diff --git a/TTS/tts/datasets/formatters.py b/TTS/tts/datasets/formatters.py index 6cf65c9b5e..3a4605275a 100644 --- a/TTS/tts/datasets/formatters.py +++ b/TTS/tts/datasets/formatters.py @@ -424,7 +424,7 @@ def vctk(root_path, meta_files=None, wavs_path="wav48_silence_trimmed", mic="mic """ file_ext = "flac" items = [] - meta_files = glob(f"{os.path.join(root_path,'txt')}/**/*.txt", recursive=True) + meta_files = glob(f"{os.path.join(root_path, 'txt')}/**/*.txt", recursive=True) for meta_file in meta_files: _, speaker_id, txt_file = os.path.relpath(meta_file, root_path).split(os.sep) file_id = txt_file.split(".")[0] @@ -451,7 +451,7 @@ def vctk(root_path, meta_files=None, wavs_path="wav48_silence_trimmed", mic="mic def vctk_old(root_path, meta_files=None, wavs_path="wav48", ignored_speakers=None): """homepages.inf.ed.ac.uk/jyamagis/release/VCTK-Corpus.tar.gz""" items = [] - meta_files = glob(f"{os.path.join(root_path,'txt')}/**/*.txt", recursive=True) + meta_files = glob(f"{os.path.join(root_path, 'txt')}/**/*.txt", recursive=True) for meta_file in meta_files: _, speaker_id, txt_file = os.path.relpath(meta_file, root_path).split(os.sep) file_id = txt_file.split(".")[0] diff --git a/TTS/tts/layers/bark/hubert/kmeans_hubert.py b/TTS/tts/layers/bark/hubert/kmeans_hubert.py index ade84794eb..87be97d5d1 100644 --- a/TTS/tts/layers/bark/hubert/kmeans_hubert.py +++ b/TTS/tts/layers/bark/hubert/kmeans_hubert.py @@ -7,7 +7,6 @@ # Modified code from https://github.com/lucidrains/audiolm-pytorch/blob/main/audiolm_pytorch/hubert_kmeans.py - import torch from einops import pack, unpack from torch 
diff --git a/TTS/tts/layers/bark/inference_funcs.py b/TTS/tts/layers/bark/inference_funcs.py
index 1d141dc537..457a20ea28 100644
--- a/TTS/tts/layers/bark/inference_funcs.py
+++ b/TTS/tts/layers/bark/inference_funcs.py
@@ -58,9 +58,7 @@ def load_npz(npz_file: str) -> tuple[npt.NDArray[np.int64], npt.NDArray[np.int64
 
 def load_voice(
     model, voice: str, extra_voice_dirs: list[str] = []
-) -> tuple[
-    npt.NDArray[np.int64] | None, npt.NDArray[np.int64] | None, npt.NDArray[np.int64] | None
-]:  # pylint: disable=dangerous-default-value
+) -> tuple[npt.NDArray[np.int64] | None, npt.NDArray[np.int64] | None, npt.NDArray[np.int64] | None]:  # pylint: disable=dangerous-default-value
     if voice == "random":
         return None, None, None
 
diff --git a/TTS/tts/layers/bark/model.py b/TTS/tts/layers/bark/model.py
index 54a9cecec0..4850d0a88b 100644
--- a/TTS/tts/layers/bark/model.py
+++ b/TTS/tts/layers/bark/model.py
@@ -175,9 +175,9 @@ def forward(self, idx, merge_context=False, past_kv=None, position_ids=None, use
             assert idx.shape[1] >= 256 + 256 + 1
             t = idx.shape[1] - 256
         else:
-            assert (
-                t <= self.config.block_size
-            ), f"Cannot forward sequence of length {t}, block size is only {self.config.block_size}"
+            assert t <= self.config.block_size, (
+                f"Cannot forward sequence of length {t}, block size is only {self.config.block_size}"
+            )
 
         # forward the GPT model itself
         if merge_context:
diff --git a/TTS/tts/layers/bark/model_fine.py b/TTS/tts/layers/bark/model_fine.py
index 29126b41ab..20f54d2152 100644
--- a/TTS/tts/layers/bark/model_fine.py
+++ b/TTS/tts/layers/bark/model_fine.py
@@ -101,9 +101,9 @@ def __init__(self, config):
     def forward(self, pred_idx, idx):
         device = idx.device
         b, t, codes = idx.size()
-        assert (
-            t <= self.config.block_size
-        ), f"Cannot forward sequence of length {t}, block size is only {self.config.block_size}"
+        assert t <= self.config.block_size, (
+            f"Cannot forward sequence of length {t}, block size is only {self.config.block_size}"
+        )
         assert pred_idx > 0, "cannot predict 0th codebook"
         assert codes == self.n_codes_total, (b, t, codes)
         pos = torch.arange(0, t, dtype=torch.long, device=device).unsqueeze(0)  # shape (1, t)
diff --git a/TTS/tts/layers/feed_forward/encoder.py b/TTS/tts/layers/feed_forward/encoder.py
index caf939ffc7..2d08f03c2d 100644
--- a/TTS/tts/layers/feed_forward/encoder.py
+++ b/TTS/tts/layers/feed_forward/encoder.py
@@ -143,9 +143,9 @@ def __init__(
         elif encoder_type.lower() == "residual_conv_bn":
             self.encoder = ResidualConv1dBNEncoder(in_hidden_channels, out_channels, in_hidden_channels, encoder_params)
         elif encoder_type.lower() == "fftransformer":
-            assert (
-                in_hidden_channels == out_channels
-            ), "[!] must be `in_channels` == `out_channels` when encoder type is 'fftransformer'"
+            assert in_hidden_channels == out_channels, (
+                "[!] must be `in_channels` == `out_channels` when encoder type is 'fftransformer'"
+            )
             # pylint: disable=unexpected-keyword-arg
             self.encoder = FFTransformerBlock(in_hidden_channels, **encoder_params)
         else:
diff --git a/TTS/tts/layers/generic/pos_encoding.py b/TTS/tts/layers/generic/pos_encoding.py
index 695e37a6e0..7765e224aa 100644
--- a/TTS/tts/layers/generic/pos_encoding.py
+++ b/TTS/tts/layers/generic/pos_encoding.py
@@ -18,7 +18,7 @@ class PositionalEncoding(nn.Module):
     def __init__(self, channels, dropout_p=0.0, max_len=5000, use_scale=False):
         super().__init__()
         if channels % 2 != 0:
-            raise ValueError("Cannot use sin/cos positional encoding with " f"odd channels (got channels={channels:d})")
+            raise ValueError(f"Cannot use sin/cos positional encoding with odd channels (got channels={channels:d})")
         self.use_scale = use_scale
         if use_scale:
             self.scale = torch.nn.Parameter(torch.ones(1))
diff --git a/TTS/tts/layers/generic/transformer.py b/TTS/tts/layers/generic/transformer.py
index 9b7ecee2ba..2fe9bcc408 100644
--- a/TTS/tts/layers/generic/transformer.py
+++ b/TTS/tts/layers/generic/transformer.py
@@ -70,9 +70,7 @@ def forward(self, x, mask=None, g=None):  # pylint: disable=unused-argument
 
 
 class FFTDurationPredictor:
-    def __init__(
-        self, in_channels, hidden_channels, num_heads, num_layers, dropout_p=0.1, cond_channels=None
-    ):  # pylint: disable=unused-argument
+    def __init__(self, in_channels, hidden_channels, num_heads, num_layers, dropout_p=0.1, cond_channels=None):  # pylint: disable=unused-argument
         self.fft = FFTransformerBlock(in_channels, num_heads, hidden_channels, num_layers, dropout_p)
         self.proj = nn.Linear(in_channels, 1)
 
diff --git a/TTS/tts/layers/tortoise/arch_utils.py b/TTS/tts/layers/tortoise/arch_utils.py
index 1bbf676393..00fa559c77 100644
--- a/TTS/tts/layers/tortoise/arch_utils.py
+++ b/TTS/tts/layers/tortoise/arch_utils.py
@@ -101,9 +101,9 @@ def __init__(
         if num_head_channels == -1:
             self.num_heads = num_heads
         else:
-            assert (
-                channels % num_head_channels == 0
-            ), f"q,k,v channels {channels} is not divisible by num_head_channels {num_head_channels}"
+            assert channels % num_head_channels == 0, (
+                f"q,k,v channels {channels} is not divisible by num_head_channels {num_head_channels}"
+            )
             self.num_heads = channels // num_head_channels
         self.norm = normalization(channels)
         self.qkv = nn.Conv1d(channels, channels * 3, 1)
diff --git a/TTS/tts/layers/tortoise/audio_utils.py b/TTS/tts/layers/tortoise/audio_utils.py
index 6d6bb8cdb7..6bbe6c389c 100644
--- a/TTS/tts/layers/tortoise/audio_utils.py
+++ b/TTS/tts/layers/tortoise/audio_utils.py
@@ -125,14 +125,14 @@ def load_voices(voices: list[str], extra_voice_dirs: list[str] = []):
             return None, None
         clip, latent = load_voice(voice, extra_voice_dirs)
         if latent is None:
-            assert (
-                len(latents) == 0
-            ), "Can only combine raw audio voices or latent voices, not both. Do it yourself if you want this."
+            assert len(latents) == 0, (
+                "Can only combine raw audio voices or latent voices, not both. Do it yourself if you want this."
+            )
             clips.extend(clip)
         elif clip is None:
-            assert (
-                len(clips) == 0
-            ), "Can only combine raw audio voices or latent voices, not both. Do it yourself if you want this."
+            assert len(clips) == 0, (
+                "Can only combine raw audio voices or latent voices, not both. Do it yourself if you want this."
+            )
             latents.append(latent)
     if len(latents) == 0:
         return clips, None
diff --git a/TTS/tts/layers/tortoise/autoregressive.py b/TTS/tts/layers/tortoise/autoregressive.py
index cbfe076825..eaeb2a03c1 100644
--- a/TTS/tts/layers/tortoise/autoregressive.py
+++ b/TTS/tts/layers/tortoise/autoregressive.py
@@ -608,9 +608,9 @@ def inference_speech(
         if input_tokens is None:
             inputs = fake_inputs
         else:
-            assert (
-                num_return_sequences % input_tokens.shape[0] == 0
-            ), "The number of return sequences must be divisible by the number of input sequences"
+            assert num_return_sequences % input_tokens.shape[0] == 0, (
+                "The number of return sequences must be divisible by the number of input sequences"
+            )
             fake_inputs = fake_inputs.repeat(num_return_sequences, 1)
             input_tokens = input_tokens.repeat(num_return_sequences // input_tokens.shape[0], 1)
             inputs = torch.cat([fake_inputs, input_tokens], dim=1)
diff --git a/TTS/tts/layers/tortoise/dpm_solver.py b/TTS/tts/layers/tortoise/dpm_solver.py
index d34b61f486..c8892d456a 100644
--- a/TTS/tts/layers/tortoise/dpm_solver.py
+++ b/TTS/tts/layers/tortoise/dpm_solver.py
@@ -563,41 +563,21 @@ def get_orders_and_timesteps_for_singlestep_solver(self, steps, order, skip_type
         if order == 3:
             K = steps // 3 + 1
             if steps % 3 == 0:
-                orders = [
-                    3,
-                ] * (
-                    K - 2
-                ) + [2, 1]
+                orders = [3] * (K - 2) + [2, 1]
             elif steps % 3 == 1:
-                orders = [
-                    3,
-                ] * (
-                    K - 1
-                ) + [1]
+                orders = [3] * (K - 1) + [1]
             else:
-                orders = [
-                    3,
-                ] * (
-                    K - 1
-                ) + [2]
+                orders = [3] * (K - 1) + [2]
         elif order == 2:
             if steps % 2 == 0:
                 K = steps // 2
-                orders = [
-                    2,
-                ] * K
+                orders = [2] * K
             else:
                 K = steps // 2 + 1
-                orders = [
-                    2,
-                ] * (
-                    K - 1
-                ) + [1]
+                orders = [2] * (K - 1) + [1]
         elif order == 1:
             K = 1
-            orders = [
-                1,
-            ] * steps
+            orders = [1] * steps
         else:
             raise ValueError("'order' must be '1' or '2' or '3'.")
         if skip_type == "logSNR":
@@ -605,15 +585,7 @@ def get_orders_and_timesteps_for_singlestep_solver(self, steps, order, skip_type
             timesteps_outer = self.get_time_steps(skip_type, t_T, t_0, K, device)
         else:
             timesteps_outer = self.get_time_steps(skip_type, t_T, t_0, steps, device)[
-                torch.cumsum(
-                    torch.tensor(
-                        [
-                            0,
-                        ]
-                        + orders
-                    ),
-                    0,
-                ).to(device)
+                torch.cumsum(torch.tensor([0] + orders), 0).to(device)
             ]
         return timesteps_outer, orders
 
@@ -1217,9 +1189,9 @@ def inverse(
         """
         t_0 = 1.0 / self.noise_schedule.total_N if t_start is None else t_start
         t_T = self.noise_schedule.T if t_end is None else t_end
-        assert (
-            t_0 > 0 and t_T > 0
-        ), "Time range needs to be greater than 0. For discrete-time DPMs, it needs to be in [1 / N, 1], where N is the length of betas array"
+        assert t_0 > 0 and t_T > 0, (
+            "Time range needs to be greater than 0. For discrete-time DPMs, it needs to be in [1 / N, 1], where N is the length of betas array"
+        )
         return self.sample(
             x,
             steps=steps,
@@ -1362,9 +1334,9 @@ def sample(
         """
         t_0 = 1.0 / self.noise_schedule.total_N if t_end is None else t_end
         t_T = self.noise_schedule.T if t_start is None else t_start
-        assert (
-            t_0 > 0 and t_T > 0
-        ), "Time range needs to be greater than 0. For discrete-time DPMs, it needs to be in [1 / N, 1], where N is the length of betas array"
+        assert t_0 > 0 and t_T > 0, (
+            "Time range needs to be greater than 0. For discrete-time DPMs, it needs to be in [1 / N, 1], where N is the length of betas array"
+        )
         if return_intermediate:
             assert method in [
                 "multistep",
diff --git a/TTS/tts/layers/tortoise/transformer.py b/TTS/tts/layers/tortoise/transformer.py
index c1854bd196..531f294220 100644
--- a/TTS/tts/layers/tortoise/transformer.py
+++ b/TTS/tts/layers/tortoise/transformer.py
@@ -43,9 +43,9 @@ def route_args(router, args, depth):
 class SequentialSequence(nn.Module):
     def __init__(self, layers, args_route={}, layer_dropout=0.0):
         super().__init__()
-        assert all(
-            len(route) == len(layers) for route in args_route.values()
-        ), "each argument route map must have the same depth as the number of sequential layers"
+        assert all(len(route) == len(layers) for route in args_route.values()), (
+            "each argument route map must have the same depth as the number of sequential layers"
+        )
         self.layers = layers
         self.args_route = args_route
         self.layer_dropout = layer_dropout
diff --git a/TTS/tts/layers/tortoise/xtransformers.py b/TTS/tts/layers/tortoise/xtransformers.py
index 0892fee19d..b2e74cf118 100644
--- a/TTS/tts/layers/tortoise/xtransformers.py
+++ b/TTS/tts/layers/tortoise/xtransformers.py
@@ -560,9 +560,9 @@ def __init__(
 
         self.rel_pos_bias = rel_pos_bias
         if rel_pos_bias:
-            assert (
-                rel_pos_num_buckets <= rel_pos_max_distance
-            ), "number of relative position buckets must be less than the relative position max distance"
+            assert rel_pos_num_buckets <= rel_pos_max_distance, (
+                "number of relative position buckets must be less than the relative position max distance"
+            )
             self.rel_pos = RelativePositionBias(
                 scale=dim_head**0.5,
                 causal=causal,
@@ -680,9 +680,9 @@ def forward(
         del input_mask
 
         if exists(attn_mask):
-            assert (
-                2 <= attn_mask.ndim <= 4
-            ), "attention mask must have greater than 2 dimensions but less than or equal to 4"
+            assert 2 <= attn_mask.ndim <= 4, (
+                "attention mask must have greater than 2 dimensions but less than or equal to 4"
+            )
             if attn_mask.ndim == 2:
                 attn_mask = rearrange(attn_mask, "i j -> () () i j")
             elif attn_mask.ndim == 3:
@@ -790,9 +790,9 @@ def __init__(
             rotary_emb_dim = max(default(rotary_emb_dim, dim_head // 2), 32)
         self.rotary_pos_emb = RotaryEmbedding(rotary_emb_dim) if rotary_pos_emb else None
 
-        assert not (
-            alibi_pos_bias and rel_pos_bias
-        ), "you can only choose Alibi positional bias or T5 relative positional bias, not both"
+        assert not (alibi_pos_bias and rel_pos_bias), (
+            "you can only choose Alibi positional bias or T5 relative positional bias, not both"
+        )
 
         if alibi_pos_bias:
             alibi_num_heads = default(alibi_num_heads, heads)
@@ -922,9 +922,9 @@ def forward(
         past_key_values=None,
         expected_seq_len=None,
     ):
-        assert not (
-            self.cross_attend ^ (exists(context) or exists(full_context))
-        ), "context must be passed in if cross_attend is set to True"
+        assert not (self.cross_attend ^ (exists(context) or exists(full_context))), (
+            "context must be passed in if cross_attend is set to True"
+        )
         assert context is None or full_context is None, "only one of full_context or context can be provided"
 
         hiddens = []
@@ -940,9 +940,9 @@ def forward(
         rotary_pos_emb = None
         if exists(self.rotary_pos_emb):
             if not self.training and self.causal:
-                assert (
-                    expected_seq_len is not None
-                ), "To decode a transformer with rotary embeddings, you must specify an `expected_seq_len`"
+                assert expected_seq_len is not None, (
+                    "To decode a transformer with rotary embeddings, you must specify an `expected_seq_len`"
+                )
             elif expected_seq_len is None:
                 expected_seq_len = 0
             seq_len = x.shape[1]
diff --git a/TTS/tts/layers/xtts/gpt.py b/TTS/tts/layers/xtts/gpt.py
index 20eff26ecc..4e0f53616d 100644
--- a/TTS/tts/layers/xtts/gpt.py
+++ b/TTS/tts/layers/xtts/gpt.py
@@ -347,12 +347,12 @@ def forward(
             audio_codes = F.pad(audio_codes, (0, max_mel_len - audio_codes.shape[-1]))
 
         # 💖 Lovely assertions
-        assert (
-            max_mel_len <= audio_codes.shape[-1]
-        ), f" ❗ max_mel_len ({max_mel_len}) > audio_codes.shape[-1] ({audio_codes.shape[-1]})"
-        assert (
-            max_text_len <= text_inputs.shape[-1]
-        ), f" ❗ max_text_len ({max_text_len}) > text_inputs.shape[-1] ({text_inputs.shape[-1]})"
+        assert max_mel_len <= audio_codes.shape[-1], (
+            f" ❗ max_mel_len ({max_mel_len}) > audio_codes.shape[-1] ({audio_codes.shape[-1]})"
+        )
+        assert max_text_len <= text_inputs.shape[-1], (
+            f" ❗ max_text_len ({max_text_len}) > text_inputs.shape[-1] ({text_inputs.shape[-1]})"
+        )
 
         # Append stop token to text inputs
         text_inputs = F.pad(text_inputs[:, :max_text_len], (0, 1), value=self.stop_text_token)
@@ -454,9 +454,9 @@ def forward(
             mel_targets[idx, l + 1 :] = -1
 
         # check if stoptoken is in every row of mel_targets
-        assert (mel_targets == self.stop_audio_token).sum() >= mel_targets.shape[
-            0
-        ], f" ❗ mel_targets does not contain stop token ({self.stop_audio_token}) in every row."
+        assert (mel_targets == self.stop_audio_token).sum() >= mel_targets.shape[0], (
+            f" ❗ mel_targets does not contain stop token ({self.stop_audio_token}) in every row."
+        )
 
         # ignore the loss for the segment used for conditioning
         # coin flip for the segment to be ignored
diff --git a/TTS/tts/layers/xtts/stream_generator.py b/TTS/tts/layers/xtts/stream_generator.py
index 303a990c27..e09a5233ac 100644
--- a/TTS/tts/layers/xtts/stream_generator.py
+++ b/TTS/tts/layers/xtts/stream_generator.py
@@ -953,7 +953,6 @@ def init_stream_support():
 
 
 def _get_logits_warper(generation_config: GenerationConfig) -> LogitsProcessorList:
-
     warpers = LogitsProcessorList()
 
     if generation_config.temperature is not None and generation_config.temperature != 1.0:
diff --git a/TTS/tts/layers/xtts/trainer/gpt_trainer.py b/TTS/tts/layers/xtts/trainer/gpt_trainer.py
index e00ce2b4de..6e99d41eb9 100644
--- a/TTS/tts/layers/xtts/trainer/gpt_trainer.py
+++ b/TTS/tts/layers/xtts/trainer/gpt_trainer.py
@@ -248,7 +248,11 @@ def test_run(self, assets) -> tuple[dict, dict]:  # pylint: disable=W0613
         return {"audios": test_audios}
 
     def test_log(
-        self, outputs: dict, logger: "Logger", assets: dict, steps: int  # pylint: disable=unused-argument
+        self,
+        outputs: dict,
+        logger: "Logger",
+        assets: dict,
+        steps: int,  # pylint: disable=unused-argument
     ) -> None:
         logger.test_audios(steps, outputs["audios"], self.args.output_sample_rate)
 
diff --git a/TTS/tts/models/align_tts.py b/TTS/tts/models/align_tts.py
index 12c3d18252..c2e29c7100 100644
--- a/TTS/tts/models/align_tts.py
+++ b/TTS/tts/models/align_tts.py
@@ -232,9 +232,7 @@ def _forward_mdn(self, o_en, y, y_lengths, x_mask):
         dr_mas, logp = self.compute_align_path(mu, log_sigma, y, x_mask, y_mask)
         return dr_mas, mu, log_sigma, logp
 
-    def forward(
-        self, x, x_lengths, y, y_lengths, aux_input={"d_vectors": None}, phase=None
-    ):  # pylint: disable=unused-argument
+    def forward(self, x, x_lengths, y, y_lengths, aux_input={"d_vectors": None}, phase=None):  # pylint: disable=unused-argument
         """
         Shapes:
             - x: :math:`[B, T_max]`
@@ -351,9 +349,7 @@ def _create_logs(self, batch, outputs, ap):  # pylint: disable=no-self-use
         train_audio = ap.inv_melspectrogram(pred_spec.T)
         return figures, {"audio": train_audio}
 
-    def train_log(
-        self, batch: dict, outputs: dict, logger: "Logger", assets: dict, steps: int
-    ) -> None:  # pylint: disable=no-self-use
+    def train_log(self, batch: dict, outputs: dict, logger: "Logger", assets: dict, steps: int) -> None:  # pylint: disable=no-self-use
         figures, audios = self._create_logs(batch, outputs, self.ap)
         logger.train_figures(steps, figures)
         logger.train_audios(steps, audios, self.ap.sample_rate)
@@ -366,9 +362,7 @@ def eval_log(self, batch: dict, outputs: dict, logger: "Logger", assets: dict, s
         logger.eval_figures(steps, figures)
         logger.eval_audios(steps, audios, self.ap.sample_rate)
 
-    def load_checkpoint(
-        self, config, checkpoint_path, eval=False, cache=False
-    ):  # pylint: disable=unused-argument, redefined-builtin
+    def load_checkpoint(self, config, checkpoint_path, eval=False, cache=False):  # pylint: disable=unused-argument, redefined-builtin
         state = load_fsspec(checkpoint_path, map_location=torch.device("cpu"), cache=cache)
         self.load_state_dict(state["model"])
         if eval:
diff --git a/TTS/tts/models/bark.py b/TTS/tts/models/bark.py
index 83478926a6..df0b73e3b4 100644
--- a/TTS/tts/models/bark.py
+++ b/TTS/tts/models/bark.py
@@ -194,9 +194,7 @@ def _set_voice_dirs(self, voice_dirs):
         return _voice_dirs
 
     # TODO: remove config from synthesize
-    def synthesize(
-        self, text, config, speaker_id="random", voice_dirs=None, **kwargs
-    ):  # pylint: disable=unused-argument
+    def synthesize(self, text, config, speaker_id="random", voice_dirs=None, **kwargs):  # pylint: disable=unused-argument
         """Synthesize speech with the given input text.
 
         Args:
diff --git a/TTS/tts/models/base_tacotron.py b/TTS/tts/models/base_tacotron.py
index 8821036b5f..05f4ae168d 100644
--- a/TTS/tts/models/base_tacotron.py
+++ b/TTS/tts/models/base_tacotron.py
@@ -93,9 +93,7 @@ def forward(self):
     def inference(self):
         pass
 
-    def load_checkpoint(
-        self, config, checkpoint_path, eval=False, cache=False
-    ):  # pylint: disable=unused-argument, redefined-builtin
+    def load_checkpoint(self, config, checkpoint_path, eval=False, cache=False):  # pylint: disable=unused-argument, redefined-builtin
         """Load model checkpoint and set up internals.
 
         Args:
@@ -176,7 +174,11 @@ def test_run(self, assets: dict) -> tuple[dict, dict]:
         return {"figures": test_figures, "audios": test_audios}
 
     def test_log(
-        self, outputs: dict, logger: "Logger", assets: dict, steps: int  # pylint: disable=unused-argument
+        self,
+        outputs: dict,
+        logger: "Logger",
+        assets: dict,
+        steps: int,  # pylint: disable=unused-argument
     ) -> None:
         logger.test_audios(steps, outputs["audios"], self.ap.sample_rate)
         logger.test_figures(steps, outputs["figures"])
diff --git a/TTS/tts/models/base_tts.py b/TTS/tts/models/base_tts.py
index 0976e4cdab..f5bc49e147 100644
--- a/TTS/tts/models/base_tts.py
+++ b/TTS/tts/models/base_tts.py
@@ -210,9 +210,9 @@ def format_batch(self, batch: dict) -> dict:
                 extra_frames = dur.sum() - mel_lengths[idx]
                 largest_idxs = torch.argsort(-dur)[:extra_frames]
                 dur[largest_idxs] -= 1
-                assert (
-                    dur.sum() == mel_lengths[idx]
-                ), f" [!] total duration {dur.sum()} vs spectrogram length {mel_lengths[idx]}"
+                assert dur.sum() == mel_lengths[idx], (
+                    f" [!] total duration {dur.sum()} vs spectrogram length {mel_lengths[idx]}"
+                )
                 durations[idx, : text_lengths[idx]] = dur
 
         # set stop targets wrt reduction factor
diff --git a/TTS/tts/models/delightful_tts.py b/TTS/tts/models/delightful_tts.py
index 4a3defe665..eeb921503d 100644
--- a/TTS/tts/models/delightful_tts.py
+++ b/TTS/tts/models/delightful_tts.py
@@ -835,9 +835,7 @@ def _log(self, batch, outputs, name_prefix="train"):
             audios[f"{name_prefix}/vocoder_audio"] = sample_voice
         return figures, audios
 
-    def train_log(
-        self, batch: dict, outputs: dict, logger: "Logger", assets: dict, steps: int
-    ):  # pylint: disable=no-self-use, unused-argument
+    def train_log(self, batch: dict, outputs: dict, logger: "Logger", assets: dict, steps: int):  # pylint: disable=no-self-use, unused-argument
         """Create visualizations and waveform examples.
 
         For example, here you can plot spectrograms and generate sample sample waveforms from these spectrograms to
@@ -1050,7 +1048,11 @@ def test_run(self, assets) -> tuple[dict, dict]:
         return {"figures": test_figures, "audios": test_audios}
 
     def test_log(
-        self, outputs: dict, logger: "Logger", assets: dict, steps: int  # pylint: disable=unused-argument
+        self,
+        outputs: dict,
+        logger: "Logger",
+        assets: dict,
+        steps: int,  # pylint: disable=unused-argument
     ) -> None:
         logger.test_audios(steps, outputs["audios"], self.config.audio.sample_rate)
         logger.test_figures(steps, outputs["figures"])
@@ -1262,9 +1264,7 @@ def on_epoch_end(self, trainer):  # pylint: disable=unused-argument
         self.energy_scaler.eval()
 
     @staticmethod
-    def init_from_config(
-        config: "DelightfulTTSConfig", samples: list[list] | list[dict] = None
-    ):  # pylint: disable=unused-argument
+    def init_from_config(config: "DelightfulTTSConfig", samples: list[list] | list[dict] = None):  # pylint: disable=unused-argument
         """Initiate model from config
 
         Args:
diff --git a/TTS/tts/models/forward_tts.py b/TTS/tts/models/forward_tts.py
index 5b68475406..497ac3f63a 100644
--- a/TTS/tts/models/forward_tts.py
+++ b/TTS/tts/models/forward_tts.py
@@ -770,9 +770,7 @@ def _create_logs(self, batch, outputs, ap):
         train_audio = ap.inv_melspectrogram(pred_spec.T)
         return figures, {"audio": train_audio}
 
-    def train_log(
-        self, batch: dict, outputs: dict, logger: "Logger", assets: dict, steps: int
-    ) -> None:  # pylint: disable=no-self-use
+    def train_log(self, batch: dict, outputs: dict, logger: "Logger", assets: dict, steps: int) -> None:  # pylint: disable=no-self-use
         figures, audios = self._create_logs(batch, outputs, self.ap)
         logger.train_figures(steps, figures)
         logger.train_audios(steps, audios, self.ap.sample_rate)
@@ -785,9 +783,7 @@ def eval_log(self, batch: dict, outputs: dict, logger: "Logger", assets: dict, s
         logger.eval_figures(steps, figures)
         logger.eval_audios(steps, audios, self.ap.sample_rate)
 
-    def load_checkpoint(
-        self, config, checkpoint_path, eval=False, cache=False
-    ):  # pylint: disable=unused-argument, redefined-builtin
+    def load_checkpoint(self, config, checkpoint_path, eval=False, cache=False):  # pylint: disable=unused-argument, redefined-builtin
         state = load_fsspec(checkpoint_path, map_location=torch.device("cpu"), cache=cache)
         self.load_state_dict(state["model"])
         if eval:
diff --git a/TTS/tts/models/glow_tts.py b/TTS/tts/models/glow_tts.py
index 3289dcdd04..6310751b26 100644
--- a/TTS/tts/models/glow_tts.py
+++ b/TTS/tts/models/glow_tts.py
@@ -124,9 +124,9 @@ def init_multispeaker(self, config: Coqpit):
             config.d_vector_dim if "d_vector_dim" in config and config.d_vector_dim is not None else 512
         )
         if self.speaker_manager is not None:
-            assert (
-                config.d_vector_dim == self.speaker_manager.embedding_dim
-            ), " [!] d-vector dimension mismatch b/w config and speaker manager."
+            assert config.d_vector_dim == self.speaker_manager.embedding_dim, (
+                " [!] d-vector dimension mismatch b/w config and speaker manager."
+            )
         # init speaker embedding layer
         if config.use_speaker_embedding and not config.use_d_vector_file:
             logger.info("Init speaker_embedding layer.")
@@ -192,9 +192,7 @@ def _speaker_embedding(self, aux_input: dict) -> torch.tensor | None:
             g = F.normalize(g).unsqueeze(-1)  # [b, h, 1]
         return g
 
-    def forward(
-        self, x, x_lengths, y, y_lengths=None, aux_input={"d_vectors": None, "speaker_ids": None}
-    ):  # pylint: disable=dangerous-default-value
+    def forward(self, x, x_lengths, y, y_lengths=None, aux_input={"d_vectors": None, "speaker_ids": None}):  # pylint: disable=dangerous-default-value
         """
         Args:
             x (torch.Tensor):
@@ -318,9 +316,7 @@ def inference_with_MAS(
         return outputs
 
     @torch.inference_mode()
-    def decoder_inference(
-        self, y, y_lengths=None, aux_input={"d_vectors": None, "speaker_ids": None}
-    ):  # pylint: disable=dangerous-default-value
+    def decoder_inference(self, y, y_lengths=None, aux_input={"d_vectors": None, "speaker_ids": None}):  # pylint: disable=dangerous-default-value
         """
         Shapes:
             - y: :math:`[B, T, C]`
@@ -341,9 +337,7 @@ def decoder_inference(
         return outputs
 
     @torch.inference_mode()
-    def inference(
-        self, x, aux_input={"x_lengths": None, "d_vectors": None, "speaker_ids": None}
-    ):  # pylint: disable=dangerous-default-value
+    def inference(self, x, aux_input={"x_lengths": None, "d_vectors": None, "speaker_ids": None}):  # pylint: disable=dangerous-default-value
         x_lengths = aux_input["x_lengths"]
         g = self._speaker_embedding(aux_input)
         # embedding pass
@@ -456,9 +450,7 @@ def _create_logs(self, batch, outputs, ap):
         train_audio = ap.inv_melspectrogram(pred_spec.T)
         return figures, {"audio": train_audio}
 
-    def train_log(
-        self, batch: dict, outputs: dict, logger: "Logger", assets: dict, steps: int
-    ) -> None:  # pylint: disable=no-self-use
+    def train_log(self, batch: dict, outputs: dict, logger: "Logger", assets: dict, steps: int) -> None:  # pylint: disable=no-self-use
         figures, audios = self._create_logs(batch, outputs, self.ap)
         logger.train_figures(steps, figures)
         logger.train_audios(steps, audios, self.ap.sample_rate)
@@ -521,9 +513,7 @@ def preprocess(self, y, y_lengths, y_max_length, attn=None):
     def store_inverse(self):
         self.decoder.store_inverse()
 
-    def load_checkpoint(
-        self, config, checkpoint_path, eval=False
-    ):  # pylint: disable=unused-argument, redefined-builtin
+    def load_checkpoint(self, config, checkpoint_path, eval=False):  # pylint: disable=unused-argument, redefined-builtin
         state = load_fsspec(checkpoint_path, map_location=torch.device("cpu"))
         self.load_state_dict(state["model"])
         if eval:
diff --git a/TTS/tts/models/neuralhmm_tts.py b/TTS/tts/models/neuralhmm_tts.py
index a7c0ea7f14..2cbf425884 100644
--- a/TTS/tts/models/neuralhmm_tts.py
+++ b/TTS/tts/models/neuralhmm_tts.py
@@ -345,17 +345,13 @@ def _create_logs(self, batch, outputs, ap):  # pylint: disable=no-self-use, unus
         audio = ap.inv_melspectrogram(inference_output["model_outputs"][0].T.cpu().numpy())
         return figures, {"audios": audio}
 
-    def train_log(
-        self, batch: dict, outputs: dict, logger: "Logger", assets: dict, steps: int
-    ):  # pylint: disable=unused-argument
+    def train_log(self, batch: dict, outputs: dict, logger: "Logger", assets: dict, steps: int):  # pylint: disable=unused-argument
"""Log training progress.""" figures, audios = self._create_logs(batch, outputs, self.ap) logger.train_figures(steps, figures) logger.train_audios(steps, audios, self.ap.sample_rate) - def eval_log( - self, batch: dict, outputs: dict, logger: "Logger", assets: dict, steps: int - ): # pylint: disable=unused-argument + def eval_log(self, batch: dict, outputs: dict, logger: "Logger", assets: dict, steps: int): # pylint: disable=unused-argument """Compute and log evaluation metrics.""" # Plot model parameters histograms if isinstance(logger, TensorboardLogger): @@ -369,7 +365,11 @@ def eval_log( logger.eval_audios(steps, audios, self.ap.sample_rate) def test_log( - self, outputs: dict, logger: "Logger", assets: dict, steps: int # pylint: disable=unused-argument + self, + outputs: dict, + logger: "Logger", + assets: dict, + steps: int, # pylint: disable=unused-argument ) -> None: logger.test_audios(steps, outputs[1], self.ap.sample_rate) logger.test_figures(steps, outputs[0]) diff --git a/TTS/tts/models/overflow.py b/TTS/tts/models/overflow.py index 85e1523307..aad2e1f553 100644 --- a/TTS/tts/models/overflow.py +++ b/TTS/tts/models/overflow.py @@ -362,17 +362,13 @@ def _create_logs(self, batch, outputs, ap): # pylint: disable=no-self-use, unus audio = ap.inv_melspectrogram(inference_output["model_outputs"][0].T.cpu().numpy()) return figures, {"audios": audio} - def train_log( - self, batch: dict, outputs: dict, logger: "Logger", assets: dict, steps: int - ): # pylint: disable=unused-argument + def train_log(self, batch: dict, outputs: dict, logger: "Logger", assets: dict, steps: int): # pylint: disable=unused-argument """Log training progress.""" figures, audios = self._create_logs(batch, outputs, self.ap) logger.train_figures(steps, figures) logger.train_audios(steps, audios, self.ap.sample_rate) - def eval_log( - self, batch: dict, outputs: dict, logger: "Logger", assets: dict, steps: int - ): # pylint: disable=unused-argument + def eval_log(self, batch: dict, outputs: dict, logger: "Logger", assets: dict, steps: int): # pylint: disable=unused-argument """Compute and log evaluation metrics.""" # Plot model parameters histograms if isinstance(logger, TensorboardLogger): @@ -386,7 +382,11 @@ def eval_log( logger.eval_audios(steps, audios, self.ap.sample_rate) def test_log( - self, outputs: dict, logger: "Logger", assets: dict, steps: int # pylint: disable=unused-argument + self, + outputs: dict, + logger: "Logger", + assets: dict, + steps: int, # pylint: disable=unused-argument ) -> None: logger.test_audios(steps, outputs[1], self.ap.sample_rate) logger.test_figures(steps, outputs[0]) diff --git a/TTS/tts/models/tacotron.py b/TTS/tts/models/tacotron.py index 879a2b94b5..59173691f7 100644 --- a/TTS/tts/models/tacotron.py +++ b/TTS/tts/models/tacotron.py @@ -376,9 +376,7 @@ def _create_logs(self, batch, outputs, ap): audio = ap.inv_spectrogram(pred_linear_spec.T) return figures, {"audio": audio} - def train_log( - self, batch: dict, outputs: dict, logger: "Logger", assets: dict, steps: int - ) -> None: # pylint: disable=no-self-use + def train_log(self, batch: dict, outputs: dict, logger: "Logger", assets: dict, steps: int) -> None: # pylint: disable=no-self-use figures, audios = self._create_logs(batch, outputs, self.ap) logger.train_figures(steps, figures) logger.train_audios(steps, audios, self.ap.sample_rate) diff --git a/TTS/tts/models/tacotron2.py b/TTS/tts/models/tacotron2.py index c8c0c875ad..e924d82d42 100644 --- a/TTS/tts/models/tacotron2.py +++ b/TTS/tts/models/tacotron2.py @@ -399,9 
         audio = ap.inv_melspectrogram(pred_spec.T)
         return figures, {"audio": audio}
 
-    def train_log(
-        self, batch: dict, outputs: dict, logger: "Logger", assets: dict, steps: int
-    ) -> None:  # pylint: disable=no-self-use
+    def train_log(self, batch: dict, outputs: dict, logger: "Logger", assets: dict, steps: int) -> None:  # pylint: disable=no-self-use
         """Log training progress."""
         figures, audios = self._create_logs(batch, outputs, self.ap)
         logger.train_figures(steps, figures)
diff --git a/TTS/tts/models/tortoise.py b/TTS/tts/models/tortoise.py
index 738e9dd9b3..b44a5fbfc6 100644
--- a/TTS/tts/models/tortoise.py
+++ b/TTS/tts/models/tortoise.py
@@ -685,9 +685,9 @@ def inference(
 
         text_tokens = torch.IntTensor(self.tokenizer.encode(text)).unsqueeze(0).to(self.device)
         text_tokens = F.pad(text_tokens, (0, 1))  # This may not be necessary.
-        assert (
-            text_tokens.shape[-1] < 400
-        ), "Too much text provided. Break the text up into separate segments and re-try inference."
+        assert text_tokens.shape[-1] < 400, (
+            "Too much text provided. Break the text up into separate segments and re-try inference."
+        )
 
         if voice_samples is not None:
             (
diff --git a/TTS/tts/models/vits.py b/TTS/tts/models/vits.py
index 819ac7aea0..c92d6f46f7 100644
--- a/TTS/tts/models/vits.py
+++ b/TTS/tts/models/vits.py
@@ -1188,9 +1188,7 @@ def _log(self, ap, batch, outputs, name_prefix="train"):  # pylint: disable=unus
         )
         return figures, audios
 
-    def train_log(
-        self, batch: dict, outputs: dict, logger: "Logger", assets: dict, steps: int
-    ):  # pylint: disable=no-self-use
+    def train_log(self, batch: dict, outputs: dict, logger: "Logger", assets: dict, steps: int):  # pylint: disable=no-self-use
         """Create visualizations and waveform examples.
 
         For example, here you can plot spectrograms and generate sample sample waveforms from these spectrograms to
@@ -1297,7 +1295,11 @@ def test_run(self, assets) -> tuple[dict, dict]:
         return {"figures": test_figures, "audios": test_audios}
 
     def test_log(
-        self, outputs: dict, logger: "Logger", assets: dict, steps: int  # pylint: disable=unused-argument
+        self,
+        outputs: dict,
+        logger: "Logger",
+        assets: dict,
+        steps: int,  # pylint: disable=unused-argument
     ) -> None:
         logger.test_audios(steps, outputs["audios"], self.ap.sample_rate)
         logger.test_figures(steps, outputs["figures"])
@@ -1366,9 +1368,9 @@ def format_batch_on_device(self, batch):
         )
 
         if self.args.encoder_sample_rate:
-            assert batch["spec"].shape[2] == int(
-                batch["mel"].shape[2] / self.interpolate_factor
-            ), f"{batch['spec'].shape[2]}, {batch['mel'].shape[2]}"
+            assert batch["spec"].shape[2] == int(batch["mel"].shape[2] / self.interpolate_factor), (
+                f"{batch['spec'].shape[2]}, {batch['mel'].shape[2]}"
+            )
         else:
             assert batch["spec"].shape[2] == batch["mel"].shape[2], f"{batch['spec'].shape[2]}, {batch['mel'].shape[2]}"
 
@@ -1538,9 +1540,7 @@ def get_criterion(self):
 
         return [VitsDiscriminatorLoss(self.config), VitsGeneratorLoss(self.config)]
 
-    def load_checkpoint(
-        self, config, checkpoint_path, eval=False, strict=True, cache=False
-    ):  # pylint: disable=unused-argument, redefined-builtin
+    def load_checkpoint(self, config, checkpoint_path, eval=False, strict=True, cache=False):  # pylint: disable=unused-argument, redefined-builtin
         """Load the model checkpoint and setup for training or inference"""
         state = load_fsspec(checkpoint_path, map_location=torch.device("cpu"), cache=cache)
         # compat band-aid for the pre-trained models to not use the encoder baked into the model
@@ -1567,9 +1567,7 @@ def load_checkpoint(
         self.eval()
         assert not self.training
 
-    def load_fairseq_checkpoint(
-        self, config, checkpoint_dir, eval=False, strict=True
-    ):  # pylint: disable=unused-argument, redefined-builtin
+    def load_fairseq_checkpoint(self, config, checkpoint_dir, eval=False, strict=True):  # pylint: disable=unused-argument, redefined-builtin
         """Load VITS checkpoints released by fairseq here: https://github.com/facebookresearch/fairseq/tree/main/examples/mms
 
         Performs some changes for compatibility.
@@ -1625,15 +1623,15 @@ def init_from_config(config: "VitsConfig", samples: list[list] | list[dict] = No
         upsample_rate = torch.prod(torch.as_tensor(config.model_args.upsample_rates_decoder)).item()
 
         if not config.model_args.encoder_sample_rate:
-            assert (
-                upsample_rate == config.audio.hop_length
-            ), f" [!] Product of upsample rates must be equal to the hop length - {upsample_rate} vs {config.audio.hop_length}"
+            assert upsample_rate == config.audio.hop_length, (
+                f" [!] Product of upsample rates must be equal to the hop length - {upsample_rate} vs {config.audio.hop_length}"
+            )
         else:
             encoder_to_vocoder_upsampling_factor = config.audio.sample_rate / config.model_args.encoder_sample_rate
             effective_hop_length = config.audio.hop_length * encoder_to_vocoder_upsampling_factor
-            assert (
-                upsample_rate == effective_hop_length
-            ), f" [!] Product of upsample rates must be equal to the hop length - {upsample_rate} vs {effective_hop_length}"
+            assert upsample_rate == effective_hop_length, (
+                f" [!] Product of upsample rates must be equal to the hop length - {upsample_rate} vs {effective_hop_length}"
+            )
 
         ap = AudioProcessor.init_from_config(config)
         tokenizer, new_config = TTSTokenizer.init_from_config(config)
diff --git a/TTS/tts/models/xtts.py b/TTS/tts/models/xtts.py
index c1ed3a305b..28eb17d648 100644
--- a/TTS/tts/models/xtts.py
+++ b/TTS/tts/models/xtts.py
@@ -383,9 +383,9 @@ def synthesize(self, text, config, speaker_wav, language, speaker_id=None, **kwa
                 as latents used at inference.
 
         """
-        assert (
-            "zh-cn" if language == "zh" else language in self.config.languages
-        ), f" ❗ Language {language} is not supported. Supported languages are {self.config.languages}"
+        assert "zh-cn" if language == "zh" else language in self.config.languages, (
+            f" ❗ Language {language} is not supported. Supported languages are {self.config.languages}"
+        )
         # Use generally found best tuning knobs for generation.
         settings = {
             "temperature": config.temperature,
@@ -523,9 +523,9 @@ def inference(
             sent = sent.strip().lower()
             text_tokens = torch.IntTensor(self.tokenizer.encode(sent, lang=language)).unsqueeze(0).to(self.device)
 
-            assert (
-                text_tokens.shape[-1] < self.args.gpt_max_text_tokens
-            ), " ❗ XTTS can only generate text with a maximum of 400 tokens."
+            assert text_tokens.shape[-1] < self.args.gpt_max_text_tokens, (
+                " ❗ XTTS can only generate text with a maximum of 400 tokens."
+            )
 
             with torch.no_grad():
                 gpt_codes = self.gpt.generate(
@@ -631,9 +631,9 @@ def inference_stream(
             sent = sent.strip().lower()
             text_tokens = torch.IntTensor(self.tokenizer.encode(sent, lang=language)).unsqueeze(0).to(self.device)
 
-            assert (
-                text_tokens.shape[-1] < self.args.gpt_max_text_tokens
-            ), " ❗ XTTS can only generate text with a maximum of 400 tokens."
+            assert text_tokens.shape[-1] < self.args.gpt_max_text_tokens, (
+                " ❗ XTTS can only generate text with a maximum of 400 tokens."
+            )
 
             fake_inputs = self.gpt.compute_embeddings(
                 gpt_cond_latent.to(self.device),
diff --git a/TTS/tts/utils/helpers.py b/TTS/tts/utils/helpers.py
index cf02e5282b..a3648eff4b 100644
--- a/TTS/tts/utils/helpers.py
+++ b/TTS/tts/utils/helpers.py
@@ -105,9 +105,9 @@ def rand_segments(
         _x_lenghts[len_diff < 0] = segment_size
         len_diff = _x_lenghts - segment_size
     else:
-        assert all(
-            len_diff > 0
-        ), f" [!] At least one sample is shorter than the segment size ({segment_size}). \n {_x_lenghts}"
+        assert all(len_diff > 0), (
+            f" [!] At least one sample is shorter than the segment size ({segment_size}). \n {_x_lenghts}"
+        )
     segment_indices = (torch.rand([B]).type_as(x) * (len_diff + 1)).long()
     ret = segment(x, segment_indices, segment_size, pad_short=pad_short)
     return ret, segment_indices
diff --git a/TTS/tts/utils/speakers.py b/TTS/tts/utils/speakers.py
index 026039ab29..6fab27de5a 100644
--- a/TTS/tts/utils/speakers.py
+++ b/TTS/tts/utils/speakers.py
@@ -185,9 +185,9 @@ def get_speaker_manager(c: Coqpit, data: list = None, restore_path: str = None,
         elif not c.use_d_vector_file:  # restor speaker manager with speaker ID file.
             speaker_ids_from_data = speaker_manager.name_to_id
             speaker_manager.load_ids_from_file(speakers_file)
-            assert all(
-                speaker in speaker_manager.name_to_id for speaker in speaker_ids_from_data
-            ), " [!] You cannot introduce new speakers to a pre-trained model."
+            assert all(speaker in speaker_manager.name_to_id for speaker in speaker_ids_from_data), (
+                " [!] You cannot introduce new speakers to a pre-trained model."
+            )
     elif c.use_d_vector_file and c.d_vector_file:
         # new speaker manager with external speaker embeddings.
         speaker_manager.load_embeddings_from_file(c.d_vector_file)
diff --git a/TTS/tts/utils/ssim.py b/TTS/tts/utils/ssim.py
index 24bab63ca1..660370a832 100644
--- a/TTS/tts/utils/ssim.py
+++ b/TTS/tts/utils/ssim.py
@@ -49,16 +49,16 @@ def _validate_input(
     if size_range is None:
         assert t.size() == x.size(), f"Expected tensors with same size, got {t.size()} and {x.size()}"
     else:
-        assert (
-            t.size()[size_range[0] : size_range[1]] == x.size()[size_range[0] : size_range[1]]
-        ), f"Expected tensors with same size at given dimensions, got {t.size()} and {x.size()}"
+        assert t.size()[size_range[0] : size_range[1]] == x.size()[size_range[0] : size_range[1]], (
+            f"Expected tensors with same size at given dimensions, got {t.size()} and {x.size()}"
+        )
 
     if dim_range[0] == dim_range[1]:
         assert t.dim() == dim_range[0], f"Expected number of dimensions to be {dim_range[0]}, got {t.dim()}"
     elif dim_range[0] < dim_range[1]:
-        assert (
-            dim_range[0] <= t.dim() <= dim_range[1]
-        ), f"Expected number of dimensions to be between {dim_range[0]} and {dim_range[1]}, got {t.dim()}"
+        assert dim_range[0] <= t.dim() <= dim_range[1], (
+            f"Expected number of dimensions to be between {dim_range[0]} and {dim_range[1]}, got {t.dim()}"
+        )
 
     if data_range[0] < data_range[1]:
         assert data_range[0] <= t.min(), f"Expected values to be greater or equal to {data_range[0]}, got {t.min()}"
@@ -285,8 +285,7 @@ def _ssim_per_channel(
     """
     if x.size(-1) < kernel.size(-1) or x.size(-2) < kernel.size(-2):
         raise ValueError(
-            f"Kernel size can't be greater than actual input size. Input size: {x.size()}. "
-            f"Kernel size: {kernel.size()}"
+            f"Kernel size can't be greater than actual input size. Input size: {x.size()}. Kernel size: {kernel.size()}"
         )
 
     c1 = k1**2
@@ -337,8 +336,7 @@ def _ssim_per_channel_complex(
     n_channels = x.size(1)
     if x.size(-2) < kernel.size(-1) or x.size(-3) < kernel.size(-2):
         raise ValueError(
-            f"Kernel size can't be greater than actual input size. Input size: {x.size()}. "
-            f"Kernel size: {kernel.size()}"
+            f"Kernel size can't be greater than actual input size. Input size: {x.size()}. Kernel size: {kernel.size()}"
        )
 
     c1 = k1**2
diff --git a/TTS/tts/utils/text/bangla/phonemizer.py b/TTS/tts/utils/text/bangla/phonemizer.py
index cddcb00fd5..1537240380 100644
--- a/TTS/tts/utils/text/bangla/phonemizer.py
+++ b/TTS/tts/utils/text/bangla/phonemizer.py
@@ -45,7 +45,7 @@ def tag_text(text: str):
     # create start and end
     text = "start" + text + "end"
     # tag text
-    parts = re.split("[\u0600-\u06FF]+", text)
+    parts = re.split("[\u0600-\u06ff]+", text)
     # remove non chars
     parts = [p for p in parts if p.strip()]
     # unique parts
diff --git a/TTS/tts/utils/text/characters.py b/TTS/tts/utils/text/characters.py
index da30692f5e..f8beaef036 100644
--- a/TTS/tts/utils/text/characters.py
+++ b/TTS/tts/utils/text/characters.py
@@ -289,9 +289,9 @@ def _create_vocab(self):
         self.vocab = _vocab + list(self._punctuations)
         if self.is_unique:
             duplicates = {x for x in self.vocab if self.vocab.count(x) > 1}
-            assert (
-                len(self.vocab) == len(self._char_to_id) == len(self._id_to_char)
-            ), f" [!] There are duplicate characters in the character set. {duplicates}"
+            assert len(self.vocab) == len(self._char_to_id) == len(self._id_to_char), (
+                f" [!] There are duplicate characters in the character set. {duplicates}"
+            )
 
     def char_to_id(self, char: str) -> int:
         try:
diff --git a/TTS/tts/utils/text/english/number_norm.py b/TTS/tts/utils/text/english/number_norm.py
index c5f2f452d5..be2a4b3084 100644
--- a/TTS/tts/utils/text/english/number_norm.py
+++ b/TTS/tts/utils/text/english/number_norm.py
@@ -1,4 +1,4 @@
-""" from https://github.com/keithito/tacotron """
+"""from https://github.com/keithito/tacotron"""
 
 import re
 
diff --git a/TTS/tts/utils/text/korean/korean.py b/TTS/tts/utils/text/korean/korean.py
index 0feef3bdfb..1b1e0ca0fb 100644
--- a/TTS/tts/utils/text/korean/korean.py
+++ b/TTS/tts/utils/text/korean/korean.py
@@ -1,4 +1,4 @@
-īģŋ# Code based on https://github.com/carpedm20/multi-speaker-tacotron-tensorflow/blob/master/text/korean.py
+# Code based on https://github.com/carpedm20/multi-speaker-tacotron-tensorflow/blob/master/text/korean.py
 import re
 
 from TTS.tts.utils.text.korean.ko_dictionary import english_dictionary, etc_dictionary
diff --git a/TTS/tts/utils/text/phonemizers/base.py b/TTS/tts/utils/text/phonemizers/base.py
index 4bd03851c7..6cc6ec0b37 100644
--- a/TTS/tts/utils/text/phonemizers/base.py
+++ b/TTS/tts/utils/text/phonemizers/base.py
@@ -52,7 +52,7 @@ def _init_language(self, language):
 
         """
         if not self.is_supported_language(language):
-            raise RuntimeError(f'language "{language}" is not supported by the ' f"{self.name()} backend")
+            raise RuntimeError(f'language "{language}" is not supported by the {self.name()} backend')
         return language
 
     @property
diff --git a/TTS/utils/audio/processor.py b/TTS/utils/audio/processor.py
index 9a8841106c..55b8575aa4 100644
--- a/TTS/utils/audio/processor.py
+++ b/TTS/utils/audio/processor.py
@@ -222,9 +222,9 @@ def __init__(
         self.hop_length = hop_length
         self.win_length = win_length
         assert min_level_db != 0.0, " [!] min_level_db is 0"
-        assert (
-            self.win_length <= self.fft_size
-        ), f" [!] win_length cannot be larger than fft_size - {self.win_length} vs {self.fft_size}"
+        assert self.win_length <= self.fft_size, (
+            f" [!] win_length cannot be larger than fft_size - {self.win_length} vs {self.fft_size}"
+        )
         members = vars(self)
         logger.info("Setting up Audio Processor...")
         for key, value in members.items():
@@ -283,7 +283,9 @@ def normalize(self, S: np.ndarray) -> np.ndarray:
                 S_norm = ((2 * self.max_norm) * S_norm) - self.max_norm
                 if self.clip_norm:
                     S_norm = np.clip(
-                        S_norm, -self.max_norm, self.max_norm  # pylint: disable=invalid-unary-operand-type
+                        S_norm,
+                        -self.max_norm,  # pylint: disable=invalid-unary-operand-type
+                        self.max_norm,
                     )
                 return S_norm
             S_norm = self.max_norm * S_norm
@@ -318,7 +320,9 @@ def denormalize(self, S: np.ndarray) -> np.ndarray:
             if self.symmetric_norm:
                 if self.clip_norm:
                     S_denorm = np.clip(
-                        S_denorm, -self.max_norm, self.max_norm  # pylint: disable=invalid-unary-operand-type
+                        S_denorm,
+                        -self.max_norm,  # pylint: disable=invalid-unary-operand-type
+                        self.max_norm,
                     )
                 S_denorm = ((S_denorm + self.max_norm) * -self.min_level_db / (2 * self.max_norm)) + self.min_level_db
                 return S_denorm + self.ref_level_db
@@ -351,9 +355,9 @@ def load_stats(self, stats_path: str) -> tuple[np.array, np.array, np.array, np.
             if key in skip_parameters:
                 continue
             if key not in ["sample_rate", "trim_db"]:
-                assert (
-                    stats_config[key] == self.__dict__[key]
-                ), f" [!] Audio param {key} does not match the value used for computing mean-var stats. {stats_config[key]} vs {self.__dict__[key]}"
+                assert stats_config[key] == self.__dict__[key], (
+                    f" [!] Audio param {key} does not match the value used for computing mean-var stats. {stats_config[key]} vs {self.__dict__[key]}"
+                )
         return mel_mean, mel_std, linear_mean, linear_std, stats_config
 
     # pylint: disable=attribute-defined-outside-init
diff --git a/TTS/utils/samplers.py b/TTS/utils/samplers.py
index 4e8f3825b9..d24733977a 100644
--- a/TTS/utils/samplers.py
+++ b/TTS/utils/samplers.py
@@ -49,9 +49,9 @@ def __init__(
         label_key="class_name",
     ):
         super().__init__(dataset_items)
-        assert (
-            batch_size % (num_classes_in_batch * num_gpus) == 0
-        ), "Batch size must be divisible by number of classes times the number of data parallel devices (if enabled)."
+        assert batch_size % (num_classes_in_batch * num_gpus) == 0, (
+            "Batch size must be divisible by number of classes times the number of data parallel devices (if enabled)."
+        )
 
         label_indices = {}
         for idx, item in enumerate(dataset_items):
diff --git a/TTS/vc/layers/freevc/wavlm/modules.py b/TTS/vc/layers/freevc/wavlm/modules.py
index cddacd69ab..cf31a866de 100644
--- a/TTS/vc/layers/freevc/wavlm/modules.py
+++ b/TTS/vc/layers/freevc/wavlm/modules.py
@@ -330,7 +330,7 @@ def __init__(
         self.encoder_decoder_attention = encoder_decoder_attention
 
         assert not self.self_attention or self.qkv_same_dim, (
-            "Self-attention requires query, key and " "value to be of the same size"
+            "Self-attention requires query, key and value to be of the same size"
         )
 
         k_bias = True
diff --git a/TTS/vc/layers/freevc/wavlm/wavlm.py b/TTS/vc/layers/freevc/wavlm/wavlm.py
index c5b8c19c32..26f385c267 100644
--- a/TTS/vc/layers/freevc/wavlm/wavlm.py
+++ b/TTS/vc/layers/freevc/wavlm/wavlm.py
@@ -67,8 +67,7 @@ def compute_mask_indices(
 
     all_num_mask = int(
         # add a random number for probabilistic rounding
-        mask_prob * all_sz / float(mask_length)
-        + np.random.rand()
+        mask_prob * all_sz / float(mask_length) + np.random.rand()
     )
 
     all_num_mask = max(min_masks, all_num_mask)
@@ -79,8 +78,7 @@ def compute_mask_indices(
             sz = all_sz - padding_mask[i].long().sum().item()
             num_mask = int(
                 # add a random number for probabilistic rounding
-                mask_prob * sz / float(mask_length)
-                + np.random.rand()
+                mask_prob * sz / float(mask_length) + np.random.rand()
             )
             num_mask = max(min_masks, num_mask)
         else:
@@ -154,9 +152,7 @@ def arrange(s, e, length, keep_length):
 
 class WavLMConfig:
     def __init__(self, cfg=None):
-        self.extractor_mode: str = (
-            "default"  # mode for feature extractor. default has a single group norm with d groups in the first conv block, whereas layer_norm has layer norms in every block (meant to use with normalize=True)
-        )
+        self.extractor_mode: str = "default"  # mode for feature extractor. default has a single group norm with d groups in the first conv block, whereas layer_norm has layer norms in every block (meant to use with normalize=True)
         self.encoder_layers: int = 12  # num encoder layers in the transformer
 
         self.encoder_embed_dim: int = 768  # encoder embedding dimension
@@ -165,9 +161,7 @@ def __init__(self, cfg=None):
         self.activation_fn: str = "gelu"  # activation function to use
 
         self.layer_norm_first: bool = False  # apply layernorm first in the transformer
-        self.conv_feature_layers: str = (
-            "[(512,10,5)] + [(512,3,2)] * 4 + [(512,2,2)] * 2"  # string describing convolutional feature extraction layers in form of a python list that contains [(dim, kernel_size, stride), ...]
-        )
+        self.conv_feature_layers: str = "[(512,10,5)] + [(512,3,2)] * 4 + [(512,2,2)] * 2"  # string describing convolutional feature extraction layers in form of a python list that contains [(dim, kernel_size, stride), ...]
         self.conv_bias: bool = False  # include bias in conv encoder
         self.feature_grad_mult: float = 1.0  # multiply feature extractor var grads by this
 
diff --git a/TTS/vc/models/base_vc.py b/TTS/vc/models/base_vc.py
index c0fe766b7c..9f107edbe0 100644
--- a/TTS/vc/models/base_vc.py
+++ b/TTS/vc/models/base_vc.py
@@ -199,9 +199,9 @@ def format_batch(self, batch: dict[str, Any]) -> dict[str, Any]:
                 extra_frames = dur.sum() - mel_lengths[idx]
                 largest_idxs = torch.argsort(-dur)[:extra_frames]
                 dur[largest_idxs] -= 1
-                assert (
-                    dur.sum() == mel_lengths[idx]
-                ), f" [!] total duration {dur.sum()} vs spectrogram length {mel_lengths[idx]}"
+                assert dur.sum() == mel_lengths[idx], (
+                    f" [!] total duration {dur.sum()} vs spectrogram length {mel_lengths[idx]}"
+                )
                 durations[idx, : text_lengths[idx]] = dur
 
         # set stop targets wrt reduction factor
diff --git a/TTS/vocoder/datasets/gan_dataset.py b/TTS/vocoder/datasets/gan_dataset.py
index c0882c701f..076545f8a2 100644
--- a/TTS/vocoder/datasets/gan_dataset.py
+++ b/TTS/vocoder/datasets/gan_dataset.py
@@ -128,9 +128,9 @@ def load_item(self, idx):
                 # correct the audio length wrt padding applied in stft
                 audio = np.pad(audio, (0, self.hop_len), mode="edge")
                 audio = audio[: mel.shape[-1] * self.hop_len]
-                assert (
-                    mel.shape[-1] * self.hop_len == audio.shape[-1]
-                ), f" [!] {mel.shape[-1] * self.hop_len} vs {audio.shape[-1]}"
+                assert mel.shape[-1] * self.hop_len == audio.shape[-1], (
+                    f" [!] {mel.shape[-1] * self.hop_len} vs {audio.shape[-1]}"
+                )
 
         audio = torch.from_numpy(audio).float().unsqueeze(0)
         mel = torch.from_numpy(mel).float().squeeze(0)
diff --git a/TTS/vocoder/datasets/wavegrad_dataset.py b/TTS/vocoder/datasets/wavegrad_dataset.py
index 3ae9015451..435330bebe 100644
--- a/TTS/vocoder/datasets/wavegrad_dataset.py
+++ b/TTS/vocoder/datasets/wavegrad_dataset.py
@@ -102,9 +102,9 @@ def load_item(self, idx):
                 audio = np.pad(
                     audio, (0, self.seq_len + self.pad_short - len(audio)), mode="constant", constant_values=0.0
                 )
-            assert (
-                audio.shape[-1] >= self.seq_len + self.pad_short
-            ), f"{audio.shape[-1]} vs {self.seq_len + self.pad_short}"
+            assert audio.shape[-1] >= self.seq_len + self.pad_short, (
+                f"{audio.shape[-1]} vs {self.seq_len + self.pad_short}"
+            )
 
             # correct the audio length wrt hop length
             p = (audio.shape[-1] // self.hop_len + 1) * self.hop_len - audio.shape[-1]
diff --git a/TTS/vocoder/layers/losses.py b/TTS/vocoder/layers/losses.py
index 0fad81864e..81a1f30884 100644
--- a/TTS/vocoder/layers/losses.py
+++ b/TTS/vocoder/layers/losses.py
@@ -224,9 +224,9 @@ class GeneratorLoss(nn.Module):
 
     def __init__(self, C):
         super().__init__()
-        assert not (
-            C.use_mse_gan_loss and C.use_hinge_gan_loss
-        ), " [!] Cannot use HingeGANLoss and MSEGANLoss together."
+        assert not (C.use_mse_gan_loss and C.use_hinge_gan_loss), (
+            " [!] Cannot use HingeGANLoss and MSEGANLoss together."
+        )
 
         self.use_stft_loss = C.use_stft_loss if "use_stft_loss" in C else False
         self.use_subband_stft_loss = C.use_subband_stft_loss if "use_subband_stft_loss" in C else False
@@ -311,9 +311,9 @@ class DiscriminatorLoss(nn.Module):
 
     def __init__(self, C):
         super().__init__()
-        assert not (
-            C.use_mse_gan_loss and C.use_hinge_gan_loss
-        ), " [!] Cannot use HingeGANLoss and MSEGANLoss together."
+        assert not (C.use_mse_gan_loss and C.use_hinge_gan_loss), (
+            " [!] Cannot use HingeGANLoss and MSEGANLoss together."
+ ) self.use_mse_gan_loss = C.use_mse_gan_loss self.use_hinge_gan_loss = C.use_hinge_gan_loss diff --git a/TTS/vocoder/layers/lvc_block.py b/TTS/vocoder/layers/lvc_block.py index 8913a1132e..ab1a56e7fc 100644 --- a/TTS/vocoder/layers/lvc_block.py +++ b/TTS/vocoder/layers/lvc_block.py @@ -175,9 +175,9 @@ def location_variable_convolution(x, kernel, bias, dilation, hop_size): batch, _, in_length = x.shape batch, _, out_channels, kernel_size, kernel_length = kernel.shape - assert in_length == ( - kernel_length * hop_size - ), f"length of (x, kernel) is not matched, {in_length} vs {kernel_length * hop_size}" + assert in_length == (kernel_length * hop_size), ( + f"length of (x, kernel) is not matched, {in_length} vs {kernel_length * hop_size}" + ) padding = dilation * int((kernel_size - 1) / 2) x = F.pad(x, (padding, padding), "constant", 0) # (batch, in_channels, in_length + 2*padding) diff --git a/TTS/vocoder/models/gan.py b/TTS/vocoder/models/gan.py index 42dfef32b7..ba3852e795 100644 --- a/TTS/vocoder/models/gan.py +++ b/TTS/vocoder/models/gan.py @@ -204,7 +204,12 @@ def _log(self, name: str, ap: AudioProcessor, batch: dict, outputs: dict) -> tup return figures, audios def train_log( - self, batch: dict, outputs: dict, logger: "Logger", assets: dict, steps: int # pylint: disable=unused-argument + self, + batch: dict, + outputs: dict, + logger: "Logger", + assets: dict, + steps: int, # pylint: disable=unused-argument ) -> tuple[dict, np.ndarray]: """Call `_log()` for training.""" figures, audios = self._log("eval", self.ap, batch, outputs) @@ -218,7 +223,12 @@ def eval_step(self, batch: dict, criterion: nn.Module, optimizer_idx: int) -> tu return self.train_step(batch, criterion, optimizer_idx) def eval_log( - self, batch: dict, outputs: dict, logger: "Logger", assets: dict, steps: int # pylint: disable=unused-argument + self, + batch: dict, + outputs: dict, + logger: "Logger", + assets: dict, + steps: int, # pylint: disable=unused-argument ) -> tuple[dict, np.ndarray]: """Call `_log()` for evaluation.""" figures, audios = self._log("eval", self.ap, batch, outputs) diff --git a/TTS/vocoder/models/hifigan_generator.py b/TTS/vocoder/models/hifigan_generator.py index e8f175ed17..4398300f8e 100644 --- a/TTS/vocoder/models/hifigan_generator.py +++ b/TTS/vocoder/models/hifigan_generator.py @@ -306,9 +306,7 @@ def remove_weight_norm(self): remove_parametrizations(self.conv_pre, "weight") remove_parametrizations(self.conv_post, "weight") - def load_checkpoint( - self, config, checkpoint_path, eval=False, cache=False - ): # pylint: disable=unused-argument, redefined-builtin + def load_checkpoint(self, config, checkpoint_path, eval=False, cache=False): # pylint: disable=unused-argument, redefined-builtin state = load_fsspec(checkpoint_path, map_location=torch.device("cpu"), cache=cache) self.load_state_dict(state["model"]) if eval: diff --git a/TTS/vocoder/models/melgan_generator.py b/TTS/vocoder/models/melgan_generator.py index 03c971afa4..53ed700755 100644 --- a/TTS/vocoder/models/melgan_generator.py +++ b/TTS/vocoder/models/melgan_generator.py @@ -84,9 +84,7 @@ def remove_weight_norm(self): except ValueError: layer.remove_weight_norm() - def load_checkpoint( - self, config, checkpoint_path, eval=False, cache=False - ): # pylint: disable=unused-argument, redefined-builtin + def load_checkpoint(self, config, checkpoint_path, eval=False, cache=False): # pylint: disable=unused-argument, redefined-builtin state = load_fsspec(checkpoint_path, map_location=torch.device("cpu"), cache=cache) 
         self.load_state_dict(state["model"])
         if eval:
diff --git a/TTS/vocoder/models/parallel_wavegan_generator.py b/TTS/vocoder/models/parallel_wavegan_generator.py
index f4ef3a0734..71b38d4c0d 100644
--- a/TTS/vocoder/models/parallel_wavegan_generator.py
+++ b/TTS/vocoder/models/parallel_wavegan_generator.py
@@ -108,9 +108,9 @@ def forward(self, c):
         # perform upsampling
         if c is not None and self.upsample_net is not None:
             c = self.upsample_net(c)
-            assert (
-                c.shape[-1] == x.shape[-1]
-            ), f" [!] Upsampling scale does not match the expected output. {c.shape} vs {x.shape}"
+            assert c.shape[-1] == x.shape[-1], (
+                f" [!] Upsampling scale does not match the expected output. {c.shape} vs {x.shape}"
+            )

         # encode to hidden representation
         x = self.first_conv(x)
@@ -155,9 +155,7 @@ def _apply_weight_norm(m):
     def receptive_field_size(self):
         return _get_receptive_field_size(self.layers, self.stacks, self.kernel_size)

-    def load_checkpoint(
-        self, config, checkpoint_path, eval=False, cache=False
-    ):  # pylint: disable=unused-argument, redefined-builtin
+    def load_checkpoint(self, config, checkpoint_path, eval=False, cache=False):  # pylint: disable=unused-argument, redefined-builtin
         state = load_fsspec(checkpoint_path, map_location=torch.device("cpu"), cache=cache)
         self.load_state_dict(state["model"])
         if eval:
diff --git a/TTS/vocoder/models/wavegrad.py b/TTS/vocoder/models/wavegrad.py
index 16c66e235b..b1a4a26562 100644
--- a/TTS/vocoder/models/wavegrad.py
+++ b/TTS/vocoder/models/wavegrad.py
@@ -217,9 +217,7 @@ def apply_weight_norm(self):
         self.out_conv = weight_norm(self.out_conv)
         self.y_conv = weight_norm(self.y_conv)

-    def load_checkpoint(
-        self, config, checkpoint_path, eval=False, cache=False
-    ):  # pylint: disable=unused-argument, redefined-builtin
+    def load_checkpoint(self, config, checkpoint_path, eval=False, cache=False):  # pylint: disable=unused-argument, redefined-builtin
         state = load_fsspec(checkpoint_path, map_location=torch.device("cpu"), cache=cache)
         self.load_state_dict(state["model"])
         if eval:
@@ -257,7 +255,12 @@ def train_step(self, batch: dict, criterion: dict) -> tuple[dict, dict]:
         return {"model_output": noise_hat}, {"loss": loss}

     def train_log(  # pylint: disable=no-self-use
-        self, batch: dict, outputs: dict, logger: "Logger", assets: dict, steps: int  # pylint: disable=unused-argument
+        self,
+        batch: dict,
+        outputs: dict,
+        logger: "Logger",
+        assets: dict,
+        steps: int,  # pylint: disable=unused-argument
     ) -> tuple[dict, np.ndarray]:
         pass

@@ -266,7 +269,12 @@ def eval_step(self, batch: dict, criterion: nn.Module) -> tuple[dict, dict]:
         return self.train_step(batch, criterion)

     def eval_log(  # pylint: disable=no-self-use
-        self, batch: dict, outputs: dict, logger: "Logger", assets: dict, steps: int  # pylint: disable=unused-argument
+        self,
+        batch: dict,
+        outputs: dict,
+        logger: "Logger",
+        assets: dict,
+        steps: int,  # pylint: disable=unused-argument
     ) -> None:
         pass

diff --git a/TTS/vocoder/models/wavernn.py b/TTS/vocoder/models/wavernn.py
index 2fe55f91bc..5a93f125ba 100644
--- a/TTS/vocoder/models/wavernn.py
+++ b/TTS/vocoder/models/wavernn.py
@@ -225,9 +225,9 @@ class of models has however remained an elusive problem. With a focus on text-to
         self.aux_dims = self.args.res_out_dims // 4

         if self.args.use_upsample_net:
-            assert (
-                np.cumprod(self.args.upsample_factors)[-1] == config.audio.hop_length
-            ), " [!] upsample scales needs to be equal to hop_length"
+            assert np.cumprod(self.args.upsample_factors)[-1] == config.audio.hop_length, (
+                " [!] upsample scales needs to be equal to hop_length"
+            )
             self.upsample = UpsampleNetwork(
                 self.args.feat_dims,
                 self.args.upsample_factors,
@@ -527,9 +527,7 @@ def xfade_and_unfold(y, target, overlap):

         return unfolded

-    def load_checkpoint(
-        self, config, checkpoint_path, eval=False, cache=False
-    ):  # pylint: disable=unused-argument, redefined-builtin
+    def load_checkpoint(self, config, checkpoint_path, eval=False, cache=False):  # pylint: disable=unused-argument, redefined-builtin
         state = load_fsspec(checkpoint_path, map_location=torch.device("cpu"), cache=cache)
         self.load_state_dict(state["model"])
         if eval:
@@ -556,7 +554,10 @@ def eval_step(self, batch: dict, criterion: dict) -> tuple[dict, dict]:

     @torch.no_grad()
     def test(
-        self, assets: dict, test_loader: "DataLoader", output: dict  # pylint: disable=unused-argument
+        self,
+        assets: dict,
+        test_loader: "DataLoader",
+        output: dict,  # pylint: disable=unused-argument
     ) -> tuple[dict, dict]:
         ap = self.ap
         figures = {}
@@ -578,7 +579,11 @@ def test(
         return figures, audios

     def test_log(
-        self, outputs: dict, logger: "Logger", assets: dict, steps: int  # pylint: disable=unused-argument
+        self,
+        outputs: dict,
+        logger: "Logger",
+        assets: dict,
+        steps: int,  # pylint: disable=unused-argument
     ) -> tuple[dict, np.ndarray]:
         figures, audios = outputs
         logger.eval_figures(steps, figures)
diff --git a/tests/text_tests/test_phonemizer.py b/tests/text_tests/test_phonemizer.py
index f9067530e6..370a541b97 100644
--- a/tests/text_tests/test_phonemizer.py
+++ b/tests/text_tests/test_phonemizer.py
@@ -240,12 +240,8 @@ def test_is_available(self):
 class TestBN_Phonemizer(unittest.TestCase):
     def setUp(self):
         self.phonemizer = BN_Phonemizer()
-        self._TEST_CASES = (
-            "রাসূলুল্লাহ সাল্লাল্লাহু আলাইহি ওয়া সাল্লাম শিক্ষা দিয়েছেন যে, কেউ যদি কোন খারাপ কিছুর সম্মুখীন হয়, তখনও যেন"
-        )
-        self._EXPECTED = (
-            "রাসূলুল্লাহ সাল্লাল্লাহু আলাইহি ওয়া সাল্লাম শিক্ষা দিয়েছেন যে কেউ যদি কোন খারাপ কিছুর সম্মুখীন হয় তখনও যেন।"
-        )
+        self._TEST_CASES = "রাসূলুল্লাহ সাল্লাল্লাহু আলাইহি ওয়া সাল্লাম শিক্ষা দিয়েছেন যে, কেউ যদি কোন খারাপ কিছুর সম্মুখীন হয়, তখনও যেন"
+        self._EXPECTED = "রাসূলুল্লাহ সাল্লাল্লাহু আলাইহি ওয়া সাল্লাম শিক্ষা দিয়েছেন যে কেউ যদি কোন খারাপ কিছুর সম্মুখীন হয় তখনও যেন।"

     def test_phonemize(self):
         self.assertEqual(self.phonemizer.phonemize(self._TEST_CASES, separator=""), self._EXPECTED)
diff --git a/tests/text_tests/test_text_cleaners.py b/tests/text_tests/test_text_cleaners.py
index 25c169eddd..f5d342bb00 100644
--- a/tests/text_tests/test_text_cleaners.py
+++ b/tests/text_tests/test_text_cleaners.py
@@ -45,11 +45,11 @@ def test_normalize_unicode() -> None:
         ("na\u0303", "nã"),
         ("o\u0302u", "ôu"),
         ("n\u0303", "ñ"),
-        ("\u4E2D\u56FD", "中国"),
+        ("\u4e2d\u56fd", "中国"),
         ("niño", "niño"),
         ("a\u0308", "ä"),
         ("\u3053\u3093\u306b\u3061\u306f", "こんにちは"),
-        ("\u03B1\u03B2", "αβ"),
+        ("\u03b1\u03b2", "αβ"),
     ]
     for arg, expect in test_cases:
         assert normalize_unicode(arg) == expect
diff --git a/tests/tts_tests/test_tacotron2_model.py b/tests/tts_tests/test_tacotron2_model.py
index 9a8027736e..72069bf943 100644
--- a/tests/tts_tests/test_tacotron2_model.py
+++ b/tests/tts_tests/test_tacotron2_model.py
@@ -72,9 +72,9 @@ def test_train_step(self):  # pylint: disable=no-self-use
         for param, param_ref in zip(model.parameters(), model_ref.parameters()):
             # ignore pre-higway layer since it works conditional
             # if count not in [145, 59]:
-            assert (
-                param != param_ref
-            ).any(), f"param {count} with shape {param.shape} not updated!! \n{param}\n{param_ref}"
+            assert (param != param_ref).any(), (
+                f"param {count} with shape {param.shape} not updated!! \n{param}\n{param_ref}"
+            )
             count += 1

@@ -131,9 +131,9 @@ def test_train_step():
     for param, param_ref in zip(model.parameters(), model_ref.parameters()):
         # ignore pre-higway layer since it works conditional
         # if count not in [145, 59]:
-        assert (
-            param != param_ref
-        ).any(), f"param {count} with shape {param.shape} not updated!! \n{param}\n{param_ref}"
+        assert (param != param_ref).any(), (
+            f"param {count} with shape {param.shape} not updated!! \n{param}\n{param_ref}"
+        )
         count += 1

@@ -198,9 +198,9 @@ def test_train_step(self):
             if name == "gst_layer.encoder.recurrence.weight_hh_l0":
                 # print(param.grad)
                 continue
-            assert (
-                param != param_ref
-            ).any(), f"param {name} {count} with shape {param.shape} not updated!! \n{param}\n{param_ref}"
+            assert (param != param_ref).any(), (
+                f"param {name} {count} with shape {param.shape} not updated!! \n{param}\n{param_ref}"
+            )
             count += 1

         # with file gst style
@@ -254,9 +254,9 @@ def test_train_step(self):
             if name == "gst_layer.encoder.recurrence.weight_hh_l0":
                 # print(param.grad)
                 continue
-            assert (
-                param != param_ref
-            ).any(), f"param {name} {count} with shape {param.shape} not updated!! \n{param}\n{param_ref}"
+            assert (param != param_ref).any(), (
+                f"param {name} {count} with shape {param.shape} not updated!! \n{param}\n{param_ref}"
+            )
             count += 1

@@ -321,9 +321,9 @@ def test_train_step():
     count = 0
     for param, param_ref in zip(model.parameters(), model_ref.parameters()):
         # ignore pre-higway layer since it works conditional
-        assert (
-            param != param_ref
-        ).any(), f"param {count} with shape {param.shape} not updated!! \n{param}\n{param_ref}"
+        assert (param != param_ref).any(), (
+            f"param {count} with shape {param.shape} not updated!! \n{param}\n{param_ref}"
+        )
         count += 1

@@ -384,7 +384,7 @@ def test_train_step():
         name, param = name_param
         if name == "gst_layer.encoder.recurrence.weight_hh_l0":
             continue
-        assert (
-            param != param_ref
-        ).any(), f"param {count} with shape {param.shape} not updated!! \n{param}\n{param_ref}"
+        assert (param != param_ref).any(), (
+            f"param {count} with shape {param.shape} not updated!! \n{param}\n{param_ref}"
+        )
         count += 1
diff --git a/tests/tts_tests/test_tacotron_model.py b/tests/tts_tests/test_tacotron_model.py
index 3976b9ae8d..5f9af86e7e 100644
--- a/tests/tts_tests/test_tacotron_model.py
+++ b/tests/tts_tests/test_tacotron_model.py
@@ -71,9 +71,9 @@ def test_train_step():
     for param, param_ref in zip(model.parameters(), model_ref.parameters()):
         # ignore pre-higway layer since it works conditional
         # if count not in [145, 59]:
-        assert (
-            param != param_ref
-        ).any(), f"param {count} with shape {param.shape} not updated!! \n{param}\n{param_ref}"
+        assert (param != param_ref).any(), (
+            f"param {count} with shape {param.shape} not updated!! \n{param}\n{param_ref}"
+        )
         count += 1

@@ -127,9 +127,9 @@ def test_train_step():
     for param, param_ref in zip(model.parameters(), model_ref.parameters()):
         # ignore pre-higway layer since it works conditional
         # if count not in [145, 59]:
-        assert (
-            param != param_ref
-        ).any(), f"param {count} with shape {param.shape} not updated!! \n{param}\n{param_ref}"
+        assert (param != param_ref).any(), (
+            f"param {count} with shape {param.shape} not updated!! \n{param}\n{param_ref}"
+        )
         count += 1

@@ -186,9 +186,9 @@ def test_train_step():
     count = 0
     for param, param_ref in zip(model.parameters(), model_ref.parameters()):
         # ignore pre-higway layer since it works conditional
-        assert (
-            param != param_ref
-        ).any(), f"param {count} with shape {param.shape} not updated!! \n{param}\n{param_ref}"
+        assert (param != param_ref).any(), (
+            f"param {count} with shape {param.shape} not updated!! \n{param}\n{param_ref}"
+        )
         count += 1

     # with file gst style
@@ -238,9 +238,9 @@ def test_train_step():
     count = 0
     for param, param_ref in zip(model.parameters(), model_ref.parameters()):
         # ignore pre-higway layer since it works conditional
-        assert (
-            param != param_ref
-        ).any(), f"param {count} with shape {param.shape} not updated!! \n{param}\n{param_ref}"
+        assert (param != param_ref).any(), (
+            f"param {count} with shape {param.shape} not updated!! \n{param}\n{param_ref}"
+        )
         count += 1

@@ -305,9 +305,9 @@ def test_train_step():
     count = 0
     for param, param_ref in zip(model.parameters(), model_ref.parameters()):
         # ignore pre-higway layer since it works conditional
-        assert (
-            param != param_ref
-        ).any(), f"param {count} with shape {param.shape} not updated!! \n{param}\n{param_ref}"
+        assert (param != param_ref).any(), (
+            f"param {count} with shape {param.shape} not updated!! \n{param}\n{param_ref}"
+        )
         count += 1

@@ -366,7 +366,7 @@ def test_train_step():
         name, param = name_param
         if name == "gst_layer.encoder.recurrence.weight_hh_l0":
             continue
-        assert (
-            param != param_ref
-        ).any(), f"param {count} with shape {param.shape} not updated!! \n{param}\n{param_ref}"
+        assert (param != param_ref).any(), (
+            f"param {count} with shape {param.shape} not updated!! \n{param}\n{param_ref}"
+        )
         count += 1
diff --git a/tests/tts_tests/test_vits.py b/tests/tts_tests/test_vits.py
index f0b347b895..790439ecb2 100644
--- a/tests/tts_tests/test_vits.py
+++ b/tests/tts_tests/test_vits.py
@@ -373,9 +373,9 @@ def _check_parameter_changes(model, model_ref):
             name = item1[0]
             param = item1[1]
             param_ref = item2[1]
-            assert (
-                param != param_ref
-            ).any(), f"param {name} with shape {param.shape} not updated!! \n{param}\n{param_ref}"
+            assert (param != param_ref).any(), (
+                f"param {name} with shape {param.shape} not updated!! \n{param}\n{param_ref}"
+            )
             count = count + 1

     def _create_batch(self, config, batch_size):
diff --git a/tests/tts_tests2/test_glow_tts.py b/tests/tts_tests2/test_glow_tts.py
index 967e9ecb9e..c92063576f 100644
--- a/tests/tts_tests2/test_glow_tts.py
+++ b/tests/tts_tests2/test_glow_tts.py
@@ -42,9 +42,9 @@ def _create_inputs(batch_size=8):
     def _check_parameter_changes(model, model_ref):
         count = 0
         for param, param_ref in zip(model.parameters(), model_ref.parameters()):
-            assert (
-                param != param_ref
-            ).any(), f"param {count} with shape {param.shape} not updated!! \n{param}\n{param_ref}"
+            assert (param != param_ref).any(), (
+                f"param {count} with shape {param.shape} not updated!! \n{param}\n{param_ref}"
+            )
             count += 1

     def test_init_multispeaker(self):
@@ -241,10 +241,10 @@ def _test_inference_with_MAS(self, batch_size):
         # inference encoder and decoder with MAS
         y = model.inference_with_MAS(input_dummy, input_lengths, mel_spec, mel_lengths)
         y2 = model.decoder_inference(mel_spec, mel_lengths)
-        assert (
-            y2["model_outputs"].shape == y["model_outputs"].shape
-        ), "Difference between the shapes of the glowTTS inference with MAS ({}) and the inference using only the decoder ({}) !!".format(
-            y["model_outputs"].shape, y2["model_outputs"].shape
+        assert y2["model_outputs"].shape == y["model_outputs"].shape, (
+            "Difference between the shapes of the glowTTS inference with MAS ({}) and the inference using only the decoder ({}) !!".format(
+                y["model_outputs"].shape, y2["model_outputs"].shape
+            )
         )

     def test_inference_with_MAS(self):
diff --git a/tests/vc_tests/test_freevc.py b/tests/vc_tests/test_freevc.py
index dd45d6941f..784e32a68d 100644
--- a/tests/vc_tests/test_freevc.py
+++ b/tests/vc_tests/test_freevc.py
@@ -80,9 +80,9 @@ def _test_inference(self, batch_size):
         wavlm_vec_lengths = torch.ones(batch_size, dtype=torch.long)

         output_wav = model.inference(wavlm_vec, None, mel, wavlm_vec_lengths)
-        assert (
-            output_wav.shape[-1] // config.audio.hop_length == wavlm_vec.shape[-1]
-        ), f"{output_wav.shape[-1] // config.audio.hop_length} != {wavlm_vec.shape}"
+        assert output_wav.shape[-1] // config.audio.hop_length == wavlm_vec.shape[-1], (
+            f"{output_wav.shape[-1] // config.audio.hop_length} != {wavlm_vec.shape}"
+        )

     def test_inference(self):
         self._test_inference(1)
@@ -95,9 +95,9 @@ def test_voice_conversion(self):

         source_wav, target_wav = self._create_inputs_inference()
         output_wav = model.voice_conversion(source_wav, target_wav)
-        assert (
-            output_wav.shape[0] == source_wav.shape[0] - source_wav.shape[0] % config.audio.hop_length
-        ), f"{output_wav.shape} != {source_wav.shape}, {config.audio.hop_length}"
+        assert output_wav.shape[0] == source_wav.shape[0] - source_wav.shape[0] % config.audio.hop_length, (
+            f"{output_wav.shape} != {source_wav.shape}, {config.audio.hop_length}"
+        )

     def test_train_step(self): ...
diff --git a/tests/vc_tests/test_openvoice.py b/tests/vc_tests/test_openvoice.py
index c9f7ae3931..703873ea47 100644
--- a/tests/vc_tests/test_openvoice.py
+++ b/tests/vc_tests/test_openvoice.py
@@ -16,7 +16,6 @@

 class TestOpenVoice(unittest.TestCase):
-
     @staticmethod
     def _create_inputs_inference():
         source_wav = torch.rand(16100)
@@ -37,6 +36,6 @@ def test_voice_conversion(self):

         source_wav, target_wav = self._create_inputs_inference()
         output_wav = model.voice_conversion(source_wav, target_wav)
-        assert (
-            output_wav.shape[0] == source_wav.shape[0] - source_wav.shape[0] % config.audio.hop_length
-        ), f"{output_wav.shape} != {source_wav.shape}"
+        assert output_wav.shape[0] == source_wav.shape[0] - source_wav.shape[0] % config.audio.hop_length, (
+            f"{output_wav.shape} != {source_wav.shape}"
+        )