Skip to content

Commit

Permalink
style: run ruff format
Browse files (browse the repository at this point in the history)
  • Loading branch information
eginhard committed Jan 11, 2025
1 parent 5b3e40a commit e49d3d9
Show file tree
Hide file tree
Showing 68 changed files with 333 additions and 381 deletions.
6 changes: 3 additions & 3 deletions TTS/encoder/configs/base_encoder_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,6 @@ class BaseEncoderConfig(BaseTrainingConfig):
def check_values(self):
super().check_values()
c = asdict(self)
assert (
c["model_params"]["input_dim"] == self.audio.num_mels
), " [!] model input dimendion must be equal to melspectrogram dimension."
assert c["model_params"]["input_dim"] == self.audio.num_mels, (
" [!] model input dimendion must be equal to melspectrogram dimension."
)
2 changes: 1 addition & 1 deletion TTS/encoder/utils/prepare_voxceleb.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
# Only support eager mode and TF>=2.0.0
# pylint: disable=no-member, invalid-name, relative-beyond-top-level
# pylint: disable=too-many-locals, too-many-statements, too-many-arguments, too-many-instance-attributes
""" voxceleb 1 & 2 """
"""voxceleb 1 & 2"""

import csv
import hashlib
Expand Down
12 changes: 6 additions & 6 deletions TTS/tts/configs/neuralhmm_tts_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,9 +161,9 @@ def check_values(self):
AssertionError: transition probability is not between 0 and 1
"""
assert self.ar_order > 0, "AR order must be greater than 0 it is an autoregressive model."
assert (
len(self.outputnet_size) >= 1
), f"Parameter Network must have atleast one layer check the config file for parameter network. Provided: {self.parameternetwork}"
assert (
0 < self.flat_start_params["transition_p"] < 1
), f"Transition probability must be between 0 and 1. Provided: {self.flat_start_params['transition_p']}"
assert len(self.outputnet_size) >= 1, (
f"Parameter Network must have atleast one layer check the config file for parameter network. Provided: {self.parameternetwork}"
)
assert 0 < self.flat_start_params["transition_p"] < 1, (
f"Transition probability must be between 0 and 1. Provided: {self.flat_start_params['transition_p']}"
)
12 changes: 6 additions & 6 deletions TTS/tts/configs/overflow_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,9 +192,9 @@ def check_values(self):
AssertionError: transition probability is not between 0 and 1
"""
assert self.ar_order > 0, "AR order must be greater than 0 it is an autoregressive model."
assert (
len(self.outputnet_size) >= 1
), f"Parameter Network must have atleast one layer check the config file for parameter network. Provided: {self.parameternetwork}"
assert (
0 < self.flat_start_params["transition_p"] < 1
), f"Transition probability must be between 0 and 1. Provided: {self.flat_start_params['transition_p']}"
assert len(self.outputnet_size) >= 1, (
f"Parameter Network must have atleast one layer check the config file for parameter network. Provided: {self.parameternetwork}"
)
assert 0 < self.flat_start_params["transition_p"] < 1, (
f"Transition probability must be between 0 and 1. Provided: {self.flat_start_params['transition_p']}"
)
12 changes: 6 additions & 6 deletions TTS/tts/configs/tacotron_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,12 +223,12 @@ class TacotronConfig(BaseTTSConfig):

def check_values(self):
if self.gradual_training:
assert (
self.gradual_training[0][1] == self.r
), f"[!] the first scheduled gradual training `r` must be equal to the model's `r` value. {self.gradual_training[0][1]} vs {self.r}"
assert self.gradual_training[0][1] == self.r, (
f"[!] the first scheduled gradual training `r` must be equal to the model's `r` value. {self.gradual_training[0][1]} vs {self.r}"
)
if self.model == "tacotron" and self.audio is not None:
assert self.out_channels == (
self.audio.fft_size // 2 + 1
), f"{self.out_channels} vs {self.audio.fft_size // 2 + 1}"
assert self.out_channels == (self.audio.fft_size // 2 + 1), (
f"{self.out_channels} vs {self.audio.fft_size // 2 + 1}"
)
if self.model == "tacotron2" and self.audio is not None:
assert self.out_channels == self.audio.num_mels
6 changes: 3 additions & 3 deletions TTS/tts/datasets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,9 @@ def split_dataset(items, eval_split_max_size=None, eval_split_size=0.01):
else:
eval_split_size = int(len(items) * eval_split_size)

assert (
eval_split_size > 0
), f" [!] You do not have enough samples for the evaluation set. You can work around this setting the 'eval_split_size' parameter to a minimum of {1 / len(items)}"
assert eval_split_size > 0, (
f" [!] You do not have enough samples for the evaluation set. You can work around this setting the 'eval_split_size' parameter to a minimum of {1 / len(items)}"
)
np.random.seed(0)
np.random.shuffle(items)
if is_multi_speaker:
Expand Down
4 changes: 2 additions & 2 deletions TTS/tts/datasets/formatters.py
Original file line number Diff line number Diff line change
Expand Up @@ -424,7 +424,7 @@ def vctk(root_path, meta_files=None, wavs_path="wav48_silence_trimmed", mic="mic
"""
file_ext = "flac"
items = []
meta_files = glob(f"{os.path.join(root_path,'txt')}/**/*.txt", recursive=True)
meta_files = glob(f"{os.path.join(root_path, 'txt')}/**/*.txt", recursive=True)
for meta_file in meta_files:
_, speaker_id, txt_file = os.path.relpath(meta_file, root_path).split(os.sep)
file_id = txt_file.split(".")[0]
Expand All @@ -451,7 +451,7 @@ def vctk(root_path, meta_files=None, wavs_path="wav48_silence_trimmed", mic="mic
def vctk_old(root_path, meta_files=None, wavs_path="wav48", ignored_speakers=None):
"""homepages.inf.ed.ac.uk/jyamagis/release/VCTK-Corpus.tar.gz"""
items = []
meta_files = glob(f"{os.path.join(root_path,'txt')}/**/*.txt", recursive=True)
meta_files = glob(f"{os.path.join(root_path, 'txt')}/**/*.txt", recursive=True)
for meta_file in meta_files:
_, speaker_id, txt_file = os.path.relpath(meta_file, root_path).split(os.sep)
file_id = txt_file.split(".")[0]
Expand Down
1 change: 0 additions & 1 deletion TTS/tts/layers/bark/hubert/kmeans_hubert.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@

# Modified code from https://github.com/lucidrains/audiolm-pytorch/blob/main/audiolm_pytorch/hubert_kmeans.py


import torch
from einops import pack, unpack
from torch import nn
Expand Down
4 changes: 1 addition & 3 deletions TTS/tts/layers/bark/inference_funcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,7 @@ def load_npz(npz_file: str) -> tuple[npt.NDArray[np.int64], npt.NDArray[np.int64

def load_voice(
model, voice: str, extra_voice_dirs: list[str] = []
) -> tuple[
npt.NDArray[np.int64] | None, npt.NDArray[np.int64] | None, npt.NDArray[np.int64] | None
]: # pylint: disable=dangerous-default-value
) -> tuple[npt.NDArray[np.int64] | None, npt.NDArray[np.int64] | None, npt.NDArray[np.int64] | None]: # pylint: disable=dangerous-default-value
if voice == "random":
return None, None, None

Expand Down
6 changes: 3 additions & 3 deletions TTS/tts/layers/bark/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,9 +175,9 @@ def forward(self, idx, merge_context=False, past_kv=None, position_ids=None, use
assert idx.shape[1] >= 256 + 256 + 1
t = idx.shape[1] - 256
else:
assert (
t <= self.config.block_size
), f"Cannot forward sequence of length {t}, block size is only {self.config.block_size}"
assert t <= self.config.block_size, (
f"Cannot forward sequence of length {t}, block size is only {self.config.block_size}"
)

# forward the GPT model itself
if merge_context:
Expand Down
6 changes: 3 additions & 3 deletions TTS/tts/layers/bark/model_fine.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,9 +101,9 @@ def __init__(self, config):
def forward(self, pred_idx, idx):
device = idx.device
b, t, codes = idx.size()
assert (
t <= self.config.block_size
), f"Cannot forward sequence of length {t}, block size is only {self.config.block_size}"
assert t <= self.config.block_size, (
f"Cannot forward sequence of length {t}, block size is only {self.config.block_size}"
)
assert pred_idx > 0, "cannot predict 0th codebook"
assert codes == self.n_codes_total, (b, t, codes)
pos = torch.arange(0, t, dtype=torch.long, device=device).unsqueeze(0) # shape (1, t)
Expand Down
6 changes: 3 additions & 3 deletions TTS/tts/layers/feed_forward/encoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,9 +143,9 @@ def __init__(
elif encoder_type.lower() == "residual_conv_bn":
self.encoder = ResidualConv1dBNEncoder(in_hidden_channels, out_channels, in_hidden_channels, encoder_params)
elif encoder_type.lower() == "fftransformer":
assert (
in_hidden_channels == out_channels
), "[!] must be `in_channels` == `out_channels` when encoder type is 'fftransformer'"
assert in_hidden_channels == out_channels, (
"[!] must be `in_channels` == `out_channels` when encoder type is 'fftransformer'"
)
# pylint: disable=unexpected-keyword-arg
self.encoder = FFTransformerBlock(in_hidden_channels, **encoder_params)
else:
Expand Down
2 changes: 1 addition & 1 deletion TTS/tts/layers/generic/pos_encoding.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ class PositionalEncoding(nn.Module):
def __init__(self, channels, dropout_p=0.0, max_len=5000, use_scale=False):
super().__init__()
if channels % 2 != 0:
raise ValueError("Cannot use sin/cos positional encoding with " f"odd channels (got channels={channels:d})")
raise ValueError(f"Cannot use sin/cos positional encoding with odd channels (got channels={channels:d})")
self.use_scale = use_scale
if use_scale:
self.scale = torch.nn.Parameter(torch.ones(1))
Expand Down
4 changes: 1 addition & 3 deletions TTS/tts/layers/generic/transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,7 @@ def forward(self, x, mask=None, g=None): # pylint: disable=unused-argument


class FFTDurationPredictor:
def __init__(
self, in_channels, hidden_channels, num_heads, num_layers, dropout_p=0.1, cond_channels=None
): # pylint: disable=unused-argument
def __init__(self, in_channels, hidden_channels, num_heads, num_layers, dropout_p=0.1, cond_channels=None): # pylint: disable=unused-argument
self.fft = FFTransformerBlock(in_channels, num_heads, hidden_channels, num_layers, dropout_p)
self.proj = nn.Linear(in_channels, 1)

Expand Down
6 changes: 3 additions & 3 deletions TTS/tts/layers/tortoise/arch_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,9 +101,9 @@ def __init__(
if num_head_channels == -1:
self.num_heads = num_heads
else:
assert (
channels % num_head_channels == 0
), f"q,k,v channels {channels} is not divisible by num_head_channels {num_head_channels}"
assert channels % num_head_channels == 0, (
f"q,k,v channels {channels} is not divisible by num_head_channels {num_head_channels}"
)
self.num_heads = channels // num_head_channels
self.norm = normalization(channels)
self.qkv = nn.Conv1d(channels, channels * 3, 1)
Expand Down
12 changes: 6 additions & 6 deletions TTS/tts/layers/tortoise/audio_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,14 +125,14 @@ def load_voices(voices: list[str], extra_voice_dirs: list[str] = []):
return None, None
clip, latent = load_voice(voice, extra_voice_dirs)
if latent is None:
assert (
len(latents) == 0
), "Can only combine raw audio voices or latent voices, not both. Do it yourself if you want this."
assert len(latents) == 0, (
"Can only combine raw audio voices or latent voices, not both. Do it yourself if you want this."
)
clips.extend(clip)
elif clip is None:
assert (
len(clips) == 0
), "Can only combine raw audio voices or latent voices, not both. Do it yourself if you want this."
assert len(clips) == 0, (
"Can only combine raw audio voices or latent voices, not both. Do it yourself if you want this."
)
latents.append(latent)
if len(latents) == 0:
return clips, None
Expand Down
6 changes: 3 additions & 3 deletions TTS/tts/layers/tortoise/autoregressive.py
Original file line number Diff line number Diff line change
Expand Up @@ -608,9 +608,9 @@ def inference_speech(
if input_tokens is None:
inputs = fake_inputs
else:
assert (
num_return_sequences % input_tokens.shape[0] == 0
), "The number of return sequences must be divisible by the number of input sequences"
assert num_return_sequences % input_tokens.shape[0] == 0, (
"The number of return sequences must be divisible by the number of input sequences"
)
fake_inputs = fake_inputs.repeat(num_return_sequences, 1)
input_tokens = input_tokens.repeat(num_return_sequences // input_tokens.shape[0], 1)
inputs = torch.cat([fake_inputs, input_tokens], dim=1)
Expand Down
54 changes: 13 additions & 41 deletions TTS/tts/layers/tortoise/dpm_solver.py
Original file line number Diff line number Diff line change
Expand Up @@ -563,57 +563,29 @@ def get_orders_and_timesteps_for_singlestep_solver(self, steps, order, skip_type
if order == 3:
K = steps // 3 + 1
if steps % 3 == 0:
orders = [
3,
] * (
K - 2
) + [2, 1]
orders = [3] * (K - 2) + [2, 1]
elif steps % 3 == 1:
orders = [
3,
] * (
K - 1
) + [1]
orders = [3] * (K - 1) + [1]
else:
orders = [
3,
] * (
K - 1
) + [2]
orders = [3] * (K - 1) + [2]
elif order == 2:
if steps % 2 == 0:
K = steps // 2
orders = [
2,
] * K
orders = [2] * K
else:
K = steps // 2 + 1
orders = [
2,
] * (
K - 1
) + [1]
orders = [2] * (K - 1) + [1]
elif order == 1:
K = 1
orders = [
1,
] * steps
orders = [1] * steps
else:
raise ValueError("'order' must be '1' or '2' or '3'.")
if skip_type == "logSNR":
# To reproduce the results in DPM-Solver paper
timesteps_outer = self.get_time_steps(skip_type, t_T, t_0, K, device)
else:
timesteps_outer = self.get_time_steps(skip_type, t_T, t_0, steps, device)[
torch.cumsum(
torch.tensor(
[
0,
]
+ orders
),
0,
).to(device)
torch.cumsum(torch.tensor([0] + orders), 0).to(device)
]
return timesteps_outer, orders

Expand Down Expand Up @@ -1217,9 +1189,9 @@ def inverse(
"""
t_0 = 1.0 / self.noise_schedule.total_N if t_start is None else t_start
t_T = self.noise_schedule.T if t_end is None else t_end
assert (
t_0 > 0 and t_T > 0
), "Time range needs to be greater than 0. For discrete-time DPMs, it needs to be in [1 / N, 1], where N is the length of betas array"
assert t_0 > 0 and t_T > 0, (
"Time range needs to be greater than 0. For discrete-time DPMs, it needs to be in [1 / N, 1], where N is the length of betas array"
)
return self.sample(
x,
steps=steps,
Expand Down Expand Up @@ -1362,9 +1334,9 @@ def sample(
"""
t_0 = 1.0 / self.noise_schedule.total_N if t_end is None else t_end
t_T = self.noise_schedule.T if t_start is None else t_start
assert (
t_0 > 0 and t_T > 0
), "Time range needs to be greater than 0. For discrete-time DPMs, it needs to be in [1 / N, 1], where N is the length of betas array"
assert t_0 > 0 and t_T > 0, (
"Time range needs to be greater than 0. For discrete-time DPMs, it needs to be in [1 / N, 1], where N is the length of betas array"
)
if return_intermediate:
assert method in [
"multistep",
Expand Down
6 changes: 3 additions & 3 deletions TTS/tts/layers/tortoise/transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,9 @@ def route_args(router, args, depth):
class SequentialSequence(nn.Module):
def __init__(self, layers, args_route={}, layer_dropout=0.0):
super().__init__()
assert all(
len(route) == len(layers) for route in args_route.values()
), "each argument route map must have the same depth as the number of sequential layers"
assert all(len(route) == len(layers) for route in args_route.values()), (
"each argument route map must have the same depth as the number of sequential layers"
)
self.layers = layers
self.args_route = args_route
self.layer_dropout = layer_dropout
Expand Down
30 changes: 15 additions & 15 deletions TTS/tts/layers/tortoise/xtransformers.py
Original file line number Diff line number Diff line change
Expand Up @@ -560,9 +560,9 @@ def __init__(

self.rel_pos_bias = rel_pos_bias
if rel_pos_bias:
assert (
rel_pos_num_buckets <= rel_pos_max_distance
), "number of relative position buckets must be less than the relative position max distance"
assert rel_pos_num_buckets <= rel_pos_max_distance, (
"number of relative position buckets must be less than the relative position max distance"
)
self.rel_pos = RelativePositionBias(
scale=dim_head**0.5,
causal=causal,
Expand Down Expand Up @@ -680,9 +680,9 @@ def forward(
del input_mask

if exists(attn_mask):
assert (
2 <= attn_mask.ndim <= 4
), "attention mask must have greater than 2 dimensions but less than or equal to 4"
assert 2 <= attn_mask.ndim <= 4, (
"attention mask must have greater than 2 dimensions but less than or equal to 4"
)
if attn_mask.ndim == 2:
attn_mask = rearrange(attn_mask, "i j -> () () i j")
elif attn_mask.ndim == 3:
Expand Down Expand Up @@ -790,9 +790,9 @@ def __init__(
rotary_emb_dim = max(default(rotary_emb_dim, dim_head // 2), 32)
self.rotary_pos_emb = RotaryEmbedding(rotary_emb_dim) if rotary_pos_emb else None

assert not (
alibi_pos_bias and rel_pos_bias
), "you can only choose Alibi positional bias or T5 relative positional bias, not both"
assert not (alibi_pos_bias and rel_pos_bias), (
"you can only choose Alibi positional bias or T5 relative positional bias, not both"
)

if alibi_pos_bias:
alibi_num_heads = default(alibi_num_heads, heads)
Expand Down Expand Up @@ -922,9 +922,9 @@ def forward(
past_key_values=None,
expected_seq_len=None,
):
assert not (
self.cross_attend ^ (exists(context) or exists(full_context))
), "context must be passed in if cross_attend is set to True"
assert not (self.cross_attend ^ (exists(context) or exists(full_context))), (
"context must be passed in if cross_attend is set to True"
)
assert context is None or full_context is None, "only one of full_context or context can be provided"

hiddens = []
Expand All @@ -940,9 +940,9 @@ def forward(
rotary_pos_emb = None
if exists(self.rotary_pos_emb):
if not self.training and self.causal:
assert (
expected_seq_len is not None
), "To decode a transformer with rotary embeddings, you must specify an `expected_seq_len`"
assert expected_seq_len is not None, (
"To decode a transformer with rotary embeddings, you must specify an `expected_seq_len`"
)
elif expected_seq_len is None:
expected_seq_len = 0
seq_len = x.shape[1]
Expand Down
Loading

0 comments on commit e49d3d9

Please sign in to comment.