diff --git a/rvc/infer/infer.py b/rvc/infer/infer.py index 483ade96..4d314250 100644 --- a/rvc/infer/infer.py +++ b/rvc/infer/infer.py @@ -313,7 +313,9 @@ def convert_audio( print(f"Converted audio chunk {len(converted_chunks)}") if split_audio: - audio_opt = merge_audio(chunks, converted_chunks, intervals, 16000, self.tgt_sr) + audio_opt = merge_audio( + chunks, converted_chunks, intervals, 16000, self.tgt_sr + ) else: audio_opt = converted_chunks[0] diff --git a/rvc/lib/algorithm/generators/refinegan.py b/rvc/lib/algorithm/generators/refinegan.py index b27db8ed..c2aba01d 100644 --- a/rvc/lib/algorithm/generators/refinegan.py +++ b/rvc/lib/algorithm/generators/refinegan.py @@ -222,11 +222,11 @@ def __init__( self.dim = self.harmonic_num + 1 self.sampling_rate = samp_rate self.voiced_threshold = voiced_threshold - + self.merge = nn.Sequential( nn.Linear(self.dim, 1, bias=False), nn.Tanh(), - ) + ) def _f02uv(self, f0): # generate uv signal @@ -275,11 +275,12 @@ def forward(self, f0): noise = noise_amp * torch.randn_like(sine_waves) sine_waves = sine_waves * uv + noise - # correct DC offset + # correct DC offset sine_waves = sine_waves - sine_waves.mean(dim=1, keepdim=True) - # merge with grad + # merge with grad return self.merge(sine_waves) - + + class RefineGANGenerator(nn.Module): """ RefineGAN generator for audio synthesis. @@ -309,7 +310,7 @@ def __init__( num_mels: int = 128, start_channels: int = 16, gin_channels: int = 256, - checkpointing: bool =False, + checkpointing: bool = False, ): super().__init__() self.downsample_rates = downsample_rates @@ -328,7 +329,7 @@ def __init__( kernel_size=7, stride=1, padding=3, - bias=False + bias=False, ) ) @@ -373,9 +374,7 @@ def __init__( for rate in upsample_rates: new_channels = channels // 2 - self.upsample_blocks.append( - nn.Upsample(scale_factor=rate, mode="linear") - ) + self.upsample_blocks.append(nn.Upsample(scale_factor=rate, mode="linear")) self.upsample_conv_blocks.append( ParallelResBlock( @@ -400,8 +399,10 @@ def __init__( ) def forward(self, mel: torch.Tensor, f0: torch.Tensor, g: torch.Tensor = None): - - f0 = F.interpolate(f0.unsqueeze(1), size=mel.shape[-1] * self.upp, mode="linear") + + f0 = F.interpolate( + f0.unsqueeze(1), size=mel.shape[-1] * self.upp, mode="linear" + ) har_source = self.m_source(f0.transpose(1, 2)).transpose(1, 2) # expanding pitch source to 16 channels