Skip to content

Commit

Permalink
Merge pull request #102 from TensorSpeech/refactor-fastspeech2
Browse files: browse the repository at this point in the history
🚀 Refactor FastSpeech, separate encoder/decoder params.
  • Loading branch information
dathudeptrai authored Jul 9, 2020
2 parents 21a8f36 + be94cef commit f65547c
Show file tree
Hide file tree
Showing 10 changed files with 801 additions and 1,013 deletions.
20 changes: 14 additions & 6 deletions examples/fastspeech/conf/fastspeech.v1.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,24 @@ format: "npy"
###########################################################
fastspeech_params:
n_speakers: 1
hidden_size: 384
num_hidden_layers: 4
num_attention_heads: 2
intermediate_size: 1024
intermediate_kernel_size: 3
encoder_hidden_size: 384
encoder_num_hidden_layers: 4
encoder_num_attention_heads: 2
encoder_attention_head_size: 192 # encoder_hidden_size // encoder_num_attention_heads
encoder_intermediate_size: 1024
encoder_intermediate_kernel_size: 3
encoder_hidden_act: "mish"
decoder_hidden_size: 384
decoder_num_hidden_layers: 4
decoder_num_attention_heads: 2
decoder_attention_head_size: 192 # decoder_hidden_size // decoder_num_attention_heads
decoder_intermediate_size: 1024
decoder_intermediate_kernel_size: 3
decoder_hidden_act: "mish"
num_duration_conv_layers: 2
duration_predictor_filters: 256
duration_predictor_kernel_sizes: 3
num_mels: 80
hidden_act: "mish"
hidden_dropout_prob: 0.1
attention_probs_dropout_prob: 0.1
duration_predictor_dropout_probs: 0.1
Expand Down
20 changes: 14 additions & 6 deletions examples/fastspeech/conf/fastspeech.v3.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,24 @@ format: "npy"
###########################################################
fastspeech_params:
n_speakers: 1
hidden_size: 384
num_hidden_layers: 4
num_attention_heads: 2
intermediate_size: 1024
intermediate_kernel_size: 3
encoder_hidden_size: 384
encoder_num_hidden_layers: 4
encoder_num_attention_heads: 2
encoder_attention_head_size: 192 # encoder_hidden_size // encoder_num_attention_heads
encoder_intermediate_size: 1024
encoder_intermediate_kernel_size: 3
encoder_hidden_act: "mish"
decoder_hidden_size: 384
decoder_num_hidden_layers: 4
decoder_num_attention_heads: 2
decoder_attention_head_size: 192 # decoder_hidden_size // decoder_num_attention_heads
decoder_intermediate_size: 1024
decoder_intermediate_kernel_size: 3
decoder_hidden_act: "mish"
num_duration_conv_layers: 2
duration_predictor_filters: 256
duration_predictor_kernel_sizes: 3
num_mels: 80
hidden_act: "mish"
hidden_dropout_prob: 0.2
attention_probs_dropout_prob: 0.1
duration_predictor_dropout_probs: 0.2
Expand Down
36 changes: 18 additions & 18 deletions examples/fastspeech2/conf/fastspeech2.v1.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,31 +15,31 @@ format: "npy"
###########################################################
fastspeech_params:
n_speakers: 1
hidden_size: 384
num_hidden_layers: 4
num_attention_heads: 2
attention_head_size: 192 # hidden_size // num_attention_heads
intermediate_size: 1024
intermediate_kernel_size: 3
num_duration_conv_layers: 2
duration_predictor_filters: 256
duration_predictor_kernel_sizes: 3
encoder_hidden_size: 384
encoder_num_hidden_layers: 4
encoder_num_attention_heads: 2
encoder_attention_head_size: 192 # encoder_hidden_size // encoder_num_attention_heads
encoder_intermediate_size: 1024
encoder_intermediate_kernel_size: 3
encoder_hidden_act: "mish"
decoder_hidden_size: 384
decoder_num_hidden_layers: 4
decoder_num_attention_heads: 2
decoder_attention_head_size: 192 # decoder_hidden_size // decoder_num_attention_heads
decoder_intermediate_size: 1024
decoder_intermediate_kernel_size: 3
decoder_hidden_act: "mish"
variant_prediction_num_conv_layers: 2
variant_predictor_filter: 256
variant_predictor_kernel_size: 3
variant_predictor_dropout_rate: 0.5
num_mels: 80
hidden_act: "mish"
hidden_dropout_prob: 0.2
attention_probs_dropout_prob: 0.1
duration_predictor_dropout_probs: 0.5
max_position_embeddings: 2048
initializer_range: 0.02
output_attentions: False
output_hidden_states: False
f0_energy_predictor_filters: 256
f0_energy_predictor_kernel_sizes: 3
f0_energy_predictor_dropout_probs: 0.5
f0_kernel_size: 9
energy_kernel_size: 9
f0_dropout_rate: 0.5
energy_dropout_rate: 0.5

###########################################################
# DATA LOADER SETTING #
Expand Down
Loading

0 comments on commit f65547c

Please sign in to comment.