# Mirror of https://github.com/malarinv/tacotron2
# -*- coding: utf-8 -*-
|
|
# import tensorflow as tf
|
|
from dataclasses import dataclass, field
|
|
from .text import symbols
|
|
|
|
# from .text_codec import symbols
|
|
|
|
|
|
@dataclass
class HParams(object):
    """Hyperparameter container with one dataclass field per setting.

    Every attribute carries a type annotation so that ``@dataclass`` registers
    it as a field.  ``@dataclass`` ignores un-annotated class attributes, so
    without the annotations a value could not be overridden through the
    constructor and would be missing from the generated ``__repr__`` and
    ``__eq__`` (as well as ``dataclasses.asdict`` / ``dataclasses.replace``).

    Notes:
        * ``n_mel_channels``, ``mel_fmin`` and ``mel_fmax`` are declared first
          on purpose: they were the only annotated fields before the rest were
          annotated, so keeping them first preserves the positional argument
          order of the generated ``__init__``.  Pass everything else by
          keyword, e.g. ``HParams(batch_size=8)``.
        * List-valued defaults use ``default_factory`` so each instance gets
          its own list.  As a consequence those two names are only available
          on instances, not on the class object itself.
        * A subclass that overrides a default must annotate the attribute as
          well; otherwise the inherited ``__init__`` assigns this class's
          default on the instance and shadows the subclass value.
    """

    ################################
    # Mel Parameters               #
    ################################
    # Kept ahead of every other field to preserve positional compatibility
    # (see the class docstring).  They belong with the audio parameters below.
    n_mel_channels: int = 80
    mel_fmin: float = 0.0
    mel_fmax: float = 8000.0

    ################################
    # Experiment Parameters        #
    ################################
    epochs: int = 500
    iters_per_checkpoint: int = 1000
    seed: int = 1234
    dynamic_loss_scaling: bool = True
    fp16_run: bool = False
    distributed_run: bool = False
    dist_backend: str = "nccl"
    dist_url: str = "tcp://localhost:54321"
    cudnn_enabled: bool = True
    cudnn_benchmark: bool = False
    # Fresh list per instance so mutating one instance cannot leak into others.
    ignore_layers: list = field(default_factory=lambda: ["embedding.weight"])

    ################################
    # Data Parameters              #
    ################################
    load_mel_from_disk: bool = False
    # NOTE(review): the training list lives under "lists/" while the validation
    # list lives under "filelists/" -- confirm that the differing directory
    # prefixes are intentional.
    training_files: str = "lists/tts_data_train_processed.txt"
    validation_files: str = "filelists/tts_data_val_processed.txt"
    # Fresh list per instance, same reasoning as ``ignore_layers``.
    text_cleaners: list = field(default_factory=lambda: ["english_cleaners"])

    ################################
    # Audio Parameters             #
    ################################
    max_wav_value: float = 32768.0
    sampling_rate: int = 22050
    filter_length: int = 1024
    hop_length: int = 256
    win_length: int = 1024

    ################################
    # Model Parameters             #
    ################################
    # Evaluated once, when the class body runs, from the imported symbol table.
    n_symbols: int = len(symbols)
    symbols_embedding_dim: int = 512

    # Encoder parameters
    encoder_kernel_size: int = 5
    encoder_n_convolutions: int = 3
    encoder_embedding_dim: int = 512

    # Decoder parameters
    n_frames_per_step: int = 1  # currently only 1 is supported
    decoder_rnn_dim: int = 1024
    prenet_dim: int = 256
    max_decoder_steps: int = 1000
    gate_threshold: float = 0.5
    p_attention_dropout: float = 0.1
    p_decoder_dropout: float = 0.1

    # Attention parameters
    attention_rnn_dim: int = 1024
    attention_dim: int = 128

    # Location Layer parameters
    attention_location_n_filters: int = 32
    attention_location_kernel_size: int = 31

    # Mel-post processing network parameters
    postnet_embedding_dim: int = 512
    postnet_kernel_size: int = 5
    postnet_n_convolutions: int = 5

    ################################
    # Optimization Hyperparameters #
    ################################
    use_saved_learning_rate: bool = False
    learning_rate: float = 1e-3
    weight_decay: float = 1e-6
    grad_clip_thresh: float = 1.0
    batch_size: int = 4
    mask_padding: bool = True  # set model's padded outputs to padded values
|