mirror of https://github.com/malarinv/tacotron2
1. using sentencepiece for text_to_seq
2. using 40 mel channels 3. added makefile and .gitignore
experiments
parent
131c1465b4
commit
4d5001bdf0
|
|
@ -0,0 +1,145 @@
|
|||
.env
|
||||
env/
|
||||
jupyter.json
|
||||
# Created by https://www.gitignore.io/api/macos
|
||||
# Edit at https://www.gitignore.io/?templates=macos
|
||||
|
||||
### macOS ###
|
||||
# General
|
||||
.DS_Store
|
||||
.AppleDouble
|
||||
.LSOverride
|
||||
|
||||
# Icon must end with two \r
|
||||
Icon
|
||||
|
||||
# Thumbnails
|
||||
._*
|
||||
|
||||
# Files that might appear in the root of a volume
|
||||
.DocumentRevisions-V100
|
||||
.fseventsd
|
||||
.Spotlight-V100
|
||||
.TemporaryItems
|
||||
.Trashes
|
||||
.VolumeIcon.icns
|
||||
.com.apple.timemachine.donotpresent
|
||||
|
||||
# Directories potentially created on remote AFP share
|
||||
.AppleDB
|
||||
.AppleDesktop
|
||||
Network Trash Folder
|
||||
Temporary Items
|
||||
.apdisk
|
||||
|
||||
# End of https://www.gitignore.io/api/macos
|
||||
|
||||
# Created by https://www.gitignore.io/api/python
|
||||
# Edit at https://www.gitignore.io/?templates=python
|
||||
|
||||
### Python ###
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
|
||||
# C extensions
|
||||
*.so
|
||||
|
||||
# Distribution / packaging
|
||||
.Python
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
wheels/
|
||||
pip-wheel-metadata/
|
||||
share/python-wheels/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
MANIFEST
|
||||
|
||||
# PyInstaller
|
||||
# Usually these files are written by a python script from a template
|
||||
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||
*.manifest
|
||||
*.spec
|
||||
|
||||
# Installer logs
|
||||
pip-log.txt
|
||||
pip-delete-this-directory.txt
|
||||
|
||||
# Unit test / coverage reports
|
||||
htmlcov/
|
||||
.tox/
|
||||
.nox/
|
||||
.coverage
|
||||
.coverage.*
|
||||
.cache
|
||||
nosetests.xml
|
||||
coverage.xml
|
||||
*.cover
|
||||
.hypothesis/
|
||||
.pytest_cache/
|
||||
|
||||
# Translations
|
||||
*.mo
|
||||
*.pot
|
||||
|
||||
# Scrapy stuff:
|
||||
.scrapy
|
||||
|
||||
# Sphinx documentation
|
||||
docs/_build/
|
||||
|
||||
# PyBuilder
|
||||
target/
|
||||
|
||||
# pyenv
|
||||
.python-version
|
||||
|
||||
# pipenv
|
||||
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
||||
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
||||
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
||||
# install all needed dependencies.
|
||||
#Pipfile.lock
|
||||
|
||||
# celery beat schedule file
|
||||
celerybeat-schedule
|
||||
|
||||
# SageMath parsed files
|
||||
*.sage.py
|
||||
|
||||
# Spyder project settings
|
||||
.spyderproject
|
||||
.spyproject
|
||||
|
||||
# Rope project settings
|
||||
.ropeproject
|
||||
|
||||
# Mr Developer
|
||||
.mr.developer.cfg
|
||||
.project
|
||||
.pydevproject
|
||||
|
||||
# mkdocs documentation
|
||||
/site
|
||||
|
||||
# mypy
|
||||
.mypy_cache/
|
||||
.dmypy.json
|
||||
dmypy.json
|
||||
|
||||
# Pyre type checker
|
||||
.pyre/
|
||||
|
||||
# End of https://www.gitignore.io/api/python
|
||||
|
|
@ -0,0 +1,6 @@
|
|||
.PHONY: clean clean-test clean-pyc clean-build docs help common.mk
|
||||
.DEFAULT_GOAL := help
|
||||
|
||||
|
||||
notebook:
|
||||
jupyter lab --ip=0.0.0.0 --no-browser --NotebookApp.token='${JUPYTER_TOKEN}'
|
||||
|
|
@ -0,0 +1,27 @@
|
|||
define BROWSER_PYSCRIPT
|
||||
import os, webbrowser, sys
|
||||
|
||||
try:
|
||||
from urllib import pathname2url
|
||||
except:
|
||||
from urllib.request import pathname2url
|
||||
|
||||
webbrowser.open("file://" + pathname2url(os.path.abspath(sys.argv[1])))
|
||||
endef
|
||||
export BROWSER_PYSCRIPT
|
||||
|
||||
define PRINT_HELP_PYSCRIPT
|
||||
import re, sys
|
||||
|
||||
for line in sys.stdin:
|
||||
match = re.match(r'^([a-zA-Z_-]+):.*?## (.*)$$', line)
|
||||
if match:
|
||||
target, help = match.groups()
|
||||
print("%-20s %s" % (target, help))
|
||||
endef
|
||||
export PRINT_HELP_PYSCRIPT
|
||||
|
||||
BROWSER := python -c "$$BROWSER_PYSCRIPT"
|
||||
|
||||
help: ## make TARGET forwards the TARGET to sub packages
|
||||
@cat $(MAKEFILE_LIST) | python -c "$$PRINT_HELP_PYSCRIPT"
|
||||
|
|
@ -5,8 +5,8 @@ import torch.utils.data
|
|||
|
||||
import layers
|
||||
from utils import load_wav_to_torch, load_filepaths_and_text
|
||||
from text import text_to_sequence
|
||||
|
||||
# from text import text_to_sequence
|
||||
from spm_codec import text_to_sequence
|
||||
|
||||
class TextMelLoader(torch.utils.data.Dataset):
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -37,7 +37,7 @@ def create_hparams(hparams_string=None, verbose=False):
|
|||
filter_length=1024,
|
||||
hop_length=256,
|
||||
win_length=1024,
|
||||
n_mel_channels=80,
|
||||
n_mel_channels=40,
|
||||
mel_fmin=0.0,
|
||||
mel_fmax=8000.0,
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,65 @@
|
|||
from utils import load_filepaths_and_text
|
||||
|
||||
# from text import text_to_sequence, sequence_to_text
|
||||
|
||||
from hparams import create_hparams
|
||||
import sentencepiece as spm
|
||||
from text import symbols
|
||||
|
||||
|
||||
# Configuration for SentencePiece training / loading.
SPM_CORPUS_FILE = "filelists/text_corpus.txt"  # corpus written by _create_sentencepiece_corpus
SPM_MODEL_PREFIX = "spm"  # training produces spm.model / spm.vocab
SPM_VOCAB_SIZE = 1000

# NOTE(review): runs at import time; assumed cheap — confirm create_hparams
# does no heavy I/O.
hparams = create_hparams()
|
||||
|
||||
|
||||
def _create_sentencepiece_corpus():
    """Write every transcript from the training and validation filelists
    into SPM_CORPUS_FILE, one sentence per line, as a SentencePiece
    training corpus."""

    def transcripts(filelist):
        # load_filepaths_and_text yields (audio_path, text, ...) rows;
        # keep only the text column, newline-terminated.
        return [row[1] + "\n" for row in load_filepaths_and_text(filelist)]

    corpus_lines = transcripts(hparams.training_files)
    corpus_lines += transcripts(hparams.validation_files)
    with open(SPM_CORPUS_FILE, "w") as corpus_fd:
        corpus_fd.writelines(corpus_lines)
|
||||
|
||||
|
||||
def _create_sentencepiece_vocab(vocab_size=SPM_VOCAB_SIZE):
    """Train a SentencePiece model on SPM_CORPUS_FILE.

    Writes ``<SPM_MODEL_PREFIX>.model`` and ``<SPM_MODEL_PREFIX>.vocab``
    into the working directory.

    Args:
        vocab_size: target subword vocabulary size (default SPM_VOCAB_SIZE).
    """
    arg_template = "--input={} --model_prefix={} --vocab_size={}"
    spm.SentencePieceTrainer.Train(
        arg_template.format(SPM_CORPUS_FILE, SPM_MODEL_PREFIX, vocab_size)
    )
|
||||
|
||||
|
||||
def _spm_text_codecs():
    """Load the trained SentencePiece model and build an encoder/decoder pair.

    Returns:
        (encode, decode) closures over a shared SentencePieceProcessor,
        mirroring the ``text_to_sequence`` / ``sequence_to_text`` API of
        the ``text`` module.
    """
    processor = spm.SentencePieceProcessor()
    processor.Load("{}.model".format(SPM_MODEL_PREFIX))

    def encode(text, cleaners):
        # `cleaners` is accepted only for signature compatibility with
        # text.text_to_sequence and is intentionally ignored here.
        return processor.EncodeAsIds(text)

    def decode(sequence):
        return processor.DecodeIds(sequence)

    return encode, decode
|
||||
|
||||
|
||||
# Module-level codec pair so importers can use
# `from spm_codec import text_to_sequence` as a drop-in replacement for
# text.text_to_sequence. NOTE(review): this loads the .model file at import
# time — the file must exist before this module is imported.
text_to_sequence, sequence_to_text = _spm_text_codecs()
|
||||
|
||||
|
||||
def _interactive_test():
    """Interactively round-trip prompts through encode/decode.

    Starts with a canned sentence, then reads from stdin until the user
    enters ``q`` or ``quit``.
    """
    prompt = "Hello world; how are you, doing ?"
    while prompt not in ("q", "quit"):
        ids = text_to_sequence(prompt, hparams.text_cleaners)
        roundtripped = sequence_to_text(ids)
        print('==> ', roundtripped)
        prompt = input("> ")
|
||||
|
||||
|
||||
def main():
    # Entry point. The two commented calls below are one-time setup steps:
    # build the text corpus from the filelists, then train the SentencePiece
    # vocabulary. Uncomment them on first run (the interactive test requires
    # the trained spm.model to exist).
    # _create_sentencepiece_corpus()
    # _create_sentencepiece_vocab()
    _interactive_test()


if __name__ == "__main__":
    main()
|
||||
Loading…
Reference in New Issue