1. using sentencepiece for text_to_seq
2. using 40 mel channels
3. added makefile and .gitignore
experiments
Malar Kannan 2019-09-23 15:30:43 +05:30
parent 131c1465b4
commit 4d5001bdf0
7 changed files with 1246 additions and 3 deletions

145
.gitignore vendored Normal file

@@ -0,0 +1,145 @@
.env
env/
jupyter.json
# Created by https://www.gitignore.io/api/macos
# Edit at https://www.gitignore.io/?templates=macos
### macOS ###
# General
.DS_Store
.AppleDouble
.LSOverride
# Icon must end with two \r
Icon
# Thumbnails
._*
# Files that might appear in the root of a volume
.DocumentRevisions-V100
.fseventsd
.Spotlight-V100
.TemporaryItems
.Trashes
.VolumeIcon.icns
.com.apple.timemachine.donotpresent
# Directories potentially created on remote AFP share
.AppleDB
.AppleDesktop
Network Trash Folder
Temporary Items
.apdisk
# End of https://www.gitignore.io/api/macos
# Created by https://www.gitignore.io/api/python
# Edit at https://www.gitignore.io/?templates=python
### Python ###
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# pyenv
.python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# Mr Developer
.mr.developer.cfg
.project
.pydevproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# End of https://www.gitignore.io/api/python

6
Makefile Normal file

@@ -0,0 +1,6 @@
.PHONY: clean clean-test clean-pyc clean-build docs help common.mk
.DEFAULT_GOAL := help

notebook:
	jupyter lab --ip=0.0.0.0 --no-browser --NotebookApp.token='${JUPYTER_TOKEN}'
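The notebook target is the only rule defined here: running JUPYTER_TOKEN=<secret> make notebook starts JupyterLab bound to all interfaces with token authentication taken from the environment (the .env file ignored above is presumably where that variable is kept).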

27
common.mk Normal file

@@ -0,0 +1,27 @@
define BROWSER_PYSCRIPT
import os, webbrowser, sys
try:
	from urllib import pathname2url
except ImportError:
	from urllib.request import pathname2url

webbrowser.open("file://" + pathname2url(os.path.abspath(sys.argv[1])))
endef
export BROWSER_PYSCRIPT

define PRINT_HELP_PYSCRIPT
import re, sys
for line in sys.stdin:
	match = re.match(r'^([a-zA-Z_-]+):.*?## (.*)$$', line)
	if match:
		target, help = match.groups()
		print("%-20s %s" % (target, help))
endef
export PRINT_HELP_PYSCRIPT

BROWSER := python -c "$$BROWSER_PYSCRIPT"

help: ## make TARGET forwards the TARGET to sub packages
	@cat $(MAKEFILE_LIST) | python -c "$$PRINT_HELP_PYSCRIPT"
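Note that the help target only lists rules whose definition line carries a trailing "## description" comment, so the notebook target above will not appear until it is annotated. A minimal sketch of what PRINT_HELP_PYSCRIPT does to each Makefile line (the annotated sample target is made up for illustration):

import re

# Same pattern as in PRINT_HELP_PYSCRIPT; the Makefile writes '$$' only to
# escape '$' from make, so Python sees a single '$' anchor.
pattern = r'^([a-zA-Z_-]+):.*?## (.*)$'

sample = "notebook: ## run jupyter lab for the experiments"  # hypothetical annotation
match = re.match(pattern, sample)
if match:
    target, help_text = match.groups()
    print("%-20s %s" % (target, help_text))  # -> notebook             run jupyter lab for the experiments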


@@ -5,8 +5,8 @@ import torch.utils.data
 import layers
 from utils import load_wav_to_torch, load_filepaths_and_text
-from text import text_to_sequence
+# from text import text_to_sequence
+from spm_codec import text_to_sequence
 class TextMelLoader(torch.utils.data.Dataset):
     """


@@ -37,7 +37,7 @@ def create_hparams(hparams_string=None, verbose=False):
         filter_length=1024,
         hop_length=256,
         win_length=1024,
-        n_mel_channels=80,
+        n_mel_channels=40,
         mel_fmin=0.0,
         mel_fmax=8000.0,
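Halving n_mel_channels shrinks the frequency axis of every target spectrogram (and the decoder's output size) from 80 to 40 bins. A quick way to see the resulting filterbank shape, assuming the usual 22050 Hz sampling rate (the sampling_rate hparam is not visible in this hunk):

import librosa

# Mel filterbank matching the STFT settings above; the 22050 Hz sampling
# rate is an assumption, not part of the diff.
mel_basis = librosa.filters.mel(
    sr=22050, n_fft=1024, n_mels=40, fmin=0.0, fmax=8000.0
)
print(mel_basis.shape)  # (40, 513): 40 mel bands over 1024 // 2 + 1 FFT bins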

1000
spm.vocab Normal file

File diff suppressed because it is too large

65
spm_codec.py Normal file

@@ -0,0 +1,65 @@
from utils import load_filepaths_and_text

# from text import text_to_sequence, sequence_to_text
from hparams import create_hparams
import sentencepiece as spm
from text import symbols

SPM_CORPUS_FILE = "filelists/text_corpus.txt"
SPM_MODEL_PREFIX = "spm"
SPM_VOCAB_SIZE = 1000

hparams = create_hparams()


def _create_sentencepiece_corpus():
    # Dump the transcript column of the training and validation filelists
    # into one plain-text corpus for SentencePiece training.
    def get_text_list(text_file):
        return [i[1] + "\n" for i in load_filepaths_and_text(text_file)]

    full_text_list = get_text_list(hparams.training_files) + get_text_list(
        hparams.validation_files
    )
    with open(SPM_CORPUS_FILE, "w") as fd:
        fd.writelines(full_text_list)


def _create_sentencepiece_vocab(vocab_size=SPM_VOCAB_SIZE):
    train_params = "--input={} --model_prefix={} --vocab_size={}".format(
        SPM_CORPUS_FILE, SPM_MODEL_PREFIX, vocab_size
    )
    spm.SentencePieceTrainer.Train(train_params)


def _spm_text_codecs():
    sp = spm.SentencePieceProcessor()
    sp.Load("{}.model".format(SPM_MODEL_PREFIX))

    def ttseq(text, cleaners):
        # cleaners is accepted only so this matches text.text_to_sequence's
        # signature; SentencePiece does its own normalisation.
        return sp.EncodeAsIds(text)

    def seqtt(sequence):
        return sp.DecodeIds(sequence)

    return ttseq, seqtt


text_to_sequence, sequence_to_text = _spm_text_codecs()


def _interactive_test():
    prompt = "Hello world; how are you, doing ?"
    while prompt not in ["q", "quit"]:
        oup = sequence_to_text(text_to_sequence(prompt, hparams.text_cleaners))
        print("==> ", oup)
        prompt = input("> ")


def main():
    # _create_sentencepiece_corpus()
    # _create_sentencepiece_vocab()
    _interactive_test()


if __name__ == "__main__":
    main()
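Two consequences of this design are worth noting: the cleaners argument is kept only so text_to_sequence stays a drop-in replacement for text.text_to_sequence in the data loader (the hparams text_cleaners setting no longer has any effect), and because the codecs are bound at import time, spm.model needs to exist before the data loader imports spm_codec; the commented-out corpus and vocab steps in main() are the one-off bootstrap for that. A hypothetical round trip once the model file is in place:

from spm_codec import text_to_sequence, sequence_to_text

# The second argument (cleaners) is ignored by the SentencePiece-backed codec.
ids = text_to_sequence("Hello world; how are you, doing ?", None)
print(ids)                    # list of SentencePiece ids
print(sequence_to_text(ids))  # decodes back to the input text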