From a10a6d517e4e59fb389cf2c9b61e82cd141deafe Mon Sep 17 00:00:00 2001
From: Malar Kannan <malarkannan.invention@gmail.com>
Date: Sat, 21 Sep 2019 01:19:30 +0530
Subject: [PATCH] packaged taco2

---
 AUTHORS.rst                                   |  8 ++
 HISTORY.rst                                   |  8 ++
 MANIFEST.in                                   | 10 +++
 README.md                                     | 34 +++++++
 corpus.txt                                    |  4 +
 hparams.py                                    | 88 -------------------
 requirements.txt                              |  2 +
 server.py                                     | 38 --------
 setup.cfg                                     | 28 ++++++
 setup.py                                      | 59 +++++++++++++
 __init__.py => taco2/__init__.py              |  0
 .../audio_processing.py                       |  0
 data_utils.py => taco2/data_utils.py          |  0
 taco2/hparams.py                              | 76 ++++++++++++++++
 layers.py => taco2/layers.py                  |  0
 loss_function.py => taco2/loss_function.py    |  0
 model.py => taco2/model.py                    |  0
 stft.py => taco2/stft.py                      |  0
 {text => taco2/text}/LICENSE                  |  0
 {text => taco2/text}/__init__.py              |  0
 {text => taco2/text}/cleaners.py              |  0
 {text => taco2/text}/cmudict.py               |  0
 {text => taco2/text}/numbers.py               |  0
 {text => taco2/text}/symbols.py               |  0
 tts.py => taco2/tts.py                        | 71 +++++++--------
 utils.py => taco2/utils.py                    |  0
 26 files changed, 265 insertions(+), 161 deletions(-)
 create mode 100644 AUTHORS.rst
 create mode 100644 HISTORY.rst
 create mode 100644 MANIFEST.in
 create mode 100644 README.md
 create mode 100644 corpus.txt
 delete mode 100644 hparams.py
 create mode 100644 requirements.txt
 delete mode 100644 server.py
 create mode 100644 setup.cfg
 create mode 100644 setup.py
 rename __init__.py => taco2/__init__.py (100%)
 rename audio_processing.py => taco2/audio_processing.py (100%)
 rename data_utils.py => taco2/data_utils.py (100%)
 create mode 100644 taco2/hparams.py
 rename layers.py => taco2/layers.py (100%)
 rename loss_function.py => taco2/loss_function.py (100%)
 rename model.py => taco2/model.py (100%)
 rename stft.py => taco2/stft.py (100%)
 rename {text => taco2/text}/LICENSE (100%)
 rename {text => taco2/text}/__init__.py (100%)
 rename {text => taco2/text}/cleaners.py (100%)
 rename {text => taco2/text}/cmudict.py (100%)
 rename {text => taco2/text}/numbers.py (100%)
 rename {text => taco2/text}/symbols.py (100%)
 rename tts.py => taco2/tts.py (80%)
 rename utils.py => taco2/utils.py (100%)

diff --git a/AUTHORS.rst b/AUTHORS.rst
new file mode 100644
index 0000000..f7cb4d4
--- /dev/null
+++ b/AUTHORS.rst
@@ -0,0 +1,8 @@
+=======
+Credits
+=======
+
+Contributors
+------------
+
+* Malar Kannan <malarkannan.invention@gmail.com>
diff --git a/HISTORY.rst b/HISTORY.rst
new file mode 100644
index 0000000..e06aacb
--- /dev/null
+++ b/HISTORY.rst
@@ -0,0 +1,8 @@
+=======
+History
+=======
+
+0.1.0 (2019-09-20)
+------------------
+
+* First release on PyPI.
diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 0000000..36ea105
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1,10 @@
+include AUTHORS.rst
+include HISTORY.rst
+include LICENSE
+include README.md
+
+recursive-include tests *
+recursive-exclude * __pycache__
+recursive-exclude * *.py[co]
+
+recursive-include docs *.rst conf.py Makefile make.bat *.jpg *.png *.gif
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..f64a96b
--- /dev/null
+++ b/README.md
@@ -0,0 +1,34 @@
+# Taco2 TTS
+
+[![image](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/python/black)
+
+> Generate speech audio from text
+---
+
+# Table of Contents
+
+* [Features](#features)
+* [Installation](#installation)
+* [Usage](#usage)
+
+# Features
+
+* Tacotron2 Synthesized Speech
+
+
+# Installation
+Install the package for production use; this also downloads its dependencies:
+```bash
+python setup.py install
+```
+
+> Still facing an issue? Open a new issue on the project's issue tracker.
+
+The installation should be smooth with Python 3.6 or newer.
+
+# Usage
+> API
+```python
+tts_model = TTSModel("/path/to/tacotron2_model","/path/to/waveglow_model")
+SPEECH_AUDIO = tts_model.synth_speech(TEXT)
+```
diff --git a/corpus.txt b/corpus.txt
new file mode 100644
index 0000000..64314d7
--- /dev/null
+++ b/corpus.txt
@@ -0,0 +1,4 @@
+Hello world!
+How have you been?
+Today is a good day.
+This seems to be working good
diff --git a/hparams.py b/hparams.py
deleted file mode 100644
index 58cf525..0000000
--- a/hparams.py
+++ /dev/null
@@ -1,88 +0,0 @@
-# -*- coding: utf-8 -*-
-import tensorflow as tf
-from .text import symbols
-
-
-# changed path, sampling rate and batch size
-def create_hparams(hparams_string=None, verbose=False):
-    """Create model hyperparameters. Parse nondefault from given string."""
-
-    hparams = tf.contrib.training.HParams(
-        ################################
-        # Experiment Parameters        #
-        ################################
-        epochs=500,
-        iters_per_checkpoint=1000,
-        seed=1234,
-        dynamic_loss_scaling=True,
-        fp16_run=False,
-        distributed_run=False,
-        dist_backend="nccl",
-        dist_url="tcp://localhost:54321",
-        cudnn_enabled=True,
-        cudnn_benchmark=False,
-        ignore_layers=["embedding.weight"],
-        ################################
-        # Data Parameters             #
-        ################################
-        load_mel_from_disk=False,
-        training_files="lists/tts_data_train_processed.txt",
-        validation_files="filelists/tts_data_val_processed.txt",
-        text_cleaners=["english_cleaners"],
-        ################################
-        # Audio Parameters             #
-        ################################
-        max_wav_value=32768.0,
-        sampling_rate=16000,
-        filter_length=1024,
-        hop_length=256,
-        win_length=1024,
-        n_mel_channels=80,
-        mel_fmin=0.0,
-        mel_fmax=8000.0,
-        ################################
-        # Model Parameters             #
-        ################################
-        n_symbols=len(symbols),
-        symbols_embedding_dim=512,
-        # Encoder parameters
-        encoder_kernel_size=5,
-        encoder_n_convolutions=3,
-        encoder_embedding_dim=512,
-        # Decoder parameters
-        n_frames_per_step=1,  # currently only 1 is supported
-        decoder_rnn_dim=1024,
-        prenet_dim=256,
-        max_decoder_steps=1000,
-        gate_threshold=0.5,
-        p_attention_dropout=0.1,
-        p_decoder_dropout=0.1,
-        # Attention parameters
-        attention_rnn_dim=1024,
-        attention_dim=128,
-        # Location Layer parameters
-        attention_location_n_filters=32,
-        attention_location_kernel_size=31,
-        # Mel-post processing network parameters
-        postnet_embedding_dim=512,
-        postnet_kernel_size=5,
-        postnet_n_convolutions=5,
-        ################################
-        # Optimization Hyperparameters #
-        ################################
-        use_saved_learning_rate=False,
-        learning_rate=1e-3,
-        weight_decay=1e-6,
-        grad_clip_thresh=1.0,
-        batch_size=4,
-        mask_padding=True,  # set model's padded outputs to padded values
-    )
-
-    if hparams_string:
-        tf.logging.info("Parsing command line hparams: %s", hparams_string)
-        hparams.parse(hparams_string)
-
-    if verbose:
-        tf.logging.info("Final parsed hparams: %s", hparams.values())
-
-    return hparams
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..ba4d322
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,2 @@
+numpy==1.17.2
+torch==1.2.0
diff --git a/server.py b/server.py
deleted file mode 100644
index d41119a..0000000
--- a/server.py
+++ /dev/null
@@ -1,38 +0,0 @@
-# -*- coding: utf-8 -*-
-import grpc
-import time
-from sia.proto import tts_pb2
-from sia.proto import tts_pb2_grpc
-from concurrent import futures
-from .tts import TTSModel
-
-
-class TTSServer:
-    def __init__(self):
-        self.tts_model = TTSModel()
-
-    def TextToSpeechAPI(self, request, context):
-        while True:
-            input_text = request.text
-            speech_response = self.tts_model.synth_speech(input_text)
-            return tts_pb2.SpeechResponse(response=speech_response)
-
-
-def main():
-    server = grpc.server(futures.ThreadPoolExecutor(max_workers=1))
-    tts_server = TTSServer()
-    tts_pb2_grpc.add_ServerServicer_to_server(tts_server, server)
-    server.add_insecure_port("localhost:50060")
-    server.start()
-    print("TTSServer started!")
-
-    try:
-        while True:
-            time.sleep(10000)
-    except KeyboardInterrupt:
-        server.start()
-        # server.stop(0)
-
-
-if __name__ == "__main__":
-    main()
diff --git a/setup.cfg b/setup.cfg
new file mode 100644
index 0000000..82236da
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,28 @@
+[bumpversion]
+current_version = 0.1.0
+commit = True
+tag = True
+
+[bumpversion:file:setup.py]
+search = version="{current_version}"
+replace = version="{new_version}"
+
+[bumpversion:file:taco2/__init__.py]
+search = __version__ = '{current_version}'
+replace = __version__ = '{new_version}'
+
+[bdist_wheel]
+universal = 1
+
+[flake8]
+exclude = docs
+
+[aliases]
+# Define setup.py command aliases here
+test = pytest
+
+[tool:pytest]
+collect_ignore = ['setup.py']
+
+[easy_install]
+index-url = http://localhost:8080/simple
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..f07a6e6
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,59 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""Packaging script for the taco2-tts distribution (run via setuptools)."""
+
+from setuptools import setup, find_packages
+
+with open("README.md") as readme_file:
+    readme = readme_file.read()
+
+with open("HISTORY.rst") as history_file:
+    history = history_file.read()
+
+requirements = [
+    "klepto==0.1.6",
+    "numpy==1.16.4",  # NOTE(review): requirements.txt pins numpy==1.17.2 - confirm
+    "inflect==0.2.5",
+    "librosa==0.6.0",
+    "scipy==1.3.0",
+    "Unidecode==1.0.22",
+    "torch==1.1.0",  # NOTE(review): requirements.txt pins torch==1.2.0 - confirm
+    "PyAudio==0.2.11",
+]
+
+setup_requirements = ["pytest-runner"]
+
+test_requirements = ["pytest"]
+
+packages = find_packages()
+
+setup(
+    author="Malar Kannan",
+    author_email="malar@agaralabs.com",
+    classifiers=[
+        "Development Status :: 2 - Pre-Alpha",
+        "Intended Audience :: Developers",
+        "Natural Language :: English",
+        "Programming Language :: Python :: 2",  # NOTE(review): code uses f-strings
+        "Programming Language :: Python :: 2.7",
+        "Programming Language :: Python :: 3",
+        "Programming Language :: Python :: 3.4",
+        "Programming Language :: Python :: 3.5",
+        "Programming Language :: Python :: 3.6",
+        "Programming Language :: Python :: 3.7",
+    ],
+    description="Taco2 TTS package.",
+    install_requires=requirements,
+    long_description=readme + "\n\n" + history,
+    include_package_data=True,
+    keywords="tacotron2",
+    name="taco2-tts",
+    packages=packages,
+    setup_requires=setup_requirements,
+    test_suite="tests",
+    tests_require=test_requirements,
+    url="https://github.com/malarinv/tacotron2",
+    version="0.1.0",
+    zip_safe=False,
+    # tts.py lives inside the taco2 package, so the script must target taco2.tts.
+    entry_points={"console_scripts": ("tts_debug = taco2.tts:main",)},
+)
diff --git a/__init__.py b/taco2/__init__.py
similarity index 100%
rename from __init__.py
rename to taco2/__init__.py
diff --git a/audio_processing.py b/taco2/audio_processing.py
similarity index 100%
rename from audio_processing.py
rename to taco2/audio_processing.py
diff --git a/data_utils.py b/taco2/data_utils.py
similarity index 100%
rename from data_utils.py
rename to taco2/data_utils.py
diff --git a/taco2/hparams.py b/taco2/hparams.py
new file mode 100644
index 0000000..1a126a7
--- /dev/null
+++ b/taco2/hparams.py
@@ -0,0 +1,76 @@
+# -*- coding: utf-8 -*-
+# import tensorflow as tf
+from dataclasses import dataclass, field
+from .text import symbols
+
+@dataclass
+class HParams(object):
+    """Tacotron2 hyperparameters; pass keyword arguments to override a default."""
+    ################################
+    # Experiment Parameters        #
+    ################################
+    epochs: int = 500
+    iters_per_checkpoint: int = 1000
+    seed: int = 1234
+    dynamic_loss_scaling: bool = True
+    fp16_run: bool = False
+    distributed_run: bool = False
+    dist_backend: str = "nccl"
+    dist_url: str = "tcp://localhost:54321"
+    cudnn_enabled: bool = True
+    cudnn_benchmark: bool = False
+    ignore_layers: list = field(default_factory=lambda: ["embedding.weight"])
+    ################################
+    # Data Parameters             #
+    ################################
+    load_mel_from_disk: bool = False
+    training_files: str = "lists/tts_data_train_processed.txt"
+    validation_files: str = "filelists/tts_data_val_processed.txt"
+    text_cleaners: list = field(default_factory=lambda: ["english_cleaners"])
+    ################################
+    # Audio Parameters             #
+    ################################
+    max_wav_value: float = 32768.0
+    sampling_rate: int = 16000
+    filter_length: int = 1024
+    hop_length: int = 256
+    win_length: int = 1024
+    n_mel_channels: int = 80
+    mel_fmin: float = 0.0
+    mel_fmax: float = 8000.0
+    ################################
+    # Model Parameters             #
+    ################################
+    n_symbols: int = len(symbols)
+    symbols_embedding_dim: int = 512
+    # Encoder parameters
+    encoder_kernel_size: int = 5
+    encoder_n_convolutions: int = 3
+    encoder_embedding_dim: int = 512
+    # Decoder parameters
+    n_frames_per_step: int = 1  # currently only 1 is supported
+    decoder_rnn_dim: int = 1024
+    prenet_dim: int = 256
+    max_decoder_steps: int = 1000
+    gate_threshold: float = 0.5
+    p_attention_dropout: float = 0.1
+    p_decoder_dropout: float = 0.1
+    # Attention parameters
+    attention_rnn_dim: int = 1024
+    attention_dim: int = 128
+    # Location Layer parameters
+    attention_location_n_filters: int = 32
+    attention_location_kernel_size: int = 31
+    # Mel-post processing network parameters
+    postnet_embedding_dim: int = 512
+    postnet_kernel_size: int = 5
+    postnet_n_convolutions: int = 5
+    ################################
+    # Optimization Hyperparameters #
+    ################################
+    use_saved_learning_rate: bool = False
+    learning_rate: float = 1e-3
+    weight_decay: float = 1e-6
+    grad_clip_thresh: float = 1.0
+    batch_size: int = 4
+    mask_padding: bool = True  # set model's padded outputs to padded values
diff --git a/layers.py b/taco2/layers.py
similarity index 100%
rename from layers.py
rename to taco2/layers.py
diff --git a/loss_function.py b/taco2/loss_function.py
similarity index 100%
rename from loss_function.py
rename to taco2/loss_function.py
diff --git a/model.py b/taco2/model.py
similarity index 100%
rename from model.py
rename to taco2/model.py
diff --git a/stft.py b/taco2/stft.py
similarity index 100%
rename from stft.py
rename to taco2/stft.py
diff --git a/text/LICENSE b/taco2/text/LICENSE
similarity index 100%
rename from text/LICENSE
rename to taco2/text/LICENSE
diff --git a/text/__init__.py b/taco2/text/__init__.py
similarity index 100%
rename from text/__init__.py
rename to taco2/text/__init__.py
diff --git a/text/cleaners.py b/taco2/text/cleaners.py
similarity index 100%
rename from text/cleaners.py
rename to taco2/text/cleaners.py
diff --git a/text/cmudict.py b/taco2/text/cmudict.py
similarity index 100%
rename from text/cmudict.py
rename to taco2/text/cmudict.py
diff --git a/text/numbers.py b/taco2/text/numbers.py
similarity index 100%
rename from text/numbers.py
rename to taco2/text/numbers.py
diff --git a/text/symbols.py b/taco2/text/symbols.py
similarity index 100%
rename from text/symbols.py
rename to taco2/text/symbols.py
diff --git a/tts.py b/taco2/tts.py
similarity index 80%
rename from tts.py
rename to taco2/tts.py
index f9a97a8..b1e41a9 100644
--- a/tts.py
+++ b/taco2/tts.py
@@ -3,19 +3,14 @@
 
 import numpy as np
 import torch
-from .hparams import create_hparams
-from .text import text_to_sequence
-from .glow import WaveGlow
-
-# import os
-# import soundfile as sf
 import pyaudio
-import klepto
 from librosa import resample
 from librosa.effects import time_stretch
-from sia.file_utils import cached_model_path
-from sia.instruments import do_time
+import klepto
 from .model import Tacotron2
+from glow import WaveGlow
+from .hparams import HParams
+from .text import text_to_sequence
 
 TTS_SAMPLE_RATE = 22050
 OUTPUT_SAMPLE_RATE = 16000
@@ -35,43 +30,34 @@ WAVEGLOW_CONFIG = {
 class TTSModel(object):
     """docstring for TTSModel."""
 
-    def __init__(self):
+    def __init__(self, tacotron2_path, waveglow_path):
         super(TTSModel, self).__init__()
-        hparams = create_hparams()
+        hparams = HParams()
         hparams.sampling_rate = TTS_SAMPLE_RATE
         self.model = Tacotron2(hparams)
-        tacotron2_path = cached_model_path("tacotron2_model")
         self.model.load_state_dict(
             torch.load(tacotron2_path, map_location="cpu")["state_dict"]
         )
         self.model.eval()
-        waveglow_path = cached_model_path("waveglow_model")
-        self.waveglow = WaveGlow(**WAVEGLOW_CONFIG)
         wave_params = torch.load(waveglow_path, map_location="cpu")
+        self.waveglow = WaveGlow(**WAVEGLOW_CONFIG)
         self.waveglow.load_state_dict(wave_params)
         self.waveglow.eval()
         for k in self.waveglow.convinv:
             k.float()
         self.k_cache = klepto.archives.file_archive(cached=False)
-        self.synth_speech = klepto.safe.inf_cache(cache=self.k_cache)(
-            self.synth_speech
-        )
+        self.synth_speech = klepto.safe.inf_cache(cache=self.k_cache)(self.synth_speech)
         # workaround from
         # https://github.com/NVIDIA/waveglow/issues/127
         for m in self.waveglow.modules():
             if "Conv" in str(type(m)):
                 setattr(m, "padding_mode", "zeros")
 
-    @do_time
     def synth_speech(self, t):
         text = t
-        sequence = np.array(text_to_sequence(text, ["english_cleaners"]))[
-            None, :
-        ]
+        sequence = np.array(text_to_sequence(text, ["english_cleaners"]))[None, :]
         sequence = torch.autograd.Variable(torch.from_numpy(sequence)).long()
-        mel_outputs, mel_outputs_postnet, _, alignments = self.model.inference(
-            sequence
-        )
+        mel_outputs, mel_outputs_postnet, _, alignments = self.model.inference(sequence)
         with torch.no_grad():
             audio_t = self.waveglow.infer(mel_outputs_postnet, sigma=0.666)
         audio = audio_t[0].data.cpu().numpy()
@@ -130,10 +116,7 @@ def display(data):
 def player_gen():
     audio_interface = pyaudio.PyAudio()
     _audio_stream = audio_interface.open(
-        format=pyaudio.paInt16,
-        channels=1,
-        rate=OUTPUT_SAMPLE_RATE,
-        output=True,
+        format=pyaudio.paInt16, channels=1, rate=OUTPUT_SAMPLE_RATE, output=True
     )
 
     def play_device(data):
@@ -144,14 +127,30 @@ def player_gen():
 
 
 def synthesize_corpus():
-    tts_model = TTSModel()
+    tts_model = TTSModel(
+        "/Users/malar/Work/tacotron2_statedict.pt",
+        "/Users/malar/Work/waveglow.pt",
+    )
     all_data = []
     for (i, line) in enumerate(open("corpus.txt").readlines()):
-        print('synthesizing... "{}"'.format(line.strip()))
+        print(f'synthesizing... "{line.strip()}"')
         data = tts_model.synth_speech(line.strip())
         all_data.append(data)
     return all_data
 
+def repl(
+    tacotron2_path="/Users/malar/Work/tacotron2_statedict.pt",
+    waveglow_path="/Users/malar/Work/waveglow.pt",
+):
+    """Load the models once and return a callable that speaks one prompted line."""
+    tts_model = TTSModel(tacotron2_path, waveglow_path)
+    player = player_gen()
+
+    def loop():
+        text = input('tts >')  # blocks until the user enters a line
+        player(tts_model.synth_speech(text.strip()))
+    return loop
+
 
 def play_corpus(corpus_synths):
     player = player_gen()
@@ -160,11 +159,13 @@ def play_corpus(corpus_synths):
 
 
 def main():
-    corpus_synth_data = synthesize_corpus()
-    play_corpus(corpus_synth_data)
-    import pdb
-
-    pdb.set_trace()
+    # corpus_synth_data = synthesize_corpus()
+    # play_corpus(corpus_synth_data)
+    interactive_loop = repl()
+    while True:
+        interactive_loop()
+    # import pdb
+    # pdb.set_trace()
 
 
 if __name__ == "__main__":
diff --git a/utils.py b/taco2/utils.py
similarity index 100%
rename from utils.py
rename to taco2/utils.py