1
0
mirror of https://github.com/malarinv/tacotron2 synced 2026-03-08 09:42:34 +00:00

3 Commits

Author SHA1 Message Date
6d3679d760 relax scipy version 2021-07-02 23:21:45 +05:30
a851e80db2 1. added rpyc server optional package
2. updated path variable
2020-03-05 16:59:55 +05:30
cb0c8ddd06 1. make pyaudio as extra requirement
2. warn if pyaudio not installed if player_gen is used
2020-01-22 14:09:37 +05:30
5 changed files with 115 additions and 4 deletions

View File

@@ -15,12 +15,19 @@ requirements = [
"numpy~=1.16.4", "numpy~=1.16.4",
"inflect==0.2.5", "inflect==0.2.5",
"librosa==0.6.0", "librosa==0.6.0",
"scipy~=1.3.0", "scipy~=1.3",
"Unidecode==1.0.22", "Unidecode==1.0.22",
"torch~=1.1.0", "torch~=1.1.0",
"PyAudio==0.2.11"
] ]
extra_requirements = {
"playback": ["PyAudio==0.2.11"],
"server": [
"google-cloud-texttospeech==1.0.1",
"rpyc==4.1.4",
],
}
setup_requirements = ["pytest-runner"] setup_requirements = ["pytest-runner"]
test_requirements = ["pytest"] test_requirements = ["pytest"]
@@ -44,6 +51,7 @@ setup(
], ],
description="Taco2 TTS package.", description="Taco2 TTS package.",
install_requires=requirements, install_requires=requirements,
extras_require=extra_requirements,
long_description=readme + "\n\n" + history, long_description=readme + "\n\n" + history,
include_package_data=True, include_package_data=True,
keywords="tacotron2 tts", keywords="tacotron2 tts",
@@ -55,5 +63,10 @@ setup(
url="https://github.com/malarinv/tacotron2", url="https://github.com/malarinv/tacotron2",
version="0.3.0", version="0.3.0",
zip_safe=False, zip_safe=False,
entry_points={"console_scripts": ("tts_debug = taco2.tts:main",)}, entry_points={
"console_scripts": (
"tts_debug = taco2.tts:main",
"tts_rpyc_server = taco2.server.__main__:main",
)
},
) )

0
taco2/server/__init__.py Normal file
View File

48
taco2/server/__main__.py Normal file
View File

@@ -0,0 +1,48 @@
import os
import logging
import rpyc
from rpyc.utils.server import ThreadedServer
from .backend import TTSSynthesizer
# Backend name is taken from the TTS_BACKEND environment variable and falls
# back to "taco2" when the variable is unset.
tts_backend = os.environ.get("TTS_BACKEND", "taco2")
# Built once at import time; every TTSService method below uses this single
# module-level instance.
tts_synthesizer = TTSSynthesizer(backend=tts_backend)
class TTSService(rpyc.Service):
    """RPyC service that exposes speech synthesis to connected clients.

    Both exposed methods delegate to the module-level ``tts_synthesizer``.
    """

    def on_connect(self, conn):
        """Hook run when a connection is created; nothing to initialise."""

    def on_disconnect(self, conn):
        """Hook run after a connection has closed; nothing to finalise."""

    def exposed_synth_speech(self, utterance: str):
        """Synthesize ``utterance`` and return the synthesizer's result."""
        return tts_synthesizer.synth_speech(utterance)

    def exposed_synth_speech_cb(self, utterance: str, respond):
        """Synthesize ``utterance`` and hand the result to ``respond``.

        Nothing is returned; the audio is delivered only through the
        ``respond`` callable supplied by the caller.
        """
        respond(tts_synthesizer.synth_speech(utterance))
def main():
    """Configure logging, then start a ``ThreadedServer`` for ``TTSService``.

    The listening port is read from the ``TTS_RPYC_PORT`` environment
    variable and defaults to ``7754`` when the variable is unset.
    """
    logging.basicConfig(
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
        level=logging.INFO,
    )
    listen_port = int(os.environ.get("TTS_RPYC_PORT", "7754"))
    logging.info("starting tts server...")
    ThreadedServer(TTSService, port=listen_port).start()


if __name__ == "__main__":
    main()

45
taco2/server/backend.py Normal file
View File

@@ -0,0 +1,45 @@
import os
from google.cloud import texttospeech
from ..tts import TTSModel
# Comma-separated pair of model weight paths, read from the TTS_MODELS
# environment variable with a two-path default.
tts_model_weights = os.environ.get(
"TTS_MODELS", "models/tacotron2_statedict.pt,models/waveglow_256channels.pt"
)
# NOTE(review): tts_creds is not referenced anywhere else in the visible code;
# presumably the Google client reads GOOGLE_APPLICATION_CREDENTIALS from the
# environment on its own -- confirm before relying on this default path.
tts_creds = os.environ.get(
"GOOGLE_APPLICATION_CREDENTIALS", "/code/config/gre2e/keys/gre2e_gcp.json"
)
# Split on the first comma only: the first path is bound to `taco2`, the rest
# to `wav_glow`. A value without any comma yields a single element, so this
# unpacking raises ValueError at import time.
taco2, wav_glow = tts_model_weights.split(",", 1)
class TTSSynthesizer(object):
    """Speech synthesizer that dispatches to one of two backends.

    After construction, ``self.synth_speech`` is a callable that takes the
    text to speak:

    * ``"taco2"`` -- the ``synth_speech`` method of a ``TTSModel`` built from
      the module-level ``taco2`` and ``wav_glow`` paths.
    * ``"gcp"`` -- a closure that calls Google Cloud Text-to-Speech with an
      ``en-US`` voice and LINEAR16 encoding and returns
      ``response.audio_content``.

    Args:
        backend: Backend name, either ``"taco2"`` (default) or ``"gcp"``.

    Raises:
        ValueError: If ``backend`` is not a supported backend name.
    """

    def __init__(self, backend="taco2"):
        super(TTSSynthesizer, self).__init__()
        if backend == "taco2":
            # Constructing TTSModel loads the models from the configured paths.
            tts_model = TTSModel(taco2, wav_glow)
            self.synth_speech = tts_model.synth_speech
        elif backend == "gcp":
            client = texttospeech.TextToSpeechClient()
            # Voice request: only the language code ("en-US") is selected.
            voice = texttospeech.types.VoiceSelectionParams(language_code="en-US")
            # Type of audio to be returned by the service.
            audio_config = texttospeech.types.AudioConfig(
                audio_encoding=texttospeech.enums.AudioEncoding.LINEAR16
            )

            def gcp_synthesize(speech_text):
                """Send ``speech_text`` to the service and return its audio."""
                synthesis_input = texttospeech.types.SynthesisInput(text=speech_text)
                response = client.synthesize_speech(
                    synthesis_input, voice, audio_config
                )
                return response.audio_content

            self.synth_speech = gcp_synthesize
        else:
            # Fail at construction: otherwise the instance has no
            # ``synth_speech`` attribute and the error only shows up later as
            # an AttributeError on the first synthesis call.
            raise ValueError(
                f"unsupported TTS backend {backend!r}; expected 'taco2' or 'gcp'"
            )

View File

@@ -3,9 +3,9 @@
import numpy as np import numpy as np
import torch import torch
import pyaudio
import klepto import klepto
import argparse import argparse
import warnings
from pathlib import Path from pathlib import Path
from .model import Tacotron2 from .model import Tacotron2
from glow import WaveGlow from glow import WaveGlow
@@ -156,6 +156,11 @@ class TTSModel(object):
def player_gen(): def player_gen():
try:
import pyaudio
except ModuleNotFoundError:
warnings.warn("module 'pyaudio' is not installed requried for playback")
return
audio_interface = pyaudio.PyAudio() audio_interface = pyaudio.PyAudio()
_audio_stream = audio_interface.open( _audio_stream = audio_interface.open(
format=pyaudio.paInt16, channels=1, rate=OUTPUT_SAMPLE_RATE, output=True format=pyaudio.paInt16, channels=1, rate=OUTPUT_SAMPLE_RATE, output=True