From a851e80db2b36b6d8a31b027f6f273f366d402a7 Mon Sep 17 00:00:00 2001
From: Malar Kannan
Date: Thu, 5 Mar 2020 15:59:04 +0530
Subject: [PATCH] 1. added rpyc server optional package 2. updated path variable

---
Review revision notes (ignored by git am):
 - TTSSynthesizer now raises ValueError for an unknown backend instead of
   leaving synth_speech undefined until the first request.
 - Docstrings/comments added in taco2/server; hunk line counts updated.
 - Blob ids on the index lines of __main__.py and backend.py predate this
   revision.

 setup.py                 | 15 +++++++++--
 taco2/server/__init__.py |  0
 taco2/server/__main__.py | 53 +++++++++++++++++++++++++++++++++++++
 taco2/server/backend.py  | 56 ++++++++++++++++++++++++++++++++++++++++
 4 files changed, 122 insertions(+), 2 deletions(-)
 create mode 100644 taco2/server/__init__.py
 create mode 100644 taco2/server/__main__.py
 create mode 100644 taco2/server/backend.py

diff --git a/setup.py b/setup.py
index 2923e73..0dae066 100644
--- a/setup.py
+++ b/setup.py
@@ -20,7 +20,13 @@ requirements = [
     "torch~=1.1.0",
 ]
 
-extra_requirements = {"playback": ["PyAudio==0.2.11"]}
+extra_requirements = {
+    "playback": ["PyAudio==0.2.11"],
+    "server": [
+        "google-cloud-texttospeech==1.0.1",
+        "rpyc==4.1.4",
+    ],
+}
 
 setup_requirements = ["pytest-runner"]
 
@@ -57,5 +63,10 @@ setup(
     url="https://github.com/malarinv/tacotron2",
     version="0.3.0",
     zip_safe=False,
-    entry_points={"console_scripts": ("tts_debug = taco2.tts:main",)},
+    entry_points={
+        "console_scripts": (
+            "tts_debug = taco2.tts:main",
+            "tts_rpyc_server = taco2.server.__main__:main",
+        )
+    },
 )
diff --git a/taco2/server/__init__.py b/taco2/server/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/taco2/server/__main__.py b/taco2/server/__main__.py
new file mode 100644
index 0000000..2f0b63b
--- /dev/null
+++ b/taco2/server/__main__.py
@@ -0,0 +1,53 @@
+import os
+import logging
+
+import rpyc
+from rpyc.utils.server import ThreadedServer
+
+from .backend import TTSSynthesizer
+
+
+# The backend is picked once, at import time, from TTS_BACKEND ("taco2" by
+# default); the resulting synthesizer is used by every TTSService method.
+tts_backend = os.environ.get("TTS_BACKEND", "taco2")
+tts_synthesizer = TTSSynthesizer(backend=tts_backend)
+
+
+class TTSService(rpyc.Service):
+    """rpyc service that exposes the module-level synthesizer to clients."""
+
+    def on_connect(self, conn):
+        # code that runs when a connection is created
+        # (to init the service, if needed)
+        pass
+
+    def on_disconnect(self, conn):
+        # code that runs after the connection has already closed
+        # (to finalize the service, if needed)
+        pass
+
+    def exposed_synth_speech(self, utterance: str):
+        """Synthesize ``utterance`` and return the backend's audio result."""
+        speech_audio = tts_synthesizer.synth_speech(utterance)
+        return speech_audio
+
+    def exposed_synth_speech_cb(self, utterance: str, respond):
+        """Synthesize ``utterance`` and pass the audio to ``respond``."""
+        speech_audio = tts_synthesizer.synth_speech(utterance)
+        respond(speech_audio)
+
+
+def main():
+    """Set up logging and serve TTSService on TTS_RPYC_PORT (default 7754)."""
+    logging.basicConfig(
+        level=logging.INFO,
+        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+    )
+    port = int(os.environ.get("TTS_RPYC_PORT", "7754"))
+    logging.info("starting tts server...")
+    t = ThreadedServer(TTSService, port=port)
+    t.start()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/taco2/server/backend.py b/taco2/server/backend.py
new file mode 100644
index 0000000..eed20de
--- /dev/null
+++ b/taco2/server/backend.py
@@ -0,0 +1,56 @@
+import os
+
+from google.cloud import texttospeech
+from ..tts import TTSModel
+
+
+tts_model_weights = os.environ.get(
+    "TTS_MODELS", "models/tacotron2_statedict.pt,models/waveglow_256channels.pt"
+)
+
+tts_creds = os.environ.get(
+    "GOOGLE_APPLICATION_CREDENTIALS", "/code/config/gre2e/keys/gre2e_gcp.json"
+)
+taco2, wav_glow = tts_model_weights.split(",", 1)
+
+
+class TTSSynthesizer(object):
+    """Bind ``synth_speech(text)`` to the selected TTS backend.
+
+    ``backend`` is ``"taco2"`` (local ``TTSModel`` built from the two
+    ``TTS_MODELS`` paths) or ``"gcp"`` (Google Cloud Text-to-Speech, en-US,
+    LINEAR16 audio). Any other value raises ``ValueError``.
+    """
+
+    def __init__(self, backend="taco2"):
+        super(TTSSynthesizer, self).__init__()
+        if backend == "taco2":
+            tts_model = TTSModel(taco2, wav_glow)  # Loads the models
+            self.synth_speech = tts_model.synth_speech
+        elif backend == "gcp":
+            client = texttospeech.TextToSpeechClient()
+            # Build the voice request; only the language code ("en-US") is set,
+            # the voice gender is left to the API default
+            voice = texttospeech.types.VoiceSelectionParams(language_code="en-US")
+
+            # Select the type of audio file you want returned
+            audio_config = texttospeech.types.AudioConfig(
+                audio_encoding=texttospeech.enums.AudioEncoding.LINEAR16
+            )
+
+            # Perform the text-to-speech request on the text input with the selected
+            # voice parameters and audio file type
+            def gcp_synthesize(speech_text):
+                synthesis_input = texttospeech.types.SynthesisInput(text=speech_text)
+                response = client.synthesize_speech(
+                    synthesis_input, voice, audio_config
+                )
+                return response.audio_content
+
+            self.synth_speech = gcp_synthesize
+        else:
+            # Fail at construction time: without this branch the instance has
+            # no ``synth_speech`` and every request dies with AttributeError.
+            raise ValueError(
+                f"unknown TTS backend {backend!r}; expected 'taco2' or 'gcp'"
+            )