refactored module structure

2026-06-13 12:32:08 +00:00 · 2020-05-21 16:47:45 +05:30
parent 2d5b720284
commit fca9c1aeb3
23 changed files with 17 additions and 115 deletions
--- a/jasper/data/tts/init.py
+++ b/jasper/data/tts/init.py
--- a/jasper/data/tts/googletts.py
+++ b/jasper/data/tts/googletts.py
@@ -0,0 +1,52 @@
+from logging import getLogger
+from google.cloud import texttospeech
+
+LOGGER = getLogger("googletts")
+
+
+class GoogleTTS(object):
+    """Thin wrapper around the Google Cloud Text-to-Speech client."""
+
+    def __init__(self):
+        self.client = texttospeech.TextToSpeechClient()
+
+    def text_to_speech(self, text: str, params: dict) -> bytes:
+        """Synthesize ``text`` (sent as SSML) into LINEAR16 audio bytes.
+
+        ``params`` must provide the "language", "name" and "sample_rate" keys.
+        """
+        tts_types = texttospeech.types
+        ssml_input = tts_types.SynthesisInput(ssml=text)
+        voice_cfg = tts_types.VoiceSelectionParams(
+            language_code=params["language"], name=params["name"]
+        )
+        audio_cfg = tts_types.AudioConfig(
+            audio_encoding=texttospeech.enums.AudioEncoding.LINEAR16,
+            sample_rate_hertz=params["sample_rate"],
+        )
+        reply = self.client.synthesize_speech(ssml_input, voice_cfg, audio_cfg)
+        return reply.audio_content
+
+    @classmethod
+    def voice_list(cls):
+        """List the voices that support at least one "en*" language code.
+
+        Each entry has "name", "language", "gender", "engine", "sample_rate".
+        """
+        described = []
+        # cls() builds a throwaway instance just to reach its API client.
+        for entry in cls().client.list_voices().voices:
+            english = [code for code in entry.language_codes if code[:2] == "en"]
+            if not english:
+                continue  # voice offers no English language code
+            gender = texttospeech.enums.SsmlVoiceGender(entry.ssml_gender)
+            described.append(
+                {
+                    "name": entry.name,
+                    "language": ",".join(english),
+                    "gender": gender.name,
+                    "engine": "wavenet" if "Wav" in entry.name else "standard",
+                    "sample_rate": entry.natural_sample_rate_hertz,
+                }
+            )
+        return described
--- a/jasper/data/tts/ttsclient.py
+++ b/jasper/data/tts/ttsclient.py
@@ -0,0 +1,26 @@
+"""
+TTSClient Abstract Class
+"""
+from abc import ABC, abstractmethod
+
+
+class TTSClient(ABC):
+    """
+    Abstract base class for TTS (text-to-speech) clients
+    """
+
+    @abstractmethod
+    def text_to_speech(self, text: str, num_channels: int, sample_rate: int,
+                       audio_encoding) -> bytes:
+        """
+        convert text to audio bytes; subclasses must implement this
+
+        Arguments:
+            text {str} -- text to convert
+            num_channels {int} -- channel setting for the output audio bytes
+            sample_rate {int} -- rate for the output audio bytes
+            audio_encoding {[type]} -- encoding for the output audio bytes
+
+        Returns:
+            bytes -- audio produced from the text
+        """