From e57576d6fa20bf23ac17b96313b9f6a68378d1f4 Mon Sep 17 00:00:00 2001
From: Malar Kannan <malar@avaz.in>
Date: Thu, 26 Oct 2017 16:51:32 +0530
Subject: [PATCH] discarding phoneme-incapable synthesizers

---
 TODO.md          |  1 +
 tts_samplegen.py | 19 +++++++++++++++----
 2 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/TODO.md b/TODO.md
index 7cd8d97..9bc98eb 100644
--- a/TODO.md
+++ b/TODO.md
@@ -2,3 +2,4 @@
 1. create spectrograms of 150ms windows with 50ms overlap for each word.
 2. train a rnn to output a vector using the spectrograms
 3. train a nn to output True/False based on the acceptability of the rnn output. -> Siamese network(implementation detail)
+4. validate with real-world samples
diff --git a/tts_samplegen.py b/tts_samplegen.py
index a7c318e..cb27f4e 100644
--- a/tts_samplegen.py
+++ b/tts_samplegen.py
@@ -89,11 +89,16 @@ class SynthVariant(object):
         self.rate = rate
         self.name = voice
         self.lang = lang
+        self.phoneme_capable = self.is_phoneme_capable()
         create_dir(dest_dir + self.name + '/' + str(self.rate))
 
     def __repr__(self):
         return 'Synthesizer[{} - {}]'.format(self.name, self.rate)
 
+    def is_phoneme_capable(self):
+        """Return True if the voice yields phonemes for a probe word."""
+        return bool(self.synth.phonemesFromText_('water'))  # '' or None
+
     def generate_audio(self, word, variant):
         orig_phon, phoneme, phon_cmd = self.synth.phonemesFromText_(
             word), '', word
@@ -105,7 +110,7 @@ class SynthVariant(object):
             phon_cmd = '[[inpt PHON]] ' + phoneme
         elif variant == 'high':
             phoneme = orig_phon
-            phon_cmd = word
+            phon_cmd = '[[inpt PHON]] ' + phoneme
         # elif variant == 'long':
         # if phon != '':
         # self.phone_synth.startSpeakingString_toURL_(phon,d_url)
@@ -152,8 +157,11 @@ def synth_generator():
     for (i, v, l) in us_voices_ids:
         for r in voice_rates:
             s = SynthVariant(i, v, l, r)
-            print('Created ', s)
-            voice_synths.append(s)
+            if s.phoneme_capable:
+                print('Adding ', s)
+                voice_synths.append(s)
+            else:
+                print('Discarding phoneme incapable ', s)
 
     def synth_for_words(words, writer):
         # all_synths = []
@@ -233,6 +241,9 @@ def generate_audio_for_stories():
 
 
 # synths = synth_generator()([OUTPUT_NAME])
-generate_audio_for_stories()
+
 # write_synths(synths, dest_file, True)
 # write_synths(synths,'./outputs/synths.json')
+
+if __name__ == '__main__':
+    generate_audio_for_stories()