discarding phoneme-incapable synthesizers

2017-10-26 16:51:32 +05:30
parent a953fa3355
commit e57576d6fa
2 changed files with 16 additions and 4 deletions
--- a/TODO.md
+++ b/TODO.md
@@ -2,3 +2,4 @@
 1. create spectrograms of 150ms windows with 50ms overlap for each word.
 2. train a rnn to output a vector using the spectrograms
 3. train a nn to output True/False based on the acceptability of the rnn output. -> Siamese network(implementation detail)
+4. validate with real-world samples
--- a/tts_samplegen.py
+++ b/tts_samplegen.py
@@ -89,11 +89,16 @@ class SynthVariant(object):
        self.rate = rate
        self.name = voice
        self.lang = lang
+        self.phoneme_capable = self.is_phoneme_capable()
        create_dir(dest_dir + self.name + '/' + str(self.rate))

    def __repr__(self):
        return 'Synthesizer[{} - {}]'.format(self.name, self.rate)

+    def is_phoneme_capable(self):
+        orig_phon = self.synth.phonemesFromText_('water')
+        return orig_phon != ''
+
    def generate_audio(self, word, variant):
        orig_phon, phoneme, phon_cmd = self.synth.phonemesFromText_(
            word), '', word
@@ -105,7 +110,7 @@ class SynthVariant(object):
            phon_cmd = '[[inpt PHON]] ' + phoneme
        elif variant == 'high':
            phoneme = orig_phon
-            phon_cmd = word
+            phon_cmd = '[[inpt PHON]] ' + phoneme
        # elif variant == 'long':
        # if phon != '':
        # self.phone_synth.startSpeakingString_toURL_(phon,d_url)
@@ -152,8 +157,11 @@ def synth_generator():
    for (i, v, l) in us_voices_ids:
        for r in voice_rates:
            s = SynthVariant(i, v, l, r)
-            print('Created ', s)
-            voice_synths.append(s)
+            if s.phoneme_capable:
+                print('Adding ', s)
+                voice_synths.append(s)
+            else:
+                print('Discarding phoneme incapable ', s)

    def synth_for_words(words, writer):
        # all_synths = []
@@ -233,6 +241,9 @@ def generate_audio_for_stories():


 # synths = synth_generator()([OUTPUT_NAME])
-generate_audio_for_stories()
+
 # write_synths(synths, dest_file, True)
 # write_synths(synths,'./outputs/synths.json')
+
+if __name__ == '__main__':
+    generate_audio_for_stories()