1. Use the `say` CLI instead of the NSSpeechSynthesizer API, since the API sometimes generates empty responses.

2. Generate voices for all words for each synthesizer variant.
2017-10-05 11:02:38 +05:30
parent 9d700f18ca
commit 0337f0d5be
1 changed files with 51 additions and 28 deletions
--- a/tts-wav-gen.py
+++ b/tts-wav-gen.py
@@ -1,16 +1,20 @@
 import objc
 from AppKit import NSSpeechSynthesizer,NSSpeechInputModeProperty,NSSpeechModePhoneme
-from Foundation import NSURL,NSError
+from Foundation import NSURL,NSError,NSObject
 import json
 import random
 import os
 import re
+import subprocess


 dest_filename = lambda p: p+str(random.randint(0,10000))+'.aiff'
 dest_path = lambda p: os.path.abspath('.')+'/outputs/audio/'+p
 dest_url = lambda p: NSURL.fileURLWithPath_(dest_path(p))

+def cli_gen_audio(word,rate,voice,out_path):
+    subprocess.call(['say','-v',voice,'-r',str(rate),'-o',out_path,word])
+
 class SynthFile(object):
    """docstring for SynthFile."""
    def __init__(self,word, filename,voice,rate,operation):
@@ -32,49 +36,68 @@ class SynthVariant(object):
    """docstring for SynthVariant."""
    def __init__(self,identifier,rate,op):
        super(SynthVariant, self).__init__()
-        sp = NSSpeechSynthesizer.alloc().init()
-        sp.setVolume_(100)
-        sp.setVoice_(identifier)
-        sp.setRate_(rate)
-        self.synth = sp
-        p_syn = NSSpeechSynthesizer.alloc().init()
-        p_syn.setVolume_(100)
-        p_syn.setVoice_(identifier)
-        p_syn.setRate_(rate)
-        p_syn.setObject_forProperty_error_(NSSpeechModePhoneme,NSSpeechInputModeProperty,None)
-        self.phone_synth = p_syn
+        self.synth = NSSpeechSynthesizer.alloc().initWithVoice_(identifier)
+        self.synth.setVolume_(100)
+        # sp.setVoice_(identifier)
+        self.synth.setRate_(rate)
+        self.phone_synth = NSSpeechSynthesizer.alloc().initWithVoice_(identifier)
+        self.phone_synth.setVolume_(100)
+        self.phone_synth.setRate_(rate)
+        self.phone_synth.setObject_forProperty_error_(NSSpeechModePhoneme,NSSpeechInputModeProperty,None)
        self.identifier = identifier
        self.rate = rate
        self.name = identifier.split('.')[-1]
        self.operation = op

+    def __repr__(self):
+        return 'Synthesizer[{} - {}]({})'.format(self.name,self.rate,self.operation)

-    def synth_file(self,word):
+    def generate_audio(self,word):
        fname = dest_filename(word)
+        d_path = dest_path(fname)
        d_url = dest_url(fname)
+        started = False
        if self.operation == 'normal':
-            self.synth.startSpeakingString_toURL_(word,d_url)
+            # self.synth.startSpeakingString_toURL_(word,d_url)
+            cli_gen_audio(word,self.rate,self.name,d_path)
        else:
            orig_phon = self.synth.phonemesFromText_(word)
-            phon = re.sub('[0-9]','',orig_phon)
-            self.phone_synth.startSpeakingString_toURL_(phon,d_url)
+            phon = '[[inpt PHON]] '+re.sub('[0-9]','',orig_phon)
+            cli_gen_audio(phon,self.rate,self.name,d_path)
+            # if phon != '':
+            #     self.phone_synth.startSpeakingString_toURL_(phon,d_url)
+            # else:
+            #     self.synth.startSpeakingString_toURL_(word,d_url)
        return SynthFile(word,fname,self.name,self.rate,self.operation)

+    def synth_file(self,word):
+        # s = objc.selector(self.generate_audio,signature=b"@@:@")
+        # obj = NSObject.alloc().init()
+        # sf = obj.performSelectorOnMainThread_withObject_waitUntilDone_(s,word,True)
+        # return sf
+        return self.generate_audio(word)
+

 def synth_generator():
    voices_installed = NSSpeechSynthesizer.availableVoices()
    voice_attrs = [NSSpeechSynthesizer.attributesForVoice_(v) for v in  voices_installed]
    us_voices_ids = [v['VoiceIdentifier'] for v in voice_attrs if v['VoiceLanguage'] == 'en-US']
-    voice_rates = list(range(180,221,(220-180)//5))
+    voice_rates = list(range(180,221,(220-180)//4))
    voice_synths = []
    variants = ['normal','phoneme']
    for v in us_voices_ids:
        for r in voice_rates:
            for o in variants:
                voice_synths.append(SynthVariant(v,r,o))
-    def synth_for_word(word):
-        return [s.synth_file(word) for s in voice_synths]
-    return synth_for_word
+    def synth_for_words(words):
+        all_synths = []
+        for s in voice_synths:
+            for w in words:
+                all_synths.append(s.synth_file(w))
+            # print(s)
+        # return [s.synth_file(word) for s in voice_synths]
+        return all_synths
+    return synth_for_words

 def write_synths(synth_list,fname,csv=False):
    f = open(fname,'w')
@@ -88,14 +111,14 @@ def write_synths(synth_list,fname,csv=False):
 def generate_audio_for_stories():
    stories_data = json.load(open('./inputs/all_stories_hs.json'))
    word_list = [t[0] for i in stories_data.values() for t in i]
-    word_audio_synth = synth_generator()
-    all_synths = []
-    for word in word_list:
-        words_synths = word_audio_synth(word)
-        all_synths.extend(words_synths)
-    return all_synths
+    words_audio_synth = synth_generator()
+    # all_synths = []
+    # for word in word_list[:1]:
+    #     words_synths = word_audio_synth(word)
+    #     all_synths.extend(words_synths)
+    return words_audio_synth(word_list)

-# synths = synth_generator()('education')
+# synths = synth_generator()(['education'])
 synths = generate_audio_for_stories()
 write_synths(synths,'./outputs/synth_data.csv',True)
-write_synths(synths,'./outputs/synths.json')
+# write_synths(synths,'./outputs/synths.json')