diff --git a/tts-wav-gen.py b/tts-wav-gen.py index 4256f7e..90ac549 100644 --- a/tts-wav-gen.py +++ b/tts-wav-gen.py @@ -8,9 +8,15 @@ import re import subprocess +OUTPUT_NAME = 'audio' +def create_output_dir(): + direc = os.path.abspath('.')+'/outputs/'+OUTPUT_NAME+'/' + if not os.path.exists(direc): + os.mkdir(direc) +create_output_dir() dest_filename = lambda n,v,r,t: '{}-{}-{}-{}-'.format(n,v,r,t)+str(random.randint(0,10000))+'.aiff' -dest_path = lambda p: os.path.abspath('.')+'/outputs/audio/'+p -dest_url = lambda p: NSURL.fileURLWithPath_(dest_path(p)) +dest_path = lambda n: os.path.abspath('.')+'/outputs/'+OUTPUT_NAME+'/'+n +dest_url = lambda p: NSURL.fileURLWithPath_(p) def cli_gen_audio(word,rate,voice,out_path): subprocess.call(['say','-v',voice,'-r',str(rate),'-o',out_path,word]) @@ -38,7 +44,6 @@ class SynthVariant(object): super(SynthVariant, self).__init__() self.synth = NSSpeechSynthesizer.alloc().initWithVoice_(identifier) self.synth.setVolume_(100) - # sp.setVoice_(identifier) self.synth.setRate_(rate) self.phone_synth = NSSpeechSynthesizer.alloc().initWithVoice_(identifier) self.phone_synth.setVolume_(100) @@ -55,28 +60,21 @@ class SynthVariant(object): def generate_audio(self,word): fname = dest_filename(word,self.name,self.rate,self.operation) d_path = dest_path(fname) - d_url = dest_url(fname) - started = False + d_url = dest_url(d_path) if self.operation == 'normal': - # self.synth.startSpeakingString_toURL_(word,d_url) - cli_gen_audio(word,self.rate,self.name,d_path) + self.synth.startSpeakingString_toURL_(word,d_url) + # cli_gen_audio(word,self.rate,self.name,d_path) else: orig_phon = self.synth.phonemesFromText_(word) phon = '[[inpt PHON]] '+re.sub('[0-9]','',orig_phon) + # phon = re.sub('[0-9]','',orig_phon) cli_gen_audio(phon,self.rate,self.name,d_path) # if phon != '': - # self.phone_synth.startSpeakingString_toURL_(phon,d_url) + # self.phone_synth.startSpeakingString_toURL_(phon,d_url) # else: # self.synth.startSpeakingString_toURL_(word,d_url) return SynthFile(word,fname,self.name,self.rate,self.operation) - def synth_file(self,word): - # s = objc.selector(self.generate_audio,signature=b"@@:@") - # obj = NSObject.alloc().init() - # sf = obj.performSelectorOnMainThread_withObject_waitUntilDone_(s,word,True) - # return sf - return self.generate_audio(word) - def synth_generator(): voices_installed = NSSpeechSynthesizer.availableVoices() @@ -96,9 +94,7 @@ def synth_generator(): all_synths = [] for w in words: for s in voice_synths: - all_synths.append(s.synth_file(w)) - # print(s) - # return [s.synth_file(word) for s in voice_synths] + all_synths.append(s.generate_audio(w)) return all_synths return synth_for_words @@ -115,13 +111,20 @@ def generate_audio_for_stories(): stories_data = json.load(open('./inputs/all_stories_hs.json')) word_list = [t[0] for i in stories_data.values() for t in i] words_audio_synth = synth_generator() - # all_synths = [] - # for word in word_list[:1]: - # words_synths = word_audio_synth(word) - # all_synths.extend(words_synths) return words_audio_synth(word_list) -# synths = synth_generator()(['education']) -synths = generate_audio_for_stories() -write_synths(synths,'./outputs/synth_data.csv',True) +# words_audio_synth = synth_generator() +# synth = NSSpeechSynthesizer.alloc().init() +# voices_installed = NSSpeechSynthesizer.availableVoices() +# voice_attrs = [NSSpeechSynthesizer.attributesForVoice_(v) for v in voices_installed] +# us_voices_ids = [v['VoiceIdentifier'] for v in voice_attrs if v['VoiceLanguage'] == 'en-US' and v['VoiceIdentifier'].split('.')[-1][0].isupper()] +# synth.setVoice_(us_voices_ids[2]) +# synth.startSpeakingString_('your') +# fname = dest_filename(word,self.name,self.rate,self.operation) +# d_path = dest_path(fname) +# d_url = dest_url(d_path) + +synths = synth_generator()([OUTPUT_NAME]) +# synths = generate_audio_for_stories() +write_synths(synths,'./outputs/'+OUTPUT_NAME+'.csv',True) # write_synths(synths,'./outputs/synths.json')