diff --git a/tts-wav-gen.py b/tts-wav-gen.py
index 91b1f23..60a62ff 100644
--- a/tts-wav-gen.py
+++ b/tts-wav-gen.py
@@ -1,16 +1,20 @@
 import objc
 from AppKit import NSSpeechSynthesizer,NSSpeechInputModeProperty,NSSpeechModePhoneme
-from Foundation import NSURL,NSError
+from Foundation import NSURL,NSError,NSObject
 
 import json
 import random
 import os
 import re
+import subprocess
 
 dest_filename = lambda p: p+str(random.randint(0,10000))+'.aiff'
 dest_path = lambda p: os.path.abspath('.')+'/outputs/audio/'+p
 dest_url = lambda p: NSURL.fileURLWithPath_(dest_path(p))
 
+def cli_gen_audio(word,rate,voice,out_path):
+    subprocess.call(['say','-v',voice,'-r',str(rate),'-o',out_path,word])
+
 class SynthFile(object):
     """docstring for SynthFile."""
     def __init__(self,word, filename,voice,rate,operation):
@@ -32,49 +36,68 @@ class SynthVariant(object):
     """docstring for SynthVariant."""
     def __init__(self,identifier,rate,op):
         super(SynthVariant, self).__init__()
-        sp = NSSpeechSynthesizer.alloc().init()
-        sp.setVolume_(100)
-        sp.setVoice_(identifier)
-        sp.setRate_(rate)
-        self.synth = sp
-        p_syn = NSSpeechSynthesizer.alloc().init()
-        p_syn.setVolume_(100)
-        p_syn.setVoice_(identifier)
-        p_syn.setRate_(rate)
-        p_syn.setObject_forProperty_error_(NSSpeechModePhoneme,NSSpeechInputModeProperty,None)
-        self.phone_synth = p_syn
+        self.synth = NSSpeechSynthesizer.alloc().initWithVoice_(identifier)
+        self.synth.setVolume_(100)
+        # sp.setVoice_(identifier)
+        self.synth.setRate_(rate)
+        self.phone_synth = NSSpeechSynthesizer.alloc().initWithVoice_(identifier)
+        self.phone_synth.setVolume_(100)
+        self.phone_synth.setRate_(rate)
+        self.phone_synth.setObject_forProperty_error_(NSSpeechModePhoneme,NSSpeechInputModeProperty,None)
         self.identifier = identifier
         self.rate = rate
         self.name = identifier.split('.')[-1]
         self.operation = op
+    def __repr__(self):
+        return 'Synthesizer[{} - {}]({})'.format(self.name,self.rate,self.operation)
 
-    def synth_file(self,word):
+    def generate_audio(self,word):
         fname = dest_filename(word)
+        d_path = dest_path(fname)
         d_url = dest_url(fname)
+        started = False
         if self.operation == 'normal':
-            self.synth.startSpeakingString_toURL_(word,d_url)
+            # self.synth.startSpeakingString_toURL_(word,d_url)
+            cli_gen_audio(word,self.rate,self.name,d_path)
         else:
             orig_phon = self.synth.phonemesFromText_(word)
-            phon = re.sub('[0-9]','',orig_phon)
-            self.phone_synth.startSpeakingString_toURL_(phon,d_url)
+            phon = '[[inpt PHON]] '+re.sub('[0-9]','',orig_phon)
+            cli_gen_audio(phon,self.rate,self.name,d_path)
+            # if phon != '':
+            # self.phone_synth.startSpeakingString_toURL_(phon,d_url)
+            # else:
+            # self.synth.startSpeakingString_toURL_(word,d_url)
         return SynthFile(word,fname,self.name,self.rate,self.operation)
 
+    def synth_file(self,word):
+        # s = objc.selector(self.generate_audio,signature=b"@@:@")
+        # obj = NSObject.alloc().init()
+        # sf = obj.performSelectorOnMainThread_withObject_waitUntilDone_(s,word,True)
+        # return sf
+        return self.generate_audio(word)
+
 
 def synth_generator():
     voices_installed = NSSpeechSynthesizer.availableVoices()
     voice_attrs = [NSSpeechSynthesizer.attributesForVoice_(v) for v in voices_installed]
     us_voices_ids = [v['VoiceIdentifier'] for v in voice_attrs if v['VoiceLanguage'] == 'en-US']
 
-    voice_rates = list(range(180,221,(220-180)//5))
+    voice_rates = list(range(180,221,(220-180)//4))
     voice_synths = []
     variants = ['normal','phoneme']
     for v in us_voices_ids:
         for r in voice_rates:
             for o in variants:
                 voice_synths.append(SynthVariant(v,r,o))
-    def synth_for_word(word):
-        return [s.synth_file(word) for s in voice_synths]
-    return synth_for_word
+    def synth_for_words(words):
+        all_synths = []
+        for s in voice_synths:
+            for w in words:
+                all_synths.append(s.synth_file(w))
+            # print(s)
+        # return [s.synth_file(word) for s in voice_synths]
+        return all_synths
+    return synth_for_words
 
 def write_synths(synth_list,fname,csv=False):
     f = open(fname,'w')
@@ -88,14 +111,14 @@ def write_synths(synth_list,fname,csv=False):
 def generate_audio_for_stories():
     stories_data = json.load(open('./inputs/all_stories_hs.json'))
     word_list = [t[0] for i in stories_data.values() for t in i]
-    word_audio_synth = synth_generator()
-    all_synths = []
-    for word in word_list:
-        words_synths = word_audio_synth(word)
-        all_synths.extend(words_synths)
-    return all_synths
+    words_audio_synth = synth_generator()
+    # all_synths = []
+    # for word in word_list[:1]:
+    # words_synths = word_audio_synth(word)
+    # all_synths.extend(words_synths)
+    return words_audio_synth(word_list)
 
-# synths = synth_generator()('education')
+# synths = synth_generator()(['education'])
 synths = generate_audio_for_stories()
 write_synths(synths,'./outputs/synth_data.csv',True)
-write_synths(synths,'./outputs/synths.json')
+# write_synths(synths,'./outputs/synths.json')