"""Generate spoken-word audio files with the macOS speech synthesizer.

For every installed en-US voice and a fixed set of speaking rates, each input
word is rendered to an ``.aiff`` file through the ``say`` command line tool in
three variants ('low', 'medium', 'high').  A CSV (or JSON) index describing the
generated files is written next to the audio directory.
"""
import objc
from AppKit import NSSpeechSynthesizer, NSSpeechInputModeProperty, NSSpeechModePhoneme
from Foundation import NSURL, NSError, NSObject
import json
import random
import os
import re
import subprocess

OUTPUT_NAME = 'audio'
# Root directory for the generated audio: <cwd>/outputs/<OUTPUT_NAME>/
dest_dir = os.path.abspath('.') + '/outputs/' + OUTPUT_NAME + '/'
# Index file listing every generated recording.
dest_file = './outputs/' + OUTPUT_NAME + '.csv'


def create_dir(direc):
    """Create directory ``direc`` (and any missing parents) if it is absent."""
    # os.makedirs is required because the per-voice/per-rate directories are
    # nested and the './outputs' parent may not exist yet; os.mkdir would fail.
    if not os.path.isdir(direc):
        os.makedirs(direc)


def dest_filename(n, v, r, t):
    """Return '<n>-<v>-<r>-<t>-<random int>.aiff'.

    The random suffix (0..10000 inclusive) keeps repeated renderings of the
    same combination from sharing a file name.
    """
    return '{}-{}-{}-{}-'.format(n, v, r, t) + str(random.randint(0, 10000)) + '.aiff'


def dest_path(v, r, n):
    """Return the output path ``dest_dir/<v>/<r>/<n>``.

    ``r`` may be an int or a str; it is converted explicitly because callers
    pass the numeric speaking rate.
    """
    return dest_dir + v + '/' + str(r) + '/' + n


def dest_url(p):
    """Wrap the filesystem path ``p`` in an ``NSURL`` file URL."""
    return NSURL.fileURLWithPath_(p)


def cli_gen_audio(speech_cmd, rate, voice, out_path):
    """Render ``speech_cmd`` to ``out_path`` with the ``say`` command.

    Args:
        speech_cmd: text (optionally containing embedded speech commands)
            handed to ``say`` as its final argument.
        rate: speaking rate passed to ``-r``.
        voice: voice name passed to ``-v``.
        out_path: output audio file passed to ``-o``.
    """
    subprocess.call(['say', '-v', voice, '-r', str(rate), '-o', out_path, speech_cmd])


class SynthFile(object):
    """Record describing one generated audio file."""

    def __init__(self, word, phon, filename, voice, rate, operation):
        super(SynthFile, self).__init__()
        self.word = word
        self.phoneme = phon
        self.filename = filename
        self.voice = voice
        self.rate = rate
        # The constructor argument is called ``operation`` but is stored as
        # ``variant``; both serializers below read ``self.variant``.
        self.variant = operation

    def get_json(self):
        """Return a JSON-serializable dict describing this file."""
        # The 'operation' key is kept for existing consumers; its value comes
        # from ``self.variant`` (there is no ``self.operation`` attribute).
        return {'filename': self.filename, 'voice': self.voice,
                'rate': self.rate, 'operation': self.variant}

    def get_csv(self):
        """Return one CSV line: word,phoneme,voice,rate,variant,filename."""
        # Six placeholders for six values; with only five the filename was
        # silently dropped from every row.
        return '{},{},{},{},{},{}\n'.format(self.word, self.phoneme, self.voice,
                                            self.rate, self.variant, self.filename)


class SynthVariant(object):
    """A (voice, rate) pair that can render words to audio files."""

    def __init__(self, identifier, rate):
        """Set up synthesizers for voice ``identifier`` at speaking ``rate``."""
        super(SynthVariant, self).__init__()
        # Text-mode synthesizer; used to obtain the phonemes for a word.
        self.synth = NSSpeechSynthesizer.alloc().initWithVoice_(identifier)
        self.synth.setVolume_(100)
        self.synth.setRate_(rate)
        # Second synthesizer switched to phoneme input mode.
        self.phone_synth = NSSpeechSynthesizer.alloc().initWithVoice_(identifier)
        self.phone_synth.setVolume_(100)
        self.phone_synth.setRate_(rate)
        self.phone_synth.setObject_forProperty_error_(
            NSSpeechModePhoneme, NSSpeechInputModeProperty, None)
        self.identifier = identifier
        self.rate = rate
        # Short voice name: last dot-separated component of the identifier.
        self.name = identifier.split('.')[-1]

    def __repr__(self):
        return 'Synthesizer[{} - {}]({})'.format(self.name, self.rate, self.identifier)

    def generate_audio(self, word, variant):
        """Render ``word`` to an .aiff file and return its ``SynthFile``.

        Args:
            word: the text to speak.
            variant: 'low' and 'high' speak the plain word and record its
                phonemes unchanged; 'medium' removes all digits from the
                phoneme string (presumably the stress markers -- confirm
                against the Apple phoneme reference) and speaks that string
                in phoneme input mode.  Any other value speaks the plain
                word and records an empty phoneme string.

        Returns:
            A ``SynthFile`` describing the generated recording.
        """
        orig_phon = self.synth.phonemesFromText_(word)
        phoneme, phon_cmd = '', word
        if variant == 'low':
            phoneme = orig_phon
        elif variant == 'medium':
            phoneme = re.sub('[0-9]', '', orig_phon)
            # '[[inpt PHON]]' switches ``say`` to phoneme input.
            phon_cmd = '[[inpt PHON]] ' + phoneme
        elif variant == 'high':
            phoneme = orig_phon
            phon_cmd = word
        fname = dest_filename(word, phoneme, self.name, self.rate)
        d_path = dest_path(self.name, self.rate, fname)
        cli_gen_audio(phon_cmd, self.rate, self.name, d_path)
        return SynthFile(word, phoneme, fname, self.name, self.rate, variant)


def synth_generator():
    """Build synthesizers for all en-US voices and return a rendering function.

    Creates the output directory tree as a side effect.

    Returns:
        A function ``synth_for_words(words)`` that renders every word with
        every (voice, rate) synthesizer in the 'low', 'medium' and 'high'
        variants and returns the list of resulting ``SynthFile`` objects.
    """
    voices_installed = NSSpeechSynthesizer.availableVoices()
    voice_attrs = [NSSpeechSynthesizer.attributesForVoice_(v) for v in voices_installed]
    # Keep en-US voices whose short name starts with an upper-case letter.
    us_voices_ids = [v['VoiceIdentifier'] for v in voice_attrs
                     if v['VoiceLanguage'] == 'en-US'
                     and v['VoiceIdentifier'].split('.')[-1][0].isupper()]
    # us_voices_ids = ['com.apple.speech.synthesis.voice.Fred','com.apple.speech.synthesis.voice.Alex',
    #                  'com.apple.speech.synthesis.voice.Victoria']
    # voice_rates = list(range(150,221,(220-180)//4))
    voice_rates = [150, 180, 210, 250]
    voice_synths = []
    create_dir(dest_dir)
    for v in us_voices_ids:
        for r in voice_rates:
            synth = SynthVariant(v, r)
            # The directory must match what generate_audio() writes to, which
            # is keyed by the short voice name (not the full identifier) and
            # the rate rendered as a string.
            create_dir(dest_dir + synth.name + '/' + str(r))
            voice_synths.append(synth)

    def synth_for_words(words):
        """Render every word with every synthesizer and variant."""
        all_synths = []
        for w in words:
            for s in voice_synths:
                for v in ['low', 'medium', 'high']:
                    all_synths.append(s.generate_audio(w, v))
        return all_synths

    return synth_for_words


def write_synths(synth_list, fname, csv=False):
    """Write the index of ``synth_list`` to ``fname``.

    Args:
        synth_list: iterable of ``SynthFile`` objects.
        fname: destination file path (overwritten).
        csv: when true write one CSV line per entry, otherwise dump a JSON
            list of the entries' ``get_json()`` dicts.
    """
    # ``with`` guarantees the file is closed even if serialization fails.
    with open(fname, 'w') as f:
        if csv:
            for s in synth_list:
                f.write(s.get_csv())
        else:
            json.dump([s.get_json() for s in synth_list], f)


def generate_audio_for_stories():
    """Render audio for every word listed in './inputs/all_stories_hs.json'.

    The JSON file is read as a mapping whose values are sequences of items;
    the first element of each item is used as the word.

    Returns:
        The list of ``SynthFile`` objects for all generated recordings.
    """
    with open('./inputs/all_stories_hs.json') as stories_file:
        stories_data = json.load(stories_file)
    word_list = [t[0] for i in stories_data.values() for t in i]
    words_audio_synth = synth_generator()
    return words_audio_synth(word_list)


# Script body: render the single word OUTPUT_NAME with every voice/rate and
# write the CSV index.  Swap in the commented lines to process the stories
# file or to emit JSON instead.
synths = synth_generator()([OUTPUT_NAME])
# synths = generate_audio_for_stories()
write_synths(synths, dest_file, True)
# write_synths(synths,'./outputs/synths.json')