"""Render words to AIFF audio with the macOS speech synthesizer.

For every installed en-US voice, a range of speaking rates and two input
modes ('normal' text and 'phoneme'), each word read from the stories JSON is
spoken to a file under ``./outputs/audio/``.  Metadata describing every
requested file is then written to a CSV file and a JSON file.
"""
import objc
from AppKit import NSSpeechSynthesizer, NSSpeechInputModeProperty, NSSpeechModePhoneme
from Foundation import NSURL, NSError
import json
import random
import os
import re

# Digits are removed from the phoneme string before it is re-spoken in
# phoneme mode; compiled once because it is used for every word/variant.
_DIGITS = re.compile(r'[0-9]')


def dest_filename(p):
    """Return ``p`` followed by a random integer in [0, 10000] and '.aiff'."""
    # NOTE: the suffix is random, not unique -- two calls with the same word
    # can return the same name, in which case the later file would be
    # written to the same path as the earlier one.
    return p + str(random.randint(0, 10000)) + '.aiff'


def dest_path(p):
    """Return the absolute path of ``p`` inside ``./outputs/audio/``."""
    return os.path.abspath('.') + '/outputs/audio/' + p


def dest_url(p):
    """Return a file ``NSURL`` pointing at ``dest_path(p)``."""
    return NSURL.fileURLWithPath_(dest_path(p))


class SynthFile(object):
    """Metadata record for one requested audio file.

    Attributes:
        word: the text that was handed to the synthesizer.
        filename: name of the audio file (as produced by ``dest_filename``).
        voice: short voice name (last dotted component of the identifier).
        rate: speaking rate passed to the synthesizer.
        operation: 'normal' or 'phoneme'.
    """

    def __init__(self, word, filename, voice, rate, operation):
        super(SynthFile, self).__init__()
        self.word = word
        self.filename = filename
        self.voice = voice
        self.rate = rate
        self.operation = operation

    def get_json(self):
        """Return a JSON-serialisable dict (note: ``word`` is not included)."""
        return {
            'filename': self.filename,
            'voice': self.voice,
            'rate': self.rate,
            'operation': self.operation,
        }

    def get_csv(self):
        """Return one newline-terminated row: word,voice,rate,operation,filename."""
        # NOTE: fields are joined verbatim; a value containing a comma or a
        # newline is not quoted and would produce a malformed row.
        return '{},{},{},{},{}\n'.format(
            self.word, self.voice, self.rate, self.operation, self.filename)


class SynthVariant(object):
    """One (voice, rate, operation) combination able to render words to files.

    Two synthesizers are kept per variant: ``synth`` takes plain text and
    ``phone_synth`` is switched to phoneme input mode.
    """

    def __init__(self, identifier, rate, op):
        """Create both synthesizers for voice ``identifier`` at ``rate``.

        Args:
            identifier: voice identifier string; its last dotted component
                is stored as ``name``.
            rate: speaking rate handed to ``setRate_``.
            op: 'normal' to speak the word as text; any other value makes
                ``synth_file`` take the phoneme path.
        """
        super(SynthVariant, self).__init__()
        self.synth = self._make_synthesizer(identifier, rate)

        p_syn = self._make_synthesizer(identifier, rate)
        # Make this synthesizer interpret its input as phonemes, not text.
        p_syn.setObject_forProperty_error_(
            NSSpeechModePhoneme, NSSpeechInputModeProperty, None)
        self.phone_synth = p_syn

        self.identifier = identifier
        self.rate = rate
        self.name = identifier.split('.')[-1]
        self.operation = op

    @staticmethod
    def _make_synthesizer(identifier, rate):
        """Return a new synthesizer configured with volume, voice and rate."""
        sp = NSSpeechSynthesizer.alloc().init()
        sp.setVolume_(100)
        sp.setVoice_(identifier)
        sp.setRate_(rate)
        return sp

    def synth_file(self, word):
        """Request synthesis of ``word`` to a new file and describe it.

        In 'normal' mode the word is spoken as text.  Otherwise the word is
        converted to phonemes, digits are stripped from the phoneme string,
        and the result is spoken by the phoneme-mode synthesizer.

        Returns:
            SynthFile: metadata for the requested file.
        """
        fname = dest_filename(word)
        d_url = dest_url(fname)
        # NOTE(review): the result of startSpeakingString_toURL_ is ignored
        # and nothing here waits for the audio to be written -- confirm the
        # files are complete before they are consumed.
        if self.operation == 'normal':
            self.synth.startSpeakingString_toURL_(word, d_url)
        else:
            orig_phon = self.synth.phonemesFromText_(word)
            phon = _DIGITS.sub('', orig_phon)
            self.phone_synth.startSpeakingString_toURL_(phon, d_url)
        return SynthFile(word, fname, self.name, self.rate, self.operation)


def synth_generator():
    """Build every voice/rate/operation variant and return a word renderer.

    Variants are created once, for each installed voice whose
    'VoiceLanguage' attribute equals 'en-US', each rate in
    180, 188, ..., 220 and each of the operations 'normal' and 'phoneme'.

    Returns:
        callable: ``synth_for_word(word)`` which runs ``synth_file(word)`` on
        every variant and returns the resulting list of ``SynthFile``.
    """
    voices_installed = NSSpeechSynthesizer.availableVoices()
    voice_attrs = [NSSpeechSynthesizer.attributesForVoice_(v)
                   for v in voices_installed]
    us_voices_ids = [v['VoiceIdentifier'] for v in voice_attrs
                     if v['VoiceLanguage'] == 'en-US']
    # Six evenly spaced rates: 180, 188, 196, 204, 212, 220.
    voice_rates = list(range(180, 221, (220 - 180) // 5))
    variants = ['normal', 'phoneme']

    voice_synths = []
    for v in us_voices_ids:
        for r in voice_rates:
            for o in variants:
                voice_synths.append(SynthVariant(v, r, o))

    def synth_for_word(word):
        return [s.synth_file(word) for s in voice_synths]

    return synth_for_word


def write_synths(synth_list, fname, csv=False):
    """Write the metadata of ``synth_list`` to ``fname``.

    Args:
        synth_list: iterable of ``SynthFile`` objects.
        fname: path of the output file (overwritten if it exists).
        csv: when true, write one ``get_csv()`` row per item; otherwise dump
            a JSON list of the ``get_json()`` dicts.
    """
    # The context manager closes the file even if a write raises.
    with open(fname, 'w') as f:
        if csv:
            for s in synth_list:
                f.write(s.get_csv())
        else:
            json.dump([s.get_json() for s in synth_list], f)


def generate_audio_for_stories():
    """Synthesize every word listed in ``./inputs/all_stories_hs.json``.

    The JSON document is read as a mapping whose values are sequences of
    items; element 0 of each item is used as the word.  Words are processed
    in file order and are not de-duplicated.

    Returns:
        list: the ``SynthFile`` records of all words, concatenated.
    """
    with open('./inputs/all_stories_hs.json') as stories_file:
        stories_data = json.load(stories_file)
    word_list = [t[0] for i in stories_data.values() for t in i]
    word_audio_synth = synth_generator()
    all_synths = []
    for word in word_list:
        all_synths.extend(word_audio_synth(word))
    return all_synths


# Single-word alternative: synths = synth_generator()('education')
synths = generate_audio_for_stories()
write_synths(synths, './outputs/synth_data.csv', True)
write_synths(synths, './outputs/synths.json')