"""Generate synthesized speech audio for story words using macOS voices.

For every installed en-US voice (at several speaking rates) and every word in
the story input file, three audio variants ('low', 'medium', 'high') are
rendered through the ``say`` command-line tool, and one metadata row per
rendered file is appended to a CSV log.
"""
import json
import os
import random
import re
import subprocess
import sys
import traceback

import objc
import progressbar
from AppKit import NSSpeechSynthesizer, NSSpeechInputModeProperty
from AppKit import NSSpeechModePhoneme
from Foundation import NSURL

from generate_similar import similar_phoneme, similar_word

OUTPUT_NAME = 'story_words'
# Absolute directory that receives the audio files (always ends with '/').
dest_dir = os.path.abspath('.') + '/outputs/' + OUTPUT_NAME + '/'
# Metadata log written next to the audio directory.
dest_file = './outputs/' + OUTPUT_NAME + '.csv'


def prog_bar(title):
    """Build a progress bar whose label can be changed while it runs.

    Args:
        title: Text shown at the start of the progress line.

    Returns:
        A ``(update_prog, prog)`` tuple. ``prog`` is the
        ``progressbar.ProgressBar``; ``update_prog(current)`` replaces the
        label widget with ``current`` and redraws the bar.
    """
    widgets = [
        title,
        progressbar.Counter(),
        'th entry - ',
        progressbar.FormatLabel(''),
        ' [',
        progressbar.Bar(),
        '] - ',
        progressbar.ETA(),
    ]
    prog = progressbar.ProgressBar(widgets=widgets)

    def update_prog(current):
        # Slot 3 is the FormatLabel placeholder created above; the list is
        # mutated in place because the bar was given this same list object.
        widgets[3] = progressbar.FormatLabel(current)
        prog.update()

    return (update_prog, prog)


def create_dir(direc):
    """Create ``direc`` (including parents) unless the path already exists."""
    if not os.path.exists(direc):
        os.makedirs(direc)


def dest_filename(w, v, r, t):
    """Return an ``.aiff`` file name for word ``w``, voice ``v``, rate ``r``
    and variant ``t``.

    A random integer in [0, 10000] is appended so repeated words get distinct
    names; collisions are unlikely but not impossible.
    """
    return '{}-{}-{}-{}-{}.aiff'.format(
        w, v, r, t, str(random.randint(0, 10000)))


def dest_path(v, r, n):
    """Return ``(absolute_path, relative_path)`` for file ``n``.

    The relative path is ``<voice>/<rate>/<name>``; the absolute path is that
    relative path appended to the module-level ``dest_dir``.
    """
    rel = v + '/' + str(r) + '/' + n
    return (dest_dir + rel), rel


def cli_gen_audio(speech_cmd, rate, voice, out_path):
    """Render ``speech_cmd`` to ``out_path`` by invoking the ``say`` tool.

    Args:
        speech_cmd: Text (optionally prefixed with an inline input-mode
            command) handed to ``say`` as its final argument.
        rate: Speaking rate, passed via ``-r``.
        voice: Voice name, passed via ``-v``.
        out_path: Output audio file, passed via ``-o``.

    The exit status of ``say`` is not checked.
    """
    subprocess.call(
        ['say', '-v', voice, '-r', str(rate), '-o', out_path, speech_cmd])


class SynthFile(object):
    """Metadata record describing one synthesized audio file."""

    def __init__(self, word, phon, filename, voice, voice_lang, rate,
                 operation):
        """Store the attributes of a rendered file.

        Args:
            word: The source word.
            phon: The phoneme string (or substitute word) that was spoken.
            filename: Path of the audio file relative to the output directory.
            voice: Voice name.
            voice_lang: Voice language code.
            rate: Speaking rate.
            operation: Variant label; stored as ``self.variant``.
        """
        super(SynthFile, self).__init__()
        self.word = word
        self.phoneme = phon
        self.filename = filename
        self.voice = voice
        self.voice_lang = voice_lang
        self.rate = rate
        self.variant = operation

    def get_json(self):
        """Return a JSON-serialisable dict describing this file."""
        return {
            'filename': self.filename,
            'voice': self.voice,
            'rate': self.rate,
            # The constructor stores its ``operation`` argument as
            # ``self.variant``; there is no ``self.operation`` attribute.
            'operation': self.variant
        }

    def get_csv(self):
        """Return one newline-terminated, comma-separated row.

        Column order: word, phoneme, voice, voice language, rate, variant,
        filename.

        NOTE(review): fields are joined verbatim without CSV quoting, so a
        field that itself contains a comma will misalign the row.
        """
        cols = [self.word, self.phoneme, self.voice, self.voice_lang,
                self.rate, self.variant, self.filename]
        return ','.join([str(c) for c in cols]) + '\n'


class SynthVariant(object):
    """One voice at one speaking rate, able to render word variants."""

    def __init__(self, identifier, voice, lang, rate):
        """Create the synthesizers for a voice.

        Args:
            identifier: Voice identifier passed to ``initWithVoice_``.
            voice: Voice name (used for ``say -v`` and in output paths).
            lang: Voice language code, recorded in the metadata.
            rate: Speaking rate.
        """
        super(SynthVariant, self).__init__()
        self.synth = NSSpeechSynthesizer.alloc().initWithVoice_(identifier)
        self.synth.setVolume_(100)
        self.synth.setRate_(rate)
        # Second synthesizer configured for phoneme input mode.
        self.phone_synth = NSSpeechSynthesizer.alloc().initWithVoice_(
            identifier)
        self.phone_synth.setVolume_(100)
        self.phone_synth.setRate_(rate)
        self.phone_synth.setObject_forProperty_error_(
            NSSpeechModePhoneme, NSSpeechInputModeProperty, None)
        self.identifier = identifier
        self.rate = rate
        self.name = voice
        self.lang = lang
        self.phoneme_capable = self.is_phoneme_capable()

    def __repr__(self):
        return 'Synthesizer[{} - {}]'.format(self.name, self.rate)

    def is_phoneme_capable(self):
        """Return True when the voice yields a non-empty phoneme string for a
        probe word."""
        orig_phon = self.synth.phonemesFromText_('water')
        return orig_phon != ''

    def generate_audio(self, word, variant):
        """Render ``word`` in the requested variant and describe the result.

        Args:
            word: The word to synthesize.
            variant: ``'low'`` speaks the word itself, ``'medium'`` speaks a
                phoneme string produced by ``similar_phoneme``, ``'high'``
                speaks the word returned by ``similar_word``. Any other value
                speaks the word and records an empty phoneme.

        Returns:
            A ``SynthFile`` describing the written audio file.
        """
        orig_phon = self.synth.phonemesFromText_(word)
        phoneme, phon_cmd = '', word
        if variant == 'low':
            phoneme = orig_phon
        elif variant == 'medium':
            phoneme = similar_phoneme(orig_phon)
            # Inline command that switches the synthesizer to phoneme input.
            phon_cmd = '[[inpt PHON]] ' + phoneme
        elif variant == 'high':
            phoneme = similar_word(word)
            phon_cmd = phoneme
        fname = dest_filename(word, self.name, self.rate, variant)
        d_path, r_path = dest_path(self.name, self.rate, fname)
        # Audio is produced through the ``say`` CLI rather than through the
        # in-process synthesizer objects.
        cli_gen_audio(phon_cmd, self.rate, self.name, d_path)
        return SynthFile(word, phoneme, r_path, self.name, self.lang,
                         self.rate, variant)

    def create_synth_dirs(self):
        """Create ``<dest_dir>/<voice>/<rate>`` for phoneme-capable voices."""
        if self.phoneme_capable:
            create_dir(dest_dir + self.name + '/' + str(self.rate))

    @staticmethod
    def voices_for_lang(lang):
        """Return ``(identifier, name, language)`` tuples for installed voices
        whose language equals ``lang`` and whose gender is not neuter."""
        voices_installed = NSSpeechSynthesizer.availableVoices()
        voice_attrs = [
            NSSpeechSynthesizer.attributesForVoice_(v)
            for v in voices_installed
        ]
        return [
            (v['VoiceIdentifier'], v['VoiceName'], v['VoiceLanguage'])
            for v in voice_attrs
            if v['VoiceLanguage'] == lang
            and v['VoiceGender'] != 'VoiceGenderNeuter'
        ]

    @classmethod
    def synth_with(cls, voice_params, rate=180):
        """Build an instance from a ``voices_for_lang`` tuple and a rate."""
        identifier, voice, lang = voice_params
        return cls(identifier, voice, lang, rate)


def synth_generator():
    """Discover usable en-US voices and return a synthesis function.

    Creates ``dest_dir``, builds one ``SynthVariant`` per voice and rate, and
    keeps those that are phoneme capable.

    Returns:
        ``synth_for_words(words, writer)``, which renders every word with
        every kept synthesizer in the 'low', 'medium' and 'high' variants and
        passes each resulting ``SynthFile`` to ``writer``.
    """
    us_voices_ids = SynthVariant.voices_for_lang('en-US')
    voice_rates = [150, 180, 210, 250]
    voice_synths = []
    create_dir(dest_dir)
    for vp in us_voices_ids:
        for r in voice_rates:
            s = SynthVariant.synth_with(vp, r)
            if s.phoneme_capable:
                print('Adding ', s)
                voice_synths.append(s)
            else:
                print('Discarding phoneme incapable ', s)

    def synth_for_words(words, writer):
        prog_title = "Synthesizing {} words : ".format(len(words))
        for s in voice_synths:
            s.create_synth_dirs()
            for v in ['low', 'medium', 'high']:
                # A fresh bar per (synthesizer, variant) pass over the words.
                (update, prog) = prog_bar(prog_title)
                for w in prog(words):
                    update('"{}" with {} variant ({})'.format(w, s, v))
                    synthed = s.generate_audio(w, v)
                    writer(synthed)

    return synth_for_words


def write_synths(synth_list, fname, csv=False):
    """Write ``synth_list`` to ``fname`` as CSV rows or as one JSON array.

    Args:
        synth_list: Iterable of ``SynthFile`` objects.
        fname: Output path; the file is overwritten.
        csv: When true write CSV rows, otherwise a JSON list.
    """
    # ``with`` guarantees the handle is released even if serialisation fails.
    with open(fname, 'w') as f:
        if csv:
            for s in synth_list:
                f.write(s.get_csv())
        else:
            json.dump([s.get_json() for s in synth_list], f)


def synth_logger(fname, csv=False):
    """Open ``fname`` for writing and return ``(writer, close_file)``.

    In CSV mode ``writer(s)`` writes a row immediately. In JSON mode
    ``writer(s)`` only collects the record, and the JSON array is written
    when ``close_file()`` is called. ``close_file()`` closes the file in both
    modes.
    """
    f = open(fname, 'w')

    def csv_writer(s):
        f.write(s.get_csv())

    synth_list = []

    def json_writer(s):
        synth_list.append(s)

    def close_file():
        try:
            if not csv:
                json.dump([s.get_json() for s in synth_list], f)
        finally:
            # Close even when the JSON dump raises.
            f.close()

    if csv:
        return csv_writer, close_file
    return json_writer, close_file


def generate_audio_for_stories():
    """Synthesize every word of the story input file and log the results.

    Reads ``./inputs/all_stories_hs.json``, takes the first element of each
    entry in every value as a word, renders the audio and writes the CSV log
    to ``dest_file``. Errors raised during synthesis are printed to stdout
    and do not propagate; the log file is always closed.
    """
    story_file = './inputs/all_stories_hs.json'
    with open(story_file) as story_fp:
        stories_data = json.load(story_fp)
    word_list = [t[0] for i in stories_data.values() for t in i]
    # Build the synthesizers first: this creates the outputs directory that
    # the log file lives in, and avoids holding an open log handle if voice
    # discovery fails.
    synth_for_words = synth_generator()
    (writer, closer) = synth_logger(dest_file, csv=True)
    try:
        synth_for_words(word_list, writer)
    except Exception:
        # Best effort: report the failure but keep whatever was logged so far.
        traceback.print_exc(file=sys.stdout)
    finally:
        closer()


if __name__ == '__main__':
    generate_audio_for_stories()