"""Synthesize speech audio for every entry of a story file with macOS voices.

For each installed en-US voice, at several speaking rates and in three
phoneme variants, the text is rendered to an AIFF file with the ``say``
command-line tool, and one CSV row per generated file is written to
``dest_file``.
"""
import json
import os
import random
import re
import subprocess
import sys
import traceback

import objc  # noqa: F401  (kept: imported by the original script)
from AppKit import NSSpeechSynthesizer, NSSpeechInputModeProperty
from AppKit import NSSpeechModePhoneme
from Foundation import NSURL  # noqa: F401  (used only by commented-out code)
import progressbar

OUTPUT_NAME = 'story_sents'
# Directory that receives the audio files, laid out as <voice>/<rate>/<file>.
dest_dir = os.path.abspath('.') + '/outputs/' + OUTPUT_NAME + '/'
# CSV index of everything that was synthesized.
dest_file = './outputs/' + OUTPUT_NAME + '.csv'


def prog_bar(title):
    """Create a labelled progress bar.

    Returns a tuple ``(update, bar)``. ``bar`` is the
    ``progressbar.ProgressBar`` instance; ``update(current)`` replaces the
    label widget with ``'<title> : <current>'`` and calls ``bar.update()``.
    """
    widgets = [
        progressbar.FormatLabel(title),
        ' [', progressbar.Bar(), '] - ',
        progressbar.ETA(),
    ]
    prog = progressbar.ProgressBar(widgets=widgets)

    def update_prog(current):
        # The widget list is shared with the bar, so swapping the label in
        # place changes what the bar displays.
        widgets[0] = progressbar.FormatLabel('{} : {}'.format(title, current))
        prog.update()

    return (update_prog, prog)


def create_dir(direc):
    """Create directory ``direc`` (and parents) unless the path exists."""
    if not os.path.exists(direc):
        os.makedirs(direc)


def dest_filename(w, v, r, t):
    """Return an ``.aiff`` file name built from word, voice, rate and variant.

    A random integer in [0, 10000] is appended to the name.

    NOTE(review): ``w`` is embedded verbatim; text containing ``/`` would
    yield a path into a directory that does not exist -- confirm the inputs.
    """
    return '{}-{}-{}-{}-{}.aiff'.format(w, v, r, t,
                                        str(random.randint(0, 10000)))


def dest_path(v, r, n):
    """Return ``(absolute_path, relative_path)`` for file ``n``.

    The relative path is ``<voice>/<rate>/<name>``; the absolute path is
    that relative path appended to ``dest_dir``.
    """
    rel = v + '/' + str(r) + '/' + n
    # Fixed: the original returned the undefined name ``rels`` (NameError).
    return (dest_dir + rel), rel


def cli_gen_audio(speech_cmd, rate, voice, out_path):
    """Render ``speech_cmd`` to ``out_path`` using the ``say`` CLI.

    The exit status of ``say`` is not checked.
    """
    subprocess.call(
        ['say', '-v', voice, '-r', str(rate), '-o', out_path, speech_cmd])


class SynthFile(object):
    """Metadata describing one synthesized audio file."""

    def __init__(self, word, phon, filename, voice, rate, operation):
        super(SynthFile, self).__init__()
        self.word = word
        self.phoneme = phon
        self.filename = filename
        self.voice = voice
        self.rate = rate
        # The ``operation`` argument is stored under the name ``variant``.
        self.variant = operation

    def get_json(self):
        """Return a JSON-serializable dict describing this file."""
        return {
            'filename': self.filename,
            'voice': self.voice,
            'rate': self.rate,
            # Fixed: the original read ``self.operation``, which is never
            # set, so this method always raised AttributeError.
            'operation': self.variant,
        }

    def get_csv(self):
        """Return one newline-terminated CSV row for this file.

        Column order: word, phoneme, voice, rate, variant, filename.

        NOTE(review): fields are not quoted or escaped, so text containing
        commas produces extra columns -- confirm what the consumer expects.
        """
        return '{},{},{},{},{},{}\n'.format(self.word, self.phoneme,
                                            self.voice, self.rate,
                                            self.variant, self.filename)


class SynthVariant(object):
    """One (voice, speaking rate) combination used to generate audio."""

    def __init__(self, identifier, voice, lang, rate):
        super(SynthVariant, self).__init__()
        # Text-mode synthesizer; used here for text -> phoneme conversion.
        self.synth = NSSpeechSynthesizer.alloc().initWithVoice_(identifier)
        self.synth.setVolume_(100)
        self.synth.setRate_(rate)
        # Second synthesizer switched to phoneme input mode. Only the
        # commented-out in-process rendering path referenced it.
        self.phone_synth = NSSpeechSynthesizer.alloc().initWithVoice_(
            identifier)
        self.phone_synth.setVolume_(100)
        self.phone_synth.setRate_(rate)
        self.phone_synth.setObject_forProperty_error_(
            NSSpeechModePhoneme, NSSpeechInputModeProperty, None)
        self.identifier = identifier
        self.rate = rate
        self.name = voice
        self.lang = lang
        # Make sure <dest_dir>/<voice>/<rate> exists before files are written.
        create_dir(dest_dir + self.name + '/' + str(self.rate))

    def __repr__(self):
        return 'Synthesizer[{} - {}]'.format(self.name, self.rate)

    def generate_audio(self, word, variant):
        """Synthesize ``word`` in the given ``variant`` and describe the file.

        Variants:
          * ``'low'`` / ``'high'`` -- speak the plain text; the recorded
            phoneme string is the synthesizer's own conversion of the text.
          * ``'medium'`` -- remove all digits from the phoneme string
            (presumably stress markers -- TODO confirm) and speak that
            string through the ``[[inpt PHON]]`` embedded command.
          * anything else -- speak the plain text, record an empty phoneme.

        Returns a ``SynthFile`` whose ``filename`` is the path relative to
        ``dest_dir``.
        """
        orig_phon = self.synth.phonemesFromText_(word)
        phoneme, phon_cmd = '', word
        if variant in ('low', 'high'):
            # self.synth.startSpeakingString_toURL_(word,d_url)
            phoneme = orig_phon
        elif variant == 'medium':
            phoneme = re.sub('[0-9]', '', orig_phon)
            phon_cmd = '[[inpt PHON]] ' + phoneme
        # elif variant == 'long':
        #     if phon != '':
        #         self.phone_synth.startSpeakingString_toURL_(phon,d_url)
        #     else:
        #         self.synth.startSpeakingString_toURL_(word,d_url)
        fname = dest_filename(word, self.name, self.rate, variant)
        d_path, r_path = dest_path(self.name, self.rate, fname)
        # d_url = NSURL.fileURLWithPath_(d_path)
        cli_gen_audio(phon_cmd, self.rate, self.name, d_path)
        return SynthFile(word, phoneme, r_path, self.name, self.rate, variant)


def synth_generator():
    """Build a synthesizer per en-US voice and rate; return a batch runner.

    Side effects: creates ``dest_dir`` and one sub-directory per voice and
    rate, and prints a line for every synthesizer created.

    Returns ``synth_for_words(words, writer)``, which synthesizes every word
    with every synthesizer in the variants 'low', 'medium' and 'high' and
    passes each resulting ``SynthFile`` to ``writer``.
    """
    voices_installed = NSSpeechSynthesizer.availableVoices()
    voice_attrs = [
        NSSpeechSynthesizer.attributesForVoice_(v) for v in voices_installed
    ]
    # Keep en-US voices whose gender is not neuter.
    us_voices_ids = [
        (v['VoiceIdentifier'], v['VoiceName'], v['VoiceLanguage'])
        for v in voice_attrs
        if v['VoiceLanguage'] == 'en-US'
        and v['VoiceGender'] != 'VoiceGenderNeuter'
        # and v['VoiceIdentifier'].split('.')[-1][0].isupper()
        # and 'VoiceShowInFullListOnly' in v
        # and 'VoiceRelativeDesirability' in v
    ]
    # us_voices_ids = ['com.apple.speech.synthesis.voice.Fred',
    #                  'com.apple.speech.synthesis.voice.Alex',
    #                  'com.apple.speech.synthesis.voice.Victoria']
    # voice_rates = list(range(150,221,(220-180)//4))
    voice_rates = [150, 180, 210, 250]
    voice_synths = []
    create_dir(dest_dir)
    for (ident, voice_name, lang) in us_voices_ids:
        for rate in voice_rates:
            s = SynthVariant(ident, voice_name, lang, rate)
            print('Created ', s)
            voice_synths.append(s)

    def synth_for_words(words, writer):
        prog_title = "Synthesizing {} words, current word".format(len(words))
        (update, prog) = prog_bar(prog_title)
        for w in prog(words):
            for s in voice_synths:
                for v in ('low', 'medium', 'high'):
                    update('"{}" with {} variant ({})'.format(w, s, v))
                    writer(s.generate_audio(w, v))

    return synth_for_words


def write_synths(synth_list, fname, csv=False):
    """Write ``synth_list`` to ``fname`` as CSV rows or as one JSON array."""
    # ``with`` closes the file even when serialization raises.
    with open(fname, 'w') as f:
        if csv:
            for s in synth_list:
                f.write(s.get_csv())
        else:
            json.dump([s.get_json() for s in synth_list], f)


def synth_logger(fname, csv=False):
    """Open ``fname`` for writing and return ``(writer, close_file)``.

    In CSV mode ``writer(s)`` writes a row immediately. In JSON mode it only
    collects ``s``, and the whole list is dumped when ``close_file()`` runs.
    ``close_file()`` must be called in both modes to close the file.
    """
    f = open(fname, 'w')
    synth_list = []

    def csv_writer(s):
        f.write(s.get_csv())

    def json_writer(s):
        synth_list.append(s)

    def close_file():
        try:
            if not csv:
                json.dump([s.get_json() for s in synth_list], f)
        finally:
            # Close the handle even if the JSON dump fails.
            f.close()

    return (csv_writer if csv else json_writer), close_file


def generate_audio_for_stories():
    """Synthesize every entry of the story file and log the results as CSV.

    Reads ``./inputs/all_stories.json``, flattens the items of all its
    values into one list, and synthesizes each item. An exception raised
    during synthesis is printed to stdout instead of propagating; the CSV
    file is closed in every case.
    """
    # story_file = './inputs/all_stories_hs.json'
    story_file = './inputs/all_stories.json'
    with open(story_file) as story_fp:
        stories_data = json.load(story_fp)
    # word_list = [t[0] for i in stories_data.values() for t in i]
    word_list = [i for g in stories_data.values() for i in g]
    # Build the synthesizers first: this creates the outputs directory that
    # the CSV file lives in, so opening the log cannot fail on a fresh
    # checkout (the original opened the CSV before the directory existed).
    synth_for_words = synth_generator()
    (writer, closer) = synth_logger(dest_file, csv=True)
    try:
        synth_for_words(word_list, writer)
    except Exception:
        # Best effort: report the failure but keep the rows written so far.
        # Unlike the original bare ``except:``, KeyboardInterrupt and
        # SystemExit now propagate (after the file has been closed).
        traceback.print_exc(file=sys.stdout)
    finally:
        closer()


# NOTE(review): this runs at import time, exactly as in the original script;
# consider guarding it with ``if __name__ == '__main__':``.
generate_audio_for_stories()