From 05f36daf7e6d880a22787f41f9aba4674b51f591 Mon Sep 17 00:00:00 2001 From: Malar Kannan Date: Thu, 26 Oct 2017 15:27:22 +0530 Subject: [PATCH] refactored sample generation code --- .gitignore | 2 +- tts_samplegen.py | 76 +++++++++++++++++++++++++++++++++++------------- 2 files changed, 56 insertions(+), 22 deletions(-) diff --git a/.gitignore b/.gitignore index 00a2a2a..d2d0b11 100644 --- a/.gitignore +++ b/.gitignore @@ -138,7 +138,7 @@ Temporary Items # End of https://www.gitignore.io/api/macos outputs/* -inputs/mnist +inputs/* inputs/audio* logs/* models/* diff --git a/tts_samplegen.py b/tts_samplegen.py index 022a16a..5608085 100644 --- a/tts_samplegen.py +++ b/tts_samplegen.py @@ -7,24 +7,35 @@ import random import os import re import subprocess +import progressbar -OUTPUT_NAME = 'audio' +OUTPUT_NAME = 'story_sents' dest_dir = os.path.abspath('.') + '/outputs/' + OUTPUT_NAME + '/' dest_file = './outputs/' + OUTPUT_NAME + '.csv' +def prog_bar(title): + widgets = [progressbar.FormatLabel( + title), ' [', progressbar.Bar(), '] - ', progressbar.ETA()] + prog = progressbar.ProgressBar(widgets=widgets) + + def update_prog(current): + widgets[0] = progressbar.FormatLabel('{} : {}'.format(title, current)) + prog.update() + return (update_prog, prog) + + def create_dir(direc): if not os.path.exists(direc): - os.mkdir(direc) + os.makedirs(direc) -def dest_filename(n, v, r, t): - return '{}-{}-{}-{}-'.format(n, v, r, - t) + str(random.randint(0, 10000)) + '.aiff' +def dest_filename(w, v, r, t): + return '{}-{}-{}-{}-{}.aiff'.format(w, v, r, t, str(random.randint(0, 10000))) def dest_path(v, r, n): - return dest_dir + v + '/' + r + '/' + n + return dest_dir + v + '/' + str(r) + '/' + n def cli_gen_audio(speech_cmd, rate, voice, out_path): @@ -62,7 +73,7 @@ class SynthFile(object): class SynthVariant(object): """docstring for SynthVariant.""" - def __init__(self, identifier, rate): + def __init__(self, identifier, voice, lang, rate): super(SynthVariant, self).__init__() self.synth = NSSpeechSynthesizer.alloc().initWithVoice_(identifier) self.synth.setVolume_(100) @@ -75,10 +86,12 @@ class SynthVariant(object): NSSpeechModePhoneme, NSSpeechInputModeProperty, None) self.identifier = identifier self.rate = rate - self.name = identifier.split('.')[-1] + self.name = voice + self.lang = lang + create_dir(dest_dir + self.name + '/' + str(self.rate)) def __repr__(self): - return 'Synthesizer[{} - {}]({})'.format(self.name, self.rate) + return 'Synthesizer[{} - {}]'.format(self.name, self.rate) def generate_audio(self, word, variant): orig_phon, phoneme, phon_cmd = self.synth.phonemesFromText_( @@ -97,7 +110,7 @@ class SynthVariant(object): # self.phone_synth.startSpeakingString_toURL_(phon,d_url) # else: # self.synth.startSpeakingString_toURL_(word,d_url) - fname = dest_filename(word, phoneme, self.name, self.rate) + fname = dest_filename(word, self.name, self.rate, variant) d_path = dest_path(self.name, self.rate, fname) # d_url = NSURL.fileURLWithPath_(d_path) cli_gen_audio(phon_cmd, self.rate, self.name, d_path) @@ -109,11 +122,25 @@ def synth_generator(): voice_attrs = [ NSSpeechSynthesizer.attributesForVoice_(v) for v in voices_installed ] + # sk = [k for k in voice_attrs[0].keys() if k not in [ + # 'VoiceIndividuallySpokenCharacters', 'VoiceSupportedCharacters']] + # s_attrs = [[v[i] for i in sk] for v in voice_attrs if 'VoiceShowInFullListOnly' in v + # and 'VoiceRelativeDesirability' in v] us_voices_ids = [ - v['VoiceIdentifier'] for v in voice_attrs + (v['VoiceIdentifier'], + v['VoiceName'], + v['VoiceLanguage']) for v in voice_attrs + # v['VoiceDemoText'], + # v['VoiceShowInFullListOnly'], + # v['VoiceRelativeDesirability']) if v['VoiceLanguage'] == 'en-US' - and v['VoiceIdentifier'].split('.')[-1][0].isupper() + and v['VoiceGender'] != 'VoiceGenderNeuter' + # and v['VoiceIdentifier'].split('.')[-1][0].isupper() + # and 'VoiceShowInFullListOnly' in v + # and 'VoiceRelativeDesirability' in v ] + # import pdb + # pdb.set_trace() # us_voices_ids = ['com.apple.speech.synthesis.voice.Fred', # 'com.apple.speech.synthesis.voice.Alex', # 'com.apple.speech.synthesis.voice.Victoria'] @@ -121,16 +148,20 @@ def synth_generator(): voice_rates = [150, 180, 210, 250] voice_synths = [] create_dir(dest_dir) - for v in us_voices_ids: + for (i, v, l) in us_voices_ids: for r in voice_rates: - create_dir(dest_dir + v + '/' + r) - voice_synths.append(SynthVariant(v, r)) + s = SynthVariant(i, v, l, r) + print('Created ', s) + voice_synths.append(s) def synth_for_words(words): all_synths = [] - for w in words: + prog_title = "Synthesizing {} words, current word".format(len(words)) + (update, prog) = prog_bar(prog_title) + for w in prog(words): for s in voice_synths: for v in ['low', 'medium', 'high']: + update('"{}" with {} variant ({})'.format(w, s, v)) all_synths.append(s.generate_audio(w, v)) return all_synths @@ -148,12 +179,14 @@ def write_synths(synth_list, fname, csv=False): def generate_audio_for_stories(): - stories_data = json.load(open('./inputs/all_stories_hs.json')) - word_list = [t[0] for i in stories_data.values() for t in i] + # story_file = './inputs/all_stories_hs.json' + story_file = './inputs/all_stories.json' + stories_data = json.load(open(story_file)) + # word_list = [t[0] for i in stories_data.values() for t in i] + word_list = [i for g in stories_data.values() for i in g] words_audio_synth = synth_generator() return words_audio_synth(word_list) - # words_audio_synth = synth_generator() # synth = NSSpeechSynthesizer.alloc().init() # voices_installed = NSSpeechSynthesizer.availableVoices() @@ -165,7 +198,8 @@ def generate_audio_for_stories(): # d_path = dest_path(fname) # d_url = dest_url(d_path) -synths = synth_generator()([OUTPUT_NAME]) -# synths = generate_audio_for_stories() + +# synths = synth_generator()([OUTPUT_NAME]) +synths = generate_audio_for_stories() write_synths(synths, dest_file, True) # write_synths(synths,'./outputs/synths.json')