From 9d700f18ca50482b6ff4c336f8d7d5f676742bb8 Mon Sep 17 00:00:00 2001
From: Malar Kannan
Date: Wed, 4 Oct 2017 23:21:28 +0530
Subject: [PATCH] implemented phoneme/voice/rate variant generation

---
 {input => inputs}/all_stories_hs.json |   0
 tts-wav-gen.py                        | 156 ++++++++++++++++++++++++--
 2 files changed, 145 insertions(+), 11 deletions(-)
 rename {input => inputs}/all_stories_hs.json (100%)

diff --git a/input/all_stories_hs.json b/inputs/all_stories_hs.json
similarity index 100%
rename from input/all_stories_hs.json
rename to inputs/all_stories_hs.json
diff --git a/tts-wav-gen.py b/tts-wav-gen.py
index 1cf0baa..91b1f23 100644
--- a/tts-wav-gen.py
+++ b/tts-wav-gen.py
@@ -1,23 +1,157 @@
 import objc
-from AppKit import NSSpeechSynthesizer
-from Foundation import NSURL
+from AppKit import NSSpeechSynthesizer,NSSpeechInputModeProperty,NSSpeechModePhoneme
+from Foundation import NSURL,NSError
 import json
+import random
 import os
+import re
 
+
+def dest_filename(p, variant=None):
+    """Return the ``.aiff`` file name used for word ``p``.
+
+    With ``variant`` the name is ``p + '-' + variant + '.aiff'``, which is
+    deterministic and differs for every distinct variant string.  Without it
+    a random integer from 0 to 10000 is appended to the word, so two calls
+    for the same word can return the same name.
+    """
+    if variant is None:
+        return p+str(random.randint(0,10000))+'.aiff'
+    return '{}-{}.aiff'.format(p,variant)
+
+
+def dest_path(p):
+    """Return the absolute path of ``p`` under ``./outputs/audio/``."""
+    return os.path.abspath('.')+'/outputs/audio/'+p
+
+
+def dest_url(p):
+    """Return a file ``NSURL`` for ``dest_path(p)``."""
+    return NSURL.fileURLWithPath_(dest_path(p))
+
 
-sp = NSSpeechSynthesizer.alloc().init()
-sp.setVolume_(100)
+class SynthFile(object):
+    """Metadata describing one audio file rendered for a word."""
+
+    def __init__(self,word, filename,voice,rate,operation):
+        super(SynthFile, self).__init__()
+        self.word = word
+        self.filename = filename
+        self.voice = voice
+        self.rate = rate
+        self.operation = operation
 
-dest_path = os.path.abspath('.')+'/outputs/'
-dest_url = lambda p: NSURL.fileURLWithPath_(dest_path+p+'.aiff')
-def generate_aiff_word(word):
-    sp.startSpeakingString_toURL_(word,dest_url(word))
+    def get_json(self):
+        """Return this record as a dict ready for ``json.dump``."""
+        # NOTE(review): the word is left out here although it is the first
+        # column written by get_csv -- confirm that this is intended.
+        return {'filename':self.filename,'voice':self.voice,
+                'rate':self.rate,'operation':self.operation}
+
+    def get_csv(self):
+        """Return the row word,voice,rate,operation,filename plus a newline."""
+        # Fields are inserted verbatim: nothing is quoted or escaped.
+        return '{},{},{},{},{}\n'.format(self.word,self.voice,self.rate,self.operation,self.filename)
+
+
+class SynthVariant(object):
+    """One voice/rate/operation combination with its own synthesizers.
+
+    ``op`` selects how a word is rendered: ``'normal'`` speaks the text
+    itself, any other value speaks the word's phonemes with digits removed.
+    """
+
+    def __init__(self,identifier,rate,op):
+        super(SynthVariant, self).__init__()
+        self.synth = self._new_synth(identifier,rate)
+        # The second synthesizer is switched to phoneme input mode.
+        p_syn = self._new_synth(identifier,rate)
+        p_syn.setObject_forProperty_error_(NSSpeechModePhoneme,NSSpeechInputModeProperty,None)
+        self.phone_synth = p_syn
+        self.identifier = identifier
+        self.rate = rate
+        self.name = identifier.split('.')[-1]
+        self.operation = op
+
+    @staticmethod
+    def _new_synth(identifier,rate):
+        """Return a new synthesizer set to volume 100, ``identifier`` and ``rate``."""
+        sp = NSSpeechSynthesizer.alloc().init()
+        sp.setVolume_(100)
+        sp.setVoice_(identifier)
+        sp.setRate_(rate)
+        return sp
+
+    def synth_file(self,word):
+        """Start rendering ``word`` to an audio file and return its SynthFile."""
+        # Name the file after the whole variant so the many renderings of one
+        # word cannot overwrite each other, which a random suffix allowed.
+        variant = '{}-{}-{}'.format(self.identifier,self.rate,self.operation)
+        fname = dest_filename(word,variant)
+        d_url = dest_url(fname)
+        # NOTE(review): Apple documents startSpeakingString:toURL: as
+        # asynchronous and nothing here waits for it -- confirm the files are
+        # complete before a synthesizer is reused or the script exits.
+        if self.operation == 'normal':
+            self.synth.startSpeakingString_toURL_(word,d_url)
+        else:
+            orig_phon = self.synth.phonemesFromText_(word)
+            # presumably the digits are stress marks -- TODO confirm
+            phon = re.sub('[0-9]','',orig_phon)
+            self.phone_synth.startSpeakingString_toURL_(phon,d_url)
+        return SynthFile(word,fname,self.name,self.rate,self.operation)
+
+
+def synth_generator():
+    """Return a function that renders a word with every en-US variant.
+
+    One SynthVariant is built here for each en-US voice, rate and operation
+    ('normal' or 'phoneme'); the returned function maps a word to the list
+    of SynthFile records those variants produce for it.
+    """
+    voices_installed = NSSpeechSynthesizer.availableVoices()
+    voice_attrs = [NSSpeechSynthesizer.attributesForVoice_(v) for v in voices_installed]
+    us_voices_ids = [v['VoiceIdentifier'] for v in voice_attrs if v['VoiceLanguage'] == 'en-US']
+    # Six rates in steps of 8: 180, 188, 196, 204, 212, 220.
+    voice_rates = list(range(180,221,(220-180)//5))
+    voice_synths = []
+    variants = ['normal','phoneme']
+    for v in us_voices_ids:
+        for r in voice_rates:
+            for o in variants:
+                voice_synths.append(SynthVariant(v,r,o))
+
+    def synth_for_word(word):
+        return [s.synth_file(word) for s in voice_synths]
+    return synth_for_word
+
+
+def write_synths(synth_list,fname,csv=False):
+    """Write the records to ``fname``: CSV rows if ``csv``, else a JSON list."""
+    # The with-block closes the file even when serialization raises.
+    with open(fname,'w') as f:
+        if csv:
+            for s in synth_list:
+                f.write(s.get_csv())
+        else:
+            json.dump([s.get_json() for s in synth_list],f)
+
 
 def generate_audio_for_stories():
-    stories_data = json.load(open('./input/all_stories_hs.json'))
+    """Render every word of every story; return all SynthFile records."""
+    # Close the stories file as soon as it has been parsed.
+    with open('./inputs/all_stories_hs.json') as stories_file:
+        stories_data = json.load(stories_file)
     word_list = [t[0] for i in stories_data.values() for t in i]
+    word_audio_synth = synth_generator()
+    all_synths = []
     for word in word_list:
-        generate_aiff_word(word)
+        words_synths = word_audio_synth(word)
+        all_synths.extend(words_synths)
+    return all_synths
 
 
-generate_audio_for_stories()
+# synths = synth_generator()('education')
+synths = generate_audio_for_stories()
+write_synths(synths,'./outputs/synth_data.csv',True)
+write_synths(synths,'./outputs/synths.json')