# speech-scoring/tts-wav-gen.py

import objc
from AppKit import NSSpeechSynthesizer,NSSpeechInputModeProperty,NSSpeechModePhoneme
from Foundation import NSURL,NSError,NSObject
import json
import random
import os
import re
import subprocess


def dest_filename(n, v, r, t):
    """Build a randomized ``.aiff`` file name for one synthesized sample.

    Args:
        n: First name component (callers in this file pass the word).
        v: Second name component (callers pass the voice name).
        r: Third name component (callers pass the speaking rate).
        t: Fourth name component (callers pass the operation label).

    Returns:
        ``'<n>-<v>-<r>-<t>-<k>.aiff'`` where ``k`` is a random integer drawn
        from ``random.randint(0, 10000)``.
    """
    # NOTE(review): the random suffix does not guarantee uniqueness; two calls
    # with the same arguments can produce the same name.
    return '{}-{}-{}-{}-{}.aiff'.format(n, v, r, t, random.randint(0, 10000))


def dest_path(p):
    """Return the absolute path of file name ``p`` under ``./outputs/audio/``.

    The base directory is the current working directory at call time.
    """
    return os.path.abspath('.') + '/outputs/audio/' + p


def dest_url(p):
    """Return a file ``NSURL`` pointing at ``dest_path(p)``."""
    return NSURL.fileURLWithPath_(dest_path(p))

def cli_gen_audio(word,rate,voice,out_path):
    """Synthesize ``word`` to an audio file by invoking the ``say`` command.

    Args:
        word: Text handed to ``say`` as its final argument.
        rate: Value for the ``-r`` option; converted with ``str()``.
        voice: Value for the ``-v`` option.
        out_path: Value for the ``-o`` option (the output file path).

    Returns:
        The exit status reported by ``subprocess.call`` so callers can detect
        a failed synthesis. Callers that ignore the result are unaffected.
    """
    # The command is passed as an argument list (no shell), so ``word`` is
    # never interpreted by a shell.
    command = ['say', '-v', voice, '-r', str(rate), '-o', out_path, word]
    return subprocess.call(command)

class SynthFile(object):
    """Metadata record for one generated audio file.

    Stores the spoken word, the output file name, and the voice, rate and
    operation label that were used to produce it.
    """
    def __init__(self,word, filename,voice,rate,operation):
        super(SynthFile, self).__init__()
        self.word = word
        self.filename = filename
        self.voice = voice
        self.rate = rate
        self.operation = operation

    def get_json(self):
        """Return a dict with the file name, voice, rate and operation.

        The word itself is not part of this dict.
        """
        return {'filename':self.filename,'voice':self.voice,
                'rate':self.rate,'operation':self.operation}

    def get_csv(self):
        """Return one newline-terminated CSV row.

        Column order is word, voice, rate, operation, filename. Fields that
        contain a comma, a double quote or a line break are quoted by the
        ``csv`` module, so such values cannot break the row. Fields without
        those characters are emitted exactly as plain comma-joined text.
        """
        # Imported here so the method does not depend on module-level imports.
        import csv
        import io

        # Stringify with str.format first so every value renders the same way
        # it would in a plain '{}'.format(...) row.
        fields = ['{}'.format(value) for value in
                  (self.word, self.voice, self.rate, self.operation, self.filename)]
        buf = io.StringIO()
        csv.writer(buf, lineterminator='\n').writerow(fields)
        return buf.getvalue()

class SynthVariant(object):
    """One (voice, rate, operation) combination used to synthesize words.

    Attributes set in ``__init__``:
        synth: ``NSSpeechSynthesizer`` for the voice, volume 100, given rate.
        phone_synth: a second synthesizer configured the same way and switched
            to phoneme input mode; ``generate_audio`` does not use it.
        identifier: the full voice identifier string.
        rate: the speaking rate.
        name: the last dot-separated component of ``identifier``.
        operation: ``'normal'`` speaks the word as text; any other value takes
            the phoneme path in ``generate_audio``.
    """
    def __init__(self,identifier,rate,op):
        super(SynthVariant, self).__init__()
        self.synth = NSSpeechSynthesizer.alloc().initWithVoice_(identifier)
        self.synth.setVolume_(100)
        self.synth.setRate_(rate)
        self.phone_synth = NSSpeechSynthesizer.alloc().initWithVoice_(identifier)
        self.phone_synth.setVolume_(100)
        self.phone_synth.setRate_(rate)
        self.phone_synth.setObject_forProperty_error_(NSSpeechModePhoneme,NSSpeechInputModeProperty,None)
        self.identifier = identifier
        self.rate = rate
        self.name = identifier.split('.')[-1]
        self.operation = op

    def __repr__(self):
        return 'Synthesizer[{} - {}]({})'.format(self.name,self.rate,self.operation)

    def generate_audio(self,word):
        """Synthesize ``word`` with this variant and describe the result.

        The audio is rendered through ``cli_gen_audio`` into the path given by
        ``dest_path`` for a freshly generated file name.

        Args:
            word: The word to speak.

        Returns:
            A ``SynthFile`` holding the word, the generated file name and this
            variant's name, rate and operation.
        """
        fname = dest_filename(word,self.name,self.rate,self.operation)
        d_path = dest_path(fname)
        if self.operation == 'normal':
            text = word
        else:
            orig_phon = self.synth.phonemesFromText_(word)
            if orig_phon:
                # All digits are removed from the phoneme string (presumably
                # stress markers; TODO confirm). The ``[[inpt PHON]]`` prefix
                # asks the synthesizer to read the rest as phonemes.
                text = '[[inpt PHON]] ' + re.sub(r'[0-9]', '', orig_phon)
            else:
                # No phonemes came back: speak the plain word rather than an
                # empty phoneme command (and avoid re.sub failing on None).
                text = word
        cli_gen_audio(text,self.rate,self.name,d_path)
        return SynthFile(word,fname,self.name,self.rate,self.operation)

    def synth_file(self,word):
        """Return the ``SynthFile`` produced by ``generate_audio(word)``."""
        return self.generate_audio(word)


def synth_generator():
    """Build every synthesizer variant and return a batch synthesis function.

    Installed voices are filtered down to those whose ``VoiceLanguage`` is
    ``'en-US'`` and whose identifier's last dot-separated component starts
    with an upper-case letter (presumably to pick voices that support phoneme
    input; confirm). One ``SynthVariant`` is created per selected voice, per
    rate in (150, 180, 210), per operation in ('normal', 'phoneme').

    Returns:
        A function ``synth_for_words(words)`` that runs every variant on every
        word, iterating words in the outer loop, and returns the list of
        results from ``SynthVariant.synth_file``.
    """
    rates = (150, 180, 210)
    operations = ('normal', 'phoneme')

    # Collect the attribute dictionaries of all installed voices first.
    attrs_by_voice = []
    for installed_id in NSSpeechSynthesizer.availableVoices():
        attrs_by_voice.append(NSSpeechSynthesizer.attributesForVoice_(installed_id))

    # Language is checked before the identifier is inspected.
    selected_ids = []
    for attrs in attrs_by_voice:
        if attrs['VoiceLanguage'] != 'en-US':
            continue
        if attrs['VoiceIdentifier'].split('.')[-1][0].isupper():
            selected_ids.append(attrs['VoiceIdentifier'])

    voice_synths = [SynthVariant(voice_id, rate, operation)
                    for voice_id in selected_ids
                    for rate in rates
                    for operation in operations]

    def synth_for_words(words):
        return [variant.synth_file(word)
                for word in words
                for variant in voice_synths]

    return synth_for_words

def write_synths(synth_list,fname,csv=False):
    """Write the records in ``synth_list`` to ``fname``.

    Args:
        synth_list: Iterable of objects providing ``get_csv()`` and
            ``get_json()``.
        fname: Path of the file to create or overwrite.
        csv: When true, write each record's ``get_csv()`` text in order;
            otherwise dump the list of ``get_json()`` results as one JSON
            document.
    """
    # The context manager closes the file even if a record or json.dump raises.
    with open(fname, 'w') as f:
        if csv:
            f.writelines(s.get_csv() for s in synth_list)
        else:
            json.dump([s.get_json() for s in synth_list], f)

def generate_audio_for_stories(stories_path='./inputs/all_stories_hs.json'):
    """Synthesize audio for every word listed in the stories JSON file.

    Args:
        stories_path: Path of the JSON file to read. It is loaded as a mapping
            whose values are iterated; the first element of each entry is used
            as the word (presumably entries are ``[word, ...]`` sequences;
            verify against the input file).

    Returns:
        The list returned by the function from ``synth_generator()`` when
        called with the collected word list.
    """
    # Read inside a context manager so the handle is closed right after parsing.
    with open(stories_path) as stories_file:
        stories_data = json.load(stories_file)
    word_list = [entry[0] for story in stories_data.values() for entry in story]
    words_audio_synth = synth_generator()
    return words_audio_synth(word_list)

# Single-word run, kept for reference:
# synths = synth_generator()(['education'])
# Synthesize every story word, then record the results as CSV.
synths = generate_audio_for_stories()
write_synths(synths, './outputs/synth_data.csv', csv=True)
# JSON output of the same records, kept for reference:
# write_synths(synths,'./outputs/synths.json')
generated voice files using ios api 2017-10-04 12:21:24 +00:00			`import objc`
implemented phoneme/voice/rate variant genration 2017-10-04 17:51:28 +00:00			`from AppKit import NSSpeechSynthesizer,NSSpeechInputModeProperty,NSSpeechModePhoneme`
1. using cli say instead of api since api generates empty responses sometimes 2. generating all words voices for each variants 2017-10-05 05:32:38 +00:00			`from Foundation import NSURL,NSError,NSObject`
generated voice files using ios api 2017-10-04 12:21:24 +00:00			`import json`
implemented phoneme/voice/rate variant genration 2017-10-04 17:51:28 +00:00			`import random`
generated voice files using ios api 2017-10-04 12:21:24 +00:00			`import os`
implemented phoneme/voice/rate variant genration 2017-10-04 17:51:28 +00:00			`import re`
1. using cli say instead of api since api generates empty responses sometimes 2. generating all words voices for each variants 2017-10-05 05:32:38 +00:00			`import subprocess`
generated voice files using ios api 2017-10-04 12:21:24 +00:00

1. included arpabet apple phoneme mapper 2. using only voices with phoneme capability and 3 rates only 2017-10-05 08:28:00 +00:00			`dest_filename = lambda n,v,r,t: '{}-{}-{}-{}-'.format(n,v,r,t)+str(random.randint(0,10000))+'.aiff'`
implemented phoneme/voice/rate variant genration 2017-10-04 17:51:28 +00:00			`dest_path = lambda p: os.path.abspath('.')+'/outputs/audio/'+p`
			`dest_url = lambda p: NSURL.fileURLWithPath_(dest_path(p))`
generated voice files using ios api 2017-10-04 12:21:24 +00:00
1. using cli say instead of api since api generates empty responses sometimes 2. generating all words voices for each variants 2017-10-05 05:32:38 +00:00			`def cli_gen_audio(word,rate,voice,out_path):`
			`subprocess.call(['say','-v',voice,'-r',str(rate),'-o',out_path,word])`

implemented phoneme/voice/rate variant genration 2017-10-04 17:51:28 +00:00			`class SynthFile(object):`
			`"""docstring for SynthFile."""`
			`def __init__(self,word, filename,voice,rate,operation):`
			`super(SynthFile, self).__init__()`
			`self.word = word`
			`self.filename = filename`
			`self.voice = voice`
			`self.rate = rate`
			`self.operation = operation`
generated voice files using ios api 2017-10-04 12:21:24 +00:00
implemented phoneme/voice/rate variant genration 2017-10-04 17:51:28 +00:00			`def get_json(self):`
			`return {'filename':self.filename,'voice':self.voice,`
			`'rate':self.rate,'operation':self.operation}`

			`def get_csv(self):`
			`return '{},{},{},{},{}\n'.format(self.word,self.voice,self.rate,self.operation,self.filename)`

			`class SynthVariant(object):`
			`"""docstring for SynthVariant."""`
			`def __init__(self,identifier,rate,op):`
			`super(SynthVariant, self).__init__()`
1. using cli say instead of api since api generates empty responses sometimes 2. generating all words voices for each variants 2017-10-05 05:32:38 +00:00			`self.synth = NSSpeechSynthesizer.alloc().initWithVoice_(identifier)`
			`self.synth.setVolume_(100)`
			`# sp.setVoice_(identifier)`
			`self.synth.setRate_(rate)`
			`self.phone_synth = NSSpeechSynthesizer.alloc().initWithVoice_(identifier)`
			`self.phone_synth.setVolume_(100)`
			`self.phone_synth.setRate_(rate)`
			`self.phone_synth.setObject_forProperty_error_(NSSpeechModePhoneme,NSSpeechInputModeProperty,None)`
implemented phoneme/voice/rate variant genration 2017-10-04 17:51:28 +00:00			`self.identifier = identifier`
			`self.rate = rate`
			`self.name = identifier.split('.')[-1]`
			`self.operation = op`

1. using cli say instead of api since api generates empty responses sometimes 2. generating all words voices for each variants 2017-10-05 05:32:38 +00:00			`def __repr__(self):`
			`return 'Synthesizer[{} - {}]({})'.format(self.name,self.rate,self.operation)`
implemented phoneme/voice/rate variant genration 2017-10-04 17:51:28 +00:00
1. using cli say instead of api since api generates empty responses sometimes 2. generating all words voices for each variants 2017-10-05 05:32:38 +00:00			`def generate_audio(self,word):`
1. included arpabet apple phoneme mapper 2. using only voices with phoneme capability and 3 rates only 2017-10-05 08:28:00 +00:00			`fname = dest_filename(word,self.name,self.rate,self.operation)`
1. using cli say instead of api since api generates empty responses sometimes 2. generating all words voices for each variants 2017-10-05 05:32:38 +00:00			`d_path = dest_path(fname)`
implemented phoneme/voice/rate variant genration 2017-10-04 17:51:28 +00:00			`d_url = dest_url(fname)`
1. using cli say instead of api since api generates empty responses sometimes 2. generating all words voices for each variants 2017-10-05 05:32:38 +00:00			`started = False`
implemented phoneme/voice/rate variant genration 2017-10-04 17:51:28 +00:00			`if self.operation == 'normal':`
1. using cli say instead of api since api generates empty responses sometimes 2. generating all words voices for each variants 2017-10-05 05:32:38 +00:00			`# self.synth.startSpeakingString_toURL_(word,d_url)`
			`cli_gen_audio(word,self.rate,self.name,d_path)`
implemented phoneme/voice/rate variant genration 2017-10-04 17:51:28 +00:00			`else:`
			`orig_phon = self.synth.phonemesFromText_(word)`
1. using cli say instead of api since api generates empty responses sometimes 2. generating all words voices for each variants 2017-10-05 05:32:38 +00:00			`phon = '[[inpt PHON]] '+re.sub('[0-9]','',orig_phon)`
			`cli_gen_audio(phon,self.rate,self.name,d_path)`
			`# if phon != '':`
			`# self.phone_synth.startSpeakingString_toURL_(phon,d_url)`
			`# else:`
			`# self.synth.startSpeakingString_toURL_(word,d_url)`
implemented phoneme/voice/rate variant genration 2017-10-04 17:51:28 +00:00			`return SynthFile(word,fname,self.name,self.rate,self.operation)`

1. using cli say instead of api since api generates empty responses sometimes 2. generating all words voices for each variants 2017-10-05 05:32:38 +00:00			`def synth_file(self,word):`
			`# s = objc.selector(self.generate_audio,signature=b"@@:@")`
			`# obj = NSObject.alloc().init()`
			`# sf = obj.performSelectorOnMainThread_withObject_waitUntilDone_(s,word,True)`
			`# return sf`
			`return self.generate_audio(word)`

implemented phoneme/voice/rate variant genration 2017-10-04 17:51:28 +00:00
			`def synth_generator():`
			`voices_installed = NSSpeechSynthesizer.availableVoices()`
			`voice_attrs = [NSSpeechSynthesizer.attributesForVoice_(v) for v in voices_installed]`
1. included arpabet apple phoneme mapper 2. using only voices with phoneme capability and 3 rates only 2017-10-05 08:28:00 +00:00			`us_voices_ids = [v['VoiceIdentifier'] for v in voice_attrs if v['VoiceLanguage'] == 'en-US' and v['VoiceIdentifier'].split('.')[-1][0].isupper()]`
			`# us_voices_ids = ['com.apple.speech.synthesis.voice.Fred','com.apple.speech.synthesis.voice.Alex',`
			`# 'com.apple.speech.synthesis.voice.Victoria']`
			`# voice_rates = list(range(150,221,(220-180)//4))`
			`voice_rates = [150,180,210]`
implemented phoneme/voice/rate variant genration 2017-10-04 17:51:28 +00:00			`voice_synths = []`
			`variants = ['normal','phoneme']`
			`for v in us_voices_ids:`
			`for r in voice_rates:`
			`for o in variants:`
			`voice_synths.append(SynthVariant(v,r,o))`
1. using cli say instead of api since api generates empty responses sometimes 2. generating all words voices for each variants 2017-10-05 05:32:38 +00:00			`def synth_for_words(words):`
			`all_synths = []`
1. included arpabet apple phoneme mapper 2. using only voices with phoneme capability and 3 rates only 2017-10-05 08:28:00 +00:00			`for w in words:`
			`for s in voice_synths:`
1. using cli say instead of api since api generates empty responses sometimes 2. generating all words voices for each variants 2017-10-05 05:32:38 +00:00			`all_synths.append(s.synth_file(w))`
			`# print(s)`
			`# return [s.synth_file(word) for s in voice_synths]`
			`return all_synths`
			`return synth_for_words`
implemented phoneme/voice/rate variant genration 2017-10-04 17:51:28 +00:00
			`def write_synths(synth_list,fname,csv=False):`
			`f = open(fname,'w')`
			`if csv:`
			`for s in synth_list:`
			`f.write(s.get_csv())`
			`else:`
			`json.dump([s.get_json() for s in synth_list],f)`
			`f.close()`
generated voice files using ios api 2017-10-04 12:21:24 +00:00
			`def generate_audio_for_stories():`
implemented phoneme/voice/rate variant genration 2017-10-04 17:51:28 +00:00			`stories_data = json.load(open('./inputs/all_stories_hs.json'))`
generated voice files using ios api 2017-10-04 12:21:24 +00:00			`word_list = [t[0] for i in stories_data.values() for t in i]`
1. using cli say instead of api since api generates empty responses sometimes 2. generating all words voices for each variants 2017-10-05 05:32:38 +00:00			`words_audio_synth = synth_generator()`
			`# all_synths = []`
			`# for word in word_list[:1]:`
			`# words_synths = word_audio_synth(word)`
			`# all_synths.extend(words_synths)`
			`return words_audio_synth(word_list)`
generated voice files using ios api 2017-10-04 12:21:24 +00:00
1. using cli say instead of api since api generates empty responses sometimes 2. generating all words voices for each variants 2017-10-05 05:32:38 +00:00			`# synths = synth_generator()(['education'])`
implemented phoneme/voice/rate variant genration 2017-10-04 17:51:28 +00:00			`synths = generate_audio_for_stories()`
			`write_synths(synths,'./outputs/synth_data.csv',True)`
1. using cli say instead of api since api generates empty responses sometimes 2. generating all words voices for each variants 2017-10-05 05:32:38 +00:00			`# write_synths(synths,'./outputs/synths.json')`