speech-scoring/tts-wav-gen.py

128 lines
4.9 KiB
Python
Raw Normal View History

2017-10-04 12:21:24 +00:00
import objc
from AppKit import NSSpeechSynthesizer,NSSpeechInputModeProperty,NSSpeechModePhoneme
from Foundation import NSURL,NSError,NSObject
2017-10-04 12:21:24 +00:00
import json
import random
2017-10-04 12:21:24 +00:00
import os
import re
import subprocess
2017-10-04 12:21:24 +00:00
def dest_filename(n, v, r, t):
    """Build the output file name for one synthesized utterance.

    Args:
        n: The word (or text) being spoken.
        v: Short voice name.
        r: Speaking rate.
        t: Operation label (callers in this file pass 'normal' or 'phoneme').

    Returns:
        A name of the form ``<n>-<v>-<r>-<t>-<k>.aiff`` where ``k`` is a
        random integer between 0 and 10000 inclusive.
    """
    # NOTE(review): the random suffix is not guaranteed to be unique, so two
    # calls with the same arguments can produce the same name.
    suffix = random.randint(0, 10000)
    return '{}-{}-{}-{}-{}.aiff'.format(n, v, r, t, suffix)


def dest_path(p):
    """Return the absolute path of file name *p* under ``outputs/audio``.

    The path is anchored at the current working directory.
    """
    return os.path.abspath('.') + '/outputs/audio/' + p


def dest_url(p):
    """Return a file ``NSURL`` for the location given by ``dest_path(p)``."""
    return NSURL.fileURLWithPath_(dest_path(p))
2017-10-04 12:21:24 +00:00
def cli_gen_audio(word, rate, voice, out_path):
    """Render *word* to an audio file by running the ``say`` command.

    Args:
        word: Text handed to ``say`` as the final argument.
        rate: Speaking rate, passed via ``-r`` after conversion to a string.
        voice: Voice name, passed via ``-v``.
        out_path: Destination file, passed via ``-o``.

    The exit status of ``say`` is not inspected.
    """
    command = ['say', '-v', voice, '-r', str(rate), '-o', out_path, word]
    subprocess.call(command)
class SynthFile(object):
    """Record describing one synthesized audio file.

    Stores the spoken word, the output file name, and the voice, rate and
    operation that were used to produce it.
    """

    def __init__(self, word, filename, voice, rate, operation):
        super(SynthFile, self).__init__()
        self.word = word
        self.filename = filename
        self.voice = voice
        self.rate = rate
        self.operation = operation

    @staticmethod
    def _csv_field(value):
        """Format *value* as one CSV field, quoting it only when required."""
        text = '{}'.format(value)
        if any(ch in text for ch in ',"\r\n'):
            # Standard CSV escaping: wrap in quotes and double embedded quotes.
            return '"' + text.replace('"', '""') + '"'
        return text

    def get_json(self):
        """Return the record as a dict with filename, voice, rate, operation.

        The word itself is not part of the returned dict.
        """
        return {'filename': self.filename, 'voice': self.voice,
                'rate': self.rate, 'operation': self.operation}

    def get_csv(self):
        """Return the record as one newline-terminated CSV row.

        Column order is word, voice, rate, operation, filename. Fields that
        contain a comma, double quote or line break are quoted so that such
        a word cannot shift the columns of the row.
        """
        fields = (self.word, self.voice, self.rate,
                  self.operation, self.filename)
        return ','.join(self._csv_field(f) for f in fields) + '\n'
class SynthVariant(object):
    """One (voice, rate, operation) combination used to synthesize words."""

    def __init__(self, identifier, rate, op):
        """Create the synthesizers for a voice.

        Args:
            identifier: Voice identifier handed to ``initWithVoice_``; the
                part after its last '.' becomes ``self.name``.
            rate: Speaking rate applied to both synthesizers.
            op: Operation label; ``generate_audio`` treats 'normal' as plain
                text and anything else as phoneme input.
        """
        super(SynthVariant, self).__init__()
        self.synth = NSSpeechSynthesizer.alloc().initWithVoice_(identifier)
        self.synth.setVolume_(100)
        self.synth.setRate_(rate)
        # Second synthesizer switched to phoneme input mode. The methods of
        # this class do not use it; it is kept as an attribute in case other
        # code relies on it.
        self.phone_synth = NSSpeechSynthesizer.alloc().initWithVoice_(identifier)
        self.phone_synth.setVolume_(100)
        self.phone_synth.setRate_(rate)
        self.phone_synth.setObject_forProperty_error_(
            NSSpeechModePhoneme, NSSpeechInputModeProperty, None)
        self.identifier = identifier
        self.rate = rate
        self.name = identifier.split('.')[-1]
        self.operation = op

    def __repr__(self):
        return 'Synthesizer[{} - {}]({})'.format(
            self.name, self.rate, self.operation)

    def generate_audio(self, word):
        """Synthesize *word* to a file and return its ``SynthFile`` record.

        For the 'normal' operation the word is spoken as-is. Otherwise the
        word is converted to a phoneme string, its digits are removed, and
        the result is spoken with the ``[[inpt PHON]]`` prefix.
        """
        fname = dest_filename(word, self.name, self.rate, self.operation)
        d_path = dest_path(fname)
        if self.operation == 'normal':
            text = word
        else:
            orig_phon = self.synth.phonemesFromText_(word)
            # Digits are dropped from the phoneme string (presumably stress
            # markers -- TODO confirm) before it is handed to `say`.
            text = '[[inpt PHON]] ' + re.sub('[0-9]', '', orig_phon)
        cli_gen_audio(text, self.rate, self.name, d_path)
        return SynthFile(word, fname, self.name, self.rate, self.operation)

    def synth_file(self, word):
        """Synthesize *word*; currently a direct call to ``generate_audio``."""
        return self.generate_audio(word)
def synth_generator():
    """Prepare all synthesizer variants and return a word-list synthesizer.

    Every installed voice whose language is 'en-US' and whose short name
    (the identifier part after the last '.') starts with an upper-case
    letter is combined with each rate in [150, 180, 210] and each of the
    operations 'normal' and 'phoneme'.

    Returns:
        A function taking an iterable of words and returning a list with
        one ``SynthFile`` per (word, variant) pair, grouped by word.
    """
    installed = NSSpeechSynthesizer.availableVoices()
    attrs_per_voice = [NSSpeechSynthesizer.attributesForVoice_(voice)
                       for voice in installed]

    def is_named_us_voice(attrs):
        # Language is checked first so the identifier is only inspected for
        # en-US voices.
        if attrs['VoiceLanguage'] != 'en-US':
            return False
        return attrs['VoiceIdentifier'].split('.')[-1][0].isupper()

    us_ids = [attrs['VoiceIdentifier'] for attrs in attrs_per_voice
              if is_named_us_voice(attrs)]
    rates = [150, 180, 210]
    operations = ['normal', 'phoneme']
    variants = [SynthVariant(voice_id, rate, operation)
                for voice_id in us_ids
                for rate in rates
                for operation in operations]

    def synth_for_words(words):
        return [variant.synth_file(word)
                for word in words
                for variant in variants]

    return synth_for_words
def write_synths(synth_list, fname, csv=False):
    """Write the given synth records to *fname*.

    Args:
        synth_list: Iterable of objects providing ``get_csv()`` and
            ``get_json()``.
        fname: Path of the file to create or overwrite.
        csv: When true, write one ``get_csv()`` row per record; otherwise
            dump the list of ``get_json()`` dicts as a single JSON array.
    """
    # The context manager closes the file even if a record fails to
    # serialize part-way through.
    with open(fname, 'w') as f:
        if csv:
            f.writelines(s.get_csv() for s in synth_list)
        else:
            json.dump([s.get_json() for s in synth_list], f)
2017-10-04 12:21:24 +00:00
def generate_audio_for_stories():
    """Synthesize audio for every word listed in the stories JSON file.

    Reads ``./inputs/all_stories_hs.json``, takes the first element of each
    entry in each of its values as a word, and runs all of those words
    through the function returned by ``synth_generator()``.

    Returns:
        Whatever that function returns for the collected word list.
    """
    # Close the input file as soon as it has been parsed.
    with open('./inputs/all_stories_hs.json') as stories_file:
        stories_data = json.load(stories_file)
    word_list = [t[0] for i in stories_data.values() for t in i]
    words_audio_synth = synth_generator()
    return words_audio_synth(word_list)
2017-10-04 12:21:24 +00:00
# Script driver: synthesize audio for all story words, then record one CSV
# row per generated file.
# Quick single-word check: synths = synth_generator()(['education'])
# JSON alternative: write_synths(synths, './outputs/synths.json')
synths = generate_audio_for_stories()
write_synths(synths, './outputs/synth_data.csv', csv=True)