1. Use the `say` CLI instead of the speech API, since the API sometimes generates empty responses.
2. Generate the voices for all words for each variant.
master
parent
9d700f18ca
commit
0337f0d5be
|
|
@ -1,16 +1,20 @@
|
||||||
import objc
|
import objc
|
||||||
from AppKit import NSSpeechSynthesizer,NSSpeechInputModeProperty,NSSpeechModePhoneme
|
from AppKit import NSSpeechSynthesizer,NSSpeechInputModeProperty,NSSpeechModePhoneme
|
||||||
from Foundation import NSURL,NSError
|
from Foundation import NSURL,NSError,NSObject
|
||||||
import json
|
import json
|
||||||
import random
|
import random
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
|
import subprocess
|
||||||
|
|
||||||
|
|
||||||
dest_filename = lambda p: p+str(random.randint(0,10000))+'.aiff'
|
dest_filename = lambda p: p+str(random.randint(0,10000))+'.aiff'
|
||||||
dest_path = lambda p: os.path.abspath('.')+'/outputs/audio/'+p
|
dest_path = lambda p: os.path.abspath('.')+'/outputs/audio/'+p
|
||||||
dest_url = lambda p: NSURL.fileURLWithPath_(dest_path(p))
|
dest_url = lambda p: NSURL.fileURLWithPath_(dest_path(p))
|
||||||
|
|
||||||
|
def cli_gen_audio(word, rate, voice, out_path):
    """Render *word* to an audio file by invoking the ``say`` command.

    Args:
        word: Text handed to ``say`` as the string to speak.
        rate: Speaking rate; converted with ``str()`` and passed to ``-r``.
        voice: Voice name passed to ``-v``.
        out_path: Output file path passed to ``-o``.

    Returns:
        The exit status of the ``say`` process, so a caller can detect a
        failed synthesis. Callers that ignore the value are unaffected.
    """
    # Argument list (no shell), so the text is never interpreted by a shell.
    command = ['say', '-v', voice, '-r', str(rate), '-o', out_path, word]
    return subprocess.call(command)
|
||||||
|
|
||||||
class SynthFile(object):
|
class SynthFile(object):
|
||||||
"""docstring for SynthFile."""
|
"""docstring for SynthFile."""
|
||||||
def __init__(self,word, filename,voice,rate,operation):
|
def __init__(self,word, filename,voice,rate,operation):
|
||||||
|
|
@ -32,49 +36,68 @@ class SynthVariant(object):
|
||||||
"""docstring for SynthVariant."""
|
"""docstring for SynthVariant."""
|
||||||
def __init__(self, identifier, rate, op):
    """Create the text and phoneme synthesizers for one voice variant.

    Args:
        identifier: Voice identifier passed to ``initWithVoice_``; the part
            after its last ``.`` is stored as ``self.name``.
        rate: Speaking rate applied to both synthesizers and stored as
            ``self.rate``.
        op: Operation label stored as ``self.operation``.
    """
    super(SynthVariant, self).__init__()

    text_synth = NSSpeechSynthesizer.alloc().initWithVoice_(identifier)
    phoneme_synth = NSSpeechSynthesizer.alloc().initWithVoice_(identifier)
    for engine in (text_synth, phoneme_synth):
        # NOTE(review): Apple documents the volume range as 0.0-1.0; the
        # value 100 is kept as-is here -- confirm it is clamped as intended.
        engine.setVolume_(100)
        engine.setRate_(rate)
    # Only the second synthesizer is switched to phoneme input mode.
    phoneme_synth.setObject_forProperty_error_(
        NSSpeechModePhoneme, NSSpeechInputModeProperty, None)

    self.synth = text_synth
    self.phone_synth = phoneme_synth
    self.identifier = identifier
    self.rate = rate
    self.name = identifier.split('.')[-1]
    self.operation = op
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
return 'Synthesizer[{} - {}]({})'.format(self.name,self.rate,self.operation)
|
||||||
|
|
||||||
def generate_audio(self, word):
    """Synthesize *word* to an audio file through the ``say`` CLI.

    When ``self.operation`` is ``'normal'`` the word itself is spoken.
    For any other operation the word is converted with
    ``self.synth.phonemesFromText_``, digits are stripped from the result,
    and the phoneme string is spoken with an ``[[inpt PHON]]`` prefix.

    Args:
        word: The text to synthesize.

    Returns:
        A ``SynthFile`` built from the word, the generated file name and
        this variant's name, rate and operation.
    """
    fname = dest_filename(word)
    d_path = dest_path(fname)

    if self.operation == 'normal':
        spoken = word
    else:
        orig_phon = self.synth.phonemesFromText_(word)
        # Digits are dropped from the phoneme string (presumably stress
        # markers -- confirm); the prefix is an embedded command that
        # presumably switches ``say`` to phoneme input -- confirm.
        spoken = '[[inpt PHON]] ' + re.sub(r'[0-9]', '', orig_phon)

    # Audio is produced by the CLI rather than startSpeakingString_toURL_.
    cli_gen_audio(spoken, self.rate, self.name, d_path)
    return SynthFile(word, fname, self.name, self.rate, self.operation)
|
||||||
|
|
||||||
|
def synth_file(self,word):
|
||||||
|
# s = objc.selector(self.generate_audio,signature=b"@@:@")
|
||||||
|
# obj = NSObject.alloc().init()
|
||||||
|
# sf = obj.performSelectorOnMainThread_withObject_waitUntilDone_(s,word,True)
|
||||||
|
# return sf
|
||||||
|
return self.generate_audio(word)
|
||||||
|
|
||||||
|
|
||||||
def synth_generator():
    """Build every voice variant and return a batch synthesis function.

    One ``SynthVariant`` is created for each combination of installed voice
    whose ``VoiceLanguage`` is ``'en-US'``, each rate from 180 to 220 in
    steps of 10, and each of the ``'normal'`` / ``'phoneme'`` operations.

    Returns:
        A function taking an iterable of words and returning a list with
        the result of ``synth_file`` for every (variant, word) pair,
        ordered variant-major.
    """
    installed = NSSpeechSynthesizer.availableVoices()
    attributes = [NSSpeechSynthesizer.attributesForVoice_(voice)
                  for voice in installed]
    us_ids = [attr['VoiceIdentifier'] for attr in attributes
              if attr['VoiceLanguage'] == 'en-US']

    lowest, highest, intervals = 180, 220, 4
    rates = list(range(lowest, highest + 1, (highest - lowest) // intervals))
    operations = ['normal', 'phoneme']

    # Same nesting order as the explicit loops: voice, then rate, then op.
    voice_synths = [SynthVariant(voice_id, rate, operation)
                    for voice_id in us_ids
                    for rate in rates
                    for operation in operations]

    def synth_for_words(words):
        """Run every variant over *words* and collect the results."""
        return [variant.synth_file(item)
                for variant in voice_synths
                for item in words]

    return synth_for_words
|
||||||
|
|
||||||
def write_synths(synth_list,fname,csv=False):
|
def write_synths(synth_list,fname,csv=False):
|
||||||
f = open(fname,'w')
|
f = open(fname,'w')
|
||||||
|
|
@ -88,14 +111,14 @@ def write_synths(synth_list,fname,csv=False):
|
||||||
def generate_audio_for_stories():
    """Synthesize audio for every word listed in the stories input file.

    Reads ``./inputs/all_stories_hs.json``, takes the first element of each
    entry under each top-level value as a word, and passes the complete
    word list to the function returned by ``synth_generator()``.

    Returns:
        The list returned by the batch synthesis function.
    """
    # Context manager so the input file handle is closed after parsing.
    with open('./inputs/all_stories_hs.json') as stories_file:
        stories_data = json.load(stories_file)
    # assumes each value is a list of sequences whose first item is the
    # word -- TODO confirm against the input file
    word_list = [t[0] for i in stories_data.values() for t in i]
    words_audio_synth = synth_generator()
    return words_audio_synth(word_list)
|
||||||
|
|
||||||
# Script body: synthesize audio for all story words, then write the results
# to ./outputs/synth_data.csv.
# Single-word smoke test kept for reference:
#   synths = synth_generator()(['education'])
synths = generate_audio_for_stories()
write_synths(synths, './outputs/synth_data.csv', csv=True)
# The JSON export is currently disabled:
#   write_synths(synths, './outputs/synths.json')
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue