172 lines
5.6 KiB
Python
172 lines
5.6 KiB
Python
import objc
|
|
from AppKit import NSSpeechSynthesizer, NSSpeechInputModeProperty
|
|
from AppKit import NSSpeechModePhoneme
|
|
from Foundation import NSURL
|
|
import json
|
|
import random
|
|
import os
|
|
import re
|
|
import subprocess
|
|
|
|
# Base name shared by the audio output directory and the CSV file.
OUTPUT_NAME = 'audio'
# Absolute directory (with trailing slash) under which audio files are placed.
dest_dir = '{}/outputs/{}/'.format(os.path.abspath('.'), OUTPUT_NAME)
# Relative path of the CSV file written at the end of the script.
dest_file = './outputs/{}.csv'.format(OUTPUT_NAME)
|
|
|
|
|
|
def create_dir(direc):
    """Create the directory ``direc`` unless the path already exists.

    Missing parent directories are created as well, so a nested path can be
    created with a single call.

    Args:
        direc: Path of the directory to create.
    """
    # os.mkdir only creates the last path component and raises when a parent
    # directory is missing; os.makedirs creates the whole chain.
    if not os.path.exists(direc):
        os.makedirs(direc)
|
|
|
|
|
|
def dest_filename(n, v, r, t):
    """Build an ``.aiff`` file name from four labels and a random number.

    The result has the form ``<n>-<v>-<r>-<t>-<number>.aiff`` where
    ``<number>`` comes from ``random.randint(0, 10000)``.

    Args:
        n: First label of the file name.
        v: Second label of the file name.
        r: Third label of the file name.
        t: Fourth label of the file name.

    Returns:
        The file name as a string (no directory part).
    """
    return '{}-{}-{}-{}-{}.aiff'.format(n, v, r, t, random.randint(0, 10000))
|
|
|
|
|
|
def dest_path(v, r, n):
    """Return the full output path ``<dest_dir><v>/<r>/<n>``.

    Args:
        v: Voice directory name.
        r: Rate directory name; may be an int or a string.
        n: File name.

    Returns:
        The path as a string, rooted at the module-level ``dest_dir``.
    """
    # The rate is converted explicitly: callers pass it as an int, and
    # concatenating an int to a str raises TypeError.
    return dest_dir + v + '/' + str(r) + '/' + n
|
|
|
|
|
|
def cli_gen_audio(speech_cmd, rate, voice, out_path):
    """Run the ``say`` command line tool to render speech into a file.

    Args:
        speech_cmd: Text handed to ``say`` as its final argument.
        rate: Value passed to the ``-r`` option (converted with ``str``).
        voice: Value passed to the ``-v`` option.
        out_path: Value passed to the ``-o`` option.

    The exit status of ``say`` is not inspected.
    """
    say_args = ['say']
    say_args += ['-v', voice]
    say_args += ['-r', str(rate)]
    say_args += ['-o', out_path]
    say_args.append(speech_cmd)
    subprocess.call(say_args)
|
|
|
|
|
|
class SynthFile(object):
    """Record describing one generated audio file."""

    def __init__(self, word, phon, filename, voice, rate, operation):
        """Store the metadata of a generated file.

        Args:
            word: The word the file was generated for.
            phon: Phoneme string associated with the file.
            filename: Name of the audio file.
            voice: Name of the voice.
            rate: Speech rate.
            operation: Variant label; stored as ``self.variant``.
        """
        super(SynthFile, self).__init__()
        self.word = word
        self.phoneme = phon
        self.filename = filename
        self.voice = voice
        self.rate = rate
        self.variant = operation

    def get_json(self):
        """Return a JSON-serializable dict describing the file.

        Returns:
            A dict with the keys ``filename``, ``voice``, ``rate`` and
            ``operation``.
        """
        return {
            'filename': self.filename,
            'voice': self.voice,
            'rate': self.rate,
            # The constructor stores this value as ``variant``; there is no
            # ``operation`` attribute to read.
            'operation': self.variant,
        }

    def get_csv(self):
        """Return one newline-terminated CSV row for the file.

        Columns, in order: word, phoneme, voice, rate, variant, filename.
        Fields are joined with commas as-is; no quoting or escaping is done.
        """
        # Six placeholders for six fields, so the filename is not dropped.
        return '{},{},{},{},{},{}\n'.format(self.word, self.phoneme,
                                            self.voice, self.rate,
                                            self.variant, self.filename)
|
|
|
|
|
|
class SynthVariant(object):
    """One voice at one speech rate, able to generate audio files for words."""

    def __init__(self, identifier, rate):
        """Set up the synthesizers for a voice/rate combination.

        Args:
            identifier: Voice identifier string; the part after the last
                ``.`` is used as the short voice name.
            rate: Speech rate applied to both synthesizers and later passed
                to the ``say`` command.
        """
        super(SynthVariant, self).__init__()
        # Synthesizer used to look up the phonemes of a word.
        self.synth = NSSpeechSynthesizer.alloc().initWithVoice_(identifier)
        self.synth.setVolume_(100)
        self.synth.setRate_(rate)
        # Second synthesizer switched to phoneme input mode. generate_audio
        # does not use it; the audio is rendered through the ``say`` CLI.
        self.phone_synth = NSSpeechSynthesizer.alloc().initWithVoice_(
            identifier)
        self.phone_synth.setVolume_(100)
        self.phone_synth.setRate_(rate)
        self.phone_synth.setObject_forProperty_error_(
            NSSpeechModePhoneme, NSSpeechInputModeProperty, None)
        self.identifier = identifier
        self.rate = rate
        self.name = identifier.split('.')[-1]

    def __repr__(self):
        """Return ``Synthesizer[<name> - <rate>](<identifier>)``."""
        # The format string has three placeholders, so the identifier must be
        # supplied as the third argument.
        return 'Synthesizer[{} - {}]({})'.format(self.name, self.rate,
                                                 self.identifier)

    def generate_audio(self, word, variant):
        """Generate one audio file for ``word`` and return its description.

        Args:
            word: Text to synthesize.
            variant: ``'low'``, ``'medium'`` or ``'high'``. Any other value
                speaks the plain word and records an empty phoneme string.

        Returns:
            A ``SynthFile`` describing the generated file.
        """
        orig_phon = self.synth.phonemesFromText_(word)
        phoneme, phon_cmd = '', word
        if variant == 'medium':
            # Drop the digits from the phoneme string (presumably stress
            # markers -- confirm) and have ``say`` read the result as
            # phonemes via the embedded input-mode command.
            phoneme = re.sub(r'[0-9]', '', orig_phon)
            phon_cmd = '[[inpt PHON]] ' + phoneme
        elif variant in ('low', 'high'):
            # NOTE(review): 'low' and 'high' currently behave identically:
            # the plain word is spoken and the unmodified phonemes recorded.
            phoneme = orig_phon
        fname = dest_filename(word, phoneme, self.name, self.rate)
        # The rate is passed as a string because the path is assembled by
        # string concatenation.
        d_path = dest_path(self.name, str(self.rate), fname)
        cli_gen_audio(phon_cmd, self.rate, self.name, d_path)
        return SynthFile(word, phoneme, fname, self.name, self.rate, variant)
|
|
|
|
|
|
def synth_generator():
    """Prepare synthesizers and output directories; return a word renderer.

    Collects the installed voices whose ``VoiceLanguage`` is ``en-US`` and
    whose short name (the part of the identifier after the last ``.``) starts
    with an uppercase letter. For every such voice and each rate in
    150/180/210/250 it creates the directory ``<dest_dir><name>/<rate>`` and a
    ``SynthVariant``.

    Returns:
        A function ``synth_for_words(words)`` that generates audio for every
        word with every prepared synthesizer in the variants ``low``,
        ``medium`` and ``high`` and returns the resulting list of
        ``SynthFile`` objects.
    """
    voices_installed = NSSpeechSynthesizer.availableVoices()
    voice_attrs = [
        NSSpeechSynthesizer.attributesForVoice_(v) for v in voices_installed
    ]
    us_voices_ids = [
        v['VoiceIdentifier'] for v in voice_attrs
        if v['VoiceLanguage'] == 'en-US'
        and v['VoiceIdentifier'].split('.')[-1][0].isupper()
    ]
    voice_rates = [150, 180, 210, 250]
    voice_synths = []
    # Directories are created one level at a time, so this works even when
    # create_dir can only create a single path component per call.
    create_dir(os.path.dirname(os.path.normpath(dest_dir)))
    create_dir(dest_dir)
    for v in us_voices_ids:
        # Use the short voice name: that is the directory the audio paths are
        # built from (SynthVariant.name), not the full identifier.
        voice_dir = dest_dir + v.split('.')[-1] + '/'
        create_dir(voice_dir)
        for r in voice_rates:
            # The rate is an int and must be converted before concatenation.
            create_dir(voice_dir + str(r))
            voice_synths.append(SynthVariant(v, r))

    def synth_for_words(words):
        """Generate all voice/rate/variant files for each word in ``words``."""
        return [
            s.generate_audio(w, v)
            for w in words
            for s in voice_synths
            for v in ('low', 'medium', 'high')
        ]

    return synth_for_words
|
|
|
|
|
|
def write_synths(synth_list, fname, csv=False):
    """Write the descriptions of generated files to ``fname``.

    Args:
        synth_list: Iterable of objects providing ``get_csv()`` and
            ``get_json()``.
        fname: Path of the file to write; it is overwritten.
        csv: When true, write one ``get_csv()`` row per item; otherwise dump
            the list of ``get_json()`` dicts as a single JSON document.
    """
    # The context manager closes the file even when serialization raises.
    with open(fname, 'w') as f:
        if csv:
            f.writelines(s.get_csv() for s in synth_list)
        else:
            json.dump([s.get_json() for s in synth_list], f)
|
|
|
|
|
|
def generate_audio_for_stories():
    """Generate audio for the words listed in ``./inputs/all_stories_hs.json``.

    The JSON document is read as a mapping whose values are sequences of
    items; the first element of every item is taken as a word.

    Returns:
        Whatever the function returned by ``synth_generator()`` returns for
        the collected word list.
    """
    # Read inside a context manager so the input file is closed promptly.
    with open('./inputs/all_stories_hs.json') as stories_file:
        stories_data = json.load(stories_file)
    word_list = [t[0] for i in stories_data.values() for t in i]
    words_audio_synth = synth_generator()
    return words_audio_synth(word_list)
|
|
|
|
|
|
# words_audio_synth = synth_generator()
|
|
# synth = NSSpeechSynthesizer.alloc().init()
|
|
# voices_installed = NSSpeechSynthesizer.availableVoices()
|
|
# voice_attrs = [NSSpeechSynthesizer.attributesForVoice_(v) for v in voices_installed]
|
|
# us_voices_ids = [v['VoiceIdentifier'] for v in voice_attrs if v['VoiceLanguage'] == 'en-US' and v['VoiceIdentifier'].split('.')[-1][0].isupper()]
|
|
# synth.setVoice_(us_voices_ids[2])
|
|
# synth.startSpeakingString_('your')
|
|
# fname = dest_filename(word,self.name,self.rate,self.operation)
|
|
# d_path = dest_path(fname)
|
|
# d_url = dest_url(d_path)
|
|
|
|
# Script body: generate audio for the single word OUTPUT_NAME and write the
# resulting descriptions to dest_file as CSV.
synths = synth_generator()(
    [OUTPUT_NAME],
)
# Alternative input: the words from the stories JSON file.
# synths = generate_audio_for_stories()
write_synths(synths, dest_file, csv=True)
# Alternative output: JSON instead of CSV.
# write_synths(synths,'./outputs/synths.json')
|