1. Included an ARPABET-to-Apple phoneme mapper
2. Using only voices with phoneme capability, and only 3 rates
master
parent
0337f0d5be
commit
67f27ac683
|
|
@ -0,0 +1,55 @@
|
|||
#!/usr/bin/env python3
"""
Convert ARPABET <http://www.speech.cs.cmu.edu/cgi-bin/cmudict>
to Apple's codes <https://developer.apple.com/library/content/documentation/UserExperience/Conceptual/SpeechSynthesisProgrammingGuide/Phonemes/Phonemes.html>

Run as a script: whitespace-separated ARPABET phonemes are read from stdin
and the concatenated Apple codes are printed on one line, prefixed with
``[[inpt PHON]]``.
"""

import sys

# ARPABET symbol -> Apple phoneme code, one "ARPABET APPLE" pair per line.
mapping = dict(pair.split() for pair in """
AA AA
AE AE
AH UX
AO AO
AW AW
AY AY
B b
CH C
D d
DH D
EH EH
ER UXr
EY EY
F f
G g
HH h
IH IH
IY IY
JH J
K k
L l
M m
N n
NG N
OW OW
OY OY
P p
R r
S s
SH S
T t
TH T
UH UH
UW UW
V v
W w
Y y
Z z
ZH Z
""".strip().splitlines())


def arpabet_to_apple(arpabet_phonemes):
    """Translate a sequence of ARPABET symbols into Apple phoneme codes.

    Lookup is case-insensitive. Trailing stress digits (``0``, ``1``, ``2``),
    as emitted by CMUdict on vowels (e.g. ``AH0``, ``OW1``), are dropped
    before lookup because the table is keyed on bare symbols only.

    Args:
        arpabet_phonemes: iterable of ARPABET symbol strings.

    Returns:
        A list with the Apple code for each input symbol, in order.

    Raises:
        KeyError: if a symbol (after normalisation) is not in ``mapping``.
    """
    apple_codes = []
    for symbol in arpabet_phonemes:
        key = symbol.upper().rstrip('012')
        try:
            apple_codes.append(mapping[key])
        except KeyError:
            raise KeyError('unknown ARPABET phoneme: {!r}'.format(symbol)) from None
    return apple_codes


def main():
    """Read ARPABET phonemes from stdin and print the Apple phoneme string."""
    arpabet_phonemes = sys.stdin.read().split()
    apple_phonemes = arpabet_to_apple(arpabet_phonemes)
    print('[[inpt PHON]] ' + ''.join(apple_phonemes))


if __name__ == '__main__':
    main()
|
||||
|
|
@ -8,7 +8,7 @@ import re
|
|||
import subprocess
|
||||
|
||||
|
||||
def dest_filename(p):
    """Return ``p`` followed by a random integer in [0, 10000] and '.aiff'."""
    random_suffix = random.randint(0, 10000)
    return p + str(random_suffix) + '.aiff'
|
||||
def dest_filename(n, v, r, t):
    """Build an output file name from the four given parts.

    The result is ``n-v-r-t-`` followed by a random integer in [0, 10000]
    and the '.aiff' extension.
    """
    prefix = '{}-{}-{}-{}-'.format(n, v, r, t)
    random_suffix = random.randint(0, 10000)
    return prefix + str(random_suffix) + '.aiff'
|
||||
def dest_path(p):
    """Return the path of ``p`` under 'outputs/audio/' in the current working directory."""
    working_dir = os.path.abspath('.')
    return working_dir + '/outputs/audio/' + p
|
||||
def dest_url(p):
    """Return ``NSURL.fileURLWithPath_`` applied to ``dest_path(p)``."""
    target_path = dest_path(p)
    return NSURL.fileURLWithPath_(target_path)
|
||||
|
||||
|
|
@ -53,7 +53,7 @@ class SynthVariant(object):
|
|||
return 'Synthesizer[{} - {}]({})'.format(self.name,self.rate,self.operation)
|
||||
|
||||
def generate_audio(self,word):
|
||||
fname = dest_filename(word)
|
||||
fname = dest_filename(word,self.name,self.rate,self.operation)
|
||||
d_path = dest_path(fname)
|
||||
d_url = dest_url(fname)
|
||||
started = False
|
||||
|
|
@ -81,8 +81,11 @@ class SynthVariant(object):
|
|||
def synth_generator():
|
||||
voices_installed = NSSpeechSynthesizer.availableVoices()
|
||||
voice_attrs = [NSSpeechSynthesizer.attributesForVoice_(v) for v in voices_installed]
|
||||
us_voices_ids = [v['VoiceIdentifier'] for v in voice_attrs if v['VoiceLanguage'] == 'en-US']
|
||||
voice_rates = list(range(180,221,(220-180)//4))
|
||||
us_voices_ids = [v['VoiceIdentifier'] for v in voice_attrs if v['VoiceLanguage'] == 'en-US' and v['VoiceIdentifier'].split('.')[-1][0].isupper()]
|
||||
# us_voices_ids = ['com.apple.speech.synthesis.voice.Fred','com.apple.speech.synthesis.voice.Alex',
|
||||
# 'com.apple.speech.synthesis.voice.Victoria']
|
||||
# voice_rates = list(range(150,221,(220-180)//4))
|
||||
voice_rates = [150,180,210]
|
||||
voice_synths = []
|
||||
variants = ['normal','phoneme']
|
||||
for v in us_voices_ids:
|
||||
|
|
@ -91,8 +94,8 @@ def synth_generator():
|
|||
voice_synths.append(SynthVariant(v,r,o))
|
||||
def synth_for_words(words):
|
||||
all_synths = []
|
||||
for s in voice_synths:
|
||||
for w in words:
|
||||
for w in words:
|
||||
for s in voice_synths:
|
||||
all_synths.append(s.synth_file(w))
|
||||
# print(s)
|
||||
# return [s.synth_file(word) for s in voice_synths]
|
||||
|
|
|
|||
Loading…
Reference in New Issue