1. Included an ARPABET-to-Apple phoneme mapper

2. Using only voices with phoneme capability, and only 3 speech rates
master
Malar Kannan 2017-10-05 13:58:00 +05:30
parent 0337f0d5be
commit 67f27ac683
2 changed files with 64 additions and 6 deletions

55
arpabet-to-apple.py Normal file
View File

@ -0,0 +1,55 @@
#!/usr/bin/env python3
"""
Convert ARPABET <http://www.speech.cs.cmu.edu/cgi-bin/cmudict>
to Apple's codes <https://developer.apple.com/library/content/documentation/UserExperience/Conceptual/SpeechSynthesisProgrammingGuide/Phonemes/Phonemes.html>

Reads whitespace-separated ARPABET phonemes from stdin and prints the
corresponding Apple phoneme string, prefixed with ``[[inpt PHON]]``.
"""
import sys

# ARPABET symbol -> Apple phoneme code; one "ARPABET APPLE" pair per line.
mapping = dict(line.split() for line in """
AA AA
AE AE
AH UX
AO AO
AW AW
AY AY
B b
CH C
D d
DH D
EH EH
ER UXr
EY EY
F f
G g
HH h
IH IH
IY IY
JH J
K k
L l
M m
N n
NG N
OW OW
OY OY
P p
R r
S s
SH S
T t
TH T
UH UH
UW UW
V v
W w
Y y
Z z
ZH Z
""".strip().split('\n'))

# CMUdict appends a lexical stress digit (0, 1 or 2) to vowels, e.g. "AH0".
STRESS_DIGITS = '012'


def arpabet_to_apple(phonemes):
    """Return the Apple phoneme-mode string for an iterable of ARPABET symbols.

    Symbols are matched case-insensitively. A trailing CMUdict stress digit
    is stripped before lookup, so the stress information is discarded
    rather than translated.

    Raises:
        KeyError: if a symbol (after stripping stress) is not in ``mapping``.
    """
    apple_phonemes = []
    for phoneme in phonemes:
        symbol = phoneme.upper().rstrip(STRESS_DIGITS)
        if symbol not in mapping:
            raise KeyError('unknown ARPABET phoneme: {!r}'.format(phoneme))
        apple_phonemes.append(mapping[symbol])
    return '[[inpt PHON]] ' + ''.join(apple_phonemes)


def main():
    """Convert the phonemes read from stdin and print the result."""
    arpabet_phonemes = sys.stdin.read().split()
    try:
        print(arpabet_to_apple(arpabet_phonemes))
    except KeyError as err:
        # Report the offending symbol without a traceback; exit status is 1.
        sys.exit(err.args[0])


if __name__ == '__main__':
    main()

View File

@ -8,7 +8,7 @@ import re
import subprocess import subprocess
dest_filename = lambda p: p+str(random.randint(0,10000))+'.aiff' dest_filename = lambda n,v,r,t: '{}-{}-{}-{}-'.format(n,v,r,t)+str(random.randint(0,10000))+'.aiff'
dest_path = lambda p: os.path.abspath('.')+'/outputs/audio/'+p dest_path = lambda p: os.path.abspath('.')+'/outputs/audio/'+p
dest_url = lambda p: NSURL.fileURLWithPath_(dest_path(p)) dest_url = lambda p: NSURL.fileURLWithPath_(dest_path(p))
@ -53,7 +53,7 @@ class SynthVariant(object):
return 'Synthesizer[{} - {}]({})'.format(self.name,self.rate,self.operation) return 'Synthesizer[{} - {}]({})'.format(self.name,self.rate,self.operation)
def generate_audio(self,word): def generate_audio(self,word):
fname = dest_filename(word) fname = dest_filename(word,self.name,self.rate,self.operation)
d_path = dest_path(fname) d_path = dest_path(fname)
d_url = dest_url(fname) d_url = dest_url(fname)
started = False started = False
@ -81,8 +81,11 @@ class SynthVariant(object):
def synth_generator(): def synth_generator():
voices_installed = NSSpeechSynthesizer.availableVoices() voices_installed = NSSpeechSynthesizer.availableVoices()
voice_attrs = [NSSpeechSynthesizer.attributesForVoice_(v) for v in voices_installed] voice_attrs = [NSSpeechSynthesizer.attributesForVoice_(v) for v in voices_installed]
us_voices_ids = [v['VoiceIdentifier'] for v in voice_attrs if v['VoiceLanguage'] == 'en-US'] us_voices_ids = [v['VoiceIdentifier'] for v in voice_attrs if v['VoiceLanguage'] == 'en-US' and v['VoiceIdentifier'].split('.')[-1][0].isupper()]
voice_rates = list(range(180,221,(220-180)//4)) # us_voices_ids = ['com.apple.speech.synthesis.voice.Fred','com.apple.speech.synthesis.voice.Alex',
# 'com.apple.speech.synthesis.voice.Victoria']
# voice_rates = list(range(150,221,(220-180)//4))
voice_rates = [150,180,210]
voice_synths = [] voice_synths = []
variants = ['normal','phoneme'] variants = ['normal','phoneme']
for v in us_voices_ids: for v in us_voices_ids:
@ -91,8 +94,8 @@ def synth_generator():
voice_synths.append(SynthVariant(v,r,o)) voice_synths.append(SynthVariant(v,r,o))
def synth_for_words(words): def synth_for_words(words):
all_synths = [] all_synths = []
for s in voice_synths: for w in words:
for w in words: for s in voice_synths:
all_synths.append(s.synth_file(w)) all_synths.append(s.synth_file(w))
# print(s) # print(s)
# return [s.synth_file(word) for s in voice_synths] # return [s.synth_file(word) for s in voice_synths]