From 67f27ac683df3fe6bbe80f41bd8b3b57fadac0d6 Mon Sep 17 00:00:00 2001
From: Malar Kannan
Date: Thu, 5 Oct 2017 13:58:00 +0530
Subject: [PATCH] 1. included arpabet apple phoneme mapper 2. using only voices with phoneme capability and 3 rates only

---
Reviewer note: the line structure of this patch was reconstructed from a
copy whose newlines had been collapsed onto two lines. Hunk line counts
match the @@ headers. Leading whitespace in the tts-wav-gen.py hunks is
inferred from Python block structure (4-space indents assumed) -- TODO
confirm against the target file, or apply with
`git apply --ignore-whitespace`.

 arpabet-to-apple.py | 55 +++++++++++++++++++++++++++++++++++++++++++++
 tts-wav-gen.py      | 15 ++++++++-----
 2 files changed, 64 insertions(+), 6 deletions(-)
 create mode 100644 arpabet-to-apple.py

diff --git a/arpabet-to-apple.py b/arpabet-to-apple.py
new file mode 100644
index 0000000..dc542e4
--- /dev/null
+++ b/arpabet-to-apple.py
@@ -0,0 +1,55 @@
+#!/usr/bin/env python3
+
+"""
+Convert ARPABET
+to Apple's codes
+"""
+
+import sys
+
+
+mapping = {s.split()[0]: s.split()[1] for s in """
+AA AA
+AE AE
+AH UX
+AO AO
+AW AW
+AY AY
+B b
+CH C
+D d
+DH D
+EH EH
+ER UXr
+EY EY
+F f
+G g
+HH h
+IH IH
+IY IY
+JH J
+K k
+L l
+M m
+N n
+NG N
+OW OW
+OY OY
+P p
+R r
+S s
+SH S
+T t
+TH T
+UH UH
+UW UW
+V v
+W w
+Y y
+Z z
+ZH Z
+""".strip().split('\n')}
+
+arpabet_phonemes = sys.stdin.read().split()
+apple_phonemes = [mapping[p.upper()] for p in arpabet_phonemes]
+print('[[inpt PHON]] ' + ''.join(apple_phonemes))
diff --git a/tts-wav-gen.py b/tts-wav-gen.py
index 60a62ff..4256f7e 100644
--- a/tts-wav-gen.py
+++ b/tts-wav-gen.py
@@ -8,7 +8,7 @@ import re
 import subprocess
 
 
-dest_filename = lambda p: p+str(random.randint(0,10000))+'.aiff'
+dest_filename = lambda n,v,r,t: '{}-{}-{}-{}-'.format(n,v,r,t)+str(random.randint(0,10000))+'.aiff'
 dest_path = lambda p: os.path.abspath('.')+'/outputs/audio/'+p
 dest_url = lambda p: NSURL.fileURLWithPath_(dest_path(p))
 
@@ -53,7 +53,7 @@ class SynthVariant(object):
         return 'Synthesizer[{} - {}]({})'.format(self.name,self.rate,self.operation)
 
     def generate_audio(self,word):
-        fname = dest_filename(word)
+        fname = dest_filename(word,self.name,self.rate,self.operation)
         d_path = dest_path(fname)
         d_url = dest_url(fname)
         started = False
@@ -81,8 +81,11 @@ class SynthVariant(object):
 def synth_generator():
     voices_installed = NSSpeechSynthesizer.availableVoices()
     voice_attrs = [NSSpeechSynthesizer.attributesForVoice_(v) for v in voices_installed]
-    us_voices_ids = [v['VoiceIdentifier'] for v in voice_attrs if v['VoiceLanguage'] == 'en-US']
-    voice_rates = list(range(180,221,(220-180)//4))
+    us_voices_ids = [v['VoiceIdentifier'] for v in voice_attrs if v['VoiceLanguage'] == 'en-US' and v['VoiceIdentifier'].split('.')[-1][0].isupper()]
+    # us_voices_ids = ['com.apple.speech.synthesis.voice.Fred','com.apple.speech.synthesis.voice.Alex',
+    #     'com.apple.speech.synthesis.voice.Victoria']
+    # voice_rates = list(range(150,221,(220-180)//4))
+    voice_rates = [150,180,210]
     voice_synths = []
     variants = ['normal','phoneme']
     for v in us_voices_ids:
@@ -91,8 +94,8 @@ def synth_generator():
                 voice_synths.append(SynthVariant(v,r,o))
     def synth_for_words(words):
         all_synths = []
-        for s in voice_synths:
-            for w in words:
+        for w in words:
+            for s in voice_synths:
                 all_synths.append(s.synth_file(w))
                 # print(s)
         # return [s.synth_file(word) for s in voice_synths]