1. Added an ARPABET-to-Apple phoneme mapper (arpabet-to-apple.py)

2. Use only voices with phoneme capability, and only 3 speech rates (150, 180, 210)
2017-10-05 13:58:00 +05:30
parent 0337f0d5be
commit 67f27ac683
2 changed files with 64 additions and 6 deletions
--- a/arpabet-to-apple.py
+++ b/arpabet-to-apple.py
@@ -0,0 +1,55 @@
+#!/usr/bin/env python3
+"""Convert ARPABET <http://www.speech.cs.cmu.edu/cgi-bin/cmudict>
+to Apple's codes <https://developer.apple.com/library/content/documentation/UserExperience/Conceptual/SpeechSynthesisProgrammingGuide/Phonemes/Phonemes.html>.
+
+Reads whitespace-separated ARPABET symbols from stdin (any case; a trailing
+stress digit 0/1/2 is dropped) and prints '[[inpt PHON]] ' plus the Apple codes.
+"""
+
+import sys
+# ARPABET symbol -> Apple phoneme code, one pair per row of the table below.
+mapping = dict(row.split() for row in """
+AA AA
+AE AE
+AH UX
+AO AO
+AW AW
+AY AY
+B  b
+CH C
+D  d
+DH D
+EH EH
+ER UXr
+EY EY
+F  f
+G  g
+HH h
+IH IH
+IY IY
+JH J
+K  k
+L  l
+M  m
+N  n
+NG N
+OW OW
+OY OY
+P  p
+R  r
+S  s
+SH S
+T  t
+TH T
+UH UH
+UW UW
+V  v
+W  w
+Y  y
+Z  z
+ZH Z
+""".strip().splitlines())
+# Drop a trailing stress digit (e.g. AH0 -> AH); unknown symbols still raise KeyError.
+arpabet_phonemes = sys.stdin.read().split()
+apple_phonemes = [mapping[p.upper().rstrip('012')] for p in arpabet_phonemes]
+print('[[inpt PHON]] ' + ''.join(apple_phonemes))
--- a/tts-wav-gen.py
+++ b/tts-wav-gen.py
@@ -8,7 +8,7 @@ import re
 import subprocess


-dest_filename = lambda p: p+str(random.randint(0,10000))+'.aiff'
+dest_filename = lambda n,v,r,t: '{}-{}-{}-{}-'.format(n,v,r,t)+str(random.randint(0,10000))+'.aiff'
 dest_path = lambda p: os.path.abspath('.')+'/outputs/audio/'+p
 dest_url = lambda p: NSURL.fileURLWithPath_(dest_path(p))

@@ -53,7 +53,7 @@ class SynthVariant(object):
        return 'Synthesizer[{} - {}]({})'.format(self.name,self.rate,self.operation)

    def generate_audio(self,word):
-        fname = dest_filename(word)
+        fname = dest_filename(word,self.name,self.rate,self.operation)
        d_path = dest_path(fname)
        d_url = dest_url(fname)
        started = False
@@ -81,8 +81,11 @@ class SynthVariant(object):
 def synth_generator():
    voices_installed = NSSpeechSynthesizer.availableVoices()
    voice_attrs = [NSSpeechSynthesizer.attributesForVoice_(v) for v in  voices_installed]
-    us_voices_ids = [v['VoiceIdentifier'] for v in voice_attrs if v['VoiceLanguage'] == 'en-US']
-    voice_rates = list(range(180,221,(220-180)//4))
+    us_voices_ids = [v['VoiceIdentifier'] for v in voice_attrs if v['VoiceLanguage'] == 'en-US' and v['VoiceIdentifier'].split('.')[-1][0].isupper()]
+    # us_voices_ids = ['com.apple.speech.synthesis.voice.Fred','com.apple.speech.synthesis.voice.Alex',
+    #                  'com.apple.speech.synthesis.voice.Victoria']
+    # voice_rates = list(range(150,221,(220-180)//4))
+    voice_rates = [150,180,210]
    voice_synths = []
    variants = ['normal','phoneme']
    for v in us_voices_ids:
@@ -91,8 +94,8 @@ def synth_generator():
                voice_synths.append(SynthVariant(v,r,o))
    def synth_for_words(words):
        all_synths = []
-        for s in voice_synths:
-            for w in words:
+        for w in words:
+            for s in voice_synths:
                all_synths.append(s.synth_file(w))
            # print(s)
        # return [s.synth_file(word) for s in voice_synths]