From 67f27ac683df3fe6bbe80f41bd8b3b57fadac0d6 Mon Sep 17 00:00:00 2001
From: Malar Kannan <malar@avaz.in>
Date: Thu, 5 Oct 2017 13:58:00 +0530
Subject: [PATCH] 1. included arpabet apple phoneme mapper 2. using only voices
 with phoneme capability and 3 rates only

---
 arpabet-to-apple.py | 55 +++++++++++++++++++++++++++++++++++++++++++++
 tts-wav-gen.py      | 15 ++++++++-----
 2 files changed, 64 insertions(+), 6 deletions(-)
 create mode 100644 arpabet-to-apple.py

diff --git a/arpabet-to-apple.py b/arpabet-to-apple.py
new file mode 100644
index 0000000..dc542e4
--- /dev/null
+++ b/arpabet-to-apple.py
@@ -0,0 +1,55 @@
+#!/usr/bin/env python3
+
+"""
+Convert ARPABET <http://www.speech.cs.cmu.edu/cgi-bin/cmudict>
+to Apple's codes <https://developer.apple.com/library/content/documentation/UserExperience/Conceptual/SpeechSynthesisProgrammingGuide/Phonemes/Phonemes.html>
+"""
+
+import sys
+
+
+mapping = {s.split()[0]: s.split()[1] for s in """
+AA AA
+AE AE
+AH UX
+AO AO
+AW AW
+AY AY
+B  b
+CH C
+D  d
+DH D
+EH EH
+ER UXr
+EY EY
+F  f
+G  g
+HH h
+IH IH
+IY IY
+JH J
+K  k
+L  l
+M  m
+N  n
+NG N
+OW OW
+OY OY
+P  p
+R  r
+S  s
+SH S
+T  t
+TH T
+UH UH
+UW UW
+V  v
+W  w
+Y  y
+Z  z
+ZH Z
+""".strip().split('\n')}  # each table row is "<ARPABET symbol> <Apple code>"; first column becomes the key
+    
+arpabet_phonemes = sys.stdin.read().split()  # whitespace-separated ARPABET symbols read from stdin
+apple_phonemes = [mapping[p.upper().rstrip('012')] for p in arpabet_phonemes]  # strip CMUdict stress digits (AH0, ER1, ...) so the lookup does not raise KeyError; stress is dropped
+print('[[inpt PHON]] ' + ''.join(apple_phonemes))  # emit the concatenated Apple codes after the phoneme-input command
diff --git a/tts-wav-gen.py b/tts-wav-gen.py
index 60a62ff..4256f7e 100644
--- a/tts-wav-gen.py
+++ b/tts-wav-gen.py
@@ -8,7 +8,7 @@ import re
 import subprocess
 
 
-dest_filename = lambda p: p+str(random.randint(0,10000))+'.aiff'
+dest_filename = lambda n,v,r,t: '{}-{}-{}-{}-'.format(n,v,r,t)+str(random.randint(0,10000))+'.aiff'
 dest_path = lambda p: os.path.abspath('.')+'/outputs/audio/'+p
 dest_url = lambda p: NSURL.fileURLWithPath_(dest_path(p))
 
@@ -53,7 +53,7 @@ class SynthVariant(object):
         return 'Synthesizer[{} - {}]({})'.format(self.name,self.rate,self.operation)
 
     def generate_audio(self,word):
-        fname = dest_filename(word)
+        fname = dest_filename(word,self.name,self.rate,self.operation)
         d_path = dest_path(fname)
         d_url = dest_url(fname)
         started = False
@@ -81,8 +81,11 @@ class SynthVariant(object):
 def synth_generator():
     voices_installed = NSSpeechSynthesizer.availableVoices()
     voice_attrs = [NSSpeechSynthesizer.attributesForVoice_(v) for v in  voices_installed]
-    us_voices_ids = [v['VoiceIdentifier'] for v in voice_attrs if v['VoiceLanguage'] == 'en-US']
-    voice_rates = list(range(180,221,(220-180)//4))
+    us_voices_ids = [v['VoiceIdentifier'] for v in voice_attrs if v['VoiceLanguage'] == 'en-US' and v['VoiceIdentifier'].split('.')[-1][0].isupper()]
+    # us_voices_ids = ['com.apple.speech.synthesis.voice.Fred','com.apple.speech.synthesis.voice.Alex',
+    #                  'com.apple.speech.synthesis.voice.Victoria']
+    # voice_rates = list(range(150,221,(220-180)//4))
+    voice_rates = [150,180,210]
     voice_synths = []
     variants = ['normal','phoneme']
     for v in us_voices_ids:
@@ -91,8 +94,8 @@ def synth_generator():
                 voice_synths.append(SynthVariant(v,r,o))
     def synth_for_words(words):
         all_synths = []
-        for s in voice_synths:
-            for w in words:
+        for w in words:
+            for s in voice_synths:
                 all_synths.append(s.synth_file(w))
             # print(s)
         # return [s.synth_file(word) for s in voice_synths]