1. Included an ARPABET-to-Apple phoneme mapper

2. Using only voices with phoneme capability, at just three speaking rates
2017-10-05 13:58:00 +05:30
parent 0337f0d5be
commit 67f27ac683
2 changed files with 64 additions and 6 deletions
--- a/arpabet-to-apple.py
+++ b/arpabet-to-apple.py
@@ -0,0 +1,55 @@
 #!/usr/bin/env python3
 """
 Convert ARPABET <http://www.speech.cs.cmu.edu/cgi-bin/cmudict>
 to Apple's codes <https://developer.apple.com/library/content/documentation/UserExperience/Conceptual/SpeechSynthesisProgrammingGuide/Phonemes/Phonemes.html>
 Reads whitespace-separated ARPABET symbols from stdin and prints the
 corresponding Apple phoneme string, prefixed with the ``[[inpt PHON]]``
 command. Symbols are matched case-insensitively and a trailing CMUdict
 stress digit (0, 1 or 2) is ignored.
 """
 import sys
 # ARPABET symbol -> Apple phoneme code; one "ARPABET APPLE" pair per line.
 mapping = dict(pair.split() for pair in """
 AA AA
 AE AE
 AH UX
 AO AO
 AW AW
 AY AY
 B  b
 CH C
 D  d
 DH D
 EH EH
 ER UXr
 EY EY
 F  f
 G  g
 HH h
 IH IH
 IY IY
 JH J
 K  k
 L  l
 M  m
 N  n
 NG N
 OW OW
 OY OY
 P  p
 R  r
 S  s
 SH S
 T  t
 TH T
 UH UH
 UW UW
 V  v
 W  w
 Y  y
 Z  z
 ZH Z
 """.strip().splitlines())
 def arpabet_to_apple(phonemes):
     """Return the Apple phoneme-input string for a sequence of ARPABET symbols.
     Args:
         phonemes: iterable of ARPABET symbols in any letter case, each
             optionally followed by a stress digit (e.g. ``AH0``, ``ow1``).
     Returns:
         ``'[[inpt PHON]] '`` followed by the concatenated Apple codes.
     Raises:
         ValueError: if a symbol is not present in ``mapping``.
     """
     codes = []
     for symbol in phonemes:
         # CMUdict tags vowels with a stress digit (AH0, EY1, OW2), but the
         # table is keyed by the bare symbol, so drop the digit before lookup.
         key = symbol.upper().rstrip('012')
         try:
             codes.append(mapping[key])
         except KeyError:
             # Name the offending token instead of surfacing a bare KeyError.
             raise ValueError('unknown ARPABET phoneme: {!r}'.format(symbol)) from None
     return '[[inpt PHON]] ' + ''.join(codes)
 if __name__ == '__main__':
     try:
         print(arpabet_to_apple(sys.stdin.read().split()))
     except ValueError as err:
         # Exit non-zero with a one-line message on stderr rather than a traceback.
         sys.exit(str(err))
--- a/tts-wav-gen.py
+++ b/tts-wav-gen.py
@@ -8,7 +8,7 @@ import re
 import subprocess
-dest_filename = lambda p: p+str(random.randint(0,10000))+'.aiff'
+dest_filename = lambda n,v,r,t: '{}-{}-{}-{}-'.format(n,v,r,t)+str(random.randint(0,10000))+'.aiff'
 dest_path = lambda p: os.path.abspath('.')+'/outputs/audio/'+p
 dest_url = lambda p: NSURL.fileURLWithPath_(dest_path(p))
@@ -53,7 +53,7 @@ class SynthVariant(object):
        return 'Synthesizer[{} - {}]({})'.format(self.name,self.rate,self.operation)
    def generate_audio(self,word):
-        fname = dest_filename(word)
+        fname = dest_filename(word,self.name,self.rate,self.operation)
        d_path = dest_path(fname)
        d_url = dest_url(fname)
        started = False
@@ -81,8 +81,11 @@ class SynthVariant(object):
 def synth_generator():
    voices_installed = NSSpeechSynthesizer.availableVoices()
    voice_attrs = [NSSpeechSynthesizer.attributesForVoice_(v) for v in  voices_installed]
-    us_voices_ids = [v['VoiceIdentifier'] for v in voice_attrs if v['VoiceLanguage'] == 'en-US']
+    us_voices_ids = [v['VoiceIdentifier'] for v in voice_attrs if v['VoiceLanguage'] == 'en-US' and v['VoiceIdentifier'].split('.')[-1][0].isupper()]
-    voice_rates = list(range(180,221,(220-180)//4))
+    # us_voices_ids = ['com.apple.speech.synthesis.voice.Fred','com.apple.speech.synthesis.voice.Alex',
    #                  'com.apple.speech.synthesis.voice.Victoria']
    # voice_rates = list(range(150,221,(220-180)//4))
    voice_rates = [150,180,210]
    voice_synths = []
    variants = ['normal','phoneme']
    for v in us_voices_ids:
@@ -91,8 +94,8 @@ def synth_generator():
                voice_synths.append(SynthVariant(v,r,o))
    def synth_for_words(words):
        all_synths = []
-        for s in voice_synths:
+        for w in words:
-            for w in words:
+            for s in voice_synths:
                all_synths.append(s.synth_file(w))
            # print(s)
        # return [s.synth_file(word) for s in voice_synths]