From 9d700f18ca50482b6ff4c336f8d7d5f676742bb8 Mon Sep 17 00:00:00 2001
From: Malar Kannan <malar@avaz.in>
Date: Wed, 4 Oct 2017 23:21:28 +0530
Subject: [PATCH] implemented phoneme/voice/rate variant generation

---
 {input => inputs}/all_stories_hs.json |   0
 tts-wav-gen.py                        | 100 +++++++++++++++++++++++---
 2 files changed, 89 insertions(+), 11 deletions(-)
 rename {input => inputs}/all_stories_hs.json (100%)

diff --git a/input/all_stories_hs.json b/inputs/all_stories_hs.json
similarity index 100%
rename from input/all_stories_hs.json
rename to inputs/all_stories_hs.json
diff --git a/tts-wav-gen.py b/tts-wav-gen.py
index 1cf0baa..91b1f23 100644
--- a/tts-wav-gen.py
+++ b/tts-wav-gen.py
@@ -1,23 +1,101 @@
 import objc
-from AppKit import NSSpeechSynthesizer
-from Foundation import NSURL
+from AppKit import NSSpeechSynthesizer,NSSpeechInputModeProperty,NSSpeechModePhoneme
+from Foundation import NSURL,NSError
 import json
+import random
 import os
+import re
 
 
+dest_filename = lambda p: p+str(random.randint(0,10000))+'.aiff'  # p + random int in [0, 10000] + '.aiff'; uniqueness is not guaranteed
+dest_path = lambda p: os.path.abspath('.')+'/outputs/audio/'+p  # absolute path of p under <cwd>/outputs/audio/
+dest_url = lambda p: NSURL.fileURLWithPath_(dest_path(p))  # file NSURL pointing at dest_path(p)
 
-sp = NSSpeechSynthesizer.alloc().init()
-sp.setVolume_(100)
+class SynthFile(object):
+    """Record describing one synthesized audio file (word, filename, voice, rate, operation)."""
+    def __init__(self,word, filename,voice,rate,operation):
+        super(SynthFile, self).__init__()
+        self.word = word
+        self.filename = filename
+        self.voice = voice
+        self.rate = rate
+        self.operation = operation
 
-dest_path = os.path.abspath('.')+'/outputs/'
-dest_url = lambda p: NSURL.fileURLWithPath_(dest_path+p+'.aiff')
-def generate_aiff_word(word):
-    sp.startSpeakingString_toURL_(word,dest_url(word))
+    def get_json(self):  # dict form used for the JSON output; note that `word` is not included
+        return {'filename':self.filename,'voice':self.voice,
+                'rate':self.rate,'operation':self.operation}
+
+    def get_csv(self):  # one newline-terminated row: word,voice,rate,operation,filename (no quoting/escaping)
+        return '{},{},{},{},{}\n'.format(self.word,self.voice,self.rate,self.operation,self.filename)
+
+class SynthVariant(object):
+    """One (voice identifier, rate, operation) combination backed by a text and a phoneme-mode synthesizer."""
+    def __init__(self,identifier,rate,op):
+        super(SynthVariant, self).__init__()
+        sp = NSSpeechSynthesizer.alloc().init()  # synthesizer left in its default input mode
+        sp.setVolume_(100)
+        sp.setVoice_(identifier)
+        sp.setRate_(rate)
+        self.synth = sp
+        p_syn = NSSpeechSynthesizer.alloc().init()  # second synthesizer with the same volume/voice/rate
+        p_syn.setVolume_(100)
+        p_syn.setVoice_(identifier)
+        p_syn.setRate_(rate)
+        p_syn.setObject_forProperty_error_(NSSpeechModePhoneme,NSSpeechInputModeProperty,None)  # switch input mode to phonemes; result is ignored
+        self.phone_synth = p_syn
+        self.identifier = identifier
+        self.rate = rate
+        self.name = identifier.split('.')[-1]  # last dot-separated component of the voice identifier
+        self.operation = op
+
+
+    def synth_file(self,word):  # synthesize `word` to a freshly named file and return its SynthFile record
+        fname = dest_filename(word)
+        d_url = dest_url(fname)
+        if self.operation == 'normal':
+            self.synth.startSpeakingString_toURL_(word,d_url)  # NOTE(review): return value ignored; confirm the file is fully written before it is used
+        else:  # any operation other than 'normal' takes the phoneme path
+            orig_phon = self.synth.phonemesFromText_(word)
+            phon = re.sub('[0-9]','',orig_phon)  # drop all digits (presumably stress markers - confirm against the phoneme notation docs)
+            self.phone_synth.startSpeakingString_toURL_(phon,d_url)  # return value ignored here as well
+        return SynthFile(word,fname,self.name,self.rate,self.operation)
+
+
+def synth_generator():  # build every voice/rate/variant synthesizer once; return a function word -> list of SynthFile
+    voices_installed = NSSpeechSynthesizer.availableVoices()
+    voice_attrs = [NSSpeechSynthesizer.attributesForVoice_(v) for v in  voices_installed]
+    us_voices_ids = [v['VoiceIdentifier'] for v in voice_attrs if v['VoiceLanguage'] == 'en-US']  # keep only voices whose language is exactly 'en-US'
+    voice_rates = list(range(180,221,(220-180)//5))  # step 8 -> [180, 188, 196, 204, 212, 220]
+    voice_synths = []
+    variants = ['normal','phoneme']
+    for v in us_voices_ids:
+        for r in voice_rates:
+            for o in variants:
+                voice_synths.append(SynthVariant(v,r,o))  # one variant per (voice, rate, operation) combination
+    def synth_for_word(word):  # closure over voice_synths: renders `word` with every variant
+        return [s.synth_file(word) for s in voice_synths]
+    return synth_for_word
+
+def write_synths(synth_list,fname,csv=False):  # write synth_list to fname as CSV rows (csv=True) or as a single JSON array
+    f = open(fname,'w')  # NOTE: no context manager; the handle is not closed if a write below raises
+    if csv:
+        for s in synth_list:
+            f.write(s.get_csv())  # rows only; no header line is written
+    else:
+        json.dump([s.get_json() for s in synth_list],f)
+    f.close()
 
 def generate_audio_for_stories():
-    stories_data = json.load(open('./input/all_stories_hs.json'))
+    stories_data = json.load(open('./inputs/all_stories_hs.json'))  # NOTE: the handle from open() is never explicitly closed
     word_list = [t[0] for i in stories_data.values() for t in i]
+    word_audio_synth = synth_generator()  # function mapping a word to its list of SynthFile records
+    all_synths = []
     for word in word_list:
-        generate_aiff_word(word)
+        words_synths = word_audio_synth(word)
+        all_synths.extend(words_synths)  # flatten per-word results into a single list
+    return all_synths  # SynthFile records for every word, in word_list order
 
-generate_audio_for_stories()
+# synths = synth_generator()('education')
+synths = generate_audio_for_stories()  # runs at import time: synthesizes audio for every story word
+write_synths(synths,'./outputs/synth_data.csv',True)  # CSV listing of the generated files
+write_synths(synths,'./outputs/synths.json')  # JSON listing of the generated files