speech-scoring/speech_samplegen.py

import objc
from AppKit import NSSpeechSynthesizer, NSSpeechInputModeProperty
from AppKit import NSSpeechModePhoneme
from Foundation import NSURL
import json
import csv
import random
import os
import re
import subprocess
import time
from tqdm import tqdm

from generate_similar import similar_phoneme_phrase,similar_phrase
from speech_tools import hms_string,create_dir,format_filename,reservoir_sample

# Name of the generation run; it selects both the audio output folder and the
# CSV file written under ./outputs/.
OUTPUT_NAME = 'test_5_words'
# Absolute folder (with trailing slash) that receives the audio files.
dest_dir = '{}/outputs/{}/'.format(os.path.abspath('.'), OUTPUT_NAME)
# Relative path of the CSV file listing every generated sample.
dest_file = './outputs/{}.csv'.format(OUTPUT_NAME)


def dest_filename(w, v, r, t):
    """Build a sanitized ``.aiff`` file name for one generated sample.

    The name joins the four arguments with dashes and appends a random
    integer between 0 and 10000 (inclusive) before the extension; the result
    is passed through ``format_filename``.

    Args:
        w: spoken text (the caller in this file passes the word/phrase).
        v: voice name.
        r: speaking rate.
        t: variant label.

    Returns:
        Whatever ``format_filename`` returns for the assembled name.
    """
    name_parts = (w, v, r, t, str(random.randint(0, 10000)))
    return format_filename('{}-{}-{}-{}-{}.aiff'.format(*name_parts))


def dest_path(v, r, n):
    """Return the output location of a sample as ``(full_path, relative_path)``.

    Args:
        v: voice name, used as the first folder level.
        r: speaking rate, converted with ``str`` for the second folder level.
        n: file name.

    Returns:
        A 2-tuple: ``dest_dir`` joined with the relative path, and the
        relative path ``<v>/<r>/<n>`` itself.
    """
    relative = '/'.join((v, str(r), n))
    return dest_dir + relative, relative


def cli_gen_audio(speech_cmd, rate, voice, out_path):
    """Render ``speech_cmd`` to an audio file with the ``say`` command.

    Args:
        speech_cmd: text (optionally containing embedded speech commands) to speak.
        rate: speaking rate, passed to ``-r`` after ``str`` conversion.
        voice: voice name passed to ``-v``.
        out_path: output file path passed to ``-o``.

    The exit status of ``say`` is not inspected and nothing is returned.
    """
    # No shell is involved, so these single quotes reach `say` literally as
    # part of the text argument.
    quoted_text = "'" + speech_cmd + "'"
    say_args = ['say', '-v', voice, '-r', str(rate), '-o', out_path,
                quoted_text]
    subprocess.call(say_args)


class SynthFile(object):
    """Metadata record describing one synthesized audio file.

    Attributes are stored exactly as passed to the constructor:
    ``word``, ``phoneme``, ``filename``, ``voice``, ``voice_lang``, ``rate``
    and ``variant`` (received through the ``operation`` parameter).
    """

    def __init__(self, word, phon, filename, voice, voice_lang, rate, operation):
        super(SynthFile, self).__init__()
        self.word = word
        self.phoneme = phon
        self.filename = filename
        self.voice = voice
        self.voice_lang = voice_lang
        self.rate = rate
        # The constructor argument is called `operation`, but the value is
        # kept under the attribute name `variant`.
        self.variant = operation

    def get_json(self):
        """Return a JSON-serializable dict with the file's key properties.

        The variant label is exposed under the ``'operation'`` key.
        """
        return {
            'filename': self.filename,
            'voice': self.voice,
            'rate': self.rate,
            # Read from `variant`: no `operation` attribute is ever set, so
            # looking that up raised AttributeError.
            'operation': self.variant
        }

    def get_csv(self):
        """Return the record as one comma-joined line ending in a newline.

        No quoting or escaping is applied to the values.
        """
        return ','.join(self.get_values()) + '\n'

    def get_values(self):
        """Return the record's columns as a list of strings.

        Column order: word, phoneme, voice, voice_lang, rate, variant,
        filename.
        """
        cols = [self.word, self.phoneme, self.voice,
                self.voice_lang, self.rate, self.variant,
                self.filename]
        return [str(c) for c in cols]

class SynthVariant(object):
    """A single installed voice at a single speaking rate.

    Holds two ``NSSpeechSynthesizer`` instances for the voice (a plain one and
    one switched to phoneme input mode) and produces audio files for the
    'low', 'medium' and 'high' variants of a text.
    """

    def __init__(self, identifier, voice, lang, rate):
        """Create the synthesizers and probe phoneme support.

        Args:
            identifier: voice identifier handed to ``initWithVoice_``.
            voice: voice name (also used for the ``say`` command and folders).
            lang: voice language, recorded on generated samples.
            rate: speaking rate applied to both synthesizers.
        """
        super(SynthVariant, self).__init__()

        def make_synth():
            # Both synthesizers share voice, volume and rate.
            engine = NSSpeechSynthesizer.alloc().initWithVoice_(identifier)
            engine.setVolume_(100)
            engine.setRate_(rate)
            return engine

        self.synth = make_synth()
        self.phone_synth = make_synth()
        self.phone_synth.setObject_forProperty_error_(
            NSSpeechModePhoneme, NSSpeechInputModeProperty, None)
        self.identifier = identifier
        self.rate = rate
        self.name = voice
        self.lang = lang
        self.phoneme_capable = self.is_phoneme_capable()

    def __repr__(self):
        return 'Synthesizer[{} - {}]'.format(self.name, self.rate)

    def is_phoneme_capable(self):
        """Return True when the voice yields a non-empty phoneme string for 'water'."""
        return self.synth.phonemesFromText_('water') != ''

    def generate_audio(self, text, variant):
        """Synthesize one variant of ``text`` to a file and describe the result.

        Args:
            text: word or phrase to synthesize.
            variant: 'low' speaks ``text`` as is and records the synthesizer's
                phonemes; 'medium' speaks the output of
                ``similar_phoneme_phrase`` prefixed with ``[[inpt PHON]]``;
                'high' speaks the output of ``similar_phrase``. Any other
                value speaks ``text`` and records an empty phoneme string.

        Returns:
            A ``SynthFile`` carrying the text, the recorded phoneme/phrase,
            the relative output path, and the voice name, language, rate and
            variant.
        """
        source_phonemes = self.synth.phonemesFromText_(text)
        if variant == 'low':
            phoneme, spoken = source_phonemes, text
        elif variant == 'medium':
            phoneme = similar_phoneme_phrase(source_phonemes)
            spoken = '[[inpt PHON]] ' + phoneme
        elif variant == 'high':
            phoneme = similar_phrase(text)
            spoken = phoneme
        else:
            phoneme, spoken = '', text

        file_name = dest_filename(text, self.name, self.rate, variant)
        full_path, relative_path = dest_path(self.name, self.rate, file_name)
        # Audio is produced through the `say` CLI rather than through the
        # NSSpeechSynthesizer objects held by this instance.
        cli_gen_audio(spoken, self.rate, self.name, full_path)
        return SynthFile(text, phoneme, relative_path, self.name, self.lang,
                         self.rate, variant)

    def create_synth_dirs(self):
        """Create ``<dest_dir>/<voice>/<rate>`` when the voice is phoneme capable."""
        if not self.phoneme_capable:
            return
        create_dir(dest_dir + self.name + '/' + str(self.rate))

    @staticmethod
    def voices_for_lang(lang):
        """List installed voices for ``lang``, skipping neuter-gender voices.

        Returns:
            A list of ``(identifier, name, language)`` tuples.
        """
        installed = NSSpeechSynthesizer.availableVoices()
        attr_dicts = [
            NSSpeechSynthesizer.attributesForVoice_(voice_id)
            for voice_id in installed
        ]
        selected = []
        for attrs in attr_dicts:
            if attrs['VoiceLanguage'] != lang:
                continue
            if attrs['VoiceGender'] == 'VoiceGenderNeuter':
                continue
            selected.append((attrs['VoiceIdentifier'],
                             attrs['VoiceName'],
                             attrs['VoiceLanguage']))
        return selected

    @classmethod
    def synth_with(cls, voice_params, rate=180):
        """Alternate constructor from an ``(identifier, name, language)`` tuple."""
        identifier, voice, lang = voice_params
        return cls(identifier, voice, lang, rate)


def synth_generator():
    """Prepare the usable synthesizers and return a word-rendering function.

    Every 'en-US' voice reported by ``SynthVariant.voices_for_lang`` is
    instantiated at rates 150, 180 and 210; synthesizers that are not phoneme
    capable are discarded. The output root ``dest_dir`` is created up front.

    Returns:
        ``synth_for_words(words, writer)``: for each kept synthesizer and each
        of the variants 'low', 'medium' and 'high', generates audio for every
        item of ``words`` and passes the resulting record to ``writer``.
    """
    us_voices_ids = SynthVariant.voices_for_lang('en-US')
    voice_rates = [150, 180, 210]
    voice_synths = []
    create_dir(dest_dir)
    for vp in us_voices_ids:
        for r in voice_rates:
            s = SynthVariant.synth_with(vp, r)
            if s.phoneme_capable:
                print('Adding ', s)
                voice_synths.append(s)
            else:
                print('Discarding phoneme incapable ', s)

    def synth_for_words(words, writer):
        start_time = time.time()
        for s in voice_synths:
            s.create_synth_dirs()
            for v in ['low', 'medium', 'high']:
                prog = tqdm(words)
                prog.set_postfix(variant=v, voice=s.name, rate=s.rate)
                try:
                    # Iterate the bar that carries the postfix/description;
                    # wrapping `words` in a second tqdm left this one stuck
                    # at zero and drew a duplicate bar.
                    for w in prog:
                        prog.set_description(
                            'Synthesizing text:"{}"'.format(w))
                        synthed = s.generate_audio(w, v)
                        writer(synthed)
                finally:
                    # Release the bar even if synthesis or writing fails.
                    prog.close()
        end_time = time.time()
        time_str = hms_string(end_time - start_time)
        print("It took {} to synthsize all variants.".format(time_str))
    return synth_for_words

def synth_logger(fname, csv_mode=False):
    """Open ``fname`` for writing and return ``(writer, closer)`` callables.

    Args:
        fname: path of the output file; it is opened (and truncated) here.
        csv_mode: when true, ``writer(s)`` immediately writes
            ``s.get_values()`` as one CSV row. Otherwise ``writer(s)`` only
            collects ``s`` and the list of ``s.get_json()`` dicts is dumped
            as JSON when ``closer()`` is called.

    Returns:
        A 2-tuple ``(writer, closer)``. ``closer()`` must be called once to
        flush and close the file.
    """
    f = open(fname, 'w')
    s_csv_w = csv.writer(f, quoting=csv.QUOTE_MINIMAL)
    synth_list = []

    def csv_writer(s):
        s_csv_w.writerow(s.get_values())

    def json_writer(s):
        synth_list.append(s)

    def close_file():
        try:
            if not csv_mode:
                json.dump([s.get_json() for s in synth_list], f)
        finally:
            # Close the handle even when serialization raises.
            f.close()

    if csv_mode:
        return csv_writer, close_file
    return json_writer, close_file

def generate_audio_for_text_list(text_list):
    """Generate every voice/rate/variant sample for ``text_list``.

    Rows are logged to ``dest_file`` in CSV mode. An error raised while
    synthesizing is printed to stdout (traceback) and does not propagate, so
    the rows written so far are kept; the log file is closed in every case.

    Args:
        text_list: words or phrases to synthesize.
    """
    import sys
    import traceback

    (writer, closer) = synth_logger(dest_file, csv_mode=True)
    try:
        # Setup failures still propagate, but the log file is closed first.
        synth_for_texts = synth_generator()
        try:
            synth_for_texts(text_list, writer)
        except Exception:
            # Best effort: report and keep the partial log. KeyboardInterrupt
            # and SystemExit are deliberately not swallowed here.
            traceback.print_exc(file=sys.stdout)
    finally:
        closer()

def generate_audio_for_stories():
    '''
    Generates the audio sample variants for the list of words in the stories.

    Reads './inputs/all_stories.json', flattens the items of all its values,
    removes duplicates, sorts them and synthesizes the resulting list.
    '''
    story_file = './inputs/all_stories.json'
    # Context manager so the input file is closed after parsing.
    with open(story_file) as story_fh:
        stories_data = json.load(story_fh)
    text_list = sorted({t for i in stories_data.values() for t in i})
    generate_audio_for_text_list(text_list)

def generate_test_audio_for_stories(sample_count=0):
    '''
    Picks a list of words from the wordlist that are not in story words
    and generates the variants.

    Story words are the first element of each entry found in the values of
    './inputs/all_stories_hs.json'. Candidates come from
    './inputs/wordlist.txt' (one per line, with newline and underscore
    characters stripped from both ends) and must be longer than 4 characters.

    sample_count: when greater than 0, that many candidates are drawn with
    reservoir_sample; otherwise every candidate is used.
    '''
    story_file = './inputs/all_stories_hs.json'
    # Context managers so both input files are closed once read.
    with open(story_file) as story_fh:
        stories_data = json.load(story_fh)
    text_set = {t[0] for i in stories_data.values() for t in i}
    with open('./inputs/wordlist.txt', 'r') as wordlist_fh:
        word_list = [line.strip('\n_') for line in wordlist_fh]
    new_word_list = [i for i in word_list if i not in text_set and len(i) > 4]
    test_words = reservoir_sample(new_word_list, sample_count) if sample_count > 0 else new_word_list
    generate_audio_for_text_list(test_words)


if __name__ == '__main__':
    # Default run: synthesize 5 sampled non-story words.
    # Alternative entry points: generate_audio_for_text_list([...]) for an
    # explicit list, or generate_audio_for_stories() for all story texts.
    generate_test_audio_for_stories(sample_count=5)
generated voice files using ios api 2017-10-04 12:21:24 +00:00			`import objc`
formatted 2017-10-25 08:06:41 +00:00			`from AppKit import NSSpeechSynthesizer, NSSpeechInputModeProperty`
			`from AppKit import NSSpeechModePhoneme`
			`from Foundation import NSURL`
generated voice files using ios api 2017-10-04 12:21:24 +00:00			`import json`
using csv writer instead as comma in phrases are mis-aligning columns 2017-11-07 06:26:09 +00:00			`import csv`
implemented phoneme/voice/rate variant genration 2017-10-04 17:51:28 +00:00			`import random`
generated voice files using ios api 2017-10-04 12:21:24 +00:00			`import os`
implemented phoneme/voice/rate variant genration 2017-10-04 17:51:28 +00:00			`import re`
1. using cli say instead of api since api generates empty responses sometimes 2. generating all words voices for each variants 2017-10-05 05:32:38 +00:00			`import subprocess`
computing phoneme/word variant for each word in a phrase 2017-11-03 09:18:55 +00:00			`import time`
ported to tqdm 2017-11-14 17:26:13 +00:00			`from tqdm import tqdm`
generated voice files using ios api 2017-10-04 12:21:24 +00:00
computing phoneme/word variant for each word in a phrase 2017-11-03 09:18:55 +00:00			`from generate_similar import similar_phoneme_phrase,similar_phrase`
generating test for phone seg model 2017-12-28 14:31:44 +00:00			`from speech_tools import hms_string,create_dir,format_filename,reservoir_sample`
wip high variant phoneme 2017-10-26 12:36:14 +00:00
generating test for phone seg model 2017-12-28 14:31:44 +00:00			`OUTPUT_NAME = 'test_5_words'`
formatted 2017-10-25 08:06:41 +00:00			`dest_dir = os.path.abspath('.') + '/outputs/' + OUTPUT_NAME + '/'`
			`dest_file = './outputs/' + OUTPUT_NAME + '.csv'`


refactored sample generation code 2017-10-26 09:57:22 +00:00			`def dest_filename(w, v, r, t):`
implemented evaluation of test data with model by overfitting on smaller dataset 2017-11-14 12:24:44 +00:00			`rand_no = str(random.randint(0, 10000))`
			`fname = '{}-{}-{}-{}-{}.aiff'.format(w, v, r, t, rand_no)`
			`sanitized = format_filename(fname)`
			`return sanitized`
formatted 2017-10-25 08:06:41 +00:00

			`def dest_path(v, r, n):`
writing to csv proactively 2017-10-26 10:28:25 +00:00			`rel = v + '/' + str(r) + '/' + n`
fixed progress 2017-10-26 10:48:17 +00:00			`return (dest_dir + rel), rel`
formatted 2017-10-25 08:06:41 +00:00

			`def cli_gen_audio(speech_cmd, rate, voice, out_path):`
			`subprocess.call(`
			`['say', '-v', voice, '-r',`
ported to tqdm 2017-11-14 17:26:13 +00:00			`str(rate), '-o', out_path, "'"+speech_cmd+"'"])`
formatted 2017-10-25 08:06:41 +00:00
1. using cli say instead of api since api generates empty responses sometimes 2. generating all words voices for each variants 2017-10-05 05:32:38 +00:00
implemented phoneme/voice/rate variant genration 2017-10-04 17:51:28 +00:00			`class SynthFile(object):`
			`"""docstring for SynthFile."""`
formatted 2017-10-25 08:06:41 +00:00
wip high variant phoneme 2017-10-26 12:36:14 +00:00			`def __init__(self, word, phon, filename, voice, voice_lang, rate, operation):`
implemented phoneme/voice/rate variant genration 2017-10-04 17:51:28 +00:00			`super(SynthFile, self).__init__()`
			`self.word = word`
1. implemented spectrogram generator for audio files 2. imported siamese network class (wip) 3. added similarity measure based phoneme neighbor generator 4. fixed samplegen variants code 5. create triplets (wip) 6. updates 2017-10-13 11:10:57 +00:00			`self.phoneme = phon`
implemented phoneme/voice/rate variant genration 2017-10-04 17:51:28 +00:00			`self.filename = filename`
			`self.voice = voice`
wip high variant phoneme 2017-10-26 12:36:14 +00:00			`self.voice_lang = voice_lang`
implemented phoneme/voice/rate variant genration 2017-10-04 17:51:28 +00:00			`self.rate = rate`
1. implemented spectrogram generator for audio files 2. imported siamese network class (wip) 3. added similarity measure based phoneme neighbor generator 4. fixed samplegen variants code 5. create triplets (wip) 6. updates 2017-10-13 11:10:57 +00:00			`self.variant = operation`
generated voice files using ios api 2017-10-04 12:21:24 +00:00
implemented phoneme/voice/rate variant genration 2017-10-04 17:51:28 +00:00			`def get_json(self):`
formatted 2017-10-25 08:06:41 +00:00			`return {`
			`'filename': self.filename,`
			`'voice': self.voice,`
			`'rate': self.rate,`
			`'operation': self.operation`
			`}`
implemented phoneme/voice/rate variant genration 2017-10-04 17:51:28 +00:00
			`def get_csv(self):`
wip high variant phoneme 2017-10-26 12:36:14 +00:00			`cols = [self.word, self.phoneme, self.voice,`
			`self.voice_lang, self.rate, self.variant,`
			`self.filename]`

			`return ','.join([str(c) for c in cols])+'\n'`
formatted 2017-10-25 08:06:41 +00:00
using csv writer instead as comma in phrases are mis-aligning columns 2017-11-07 06:26:09 +00:00			`def get_values(self):`
			`cols = [self.word, self.phoneme, self.voice,`
			`self.voice_lang, self.rate, self.variant,`
			`self.filename]`
			`return [str(c) for c in cols]`
implemented phoneme/voice/rate variant genration 2017-10-04 17:51:28 +00:00
			`class SynthVariant(object):`
			`"""docstring for SynthVariant."""`
formatted 2017-10-25 08:06:41 +00:00
refactored sample generation code 2017-10-26 09:57:22 +00:00			`def __init__(self, identifier, voice, lang, rate):`
implemented phoneme/voice/rate variant genration 2017-10-04 17:51:28 +00:00			`super(SynthVariant, self).__init__()`
1. using cli say instead of api since api generates empty responses sometimes 2. generating all words voices for each variants 2017-10-05 05:32:38 +00:00			`self.synth = NSSpeechSynthesizer.alloc().initWithVoice_(identifier)`
			`self.synth.setVolume_(100)`
			`self.synth.setRate_(rate)`
formatted 2017-10-25 08:06:41 +00:00			`self.phone_synth = NSSpeechSynthesizer.alloc().initWithVoice_(`
			`identifier)`
1. using cli say instead of api since api generates empty responses sometimes 2. generating all words voices for each variants 2017-10-05 05:32:38 +00:00			`self.phone_synth.setVolume_(100)`
			`self.phone_synth.setRate_(rate)`
formatted 2017-10-25 08:06:41 +00:00			`self.phone_synth.setObject_forProperty_error_(`
			`NSSpeechModePhoneme, NSSpeechInputModeProperty, None)`
implemented phoneme/voice/rate variant genration 2017-10-04 17:51:28 +00:00			`self.identifier = identifier`
			`self.rate = rate`
refactored sample generation code 2017-10-26 09:57:22 +00:00			`self.name = voice`
			`self.lang = lang`
discarding phoneme incapable synthesizers 2017-10-26 11:21:32 +00:00			`self.phoneme_capable = self.is_phoneme_capable()`
fixed out of range exception 2017-10-31 04:59:24 +00:00
implemented phoneme/voice/rate variant genration 2017-10-04 17:51:28 +00:00
1. using cli say instead of api since api generates empty responses sometimes 2. generating all words voices for each variants 2017-10-05 05:32:38 +00:00			`def __repr__(self):`
refactored sample generation code 2017-10-26 09:57:22 +00:00			`return 'Synthesizer[{} - {}]'.format(self.name, self.rate)`
implemented phoneme/voice/rate variant genration 2017-10-04 17:51:28 +00:00
discarding phoneme incapable synthesizers 2017-10-26 11:21:32 +00:00			`def is_phoneme_capable(self):`
			`orig_phon = self.synth.phonemesFromText_('water')`
			`return orig_phon != ''`

computing phoneme/word variant for each word in a phrase 2017-11-03 09:18:55 +00:00			`def generate_audio(self, text, variant):`
formatted 2017-10-25 08:06:41 +00:00			`orig_phon, phoneme, phon_cmd = self.synth.phonemesFromText_(`
computing phoneme/word variant for each word in a phrase 2017-11-03 09:18:55 +00:00			`text), '', text`
1. implemented spectrogram generator for audio files 2. imported siamese network class (wip) 3. added similarity measure based phoneme neighbor generator 4. fixed samplegen variants code 5. create triplets (wip) 6. updates 2017-10-13 11:10:57 +00:00			`if variant == 'low':`
			`# self.synth.startSpeakingString_toURL_(word,d_url)`
			`phoneme = orig_phon`
			`elif variant == 'medium':`
computing phoneme/word variant for each word in a phrase 2017-11-03 09:18:55 +00:00			`phoneme = similar_phoneme_phrase(orig_phon)`
formatted 2017-10-25 08:06:41 +00:00			`phon_cmd = '[[inpt PHON]] ' + phoneme`
1. implemented spectrogram generator for audio files 2. imported siamese network class (wip) 3. added similarity measure based phoneme neighbor generator 4. fixed samplegen variants code 5. create triplets (wip) 6. updates 2017-10-13 11:10:57 +00:00			`elif variant == 'high':`
computing phoneme/word variant for each word in a phrase 2017-11-03 09:18:55 +00:00			`phoneme = similar_phrase(text)`
implemented tts gen variants 2017-10-27 13:23:22 +00:00			`phon_cmd = phoneme`
1. implemented spectrogram generator for audio files 2. imported siamese network class (wip) 3. added similarity measure based phoneme neighbor generator 4. fixed samplegen variants code 5. create triplets (wip) 6. updates 2017-10-13 11:10:57 +00:00			`# elif variant == 'long':`
formatted 2017-10-25 08:06:41 +00:00			`# if phon != '':`
			`# self.phone_synth.startSpeakingString_toURL_(phon,d_url)`
			`# else:`
			`# self.synth.startSpeakingString_toURL_(word,d_url)`
computing phoneme/word variant for each word in a phrase 2017-11-03 09:18:55 +00:00			`fname = dest_filename(text, self.name, self.rate, variant)`
writing to csv proactively 2017-10-26 10:28:25 +00:00			`d_path, r_path = dest_path(self.name, self.rate, fname)`
formatted 2017-10-25 08:06:41 +00:00			`# d_url = NSURL.fileURLWithPath_(d_path)`
			`cli_gen_audio(phon_cmd, self.rate, self.name, d_path)`
computing phoneme/word variant for each word in a phrase 2017-11-03 09:18:55 +00:00			`return SynthFile(text, phoneme, r_path, self.name, self.lang, self.rate, variant)`
implemented phoneme/voice/rate variant genration 2017-10-04 17:51:28 +00:00
fixed out of range exception 2017-10-31 04:59:24 +00:00			`def create_synth_dirs(self):`
			`if self.phoneme_capable:`
			`create_dir(dest_dir + self.name + '/' + str(self.rate))`

implemented tts gen variants 2017-10-27 13:23:22 +00:00			`@staticmethod`
			`def voices_for_lang(lang):`
			`voices_installed = NSSpeechSynthesizer.availableVoices()`
			`voice_attrs = [`
			`NSSpeechSynthesizer.attributesForVoice_(v) for v in voices_installed`
			`]`
			`# sk = [k for k in voice_attrs[0].keys() if k not in [`
			`# 'VoiceIndividuallySpokenCharacters', 'VoiceSupportedCharacters']]`
			`# s_attrs = [[v[i] for i in sk] for v in voice_attrs if 'VoiceShowInFullListOnly' in v`
			`# and 'VoiceRelativeDesirability' in v]`
			`return [`
			`(v['VoiceIdentifier'],`
			`v['VoiceName'],`
			`v['VoiceLanguage']) for v in voice_attrs`
			`if v['VoiceLanguage'] == lang`
			`and v['VoiceGender'] != 'VoiceGenderNeuter'`
			`]`

			`@classmethod`
			`def synth_with(cls,voice_params,rate=180):`
			`identifier,voice,lang = voice_params`
			`return cls(identifier,voice,lang,rate)`

implemented phoneme/voice/rate variant genration 2017-10-04 17:51:28 +00:00
			`def synth_generator():`
implemented tts gen variants 2017-10-27 13:23:22 +00:00			`us_voices_ids = SynthVariant.voices_for_lang('en-US')`
ported to tqdm 2017-11-14 17:26:13 +00:00			`voice_rates = [150, 180, 210]#, 250]`
implemented phoneme/voice/rate variant genration 2017-10-04 17:51:28 +00:00			`voice_synths = []`
1. implemented spectrogram generator for audio files 2. imported siamese network class (wip) 3. added similarity measure based phoneme neighbor generator 4. fixed samplegen variants code 5. create triplets (wip) 6. updates 2017-10-13 11:10:57 +00:00			`create_dir(dest_dir)`
implemented tts gen variants 2017-10-27 13:23:22 +00:00			`for vp in us_voices_ids:`
implemented phoneme/voice/rate variant genration 2017-10-04 17:51:28 +00:00			`for r in voice_rates:`
implemented tts gen variants 2017-10-27 13:23:22 +00:00			`s = SynthVariant.synth_with(vp,r)`
discarding phoneme incapable synthesizers 2017-10-26 11:21:32 +00:00			`if s.phoneme_capable:`
			`print('Adding ', s)`
			`voice_synths.append(s)`
			`else:`
			`print('Discarding phoneme incapable ', s)`
formatted 2017-10-25 08:06:41 +00:00
writing to csv proactively 2017-10-26 10:28:25 +00:00			`def synth_for_words(words, writer):`
computing phoneme/word variant for each word in a phrase 2017-11-03 09:18:55 +00:00			`start_time = time.time()`
fixed progress 2017-10-26 10:48:17 +00:00			`prog_title = "Synthesizing {} words : ".format(len(words))`
generating all words for a every voice first 2017-10-27 13:34:09 +00:00			`for s in voice_synths:`
fixed out of range exception 2017-10-31 04:59:24 +00:00			`s.create_synth_dirs()`
generating all words for a every voice first 2017-10-27 13:34:09 +00:00			`for v in ['low', 'medium', 'high']:`
ported to tqdm 2017-11-14 17:26:13 +00:00			`prog = tqdm(words)`
			`prog.set_postfix(variant=v,voice=s.name,rate=s.rate)`
			`for w in tqdm(words):`
fixed dupliate words 2017-11-15 12:57:49 +00:00			`prog.set_description('Synthesizing text:"{}"'.format(w))`
writing to csv proactively 2017-10-26 10:28:25 +00:00			`synthed = s.generate_audio(w, v)`
			`writer(synthed)`
fixed dupliate words 2017-11-15 12:57:49 +00:00			`prog.close()`
computing phoneme/word variant for each word in a phrase 2017-11-03 09:18:55 +00:00			`end_time = time.time()`
			`time_str = hms_string(end_time - start_time)`
			`print("It took {} to synthsize all variants.".format(time_str))`
1. using cli say instead of api since api generates empty responses sometimes 2. generating all words voices for each variants 2017-10-05 05:32:38 +00:00			`return synth_for_words`
implemented phoneme/voice/rate variant genration 2017-10-04 17:51:28 +00:00
ported to tqdm 2017-11-14 17:26:13 +00:00			`def synth_logger(fname, csv_mode=False):`
writing to csv proactively 2017-10-26 10:28:25 +00:00			`f = open(fname, 'w')`
using csv writer instead as comma in phrases are mis-aligning columns 2017-11-07 06:26:09 +00:00			`s_csv_w = csv.writer(f, quoting=csv.QUOTE_MINIMAL)`
writing to csv proactively 2017-10-26 10:28:25 +00:00			`def csv_writer(s):`
using csv writer instead as comma in phrases are mis-aligning columns 2017-11-07 06:26:09 +00:00			`s_csv_w.writerow(s.get_values())`
writing to csv proactively 2017-10-26 10:28:25 +00:00			`synth_list = []`

			`def json_writer(s):`
			`synth_list.append(s)`

			`def close_file():`
ported to tqdm 2017-11-14 17:26:13 +00:00			`if csv_mode:`
writing to csv proactively 2017-10-26 10:28:25 +00:00			`f.close()`
			`else:`
			`json.dump([s.get_json() for s in synth_list], f)`
			`f.close()`
ported to tqdm 2017-11-14 17:26:13 +00:00			`if csv_mode:`
writing to csv proactively 2017-10-26 10:28:25 +00:00			`return csv_writer, close_file`
			`else:`
			`return json_writer, close_file`

generating randome samples 2017-11-02 07:44:08 +00:00			`def generate_audio_for_text_list(text_list):`
ported to tqdm 2017-11-14 17:26:13 +00:00			`(writer, closer) = synth_logger(dest_file, csv_mode=True)`
computing phoneme/word variant for each word in a phrase 2017-11-03 09:18:55 +00:00			`synth_for_texts = synth_generator()`
generating randome samples 2017-11-02 07:44:08 +00:00			`try:`
computing phoneme/word variant for each word in a phrase 2017-11-03 09:18:55 +00:00			`synth_for_texts(text_list, writer)`
generating randome samples 2017-11-02 07:44:08 +00:00			`except:`
			`import traceback`
			`import sys`
			`traceback.print_exc(file=sys.stdout)`
			`pass`
			`closer()`
writing to csv proactively 2017-10-26 10:28:25 +00:00
generated voice files using ios api 2017-10-04 12:21:24 +00:00			`def generate_audio_for_stories():`
Added README.md describing the workflow 2017-12-29 07:44:37 +00:00			`'''`
			`Generates the audio sample variants for the list of words in the stories`
			`'''`
generating randome samples 2017-11-02 07:44:08 +00:00			`# story_file = './inputs/all_stories_hs.json'`
			`story_file = './inputs/all_stories.json'`
refactored sample generation code 2017-10-26 09:57:22 +00:00			`stories_data = json.load(open(story_file))`
fixed dupliate words 2017-11-15 12:57:49 +00:00			`# text_list_dup = [t[0] for i in stories_data.values() for t in i]`
			`text_list_dup = [t for i in stories_data.values() for t in i]`
			`text_list = sorted(list(set(text_list_dup)))`
all phrases 2017-11-15 13:00:43 +00:00			`generate_audio_for_text_list(text_list)`
generated voice files using ios api 2017-10-04 12:21:24 +00:00
generating test for phone seg model 2017-12-28 14:31:44 +00:00			`def generate_test_audio_for_stories(sample_count=0):`
Added README.md describing the workflow 2017-12-29 07:44:37 +00:00			`'''`
			`Picks a list of words from the wordlist that are not in story words`
			`and generates the variants`
			`'''`
implemeted test data sample generation 2017-11-07 04:53:31 +00:00			`story_file = './inputs/all_stories_hs.json'`
			`# story_file = './inputs/all_stories.json'`
			`stories_data = json.load(open(story_file))`
fixed dupliate words 2017-11-15 12:57:49 +00:00			`text_list_dup = [t[0] for i in stories_data.values() for t in i]`
			`text_list = sorted(list(set(text_list_dup)))`
implemeted test data sample generation 2017-11-07 04:53:31 +00:00			`# text_list = [i.replace('-','') for g in stories_data.values() for i in g]`
			`word_list = [i.strip('\n_') for i in open('./inputs/wordlist.txt','r').readlines()]`
			`text_set = set(text_list)`
			`new_word_list = [i for i in word_list if i not in text_set and len(i) > 4]`
generating test for phone seg model 2017-12-28 14:31:44 +00:00			`# test_words = new_word_list[:int(len(text_list)/5+1)]`
			`test_words = reservoir_sample(new_word_list,sample_count) if sample_count > 0 else new_word_list`
implemeted test data sample generation 2017-11-07 04:53:31 +00:00			`generate_audio_for_text_list(test_words)`


discarding phoneme incapable synthesizers 2017-10-26 11:21:32 +00:00			`if __name__ == '__main__':`
generating test for phone seg model 2017-12-28 14:31:44 +00:00			`generate_test_audio_for_stories(5)`
removing - from phrases before synthesizing audio 2017-11-03 10:00:13 +00:00			`# generate_audio_for_text_list(['I want to go home','education'])`
generating test for phone seg model 2017-12-28 14:31:44 +00:00			`# generate_audio_for_stories()`