Compare commits

..

4 Commits

Author SHA1 Message Date
Malar Kannan ae46578aec Merge branch 'master' of ssh://invmac/~/Public/repos/speech-scoring 2017-11-23 17:50:47 +05:30
Malar Kannan 3d7542271d implemented tts segmentation data generation 2017-11-23 17:50:11 +05:30
Malar Kannan 54f38ca775 removed a layer using lstm 2017-11-22 15:46:42 +05:30
Malar Kannan 6355db4af7 adding missing model-dir for training constants copying 2017-11-22 15:04:02 +05:30
5 changed files with 219 additions and 4 deletions

View File

@ -41,6 +41,7 @@ partd==0.3.8
pexpect==4.2.1 pexpect==4.2.1
pickleshare==0.7.4 pickleshare==0.7.4
pkg-resources==0.0.0 pkg-resources==0.0.0
praat-parselmouth==0.2.0
progressbar2==3.34.3 progressbar2==3.34.3
prompt-toolkit==1.0.15 prompt-toolkit==1.0.15
protobuf==3.4.0 protobuf==3.4.0

View File

@ -254,7 +254,7 @@ if __name__ == '__main__':
# create_spectrogram_tfrecords('story_all',sample_count=25) # create_spectrogram_tfrecords('story_all',sample_count=25)
# fix_csv('story_words_test') # fix_csv('story_words_test')
#fix_csv('story_phrases') #fix_csv('story_phrases')
create_spectrogram_tfrecords('story_phrases',sample_count=100,train_test_ratio=0.1) create_spectrogram_tfrecords('story_phrases',sample_count=500,train_test_ratio=0.1)
# create_spectrogram_tfrecords('audio',sample_count=50) # create_spectrogram_tfrecords('audio',sample_count=50)
# read_siamese_tfrecords_generator('audio') # read_siamese_tfrecords_generator('audio')
# padd_zeros_siamese_tfrecords('audio') # padd_zeros_siamese_tfrecords('audio')

View File

@ -18,9 +18,9 @@ def create_base_rnn_network(input_dim):
inp = Input(shape=input_dim) inp = Input(shape=input_dim)
# ls0 = LSTM(512, return_sequences=True)(inp) # ls0 = LSTM(512, return_sequences=True)(inp)
ls1 = Bidirectional(LSTM(128, return_sequences=True))(inp) ls1 = Bidirectional(LSTM(128, return_sequences=True))(inp)
ls2 = LSTM(128, return_sequences=True)(ls1) #ls2 = LSTM(128, return_sequences=True)(ls1)
# ls3 = LSTM(32, return_sequences=True)(ls2) # ls3 = LSTM(32, return_sequences=True)(ls2)
ls4 = LSTM(64)(ls2) ls4 = LSTM(64)(ls1)
# d1 = Dense(128, activation='relu')(ls4) # d1 = Dense(128, activation='relu')(ls4)
#d2 = Dense(64, activation='relu')(ls2) #d2 = Dense(64, activation='relu')(ls2)
return Model(inp, ls4) return Model(inp, ls4)
@ -75,7 +75,7 @@ def train_siamese(audio_group = 'audio'):
log_dir = './logs/'+audio_group log_dir = './logs/'+audio_group
create_dir(log_dir) create_dir(log_dir)
tr_gen_fn,te_pairs,te_y,copy_read_consts = read_siamese_tfrecords_generator(audio_group,batch_size=batch_size,test_size=batch_size) tr_gen_fn,te_pairs,te_y,copy_read_consts = read_siamese_tfrecords_generator(audio_group,batch_size=batch_size,test_size=batch_size)
n_step,n_features,n_records = copy_read_consts() n_step,n_features,n_records = copy_read_consts(model_dir)
tr_gen = tr_gen_fn() tr_gen = tr_gen_fn()
input_dim = (n_step, n_features) input_dim = (n_step, n_features)

141
speech_similar.py Normal file
View File

@ -0,0 +1,141 @@
import pandas as pd
import pronouncing
import re
import numpy as np
import random
# mapping = {
# s.split()[0]: s.split()[1]
# for s in """
# AA AA
# AE AE
# AH UX
# AO AO
# AW AW
# AY AY
# B b
# CH C
# D d
# DH D
# EH EH
# ER UXr
# EY EY
# F f
# G g
# HH h
# IH IH
# IY IY
# JH J
# K k
# L l
# M m
# N n
# NG N
# OW OW
# OY OY
# P p
# R r
# S s
# SH S
# T t
# TH T
# UH UH
# UW UW
# V v
# W w
# Y y
# Z z
# ZH Z
# """.strip().split('\n')
# }
# sim_mat = pd.read_csv('./similarity.csv', header=0, index_col=0)
#
#
# def convert_ph(ph):
# stress_level = re.search("(\w+)([0-9])", ph)
# if stress_level:
# return stress_level.group(2) + mapping[stress_level.group(1)]
# else:
# return mapping[ph]
#
#
# def sim_mat_to_apple_table(smt):
# colnames = [convert_ph(ph) for ph in smt.index.tolist()]
# smt = pd.DataFrame(np.nan_to_num(smt.values))
# fsmt = (smt.T + smt)
# np.fill_diagonal(fsmt.values, 100.0)
# asmt = pd.DataFrame.copy(fsmt)
# asmt.columns = colnames
# asmt.index = colnames
# apple_sim_table = asmt.stack().reset_index()
# apple_sim_table.columns = ['q', 'r', 's']
# return apple_sim_table
#
#
# apple_sim_table = sim_mat_to_apple_table(sim_mat)
#
#
# def top_match(ph):
# selected = apple_sim_table[(apple_sim_table.q == ph)
# & (apple_sim_table.s < 100) &
# (apple_sim_table.s >= 70)]
# tm = ph
# if len(selected) > 0:
# tm = pd.DataFrame.sort_values(selected, 's', ascending=False).iloc[0].r
# return tm
# Symbols of the Apple speech-synthesizer phoneme alphabet.
# parse_apple_phonemes() matches input prefixes against this list; the
# multi-letter uppercase entries are vowels (leading char in 'AEIOU'),
# single letters are consonants.  '%' and '@' are presumably the
# silence/breath markers of Apple's alphabet -- TODO confirm against docs.
apple_phonemes = [
    '%', '@', 'AE', 'EY', 'AO', 'AX', 'IY', 'EH', 'IH', 'AY', 'IX', 'AA', 'UW',
    'UH', 'UX', 'OW', 'AW', 'OY', 'b', 'C', 'd', 'D', 'f', 'g', 'h', 'J', 'k',
    'l', 'm', 'n', 'N', 'p', 'r', 's', 'S', 't', 'T', 'v', 'w', 'y', 'z', 'Z'
]
class ApplePhoneme(object):
    """A single phoneme in Apple's TTS phoneme alphabet.

    Holds the phoneme symbol, its stress level, and a vowel flag.
    str() renders the Apple textual form: the stress digit is prefixed
    only for vowels with a non-zero stress.
    """

    def __init__(self, phone, stress, vowel=False):
        super(ApplePhoneme, self).__init__()
        self.phone = phone    # phoneme symbol, e.g. 'AE' or 'b'
        self.stress = stress  # stress level, 0-2
        self.vowel = vowel    # True for vowel phonemes

    def __str__(self):
        # Only stressed vowels carry the leading stress digit.
        if self.vowel and self.stress > 0:
            return str(self.stress) + self.phone
        return self.phone

    def __repr__(self):
        return "'{}'".format(str(self))

    def adjust_stress(self):
        """Randomly switch to one of the other stress levels in {0, 1, 2}."""
        alternatives = [level for level in range(3) if level != self.stress]
        self.stress = random.choice(alternatives)
def parse_apple_phonemes(ph_str):
    """Parse an Apple phoneme string into a list of ApplePhoneme objects.

    Repeatedly consumes the shortest leading prefix that is either a
    known phoneme, a stress digit followed by a phoneme (always treated
    as a vowel), or a non-alphanumeric prefix (punctuation / separator).
    If at some point no prefix matches at all, the remainder of the
    string is silently dropped.
    """
    parsed = []
    rest = ph_str
    while rest:
        for cut in range(1, len(rest) + 1):
            head, tail = rest[:cut], rest[cut:]
            if head in apple_phonemes:
                parsed.append(ApplePhoneme(head, 0, head[0] in 'AEIOU'))
            elif head[0].isdigit() and head[1:] in apple_phonemes:
                parsed.append(ApplePhoneme(head[1:], int(head[0]), True))
            elif not head.isalnum():
                parsed.append(ApplePhoneme(head, 0, False))
            else:
                continue  # grow the prefix and try again
            rest = tail
            break
        else:
            return parsed  # nothing matched; drop the unparseable remainder
    return parsed
def similar_phoneme_word(ph_str):
    """Return ph_str with the stress of one randomly chosen vowel altered.

    Parses the Apple phoneme string, picks one vowel phoneme at random,
    re-rolls its stress level, and re-assembles the string.  A word with
    no vowel phonemes is returned re-assembled but otherwise unchanged
    (the original code raised IndexError via random.choice([]) here).
    """
    phons = parse_apple_phonemes(ph_str)
    vowels = [p for p in phons if p.vowel]
    if vowels:  # guard: consonant-only input has no stress to perturb
        random.choice(vowels).adjust_stress()
    return ''.join(str(p) for p in phons)
def similar_phoneme_phrase(ph_str):
    """Apply similar_phoneme_word to every whitespace-separated word."""
    altered = [similar_phoneme_word(word) for word in ph_str.split()]
    return ' '.join(altered)
def similar_word(word_str):
    """Return a random rhyme of word_str, or word_str itself if none exist."""
    candidates = pronouncing.rhymes(word_str)
    if not candidates:
        return word_str
    return random.choice(candidates)
def similar_phrase(ph_str):
    """Replace each word of the phrase with a rhyming word."""
    return ' '.join(map(similar_word, ph_str.split()))

73
speech_tts_queue.py Normal file
View File

@ -0,0 +1,73 @@
import objc
from AppKit import *
from Foundation import NSURL
from PyObjCTools import AppHelper
from time import time
# Symbols of the Apple speech-synthesizer phoneme alphabet, indexed by the
# integer phoneme code delivered to speechSynthesizer:willSpeakPhoneme:,
# so the order of this list must not change.  '%' and '@' are presumably
# the silence/breath markers -- TODO confirm against Apple's docs.
apple_phonemes = [
    '%', '@', 'AE', 'EY', 'AO', 'AX', 'IY', 'EH', 'IH', 'AY', 'IX', 'AA', 'UW',
    'UH', 'UX', 'OW', 'AW', 'OY', 'b', 'C', 'd', 'D', 'f', 'g', 'h', 'J', 'k',
    'l', 'm', 'n', 'N', 'p', 'r', 's', 'S', 't', 'T', 'v', 'w', 'y', 'z', 'Z'
]
# (phoneme symbol, timestamp) pairs appended as the synthesizer reports
# phoneme boundaries; consumed when speaking finishes.
# (A stray no-op statement `len(apple_phonemes)` was removed here.)
speech_phoneme_data = []
class SpeechDelegate (NSObject):
    """NSSpeechSynthesizer delegate that records phoneme boundary timings."""

    def speechSynthesizer_willSpeakWord_ofString_(self, sender, word, text):
        '''Log each word range just before the synthesizer speaks it.'''
        print("Speaking word {} in sentence {}".format(word, text))

    def speechSynthesizer_willSpeakPhoneme_(self, sender, phoneme):
        '''Record a (symbol, timestamp) pair for every phoneme boundary.'''
        phon_ch = apple_phonemes[phoneme]
        speech_phoneme_data.append((phon_ch, time()))
        print("phoneme boundary for {} time {}".format(phon_ch, time()))

    def speechSynthesizer_didFinishSpeaking_(self, synth, didFinishSpeaking):
        '''Close the recording with a final '%' marker and print durations.'''
        speech_phoneme_data.append(('%', time()))
        print("finished speaking time {}".format(time()))
        # Duration of each phoneme = gap to the next recorded boundary.
        diff_time = [
            (cur[0], nxt[1] - cur[1])
            for cur, nxt in zip(speech_phoneme_data, speech_phoneme_data[1:])
        ]
        print(diff_time)
# del SpeechDelegate
class Delegate (NSObject):
    """Application delegate: logs lifecycle events; quits when the window closes."""

    def applicationDidFinishLaunching_(self, aNotification):
        '''Called automatically when the application has launched'''
        print("Window, World!")

    def windowWillClose_(self, aNotification):
        '''Called automatically when the window is closed'''
        print("Window has been closed")
        # Closing the window terminates the whole application.
        NSApp().terminate_(self)
def main():
    """Speak a test sentence, recording phoneme timings via SpeechDelegate.

    Sets up an NSSpeechSynthesizer with a SpeechDelegate, starts speaking,
    then runs the AppKit event loop so delegate callbacks are delivered.
    """
    speech_delg = SpeechDelegate.alloc().init()
    # NOTE(review): debug invocation left in -- fires the finished-speaking
    # handler before any speech happens; remove once confirmed unneeded.
    speech_delg.speechSynthesizer_didFinishSpeaking_('t', True)
    voices = NSSpeechSynthesizer.availableVoices()
    # NOTE(review): voice chosen by magic index 2; the available-voice order
    # is machine-dependent -- prefer selecting by a known voice identifier.
    identifier = voices[2]
    # (A stray no-op `time()` call was removed here.)
    alex_voice = NSSpeechSynthesizer.alloc().initWithVoice_(identifier)
    alex_voice.setDelegate_(speech_delg)
    alex_voice.startSpeakingString_("This is a test for speech synthesis generation")
    # Create a new application instance ...
    a = NSApplication.sharedApplication()
    # ... and create its delegate.  Note the use of the Objective-C
    # constructors below, because Delegate is a subclass of an
    # Objective-C class, NSObject.
    delegate = Delegate.alloc().init()
    # Tell the application which delegate object to use.
    a.setDelegate_(delegate)
    AppHelper.runEventLoop()


if __name__ == '__main__':
    main()