Compare commits

...

8 Commits

Author SHA1 Message Date
Malar Kannan f44665e9b2 1. fixed softmax output and overfit the model for small sample
2. updated to run on complete data
2017-12-12 12:18:27 +05:30
Malar Kannan cc4fbe45b9 trying to overfit 2 samples with model -> doesn't seem to converge 2017-12-11 15:03:14 +05:30
Malar Kannan 8d550c58cc fixed batch normalization layer before activation 2017-12-11 14:33:56 +05:30
Malar Kannan 240ecb3f27 removed bn output layer 2017-12-11 14:12:23 +05:30
Malar Kannan 05242d5991 added batch normalization 2017-12-11 14:09:04 +05:30
Malar Kannan fea9184aec using the full data and fixed typo in model layer name 2017-12-11 13:47:30 +05:30
Malar Kannan a6543491f8 fixed empty phoneme boundary case 2017-12-11 13:05:46 +05:30
Malar Kannan d387922f7d added dense-relu/softmax layers to segment output 2017-12-11 12:30:08 +05:30
2 changed files with 30 additions and 21 deletions

View File

@@ -156,8 +156,8 @@ def create_segments_tfrecords(collection_name='story_test_segments',sample_count
f_bounds = [spec_frame(phrase_spec,b) for b in ph_bounds] f_bounds = [spec_frame(phrase_spec,b) for b in ph_bounds]
valid_bounds = [i for i in f_bounds if 0 < i < spec_n] valid_bounds = [i for i in f_bounds if 0 < i < spec_n]
b_frames = np.asarray(valid_bounds) b_frames = np.asarray(valid_bounds)
# print(spec_n,b_frames) if len(b_frames) > 0:
result[b_frames] = 1 result[b_frames] = 1
nonlocal n_records,n_spec,n_features nonlocal n_records,n_spec,n_features
n_spec = max([n_spec,spec_n]) n_spec = max([n_spec,spec_n])
n_features = spec_w n_features = spec_w
@@ -178,9 +178,10 @@ def create_segments_tfrecords(collection_name='story_test_segments',sample_count
word_groups = [i for i in audio_samples.groupby('phrase')] word_groups = [i for i in audio_samples.groupby('phrase')]
wg_sampled = reservoir_sample(word_groups,sample_count) if sample_count > 0 else word_groups wg_sampled = reservoir_sample(word_groups,sample_count) if sample_count > 0 else word_groups
# write_samples(word_groups,'all')
tr_audio_samples,te_audio_samples = train_test_split(wg_sampled,test_size=train_test_ratio) tr_audio_samples,te_audio_samples = train_test_split(wg_sampled,test_size=train_test_ratio)
write_samples(tr_audio_samples,'train') write_samples(tr_audio_samples,'train')
# write_samples(te_audio_samples,'test') write_samples(te_audio_samples,'test')
const_file = './outputs/segments/'+collection_name+'/constants.pkl' const_file = './outputs/segments/'+collection_name+'/constants.pkl'
pickle.dump((n_spec,n_features,n_records),open(const_file,'wb')) pickle.dump((n_spec,n_features,n_records),open(const_file,'wb'))
@@ -255,9 +256,9 @@ if __name__ == '__main__':
# plot_random_phrases() # plot_random_phrases()
# fix_csv('story_test_segments') # fix_csv('story_test_segments')
# plot_segments('story_test_segments') # plot_segments('story_test_segments')
# fix_csv('story_phrases') # fix_csv('story_words')
# pass # pass
create_segments_tfrecords('story_phrases', sample_count=100) create_segments_tfrecords('story_words.30', sample_count=36,train_test_ratio=0.1)
# record_generator,input_data,output_data,copy_read_consts = read_segments_tfrecords_generator('story_test') # record_generator,input_data,output_data,copy_read_consts = read_segments_tfrecords_generator('story_test')
# tr_gen = record_generator() # tr_gen = record_generator()
# for i in tr_gen: # for i in tr_gen:

View File

@@ -4,10 +4,10 @@ import numpy as np
from keras.models import Model,load_model,model_from_yaml from keras.models import Model,load_model,model_from_yaml
from keras.layers import Input,Concatenate,Lambda, Reshape, Dropout from keras.layers import Input,Concatenate,Lambda, Reshape, Dropout
from keras.layers import Dense,Conv2D, LSTM, Bidirectional, GRU from keras.layers import Dense,Conv2D, LSTM, Bidirectional, GRU
from keras.layers import BatchNormalization from keras.layers import BatchNormalization,Activation
from keras.losses import categorical_crossentropy from keras.losses import categorical_crossentropy
from keras.utils import to_categorical from keras.utils import to_categorical
from keras.optimizers import RMSprop from keras.optimizers import RMSprop,Adadelta,Adagrad,Adam,Nadam
from keras.callbacks import TensorBoard, ModelCheckpoint from keras.callbacks import TensorBoard, ModelCheckpoint
from keras import backend as K from keras import backend as K
from keras.utils import plot_model from keras.utils import plot_model
@@ -36,30 +36,38 @@ def ctc_lambda_func(args):
return K.ctc_batch_cost(labels, y_pred, input_length, label_length) return K.ctc_batch_cost(labels, y_pred, input_length, label_length)
def segment_model(input_dim): def segment_model(input_dim):
# input_dim = (100,100,1)
inp = Input(shape=input_dim) inp = Input(shape=input_dim)
cnv1 = Conv2D(filters=32, kernel_size=(5,9))(inp) cnv1 = Conv2D(filters=32, kernel_size=(5,9))(inp)
cnv2 = Conv2D(filters=1, kernel_size=(5,9))(cnv1) cnv2 = Conv2D(filters=1, kernel_size=(5,9))(cnv1)
dr_cnv2 = Dropout(rate=0.95)(cnv2) dr_cnv2 = Dropout(rate=0.95)(cnv2)
# dr_cnv2
cn_rnn_dim = (dr_cnv2.shape[1].value,dr_cnv2.shape[2].value) cn_rnn_dim = (dr_cnv2.shape[1].value,dr_cnv2.shape[2].value)
r_dr_cnv2 = Reshape(target_shape=cn_rnn_dim)(dr_cnv2) r_dr_cnv2 = Reshape(target_shape=cn_rnn_dim)(dr_cnv2)
b_gr1 = Bidirectional(GRU(512, return_sequences=True),merge_mode='sum')(r_dr_cnv2) b_gr1 = Bidirectional(GRU(512, return_sequences=True),merge_mode='sum')(r_dr_cnv2)
# b_gr1
b_gr2 = Bidirectional(GRU(512, return_sequences=True),merge_mode='sum')(b_gr1) b_gr2 = Bidirectional(GRU(512, return_sequences=True),merge_mode='sum')(b_gr1)
b_gr3 = Bidirectional(GRU(512, return_sequences=True),merge_mode='sum')(b_gr2) b_gr3 = Bidirectional(GRU(512, return_sequences=True),merge_mode='sum')(b_gr2)
# b_gr3
oup = Dense(2, activation='softmax')(b_gr3) oup = Dense(2, activation='softmax')(b_gr3)
# oup
return Model(inp, oup) return Model(inp, oup)
def simple_segment_model(input_dim): def simple_segment_model(input_dim):
inp = Input(shape=input_dim) inp = Input(shape=input_dim)
b_gr1 = Bidirectional(GRU(256, return_sequences=True),merge_mode='sum')(inp) b_gr1 = Bidirectional(LSTM(32, return_sequences=True))(inp)
# b_gr1 b_gr1 = Bidirectional(LSTM(16, return_sequences=True),merge_mode='sum')(b_gr1)
b_gr2 = Bidirectional(GRU(64, return_sequences=True),merge_mode='sum')(b_gr1) b_gr1 = LSTM(1, return_sequences=True,activation='softmax')(b_gr1)
b_gr3 = Bidirectional(GRU(1, return_sequences=True),merge_mode='sum')(b_gr2) # b_gr1 = LSTM(4, return_sequences=True)(b_gr1)
oup = Reshape(target_shape=(input_dim[0],))(b_gr3) # b_gr1 = LSTM(2, return_sequences=True)(b_gr1)
# bn_b_gr1 = BatchNormalization(momentum=0.98)(b_gr1)
# b_gr2 = GRU(64, return_sequences=True)(b_gr1)
# bn_b_gr2 = BatchNormalization(momentum=0.98)(b_gr2)
# d1 = Dense(32)(b_gr2)
# bn_d1 = BatchNormalization(momentum=0.98)(d1)
# bn_da1 = Activation('relu')(bn_d1)
# d2 = Dense(8)(bn_da1)
# bn_d2 = BatchNormalization(momentum=0.98)(d2)
# bn_da2 = Activation('relu')(bn_d2)
# d3 = Dense(1)(b_gr1)
# # bn_d3 = BatchNormalization(momentum=0.98)(d3)
# bn_da3 = Activation('softmax')(d3)
oup = Reshape(target_shape=(input_dim[0],))(b_gr1)
return Model(inp, oup) return Model(inp, oup)
def write_model_arch(mod,mod_file): def write_model_arch(mod,mod_file):
@@ -75,7 +83,7 @@ def load_model_arch(mod_file):
def train_segment(collection_name = 'test',resume_weights='',initial_epoch=0): def train_segment(collection_name = 'test',resume_weights='',initial_epoch=0):
# collection_name = 'story_test' # collection_name = 'story_test'
batch_size = 64 batch_size = 128
# batch_size = 4 # batch_size = 4
model_dir = './models/segment/'+collection_name model_dir = './models/segment/'+collection_name
create_dir(model_dir) create_dir(model_dir)
@@ -111,8 +119,8 @@ def train_segment(collection_name = 'test',resume_weights='',initial_epoch=0):
mode='auto', mode='auto',
period=1) period=1)
# train # train
rms = RMSprop() opt = RMSprop()
model.compile(loss=categorical_crossentropy, optimizer=rms, metrics=[accuracy]) model.compile(loss=categorical_crossentropy, optimizer=opt, metrics=[accuracy])
write_model_arch(model,model_dir+'/speech_segment_model_arch.yaml') write_model_arch(model,model_dir+'/speech_segment_model_arch.yaml')
epoch_n_steps = step_count(n_records,batch_size) epoch_n_steps = step_count(n_records,batch_size)
if resume_weights != '': if resume_weights != '':
@@ -133,4 +141,4 @@ def train_segment(collection_name = 'test',resume_weights='',initial_epoch=0):
if __name__ == '__main__': if __name__ == '__main__':
# pass # pass
train_segment('story_phrases','./models/segment/story_phrases.1000/speech_segment_model-final.h5',1001) train_segment('story_words')#,'./models/segment/story_phrases.1000/speech_segment_model-final.h5',1001)