Implemented the model; TODO: implement CTC and training queueing logic

master
Malar Kannan 2017-11-28 19:10:19 +05:30
parent 1928fce4e8
commit 0b1152b5c3
4 changed files with 24 additions and 26 deletions

View File

@@ -41,7 +41,6 @@ parso==0.1.0
 partd==0.3.8
 pexpect==4.2.1
 pickleshare==0.7.4
-pkg-resources==0.0.0
 praat-parselmouth==0.2.0
 progressbar2==3.34.3
 prompt-toolkit==1.0.15

View File

@@ -7,5 +7,6 @@ def fix_csv(collection_name = 'test'):
 def segment_data_gen(collection_name = 'test'):
-    # collection_name = 'test'
+    collection_name = 'test'
     seg_data = pd.read_csv('./outputs/'+collection_name+'.fixed.csv',index_col=0)
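As a side note on the "training queueing logic" still marked TODO in the commit message, here is a minimal sketch of how segment_data_gen could feed training as an endless batch generator over the fixed CSV. This is an assumption-heavy illustration: the 'features' and 'label' column names are hypothetical placeholders, not the real CSV layout.

import numpy as np
import pandas as pd

def segment_batch_gen(collection_name='test', batch_size=32):
    # Hypothetical generator sketch; 'features' and 'label' are placeholder
    # column names, not taken from this repository.
    seg_data = pd.read_csv('./outputs/' + collection_name + '.fixed.csv', index_col=0)
    while True:  # fit_generator-style endless loop
        shuffled = seg_data.sample(frac=1)  # reshuffle on every pass
        for start in range(0, len(shuffled), batch_size):
            chunk = shuffled.iloc[start:start + batch_size]
            x = np.stack(chunk['features'].values)  # placeholder column
            y = chunk['label'].values               # placeholder column
            yield x, y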

View File

@@ -2,8 +2,9 @@ from __future__ import absolute_import
 from __future__ import print_function
 import numpy as np
 from keras.models import Model,load_model,model_from_yaml
-from keras.layers import Input,Concatenate,Lambda, BatchNormalization, Dropout
-from keras.layers import Dense, LSTM, Bidirectional, GRU
+from keras.layers import Input,Concatenate,Lambda, Reshape, Dropout
+from keras.layers import Dense,Conv2D, LSTM, Bidirectional, GRU
+from keras.layers import BatchNormalization
 from keras.losses import categorical_crossentropy
 from keras.utils import to_categorical
 from keras.optimizers import RMSprop
@@ -11,34 +12,28 @@ from keras.callbacks import TensorBoard, ModelCheckpoint
 from keras import backend as K
 from keras.utils import plot_model
 from speech_tools import create_dir,step_count
-from speech_data import segment_data_gen
+from segment_data import segment_data_gen
+# TODO implement ctc losses
+# https://github.com/fchollet/keras/blob/master/examples/image_ocr.py
 
 def accuracy(y_true, y_pred):
     '''Compute classification accuracy with a fixed threshold on distances.
     '''
     return K.mean(K.equal(y_true, K.cast(y_pred > 0.5, y_true.dtype)))
 
-def dense_classifier(processed):
-    conc_proc = Concatenate()(processed)
-    d1 = Dense(64, activation='relu')(conc_proc)
-    # dr1 = Dropout(0.1)(d1)
-    # d2 = Dense(128, activation='relu')(d1)
-    d3 = Dense(8, activation='relu')(d1)
-    # dr2 = Dropout(0.1)(d2)
-    return Dense(2, activation='softmax')(d3)
 
 def segment_model(input_dim):
     inp = Input(shape=input_dim)
     # ls0 = LSTM(512, return_sequences=True)(inp)
-    ls1 = LSTM(128, return_sequences=True)(inp)
-    ls2 = LSTM(64, return_sequences=True)(ls1)
-    # ls3 = LSTM(32, return_sequences=True)(ls2)
-    ls4 = LSTM(32)(ls2)
-    d1 = Dense(64, activation='relu')(ls4)
-    d3 = Dense(8, activation='relu')(d1)
-    oup = Dense(2, activation='softmax')(d3)
-    return Model(inp, oup)
+    cnv1 = Conv2D(filters=512, kernel_size=(5,9))(inp)
+    cnv2 = Conv2D(filters=1, kernel_size=(5,9))(cnv1)
+    dr_cnv2 = Dropout(rate=0.95)(cnv2)
+    cn_rnn_dim = (dr_cnv2.shape[1].value,dr_cnv2.shape[2].value)
+    r_dr_cnv2 = Reshape(target_shape=cn_rnn_dim)(dr_cnv2)
+    b_gr1 = Bidirectional(GRU(512, return_sequences=True))(r_dr_cnv2)
+    b_gr2 = Bidirectional(GRU(512, return_sequences=True))(b_gr1)
+    b_gr3 = Bidirectional(GRU(512))(b_gr2)
+    return Model(inp, b_gr3)
 
 def write_model_arch(mod,mod_file):
     model_f = open(mod_file,'w')
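A note on the shape bookkeeping in the new segment_model: both Conv2D layers use the default 'valid' padding, so each trims (kernel_size - 1) from the time and frequency axes, and filters=1 in the second convolution leaves a single channel that the Reshape squeezes out before the bidirectional GRUs. Below is a minimal sketch with an assumed input shape; the (200, 40, 1) dimensions are illustrative only, and it assumes the TensorFlow backend, where static dimensions expose .value.

from keras.models import Model
from keras.layers import Input, Conv2D, Dropout, Reshape

inp = Input(shape=(200, 40, 1))                      # assumed (time, freq, channels)
cnv1 = Conv2D(filters=512, kernel_size=(5, 9))(inp)  # valid padding -> (196, 32, 512)
cnv2 = Conv2D(filters=1, kernel_size=(5, 9))(cnv1)   # -> (192, 24, 1)
dr = Dropout(rate=0.95)(cnv2)
# Static dims are known here, so the trailing channel axis can be dropped for the RNN:
rnn_dim = (dr.shape[1].value, dr.shape[2].value)     # (192, 24)
r = Reshape(target_shape=rnn_dim)(dr)
print(Model(inp, r).output_shape)                    # (None, 192, 24)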
@@ -99,10 +94,11 @@ def train_segment(collection_name = 'test'):
     model.save(model_dir+'/speech_segment_model-final.h5')
     y_pred = model.predict([te_pairs[:, 0], te_pairs[:, 1]])
-    te_acc = compute_accuracy(te_y, y_pred)
-    print('* Accuracy on test set: %0.2f%%' % (100 * te_acc))
+    # te_acc = compute_accuracy(te_y, y_pred)
+    # print('* Accuracy on test set: %0.2f%%' % (100 * te_acc))
 
 if __name__ == '__main__':
+    import pdb; pdb.set_trace()
     train_segment('test')
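For the CTC TODO above, here is a minimal sketch of the loss wiring in the style of the linked image_ocr example. Everything below is illustrative: the layer sizes, the num_classes and max_label_len values, and the auxiliary length inputs are assumptions, not code from this commit.

from keras.models import Model
from keras.layers import Input, Dense, GRU, Bidirectional, Lambda, TimeDistributed
from keras.optimizers import RMSprop
from keras import backend as K

num_classes = 29      # assumed alphabet size (a blank class is added below)
max_label_len = 16    # assumed maximum transcript length
feat_dim = (100, 39)  # assumed (timesteps, features)

def ctc_lambda_func(args):
    # K.ctc_batch_cost expects softmax outputs over (timesteps, num_classes + 1)
    y_pred, labels, input_length, label_length = args
    return K.ctc_batch_cost(labels, y_pred, input_length, label_length)

inp = Input(shape=feat_dim)
rnn = Bidirectional(GRU(128, return_sequences=True))(inp)
y_pred = TimeDistributed(Dense(num_classes + 1, activation='softmax'))(rnn)

labels = Input(name='labels', shape=(max_label_len,), dtype='float32')
input_length = Input(name='input_length', shape=(1,), dtype='int64')
label_length = Input(name='label_length', shape=(1,), dtype='int64')
loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')(
    [y_pred, labels, input_length, label_length])

train_model = Model([inp, labels, input_length, label_length], loss_out)
# The Lambda output already *is* the loss, so compile with a pass-through loss:
train_model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=RMSprop())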

View File

@@ -20,9 +20,11 @@ def create_base_rnn_network(input_dim):
     inp = Input(shape=input_dim)
     # ls0 = LSTM(512, return_sequences=True)(inp)
     ls1 = LSTM(128, return_sequences=True)(inp)
-    ls2 = LSTM(64, return_sequences=True)(ls1)
+    bn_ls1 = BatchNormalization(momentum=0.98)(ls1)
+    ls2 = LSTM(64, return_sequences=True)(bn_ls1)
+    bn_ls2 = BatchNormalization(momentum=0.98)(ls2)
     # ls3 = LSTM(32, return_sequences=True)(ls2)
-    ls4 = LSTM(32)(ls2)
+    ls4 = LSTM(32)(bn_ls2)
     # d1 = Dense(128, activation='relu')(ls4)
     #d2 = Dense(64, activation='relu')(ls2)
     return Model(inp, ls4)
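create_base_rnn_network reads like the shared branch of a two-input distance model (the accuracy() threshold on distances and the model.predict([te_pairs[:, 0], te_pairs[:, 1]]) call in the previous file point the same way). Below is a hedged sketch of that wiring, assuming the standard Keras siamese pattern; the euclidean_distance helper and the input_dim value are hypothetical, not taken from this commit.

from keras.models import Model
from keras.layers import Input, Lambda
from keras import backend as K

def euclidean_distance(vects):
    # Hypothetical distance helper for illustration.
    x, y = vects
    return K.sqrt(K.maximum(K.sum(K.square(x - y), axis=1, keepdims=True), K.epsilon()))

input_dim = (100, 39)                      # assumed (timesteps, features)
base = create_base_rnn_network(input_dim)  # the shared branch defined above

inp_a = Input(shape=input_dim)
inp_b = Input(shape=input_dim)
proc_a = base(inp_a)                       # the same weights process both inputs
proc_b = base(inp_b)
dist = Lambda(euclidean_distance)([proc_a, proc_b])
siamese = Model([inp_a, inp_b], dist)      # trained against 0/1 pair labels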