updated model to use dense classifier
parent
80c0ce403e
commit
6fbf06814c
|
|
@ -107,8 +107,8 @@ def create_speech_pairs_data(audio_group='audio'):
|
||||||
def speech_model_data():
|
def speech_model_data():
|
||||||
tr_pairs = np.load('outputs/tr_pairs.npy') / 255.0
|
tr_pairs = np.load('outputs/tr_pairs.npy') / 255.0
|
||||||
te_pairs = np.load('outputs/te_pairs.npy') / 255.0
|
te_pairs = np.load('outputs/te_pairs.npy') / 255.0
|
||||||
# tr_pairs[tr_pairs < 0] = 0
|
tr_pairs[tr_pairs < 0] = 0
|
||||||
# te_pairs[te_pairs < 0] = 0
|
te_pairs[te_pairs < 0] = 0
|
||||||
tr_y = np.load('outputs/tr_y.npy')
|
tr_y = np.load('outputs/tr_y.npy')
|
||||||
te_y = np.load('outputs/te_y.npy')
|
te_y = np.load('outputs/te_y.npy')
|
||||||
return tr_pairs, te_pairs, tr_y, te_y
|
return tr_pairs, te_pairs, tr_y, te_y
|
||||||
|
|
|
||||||
|
|
@ -3,7 +3,10 @@ from __future__ import print_function
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from speech_data import speech_model_data
|
from speech_data import speech_model_data
|
||||||
from keras.models import Model,load_model
|
from keras.models import Model,load_model
|
||||||
from keras.layers import Input, Dense, Dropout, LSTM, Lambda
|
from keras.layers import Input, Dense, Dropout, LSTM, Lambda, Concatenate
|
||||||
|
# from keras.losses import categorical_crossentropy
|
||||||
|
from keras.losses import binary_crossentropy
|
||||||
|
# from keras.utils.np_utils import to_categorical
|
||||||
from keras.optimizers import RMSprop
|
from keras.optimizers import RMSprop
|
||||||
from keras.callbacks import TensorBoard, ModelCheckpoint
|
from keras.callbacks import TensorBoard, ModelCheckpoint
|
||||||
from keras import backend as K
|
from keras import backend as K
|
||||||
|
|
@ -34,20 +37,9 @@ def create_base_rnn_network(input_dim):
|
||||||
inp = Input(shape=input_dim)
|
inp = Input(shape=input_dim)
|
||||||
ls1 = LSTM(1024, return_sequences=True)(inp)
|
ls1 = LSTM(1024, return_sequences=True)(inp)
|
||||||
ls2 = LSTM(512, return_sequences=True)(ls1)
|
ls2 = LSTM(512, return_sequences=True)(ls1)
|
||||||
ls3 = LSTM(32)(ls2)
|
# ls3 = LSTM(32, return_sequences=True)(ls2)
|
||||||
return Model(inp, ls3)
|
ls4 = LSTM(32)(ls2)
|
||||||
|
return Model(inp, ls4)
|
||||||
|
|
||||||
def create_base_network(input_dim):
|
|
||||||
'''Base network to be shared (eq. to feature extraction).
|
|
||||||
'''
|
|
||||||
input = Input(shape=input_dim)
|
|
||||||
x = Dense(128, activation='relu')(input)
|
|
||||||
x = Dropout(0.1)(x)
|
|
||||||
x = Dense(128, activation='relu')(x)
|
|
||||||
x = Dropout(0.1)(x)
|
|
||||||
x = Dense(128, activation='relu')(x)
|
|
||||||
return Model(input, x)
|
|
||||||
|
|
||||||
|
|
||||||
def compute_accuracy(y_true, y_pred):
|
def compute_accuracy(y_true, y_pred):
|
||||||
|
|
@ -62,6 +54,13 @@ def accuracy(y_true, y_pred):
|
||||||
'''
|
'''
|
||||||
return K.mean(K.equal(y_true, K.cast(y_pred < 0.5, y_true.dtype)))
|
return K.mean(K.equal(y_true, K.cast(y_pred < 0.5, y_true.dtype)))
|
||||||
|
|
||||||
|
def dense_classifier(processed):
|
||||||
|
conc_proc = Concatenate()(processed)
|
||||||
|
d1 = Dense(8, activation='relu')(conc_proc)
|
||||||
|
dr1 = Dropout(0.1)(d1)
|
||||||
|
# d2 = Dense(8, activation='relu')(dr1)
|
||||||
|
# dr2 = Dropout(0.1)(d2)
|
||||||
|
return Dense(1, activation='sigmoid')(dr1)
|
||||||
|
|
||||||
def siamese_model(input_dim):
|
def siamese_model(input_dim):
|
||||||
# input_dim = (15, 1654)
|
# input_dim = (15, 1654)
|
||||||
|
|
@ -70,11 +69,12 @@ def siamese_model(input_dim):
|
||||||
input_b = Input(shape=input_dim)
|
input_b = Input(shape=input_dim)
|
||||||
processed_a = base_network(input_a)
|
processed_a = base_network(input_a)
|
||||||
processed_b = base_network(input_b)
|
processed_b = base_network(input_b)
|
||||||
distance = Lambda(
|
final_output = dense_classifier([processed_a,processed_b])
|
||||||
euclidean_distance,
|
model = Model([input_a, input_b], final_output)
|
||||||
output_shape=eucl_dist_output_shape)([processed_a, processed_b])
|
# distance = Lambda(
|
||||||
|
# euclidean_distance,
|
||||||
model = Model([input_a, input_b], distance)
|
# output_shape=eucl_dist_output_shape)([processed_a, processed_b])
|
||||||
|
# model = Model([input_a, input_b], distance)
|
||||||
return model
|
return model
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -95,12 +95,12 @@ def train_siamese():
|
||||||
embeddings_freq=0,
|
embeddings_freq=0,
|
||||||
embeddings_layer_names=None,
|
embeddings_layer_names=None,
|
||||||
embeddings_metadata=None)
|
embeddings_metadata=None)
|
||||||
cp_file_fmt = './models/siamese_speech_model-{epoch:02d}-epoch-{val_acc:0.2f}\
|
cp_file_fmt = './models/siamese_speech_model-{epoch:02d}-epoch-{val_loss:0.2f}\
|
||||||
-acc.h5'
|
-acc.h5'
|
||||||
|
|
||||||
cp_cb = ModelCheckpoint(
|
cp_cb = ModelCheckpoint(
|
||||||
cp_file_fmt,
|
cp_file_fmt,
|
||||||
monitor='val_acc',
|
monitor='val_loss',
|
||||||
verbose=0,
|
verbose=0,
|
||||||
save_best_only=False,
|
save_best_only=False,
|
||||||
save_weights_only=False,
|
save_weights_only=False,
|
||||||
|
|
@ -108,7 +108,7 @@ def train_siamese():
|
||||||
period=1)
|
period=1)
|
||||||
# train
|
# train
|
||||||
rms = RMSprop(lr=0.001)
|
rms = RMSprop(lr=0.001)
|
||||||
model.compile(loss=contrastive_loss, optimizer=rms, metrics=[accuracy])
|
model.compile(loss=binary_crossentropy, optimizer=rms, metrics=[accuracy])
|
||||||
model.fit(
|
model.fit(
|
||||||
[tr_pairs[:, 0], tr_pairs[:, 1]],
|
[tr_pairs[:, 0], tr_pairs[:, 1]],
|
||||||
tr_y,
|
tr_y,
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue