parent 77821093cb
commit e6f0c8b21b

360 Siamese.ipynb

@@ -1,360 +0,0 @@
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "'''Train a Siamese MLP on pairs of digits from the MNIST dataset.\n",
    "\n",
    "It follows Hadsell-et-al.'06 [1] by computing the Euclidean distance on the\n",
    "output of the shared network and by optimizing the contrastive loss (see paper\n",
    "for more details).\n",
    "\n",
    "[1] \"Dimensionality Reduction by Learning an Invariant Mapping\"\n",
    "    http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf\n",
    "\n",
    "Gets to 97.2% test accuracy after 20 epochs.\n",
    "2 seconds per epoch on a Titan X Maxwell GPU\n",
    "'''\n",
    "from __future__ import absolute_import\n",
    "from __future__ import print_function\n",
    "import numpy as np\n",
    "\n",
    "# import random\n",
    "# from keras.datasets import mnist\n",
    "from speech_data import speech_model_data\n",
    "from keras.models import Model\n",
    "from keras.layers import Input, Dense, Dropout, SimpleRNN, LSTM, Lambda\n",
    "# Dense, Dropout, Input, Lambda, LSTM, SimpleRNN\n",
    "from keras.optimizers import RMSprop, SGD\n",
    "from keras.callbacks import TensorBoard\n",
    "from keras import backend as K\n",
    "\n",
    "\n",
    "def euclidean_distance(vects):\n",
    "    x, y = vects\n",
    "    return K.sqrt(K.maximum(K.sum(K.square(x - y), axis=1, keepdims=True),\n",
    "                              K.epsilon()))\n",
    "\n",
    "\n",
    "def eucl_dist_output_shape(shapes):\n",
    "    shape1, shape2 = shapes\n",
    "    return (shape1[0], 1)\n",
    "\n",
    "\n",
    "def contrastive_loss(y_true, y_pred):\n",
    "    '''Contrastive loss from Hadsell-et-al.'06\n",
    "    http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf\n",
    "    '''\n",
    "    margin = 1\n",
    "    # print(y_true, y_pred)\n",
    "    return K.mean(y_true * K.square(y_pred) +\n",
    "                  (1 - y_true) * K.square(K.maximum(margin - y_pred, 0)))\n",
    "\n",
    "\n",
    "def create_base_rnn_network(input_dim):\n",
    "    '''Base network to be shared (eq. to feature extraction).\n",
    "    '''\n",
    "    inp = Input(shape=input_dim)\n",
    "    # d1 = Dense(1024, activation='sigmoid')(inp)\n",
    "    # # d2 = Dense(2, activation='sigmoid')(d1)\n",
    "    ls1 = LSTM(1024, return_sequences=True)(inp)\n",
    "    ls2 = LSTM(512, return_sequences=True)(ls1)\n",
    "    ls3 = LSTM(32)(ls2)  # , return_sequences=True\n",
    "    # sr2 = SimpleRNN(128, return_sequences=True)(sr1)\n",
    "    # sr3 = SimpleRNN(32)(sr2)\n",
    "    # x = Dense(128, activation='relu')(sr1)\n",
    "    return Model(inp, ls3)\n",
    "\n",
    "def create_base_network(input_dim):\n",
    "    '''Base network to be shared (eq. to feature extraction).\n",
    "    '''\n",
    "    input = Input(shape=input_dim)\n",
    "    x = Dense(128, activation='relu')(input)\n",
    "    x = Dropout(0.1)(x)\n",
    "    x = Dense(128, activation='relu')(x)\n",
    "    x = Dropout(0.1)(x)\n",
    "    x = Dense(128, activation='relu')(x)\n",
    "    return Model(input, x)\n",
    "\n",
    "def compute_accuracy(y_true, y_pred):\n",
    "    '''Compute classification accuracy with a fixed threshold on distances.\n",
    "    '''\n",
    "    pred = y_pred.ravel() < 0.5\n",
    "    return np.mean(pred == y_true)\n",
    "\n",
    "\n",
    "def accuracy(y_true, y_pred):\n",
    "    '''Compute classification accuracy with a fixed threshold on distances.\n",
    "    '''\n",
    "    return K.mean(K.equal(y_true, K.cast(y_pred < 0.5, y_true.dtype)))\n",
    "\n",
    "\n",
    "# the data, shuffled and split between train and test sets\n",
    "tr_pairs, te_pairs, tr_y, te_y = speech_model_data()\n",
    "\n",
    "%matplotlib inline\n",
    "import matplotlib.pyplot as plt\n",
    "def plot_spec(ims):\n",
    "    timebins, freqbins = np.shape(ims)\n",
    "    # import pdb;pdb.set_trace()\n",
    "# plt.figure(figsize=(15, 7.5))\n",
    "    plt.imshow(np.transpose(ims), origin=\"lower\", aspect=\"auto\", cmap=\"jet\", interpolation=\"none\")\n",
    "    plt.colorbar()\n",
    "    xlocs = np.float32(np.linspace(0, timebins-1, 5))\n",
    "    plt.xticks(xlocs, [\"%.02f\" % l for l in ((xlocs*15/timebins)+(0.5*2**10))/22100])\n",
    "    ylocs = np.int16(np.round(np.linspace(0, freqbins-1, 10)))\n",
    "# plt.yticks(ylocs, [\"%.02f\" % freq[i] for i in ylocs])\n",
    "\n",
    "def show_nth(n):\n",
    "    plt.figure(figsize=(15,7.5))\n",
    "    plt.subplot(1,2,1)\n",
    "    plot_spec(te_pairs[n][0].reshape(15,1654))\n",
    "    print(te_y[n])\n",
    "    plt.subplot(1,2,2)\n",
    "    plot_spec(te_pairs[n][1].reshape(15,1654))\n",
    "show_nth(0)\n",
    "\n",
    "# y_train.shape,y_test.shape\n",
    "# x_train.shape,x_test.shape\n",
    "# x_train = x_train.reshape(60000, 784)\n",
    "# x_test = x_test.reshape(10000, 784)\n",
    "# x_train = x_train.astype('float32')\n",
    "# x_test = x_test.astype('float32')\n",
    "# x_train /= 255\n",
    "# x_test /= 255\n",
    "\n",
    "# input_dim = (tr_pairs.shape[2], tr_pairs.shape[3])\n",
    "# epochs = 20\n",
    "\n",
    "# # network definition\n",
    "# base_network = create_base_rnn_network(input_dim)\n",
    "# input_a = Input(shape=input_dim)\n",
    "# input_b = Input(shape=input_dim)\n",
    "\n",
    "# # because we re-use the same instance `base_network`,\n",
    "# # the weights of the network\n",
    "# # will be shared across the two branches\n",
    "# processed_a = base_network(input_a)\n",
    "# processed_b = base_network(input_b)\n",
    "\n",
    "# distance = Lambda(euclidean_distance,\n",
    "#                   output_shape=eucl_dist_output_shape)(\n",
    "#     [processed_a, processed_b]\n",
    "# )\n",
    "\n",
    "# model = Model([input_a, input_b], distance)\n",
    "\n",
    "# tb_cb = TensorBoard(log_dir='./siamese_logs', histogram_freq=1, batch_size=32,\n",
    "#                     write_graph=True, write_grads=True, write_images=True,\n",
    "#                     embeddings_freq=0, embeddings_layer_names=None,\n",
    "#                     embeddings_metadata=None)\n",
    "# # train\n",
    "# rms = RMSprop(lr=0.00001)  # lr=0.001)\n",
    "# sgd = SGD(lr=0.001)\n",
    "# model.compile(loss=contrastive_loss, optimizer=rms, metrics=[accuracy])\n",
    "# model.fit([tr_pairs[:, 0], tr_pairs[:, 1]], tr_y,\n",
    "#           batch_size=128,\n",
    "#           epochs=epochs,\n",
    "#           validation_data=([te_pairs[:, 0], te_pairs[:, 1]], te_y),\n",
    "#           callbacks=[tb_cb])\n",
    "\n",
    "# # compute final accuracy on training and test sets\n",
    "# y_pred = model.predict([tr_pairs[:, 0], tr_pairs[:, 1]])\n",
    "# tr_acc = compute_accuracy(tr_y, y_pred)\n",
    "# y_pred = model.predict([te_pairs[:, 0], te_pairs[:, 1]])\n",
    "# te_acc = compute_accuracy(te_y, y_pred)\n",
    "\n",
    "# print('* Accuracy on training set: %0.2f%%' % (100 * tr_acc))\n",
    "# print('* Accuracy on test set: %0.2f%%' % (100 * te_acc))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Using TensorFlow backend.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train on 36252 samples, validate on 4028 samples\n"
     ]
    }
   ],
   "source": [
    "'''Train a Siamese MLP on pairs of digits from the MNIST dataset.\n",
    "\n",
    "It follows Hadsell-et-al.'06 [1] by computing the Euclidean distance on the\n",
    "output of the shared network and by optimizing the contrastive loss (see paper\n",
    "for more details).\n",
    "\n",
    "[1] \"Dimensionality Reduction by Learning an Invariant Mapping\"\n",
    "    http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf\n",
    "\n",
    "Gets to 97.2% test accuracy after 20 epochs.\n",
    "2 seconds per epoch on a Titan X Maxwell GPU\n",
    "'''\n",
    "from __future__ import absolute_import\n",
    "from __future__ import print_function\n",
    "import numpy as np\n",
    "\n",
    "# import random\n",
    "# from keras.datasets import mnist\n",
    "from speech_data import speech_model_data\n",
    "from keras.models import Model\n",
    "from keras.layers import Input, Dense, Dropout, SimpleRNN, LSTM, Lambda\n",
    "# Dense, Dropout, Input, Lambda, LSTM, SimpleRNN\n",
    "from keras.optimizers import RMSprop, SGD\n",
    "from keras.callbacks import TensorBoard\n",
    "from keras import backend as K\n",
    "\n",
    "\n",
    "def euclidean_distance(vects):\n",
    "    x, y = vects\n",
    "    return K.sqrt(K.maximum(K.sum(K.square(x - y), axis=1, keepdims=True),\n",
    "                              K.epsilon()))\n",
    "\n",
    "\n",
    "def eucl_dist_output_shape(shapes):\n",
    "    shape1, shape2 = shapes\n",
    "    return (shape1[0], 1)\n",
    "\n",
    "\n",
    "def contrastive_loss(y_true, y_pred):\n",
    "    '''Contrastive loss from Hadsell-et-al.'06\n",
    "    http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf\n",
    "    '''\n",
    "    margin = 1\n",
    "    # print(y_true, y_pred)\n",
    "    return K.mean(y_true * K.square(y_pred) +\n",
    "                  (1 - y_true) * K.square(K.maximum(margin - y_pred, 0)))\n",
    "\n",
    "\n",
    "def create_base_rnn_network(input_dim):\n",
    "    '''Base network to be shared (eq. to feature extraction).\n",
    "    '''\n",
    "    inp = Input(shape=input_dim)\n",
    "    # d1 = Dense(1024, activation='sigmoid')(inp)\n",
    "    # # d2 = Dense(2, activation='sigmoid')(d1)\n",
    "    ls1 = LSTM(1024, return_sequences=True)(inp)\n",
    "    ls2 = LSTM(512, return_sequences=True)(ls1)\n",
    "    ls3 = LSTM(32)(ls2)  # , return_sequences=True\n",
    "    # sr2 = SimpleRNN(128, return_sequences=True)(sr1)\n",
    "    # sr3 = SimpleRNN(32)(sr2)\n",
    "    # x = Dense(128, activation='relu')(sr1)\n",
    "    return Model(inp, ls3)\n",
    "\n",
    "def create_base_network(input_dim):\n",
    "    '''Base network to be shared (eq. to feature extraction).\n",
    "    '''\n",
    "    input = Input(shape=input_dim)\n",
    "    x = Dense(128, activation='relu')(input)\n",
    "    x = Dropout(0.1)(x)\n",
    "    x = Dense(128, activation='relu')(x)\n",
    "    x = Dropout(0.1)(x)\n",
    "    x = Dense(128, activation='relu')(x)\n",
    "    return Model(input, x)\n",
    "\n",
    "def compute_accuracy(y_true, y_pred):\n",
    "    '''Compute classification accuracy with a fixed threshold on distances.\n",
    "    '''\n",
    "    pred = y_pred.ravel() < 0.5\n",
    "    return np.mean(pred == y_true)\n",
    "\n",
    "\n",
    "def accuracy(y_true, y_pred):\n",
    "    '''Compute classification accuracy with a fixed threshold on distances.\n",
    "    '''\n",
    "    return K.mean(K.equal(y_true, K.cast(y_pred < 0.5, y_true.dtype)))\n",
    "\n",
    "\n",
    "# the data, shuffled and split between train and test sets\n",
    "tr_pairs, te_pairs, tr_y, te_y = speech_model_data()\n",
    "# y_train.shape,y_test.shape\n",
    "# x_train.shape,x_test.shape\n",
    "# x_train = x_train.reshape(60000, 784)\n",
    "# x_test = x_test.reshape(10000, 784)\n",
    "# x_train = x_train.astype('float32')\n",
    "# x_test = x_test.astype('float32')\n",
    "# x_train /= 255\n",
    "# x_test /= 255\n",
    "input_dim = (tr_pairs.shape[2], tr_pairs.shape[3])\n",
    "epochs = 20\n",
    "\n",
    "# network definition\n",
    "base_network = create_base_rnn_network(input_dim)\n",
    "input_a = Input(shape=input_dim)\n",
    "input_b = Input(shape=input_dim)\n",
    "\n",
    "# because we re-use the same instance `base_network`,\n",
    "# the weights of the network\n",
    "# will be shared across the two branches\n",
    "processed_a = base_network(input_a)\n",
    "processed_b = base_network(input_b)\n",
    "\n",
    "distance = Lambda(euclidean_distance,\n",
    "                  output_shape=eucl_dist_output_shape)(\n",
    "    [processed_a, processed_b]\n",
    ")\n",
    "\n",
    "model = Model([input_a, input_b], distance)\n",
    "\n",
    "tb_cb = TensorBoard(log_dir='./logs/siamese_logs', histogram_freq=1, batch_size=32,\n",
    "                    write_graph=True, write_grads=True, write_images=True,\n",
    "                    embeddings_freq=0, embeddings_layer_names=None,\n",
    "                    embeddings_metadata=None)\n",
    "# train\n",
    "rms = RMSprop(lr=0.001)\n",
    "sgd = SGD(lr=0.001)\n",
    "model.compile(loss=contrastive_loss, optimizer=rms, metrics=[accuracy])\n",
    "model.fit([tr_pairs[:, 0], tr_pairs[:, 1]], tr_y,\n",
    "          batch_size=128,\n",
    "          epochs=epochs,\n",
    "          validation_data=([te_pairs[:, 0], te_pairs[:, 1]], te_y),\n",
    "          callbacks=[tb_cb])\n",
    "\n",
    "model.save('./models/siamese_speech_model.h5')\n",
    "# compute final accuracy on training and test sets\n",
    "y_pred = model.predict([tr_pairs[:, 0], tr_pairs[:, 1]])\n",
    "tr_acc = compute_accuracy(tr_y, y_pred)\n",
    "y_pred = model.predict([te_pairs[:, 0], te_pairs[:, 1]])\n",
    "te_acc = compute_accuracy(te_y, y_pred)\n",
    "\n",
    "print('* Accuracy on training set: %0.2f%%' % (100 * tr_acc))\n",
    "print('* Accuracy on test set: %0.2f%%' % (100 * te_acc))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}

142 mnist_siamese.py

@@ -1,142 +0,0 @@
'''Train a Siamese MLP on pairs of digits from the MNIST dataset.

It follows Hadsell-et-al.'06 [1] by computing the Euclidean distance on the
output of the shared network and by optimizing the contrastive loss (see paper
for more details).

[1] "Dimensionality Reduction by Learning an Invariant Mapping"
    http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf

Gets to 97.2% test accuracy after 20 epochs.
2 seconds per epoch on a Titan X Maxwell GPU
'''
from __future__ import absolute_import
from __future__ import print_function
import numpy as np

import random
from keras.datasets import mnist
from keras.models import Model
from keras.layers import Dense, Dropout, Input, Lambda
from keras.optimizers import RMSprop
from keras import backend as K

%matplotlib inline
import matplotlib.pyplot as plt

num_classes = 10


def euclidean_distance(vects):
    x, y = vects
    return K.sqrt(K.maximum(K.sum(K.square(x - y), axis=1, keepdims=True), K.epsilon()))


def eucl_dist_output_shape(shapes):
    shape1, shape2 = shapes
    return (shape1[0], 1)


def contrastive_loss(y_true, y_pred):
    '''Contrastive loss from Hadsell-et-al.'06
    http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf
    '''
    margin = 1
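    # y_true == 1 marks a similar pair (pulled together); y_true == 0 marks a
    # dissimilar pair (pushed apart until its distance reaches the margin)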
    return K.mean(y_true * K.square(y_pred) +
                  (1 - y_true) * K.square(K.maximum(margin - y_pred, 0)))


def create_pairs(x, digit_indices):
    '''Positive and negative pair creation.
    Alternates between positive and negative pairs.
    '''
    pairs = []
    labels = []
    n = min([len(digit_indices[d]) for d in range(num_classes)]) - 1
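    # cap n at the smallest class size so every digit contributes the same
    # number of pairs (the i + 1 lookahead below needs n + 1 indices per class)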
    for d in range(num_classes):
        for i in range(n):
            z1, z2 = digit_indices[d][i], digit_indices[d][i + 1]
            pairs += [[x[z1], x[z2]]]
            inc = random.randrange(1, num_classes)
            dn = (d + inc) % num_classes
            z1, z2 = digit_indices[d][i], digit_indices[dn][i]
            pairs += [[x[z1], x[z2]]]
            labels += [1, 0]
    return np.array(pairs), np.array(labels)


def create_base_network(input_dim):
    '''Base network to be shared (eq. to feature extraction).
    '''
    input = Input(shape=(input_dim,))
    x = Dense(128, activation='relu')(input)
    x = Dropout(0.1)(x)
    x = Dense(128, activation='relu')(x)
    x = Dropout(0.1)(x)
    x = Dense(128, activation='relu')(x)
    return Model(input, x)


def compute_accuracy(y_true, y_pred):
    '''Compute classification accuracy with a fixed threshold on distances.
    '''
    pred = y_pred.ravel() < 0.5
    return np.mean(pred == y_true)


def accuracy(y_true, y_pred):
    '''Compute classification accuracy with a fixed threshold on distances.
    '''
    return K.mean(K.equal(y_true, K.cast(y_pred < 0.5, y_true.dtype)))


# the data, shuffled and split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.reshape(60000, 784)
x_test = x_test.reshape(10000, 784)
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
input_dim = 784
epochs = 20

# create training+test positive and negative pairs
digit_indices = [np.where(y_train == i)[0] for i in range(num_classes)]
tr_pairs, tr_y = create_pairs(x_train, digit_indices)

digit_indices = [np.where(y_test == i)[0] for i in range(num_classes)]
te_pairs, te_y = create_pairs(x_test, digit_indices)
# network definition
base_network = create_base_network(input_dim)

input_a = Input(shape=(input_dim,))
input_b = Input(shape=(input_dim,))

# because we re-use the same instance `base_network`,
# the weights of the network
# will be shared across the two branches
processed_a = base_network(input_a)
processed_b = base_network(input_b)

distance = Lambda(euclidean_distance,
                  output_shape=eucl_dist_output_shape)([processed_a, processed_b])

model = Model([input_a, input_b], distance)

# train
rms = RMSprop()
model.compile(loss=contrastive_loss, optimizer=rms, metrics=[accuracy])
model.fit([tr_pairs[:, 0], tr_pairs[:, 1]], tr_y,
          batch_size=128,
          epochs=epochs,
          validation_data=([te_pairs[:, 0], te_pairs[:, 1]], te_y))

# compute final accuracy on training and test sets
y_pred = model.predict([tr_pairs[:, 0], tr_pairs[:, 1]])
tr_acc = compute_accuracy(tr_y, y_pred)
y_pred = model.predict([te_pairs[:, 0], te_pairs[:, 1]])
te_acc = compute_accuracy(te_y, y_pred)

print('* Accuracy on training set: %0.2f%%' % (100 * tr_acc))
print('* Accuracy on test set: %0.2f%%' % (100 * te_acc))

@@ -1,90 +0,0 @@
import tensorflow as tf
import numpy as np


class SiameseLSTM(object):
    """
    An LSTM-based deep Siamese network for text similarity.
    Uses a character embedding layer, followed by a biLSTM and an energy loss layer.
    """

    def BiRNN(self, x, dropout, scope, embedding_size, sequence_length):
        n_input = embedding_size
        n_steps = sequence_length
        n_hidden = n_steps
        n_layers = 3
        # Prepare data shape to match `bidirectional_rnn` function requirements
        # Current data input shape: (batch_size, n_steps, n_input) (?, seq_len, embedding_size)
        # Required shape: 'n_steps' tensors list of shape (batch_size, n_input)
        # Permuting batch_size and n_steps
        x = tf.transpose(x, [1, 0, 2])
        # Reshape to (n_steps*batch_size, n_input)
        x = tf.reshape(x, [-1, n_input])
        print(x)
        # Split to get a list of 'n_steps' tensors of shape (batch_size, n_input)
        x = tf.split(x, n_steps, 0)
        print(x)
        # Define lstm cells with tensorflow
        # Forward direction cell
        with tf.name_scope("fw" + scope), tf.variable_scope("fw" + scope):
            stacked_rnn_fw = []
            for _ in range(n_layers):
                fw_cell = tf.nn.rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0, state_is_tuple=True)
                lstm_fw_cell = tf.contrib.rnn.DropoutWrapper(fw_cell, output_keep_prob=dropout)
                stacked_rnn_fw.append(lstm_fw_cell)
            lstm_fw_cell_m = tf.nn.rnn_cell.MultiRNNCell(cells=stacked_rnn_fw, state_is_tuple=True)

        with tf.name_scope("bw" + scope), tf.variable_scope("bw" + scope):
            stacked_rnn_bw = []
            for _ in range(n_layers):
                bw_cell = tf.nn.rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0, state_is_tuple=True)
                lstm_bw_cell = tf.contrib.rnn.DropoutWrapper(bw_cell, output_keep_prob=dropout)
                stacked_rnn_bw.append(lstm_bw_cell)
            lstm_bw_cell_m = tf.nn.rnn_cell.MultiRNNCell(cells=stacked_rnn_bw, state_is_tuple=True)
        # Get lstm cell output

        with tf.name_scope("bw" + scope), tf.variable_scope("bw" + scope):
            outputs, _, _ = tf.nn.static_bidirectional_rnn(lstm_fw_cell_m, lstm_bw_cell_m, x, dtype=tf.float32)
        return outputs[-1]

    def contrastive_loss(self, y, d, batch_size):
        tmp = y * tf.square(d)
        # tmp = tf.mul(y, tf.square(d))
        tmp2 = (1 - y) * tf.square(tf.maximum((1 - d), 0))
        return tf.reduce_sum(tmp + tmp2) / batch_size / 2

    def __init__(
            self, sequence_length, vocab_size, embedding_size, hidden_units, l2_reg_lambda, batch_size):

        # Placeholders for input, output and dropout
        self.input_x1 = tf.placeholder(tf.int32, [None, sequence_length], name="input_x1")
        self.input_x2 = tf.placeholder(tf.int32, [None, sequence_length], name="input_x2")
        self.input_y = tf.placeholder(tf.float32, [None], name="input_y")
        self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")

        # Keeping track of l2 regularization loss (optional)
        l2_loss = tf.constant(0.0, name="l2_loss")

        # Embedding layer
        with tf.name_scope("embedding"):
            self.W = tf.Variable(
                tf.random_uniform([vocab_size, embedding_size], -1.0, 1.0),
                trainable=True, name="W")
            self.embedded_chars1 = tf.nn.embedding_lookup(self.W, self.input_x1)
            # self.embedded_chars_expanded1 = tf.expand_dims(self.embedded_chars1, -1)
            self.embedded_chars2 = tf.nn.embedding_lookup(self.W, self.input_x2)
            # self.embedded_chars_expanded2 = tf.expand_dims(self.embedded_chars2, -1)

        # Run each input through its BiRNN side and compare the final outputs
        with tf.name_scope("output"):
            self.out1 = self.BiRNN(self.embedded_chars1, self.dropout_keep_prob, "side1", embedding_size, sequence_length)
            self.out2 = self.BiRNN(self.embedded_chars2, self.dropout_keep_prob, "side2", embedding_size, sequence_length)
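            # Euclidean distance between the two branch outputs, normalised by
            # the sum of their norms so the result always lies in [0, 1]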
            self.distance = tf.sqrt(tf.reduce_sum(tf.square(tf.subtract(self.out1, self.out2)), 1, keep_dims=True))
            self.distance = tf.div(self.distance, tf.add(tf.sqrt(tf.reduce_sum(tf.square(self.out1), 1, keep_dims=True)), tf.sqrt(tf.reduce_sum(tf.square(self.out2), 1, keep_dims=True))))
            self.distance = tf.reshape(self.distance, [-1], name="distance")
        with tf.name_scope("loss"):
            self.loss = self.contrastive_loss(self.input_y, self.distance, batch_size)
        #### Accuracy computation is outside of this class.
        with tf.name_scope("accuracy"):
            self.temp_sim = tf.subtract(tf.ones_like(self.distance), tf.rint(self.distance), name="temp_sim")  # auto threshold 0.5
            correct_predictions = tf.equal(self.temp_sim, self.input_y)
            self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"), name="accuracy")

@@ -1,27 +1,12 @@
'''Train a Siamese MLP on pairs of digits from the MNIST dataset.

It follows Hadsell-et-al.'06 [1] by computing the Euclidean distance on the
output of the shared network and by optimizing the contrastive loss (see paper
for more details).

[1] "Dimensionality Reduction by Learning an Invariant Mapping"
    http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf

Gets to 97.2% test accuracy after 20 epochs.
2 seconds per epoch on a Titan X Maxwell GPU
'''
from __future__ import absolute_import
from __future__ import print_function
import numpy as np

# import random
# from keras.datasets import mnist
from speech_data import speech_model_data
from keras.models import Model
from keras.layers import Input, Dense, Dropout, SimpleRNN, LSTM, Lambda
# Dense, Dropout, Input, Lambda, LSTM, SimpleRNN
from keras.layers import Input, Dense, Dropout, LSTM, Lambda
from keras.optimizers import RMSprop, SGD
from keras.callbacks import TensorBoard
from keras.callbacks import TensorBoard, ModelCheckpoint
from keras import backend as K


@@ -40,26 +25,20 @@ def contrastive_loss(y_true, y_pred):
    '''Contrastive loss from Hadsell-et-al.'06
    http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf
    '''
    margin = 1
    # print(y_true, y_pred)
    return K.mean(y_true * K.square(y_pred) +
                  (1 - y_true) * K.square(K.maximum(margin - y_pred, 0)))
                  (1 - y_true) * K.square(K.maximum(1 - y_pred, 0)))


def create_base_rnn_network(input_dim):
    '''Base network to be shared (eq. to feature extraction).
    '''
    inp = Input(shape=input_dim)
    # d1 = Dense(1024, activation='sigmoid')(inp)
    # # d2 = Dense(2, activation='sigmoid')(d1)
    ls1 = LSTM(1024, return_sequences=True)(inp)
    ls2 = LSTM(512, return_sequences=True)(ls1)
    ls3 = LSTM(32)(ls2)  # , return_sequences=True
    # sr2 = SimpleRNN(128, return_sequences=True)(sr1)
    # sr3 = SimpleRNN(32)(sr2)
    # x = Dense(128, activation='relu')(sr1)
    ls3 = LSTM(32)(ls2)
    return Model(inp, ls3)


def create_base_network(input_dim):
    '''Base network to be shared (eq. to feature extraction).
    '''

@@ -71,6 +50,7 @@ def create_base_network(input_dim):
    x = Dense(128, activation='relu')(x)
    return Model(input, x)


def compute_accuracy(y_true, y_pred):
    '''Compute classification accuracy with a fixed threshold on distances.
    '''

@@ -86,16 +66,7 @@ def accuracy(y_true, y_pred):

# the data, shuffled and split between train and test sets
tr_pairs, te_pairs, tr_y, te_y = speech_model_data()
# y_train.shape,y_test.shape
# x_train.shape,x_test.shape
# x_train = x_train.reshape(60000, 784)
# x_test = x_test.reshape(10000, 784)
# x_train = x_train.astype('float32')
# x_test = x_test.astype('float32')
# x_train /= 255
# x_test /= 255
input_dim = (tr_pairs.shape[2], tr_pairs.shape[3])
epochs = 20

# network definition
base_network = create_base_rnn_network(input_dim)

@@ -115,20 +86,26 @@ distance = Lambda(euclidean_distance,

model = Model([input_a, input_b], distance)

tb_cb = TensorBoard(log_dir='./siamese_logs', histogram_freq=1, batch_size=32,
                    write_graph=True, write_grads=True, write_images=True,
                    embeddings_freq=0, embeddings_layer_names=None,
                    embeddings_metadata=None)
tb_cb = TensorBoard(log_dir='./logs/siamese_logs', histogram_freq=1,
                    batch_size=32, write_graph=True, write_grads=True,
                    write_images=True, embeddings_freq=0,
                    embeddings_layer_names=None, embeddings_metadata=None)
cp_file_fmt = './models/siamese_speech_model-{epoch:02d}-epoch-{val_acc:0.2f}\
-acc.h5'
cp_cb = ModelCheckpoint(cp_file_fmt, monitor='val_acc', verbose=0,
                        save_best_only=False, save_weights_only=False,
                        mode='auto', period=1)
# train
rms = RMSprop(lr=0.001)  # lr=0.001)
rms = RMSprop(lr=0.001)
sgd = SGD(lr=0.001)
model.compile(loss=contrastive_loss, optimizer=rms, metrics=[accuracy])
model.fit([tr_pairs[:, 0], tr_pairs[:, 1]], tr_y,
          batch_size=128,
          epochs=epochs,
          epochs=50,
          validation_data=([te_pairs[:, 0], te_pairs[:, 1]], te_y),
          callbacks=[tb_cb])
          callbacks=[tb_cb, cp_cb])

model.save('./models/siamese_speech_model-final.h5')
# compute final accuracy on training and test sets
y_pred = model.predict([tr_pairs[:, 0], tr_pairs[:, 1]])
tr_acc = compute_accuracy(tr_y, y_pred)