parent 77821093cb
commit e6f0c8b21b

360 Siamese.ipynb

@@ -1,360 +0,0 @@
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "'''Train a Siamese MLP on pairs of digits from the MNIST dataset.\n",
    "\n",
    "It follows Hadsell-et-al.'06 [1] by computing the Euclidean distance on the\n",
    "output of the shared network and by optimizing the contrastive loss (see paper\n",
    "for more details).\n",
    "\n",
    "[1] \"Dimensionality Reduction by Learning an Invariant Mapping\"\n",
    "    http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf\n",
    "\n",
    "Gets to 97.2% test accuracy after 20 epochs.\n",
    "2 seconds per epoch on a Titan X Maxwell GPU\n",
    "'''\n",
    "from __future__ import absolute_import\n",
    "from __future__ import print_function\n",
    "import numpy as np\n",
    "\n",
    "# import random\n",
    "# from keras.datasets import mnist\n",
    "from speech_data import speech_model_data\n",
    "from keras.models import Model\n",
    "from keras.layers import Input, Dense, Dropout, SimpleRNN, LSTM, Lambda\n",
    "# Dense, Dropout, Input, Lambda, LSTM, SimpleRNN\n",
    "from keras.optimizers import RMSprop, SGD\n",
    "from keras.callbacks import TensorBoard\n",
    "from keras import backend as K\n",
    "\n",
    "\n",
    "def euclidean_distance(vects):\n",
    "    x, y = vects\n",
    "    return K.sqrt(K.maximum(K.sum(K.square(x - y), axis=1, keepdims=True),\n",
    "                              K.epsilon()))\n",
    "\n",
    "\n",
    "def eucl_dist_output_shape(shapes):\n",
    "    shape1, shape2 = shapes\n",
    "    return (shape1[0], 1)\n",
    "\n",
    "\n",
    "def contrastive_loss(y_true, y_pred):\n",
    "    '''Contrastive loss from Hadsell-et-al.'06\n",
    "    http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf\n",
    "    '''\n",
    "    margin = 1\n",
    "    # print(y_true, y_pred)\n",
    "    return K.mean(y_true * K.square(y_pred) +\n",
    "                  (1 - y_true) * K.square(K.maximum(margin - y_pred, 0)))\n",
    "\n",
    "\n",
    "def create_base_rnn_network(input_dim):\n",
    "    '''Base network to be shared (eq. to feature extraction).\n",
    "    '''\n",
    "    inp = Input(shape=input_dim)\n",
    "    # d1 = Dense(1024, activation='sigmoid')(inp)\n",
    "    # # d2 = Dense(2, activation='sigmoid')(d1)\n",
    "    ls1 = LSTM(1024, return_sequences=True)(inp)\n",
    "    ls2 = LSTM(512, return_sequences=True)(ls1)\n",
    "    ls3 = LSTM(32)(ls2)  # , return_sequences=True\n",
    "    # sr2 = SimpleRNN(128, return_sequences=True)(sr1)\n",
    "    # sr3 = SimpleRNN(32)(sr2)\n",
    "    # x = Dense(128, activation='relu')(sr1)\n",
    "    return Model(inp, ls3)\n",
    "\n",
    "def create_base_network(input_dim):\n",
    "    '''Base network to be shared (eq. to feature extraction).\n",
    "    '''\n",
    "    input = Input(shape=input_dim)\n",
    "    x = Dense(128, activation='relu')(input)\n",
    "    x = Dropout(0.1)(x)\n",
    "    x = Dense(128, activation='relu')(x)\n",
    "    x = Dropout(0.1)(x)\n",
    "    x = Dense(128, activation='relu')(x)\n",
    "    return Model(input, x)\n",
    "\n",
    "def compute_accuracy(y_true, y_pred):\n",
    "    '''Compute classification accuracy with a fixed threshold on distances.\n",
    "    '''\n",
    "    pred = y_pred.ravel() < 0.5\n",
    "    return np.mean(pred == y_true)\n",
    "\n",
    "\n",
    "def accuracy(y_true, y_pred):\n",
    "    '''Compute classification accuracy with a fixed threshold on distances.\n",
    "    '''\n",
    "    return K.mean(K.equal(y_true, K.cast(y_pred < 0.5, y_true.dtype)))\n",
    "\n",
    "\n",
    "# the data, shuffled and split between train and test sets\n",
    "tr_pairs, te_pairs, tr_y, te_y = speech_model_data()\n",
    "\n",
    "%matplotlib inline\n",
    "import matplotlib.pyplot as plt\n",
    "def plot_spec(ims):\n",
    "    timebins, freqbins = np.shape(ims)\n",
    "    # import pdb;pdb.set_trace()\n",
    "# plt.figure(figsize=(15, 7.5))\n",
    "    plt.imshow(np.transpose(ims), origin=\"lower\", aspect=\"auto\", cmap=\"jet\", interpolation=\"none\")\n",
    "    plt.colorbar()\n",
    "    xlocs = np.float32(np.linspace(0, timebins-1, 5))\n",
    "    plt.xticks(xlocs, [\"%.02f\" % l for l in ((xlocs*15/timebins)+(0.5*2**10))/22100])\n",
    "    ylocs = np.int16(np.round(np.linspace(0, freqbins-1, 10)))\n",
    "# plt.yticks(ylocs, [\"%.02f\" % freq[i] for i in ylocs])\n",
    "\n",
    "def show_nth(n):\n",
    "    plt.figure(figsize=(15,7.5))\n",
    "    plt.subplot(1,2,1)\n",
    "    plot_spec(te_pairs[n][0].reshape(15,1654))\n",
    "    print(te_y[n])\n",
    "    plt.subplot(1,2,2)\n",
    "    plot_spec(te_pairs[n][1].reshape(15,1654))\n",
    "show_nth(0)\n",
    "\n",
    "# y_train.shape,y_test.shape\n",
    "# x_train.shape,x_test.shape\n",
    "# x_train = x_train.reshape(60000, 784)\n",
    "# x_test = x_test.reshape(10000, 784)\n",
    "# x_train = x_train.astype('float32')\n",
    "# x_test = x_test.astype('float32')\n",
    "# x_train /= 255\n",
    "# x_test /= 255\n",
    "\n",
    "# input_dim = (tr_pairs.shape[2], tr_pairs.shape[3])\n",
    "# epochs = 20\n",
    "\n",
    "# # network definition\n",
    "# base_network = create_base_rnn_network(input_dim)\n",
    "# input_a = Input(shape=input_dim)\n",
    "# input_b = Input(shape=input_dim)\n",
    "\n",
    "# # because we re-use the same instance `base_network`,\n",
    "# # the weights of the network\n",
    "# # will be shared across the two branches\n",
    "# processed_a = base_network(input_a)\n",
    "# processed_b = base_network(input_b)\n",
    "\n",
    "# distance = Lambda(euclidean_distance,\n",
    "#                   output_shape=eucl_dist_output_shape)(\n",
    "#     [processed_a, processed_b]\n",
    "# )\n",
    "\n",
    "# model = Model([input_a, input_b], distance)\n",
    "\n",
    "# tb_cb = TensorBoard(log_dir='./siamese_logs', histogram_freq=1, batch_size=32,\n",
    "#                     write_graph=True, write_grads=True, write_images=True,\n",
    "#                     embeddings_freq=0, embeddings_layer_names=None,\n",
    "#                     embeddings_metadata=None)\n",
    "# # train\n",
    "# rms = RMSprop(lr=0.00001)  # lr=0.001)\n",
    "# sgd = SGD(lr=0.001)\n",
    "# model.compile(loss=contrastive_loss, optimizer=rms, metrics=[accuracy])\n",
    "# model.fit([tr_pairs[:, 0], tr_pairs[:, 1]], tr_y,\n",
    "#           batch_size=128,\n",
    "#           epochs=epochs,\n",
    "#           validation_data=([te_pairs[:, 0], te_pairs[:, 1]], te_y),\n",
    "#           callbacks=[tb_cb])\n",
    "\n",
    "# # compute final accuracy on training and test sets\n",
    "# y_pred = model.predict([tr_pairs[:, 0], tr_pairs[:, 1]])\n",
    "# tr_acc = compute_accuracy(tr_y, y_pred)\n",
    "# y_pred = model.predict([te_pairs[:, 0], te_pairs[:, 1]])\n",
    "# te_acc = compute_accuracy(te_y, y_pred)\n",
    "\n",
    "# print('* Accuracy on training set: %0.2f%%' % (100 * tr_acc))\n",
    "# print('* Accuracy on test set: %0.2f%%' % (100 * te_acc))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Using TensorFlow backend.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train on 36252 samples, validate on 4028 samples\n"
     ]
    }
   ],
   "source": [
    "'''Train a Siamese MLP on pairs of digits from the MNIST dataset.\n",
    "\n",
    "It follows Hadsell-et-al.'06 [1] by computing the Euclidean distance on the\n",
    "output of the shared network and by optimizing the contrastive loss (see paper\n",
    "for more details).\n",
    "\n",
    "[1] \"Dimensionality Reduction by Learning an Invariant Mapping\"\n",
    "    http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf\n",
    "\n",
    "Gets to 97.2% test accuracy after 20 epochs.\n",
    "2 seconds per epoch on a Titan X Maxwell GPU\n",
    "'''\n",
    "from __future__ import absolute_import\n",
    "from __future__ import print_function\n",
    "import numpy as np\n",
    "\n",
    "# import random\n",
    "# from keras.datasets import mnist\n",
    "from speech_data import speech_model_data\n",
    "from keras.models import Model\n",
    "from keras.layers import Input, Dense, Dropout, SimpleRNN, LSTM, Lambda\n",
    "# Dense, Dropout, Input, Lambda, LSTM, SimpleRNN\n",
    "from keras.optimizers import RMSprop, SGD\n",
    "from keras.callbacks import TensorBoard\n",
    "from keras import backend as K\n",
    "\n",
    "\n",
    "def euclidean_distance(vects):\n",
    "    x, y = vects\n",
    "    return K.sqrt(K.maximum(K.sum(K.square(x - y), axis=1, keepdims=True),\n",
    "                              K.epsilon()))\n",
    "\n",
    "\n",
    "def eucl_dist_output_shape(shapes):\n",
    "    shape1, shape2 = shapes\n",
    "    return (shape1[0], 1)\n",
    "\n",
    "\n",
    "def contrastive_loss(y_true, y_pred):\n",
    "    '''Contrastive loss from Hadsell-et-al.'06\n",
    "    http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf\n",
    "    '''\n",
    "    margin = 1\n",
    "    # print(y_true, y_pred)\n",
    "    return K.mean(y_true * K.square(y_pred) +\n",
    "                  (1 - y_true) * K.square(K.maximum(margin - y_pred, 0)))\n",
    "\n",
    "\n",
    "def create_base_rnn_network(input_dim):\n",
    "    '''Base network to be shared (eq. to feature extraction).\n",
    "    '''\n",
    "    inp = Input(shape=input_dim)\n",
    "    # d1 = Dense(1024, activation='sigmoid')(inp)\n",
    "    # # d2 = Dense(2, activation='sigmoid')(d1)\n",
    "    ls1 = LSTM(1024, return_sequences=True)(inp)\n",
    "    ls2 = LSTM(512, return_sequences=True)(ls1)\n",
    "    ls3 = LSTM(32)(ls2)  # , return_sequences=True\n",
    "    # sr2 = SimpleRNN(128, return_sequences=True)(sr1)\n",
    "    # sr3 = SimpleRNN(32)(sr2)\n",
    "    # x = Dense(128, activation='relu')(sr1)\n",
    "    return Model(inp, ls3)\n",
    "\n",
    "def create_base_network(input_dim):\n",
    "    '''Base network to be shared (eq. to feature extraction).\n",
    "    '''\n",
    "    input = Input(shape=input_dim)\n",
    "    x = Dense(128, activation='relu')(input)\n",
    "    x = Dropout(0.1)(x)\n",
    "    x = Dense(128, activation='relu')(x)\n",
    "    x = Dropout(0.1)(x)\n",
    "    x = Dense(128, activation='relu')(x)\n",
    "    return Model(input, x)\n",
    "\n",
    "def compute_accuracy(y_true, y_pred):\n",
    "    '''Compute classification accuracy with a fixed threshold on distances.\n",
    "    '''\n",
    "    pred = y_pred.ravel() < 0.5\n",
    "    return np.mean(pred == y_true)\n",
    "\n",
    "\n",
    "def accuracy(y_true, y_pred):\n",
    "    '''Compute classification accuracy with a fixed threshold on distances.\n",
    "    '''\n",
    "    return K.mean(K.equal(y_true, K.cast(y_pred < 0.5, y_true.dtype)))\n",
    "\n",
    "\n",
    "# the data, shuffled and split between train and test sets\n",
    "tr_pairs, te_pairs, tr_y, te_y = speech_model_data()\n",
    "# y_train.shape,y_test.shape\n",
    "# x_train.shape,x_test.shape\n",
    "# x_train = x_train.reshape(60000, 784)\n",
    "# x_test = x_test.reshape(10000, 784)\n",
    "# x_train = x_train.astype('float32')\n",
    "# x_test = x_test.astype('float32')\n",
    "# x_train /= 255\n",
    "# x_test /= 255\n",
    "input_dim = (tr_pairs.shape[2], tr_pairs.shape[3])\n",
    "epochs = 20\n",
    "\n",
    "# network definition\n",
    "base_network = create_base_rnn_network(input_dim)\n",
    "input_a = Input(shape=input_dim)\n",
    "input_b = Input(shape=input_dim)\n",
    "\n",
    "# because we re-use the same instance `base_network`,\n",
    "# the weights of the network\n",
    "# will be shared across the two branches\n",
    "processed_a = base_network(input_a)\n",
    "processed_b = base_network(input_b)\n",
    "\n",
    "distance = Lambda(euclidean_distance,\n",
    "                  output_shape=eucl_dist_output_shape)(\n",
    "    [processed_a, processed_b]\n",
    ")\n",
    "\n",
    "model = Model([input_a, input_b], distance)\n",
    "\n",
    "tb_cb = TensorBoard(log_dir='./logs/siamese_logs', histogram_freq=1, batch_size=32,\n",
    "                    write_graph=True, write_grads=True, write_images=True,\n",
    "                    embeddings_freq=0, embeddings_layer_names=None,\n",
    "                    embeddings_metadata=None)\n",
    "# train\n",
    "rms = RMSprop(lr=0.001)\n",
    "sgd = SGD(lr=0.001)\n",
    "model.compile(loss=contrastive_loss, optimizer=rms, metrics=[accuracy])\n",
    "model.fit([tr_pairs[:, 0], tr_pairs[:, 1]], tr_y,\n",
    "          batch_size=128,\n",
    "          epochs=epochs,\n",
    "          validation_data=([te_pairs[:, 0], te_pairs[:, 1]], te_y),\n",
    "          callbacks=[tb_cb])\n",
    "\n",
    "model.save('./models/siamese_speech_model.h5')\n",
    "# compute final accuracy on training and test sets\n",
    "y_pred = model.predict([tr_pairs[:, 0], tr_pairs[:, 1]])\n",
    "tr_acc = compute_accuracy(tr_y, y_pred)\n",
    "y_pred = model.predict([te_pairs[:, 0], te_pairs[:, 1]])\n",
    "te_acc = compute_accuracy(te_y, y_pred)\n",
    "\n",
    "print('* Accuracy on training set: %0.2f%%' % (100 * tr_acc))\n",
    "print('* Accuracy on test set: %0.2f%%' % (100 * te_acc))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}

142 mnist_siamese.py

@@ -1,142 +0,0 @@
'''Train a Siamese MLP on pairs of digits from the MNIST dataset.

It follows Hadsell-et-al.'06 [1] by computing the Euclidean distance on the
output of the shared network and by optimizing the contrastive loss (see paper
for more details).

[1] "Dimensionality Reduction by Learning an Invariant Mapping"
    http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf

Gets to 97.2% test accuracy after 20 epochs.
2 seconds per epoch on a Titan X Maxwell GPU
'''
from __future__ import absolute_import
from __future__ import print_function
import numpy as np

import random
from keras.datasets import mnist
from keras.models import Model
from keras.layers import Dense, Dropout, Input, Lambda
from keras.optimizers import RMSprop
from keras import backend as K

%matplotlib inline
import matplotlib.pyplot as plt

num_classes = 10


def euclidean_distance(vects):
    x, y = vects
    return K.sqrt(K.maximum(K.sum(K.square(x - y), axis=1, keepdims=True), K.epsilon()))


def eucl_dist_output_shape(shapes):
    shape1, shape2 = shapes
    return (shape1[0], 1)


def contrastive_loss(y_true, y_pred):
    '''Contrastive loss from Hadsell-et-al.'06
    http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf
    '''
    margin = 1
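    # y_true == 1 marks a similar pair (pulled together); y_true == 0 marks a
    # dissimilar pair (pushed apart until its distance reaches the margin)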
    return K.mean(y_true * K.square(y_pred) +
                  (1 - y_true) * K.square(K.maximum(margin - y_pred, 0)))


def create_pairs(x, digit_indices):
    '''Positive and negative pair creation.
    Alternates between positive and negative pairs.
    '''
    pairs = []
    labels = []
    n = min([len(digit_indices[d]) for d in range(num_classes)]) - 1
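    # cap n at the smallest class size so every digit contributes the same
    # number of pairs (the i + 1 lookahead below needs n + 1 indices per class)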
    for d in range(num_classes):
        for i in range(n):
            z1, z2 = digit_indices[d][i], digit_indices[d][i + 1]
            pairs += [[x[z1], x[z2]]]
            inc = random.randrange(1, num_classes)
            dn = (d + inc) % num_classes
            z1, z2 = digit_indices[d][i], digit_indices[dn][i]
            pairs += [[x[z1], x[z2]]]
            labels += [1, 0]
    return np.array(pairs), np.array(labels)


def create_base_network(input_dim):
    '''Base network to be shared (eq. to feature extraction).
    '''
    input = Input(shape=(input_dim,))
    x = Dense(128, activation='relu')(input)
    x = Dropout(0.1)(x)
    x = Dense(128, activation='relu')(x)
    x = Dropout(0.1)(x)
    x = Dense(128, activation='relu')(x)
    return Model(input, x)


def compute_accuracy(y_true, y_pred):
    '''Compute classification accuracy with a fixed threshold on distances.
    '''
    pred = y_pred.ravel() < 0.5
    return np.mean(pred == y_true)


def accuracy(y_true, y_pred):
    '''Compute classification accuracy with a fixed threshold on distances.
    '''
    return K.mean(K.equal(y_true, K.cast(y_pred < 0.5, y_true.dtype)))


# the data, shuffled and split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.reshape(60000, 784)
x_test = x_test.reshape(10000, 784)
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
input_dim = 784
epochs = 20

# create training+test positive and negative pairs
digit_indices = [np.where(y_train == i)[0] for i in range(num_classes)]
tr_pairs, tr_y = create_pairs(x_train, digit_indices)

digit_indices = [np.where(y_test == i)[0] for i in range(num_classes)]
te_pairs, te_y = create_pairs(x_test, digit_indices)
# network definition
base_network = create_base_network(input_dim)

input_a = Input(shape=(input_dim,))
input_b = Input(shape=(input_dim,))

# because we re-use the same instance `base_network`,
# the weights of the network
# will be shared across the two branches
processed_a = base_network(input_a)
processed_b = base_network(input_b)

distance = Lambda(euclidean_distance,
                  output_shape=eucl_dist_output_shape)([processed_a, processed_b])

model = Model([input_a, input_b], distance)

# train
rms = RMSprop()
model.compile(loss=contrastive_loss, optimizer=rms, metrics=[accuracy])
model.fit([tr_pairs[:, 0], tr_pairs[:, 1]], tr_y,
          batch_size=128,
          epochs=epochs,
          validation_data=([te_pairs[:, 0], te_pairs[:, 1]], te_y))

# compute final accuracy on training and test sets
y_pred = model.predict([tr_pairs[:, 0], tr_pairs[:, 1]])
tr_acc = compute_accuracy(tr_y, y_pred)
y_pred = model.predict([te_pairs[:, 0], te_pairs[:, 1]])
te_acc = compute_accuracy(te_y, y_pred)

print('* Accuracy on training set: %0.2f%%' % (100 * tr_acc))
print('* Accuracy on test set: %0.2f%%' % (100 * te_acc))

@@ -1,90 +0,0 @@
import tensorflow as tf
import numpy as np


class SiameseLSTM(object):
    """
    An LSTM-based deep Siamese network for text similarity.
    Uses a character embedding layer, followed by a biLSTM and an energy loss layer.
    """

    def BiRNN(self, x, dropout, scope, embedding_size, sequence_length):
        n_input = embedding_size
        n_steps = sequence_length
        n_hidden = n_steps
        n_layers = 3
        # Prepare data shape to match `bidirectional_rnn` function requirements
        # Current data input shape: (batch_size, n_steps, n_input) (?, seq_len, embedding_size)
        # Required shape: 'n_steps' tensors list of shape (batch_size, n_input)
        # Permuting batch_size and n_steps
        x = tf.transpose(x, [1, 0, 2])
        # Reshape to (n_steps*batch_size, n_input)
        x = tf.reshape(x, [-1, n_input])
        print(x)
        # Split to get a list of 'n_steps' tensors of shape (batch_size, n_input)
        x = tf.split(x, n_steps, 0)
        print(x)
        # Define lstm cells with tensorflow
        # Forward direction cell
        with tf.name_scope("fw" + scope), tf.variable_scope("fw" + scope):
            stacked_rnn_fw = []
            for _ in range(n_layers):
                fw_cell = tf.nn.rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0, state_is_tuple=True)
                lstm_fw_cell = tf.contrib.rnn.DropoutWrapper(fw_cell, output_keep_prob=dropout)
                stacked_rnn_fw.append(lstm_fw_cell)
            lstm_fw_cell_m = tf.nn.rnn_cell.MultiRNNCell(cells=stacked_rnn_fw, state_is_tuple=True)

        with tf.name_scope("bw" + scope), tf.variable_scope("bw" + scope):
            stacked_rnn_bw = []
            for _ in range(n_layers):
                bw_cell = tf.nn.rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0, state_is_tuple=True)
                lstm_bw_cell = tf.contrib.rnn.DropoutWrapper(bw_cell, output_keep_prob=dropout)
                stacked_rnn_bw.append(lstm_bw_cell)
            lstm_bw_cell_m = tf.nn.rnn_cell.MultiRNNCell(cells=stacked_rnn_bw, state_is_tuple=True)
        # Get lstm cell output

        with tf.name_scope("bw" + scope), tf.variable_scope("bw" + scope):
            outputs, _, _ = tf.nn.static_bidirectional_rnn(lstm_fw_cell_m, lstm_bw_cell_m, x, dtype=tf.float32)
        return outputs[-1]

    def contrastive_loss(self, y, d, batch_size):
        tmp = y * tf.square(d)
        # tmp = tf.mul(y, tf.square(d))
        tmp2 = (1 - y) * tf.square(tf.maximum((1 - d), 0))
        return tf.reduce_sum(tmp + tmp2) / batch_size / 2

    def __init__(
            self, sequence_length, vocab_size, embedding_size, hidden_units, l2_reg_lambda, batch_size):

        # Placeholders for input, output and dropout
        self.input_x1 = tf.placeholder(tf.int32, [None, sequence_length], name="input_x1")
        self.input_x2 = tf.placeholder(tf.int32, [None, sequence_length], name="input_x2")
        self.input_y = tf.placeholder(tf.float32, [None], name="input_y")
        self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")

        # Keeping track of l2 regularization loss (optional)
        l2_loss = tf.constant(0.0, name="l2_loss")

        # Embedding layer
        with tf.name_scope("embedding"):
            self.W = tf.Variable(
                tf.random_uniform([vocab_size, embedding_size], -1.0, 1.0),
                trainable=True, name="W")
            self.embedded_chars1 = tf.nn.embedding_lookup(self.W, self.input_x1)
            # self.embedded_chars_expanded1 = tf.expand_dims(self.embedded_chars1, -1)
            self.embedded_chars2 = tf.nn.embedding_lookup(self.W, self.input_x2)
            # self.embedded_chars_expanded2 = tf.expand_dims(self.embedded_chars2, -1)

        # Run each input through its BiRNN side and compare the final outputs
        with tf.name_scope("output"):
            self.out1 = self.BiRNN(self.embedded_chars1, self.dropout_keep_prob, "side1", embedding_size, sequence_length)
            self.out2 = self.BiRNN(self.embedded_chars2, self.dropout_keep_prob, "side2", embedding_size, sequence_length)
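            # Euclidean distance between the two branch outputs, normalised by
            # the sum of their norms so the result always lies in [0, 1]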
            self.distance = tf.sqrt(tf.reduce_sum(tf.square(tf.subtract(self.out1, self.out2)), 1, keep_dims=True))
            self.distance = tf.div(self.distance, tf.add(tf.sqrt(tf.reduce_sum(tf.square(self.out1), 1, keep_dims=True)), tf.sqrt(tf.reduce_sum(tf.square(self.out2), 1, keep_dims=True))))
            self.distance = tf.reshape(self.distance, [-1], name="distance")
        with tf.name_scope("loss"):
            self.loss = self.contrastive_loss(self.input_y, self.distance, batch_size)
        #### Accuracy computation is outside of this class.
        with tf.name_scope("accuracy"):
            self.temp_sim = tf.subtract(tf.ones_like(self.distance), tf.rint(self.distance), name="temp_sim")  # auto threshold 0.5
            correct_predictions = tf.equal(self.temp_sim, self.input_y)
            self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"), name="accuracy")

@@ -1,27 +1,12 @@
'''Train a Siamese MLP on pairs of digits from the MNIST dataset.

It follows Hadsell-et-al.'06 [1] by computing the Euclidean distance on the
output of the shared network and by optimizing the contrastive loss (see paper
for more details).

[1] "Dimensionality Reduction by Learning an Invariant Mapping"
    http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf

Gets to 97.2% test accuracy after 20 epochs.
2 seconds per epoch on a Titan X Maxwell GPU
'''
from __future__ import absolute_import
from __future__ import print_function
import numpy as np

# import random
# from keras.datasets import mnist
from speech_data import speech_model_data
from keras.models import Model
from keras.layers import Input, Dense, Dropout, SimpleRNN, LSTM, Lambda
# Dense, Dropout, Input, Lambda, LSTM, SimpleRNN
from keras.layers import Input, Dense, Dropout, LSTM, Lambda
from keras.optimizers import RMSprop, SGD
from keras.callbacks import TensorBoard
from keras.callbacks import TensorBoard, ModelCheckpoint
from keras import backend as K


@@ -40,26 +25,20 @@ def contrastive_loss(y_true, y_pred):
    '''Contrastive loss from Hadsell-et-al.'06
    http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf
    '''
    margin = 1
    # print(y_true, y_pred)
    return K.mean(y_true * K.square(y_pred) +
                  (1 - y_true) * K.square(K.maximum(margin - y_pred, 0)))
                  (1 - y_true) * K.square(K.maximum(1 - y_pred, 0)))


def create_base_rnn_network(input_dim):
    '''Base network to be shared (eq. to feature extraction).
    '''
    inp = Input(shape=input_dim)
    # d1 = Dense(1024, activation='sigmoid')(inp)
    # # d2 = Dense(2, activation='sigmoid')(d1)
    ls1 = LSTM(1024, return_sequences=True)(inp)
    ls2 = LSTM(512, return_sequences=True)(ls1)
    ls3 = LSTM(32)(ls2)  # , return_sequences=True
    # sr2 = SimpleRNN(128, return_sequences=True)(sr1)
    # sr3 = SimpleRNN(32)(sr2)
    # x = Dense(128, activation='relu')(sr1)
    ls3 = LSTM(32)(ls2)
    return Model(inp, ls3)


def create_base_network(input_dim):
    '''Base network to be shared (eq. to feature extraction).
    '''

@@ -71,6 +50,7 @@ def create_base_network(input_dim):
    x = Dense(128, activation='relu')(x)
    return Model(input, x)


def compute_accuracy(y_true, y_pred):
    '''Compute classification accuracy with a fixed threshold on distances.
    '''

@@ -86,16 +66,7 @@ def accuracy(y_true, y_pred):

# the data, shuffled and split between train and test sets
tr_pairs, te_pairs, tr_y, te_y = speech_model_data()
# y_train.shape,y_test.shape
# x_train.shape,x_test.shape
# x_train = x_train.reshape(60000, 784)
# x_test = x_test.reshape(10000, 784)
# x_train = x_train.astype('float32')
# x_test = x_test.astype('float32')
# x_train /= 255
# x_test /= 255
input_dim = (tr_pairs.shape[2], tr_pairs.shape[3])
epochs = 20

# network definition
base_network = create_base_rnn_network(input_dim)

@@ -115,20 +86,26 @@ distance = Lambda(euclidean_distance,

model = Model([input_a, input_b], distance)

tb_cb = TensorBoard(log_dir='./siamese_logs', histogram_freq=1, batch_size=32,
                    write_graph=True, write_grads=True, write_images=True,
                    embeddings_freq=0, embeddings_layer_names=None,
                    embeddings_metadata=None)
tb_cb = TensorBoard(log_dir='./logs/siamese_logs', histogram_freq=1,
                    batch_size=32, write_graph=True, write_grads=True,
                    write_images=True, embeddings_freq=0,
                    embeddings_layer_names=None, embeddings_metadata=None)
cp_file_fmt = './models/siamese_speech_model-{epoch:02d}-epoch-{val_acc:0.2f}\
-acc.h5'
cp_cb = ModelCheckpoint(cp_file_fmt, monitor='val_acc', verbose=0,
                        save_best_only=False, save_weights_only=False,
                        mode='auto', period=1)
# train
rms = RMSprop(lr=0.001)  # lr=0.001)
rms = RMSprop(lr=0.001)
sgd = SGD(lr=0.001)
model.compile(loss=contrastive_loss, optimizer=rms, metrics=[accuracy])
model.fit([tr_pairs[:, 0], tr_pairs[:, 1]], tr_y,
          batch_size=128,
          epochs=epochs,
          epochs=50,
          validation_data=([te_pairs[:, 0], te_pairs[:, 1]], te_y),
          callbacks=[tb_cb])
          callbacks=[tb_cb, cp_cb])

model.save('./models/siamese_speech_model-final.h5')
# compute final accuracy on training and test sets
y_pred = model.predict([tr_pairs[:, 0], tr_pairs[:, 1]])
tr_acc = compute_accuracy(tr_y, y_pred)