speech-scoring/Siamese.ipynb

{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"'''Train a Siamese MLP on pairs of digits from the MNIST dataset.\n",
"\n",
"It follows Hadsell-et-al.'06 [1] by computing the Euclidean distance on the\n",
"output of the shared network and by optimizing the contrastive loss (see paper\n",
"for mode details).\n",
"\n",
"[1] \"Dimensionality Reduction by Learning an Invariant Mapping\"\n",
" http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf\n",
"\n",
"Gets to 97.2% test accuracy after 20 epochs.\n",
"2 seconds per epoch on a Titan X Maxwell GPU\n",
"'''\n",
"from __future__ import absolute_import\n",
"from __future__ import print_function\n",
"import numpy as np\n",
"\n",
"# import random\n",
"# from keras.datasets import mnist\n",
"from speech_data import speech_model_data\n",
"from keras.models import Model\n",
"from keras.layers import Input, Dense, Dropout, SimpleRNN, LSTM, Lambda\n",
"# Dense, Dropout, Input, Lambda, LSTM, SimpleRNN\n",
"from keras.optimizers import RMSprop, SGD\n",
"from keras.callbacks import TensorBoard\n",
"from keras import backend as K\n",
"\n",
"\n",
"def euclidean_distance(vects):\n",
" x, y = vects\n",
" return K.sqrt(K.maximum(K.sum(K.square(x - y), axis=1, keepdims=True),\n",
" K.epsilon()))\n",
"\n",
"\n",
"def eucl_dist_output_shape(shapes):\n",
" shape1, shape2 = shapes\n",
" return (shape1[0], 1)\n",
"\n",
"\n",
"def contrastive_loss(y_true, y_pred):\n",
" '''Contrastive loss from Hadsell-et-al.'06\n",
" http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf\n",
" '''\n",
" margin = 1\n",
" # print(y_true, y_pred)\n",
" return K.mean(y_true * K.square(y_pred) +\n",
" (1 - y_true) * K.square(K.maximum(margin - y_pred, 0)))\n",
"\n",
"\n",
"def create_base_rnn_network(input_dim):\n",
" '''Base network to be shared (eq. to feature extraction).\n",
" '''\n",
" inp = Input(shape=input_dim)\n",
" # d1 = Dense(1024, activation='sigmoid')(inp)\n",
" # # d2 = Dense(2, activation='sigmoid')(d1)\n",
" ls1 = LSTM(1024, return_sequences=True)(inp)\n",
" ls2 = LSTM(512, return_sequences=True)(ls1)\n",
" ls3 = LSTM(32)(ls2) # , return_sequences=True\n",
" # sr2 = SimpleRNN(128, return_sequences=True)(sr1)\n",
" # sr3 = SimpleRNN(32)(sr2)\n",
" # x = Dense(128, activation='relu')(sr1)\n",
" return Model(inp, ls3)\n",
"\n",
"def create_base_network(input_dim):\n",
" '''Base network to be shared (eq. to feature extraction).\n",
" '''\n",
" input = Input(shape=input_dim)\n",
" x = Dense(128, activation='relu')(input)\n",
" x = Dropout(0.1)(x)\n",
" x = Dense(128, activation='relu')(x)\n",
" x = Dropout(0.1)(x)\n",
" x = Dense(128, activation='relu')(x)\n",
" return Model(input, x)\n",
"\n",
"def compute_accuracy(y_true, y_pred):\n",
" '''Compute classification accuracy with a fixed threshold on distances.\n",
" '''\n",
" pred = y_pred.ravel() < 0.5\n",
" return np.mean(pred == y_true)\n",
"\n",
"\n",
"def accuracy(y_true, y_pred):\n",
" '''Compute classification accuracy with a fixed threshold on distances.\n",
" '''\n",
" return K.mean(K.equal(y_true, K.cast(y_pred < 0.5, y_true.dtype)))\n",
"\n",
"\n",
"# the data, shuffled and split between train and test sets\n",
"tr_pairs, te_pairs, tr_y, te_y = speech_model_data()\n",
"\n",
"%matplotlib inline\n",
"import matplotlib.pyplot as plt\n",
"def plot_spec(ims):\n",
" timebins, freqbins = np.shape(ims)\n",
" # import pdb;pdb.set_trace()\n",
"# plt.figure(figsize=(15, 7.5))\n",
" plt.imshow(np.transpose(ims), origin=\"lower\", aspect=\"auto\", cmap=\"jet\", interpolation=\"none\")\n",
" plt.colorbar()\n",
" xlocs = np.float32(np.linspace(0, timebins-1, 5))\n",
" plt.xticks(xlocs, [\"%.02f\" % l for l in ((xlocs*15/timebins)+(0.5*2**10))/22100])\n",
" ylocs = np.int16(np.round(np.linspace(0, freqbins-1, 10)))\n",
"# plt.yticks(ylocs, [\"%.02f\" % freq[i] for i in ylocs])\n",
" \n",
"def show_nth(n):\n",
" plt.figure(figsize=(15,7.5))\n",
" plt.subplot(1,2,1)\n",
" plot_spec(te_pairs[n][0].reshape(15,1654))\n",
" print(te_y[n])\n",
" plt.subplot(1,2,2)\n",
" plot_spec(te_pairs[n][1].reshape(15,1654))\n",
"show_nth(0)\n",
"\n",
"# y_train.shape,y_test.shape\n",
"# x_train.shape,x_test.shape\n",
"# x_train = x_train.reshape(60000, 784)\n",
"# x_test = x_test.reshape(10000, 784)\n",
"# x_train = x_train.astype('float32')\n",
"# x_test = x_test.astype('float32')\n",
"# x_train /= 255\n",
"# x_test /= 255\n",
"\n",
"# input_dim = (tr_pairs.shape[2], tr_pairs.shape[3])\n",
"# epochs = 20\n",
"\n",
"# # network definition\n",
"# base_network = create_base_rnn_network(input_dim)\n",
"# input_a = Input(shape=input_dim)\n",
"# input_b = Input(shape=input_dim)\n",
"\n",
"# # because we re-use the same instance `base_network`,\n",
"# # the weights of the network\n",
"# # will be shared across the two branches\n",
"# processed_a = base_network(input_a)\n",
"# processed_b = base_network(input_b)\n",
"\n",
"# distance = Lambda(euclidean_distance,\n",
"# output_shape=eucl_dist_output_shape)(\n",
"# [processed_a, processed_b]\n",
"# )\n",
"\n",
"# model = Model([input_a, input_b], distance)\n",
"\n",
"# tb_cb = TensorBoard(log_dir='./siamese_logs', histogram_freq=1, batch_size=32,\n",
"# write_graph=True, write_grads=True, write_images=True,\n",
"# embeddings_freq=0, embeddings_layer_names=None,\n",
"# embeddings_metadata=None)\n",
"# # train\n",
"# rms = RMSprop(lr=0.00001) # lr=0.001)\n",
"# sgd = SGD(lr=0.001)\n",
"# model.compile(loss=contrastive_loss, optimizer=rms, metrics=[accuracy])\n",
"# model.fit([tr_pairs[:, 0], tr_pairs[:, 1]], tr_y,\n",
"# batch_size=128,\n",
"# epochs=epochs,\n",
"# validation_data=([te_pairs[:, 0], te_pairs[:, 1]], te_y),\n",
"# callbacks=[tb_cb])\n",
"\n",
"# # compute final accuracy on training and test sets\n",
"# y_pred = model.predict([tr_pairs[:, 0], tr_pairs[:, 1]])\n",
"# tr_acc = compute_accuracy(tr_y, y_pred)\n",
"# y_pred = model.predict([te_pairs[:, 0], te_pairs[:, 1]])\n",
"# te_acc = compute_accuracy(te_y, y_pred)\n",
"\n",
"# print('* Accuracy on training set: %0.2f%%' % (100 * tr_acc))\n",
"# print('* Accuracy on test set: %0.2f%%' % (100 * te_acc))\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"'''Train a Siamese MLP on pairs of digits from the MNIST dataset.\n",
"\n",
"It follows Hadsell-et-al.'06 [1] by computing the Euclidean distance on the\n",
"output of the shared network and by optimizing the contrastive loss (see paper\n",
"for mode details).\n",
"\n",
"[1] \"Dimensionality Reduction by Learning an Invariant Mapping\"\n",
" http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf\n",
"\n",
"Gets to 97.2% test accuracy after 20 epochs.\n",
"2 seconds per epoch on a Titan X Maxwell GPU\n",
"'''\n",
"from __future__ import absolute_import\n",
"from __future__ import print_function\n",
"import numpy as np\n",
"\n",
"# import random\n",
"# from keras.datasets import mnist\n",
"from speech_data import speech_model_data\n",
"from keras.models import Model\n",
"from keras.layers import Input, Dense, Dropout, SimpleRNN, LSTM, Lambda\n",
"# Dense, Dropout, Input, Lambda, LSTM, SimpleRNN\n",
"from keras.optimizers import RMSprop, SGD\n",
"from keras.callbacks import TensorBoard\n",
"from keras import backend as K\n",
"\n",
"\n",
"def euclidean_distance(vects):\n",
" x, y = vects\n",
" return K.sqrt(K.maximum(K.sum(K.square(x - y), axis=1, keepdims=True),\n",
" K.epsilon()))\n",
"\n",
"\n",
"def eucl_dist_output_shape(shapes):\n",
" shape1, shape2 = shapes\n",
" return (shape1[0], 1)\n",
"\n",
"\n",
"def contrastive_loss(y_true, y_pred):\n",
" '''Contrastive loss from Hadsell-et-al.'06\n",
" http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf\n",
" '''\n",
" margin = 1\n",
" # print(y_true, y_pred)\n",
" return K.mean(y_true * K.square(y_pred) +\n",
" (1 - y_true) * K.square(K.maximum(margin - y_pred, 0)))\n",
"\n",
"\n",
"def create_base_rnn_network(input_dim):\n",
" '''Base network to be shared (eq. to feature extraction).\n",
" '''\n",
" inp = Input(shape=input_dim)\n",
" # d1 = Dense(1024, activation='sigmoid')(inp)\n",
" # # d2 = Dense(2, activation='sigmoid')(d1)\n",
" ls1 = LSTM(1024, return_sequences=True)(inp)\n",
" ls2 = LSTM(512, return_sequences=True)(ls1)\n",
" ls3 = LSTM(32)(ls2) # , return_sequences=True\n",
" # sr2 = SimpleRNN(128, return_sequences=True)(sr1)\n",
" # sr3 = SimpleRNN(32)(sr2)\n",
" # x = Dense(128, activation='relu')(sr1)\n",
" return Model(inp, ls3)\n",
"\n",
"def create_base_network(input_dim):\n",
" '''Base network to be shared (eq. to feature extraction).\n",
" '''\n",
" input = Input(shape=input_dim)\n",
" x = Dense(128, activation='relu')(input)\n",
" x = Dropout(0.1)(x)\n",
" x = Dense(128, activation='relu')(x)\n",
" x = Dropout(0.1)(x)\n",
" x = Dense(128, activation='relu')(x)\n",
" return Model(input, x)\n",
"\n",
"def compute_accuracy(y_true, y_pred):\n",
" '''Compute classification accuracy with a fixed threshold on distances.\n",
" '''\n",
" pred = y_pred.ravel() < 0.5\n",
" return np.mean(pred == y_true)\n",
"\n",
"\n",
"def accuracy(y_true, y_pred):\n",
" '''Compute classification accuracy with a fixed threshold on distances.\n",
" '''\n",
" return K.mean(K.equal(y_true, K.cast(y_pred < 0.5, y_true.dtype)))\n",
"\n",
"\n",
"# the data, shuffled and split between train and test sets\n",
"tr_pairs, te_pairs, tr_y, te_y = speech_model_data()\n",
"# y_train.shape,y_test.shape\n",
"# x_train.shape,x_test.shape\n",
"# x_train = x_train.reshape(60000, 784)\n",
"# x_test = x_test.reshape(10000, 784)\n",
"# x_train = x_train.astype('float32')\n",
"# x_test = x_test.astype('float32')\n",
"# x_train /= 255\n",
"# x_test /= 255\n",
"input_dim = (tr_pairs.shape[2], tr_pairs.shape[3])\n",
"epochs = 20\n",
"\n",
"# network definition\n",
"base_network = create_base_rnn_network(input_dim)\n",
"input_a = Input(shape=input_dim)\n",
"input_b = Input(shape=input_dim)\n",
"\n",
"# because we re-use the same instance `base_network`,\n",
"# the weights of the network\n",
"# will be shared across the two branches\n",
"processed_a = base_network(input_a)\n",
"processed_b = base_network(input_b)\n",
"\n",
"distance = Lambda(euclidean_distance,\n",
" output_shape=eucl_dist_output_shape)(\n",
" [processed_a, processed_b]\n",
")\n",
"\n",
"model = Model([input_a, input_b], distance)\n",
"\n",
"tb_cb = TensorBoard(log_dir='./siamese_logs', histogram_freq=1, batch_size=32,\n",
" write_graph=True, write_grads=True, write_images=True,\n",
" embeddings_freq=0, embeddings_layer_names=None,\n",
" embeddings_metadata=None)\n",
"# train\n",
"rms = RMSprop(lr=0.001) # lr=0.001)\n",
"sgd = SGD(lr=0.001)\n",
"model.compile(loss=contrastive_loss, optimizer=rms, metrics=[accuracy])\n",
"model.fit([tr_pairs[:, 0], tr_pairs[:, 1]], tr_y,\n",
" batch_size=128,\n",
" epochs=epochs,\n",
" validation_data=([te_pairs[:, 0], te_pairs[:, 1]], te_y),\n",
" callbacks=[tb_cb])\n",
"\n",
"model.save('./siamese_speech_model.h5')\n",
"# compute final accuracy on training and test sets\n",
"y_pred = model.predict([tr_pairs[:, 0], tr_pairs[:, 1]])\n",
"tr_acc = compute_accuracy(tr_y, y_pred)\n",
"y_pred = model.predict([te_pairs[:, 0], te_pairs[:, 1]])\n",
"te_acc = compute_accuracy(te_y, y_pred)\n",
"\n",
"print('* Accuracy on training set: %0.2f%%' % (100 * tr_acc))\n",
"print('* Accuracy on test set: %0.2f%%' % (100 * te_acc))"
]
},
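{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sketch, not part of the original notebook: reload the model saved above and\n",
"# score held-out pairs. `custom_objects` is required because the model was\n",
"# compiled with the custom contrastive_loss and accuracy defined earlier.\n",
"from keras.models import load_model\n",
"\n",
"scorer = load_model('./siamese_speech_model.h5',\n",
"                    custom_objects={'contrastive_loss': contrastive_loss,\n",
"                                    'accuracy': accuracy})\n",
"\n",
"# distances for the first few test pairs; < 0.5 is treated as a matching pair,\n",
"# mirroring the threshold used in compute_accuracy\n",
"dists = scorer.predict([te_pairs[:5, 0], te_pairs[:5, 1]]).ravel()\n",
"for d, label in zip(dists, te_y[:5]):\n",
"    print('distance=%.3f  predicted_match=%s  true_label=%s' % (d, d < 0.5, label))\n"
]
},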
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}