implemented conv2d of mnist from scratch added fourth week notes

2017-10-28 17:46:00 +05:30
parent 64b89c326b
commit e792d29ba1
6 changed files with 354 additions and 2 deletions
--- a/FourthSaturday/Notes.md
+++ b/FourthSaturday/Notes.md
@@ -0,0 +1,59 @@
 Convolutional Neural Network
 ============================
 Hubel and Wiesel (1962) experiment -> inspiration for CNN
 single neuron detects edges oriented at 45 degrees
 filter kernel -> (a patch in image - matrix) (typical to 3x3|5x5|7x7
                 smaller the better)
                 returns a feature map
 CNN -> multiple layers of kernels(1st layer computes on the input image,
       subsequent layers computes on the feature maps generated by the previous
       layer)
 strides -> number of pixels the kernel shifts between successive computations
            on the same layer (overlap between patches = kernel size - stride)
 (max) pooling kernel -> looks at a patch of image and
                        returns the (maximum) value in that patch
                        (doesn't have any learnable parameters)
                        usually the number of feature maps is doubled after a
                        pooling layer is computed
                        maps (n,n)eg.[(28x28)x128] -> (mxm)eg.[(14,14)x128] -> (x256)
 No. of weights required per layer = (k1xk1)xc1xc2 (c1 is channels in input layer)
                                   (k1,k1) is the dimension of filter kernel
                                   (c2 is number of feature maps in first layer)
                                   -> in 1st layer
                                   (k2xk2)xc2xc3 (c3 is number of feature maps
                                   in second layer) -> in 2nd layer
 conv2d -> padding 'same' adds 0's at the borders to make the output
          dimension same as image size
           'valid' does the convolution on actual pixels alone -> will return
          a smaller dimension relative to the image
 technique: use a smaller train/test data and try to overfit the model
           (100% on train to verify that the model is expressive enough
           to learn the data)
 Deconvolutional Layers(misnomer):
 upsampling an image using this layer
 (tf.layers.conv2d_transpose,tf.nn.conv2d_transpose)
 Transfer Learning:
 ==================
 using pretrained networks as starting point for a task (using a subset of layers)
 eg. VGG(Visual Geometry Group) networks (224x224 -> 1000 classes)
    -> classification(what) & localization(where)
 CNN works great for classification(since it is invariant to location)
 to predict the location (use the earlier layers(contains locality info)
 for final output)
 using it to identify a class not in the 1000 pretrained classes
 using it to identify a class with input size 64x64(depends on the first layer filter size)
 Regularization:
 ===============
 Dropout based regularization is great for image classification application.
 (Warning: not to be used on data without redundancy(image data has lot of redundancy
  eg. identifying a partial face is quite easy))
--- a/FourthSaturday/mnist_conv.py
+++ b/FourthSaturday/mnist_conv.py
@@ -0,0 +1,96 @@
 import tensorflow as tf
 from tensorflow.examples.tutorials.mnist import input_data
 # Load MNIST from ../mnist_data with one-hot encoded labels.
 mnist = input_data.read_data_sets('../mnist_data', one_hot=True)
 # x: flattened 28x28 images; y_: one-hot labels. Batch dimension is left open.
 x = tf.placeholder(dtype=tf.float32, shape=[None, 28 * 28])
 y_ = tf.placeholder(dtype=tf.float32, shape=[None, 10])
 # W = tf.Variable(tf.zeros([28*28,10]))
 # b = tf.Variable(tf.zeros([10]))
 #
 # y = tf.matmul(x,W) + b
 #
 # cross_entropy = tf.reduce_mean(
 #     tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y))
 #
 # train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)
 #
 # with tf.Session() as sess:
 #     sess.run(tf.global_variables_initializer())
 #     for _ in range(1000):
 #       batch = mnist.train.next_batch(100)
 #       sess.run([train_step],feed_dict={x: batch[0], y_: batch[1]})
 #     correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1))
 #     accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
 #     print(accuracy.eval(feed_dict={x: mnist.test.images, y_: mnist.test.labels}))
 def weight_variable(shape):
  """Return a tf.Variable of `shape` initialised from a truncated normal (stddev 0.1)."""
  return tf.Variable(tf.truncated_normal(shape, stddev=0.1))
 def bias_variable(shape):
  """Return a tf.Variable of `shape` with every element initialised to 0.1."""
  return tf.Variable(tf.constant(0.1, shape=shape))
 def conv2d(x, W):
  """Convolve `x` with kernel `W` using strides [1, 2, 2, 1] and 'VALID' padding."""
  step = [1, 2, 2, 1]
  return tf.nn.conv2d(x, W, strides=step, padding='VALID')
 # def max_pool_3x3(x):
 #   return tf.nn.max_pool(x, ksize=[1, 5, 5, 1],
 #                         strides=[1, 2, 2, 1], padding='SAME')
 # Restore the 28x28 single-channel image layout from the flat pixel vectors.
 x_image = tf.reshape(x, [-1, 28, 28, 1])
 # Layer 1: 4x4 kernels, 1 input channel -> 128 feature maps. conv2d applies
 # stride 2 with 'VALID' padding, so 28x28 shrinks to 13x13; the strided
 # convolution does the downsampling, hence no pooling layer.
 W_conv1 = weight_variable([4, 4, 1, 128])
 b_conv1 = bias_variable([128])
 h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
 # Layer 2: 5x5 kernels, 128 -> 64 feature maps; 13x13 shrinks to 5x5.
 W_conv2 = weight_variable([5, 5, 128, 64])
 b_conv2 = bias_variable([64])
 h_conv2 = tf.nn.relu(conv2d(h_conv1, W_conv2) + b_conv2)
 # Fully connected layer over the flattened 5x5x64 activations.
 W_fc1 = weight_variable([5 * 5 * 64, 512])
 b_fc1 = bias_variable([512])
 # The name is kept for compatibility; this flattens h_conv2 (no pooling here).
 h_pool2_flat = tf.reshape(h_conv2, [-1, 5 * 5 * 64])
 h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
 # Dropout: keep_prob is fed as 0.5 while training and 1.0 when evaluating.
 keep_prob = tf.placeholder(tf.float32)
 h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
 # Readout layer: one logit per digit class.
 W_fc2 = weight_variable([512, 10])
 b_fc2 = bias_variable([10])
 y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
 # Mean softmax cross-entropy between the one-hot labels and the logits.
 cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_conv))
 train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
 # Accuracy: fraction of examples whose arg-max logit matches the label.
 correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
 accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
 with tf.Session() as sess:
  sess.run(tf.global_variables_initializer())
  for i in range(20000):
    batch = mnist.train.next_batch(50)
    # Every 100 steps report accuracy on the current batch, dropout disabled.
    if i % 100 == 0:
      train_accuracy = accuracy.eval(feed_dict={
          x: batch[0], y_: batch[1], keep_prob: 1.0})
      print('step %d, training accuracy %g' % (i, train_accuracy))
    train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})
  # Final evaluation on the whole test set, dropout disabled.
  print('test accuracy %g' % accuracy.eval(feed_dict={
      x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}))
--- a/FourthSaturday/mnist_deep.py
+++ b/FourthSaturday/mnist_deep.py
@@ -0,0 +1,176 @@
 # Copyright 2015 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
 """A deep MNIST classifier using convolutional layers.
 See extensive documentation at
 https://www.tensorflow.org/get_started/mnist/pros
 """
 # Disable linter warnings to maintain consistency with tutorial.
 # pylint: disable=invalid-name
 # pylint: disable=g-bad-import-order
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 import argparse
 import sys
 import tempfile
 from tensorflow.examples.tutorials.mnist import input_data
 import tensorflow as tf
 # Parsed command-line flags; assigned in the __main__ guard and read by main().
 FLAGS = None
 def deepnn(x):
  """Build the digit classifier graph: two conv/pool stages, a dense layer,
  dropout and a 10-way readout.

  Args:
    x: input tensor of shape (N_examples, 784); each row is one flattened
    28x28 MNIST image.
  Returns:
    A (logits, keep_prob) tuple. logits has shape (N_examples, 10), holding
    one unnormalised score per digit class. keep_prob is a float32
    placeholder for the dropout keep probability.
  """
  # Recover the 2-D layout; the trailing 1 is the single grayscale channel.
  with tf.name_scope('reshape'):
    images = tf.reshape(x, [-1, 28, 28, 1])
  # conv1: 5x5 kernels, 1 channel in, 32 feature maps out.
  with tf.name_scope('conv1'):
    conv1_w = weight_variable([5, 5, 1, 32])
    conv1_b = bias_variable([32])
    conv1_pre = conv2d(images, conv1_w) + conv1_b
    conv1_out = tf.nn.relu(conv1_pre)
  # pool1: 2x2 max pooling.
  with tf.name_scope('pool1'):
    pool1_out = max_pool_2x2(conv1_out)
  # conv2: 5x5 kernels, 32 feature maps in, 64 out.
  with tf.name_scope('conv2'):
    conv2_w = weight_variable([5, 5, 32, 64])
    conv2_b = bias_variable([64])
    conv2_pre = conv2d(pool1_out, conv2_w) + conv2_b
    conv2_out = tf.nn.relu(conv2_pre)
  # pool2: second 2x2 max pooling.
  with tf.name_scope('pool2'):
    pool2_out = max_pool_2x2(conv2_out)
  # fc1: flatten the 7x7x64 pooled maps and project them to 1024 features.
  with tf.name_scope('fc1'):
    fc1_w = weight_variable([7 * 7 * 64, 1024])
    fc1_b = bias_variable([1024])
    flattened = tf.reshape(pool2_out, [-1, 7 * 7 * 64])
    fc1_out = tf.nn.relu(tf.matmul(flattened, fc1_w) + fc1_b)
  # dropout: the keep probability is supplied by the caller via the placeholder.
  with tf.name_scope('dropout'):
    keep_prob = tf.placeholder(tf.float32)
    fc1_dropped = tf.nn.dropout(fc1_out, keep_prob)
  # fc2: readout from 1024 features to the 10 digit classes.
  with tf.name_scope('fc2'):
    fc2_w = weight_variable([1024, 10])
    fc2_b = bias_variable([10])
    logits = tf.matmul(fc1_dropped, fc2_w) + fc2_b
  return logits, keep_prob
 def conv2d(x, W):
  """Convolve `x` with filter `W` using strides [1, 1, 1, 1] and 'SAME' padding."""
  unit_strides = [1, 1, 1, 1]
  return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')
 def max_pool_2x2(x):
  """Max-pool `x` over 2x2 windows with stride 2 and 'SAME' padding (2X downsampling)."""
  window = [1, 2, 2, 1]
  step = [1, 2, 2, 1]
  return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')
 def weight_variable(shape):
  """Create a tf.Variable of `shape` initialised from a truncated normal with stddev 0.1."""
  return tf.Variable(tf.truncated_normal(shape, stddev=0.1))
 def bias_variable(shape):
  """Create a tf.Variable of `shape` whose elements all start at 0.1."""
  return tf.Variable(tf.constant(0.1, shape=shape))
 def main(_):
  """Train the deep MNIST classifier and print its test accuracy.

  Loads the dataset from FLAGS.data_dir, builds the network with deepnn(),
  writes the graph definition to a temporary directory through a
  tf.summary.FileWriter, then runs 20000 Adam steps on batches of 50,
  printing the accuracy on the current batch every 100 steps.

  Args:
    _: unused positional argument supplied by tf.app.run.
  """
  # Import data
  mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=True)
  # Create the model
  x = tf.placeholder(tf.float32, [None, 784])
  # Define loss and optimizer
  y_ = tf.placeholder(tf.float32, [None, 10])
  # Build the graph for the deep net
  y_conv, keep_prob = deepnn(x)
  with tf.name_scope('loss'):
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=y_,
                                                            logits=y_conv)
  cross_entropy = tf.reduce_mean(cross_entropy)
  with tf.name_scope('adam_optimizer'):
    train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
  with tf.name_scope('accuracy'):
    correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
    correct_prediction = tf.cast(correct_prediction, tf.float32)
  accuracy = tf.reduce_mean(correct_prediction)
  graph_location = tempfile.mkdtemp()
  print('Saving graph to: %s' % graph_location)
  train_writer = tf.summary.FileWriter(graph_location)
  train_writer.add_graph(tf.get_default_graph())
  # Nothing else is written through this writer, so close it now: this
  # flushes the graph event to disk and releases the event file.
  train_writer.close()
  with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(20000):
      batch = mnist.train.next_batch(50)
      # Report accuracy with dropout disabled (keep_prob 1.0).
      if i % 100 == 0:
        train_accuracy = accuracy.eval(feed_dict={
            x: batch[0], y_: batch[1], keep_prob: 1.0})
        print('step %d, training accuracy %g' % (i, train_accuracy))
      train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})
    print('test accuracy %g' % accuracy.eval(feed_dict={
        x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}))
 if __name__ == '__main__':
  # Parse the flags this script knows; anything unrecognised is passed on
  # to tf.app.run together with the program name.
  arg_parser = argparse.ArgumentParser()
  arg_parser.add_argument(
      '--data_dir',
      type=str,
      default='/tmp/tensorflow/mnist/input_data',
      help='Directory for storing input data')
  FLAGS, leftover_args = arg_parser.parse_known_args()
  tf.app.run(main=main, argv=[sys.argv[0]] + leftover_args)
--- a/FourthSaturday/sentiment.py
+++ b/FourthSaturday/sentiment.py
--- a/Notes.md
+++ b/Notes.md
@@ -0,0 +1,18 @@
 Deep Learning:
 ==============
 Creating a model such that we don't have to hand engineer features, instead
 architecting the model such that it is capable of inferring the features
 on its own, given large amounts of data and many layers.
 Input of softmax layer is called logits( classifier )
 Optimization Momentum:
 ======================
 using a running average of the gradients computed in previous iterations to
 decide how much weight the current gradient gets in each gradient descent update.
 Weight initialization:
 ======================
 create a smaller network -> compute weights
 use the weights and add new layer and -> compute weights
 iterate and grow the network by using precomputed weights for deeper networks.
--- a/ThirdSunday/Faces.py
+++ b/ThirdSunday/Faces.py
@@ -65,14 +65,17 @@ def create_model(input_dim,output_dim):
        error_lower_bound = tf.constant(0.9,name='lower_bound')
        x = tf.placeholder(tf.float32, [None,input_dim])
        y = tf.placeholder(tf.float32, [None,output_dim])
        W1 = tf.Variable(tf.random_normal([input_dim, 512],stddev=2.0/math.sqrt(input_dim)),name='layer_1_weights')
        b1 = tf.Variable(tf.random_normal([512]),name='bias_1_weights')
        layer_1 = tf.nn.relu(tf.add(tf.matmul(x,W1),b1))
        W2 = tf.Variable(tf.random_normal([512, 128],stddev=2.0/math.sqrt(512)),name='layer_2_weights')
        b2 = tf.Variable(tf.random_normal([128]),name='bias_2_weights')
        layer_2 = tf.nn.sigmoid(tf.add(tf.matmul(layer_1,W2),b2))
        W_o = tf.Variable(tf.random_normal([128, output_dim],stddev=2.0/math.sqrt(128)),name='layer_output_weights')
        b_o = tf.Variable(tf.random_normal([output_dim]),name='bias_output_weights')
        layer_1 = tf.nn.relu(tf.add(tf.matmul(x,W1),b1))
        layer_2 = tf.nn.sigmoid(tf.add(tf.matmul(layer_1,W2),b2))
        #y_ = tf.nn.softmax(tf.add(tf.matmul(layer_2,W_o),b_o))+1e-6
        y_ = tf.add(tf.matmul(layer_2,W_o),b_o)#tf.nn.relu()