implemented conv2d on MNIST from scratch; added fourth-week notes

master
Malar Kannan 2017-10-28 17:46:00 +05:30
parent 64b89c326b
commit e792d29ba1
6 changed files with 354 additions and 2 deletions

59
FourthSaturday/Notes.md Normal file
View File

@@ -0,0 +1,59 @@
Convolutional Neural Network
============================
Hubel and Wiesel (1962) experiments -> inspiration for CNNs
(single neurons in the visual cortex respond to edges at particular orientations, e.g. 45 degrees)
filter kernel -> a small weight matrix convolved over patches of the image (typically 3x3, 5x5 or 7x7;
smaller tends to work better)
each kernel returns a feature map
CNN -> multiple layers of kernels (the 1st layer operates on the input image,
subsequent layers operate on the feature maps produced by the previous
layer)
stride -> number of pixels the kernel moves between successive positions
(a stride of 1 means neighbouring patches overlap heavily; larger strides shrink the output)
(max) pooling kernel -> looks at a patch of the feature map and
returns the (maximum) value in that patch
(has no learnable parameters)
usually the number of feature maps is doubled after a
pooling layer is computed
maps: (n,n) e.g. [(28x28) x 128] -> (m,m) e.g. [(14x14) x 128] after pooling -> 256 maps in the next conv layer
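A quick shape check of that pooling behaviour (a hedged sketch in TensorFlow 1.x; the placeholder shape is illustrative):

```python
import tensorflow as tf

# 128 feature maps of size 28x28; 2x2 max pooling with stride 2 halves the
# spatial dimensions but leaves the number of maps unchanged.
maps = tf.placeholder(tf.float32, [None, 28, 28, 128])
pooled = tf.nn.max_pool(maps, ksize=[1, 2, 2, 1],
                        strides=[1, 2, 2, 1], padding='SAME')
print(pooled.get_shape())  # (?, 14, 14, 128)
```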
Number of weights in a conv layer = (k1 x k1) x c1 x c2
    (k1, k1) -> dimensions of the filter kernel
    c1 -> number of channels in the input (1 for grayscale, 3 for RGB)
    c2 -> number of feature maps produced by the layer
for the next layer: (k2 x k2) x c2 x c3, where c3 is its number of feature maps
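To make the weight-count formula concrete, here is a small sanity check; the layer sizes match the conv layers used in the MNIST script in this commit (biases ignored):

```python
def conv_params(k, c_in, c_out):
    """(k x k) kernel, c_in input channels, c_out feature maps (ignoring biases)."""
    return k * k * c_in * c_out

print(conv_params(4, 1, 128))   # 1st layer: 4*4*1*128  = 2048 weights
print(conv_params(5, 128, 64))  # 2nd layer: 5*5*128*64 = 204800 weights
```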
conv2d -> padding 'same' pads the borders with 0's so the output
has the same spatial dimensions as the input
'valid' convolves over actual pixels only -> returns an output
smaller than the input
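A hedged sketch of the two padding modes in TensorFlow 1.x (kernel size and channel counts are illustrative):

```python
import tensorflow as tf

image = tf.placeholder(tf.float32, [None, 28, 28, 1])
kernel = tf.Variable(tf.truncated_normal([5, 5, 1, 32], stddev=0.1))

same = tf.nn.conv2d(image, kernel, strides=[1, 1, 1, 1], padding='SAME')
valid = tf.nn.conv2d(image, kernel, strides=[1, 1, 1, 1], padding='VALID')

print(same.get_shape())   # (?, 28, 28, 32) -- zero-padded, output matches input size
print(valid.get_shape())  # (?, 24, 24, 32) -- real pixels only, output shrinks by k-1
```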
technique: take a small slice of the train/test data and try to overfit the model
(reaching ~100% train accuracy verifies that the model is expressive enough
to learn the data)
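A sketch of this overfitting check, assuming the graph built in the MNIST script later in this commit (x, y_, keep_prob, train_step, accuracy and the mnist dataset object):

```python
# Take a tiny slice of the training set and train on it repeatedly; if the model
# cannot reach ~100% accuracy here, it is probably not expressive enough.
tiny_x = mnist.train.images[:100]
tiny_y = mnist.train.labels[:100]

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for _ in range(500):
        train_step.run(feed_dict={x: tiny_x, y_: tiny_y, keep_prob: 1.0})
    print(accuracy.eval(feed_dict={x: tiny_x, y_: tiny_y, keep_prob: 1.0}))  # expect ~1.0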
Deconvolutional layers (a misnomer; really transposed convolutions):
used to upsample an image or feature map
(tf.layers.conv2d_transpose, tf.nn.conv2d_transpose)
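A minimal upsampling sketch with tf.nn.conv2d_transpose (the batch size, kernel size and channel counts are illustrative):

```python
import tensorflow as tf

feat = tf.placeholder(tf.float32, [16, 14, 14, 64])                     # 14x14 feature maps
kernel = tf.Variable(tf.truncated_normal([3, 3, 32, 64], stddev=0.1))   # [h, w, out_ch, in_ch]

# Stride 2 roughly doubles the spatial dimensions: 14x14 -> 28x28.
up = tf.nn.conv2d_transpose(feat, kernel, output_shape=[16, 28, 28, 32],
                            strides=[1, 2, 2, 1], padding='SAME')
print(up.get_shape())  # (16, 28, 28, 32)
```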
Transfer Learning:
==================
using pretrained networks as the starting point for a new task (reusing a subset of layers)
eg. VGG (Visual Geometry Group) networks (224x224 input -> 1000 classes)
-> classification (what) & localization (where)
CNNs work great for classification (largely invariant to location)
to predict location, use the earlier layers (they contain locality information)
in the final output
common uses: identifying a class that is not among the 1000 pretrained classes,
or handling a different input size such as 64x64 (limited by the first layer's filter size)
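One hedged sketch of reusing a pretrained network for a new set of classes: freeze the reused layers and train only a new output head. `pretrained_features` and the 5-class head are hypothetical placeholders, not part of the notes above.

```python
import tensorflow as tf

# Hypothetical: a [batch, 512] feature tensor produced by a restored pretrained network.
pretrained_features = tf.placeholder(tf.float32, [None, 512])
labels = tf.placeholder(tf.float32, [None, 5])

frozen = tf.stop_gradient(pretrained_features)        # no gradients flow into the reused layers
with tf.variable_scope('new_head'):
    W = tf.Variable(tf.truncated_normal([512, 5], stddev=0.1))
    b = tf.Variable(tf.constant(0.1, shape=[5]))
    logits = tf.matmul(frozen, W) + b

loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=logits))
# Only the new head's variables receive gradient updates.
head_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='new_head')
train_step = tf.train.AdamOptimizer(1e-4).minimize(loss, var_list=head_vars)
```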
Regularization:
===============
Dropout-based regularization works well for image classification.
(Warning: avoid it on data without redundancy; image data is highly redundant,
eg. a face is still recognisable from a partial view)

View File

@@ -0,0 +1,96 @@
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets('../mnist_data', one_hot=True)
x = tf.placeholder(tf.float32,shape=[None,28*28])
y_ = tf.placeholder(tf.float32,shape=[None,10])
# W = tf.Variable(tf.zeros([28*28,10]))
# b = tf.Variable(tf.zeros([10]))
#
# y = tf.matmul(x,W) + b
#
# cross_entropy = tf.reduce_mean(
# tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y))
#
# train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)
#
# with tf.Session() as sess:
# sess.run(tf.global_variables_initializer())
# for _ in range(1000):
# batch = mnist.train.next_batch(100)
# sess.run([train_step],feed_dict={x: batch[0], y_: batch[1]})
# correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1))
# accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# print(accuracy.eval(feed_dict={x: mnist.test.images, y_: mnist.test.labels}))
def weight_variable(shape):
    """Creates a weight variable initialised from a truncated normal."""
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)

def bias_variable(shape):
    """Creates a bias variable initialised to a small positive constant."""
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)

def conv2d(x, W):
    """2-D convolution with stride 2 and no padding (VALID)."""
    return tf.nn.conv2d(x, W, strides=[1, 2, 2, 1], padding='VALID')

# def max_pool_3x3(x):
#     return tf.nn.max_pool(x, ksize=[1, 5, 5, 1],
#                           strides=[1, 2, 2, 1], padding='SAME')

# Reshape the flat 784-pixel input into a 28x28 single-channel image.
x_image = tf.reshape(x, [-1, 28, 28, 1])

# First convolutional layer: 4x4 kernels, 1 input channel -> 128 feature maps.
# With stride 2 and VALID padding the output is [batch, 13, 13, 128].
W_conv1 = weight_variable([4, 4, 1, 128])
b_conv1 = bias_variable([128])
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
# h_pool1 = max_pool_3x3(h_conv1)

# Second convolutional layer: 5x5 kernels, 128 -> 64 feature maps.
# With stride 2 and VALID padding the output is [batch, 5, 5, 64].
W_conv2 = weight_variable([5, 5, 128, 64])
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu(conv2d(h_conv1, W_conv2) + b_conv2)
# h_pool2 = max_pool_3x3(h_conv2)

# Fully connected layer: flatten the 5x5x64 maps and map them to 512 features.
W_fc1 = weight_variable([5 * 5 * 64, 512])
b_fc1 = bias_variable([512])
h_pool2_flat = tf.reshape(h_conv2, [-1, 5 * 5 * 64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

# Dropout on the fully connected layer; keep_prob is fed at run time.
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

# Output layer: 512 features -> 10 class logits.
W_fc2 = weight_variable([512, 10])
b_fc2 = bias_variable([10])
y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2

cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_conv))
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(20000):
        batch = mnist.train.next_batch(50)
        if i % 100 == 0:
            train_accuracy = accuracy.eval(feed_dict={
                x: batch[0], y_: batch[1], keep_prob: 1.0})
            print('step %d, training accuracy %g' % (i, train_accuracy))
        train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})
    print('test accuracy %g' % accuracy.eval(feed_dict={
        x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}))

View File

@@ -0,0 +1,176 @@
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A deep MNIST classifier using convolutional layers.
See extensive documentation at
https://www.tensorflow.org/get_started/mnist/pros
"""
# Disable linter warnings to maintain consistency with tutorial.
# pylint: disable=invalid-name
# pylint: disable=g-bad-import-order
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import sys
import tempfile
from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf
FLAGS = None
def deepnn(x):
  """deepnn builds the graph for a deep net for classifying digits.

  Args:
    x: an input tensor with the dimensions (N_examples, 784), where 784 is the
    number of pixels in a standard MNIST image.

  Returns:
    A tuple (y, keep_prob). y is a tensor of shape (N_examples, 10), with values
    equal to the logits of classifying the digit into one of 10 classes (the
    digits 0-9). keep_prob is a scalar placeholder for the probability of
    dropout.
  """
  # Reshape to use within a convolutional neural net.
  # Last dimension is for "features" - there is only one here, since images are
  # grayscale -- it would be 3 for an RGB image, 4 for RGBA, etc.
  with tf.name_scope('reshape'):
    x_image = tf.reshape(x, [-1, 28, 28, 1])

  # First convolutional layer - maps one grayscale image to 32 feature maps.
  with tf.name_scope('conv1'):
    W_conv1 = weight_variable([5, 5, 1, 32])
    b_conv1 = bias_variable([32])
    h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)

  # Pooling layer - downsamples by 2X.
  with tf.name_scope('pool1'):
    h_pool1 = max_pool_2x2(h_conv1)

  # Second convolutional layer -- maps 32 feature maps to 64.
  with tf.name_scope('conv2'):
    W_conv2 = weight_variable([5, 5, 32, 64])
    b_conv2 = bias_variable([64])
    h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)

  # Second pooling layer.
  with tf.name_scope('pool2'):
    h_pool2 = max_pool_2x2(h_conv2)

  # Fully connected layer 1 -- after 2 round of downsampling, our 28x28 image
  # is down to 7x7x64 feature maps -- maps this to 1024 features.
  with tf.name_scope('fc1'):
    W_fc1 = weight_variable([7 * 7 * 64, 1024])
    b_fc1 = bias_variable([1024])

    h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
    h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

  # Dropout - controls the complexity of the model, prevents co-adaptation of
  # features.
  with tf.name_scope('dropout'):
    keep_prob = tf.placeholder(tf.float32)
    h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

  # Map the 1024 features to 10 classes, one for each digit
  with tf.name_scope('fc2'):
    W_fc2 = weight_variable([1024, 10])
    b_fc2 = bias_variable([10])

    y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
  return y_conv, keep_prob


def conv2d(x, W):
  """conv2d returns a 2d convolution layer with full stride."""
  return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')


def max_pool_2x2(x):
  """max_pool_2x2 downsamples a feature map by 2X."""
  return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
                        strides=[1, 2, 2, 1], padding='SAME')


def weight_variable(shape):
  """weight_variable generates a weight variable of a given shape."""
  initial = tf.truncated_normal(shape, stddev=0.1)
  return tf.Variable(initial)


def bias_variable(shape):
  """bias_variable generates a bias variable of a given shape."""
  initial = tf.constant(0.1, shape=shape)
  return tf.Variable(initial)


def main(_):
  # Import data
  mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=True)

  # Create the model
  x = tf.placeholder(tf.float32, [None, 784])

  # Define loss and optimizer
  y_ = tf.placeholder(tf.float32, [None, 10])

  # Build the graph for the deep net
  y_conv, keep_prob = deepnn(x)

  with tf.name_scope('loss'):
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=y_,
                                                            logits=y_conv)
  cross_entropy = tf.reduce_mean(cross_entropy)

  with tf.name_scope('adam_optimizer'):
    train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

  with tf.name_scope('accuracy'):
    correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
    correct_prediction = tf.cast(correct_prediction, tf.float32)
  accuracy = tf.reduce_mean(correct_prediction)

  graph_location = tempfile.mkdtemp()
  print('Saving graph to: %s' % graph_location)
  train_writer = tf.summary.FileWriter(graph_location)
  train_writer.add_graph(tf.get_default_graph())

  with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(20000):
      batch = mnist.train.next_batch(50)
      if i % 100 == 0:
        train_accuracy = accuracy.eval(feed_dict={
            x: batch[0], y_: batch[1], keep_prob: 1.0})
        print('step %d, training accuracy %g' % (i, train_accuracy))
      train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})

    print('test accuracy %g' % accuracy.eval(feed_dict={
        x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}))


if __name__ == '__main__':
  parser = argparse.ArgumentParser()
  parser.add_argument('--data_dir', type=str,
                      default='/tmp/tensorflow/mnist/input_data',
                      help='Directory for storing input data')
  FLAGS, unparsed = parser.parse_known_args()
  tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)

18
Notes.md Normal file
View File

@@ -0,0 +1,18 @@
Deep Learning:
==============
Building models so that we don't have to hand-engineer features; instead the
architecture is designed so that it can infer the features on its own,
given enough data and enough layers.
The inputs to the softmax (classifier) layer are called logits.
Optimization Momentum:
======================
momentum keeps a running (exponentially weighted) average of the gradients from
previous iterations and uses that averaged direction, rather than the raw
gradient alone, to update the weights.
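A toy sketch of this running-average idea on a 1-D quadratic (the learning rate and momentum coefficient are illustrative); in TensorFlow the same behaviour is provided by tf.train.MomentumOptimizer.

```python
# Minimise f(w) = w^2 (gradient 2w) with momentum.
w, v = 5.0, 0.0
lr, beta = 0.1, 0.9          # illustrative step size and momentum coefficient

for _ in range(100):
    grad = 2.0 * w
    v = beta * v + grad      # running (decayed) sum of past gradients
    w = w - lr * v           # the update follows the averaged direction

print(w)                     # close to the minimum at 0
```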
Weight initialization:
======================
train a smaller network -> save its weights
add a new layer, initialise the old layers from the saved weights -> train again
iterate, growing the network and reusing the previously computed weights for the deeper net.
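A hedged sketch of reusing weights from a smaller, already-trained network when growing a deeper one. The variable names match the create_model code changed later in this commit; 'small_net.ckpt' is a hypothetical checkpoint path.

```python
import tensorflow as tf

# Assume the deeper graph has already been built and contains W1/b1 with the
# same names and shapes as in the smaller network that was trained first.
saver = tf.train.Saver({'layer_1_weights': W1, 'bias_1_weights': b1})

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())   # initialise the new, deeper layers
    saver.restore(sess, 'small_net.ckpt')         # overwrite W1/b1 with the pretrained values
    # ...continue training the grown network from here
```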

View File

@@ -65,14 +65,17 @@ def create_model(input_dim,output_dim):
    error_lower_bound = tf.constant(0.9,name='lower_bound')
    x = tf.placeholder(tf.float32, [None,input_dim])
    y = tf.placeholder(tf.float32, [None,output_dim])
    W1 = tf.Variable(tf.random_normal([input_dim, 512],stddev=2.0/math.sqrt(input_dim)),name='layer_1_weights')
    b1 = tf.Variable(tf.random_normal([512]),name='bias_1_weights')
    layer_1 = tf.nn.relu(tf.add(tf.matmul(x,W1),b1))
    W2 = tf.Variable(tf.random_normal([512, 128],stddev=2.0/math.sqrt(512)),name='layer_2_weights')
    b2 = tf.Variable(tf.random_normal([128]),name='bias_2_weights')
    layer_2 = tf.nn.sigmoid(tf.add(tf.matmul(layer_1,W2),b2))
    W_o = tf.Variable(tf.random_normal([128, output_dim],stddev=2.0/math.sqrt(128)),name='layer_output_weights')
    b_o = tf.Variable(tf.random_normal([output_dim]),name='bias_output_weights')
    layer_1 = tf.nn.relu(tf.add(tf.matmul(x,W1),b1))
    layer_2 = tf.nn.sigmoid(tf.add(tf.matmul(layer_1,W2),b2))
    #y_ = tf.nn.softmax(tf.add(tf.matmul(layer_2,W_o),b_o))+1e-6
    y_ = tf.add(tf.matmul(layer_2,W_o),b_o)#tf.nn.relu()