Compare commits

...

5 Commits

Author SHA1 Message Date
Malar Kannan
e792d29ba1 implemented conv2d of mnist from scratch added fourth week notes 2017-10-28 17:46:00 +05:30
Malar Kannan
64b89c326b update faceeyes code 2017-10-28 09:38:33 +05:30
Malar Kannan
2c91fa6eb5 updated to third week 2017-10-23 11:05:35 +05:30
Malar Kannan
6c62795d02 updated notes and implemented data generation for 10 digit recognition 2017-10-21 09:04:25 +05:30
Malar Kannan
16c98b53d5 reorganized 2017-10-14 16:02:31 +05:30
22 changed files with 1184 additions and 0 deletions

59
FourthSaturday/Notes.md Normal file
View File

@@ -0,0 +1,59 @@
Convolutional Neural Network
============================
Hubel and Wiesel (1962) experiment -> inspiration for CNN
a single neuron detects edges oriented at 45 degrees
filter kernel -> (a patch in image - matrix) (typical to 3x3|5x5|7x7
smaller the better)
returns a feature map
CNN -> multiple layers of kernels(1st layer computes on the input image,
subsequent layers computes on the feature maps generated by the previous
layer)
strides -> amount of pixels to overlap between kernel computation on the same
layer
(max) pooling kernel -> looks at a patch of image and
returns the (maximum) value in that patch
(doesn't have any learnable parameters)
usually the number of feature maps is doubled after a
pooling layer is computed
maps (n,n)eg.[(28x28)x128] -> (mxm)eg.[(14,14)x128] -> (x256)
No of weight required per layer = (k1xk1)xc1xc2 (c1 is channels in input layer)
(k1,k1) is the dimension of filter kernel
(c2 is number of feature maps in first layer)
-> in 1st layer
(k2,k2)xc2xc3 (c3) number of feature maps
conv2d -> padding 'same' adds 0's at the borders to make the output
dimension same as image size
'valid' does the convolution one actual pixels alone -> will return
a smaller dimension relative to the image
technique: use a smaller train/test data and try to overfit the model
(100% on train to verify that the model is expressive enough
to learn the data)
Deconvolutional Layers(misnomer):
upsampling an image using this layer
(tf.layers.conv2d_transpose,tf.nn.conv2d_transpose)
Transfer Learning:
==================
using pretrained networks as starting point for a task (using a subset of layers)
eg. VGG(Visual Geometry Group) networks (224x224 -> 1000 classes)
-> classification(what) & localization(where)
CNN works great for classification(since it is invariant to location)
to predict the location (use the earlier layers(contains locality info)
for final output)
using it to identify a class not in the 1000 pretrained classes
using it to identify a class with input size 64x64(depends on the first layer filter size)
Regularization:
===============
Dropout based regularization is great for image classification application.
(Warning: not to be used on data without redundancy(image data has lot of redundancy
eg. identifying a partial face is quite easy))

View File

@@ -0,0 +1,96 @@
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

# Load MNIST with one-hot labels (downloads the data set on first run).
mnist = input_data.read_data_sets('../mnist_data', one_hot=True)

# Inputs: flattened 28x28 grayscale images and one-hot digit labels.
x = tf.placeholder(tf.float32, shape=[None, 28 * 28])
y_ = tf.placeholder(tf.float32, shape=[None, 10])


def weight_variable(shape):
    """Create a weight Variable initialized from a truncated normal (stddev 0.1)."""
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)


def bias_variable(shape):
    """Create a bias Variable initialized to a small positive constant (0.1)."""
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)


def conv2d(x, W):
    """2-D convolution with stride 2 in both spatial dims and VALID padding."""
    return tf.nn.conv2d(x, W, strides=[1, 2, 2, 1], padding='VALID')


# Reshape flat pixels to NHWC format for the convolution ops.
x_image = tf.reshape(x, [-1, 28, 28, 1])

# conv1: 4x4 kernel, 1 -> 128 feature maps; 28x28 -> 13x13 (stride 2, VALID).
W_conv1 = weight_variable([4, 4, 1, 128])
b_conv1 = bias_variable([128])
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)

# conv2: 5x5 kernel, 128 -> 64 feature maps; 13x13 -> 5x5 (stride 2, VALID).
W_conv2 = weight_variable([5, 5, 128, 64])
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu(conv2d(h_conv1, W_conv2) + b_conv2)

# Fully connected layer on the flattened 5x5x64 feature maps -> 512 features.
W_fc1 = weight_variable([5 * 5 * 64, 512])
b_fc1 = bias_variable([512])
h_pool2_flat = tf.reshape(h_conv2, [-1, 5 * 5 * 64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

# Dropout on the fully connected features; keep_prob is fed at run time
# (1.0 for evaluation, 0.5 during training).
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

# Output logits for the 10 digit classes (softmax is applied inside the loss).
W_fc2 = weight_variable([512, 10])
b_fc2 = bias_variable([10])
y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2

cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_conv))
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(20000):
        batch = mnist.train.next_batch(50)
        if i % 100 == 0:
            train_accuracy = accuracy.eval(feed_dict={
                x: batch[0], y_: batch[1], keep_prob: 1.0})
            print('step %d, training accuracy %g' % (i, train_accuracy))
        train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})
    print('test accuracy %g' % accuracy.eval(feed_dict={
        x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}))

View File

@@ -0,0 +1,176 @@
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A deep MNIST classifier using convolutional layers.
See extensive documentation at
https://www.tensorflow.org/get_started/mnist/pros
"""
# Disable linter warnings to maintain consistency with tutorial.
# pylint: disable=invalid-name
# pylint: disable=g-bad-import-order
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import sys
import tempfile
from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf
FLAGS = None
def deepnn(x):
    """deepnn builds the graph for a deep net for classifying digits.

    Args:
      x: an input tensor with the dimensions (N_examples, 784), where 784 is
        the number of pixels in a standard MNIST image.

    Returns:
      A tuple (y, keep_prob). y is a tensor of shape (N_examples, 10), with
      values equal to the logits of classifying the digit into one of 10
      classes (the digits 0-9). keep_prob is a scalar placeholder for the
      probability of dropout.
    """
    # Reshape to use within a convolutional neural net.
    # Last dimension is for "features" - there is only one here, since images
    # are grayscale -- it would be 3 for an RGB image, 4 for RGBA, etc.
    with tf.name_scope('reshape'):
        x_image = tf.reshape(x, [-1, 28, 28, 1])

    # First convolutional layer - maps one grayscale image to 32 feature maps.
    with tf.name_scope('conv1'):
        W_conv1 = weight_variable([5, 5, 1, 32])
        b_conv1 = bias_variable([32])
        h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)

    # Pooling layer - downsamples by 2X.
    with tf.name_scope('pool1'):
        h_pool1 = max_pool_2x2(h_conv1)

    # Second convolutional layer -- maps 32 feature maps to 64.
    with tf.name_scope('conv2'):
        W_conv2 = weight_variable([5, 5, 32, 64])
        b_conv2 = bias_variable([64])
        h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)

    # Second pooling layer.
    with tf.name_scope('pool2'):
        h_pool2 = max_pool_2x2(h_conv2)

    # Fully connected layer 1 -- after 2 rounds of downsampling, our 28x28
    # image is down to 7x7x64 feature maps -- maps this to 1024 features.
    with tf.name_scope('fc1'):
        W_fc1 = weight_variable([7 * 7 * 64, 1024])
        b_fc1 = bias_variable([1024])
        h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
        h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

    # Dropout - controls the complexity of the model, prevents co-adaptation
    # of features.
    with tf.name_scope('dropout'):
        keep_prob = tf.placeholder(tf.float32)
        h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

    # Map the 1024 features to 10 classes, one for each digit.
    with tf.name_scope('fc2'):
        W_fc2 = weight_variable([1024, 10])
        b_fc2 = bias_variable([10])
        y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
    return y_conv, keep_prob
def conv2d(x, W):
    """conv2d returns a 2d convolution layer with full stride (1) and SAME padding."""
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
def max_pool_2x2(x):
    """max_pool_2x2 downsamples a feature map by 2X (2x2 window, stride 2)."""
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
                          strides=[1, 2, 2, 1], padding='SAME')
def weight_variable(shape):
    """weight_variable generates a weight variable of a given shape.

    Truncated-normal init (stddev 0.1) breaks symmetry between units.
    """
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)
def bias_variable(shape):
    """bias_variable generates a bias variable of a given shape.

    A small positive constant keeps ReLU units initially active.
    """
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)
def main(_):
    """Build, train (20000 Adam steps) and evaluate the deep MNIST model."""
    # Import data
    mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=True)

    # Create the model
    x = tf.placeholder(tf.float32, [None, 784])
    # Define loss and optimizer
    y_ = tf.placeholder(tf.float32, [None, 10])

    # Build the graph for the deep net
    y_conv, keep_prob = deepnn(x)

    with tf.name_scope('loss'):
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=y_,
                                                                logits=y_conv)
        cross_entropy = tf.reduce_mean(cross_entropy)

    with tf.name_scope('adam_optimizer'):
        train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

    with tf.name_scope('accuracy'):
        correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
        correct_prediction = tf.cast(correct_prediction, tf.float32)
        accuracy = tf.reduce_mean(correct_prediction)

    # Dump the graph to a temp dir so it can be inspected with TensorBoard.
    graph_location = tempfile.mkdtemp()
    print('Saving graph to: %s' % graph_location)
    train_writer = tf.summary.FileWriter(graph_location)
    train_writer.add_graph(tf.get_default_graph())

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for i in range(20000):
            batch = mnist.train.next_batch(50)
            if i % 100 == 0:
                # Evaluate on the current batch with dropout disabled.
                train_accuracy = accuracy.eval(feed_dict={
                    x: batch[0], y_: batch[1], keep_prob: 1.0})
                print('step %d, training accuracy %g' % (i, train_accuracy))
            train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})
        print('test accuracy %g' % accuracy.eval(feed_dict={
            x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}))
if __name__ == '__main__':
    # Parse only the flags we know; forward the rest to tf.app.run so that
    # TensorFlow's own flags still work.
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_dir', type=str,
                        default='/tmp/tensorflow/mnist/input_data',
                        help='Directory for storing input data')
    FLAGS, unparsed = parser.parse_known_args()
    tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)

View File

@@ -0,0 +1 @@

18
Notes.md Normal file
View File

@@ -0,0 +1,18 @@
Deep Learning:
==============
Creating a model such that we don't have to hand engineer features, instead
architecting the model such that it is capable of inferring the features
on its own with large number of datasets and layers.
Input of softmax layer is called logits( classifier )
Optimization Momentum:
======================
using averaged gradients computed in previous iterations to identify how much
weight is given to the gradient descent.
Weight initialization:
======================
create a smaller network -> compute weights
use the weights and add new layer and -> compute weights
iterate and grow the network by using precomputed weights for deeper networks.

View File

@@ -0,0 +1,34 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.14"
}
},
"nbformat": 4,
"nbformat_minor": 0
}

74
SecondSaturday/Notes.md Normal file
View File

@@ -0,0 +1,74 @@
1. flowchart application -> ios mobile find symbol through gestures.
2. matrimony portal profiles -> finding matching profiles
3. swipe input keyboard for indian languages.
4. mnist hand-written digit database -> build application for recognizing full phone numbers(10 digit).
5. live cricket feed -> generating highlights of the match.
6. designing a chatbot for getting cricket scores.
# general approach to machine-learning
1. model, objective and learning algo
2. ml is a technique for learning from examples
-> recommending smart-phone to a friend. price, branch, cam quality, screen size, processing speed. ==> model
-> objective ( why is he buying the phone) should be able to be boiled-down to a single number.
-> learning algo
binary features -> camera 0|1
-> screen small|large
-> battery good|bad
-> memory high|low
-> processing fast|slow
-> audio good|bad
prior probability -> probability of an event occurring without any knowledge of conditions
P(A)*P(B|A) = P(B)*P(A/B) = P(A,B)
P(+ve|[x1,x2,x3...xn]) = P([x1,x2...xn]|+ve)*P(+ve)/P([x1,x2...xn])
= P(x1|+ve)*P(x2|+ve)*P(x3|+ve)*P(+ve)/P([x1,x2...xn])
= Pi (i=1 to n) P(xi|+ve) * P(+ve)(Class variable)/ Sum (C=+ve to -ve) (P([x1,x2....xn],C))
= Pi (i=1 to n) P(xi|+ve) * P(+ve)(Class variable) / (( Pi (i=1 to n ) P(xi|+ve)*P(+ve))+..+ Pi (i=1 to n) P(xi|-ve)*P(-ve))
P(X,Y) = Sum (y=y1...yn) P(X,Y=y)
W2 = P(+ve|xi=1) (human approach) (Naive bayes) (naively thinking that all features are independent)
Regression : output contains real values -> (predicting the position of joints of a body given an image of a person)
Classification : output classifies to discrete set of classes
-> predicting the posture of a person (sitting,walking,standing,running) given an image of a person
(Numerical/Categorical)
Representation Learning: embedding
Deeplearning is all about hierarchical representation learning.
Metric Learning: distance( of facial features)/similarity(of fashion apparels)/relevance( of search document)
Structured Output(Models): auto-corrects dependent outputs based on output on the upper hierarchy outputs.
Types of input:
Bag of features,bag of words: ( finding whether a feature is present/not without caring where the feature occurs in the input)
eg: Using unsupervized learning to convert the input to a given set of classes(clusters) and use them as bag of features.
Spatial data(sequential data): if there local dependencies use CNN(convolutional nn) if there are near past dependencies in the data use RNN(Recurrent NN-LSTM)
eg: stock market temporal data / speech data/ image data
Non-Parametric models : k-NN(K-nearest neighbor), Decision Trees, Random Forests (independent of parameters)
-> very inaccurate because doesn't know much about the data
Parametric Models: based on fixed set of parameters,SVM
-> more accurate because it knows more about the parameters from the data
Types of Learning:
supervized learning -> labeled data
unsupervized learning -> unlabeled data
exercise: *take 3s from mnist data *create a gmm model with them and *cluster them with 5/3/10 gaussians.
*take all images and cluster them to 10 gaussians.
semi-supervized learning -> combination of supervized and unsupervized models
Auto Encoder:
finding a low dimensional representation of a high dimensional data.
eg. image of 200x200 pixels create a fingerprint of image of 128 dimensions.
exercise: use the 128 dimensional data to reconstruct the 200x200 image(using inverse of the model).
Reinforcement learning:
eg:playing chess -> using the final result of the game to assign weights/score for moves that were made upto the final result. and training the model to predict based on those scores.

View File

@@ -0,0 +1,27 @@
# swipe input keyboard for indian languages.
given a gesture made on the keyboard, and the language chosen -> keyboard layout
predict the word that matches closest.
## Input
gesture data -> polygon(shape,size,corners, path), (time, pauses)?, spatial data with word character correlation.
weighted-vocabulary,corpus for the language,history of gesture-word mappings/corrections for the user.
language, keyboard layout
## Output
Predict the word
## Model
Structured Output/HMM/CNN?
# mnist hand-written digit database -> build application for recognizing full phone numbers(10 digit).
## Input
mnist digit database, generated 10 digit images with random positioning,orientation,scale of individual digit images sampled randomly from the mnist database.
## Output
predict the phone number
## Model
regression model to identify the points where the split for the images has to be made and pass the
split images to mnist digit recognizer to identify the digit.

22
SecondSunday/Notes.md Normal file
View File

@@ -0,0 +1,22 @@
# Activation Function (AKA Transfer function)
In a neural network activation function adds non-linearity to it.
Types:
1. Sigmoid(Logistic) (used mostly for output layer(looks like probability))
2. RelU or Rectified Linear Unit (important discovery for NN - most-used for hidden layers)(not suitable for output layer if output is supposed to be probability) and leaky RelU with some slope on negative part
3. tanH (Hyperbolic) (-1 - 1) or ArcTan (Tan Inverse -> maps to -Pi/2 - Pi/2)
4. Linear(or Identity) layer (used for output layers(best for regression))
5. Softmax (classification giving probability) (probability coz outputs add upto 1)
6. SquareRoot
7. Exponential
8. Sine.
9. Ramp
10. Step (Binary)
11. Unit Sum
if the network computation is something that is multiplicative, use log as activation so that the sum becomes addition.
Constraint Optimization: optimize in such a way that the output is constrained to some value.
Steps => number of iteration of batches
Epoch => number of iterations of going through the entire dataset

View File

@@ -0,0 +1,57 @@
# %matplotlib inline  -- IPython magic from the original notebook; it is a
# syntax error in a plain .py file, so it is kept only as a comment here.
from tensorflow.examples.tutorials.mnist import input_data
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# Load MNIST and group the training images by their digit label ('0'..'9').
mnist = input_data.read_data_sets('./SecondSunday/mnist_data', one_hot=True)
label_number = mnist.train.labels.argmax(axis=1)
number_imgs = {str(i): mnist.train.images[np.argwhere(label_number == i).squeeze()]
               for i in range(10)}

DATA_COUNT = 10240

# Random 10-digit "phone numbers": one row per number, one digit per column.
# phone_number_digits = np.random.randint(10**9, 10**10, size=(DATA_COUNT, 10))
phone_number_digits = np.random.randint(10, size=(DATA_COUNT, 10))
# Join each row of digits into a 10-character string like '0471938265'.
phone_number_strings = pd.DataFrame(
    phone_number_digits.astype(str).T).apply(lambda x: ''.join(x)).values


def pick_img(num):
    """Return a random 28x28 MNIST image of digit *num* (a str '0'-'9')."""
    rand_idx = np.random.randint(number_imgs[num].shape[0])
    return number_imgs[num][rand_idx].reshape(28, 28)


def create_phone_img(phon_no):
    """Concatenate one random digit image per character into a 28x280 strip."""
    return np.hstack(tuple(pick_img(d) for d in phon_no))


def create_phone_images(phone_array):
    """Build a (N, 28*280) array of flattened phone-number images."""
    phone_number_images = [create_phone_img(phon_no) for phon_no in phone_array]
    return np.array(phone_number_images).reshape(-1, 28 * 280)


phone_number_imgs = create_phone_images(phone_number_strings)
train_imgs, test_imgs, train_digits, test_digits = train_test_split(
    phone_number_imgs, phone_number_digits)

from keras.models import Sequential
from keras.layers import Dense, Activation

# Experimental regression baseline kept from the notebook (disabled):
# model = Sequential([
#     Dense(32, input_shape=(7840,)),
#     Activation('relu'),
#     # Dense(24, input_shape=(32,)),
#     # Activation('relu'),
#     Dense(10),
#     Activation('linear'),
# ])
# model.compile(optimizer='sgd',
#               loss='mean_squared_error',
#               metrics=['accuracy'])
#
# model.fit(train_imgs, train_digits,
#           batch_size=128,
#           epochs=100,
#           validation_data=(test_imgs, test_digits))

# Visual sanity check (disabled):
# img_idx = np.random.randint(phone_number_imgs.shape[0])
# print(phone_number_strings[img_idx])
# plt.imshow(phone_number_imgs[img_idx].reshape(28, 280))

71
ThirdSaturday/Mnist_tf.py Normal file
View File

@@ -0,0 +1,71 @@
# coding: utf-8
# Exported from a Jupyter notebook (the In[..] markers below are from the export).

# In[40]:
from tensorflow.examples.tutorials.mnist import input_data

# In[41]:
mnist = input_data.read_data_sets('./mnist_data', one_hot=True)

# In[42]:
xtrain, xtest = mnist.train, mnist.test
import tensorflow as tf

# In[43]:
learning_rate = tf.constant(0.01, name='learning_rate')

# In[44]:
x = tf.placeholder(tf.float32, [None, 784])
y = tf.placeholder(tf.float32, [None, 10])

# In[45]:
# NOTE(review): all-zero initialization keeps every hidden unit identical and
# all ReLU gradients at zero, so this network cannot learn; the follow-up
# version of this notebook switches to tf.random_normal initialization.
W1 = tf.Variable(tf.zeros([784, 512]), name='layer_1_weights')
b1 = tf.Variable(tf.zeros([512]), name='bias_1_weights')
W2 = tf.Variable(tf.zeros([512, 128]), name='layer_2_weights')
b2 = tf.Variable(tf.zeros([128]), name='bias_2_weights')
W_o = tf.Variable(tf.zeros([128, 10]), name='layer_output_weights')
b_o = tf.Variable(tf.zeros([10]), name='bias_output_weights')

# In[46]:
layer_1 = tf.nn.relu(tf.add(tf.matmul(x, W1), b1))
layer_2 = tf.nn.relu(tf.add(tf.matmul(layer_1, W2), b2))
output_layer = tf.nn.softmax(tf.add(tf.matmul(layer_2, W_o), b_o))

# In[47]:
# Cross-entropy is -sum(label * log(prediction)).  The original had the
# operands swapped (output_layer * tf.log(y)), which takes the log of the
# one-hot labels and yields -inf/NaN losses.
cross_entropy = tf.reduce_mean(
    -tf.reduce_sum(y * tf.log(output_layer), reduction_indices=[1]))

# In[48]:
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)

# In[39]:
with tf.Session() as s:
    # The original built the initializer op without running it; .run() is
    # required before any variable can be read.
    tf.global_variables_initializer().run()
    [_, val] = s.run([train_step, cross_entropy],
                     feed_dict={x: xtrain.images, y: xtrain.labels})

# In[ ]:

View File

@@ -0,0 +1,85 @@
# coding: utf-8
# Exported from a Jupyter notebook: 784 -> 512 -> 128 -> 10 MLP on MNIST with
# L2 regularization, trained with mini-batch gradient descent.

# In[1]:
from tensorflow.examples.tutorials.mnist import input_data

# In[2]:
mnist = input_data.read_data_sets('./mnist_data', one_hot=True)

# In[3]:
xtrain, xtest = mnist.train, mnist.test
import tensorflow as tf
import math

# In[28]:
learning_rate = tf.constant(0.01, name='learning_rate')
beta = tf.constant(0.01, name='regularization_beta')

# In[5]:
x = tf.placeholder(tf.float32, [None, xtrain.images.shape[1]])
y = tf.placeholder(tf.float32, [None, 10])

# In[6]:
# Random-normal init with stddev ~ 2/sqrt(fan_in) to keep activations scaled.
W1 = tf.Variable(tf.random_normal([784, 512], stddev=2.0 / 28.0),
                 name='layer_1_weights')
b1 = tf.Variable(tf.random_normal([512]), name='bias_1_weights')
W2 = tf.Variable(tf.random_normal([512, 128], stddev=2.0 / math.sqrt(512)),
                 name='layer_2_weights')
b2 = tf.Variable(tf.random_normal([128]), name='bias_2_weights')
W_o = tf.Variable(tf.random_normal([128, 10], stddev=2.0 / math.sqrt(128)),
                  name='layer_output_weights')
b_o = tf.Variable(tf.random_normal([10]), name='bias_output_weights')

# In[20]:
layer_1 = tf.nn.relu(tf.add(tf.matmul(x, W1), b1))
layer_2 = tf.nn.relu(tf.add(tf.matmul(layer_1, W2), b2))
# y_ = tf.nn.softmax(tf.add(tf.matmul(layer_2, W_o), b_o)) + 1e-6
# Keep raw logits; softmax is applied inside the numerically stable loss op.
y_ = tf.add(tf.matmul(layer_2, W_o), b_o)

# In[38]:
# cross_entropy = tf.reduce_mean(-tf.reduce_sum(y * tf.log(y_)))
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=y_))
# L2 penalty over all weights and biases, scaled by beta.
total_loss = cross_entropy + beta * (
    tf.nn.l2_loss(W1) + tf.nn.l2_loss(W2) + tf.nn.l2_loss(W_o) +
    tf.nn.l2_loss(b1) + tf.nn.l2_loss(b2) + tf.nn.l2_loss(b_o))
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(tf.nn.softmax(y_), 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# In[39]:
train_step = tf.train.GradientDescentOptimizer(0.1).minimize(total_loss)

# In[40]:
with tf.Session() as s:
    tf.global_variables_initializer().run()
    for i in range(20000):
        batch_xs, batch_ys = xtrain.next_batch(100)
        [_] = s.run([train_step], feed_dict={x: batch_xs, y: batch_ys})
        if i % 1000 == 0:
            # Periodically report accuracy on the held-out test set.
            print(s.run(accuracy, feed_dict={x: xtest.images, y: xtest.labels}))

# In[ ]:

27
ThirdSaturday/Notes.md Normal file
View File

@@ -0,0 +1,27 @@
Parameters:
variables that are learnt by the model through training.
HyperParameters:
variables that are empirical and have to be assigned manually.
Protocol:
Train,Test,Validation/Dev Set
update HyperParameters and try training with the devset accuracy.
pick the best params.
Depending on the datasize and the nature of the problem(no of classes to be classified to) decide the test datasize
Error rate : (Bayes Error rate) lower possible error rate for any classifier of a random outcome.
(accuracy of the model shouldn't be more than this,
Regularization:
if it is it means the model is overfitting to the training datas)
if the model is overfitting, use regularization to control it.
It is a technique to limit the expressiveness of the model.
eg.
1. L2 regularizer -> Loss' = Loss + lambda*Sum(wi^2) // lambda is the regularization param
makes |wi| =~= 0.
controls the degree of non-linearity of the model, without having to redesign the model
2. Dropout regularizer -> switching off some neurons
forcing the model learn from other features(neurons)

View File

@@ -0,0 +1,17 @@
from keras.layers import Dense, Activation
from keras.optimizers import RMSprop
from keras.models import Sequential
from keras import losses

# 784 -> 64 (ReLU) -> 10 (softmax) MNIST classifier.  The original defined the
# identical model twice and imported Sequential twice; the duplicates are
# removed.
model = Sequential([Dense(units=64, input_dim=784),
                    Activation('relu'),
                    Dense(units=10),
                    Activation('softmax')])

# The original had a syntax error here (`loss=losses.,` -- the loss was left
# unfinished).  categorical_crossentropy matches the one-hot labels and the
# softmax output layer.
model.compile(optimizer=RMSprop(lr=0.001, rho=0.9, epsilon=1e-08, decay=0.0),
              loss=losses.categorical_crossentropy,
              metrics=['accuracy'])

# NOTE(review): xtrain/xtest come from an earlier notebook cell not shown in
# this file.  Fixed typo: xtest.labes -> xtest.labels.
model.fit(xtrain.images, xtrain.labels, batch_size=10, epochs=10,
          validation_data=(xtest.images, xtest.labels))

212
ThirdSunday/FaceEyes.ipynb Normal file
View File

@@ -0,0 +1,212 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"%matplotlib inline\n",
"import tensorflow as tf\n",
"import pandas as pd\n",
"import numpy as np\n",
"import math\n",
"import matplotlib.pyplot as plt\n",
"import msgpack as msg\n",
"import msgpack_numpy as m\n",
"from sklearn.model_selection import train_test_split\n",
"from skimage.transform import resize\n",
"# from sklearn.color import rgb2gray\n",
"m.patch()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def load_face_files():\n",
" all_data = [msg.load(open('./face_images/face_images{}.bin'.format(i),'rb')) for i in range(1,6)]\n",
" images = np.vstack([i[b'images'] for i in all_data])\n",
" gray_images = np.dot(images,np.array([0.2125,0.7154,0.0721]))/255.0\n",
"# print(gray_images.shape)\n",
"# scaled_gray_images = resize(,(32,32))/255.0\n",
"# import pdb;pdb.set_trace()\n",
" coords = np.vstack([i[b'co-ords'] for i in all_data])\n",
" coords_norm = coords/255.0\n",
" return gray_images,coords_norm\n",
"\n",
"images,coords = load_face_files()\n",
"\n",
"def plot_image(idx,n=1):\n",
" im = images[idx]\n",
" part_coords = np.split(coords[idx],3)\n",
" plt.figure(figsize=(16,16),dpi=80)\n",
" plt.subplot(n,4,1)\n",
" plt.imshow(im,'gray')\n",
" for i in range(2,5):\n",
" [x,y,w,h] = part_coords[i-2]#[:4]\n",
" # print([x,y,w,h],all([i<0 for i in [x,y,w,h]]))\n",
" if not all([j<0 for j in [x,y,w,h]]):\n",
" plt.subplot(n,4,i)\n",
" plt.imshow(im[y:y+h,x:x+w],'gray')\n",
"\n",
"\n",
"def get_head_images(c):\n",
" h_idx = []\n",
" for (idx,i) in enumerate(c):\n",
" head_coords = np.split(i,3)[0]\n",
" if not any([j<0 for j in head_coords]):\n",
" h_idx.append(idx)\n",
" return h_idx\n",
"\n",
"# plot_image(958)\n",
"\n",
"head_idxs = get_head_images(coords)\n",
"head_images = images[head_idxs].reshape(-1,images.shape[1]*images.shape[2])\n",
"head_coords = coords[head_idxs,:4].astype(np.float32)\n",
"tr_head_imgs,te_head_imgs,tr_head_crds,te_head_crds = train_test_split(head_images,head_coords,test_size=0.33)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": true,
"scrolled": true
},
"outputs": [],
"source": [
"def create_model(input_dim,output_dim):\n",
" g = tf.Graph()\n",
" with g.as_default():\n",
" learning_rate = tf.constant(0.1,name='learning_rate')\n",
" beta = tf.constant(0.00001,name='regularization_beta')\n",
" error_upper_bound = tf.constant(1.1,name='upper_bound')\n",
" error_lower_bound = tf.constant(0.9,name='lower_bound')\n",
" x = tf.placeholder(tf.float32, [None,input_dim])\n",
" y = tf.placeholder(tf.float32, [None,output_dim])\n",
" W1 = tf.Variable(tf.random_normal([input_dim, 512],stddev=2.0/math.sqrt(input_dim)),name='layer_1_weights')\n",
" b1 = tf.Variable(tf.random_normal([512]),name='bias_1_weights')\n",
" W2 = tf.Variable(tf.random_normal([512, 128],stddev=2.0/math.sqrt(512)),name='layer_2_weights')\n",
" b2 = tf.Variable(tf.random_normal([128]),name='bias_2_weights')\n",
" W_o = tf.Variable(tf.random_normal([128, output_dim],stddev=2.0/math.sqrt(128)),name='layer_output_weights')\n",
" b_o = tf.Variable(tf.random_normal([output_dim]),name='bias_output_weights')\n",
" layer_1 = tf.nn.relu(tf.add(tf.matmul(x,W1),b1))\n",
" layer_2 = tf.nn.relu(tf.add(tf.matmul(layer_1,W2),b2))\n",
" y_ = tf.nn.sigmoid(tf.add(tf.matmul(layer_2,W_o),b_o))\n",
" distance = tf.losses.mean_squared_error(labels = y,predictions = y_)\n",
" regularized_loss = distance+beta*(tf.nn.l2_loss(W1)+tf.nn.l2_loss(W2)+tf.nn.l2_loss(W_o)+tf.nn.l2_loss(b1)+tf.nn.l2_loss(b2)+tf.nn.l2_loss(b_o))\n",
" ratio = tf.div(y,y_)\n",
" accuracy = distance#tf.reduce_mean(tf.cast((ratio < error_upper_bound) & (ratio > error_lower_bound), tf.float32))\n",
" tf.summary.scalar('distance', distance)\n",
" tf.summary.histogram('Weights1', W1)\n",
" tf.summary.histogram('Bias1', b1)\n",
" tf.summary.histogram('Weights2', W2)\n",
" tf.summary.histogram('Bias2', b2)\n",
" tf.summary.histogram('Weights_output', W_o)\n",
" tf.summary.histogram('Bias_output', b_o)\n",
" merged_summary = tf.summary.merge_all()\n",
" train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(regularized_loss)\n",
" return (g,x,y,y_,train_step,accuracy,merged_summary)\n",
"\n",
"(g,x,y,y_,train_step,accuracy,merged_summary) = create_model(tr_head_imgs.shape[1],tr_head_crds.shape[1])"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"distance on validation set 0.123010858893\n",
"distance on validation set 0.0423361249268\n"
]
},
{
"ename": "KeyboardInterrupt",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m<ipython-input-5-ee62be87709e>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m 21\u001b[0m \u001b[1;32mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"distance on validation set {}\"\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0macc\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;31m#,'saved to ',save_path)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 22\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 23\u001b[1;33m \u001b[0mtrain_model\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mg\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0my\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0my_\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mtrain_step\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0maccuracy\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mmerged_summary\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[1;32m<ipython-input-5-ee62be87709e>\u001b[0m in \u001b[0;36mtrain_model\u001b[1;34m(g, x, y, y_, train_step, accuracy, merged_summary)\u001b[0m\n\u001b[0;32m 12\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m20000\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 13\u001b[0m \u001b[0mbatch_xs\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mbatch_ys\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mbatch_data\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtr_head_imgs\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mtr_head_crds\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;36m128\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 14\u001b[1;33m \u001b[0ms\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mrun\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mtrain_step\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mfeed_dict\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;33m{\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m:\u001b[0m\u001b[0mbatch_xs\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0my\u001b[0m\u001b[1;33m:\u001b[0m\u001b[0mbatch_ys\u001b[0m\u001b[1;33m}\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 15\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mi\u001b[0m\u001b[1;33m%\u001b[0m\u001b[1;36m100\u001b[0m \u001b[1;33m==\u001b[0m \u001b[1;36m0\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 16\u001b[0m \u001b[0mt_batch_xs\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mt_batch_ys\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mbatch_data\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtr_head_imgs\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mtr_head_crds\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;36m32\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/media/Data/Test/py/lib/python2.7/site-packages/tensorflow/python/client/session.pyc\u001b[0m in \u001b[0;36mrun\u001b[1;34m(self, fetches, feed_dict, options, run_metadata)\u001b[0m\n\u001b[0;32m 887\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 888\u001b[0m result = self._run(None, fetches, feed_dict, options_ptr,\n\u001b[1;32m--> 889\u001b[1;33m run_metadata_ptr)\n\u001b[0m\u001b[0;32m 890\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mrun_metadata\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 891\u001b[0m \u001b[0mproto_data\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtf_session\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mTF_GetBuffer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mrun_metadata_ptr\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/media/Data/Test/py/lib/python2.7/site-packages/tensorflow/python/client/session.pyc\u001b[0m in \u001b[0;36m_run\u001b[1;34m(self, handle, fetches, feed_dict, options, run_metadata)\u001b[0m\n\u001b[0;32m 1118\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mfinal_fetches\u001b[0m \u001b[1;32mor\u001b[0m \u001b[0mfinal_targets\u001b[0m \u001b[1;32mor\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0mhandle\u001b[0m \u001b[1;32mand\u001b[0m \u001b[0mfeed_dict_tensor\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1119\u001b[0m results = self._do_run(handle, final_targets, final_fetches,\n\u001b[1;32m-> 1120\u001b[1;33m feed_dict_tensor, options, run_metadata)\n\u001b[0m\u001b[0;32m 1121\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1122\u001b[0m \u001b[0mresults\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m[\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/media/Data/Test/py/lib/python2.7/site-packages/tensorflow/python/client/session.pyc\u001b[0m in \u001b[0;36m_do_run\u001b[1;34m(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)\u001b[0m\n\u001b[0;32m 1315\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mhandle\u001b[0m \u001b[1;32mis\u001b[0m \u001b[0mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1316\u001b[0m return self._do_call(_run_fn, self._session, feeds, fetches, targets,\n\u001b[1;32m-> 1317\u001b[1;33m options, run_metadata)\n\u001b[0m\u001b[0;32m 1318\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1319\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_do_call\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0m_prun_fn\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_session\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mhandle\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfeeds\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfetches\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/media/Data/Test/py/lib/python2.7/site-packages/tensorflow/python/client/session.pyc\u001b[0m in \u001b[0;36m_do_call\u001b[1;34m(self, fn, *args)\u001b[0m\n\u001b[0;32m 1321\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m_do_call\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfn\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1322\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1323\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mfn\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1324\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0merrors\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mOpError\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1325\u001b[0m \u001b[0mmessage\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mcompat\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mas_text\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0me\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmessage\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/media/Data/Test/py/lib/python2.7/site-packages/tensorflow/python/client/session.pyc\u001b[0m in \u001b[0;36m_run_fn\u001b[1;34m(session, feed_dict, fetch_list, target_list, options, run_metadata)\u001b[0m\n\u001b[0;32m 1300\u001b[0m return tf_session.TF_Run(session, options,\n\u001b[0;32m 1301\u001b[0m \u001b[0mfeed_dict\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfetch_list\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtarget_list\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1302\u001b[1;33m status, run_metadata)\n\u001b[0m\u001b[0;32m 1303\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1304\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m_prun_fn\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msession\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mhandle\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfeed_dict\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfetch_list\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;31mKeyboardInterrupt\u001b[0m: "
]
}
],
"source": [
"def batch_data(data_x,data_y,size=128):\n",
" batch_idxs = np.random.randint(0,data_x.shape[0],size=size)\n",
" return (data_x[batch_idxs],data_y[batch_idxs])\n",
"\n",
"def train_model(g,x,y,y_,train_step,accuracy,merged_summary):\n",
" with g.as_default():\n",
" with tf.Session() as s:\n",
" train_writer = tf.summary.FileWriter('./tensor_log',s.graph)\n",
" tf.global_variables_initializer().run()\n",
"# saver = tf.train.Saver()\n",
" # saver.restore(s, \"/tmp/model.ckpt\")\n",
" for i in range(20000):\n",
" batch_xs,batch_ys = batch_data(tr_head_imgs,tr_head_crds,128)\n",
" s.run([train_step],feed_dict={x:batch_xs,y:batch_ys})\n",
" if i%100 == 0:\n",
" t_batch_xs,t_batch_ys = batch_data(tr_head_imgs,tr_head_crds,32)\n",
" [acc]= s.run([accuracy], feed_dict={x: t_batch_xs,y: t_batch_ys})\n",
" # train_writer.add_summary(summary,i)\n",
"# save_path = saver.save(s, \"/tmp/model.ckpt\")\n",
"# print(y_vals,t_batch_ys)\n",
" print(\"distance on validation set {}\".format(acc))#,'saved to ',save_path)\n",
"\n",
"train_model(g,x,y,y_,train_step,accuracy,merged_summary)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"hide_input": false,
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.14"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

125
ThirdSunday/Faces.py Normal file
View File

@@ -0,0 +1,125 @@
#%matplotlib inline
import tensorflow as tf
import pandas as pd
import numpy as np
import math
import matplotlib.pyplot as plt
import msgpack as msg
import msgpack_numpy as m
from skimage.transform import resize
from sklearn.model_selection import train_test_split
# from sklearn.color import rgb2gray
m.patch()
def load_face_files(path_fmt='./face_images/face_images{}.bin', n_files=5):
    """Load the msgpack face-image shards and return (gray_images, coords_norm).

    Generalized from the original hard-coded 5 shards: `path_fmt` is a format
    string taking the 1-based shard index, `n_files` the shard count.  The
    defaults reproduce the original behavior exactly.

    Returns:
        gray_images: (N, H, W) float array, grayscale in [0, 1] (luma weights
            0.2125/0.7154/0.0721, then /255).
        coords_norm: (N, 12) float array of part coordinates divided by 255.0
            -- presumably pixel coords normalized to [0, 1]; TODO confirm the
            images are actually 255 px wide/tall rather than 256.
    """
    all_data = [msg.load(open(path_fmt.format(i), 'rb'))
                for i in range(1, n_files + 1)]
    # Each shard is a dict with byte keys: b'images' (RGB) and b'co-ords'.
    images = np.vstack([d[b'images'] for d in all_data])
    # RGB -> grayscale via ITU-R 709 luma weights, then scale to [0, 1].
    gray_images = np.dot(images, np.array([0.2125, 0.7154, 0.0721])) / 255.0
    coords = np.vstack([d[b'co-ords'] for d in all_data])
    coords_norm = coords / 255.0
    return gray_images, coords_norm
images,coords = load_face_files()
def plot_image(idx, n=1):
    """Display sample `idx`: the full frame plus a crop per annotated part.

    The 12-value coordinate row splits into three (x, y, w, h) boxes; a box
    whose entries are all negative marks a missing annotation and is skipped.
    NOTE(review): `coords` is normalized by /255.0 at load time, so these
    float values are used directly as slice indices here -- looks suspect;
    confirm whether un-normalized coords were intended.
    """
    img = images[idx]
    boxes = np.split(coords[idx], 3)
    plt.figure(figsize=(16, 16), dpi=80)
    plt.subplot(n, 4, 1)
    plt.imshow(img, 'gray')
    for col in range(2, 5):
        x, y, w, h = boxes[col - 2]
        # "not all negative" == "at least one non-negative" (De Morgan).
        if any(v >= 0 for v in (x, y, w, h)):
            plt.subplot(n, 4, col)
            plt.imshow(img[y:y + h, x:x + w], 'gray')
def get_head_images(c):
    """Return indices of rows whose head box is fully annotated.

    Each row of `c` splits into three (x, y, w, h) parts; the first part is
    the head box.  A row qualifies when no head-box entry is negative
    (negative values mark missing annotations).

    Args:
        c: iterable of length-12 coordinate rows (e.g. an (N, 12) array).

    Returns:
        list[int]: indices of qualifying rows, in order.
    """
    # Comprehension replaces the original index-accumulator loop.
    return [idx for idx, row in enumerate(c)
            if not any(j < 0 for j in np.split(row, 3)[0])]
# plot_image(958)
# Keep only samples with a fully annotated head box, then flatten each image
# to a feature vector and retain just the head (x, y, w, h) as the target.
head_idxs = get_head_images(coords)
head_images = images[head_idxs].reshape(-1,images.shape[1]*images.shape[2])  # (N, H*W)
head_coords = coords[head_idxs,:4].astype(np.float32)  # normalized head box -- /255.0 at load; TODO confirm scale
# 67/33 random train/test split of the head regression data.
tr_head_imgs,te_head_imgs,tr_head_crds,te_head_crds = train_test_split(head_images,head_coords,test_size=0.33)
def create_model(input_dim,output_dim):
    """Build a 2-hidden-layer (512 relu -> 128 sigmoid -> linear) regression
    graph with L2 regularization, and return the graph plus its key tensors.

    Args:
        input_dim: number of input features (flattened image size).
        output_dim: number of regression targets (4 box coords here).

    Returns:
        (g, x, y, y_, train_step, accuracy, merged_summary):
        the tf.Graph, input/label placeholders, prediction tensor, SGD train
        op, ratio-based "accuracy" tensor, and merged summary op.
    """
    g = tf.Graph()
    with g.as_default():
        learning_rate = tf.constant(0.01,name='learning_rate')
        beta = tf.constant(0.001,name='regularization_beta')
        # A prediction counts as "correct" when truth/prediction is within 10%.
        error_upper_bound = tf.constant(1.1,name='upper_bound')
        error_lower_bound = tf.constant(0.9,name='lower_bound')
        x = tf.placeholder(tf.float32, [None,input_dim])
        y = tf.placeholder(tf.float32, [None,output_dim])
        # He-style init: stddev = 2/sqrt(fan_in) for each layer.
        W1 = tf.Variable(tf.random_normal([input_dim, 512],stddev=2.0/math.sqrt(input_dim)),name='layer_1_weights')
        b1 = tf.Variable(tf.random_normal([512]),name='bias_1_weights')
        layer_1 = tf.nn.relu(tf.add(tf.matmul(x,W1),b1))
        W2 = tf.Variable(tf.random_normal([512, 128],stddev=2.0/math.sqrt(512)),name='layer_2_weights')
        b2 = tf.Variable(tf.random_normal([128]),name='bias_2_weights')
        layer_2 = tf.nn.sigmoid(tf.add(tf.matmul(layer_1,W2),b2))
        W_o = tf.Variable(tf.random_normal([128, output_dim],stddev=2.0/math.sqrt(128)),name='layer_output_weights')
        b_o = tf.Variable(tf.random_normal([output_dim]),name='bias_output_weights')
        #y_ = tf.nn.softmax(tf.add(tf.matmul(layer_2,W_o),b_o))+1e-6
        # Linear output head: raw coordinates, no activation.
        y_ = tf.add(tf.matmul(layer_2,W_o),b_o)#tf.nn.relu()
        #cross_entropy = tf.reduce_mean(-tf.reduce_sum(y * tf.log(y_)))
        # cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y,logits=y_))
        # distance = tf.reduce_sum(tf.square(tf.subtract(y,y_)))
        distance = tf.losses.mean_squared_error(labels = y,predictions = y_)
        # MSE plus L2 penalty on every weight AND bias tensor.
        regularized_loss = distance+beta*(tf.nn.l2_loss(W1)+tf.nn.l2_loss(W2)+tf.nn.l2_loss(W_o)+tf.nn.l2_loss(b1)+tf.nn.l2_loss(b2)+tf.nn.l2_loss(b_o))
        # correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(tf.nn.softmax(y_),1))
        # NOTE(review): element-wise y / y_ -- divides by the prediction, which
        # can be zero or negative; "accuracy" is the fraction of coordinates
        # whose ratio falls in (0.9, 1.1).  Confirm this metric is intended.
        ratio = tf.div(y,y_)
        accuracy = tf.reduce_mean(tf.cast((ratio < error_upper_bound) & (ratio > error_lower_bound), tf.float32))
        # TensorBoard: loss scalar plus weight/bias histograms.
        tf.summary.scalar('distance', distance)
        tf.summary.histogram('Weights1', W1)
        tf.summary.histogram('Bias1', b1)
        tf.summary.histogram('Weights2', W2)
        tf.summary.histogram('Bias2', b2)
        tf.summary.histogram('Weights_output', W_o)
        tf.summary.histogram('Bias_output', b_o)
        merged_summary = tf.summary.merge_all()
        train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(regularized_loss)
    return (g,x,y,y_,train_step,accuracy,merged_summary)
(g,x,y,y_,train_step,accuracy,merged_summary) = create_model(tr_head_imgs.shape[1],tr_head_crds.shape[1])
def batch_data(data_x, data_y, size=128):
    """Draw a random mini-batch of `size` matching (x, y) rows.

    Indices are sampled uniformly with replacement, so a batch may repeat
    rows.  Returns a (batch_x, batch_y) pair of aligned arrays.
    """
    picks = np.random.randint(0, data_x.shape[0], size=size)
    return (data_x[picks], data_y[picks])
def train_model(g,x,y,y_,train_step,accuracy,merged_summary):
    """Run 20k SGD steps on the head-box regression graph.

    Uses the module-level `tr_head_imgs` / `tr_head_crds` splits for both
    training batches (size 10) and the periodic evaluation batch (size 5)
    -- NOTE(review): evaluation draws from the TRAINING split, not
    `te_head_*`, so the printed "validation" accuracy is really training
    accuracy; confirm whether that is intended.

    Side effects: writes TensorBoard events to ./tensor_log and a checkpoint
    to /tmp/model.ckpt every 100 steps; prints predictions and accuracy.
    """
    with g.as_default():
        with tf.Session() as s:
            train_writer = tf.summary.FileWriter('./tensor_log',s.graph)
            tf.global_variables_initializer().run()
            saver = tf.train.Saver()
            # saver.restore(s, "/tmp/model.ckpt")
            for i in range(20000):
                batch_xs,batch_ys = batch_data(tr_head_imgs,tr_head_crds,10)
                s.run([train_step],feed_dict={x:batch_xs,y:batch_ys})
                if i%100 == 0:
                    # Periodic report: summaries, accuracy, raw predictions.
                    t_batch_xs,t_batch_ys = batch_data(tr_head_imgs,tr_head_crds,5)
                    [summary,acc,y_vals]= s.run([merged_summary,accuracy,y_], feed_dict={x: t_batch_xs,y: t_batch_ys})
                    train_writer.add_summary(summary,i)
                    save_path = saver.save(s, "/tmp/model.ckpt")
                    print(y_vals,t_batch_ys)
                    print("Accuracy on validation set {}".format(acc))#,'saved to ',save_path)
train_model(g,x,y,y_,train_step,accuracy,merged_summary)

73
ThirdSunday/Mnist.py Normal file
View File

@@ -0,0 +1,73 @@
# coding: utf-8
# In[1]:
from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf
import math
# In[2]:
# In[2]:
# Download/cache MNIST with one-hot labels.
mnist = input_data.read_data_sets('./mnist_data', one_hot=True)

# In[3]:
# 784 -> 512 (relu) -> 128 (relu) -> 10 (linear logits), SGD with L2 penalty.
learning_rate = tf.constant(0.01, name='learning_rate')
beta = tf.constant(0.001, name='regularization_beta')

x = tf.placeholder(tf.float32, [None, mnist.train.images.shape[1]])
y = tf.placeholder(tf.float32, [None, 10])

# He-style init: stddev = 2/sqrt(fan_in); 2.0/28.0 == 2/sqrt(784).
W1 = tf.Variable(tf.random_normal([784, 512], stddev=2.0/28.0), name='layer_1_weights')
b1 = tf.Variable(tf.random_normal([512]), name='bias_1_weights')
W2 = tf.Variable(tf.random_normal([512, 128], stddev=2.0/math.sqrt(512)), name='layer_2_weights')
b2 = tf.Variable(tf.random_normal([128]), name='bias_2_weights')
W_o = tf.Variable(tf.random_normal([128, 10], stddev=2.0/math.sqrt(128)), name='layer_output_weights')
b_o = tf.Variable(tf.random_normal([10]), name='bias_output_weights')

layer_1 = tf.nn.relu(tf.add(tf.matmul(x, W1), b1))
layer_2 = tf.nn.relu(tf.add(tf.matmul(layer_1, W2), b2))
#y_ = tf.nn.softmax(tf.add(tf.matmul(layer_2,W_o),b_o))+1e-6
y_ = tf.add(tf.matmul(layer_2, W_o), b_o)  # raw logits; softmax is inside the loss

# In[4]:
#cross_entropy = tf.reduce_mean(-tf.reduce_sum(y * tf.log(y_)))
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=y_))
# Cross-entropy plus L2 penalty on every weight and bias tensor.
total_loss = cross_entropy + beta*(tf.nn.l2_loss(W1)+tf.nn.l2_loss(W2)+tf.nn.l2_loss(W_o)+tf.nn.l2_loss(b1)+tf.nn.l2_loss(b2)+tf.nn.l2_loss(b_o))
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(tf.nn.softmax(y_), 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# TensorBoard summaries (only useful if a FileWriter/merge op is wired up).
tf.summary.scalar('cross_entropy', cross_entropy)
tf.summary.histogram('Weights1', W1)
tf.summary.histogram('Bias1', b1)
tf.summary.histogram('Weights2', W2)
tf.summary.histogram('Bias2', b2)
tf.summary.histogram('Weights_output', W_o)
tf.summary.histogram('Bias_output', b_o)

train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(total_loss)
#saver = tf.train.Saver()

# In[6]:
with tf.Session() as s:
    # merged = tf.summary.merge_all()
    # train_writer = tf.summary.FileWriter('./train',s.graph)
    tf.global_variables_initializer().run()
    # saver.restore(s, "/tmp/model.ckpt")
    # print("Model restored.")
    for i in range(20000):
        batch_xs, batch_ys = mnist.train.next_batch(128)
        s.run([train_step], feed_dict={x: batch_xs, y: batch_ys})
        if i % 1000 == 0:
            [acc] = s.run([accuracy], feed_dict={x: mnist.test.images, y: mnist.test.labels})
            # BUGFIX: removed `train_writer.add_summary(summary,i)` -- neither
            # `train_writer` nor `summary` is defined (the FileWriter and
            # merge lines above are commented out), so it raised NameError on
            # the very first report (i == 0) and the script never trained.
            # save_path = saver.save(s, "/tmp/model.ckpt")
            print("Accuracy on validation set {}".format(acc))  # ,'saved to ',save_path)

# In[ ]:

View File

@@ -0,0 +1,10 @@
import itertools
# def siamese_data(group1,group2):
def get_true_false(group1, group2):
    """Build pair sets for a siamese-style task from two disjoint groups.

    "True" pairs are same-group combinations (order-preserving 2-element
    combinations within group1, then within group2); "false" pairs are all
    cross-group (g1, g2) pairs, enumerated with group2 in the outer loop.

    Args:
        group1, group2: sequences of hashable items.

    Returns:
        (true_pairs, false_pairs): two lists of 2-tuples.
    """
    # Cross-group (negative) pairs; outer loop over group2 preserves the
    # original enumeration order.
    false_pairs = [(g1, g2) for g2 in group2 for g1 in group1]
    # list(combinations(...)) replaces the redundant [i for i in ...] wrappers.
    true_pairs = list(itertools.combinations(group1, 2)) + list(itertools.combinations(group2, 2))
    return (true_pairs, false_pairs)
# Demo: two 4-element groups -> 12 same-group ("true") pairs and 16
# cross-group ("false") pairs.  Result is discarded; presumably a quick
# sanity check for siamese-pair generation -- TODO confirm.
group1 = ['a','b','c','d']
group2 = ['A','B','C','D']
get_true_false(group1,group2)