updated to third week

parent 6c62795d02
commit 2c91fa6eb5

@@ -1,16 +1,18 @@
%matplotlib inline
from tensorflow.examples.tutorials.mnist import input_data
import numpy as np
+import pandas as pd
+from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

mnist = input_data.read_data_sets('./SecondSunday/mnist_data', one_hot=True)
label_number = mnist.train.labels.argmax(axis=1)
number_imgs = {str(i):mnist.train.images[np.argwhere(label_number == i).squeeze()] for i in range(10)}
-DATA_COUNT = 100
+DATA_COUNT = 10240
-phone_number_digits = np.random.randint(10**9,10**10,size=(DATA_COUNT,10))
+# phone_number_digits = np.random.randint(10**9,10**10,size=(DATA_COUNT,10))
phone_number_digits = np.random.randint(10,size=(DATA_COUNT,10))
phone_number_digits.astype(str)
-phone_number_strings = phone_number_digits.astype(str)
+phone_number_strings = pd.DataFrame(phone_number_digits.astype(str).T).apply(lambda x: ''.join(x)).values

def pick_img(num):
    rand_idx = np.random.randint(number_imgs[num].shape[0])
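For reference, the new phone_number_strings line turns each row of ten digits into a single ten-character string. A minimal illustration of what it produces (an editorial sketch with a tiny hand-made array, not part of the commit):

import numpy as np
import pandas as pd

digits = np.array([[1, 2, 3, 4, 5, 6, 7, 8, 9, 0],
                   [9, 8, 7, 6, 5, 4, 3, 2, 1, 0]])
# Transpose so each column is one "phone number", then join that column's digit characters.
strings = pd.DataFrame(digits.astype(str).T).apply(lambda col: ''.join(col)).values
print(strings)  # -> ['1234567890' '9876543210']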
@@ -27,20 +29,29 @@ def create_phone_images(phone_array):
    return np.array(phone_number_images).reshape(-1,28*280)

phone_number_imgs = create_phone_images(phone_number_strings)
+train_imgs,test_imgs,train_digits,test_digits = train_test_split(phone_number_imgs,phone_number_digits)

from keras.models import Sequential
from keras.layers import Dense, Activation

-model = Sequential([
-    Dense(32, input_shape=(7840,)),
-    Activation('relu'),
-    Dense(10),
-    Activation('linear'),
-])
+# model = Sequential([
+#     Dense(32, input_shape=(7840,)),
+#     Activation('relu'),
+# #     Dense(24, input_shape=(32,)),
+# #     Activation('relu'),
+#     Dense(10),
+#     Activation('linear'),
+# ])

-model.compile(optimizer='rmsprop',
-              loss='categorical_crossentropy',
-              metrics=['accuracy'])
+# model.compile(optimizer='sgd',
+#               loss='mean_squared_error',
+#               metrics=['accuracy'])
+#
+# model.fit(train_imgs, train_digits,
+#           batch_size=128,
+#           epochs=100,
+#           validation_data=(test_imgs, test_digits))

-model.fit()
-# plt.imshow(phone_number_imgs[np.random.randint(phone_number_imgs.shape[0])])
+# img_idx = np.random.randint(phone_number_imgs.shape[0])
+# print(phone_number_strings[img_idx])
+# plt.imshow(phone_number_imgs[img_idx].reshape(28,280))
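The hunk above only shows the tail of create_phone_images; the rest of the function is outside the diff context. A minimal sketch of how pick_img and create_phone_images presumably fit together, based on the surrounding code (number_imgs maps each digit character to its MNIST images, and each phone number becomes ten 28x28 digits laid side by side into a 28x280 strip). The exact bodies are assumptions, not the committed code:

def pick_img(num):
    # num is a digit character; pick a random MNIST image of that digit
    rand_idx = np.random.randint(number_imgs[num].shape[0])
    return number_imgs[num][rand_idx].reshape(28, 28)

def create_phone_images(phone_array):
    phone_number_images = []
    for phone in phone_array:
        # stack the ten digit images side by side -> 28 x 280
        digit_imgs = [pick_img(d) for d in phone]
        phone_number_images.append(np.hstack(digit_imgs))
    return np.array(phone_number_images).reshape(-1,28*280)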
@@ -0,0 +1,71 @@
# coding: utf-8

# In[40]:

from tensorflow.examples.tutorials.mnist import input_data


# In[41]:

mnist = input_data.read_data_sets('./mnist_data', one_hot=True)


# In[42]:

xtrain,xtest = mnist.train,mnist.test
import tensorflow as tf
# mnist.train.


# In[43]:

learning_rate = tf.constant(0.01,name='learning_rate')
xtrain.images.shape[1]


# In[44]:

x = tf.placeholder(tf.float32, [None, 784])
y = tf.placeholder(tf.float32, [None, 10])


# In[45]:

# NOTE: all weights start at zero here; the follow-up script in this commit switches to random_normal initialisation.
W1 = tf.Variable(tf.zeros([784, 512]),name='layer_1_weights')
b1 = tf.Variable(tf.zeros([512]),name='bias_1_weights')
W2 = tf.Variable(tf.zeros([512, 128]),name='layer_2_weights')
b2 = tf.Variable(tf.zeros([128]),name='bias_2_weights')
W_o = tf.Variable(tf.zeros([128, 10]),name='layer_output_weights')
b_o = tf.Variable(tf.zeros([10]),name='bias_output_weights')


# In[46]:

layer_1 = tf.nn.relu(tf.add(tf.matmul(x,W1),b1))
layer_2 = tf.nn.relu(tf.add(tf.matmul(layer_1,W2),b2))
output_layer = tf.nn.softmax(tf.add(tf.matmul(layer_2,W_o),b_o))


# In[47]:

# NOTE: the log should be taken of the prediction, i.e. -sum(y * tf.log(output_layer));
# the follow-up script replaces this with tf.nn.softmax_cross_entropy_with_logits.
cross_entropy = tf.reduce_mean(-tf.reduce_sum(output_layer * tf.log(y), reduction_indices=[1]))


# In[48]:

train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)


# In[39]:

with tf.Session() as s:
    # NOTE: the initializer op is created but never run; the follow-up script calls .run() on it.
    tf.global_variables_initializer()
    [_,val] = s.run([train_step,cross_entropy],feed_dict={x:xtrain.images,y:xtrain.labels})


# In[ ]:
@@ -0,0 +1,85 @@
# coding: utf-8

# In[1]:

from tensorflow.examples.tutorials.mnist import input_data


# In[2]:

mnist = input_data.read_data_sets('./mnist_data', one_hot=True)


# In[3]:

xtrain,xtest = mnist.train,mnist.test
import tensorflow as tf
import math
# mnist.train.


# In[ ]:


# In[28]:

learning_rate = tf.constant(0.01,name='learning_rate')
beta = tf.constant(0.01,name='regularization_beta')


# In[5]:

x = tf.placeholder(tf.float32, [None, xtrain.images.shape[1]])
y = tf.placeholder(tf.float32, [None, 10])


# In[6]:

W1 = tf.Variable(tf.random_normal([784, 512],stddev=2.0/28.0),name='layer_1_weights')
b1 = tf.Variable(tf.random_normal([512]),name='bias_1_weights')
W2 = tf.Variable(tf.random_normal([512, 128],stddev=2.0/math.sqrt(512)),name='layer_2_weights')
b2 = tf.Variable(tf.random_normal([128]),name='bias_2_weights')
W_o = tf.Variable(tf.random_normal([128, 10],stddev=2.0/math.sqrt(128)),name='layer_output_weights')
b_o = tf.Variable(tf.random_normal([10]),name='bias_output_weights')


# In[20]:

layer_1 = tf.nn.relu(tf.add(tf.matmul(x,W1),b1))
layer_2 = tf.nn.relu(tf.add(tf.matmul(layer_1,W2),b2))
#y_ = tf.nn.softmax(tf.add(tf.matmul(layer_2,W_o),b_o))+1e-6
y_ = tf.add(tf.matmul(layer_2,W_o),b_o)


# In[38]:

#cross_entropy = tf.reduce_mean(-tf.reduce_sum(y * tf.log(y_)))
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y,logits=y_))
total_loss = cross_entropy+beta*(tf.nn.l2_loss(W1)+tf.nn.l2_loss(W2)+tf.nn.l2_loss(W_o)+tf.nn.l2_loss(b1)+tf.nn.l2_loss(b2)+tf.nn.l2_loss(b_o))
correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(tf.nn.softmax(y_),1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))


# In[39]:

train_step = tf.train.GradientDescentOptimizer(0.1).minimize(total_loss)


# In[40]:

with tf.Session() as s:
    tf.global_variables_initializer().run()
    for i in range(20000):
        batch_xs, batch_ys = xtrain.next_batch(100)
        [_] = s.run([train_step],feed_dict={x:batch_xs,y:batch_ys})
        if i%1000 == 0:
            print(s.run(accuracy, feed_dict={x: xtest.images, y: xtest.labels}))


# In[ ]:
@@ -0,0 +1,27 @@
Parameters:
    variables that are learnt by the model through training.

HyperParameters:
    variables that are empirical and have to be assigned manually.

Protocol:
    Train, Test, Validation/Dev set.
    Update hyperparameters and retrain, judging each trial by dev-set accuracy.
    Pick the best hyperparameters.
    Depending on the data size and the nature of the problem (number of classes to be classified), decide the test-set size.

Error rate: (Bayes error rate) the lowest possible error rate for any classifier of a random outcome.
    (The model's accuracy shouldn't be better than this bound; if it is, the model is overfitting to the training data.)

Regularization:
    If the model is overfitting, use regularization to control it.
    It is a technique to limit the expressiveness of the model.
    e.g.
    1. L2 regularizer -> Loss' = Loss + lambda*Sum(wi^2)   // lambda is the regularization param
       pushes |wi| towards 0,
       controlling the degree of non-linearity of the model without having to redesign it.
    2. Dropout regularizer -> randomly switching off some neurons,
       forcing the model to learn from other features (neurons).
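As a concrete illustration of the two regularizers described above, here is a minimal TensorFlow 1.x sketch in the same style as the scripts in this commit; the layer sizes, beta and keep_prob values are placeholder assumptions, not tuned choices:

import math
import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 784])
y = tf.placeholder(tf.float32, [None, 10])
keep_prob = tf.placeholder(tf.float32)                  # dropout hyperparameter, tuned on the dev set
beta = tf.constant(0.01, name='regularization_beta')    # L2 hyperparameter (lambda), tuned on the dev set

W1 = tf.Variable(tf.random_normal([784, 512], stddev=2.0/math.sqrt(784)))
b1 = tf.Variable(tf.random_normal([512]))
W_o = tf.Variable(tf.random_normal([512, 10], stddev=2.0/math.sqrt(512)))
b_o = tf.Variable(tf.random_normal([10]))

hidden = tf.nn.relu(tf.matmul(x, W1) + b1)
hidden = tf.nn.dropout(hidden, keep_prob)               # 2. dropout: randomly zero some activations
logits = tf.matmul(hidden, W_o) + b_o

cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits))
l2_penalty = tf.nn.l2_loss(W1) + tf.nn.l2_loss(W_o)     # 1. L2: lambda * Sum(wi^2)
total_loss = cross_entropy + beta * l2_penalty

# Train with keep_prob < 1.0; evaluate on the dev set with keep_prob = 1.0.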
@@ -0,0 +1,17 @@
from keras.layers import Dense, Activation
from keras.optimizers import RMSprop
from keras.models import Sequential
from keras import losses

from keras.models import Sequential
model = Sequential([Dense(units=64, input_dim=784),
                    Activation('relu'),
                    Dense(units=10),
                    Activation('softmax')])

model = Sequential([Dense(units=64, input_dim=784),
                    Activation('relu'),
                    Dense(units=10),
                    Activation('softmax')])
# xtrain/xtest are the MNIST train/test splits (mnist.train, mnist.test) loaded as in the other scripts.
model.compile(optimizer=RMSprop(lr=0.001, rho=0.9, epsilon=1e-08, decay=0.0), loss=losses.categorical_crossentropy, metrics=['accuracy'])
model.fit(xtrain.images,xtrain.labels,batch_size=10,epochs=10,validation_data=(xtest.images,xtest.labels))
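To actually run the snippet above, xtrain and xtest need to be defined first; presumably via the same MNIST loader used by the other scripts in this commit, along these lines:

from tensorflow.examples.tutorials.mnist import input_data

# one_hot labels match the categorical_crossentropy loss and the 10-way softmax output
mnist = input_data.read_data_sets('./mnist_data', one_hot=True)
xtrain, xtest = mnist.train, mnist.test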
@@ -0,0 +1,224 @@
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {
    "collapsed": false,
    "deletable": true,
    "editable": true
   },
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "import tensorflow as tf\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import math\n",
    "import matplotlib.pyplot as plt\n",
    "import msgpack as msg\n",
    "import msgpack_numpy as m\n",
    "from sklearn.model_selection import train_test_split\n",
    "from skimage.transform import resize\n",
    "# from sklearn.color import rgb2gray\n",
    "m.patch()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {
    "collapsed": false,
    "deletable": true,
    "editable": true
   },
   "outputs": [],
   "source": [
    "def load_face_files():\n",
    "    all_data = [msg.load(open('./face_images/face_images{}.bin'.format(i),'rb')) for i in range(1,6)]\n",
    "    images = np.vstack([i[b'images'] for i in all_data])\n",
    "    gray_images = np.dot(images,np.array([0.2125,0.7154,0.0721]))/255.0\n",
    "# print(gray_images.shape)\n",
    "# scaled_gray_images = resize(,(32,32))/255.0\n",
    "# import pdb;pdb.set_trace()\n",
    "    coords = np.vstack([i[b'co-ords'] for i in all_data])\n",
    "    coords_norm = coords/255.0\n",
    "    return gray_images,coords_norm\n",
    "\n",
    "images,coords = load_face_files()\n",
    "\n",
    "def plot_image(idx,n=1):\n",
    "    im = images[idx]\n",
    "    part_coords = np.split(coords[idx],3)\n",
    "    plt.figure(figsize=(16,16),dpi=80)\n",
    "    plt.subplot(n,4,1)\n",
    "    plt.imshow(im,'gray')\n",
    "    for i in range(2,5):\n",
    "        [x,y,w,h] = part_coords[i-2]#[:4]\n",
    "        # print([x,y,w,h],all([i<0 for i in [x,y,w,h]]))\n",
    "        if not all([j<0 for j in [x,y,w,h]]):\n",
    "            plt.subplot(n,4,i)\n",
    "            plt.imshow(im[y:y+h,x:x+w],'gray')\n",
    "\n",
    "\n",
    "def get_head_images(c):\n",
    "    h_idx = []\n",
    "    for (idx,i) in enumerate(c):\n",
    "        head_coords = np.split(i,3)[0]\n",
    "        if not any([j<0 for j in head_coords]):\n",
    "            h_idx.append(idx)\n",
    "    return h_idx\n",
    "\n",
    "# plot_image(958)\n",
    "\n",
    "head_idxs = get_head_images(coords)\n",
    "head_images = images[head_idxs].reshape(-1,images.shape[1]*images.shape[2])\n",
    "head_coords = coords[head_idxs,:4].astype(np.float32)\n",
    "tr_head_imgs,te_head_imgs,tr_head_crds,te_head_crds = train_test_split(head_images,head_coords,test_size=0.33)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {
    "collapsed": false,
    "deletable": true,
    "editable": true,
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "def create_model(input_dim,output_dim):\n",
    "    g = tf.Graph()\n",
    "    with g.as_default():\n",
    "        learning_rate = tf.constant(0.1,name='learning_rate')\n",
    "        beta = tf.constant(0.00001,name='regularization_beta')\n",
    "        error_upper_bound = tf.constant(1.1,name='upper_bound')\n",
    "        error_lower_bound = tf.constant(0.9,name='lower_bound')\n",
    "        x = tf.placeholder(tf.float32, [None,input_dim])\n",
    "        y = tf.placeholder(tf.float32, [None,output_dim])\n",
    "        W1 = tf.Variable(tf.random_normal([input_dim, 512],stddev=2.0/math.sqrt(input_dim)),name='layer_1_weights')\n",
    "        b1 = tf.Variable(tf.random_normal([512]),name='bias_1_weights')\n",
    "        W2 = tf.Variable(tf.random_normal([512, 128],stddev=2.0/math.sqrt(512)),name='layer_2_weights')\n",
    "        b2 = tf.Variable(tf.random_normal([128]),name='bias_2_weights')\n",
    "        W_o = tf.Variable(tf.random_normal([128, output_dim],stddev=2.0/math.sqrt(128)),name='layer_output_weights')\n",
    "        b_o = tf.Variable(tf.random_normal([output_dim]),name='bias_output_weights')\n",
    "        layer_1 = tf.nn.relu(tf.add(tf.matmul(x,W1),b1))\n",
    "        layer_2 = tf.nn.relu(tf.add(tf.matmul(layer_1,W2),b2))\n",
    "        y_ = tf.nn.sigmoid(tf.add(tf.matmul(layer_2,W_o),b_o))\n",
    "        distance = tf.losses.mean_squared_error(labels = y,predictions = y_)\n",
    "        regularized_loss = distance+beta*(tf.nn.l2_loss(W1)+tf.nn.l2_loss(W2)+tf.nn.l2_loss(W_o)+tf.nn.l2_loss(b1)+tf.nn.l2_loss(b2)+tf.nn.l2_loss(b_o))\n",
    "        ratio = tf.div(y,y_)\n",
    "        accuracy = distance#tf.reduce_mean(tf.cast((ratio < error_upper_bound) & (ratio > error_lower_bound), tf.float32))\n",
    "        tf.summary.scalar('distance', distance)\n",
    "        tf.summary.histogram('Weights1', W1)\n",
    "        tf.summary.histogram('Bias1', b1)\n",
    "        tf.summary.histogram('Weights2', W2)\n",
    "        tf.summary.histogram('Bias2', b2)\n",
    "        tf.summary.histogram('Weights_output', W_o)\n",
    "        tf.summary.histogram('Bias_output', b_o)\n",
    "        merged_summary = tf.summary.merge_all()\n",
    "        train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(regularized_loss)\n",
    "        return (g,x,y,y_,train_step,accuracy,merged_summary)\n",
    "\n",
    "(g,x,y,y_,train_step,accuracy,merged_summary) = create_model(tr_head_imgs.shape[1],tr_head_crds.shape[1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {
    "collapsed": false,
    "deletable": true,
    "editable": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "distance on validation set 0.0457288585603\n",
      "distance on validation set 0.0384670570493\n",
      "distance on validation set 0.0463402196765\n",
      "distance on validation set 0.0418722033501\n"
     ]
    },
    {
     "ename": "KeyboardInterrupt",
     "evalue": "",
     "output_type": "error",
     "traceback": [
      "KeyboardInterrupt                         Traceback (most recent call last)",
      "<ipython-input-56-ee62be87709e> in <module>()  ---> 23 train_model(g,x,y,y_,train_step,accuracy,merged_summary)",
      "<ipython-input-56-ee62be87709e> in train_model(g, x, y, y_, train_step, accuracy, merged_summary)  ---> 14 s.run([train_step],feed_dict={x:batch_xs,y:batch_ys})",
      "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.pyc in run(self, fetches, feed_dict, options, run_metadata)",
      "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.pyc in _run(self, handle, fetches, feed_dict, options, run_metadata)",
      "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.pyc in _do_run(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)",
      "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.pyc in _do_call(self, fn, *args)",
      "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.pyc in _run_fn(session, feed_dict, fetch_list, target_list, options, run_metadata)",
      "KeyboardInterrupt: "
     ]
    }
   ],
   "source": [
    "def batch_data(data_x,data_y,size=128):\n",
    "    batch_idxs = np.random.randint(0,data_x.shape[0],size=size)\n",
    "    return (data_x[batch_idxs],data_y[batch_idxs])\n",
    "\n",
    "def train_model(g,x,y,y_,train_step,accuracy,merged_summary):\n",
    "    with g.as_default():\n",
    "        with tf.Session() as s:\n",
    "            train_writer = tf.summary.FileWriter('./tensor_log',s.graph)\n",
    "            tf.global_variables_initializer().run()\n",
    "# saver = tf.train.Saver()\n",
    "            # saver.restore(s, \"/tmp/model.ckpt\")\n",
    "            for i in range(20000):\n",
    "                batch_xs,batch_ys = batch_data(tr_head_imgs,tr_head_crds,128)\n",
    "                s.run([train_step],feed_dict={x:batch_xs,y:batch_ys})\n",
    "                if i%100 == 0:\n",
    "                    t_batch_xs,t_batch_ys = batch_data(tr_head_imgs,tr_head_crds,32)\n",
    "                    [acc]= s.run([accuracy], feed_dict={x: t_batch_xs,y: t_batch_ys})\n",
    "                    # train_writer.add_summary(summary,i)\n",
    "# save_path = saver.save(s, \"/tmp/model.ckpt\")\n",
    "# print(y_vals,t_batch_ys)\n",
    "                    print(\"distance on validation set {}\".format(acc))#,'saved to ',save_path)\n",
    "\n",
    "train_model(g,x,y,y_,train_step,accuracy,merged_summary)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true,
    "deletable": true,
    "editable": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "hide_input": false,
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
@@ -0,0 +1,121 @@
#%matplotlib inline
import tensorflow as tf
import pandas as pd
import numpy as np
import math
import matplotlib.pyplot as plt
import msgpack as msg
import msgpack_numpy as m
from skimage.transform import resize
from sklearn.model_selection import train_test_split
# from sklearn.color import rgb2gray
m.patch()


def load_face_files():
    all_data = [msg.load(open('./face_images/face_images{}.bin'.format(i),'rb')) for i in range(1,6)]
    images = np.vstack([i[b'images'] for i in all_data])
    gray_images = np.dot(images,np.array([0.2125,0.7154,0.0721]))
    coords = np.vstack([i[b'co-ords'] for i in all_data])
    return gray_images,coords

images,coords = load_face_files()

def plot_image(idx,n=1):
    im = images[idx]
    part_coords = np.split(coords[idx],3)
    plt.figure(figsize=(16,16),dpi=80)
    plt.subplot(n,4,1)
    plt.imshow(im,'gray')
    for i in range(2,5):
        [x,y,w,h] = part_coords[i-2]#[:4]
        # print([x,y,w,h],all([i<0 for i in [x,y,w,h]]))
        if not all([j<0 for j in [x,y,w,h]]):
            plt.subplot(n,4,i)
            plt.imshow(im[y:y+h,x:x+w],'gray')


def get_head_images(c):
    h_idx = []
    for (idx,i) in enumerate(c):
        head_coords = np.split(i,3)[0]
        if not any([j<0 for j in head_coords]):
            h_idx.append(idx)
    return h_idx

# plot_image(958)

head_idxs = get_head_images(coords)
head_images = images[head_idxs].reshape(-1,images.shape[1]*images.shape[2])
head_coords = coords[head_idxs,:4].astype(np.float32)
tr_head_imgs,te_head_imgs,tr_head_crds,te_head_crds = train_test_split(head_images,head_coords,test_size=0.33)


def create_model(input_dim,output_dim):
    g = tf.Graph()
    with g.as_default():
        learning_rate = tf.constant(0.01,name='learning_rate')
        beta = tf.constant(0.001,name='regularization_beta')
        error_upper_bound = tf.constant(1.1,name='upper_bound')
        error_lower_bound = tf.constant(0.9,name='lower_bound')
        x = tf.placeholder(tf.float32, [None,input_dim])
        y = tf.placeholder(tf.float32, [None,output_dim])
        W1 = tf.Variable(tf.random_normal([input_dim, 512],stddev=2.0/math.sqrt(input_dim)),name='layer_1_weights')
        b1 = tf.Variable(tf.random_normal([512]),name='bias_1_weights')
        W2 = tf.Variable(tf.random_normal([512, 128],stddev=2.0/math.sqrt(512)),name='layer_2_weights')
        b2 = tf.Variable(tf.random_normal([128]),name='bias_2_weights')
        W_o = tf.Variable(tf.random_normal([128, output_dim],stddev=2.0/math.sqrt(128)),name='layer_output_weights')
        b_o = tf.Variable(tf.random_normal([output_dim]),name='bias_output_weights')
        layer_1 = tf.nn.relu(tf.add(tf.matmul(x,W1),b1))
        layer_2 = tf.nn.sigmoid(tf.add(tf.matmul(layer_1,W2),b2))
        #y_ = tf.nn.softmax(tf.add(tf.matmul(layer_2,W_o),b_o))+1e-6
        y_ = tf.add(tf.matmul(layer_2,W_o),b_o)#tf.nn.relu()

        #cross_entropy = tf.reduce_mean(-tf.reduce_sum(y * tf.log(y_)))
        # cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y,logits=y_))
        # distance = tf.reduce_sum(tf.square(tf.subtract(y,y_)))
        distance = tf.losses.mean_squared_error(labels = y,predictions = y_)
        regularized_loss = distance+beta*(tf.nn.l2_loss(W1)+tf.nn.l2_loss(W2)+tf.nn.l2_loss(W_o)+tf.nn.l2_loss(b1)+tf.nn.l2_loss(b2)+tf.nn.l2_loss(b_o))
        # correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(tf.nn.softmax(y_),1))
        ratio = tf.div(y,y_)
        accuracy = tf.reduce_mean(tf.cast((ratio < error_upper_bound) & (ratio > error_lower_bound), tf.float32))
        tf.summary.scalar('distance', distance)
        tf.summary.histogram('Weights1', W1)
        tf.summary.histogram('Bias1', b1)
        tf.summary.histogram('Weights2', W2)
        tf.summary.histogram('Bias2', b2)
        tf.summary.histogram('Weights_output', W_o)
        tf.summary.histogram('Bias_output', b_o)
        merged_summary = tf.summary.merge_all()
        train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(regularized_loss)
        return (g,x,y,y_,train_step,accuracy,merged_summary)

(g,x,y,y_,train_step,accuracy,merged_summary) = create_model(tr_head_imgs.shape[1],tr_head_crds.shape[1])


def batch_data(data_x,data_y,size=128):
    batch_idxs = np.random.randint(0,data_x.shape[0],size=size)
    return (data_x[batch_idxs],data_y[batch_idxs])

def train_model(g,x,y,y_,train_step,accuracy,merged_summary):
    with g.as_default():
        with tf.Session() as s:
            train_writer = tf.summary.FileWriter('./tensor_log',s.graph)
            tf.global_variables_initializer().run()
            saver = tf.train.Saver()
            # saver.restore(s, "/tmp/model.ckpt")
            for i in range(20000):
                batch_xs,batch_ys = batch_data(tr_head_imgs,tr_head_crds,10)
                s.run([train_step],feed_dict={x:batch_xs,y:batch_ys})
                if i%100 == 0:
                    t_batch_xs,t_batch_ys = batch_data(tr_head_imgs,tr_head_crds,5)
                    [summary,acc,y_vals]= s.run([merged_summary,accuracy,y_], feed_dict={x: t_batch_xs,y: t_batch_ys})
                    train_writer.add_summary(summary,i)
                    save_path = saver.save(s, "/tmp/model.ckpt")
                    print(y_vals,t_batch_ys)
                    print("Accuracy on validation set {}".format(acc))#,'saved to ',save_path)


bb = np.array([[100,200,300,400], [100,200,300,400], [100,200,300,400], [100,200,300,400]])
bb[:,1]
train_model(g,x,y,y_,train_step,accuracy,merged_summary)
@@ -0,0 +1,73 @@
# coding: utf-8

# In[1]:

from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf
import math


# In[2]:

mnist = input_data.read_data_sets('./mnist_data', one_hot=True)


# In[3]:

learning_rate = tf.constant(0.01,name='learning_rate')
beta = tf.constant(0.001,name='regularization_beta')
x = tf.placeholder(tf.float32, [None, mnist.train.images.shape[1]])
y = tf.placeholder(tf.float32, [None, 10])
W1 = tf.Variable(tf.random_normal([784, 512],stddev=2.0/28.0),name='layer_1_weights')
b1 = tf.Variable(tf.random_normal([512]),name='bias_1_weights')
W2 = tf.Variable(tf.random_normal([512, 128],stddev=2.0/math.sqrt(512)),name='layer_2_weights')
b2 = tf.Variable(tf.random_normal([128]),name='bias_2_weights')
W_o = tf.Variable(tf.random_normal([128, 10],stddev=2.0/math.sqrt(128)),name='layer_output_weights')
b_o = tf.Variable(tf.random_normal([10]),name='bias_output_weights')
layer_1 = tf.nn.relu(tf.add(tf.matmul(x,W1),b1))
layer_2 = tf.nn.relu(tf.add(tf.matmul(layer_1,W2),b2))
#y_ = tf.nn.softmax(tf.add(tf.matmul(layer_2,W_o),b_o))+1e-6
y_ = tf.add(tf.matmul(layer_2,W_o),b_o)


# In[4]:

#cross_entropy = tf.reduce_mean(-tf.reduce_sum(y * tf.log(y_)))
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y,logits=y_))
total_loss = cross_entropy+beta*(tf.nn.l2_loss(W1)+tf.nn.l2_loss(W2)+tf.nn.l2_loss(W_o)+tf.nn.l2_loss(b1)+tf.nn.l2_loss(b2)+tf.nn.l2_loss(b_o))
correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(tf.nn.softmax(y_),1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
tf.summary.scalar('cross_entropy', cross_entropy)
tf.summary.histogram('Weights1', W1)
tf.summary.histogram('Bias1', b1)
tf.summary.histogram('Weights2', W2)
tf.summary.histogram('Bias2', b2)
tf.summary.histogram('Weights_output', W_o)
tf.summary.histogram('Bias_output', b_o)
train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(total_loss)
#saver = tf.train.Saver()


# In[6]:

with tf.Session() as s:
    # merged = tf.summary.merge_all()
    # train_writer = tf.summary.FileWriter('./train',s.graph)
    tf.global_variables_initializer().run()
    # saver.restore(s, "/tmp/model.ckpt")
    # print("Model restored.")
    for i in range(20000):
        batch_xs, batch_ys = mnist.train.next_batch(128)
        s.run([train_step],feed_dict={x:batch_xs,y:batch_ys})
        if i%1000 == 0:
            [acc]= s.run([accuracy], feed_dict={x: mnist.test.images, y: mnist.test.labels})
            # train_writer.add_summary(summary,i)  # NOTE: merged/train_writer are commented out above and no summary op is fetched, so this call cannot run as written
            # save_path = saver.save(s, "/tmp/model.ckpt")
            print("Accuracy on validation set {}".format(acc))#,'saved to ',save_path)


# In[ ]: