Create the following folder structure (GoogleNet/workspace):




predict.py

readme.txt

retrain.py


Referring to the readme.txt file,

download the image files, then enter the training command below in the Terminal:

python retrain.py --bottleneck_dir=./workspace/bottlenecks --model_dir=./workspace/inception --output_graph=./workspace/flowers_graph.pb --output_labels=./workspace/flowers_labels.txt --image_dir ./workspace/flower_photos --how_many_training_steps 1000


Once training completes, run predict.py in the Terminal as shown below to make a prediction:

python predict.py ./workspace/flower_photos/daisy/267148092_4bb874af58.jpg






Example code:

cnn_basic_3x3.ipynb




*Max Pooling :

Takes the largest value in each window.

In code:

pool = tf.nn.max_pool(image, ksize=[1, 2, 2, 1],
                    strides=[1, 1, 1, 1], padding='VALID')  # strides: 1 (batch, ignored), 1 (one step sideways), 1 (one step down), 1 (channels, ignored)


*Padding (output size equals input size) :


With 'SAME' padding the output is, by default, the same size as the input,

but applying a stride of 2 halves the size.


1, 2, 3, 0

4, 5, 6, 0

7, 8, 9, 0

0, 0, 0, 0
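
Below is a minimal sketch (TF 1.x; not from the original post) that reproduces the result shown after it: max pooling the 3x3 input above with a 2x2 window, stride 1, and 'SAME' padding (the zero row/column in the matrix is the padding), plus the stride-2 case that halves the size.

import tensorflow as tf
import numpy as np

# 3x3 input [1..9] reshaped to NHWC: (batch=1, height=3, width=3, channels=1)
image = tf.constant(np.arange(1, 10, dtype=np.float32).reshape(1, 3, 3, 1))

# 2x2 window, stride 1, 'SAME' padding -> output keeps the 3x3 size
pool = tf.nn.max_pool(image, ksize=[1, 2, 2, 1],
                      strides=[1, 1, 1, 1], padding='SAME')

# Same window with stride 2 -> output size is halved (rounded up): 2x2
pool2 = tf.nn.max_pool(image, ksize=[1, 2, 2, 1],
                       strides=[1, 2, 2, 1], padding='SAME')

with tf.Session() as sess:
    out = sess.run(pool)
    print(out.shape)              # (1, 3, 3, 1)
    print(out)                    # the array shown below
    print(sess.run(pool2).shape)  # (1, 2, 2, 1)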


(1, 3, 3, 1)
[[[[5.]
   [6.]
   [6.]]

  [[8.]
   [9.]
   [9.]]

  [[8.]
   [9.]
   [9.]]]]





*CNN example code :


import tensorflow as tf

from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("./MNIST_data/", one_hot=True)

X = tf.placeholder(tf.float32, [None, 28, 28, 1])
Y = tf.placeholder(tf.float32, [None, 10])
keep_prob = tf.placeholder(tf.float32)

# L1 Conv shape=(?, 28, 28, 32)
#    Pool     ->(?, 14, 14, 32)
W1 = tf.Variable(tf.random_normal([3, 3, 1, 32], stddev=0.01))
L1 = tf.nn.conv2d(X, W1, strides=[1, 1, 1, 1], padding='SAME')
L1 = tf.nn.relu(L1)
L1 = tf.nn.max_pool(L1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1]
                    , padding='SAME')
# L1 = tf.nn.dropout(L1, keep_prob)

# L2 Conv shape=(?, 14, 14, 64)
#    Pool     ->(?, 7, 7, 64)
W2 = tf.Variable(tf.random_normal([3, 3, 32, 64], stddev=0.01))
L2 = tf.nn.conv2d(L1, W2, strides=[1, 1, 1, 1], padding='SAME')
L2 = tf.nn.relu(L2)
L2 = tf.nn.max_pool(L2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1]
                    , padding='SAME')
# L2 = tf.nn.dropout(L2, keep_prob)

#  (?, 7, 7, 64) Reshape  ->(?, 256)
W3 = tf.Variable(tf.random_normal([7 * 7 * 64, 256], stddev=0.01))
L3 = tf.reshape(L2, [-1, 7 * 7 * 64])
L3 = tf.matmul(L3, W3)
L3 = tf.nn.relu(L3)
L3 = tf.nn.dropout(L3, keep_prob)

W4 = tf.Variable(tf.random_normal([256, 10], stddev=0.01))
model = tf.matmul(L3, W4)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=model, labels=Y))
optimizer = tf.train.AdamOptimizer(0.001).minimize(cost)
# optimizer = tf.train.RMSPropOptimizer(0.001, 0.9).minimize(cost)

init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

batch_size = 100
total_batch = int(mnist.train.num_examples / batch_size)

for epoch in range(15):
    total_cost = 0

    for i in range(total_batch):
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        batch_xs = batch_xs.reshape(-1, 28, 28, 1)

        _, cost_val = sess.run([optimizer, cost],
                               feed_dict={X: batch_xs,
                                          Y: batch_ys,
                                          keep_prob: 0.7})
        total_cost += cost_val

    print('Epoch:', '%04d' % (epoch + 1),
          'Avg. cost =', '{:.3f}'.format(total_cost / total_batch))

is_correct = tf.equal(tf.argmax(model, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))
print('accuracy', sess.run(accuracy,
                        feed_dict={X: mnist.test.images.reshape(-1, 28, 28, 1),
                                   Y: mnist.test.labels,
                                   keep_prob: 1}))





*CNN example code 2 :


import tensorflow as tf

from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("./MNIST_data/", one_hot=True)

X = tf.placeholder(tf.float32, [None, 28, 28, 1])
Y = tf.placeholder(tf.float32, [None, 10])
keep_prob = tf.placeholder(tf.float32)

# L1 Conv shape=(?, 28, 28, 32)
#    Pool     ->(?, 14, 14, 32)
W1 = tf.Variable(tf.random_normal([3, 3, 1, 32], stddev=0.01))
L1 = tf.nn.conv2d(X, W1, strides=[1, 1, 1, 1], padding='SAME')
L1 = tf.nn.relu(L1)
L1 = tf.nn.max_pool(L1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1]
                    , padding='SAME')
# L1 = tf.nn.dropout(L1, keep_prob)

# L2 Conv shape=(?, 14, 14, 64)
#    Pool     ->(?, 7, 7, 64)
W2 = tf.Variable(tf.random_normal([3, 3, 32, 64], stddev=0.01))
L2 = tf.nn.conv2d(L1, W2, strides=[1, 1, 1, 1], padding='SAME')
L2 = tf.nn.relu(L2)
L2 = tf.nn.max_pool(L2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1]
                    , padding='SAME')
# L2 = tf.nn.dropout(L2, keep_prob)

##########################################################
# conv: 128 3x3 filters, stride=1, 'SAME' padding
# relu
# maxpool: 2x2 filter, stride=2, 'SAME' padding
##########################################################

W3 = tf.Variable(tf.random_normal([3, 3, 64, 128], stddev=0.01))
L3 = tf.nn.conv2d(L2, W3, strides=[1, 1, 1, 1], padding='SAME')
L3 = tf.nn.relu(L3)
L3 = tf.nn.max_pool(L3, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1]
                    , padding='SAME')
print(L3)

#  (?, 4, 4, 128) Reshape  ->(?, 256)
W4 = tf.Variable(tf.random_normal([4 * 4 * 128, 256], stddev=0.01))
L4 = tf.reshape(L3, [-1, 4 * 4 * 128])
L4 = tf.matmul(L4, W4)
L4 = tf.nn.relu(L4)
L4 = tf.nn.dropout(L4, keep_prob)

W5 = tf.Variable(tf.random_normal([256, 10], stddev=0.01))
model = tf.matmul(L4, W5)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=model, labels=Y))
optimizer = tf.train.AdamOptimizer(0.001).minimize(cost)
# optimizer = tf.train.RMSPropOptimizer(0.001, 0.9).minimize(cost)

init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

batch_size = 100
total_batch = int(mnist.train.num_examples / batch_size)

for epoch in range(15):
    total_cost = 0

    for i in range(total_batch):
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        batch_xs = batch_xs.reshape(-1, 28, 28, 1)

        _, cost_val = sess.run([optimizer, cost],
                               feed_dict={X: batch_xs,
                                          Y: batch_ys,
                                          keep_prob: 0.7})
        total_cost += cost_val

    print('Epoch:', '%04d' % (epoch + 1),
          'Avg. cost =', '{:.3f}'.format(total_cost / total_batch))

is_correct = tf.equal(tf.argmax(model, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))
print('accuracy', sess.run(accuracy,
                        feed_dict={X: mnist.test.images.reshape(-1, 28, 28, 1),
                                   Y: mnist.test.labels,
                                   keep_prob: 1}))


*Neural Nets(NN) for MNIST :


from tensorflow.examples.tutorials.mnist import input_data

import tensorflow as tf
import random
import matplotlib.pylab as plt

learning_rate = 0.001
training_epochs = 15
batch_size = 100
display_step = 1

mnist = input_data.read_data_sets('./MNIST_data/', one_hot=True)

sess = tf.InteractiveSession()

# Create the model
X = tf.placeholder(tf.float32, [None, 784])  # fix the number of columns to 784
Y = tf.placeholder(tf.float32, [None, 10])   # fix the number of columns to 10

W1 = tf.Variable(tf.random_normal([784, 256]))
W2 = tf.Variable(tf.random_normal([256, 256]))
W3 = tf.Variable(tf.random_normal([256, 10]))

b1 = tf.Variable(tf.random_normal([256]))
b2 = tf.Variable(tf.random_normal([256]))
b3 = tf.Variable(tf.random_normal([10]))

L1 = tf.nn.relu(tf.add(tf.matmul(X, W1), b1))
L2 = tf.nn.relu(tf.add(tf.matmul(L1, W2), b2))
hypothesis = tf.add(tf.matmul(L2, W3), b3)

cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=hypothesis, labels=Y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

init = tf.global_variables_initializer()  # initialize_all_variables() is deprecated

with tf.Session() as sess:
    sess.run(init)

    for epoch in range(training_epochs):
        avg_cost = 0.
        total_batch = int(mnist.train.num_examples/batch_size)
        for i in range(total_batch):
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)
            sess.run(optimizer, feed_dict={X: batch_xs, Y: batch_ys})
            avg_cost += sess.run(cost, feed_dict={X: batch_xs, Y: batch_ys})/total_batch
        if epoch % display_step == 0:
            print("Epoch:", '%04d' % (epoch+1), "cost=", "{:.9f}".format(avg_cost))

    print("Optimization Finished")

    correct_prediction = tf.equal(tf.argmax(hypothesis, 1), tf.argmax(Y, 1))

    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    print("Accuracy:", accuracy.eval({X: mnist.test.images, Y: mnist.test.labels}))


*Xavier initialization :


from tensorflow.examples.tutorials.mnist import input_data

import tensorflow as tf
import random
import matplotlib.pylab as plt

learning_rate = 0.001
training_epochs = 15
batch_size = 100
display_step = 1

mnist = input_data.read_data_sets('./MNIST_data/', one_hot=True)

sess = tf.InteractiveSession()


def xavier_init(n_inputs, n_outputs, uniform=True):
    if uniform:
        init_range = tf.sqrt(6.0 / (n_inputs + n_outputs))
        return tf.random_uniform_initializer(-init_range, init_range)
    else:
        stddev = tf.sqrt(3.0 / (n_inputs + n_outputs))
        return tf.truncated_normal_initializer(stddev=stddev)

# Create the model
X = tf.placeholder(tf.float32, [None, 784])  # fix the number of columns to 784
Y = tf.placeholder(tf.float32, [None, 10])   # fix the number of columns to 10

# W1 = tf.Variable(tf.random_normal([784, 256]))
# W2 = tf.Variable(tf.random_normal([256, 256]))
# W3 = tf.Variable(tf.random_normal([256, 10]))
# Each initializer's fan-in/fan-out should match the layer shape:
W1 = tf.get_variable("W1", shape=[784, 256], initializer=xavier_init(784, 256))
W2 = tf.get_variable("W2", shape=[256, 256], initializer=xavier_init(256, 256))
W3 = tf.get_variable("W3", shape=[256, 10], initializer=xavier_init(256, 10))


b1 = tf.Variable(tf.random_normal([256]))
b2 = tf.Variable(tf.random_normal([256]))
b3 = tf.Variable(tf.random_normal([10]))

L1 = tf.nn.relu(tf.add(tf.matmul(X, W1), b1))
L2 = tf.nn.relu(tf.add(tf.matmul(L1, W2), b2))
hypothesis = tf.add(tf.matmul(L2, W3), b3)

cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=hypothesis, labels=Y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)



init = tf.global_variables_initializer()  # initialize_all_variables() is deprecated

with tf.Session() as sess:
    sess.run(init)

    for epoch in range(training_epochs):
        avg_cost = 0.
        total_batch = int(mnist.train.num_examples/batch_size)
        for i in range(total_batch):
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)
            sess.run(optimizer, feed_dict={X: batch_xs, Y: batch_ys})
            avg_cost += sess.run(cost, feed_dict={X: batch_xs, Y: batch_ys})/total_batch
        if epoch % display_step == 0:
            print("Epoch:", '%04d' % (epoch+1), "cost=", "{:.9f}".format(avg_cost))

    print("Optimization Finished")

    correct_prediction = tf.equal(tf.argmax(hypothesis, 1), tf.argmax(Y, 1))

    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    print("Accuracy:", accuracy.eval({X: mnist.test.images, Y: mnist.test.labels}))





* MNIST 5-layer NN :


import tensorflow as tf
import random
import matplotlib.pyplot as plt

from tensorflow.examples.tutorials.mnist import input_data

tf.set_random_seed(777)  # reproducibility

mnist = input_data.read_data_sets("./MNIST_data/", one_hot=True)

# parameters
learning_rate = 0.001
training_epochs = 15
batch_size = 100

# input place holders
X = tf.placeholder(tf.float32, [None, 784])
Y = tf.placeholder(tf.float32, [None, 10])

# dropout (keep_prob) rate  0.7 on training, but should be 1 for testing
keep_prob = tf.placeholder(tf.float32)

# weights & bias for nn layers
# http://stackoverflow.com/questions/33640581/how-to-do-xavier-initialization-on-tensorflow
W1 = tf.get_variable("W1", shape=[784, 512],
                     initializer=tf.contrib.layers.xavier_initializer())
b1 = tf.Variable(tf.random_normal([512]))
L1 = tf.nn.relu(tf.matmul(X, W1) + b1)
L1 = tf.nn.dropout(L1, keep_prob=keep_prob)

W2 = tf.get_variable("W2", shape=[512, 512],
                     initializer=tf.contrib.layers.xavier_initializer())
b2 = tf.Variable(tf.random_normal([512]))
L2 = tf.nn.relu(tf.matmul(L1, W2) + b2)
L2 = tf.nn.dropout(L2, keep_prob=keep_prob)

W3 = tf.get_variable("W3", shape=[512, 512],
                     initializer=tf.contrib.layers.xavier_initializer())
b3 = tf.Variable(tf.random_normal([512]))
L3 = tf.nn.relu(tf.matmul(L2, W3) + b3)
L3 = tf.nn.dropout(L3, keep_prob=keep_prob)

W4 = tf.get_variable("W4", shape=[512, 512],
                     initializer=tf.contrib.layers.xavier_initializer())
b4 = tf.Variable(tf.random_normal([512]))
L4 = tf.nn.relu(tf.matmul(L3, W4) + b4)
L4 = tf.nn.dropout(L4, keep_prob=keep_prob)

W5 = tf.get_variable("W5", shape=[512, 10],
                     initializer=tf.contrib.layers.xavier_initializer())
b5 = tf.Variable(tf.random_normal([10]))
hypothesis = tf.matmul(L4, W5) + b5

# define cost/loss & optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
    logits=hypothesis, labels=Y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# initialize
sess = tf.Session()
sess.run(tf.global_variables_initializer())

# train my model
for epoch in range(training_epochs):
    avg_cost = 0
    total_batch = int(mnist.train.num_examples / batch_size)

    for i in range(total_batch):
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        feed_dict = {X: batch_xs, Y: batch_ys, keep_prob: 0.7}
        c, _ = sess.run([cost, optimizer], feed_dict=feed_dict)
        avg_cost += c / total_batch

    print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.9f}'.format(avg_cost))

print('Learning Finished!')

# Test model and check accuracy
correct_prediction = tf.equal(tf.argmax(hypothesis, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
print('Accuracy:', sess.run(accuracy, feed_dict={
      X: mnist.test.images, Y: mnist.test.labels, keep_prob: 1}))

# Get one and predict
r = random.randint(0, mnist.test.num_examples - 1)
print("Label: ", sess.run(tf.argmax(mnist.test.labels[r:r + 1], 1)))
print("Prediction: ", sess.run(
    tf.argmax(hypothesis, 1), feed_dict={X: mnist.test.images[r:r + 1], keep_prob: 1}))

plt.imshow(mnist.test.images[r:r + 1].
          reshape(28, 28), cmap='Greys', interpolation='nearest')
plt.show()

'''
Epoch: 0001 cost = 0.447322626
Epoch: 0002 cost = 0.157285590
Epoch: 0003 cost = 0.121884535
Epoch: 0004 cost = 0.098128681
Epoch: 0005 cost = 0.082901778
Epoch: 0006 cost = 0.075337573
Epoch: 0007 cost = 0.069752543
Epoch: 0008 cost = 0.060884363
Epoch: 0009 cost = 0.055276413
Epoch: 0010 cost = 0.054631256
Epoch: 0011 cost = 0.049675195
Epoch: 0012 cost = 0.049125314
Epoch: 0013 cost = 0.047231930
Epoch: 0014 cost = 0.041290121
Epoch: 0015 cost = 0.043621063
Learning Finished!
Accuracy: 0.9804
'''
 


* Deeper networks & Dropout :


from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf
import random
import matplotlib.pylab as plt

tf.set_random_seed(777)

mnist = input_data.read_data_sets('../MNIST_data/', one_hot=True)

sess = tf.InteractiveSession()

X = tf.placeholder(tf.float32, [None, 784])
Y = tf.placeholder(tf.float32, [None, 10])

W1 = tf.get_variable("W1", shape=[784, 256]
                     , initializer=tf.contrib.layers.xavier_initializer())
W2 = tf.get_variable("W2", shape=[256, 256]
                     , initializer=tf.contrib.layers.xavier_initializer())
W3 = tf.get_variable("W3", shape=[256, 10]
                     , initializer=tf.contrib.layers.xavier_initializer())

b1 = tf.Variable(tf.zeros([256]))
b2 = tf.Variable(tf.zeros([256]))
b3 = tf.Variable(tf.zeros([10]))

dropout_rate = tf.placeholder(tf.float32)
_L1 = tf.nn.relu(tf.matmul(X, W1) + b1)
L1 = tf.nn.dropout(_L1, keep_prob=dropout_rate)
_L2 = tf.nn.relu(tf.matmul(L1, W2) + b2)
L2 = tf.nn.dropout(_L2, keep_prob=dropout_rate)
hypothesis = tf.matmul(L2, W3) + b3

cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=hypothesis, labels=Y))
train = tf.train.AdamOptimizer(0.001).minimize(cost)

tf.global_variables_initializer().run()

for i in range(5500):  # 5500
    batch_xs, batch_ys = mnist.train.next_batch(100)
    train.run({X: batch_xs, Y: batch_ys, dropout_rate: 0.7})
    print("cost:", cost.eval({X: batch_xs, Y: batch_ys, dropout_rate: 0.7}))

correct_prediction = tf.equal(tf.argmax(hypothesis, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
print(accuracy.eval({X: mnist.test.images, Y: mnist.test.labels, dropout_rate: 1}))
print(hypothesis.eval({X: mnist.test.images, Y: mnist.test.labels, dropout_rate: 1}))

r = random.randint(0, mnist.test.num_examples - 1)
print('Label:', sess.run(tf.argmax(mnist.test.labels[r:r + 1], 1)))
print('Prediction:', sess.run(tf.argmax(hypothesis, 1), {X: mnist.test.images[r:r + 1], dropout_rate: 1}))
print(mnist.test.images[r:r + 1])

plt.imshow(mnist.test.images[r:r + 1].reshape(28, 28)
           , cmap='Greys', interpolation='nearest')
plt.show()






*9 hidden layers with TensorBoard :

import tensorflow as tf
import numpy as np
tf.set_random_seed(777)  # for reproducibility

xy = np.loadtxt('./data/07train.txt')

x_data = xy[:, 0:-1]
y_data = xy[:, [-1]]

X = tf.placeholder(tf.float32, [None, 2])
Y = tf.placeholder(tf.float32, [None, 1])

# 10 hidden layers of 5 neurons each, plus a 1-unit output layer:
W1 = tf.Variable(tf.random_uniform([2, 5], -1., 1.))
W2 = tf.Variable(tf.random_uniform([5, 5], -1., 1.))
W3 = tf.Variable(tf.random_uniform([5, 5], -1., 1.))
W4 = tf.Variable(tf.random_uniform([5, 5], -1., 1.))
W5 = tf.Variable(tf.random_uniform([5, 5], -1., 1.))
W6 = tf.Variable(tf.random_uniform([5, 5], -1., 1.))
W7 = tf.Variable(tf.random_uniform([5, 5], -1., 1.))
W8 = tf.Variable(tf.random_uniform([5, 5], -1., 1.))
W9 = tf.Variable(tf.random_uniform([5, 5], -1., 1.))
W10 = tf.Variable(tf.random_uniform([5, 5], -1., 1.))
W11 = tf.Variable(tf.random_uniform([5, 1], -1., 1.))

b1 = tf.Variable(tf.zeros([5]))
b2 = tf.Variable(tf.zeros([5]))
b3 = tf.Variable(tf.zeros([5]))
b4 = tf.Variable(tf.zeros([5]))
b5 = tf.Variable(tf.zeros([5]))
b6 = tf.Variable(tf.zeros([5]))
b7 = tf.Variable(tf.zeros([5]))
b8 = tf.Variable(tf.zeros([5]))
b9 = tf.Variable(tf.zeros([5]))
b10 = tf.Variable(tf.zeros([5]))
b11 = tf.Variable(tf.zeros([1]))

# Hypothesis
with tf.name_scope("layer1") as scope:
    L1 = tf.sigmoid(tf.matmul(X, W1) + b1)
with tf.name_scope("layer2") as scope:
    L2 = tf.sigmoid(tf.matmul(L1, W2) + b2)
with tf.name_scope("layer3") as scope:
    L3 = tf.sigmoid(tf.matmul(L2, W3) + b3)
with tf.name_scope("layer4") as scope:
    L4 = tf.sigmoid(tf.matmul(L3, W4) + b4)
with tf.name_scope("layer5") as scope:
    L5 = tf.sigmoid(tf.matmul(L4, W5) + b5)
with tf.name_scope("layer6") as scope:
    L6 = tf.sigmoid(tf.matmul(L5, W6) + b6)
with tf.name_scope("layer7") as scope:
    L7 = tf.sigmoid(tf.matmul(L6, W7) + b7)
with tf.name_scope("layer8") as scope:
    L8 = tf.sigmoid(tf.matmul(L7, W8) + b8)
with tf.name_scope("layer9") as scope:
    L9 = tf.sigmoid(tf.matmul(L8, W9) + b9)
with tf.name_scope("layer10") as scope:
    L10 = tf.sigmoid(tf.matmul(L9, W10) + b10)
with tf.name_scope("layer11") as scope:  # was duplicated as "layer1"
    hypothesis = tf.sigmoid(tf.matmul(L10, W11) + b11)

# Alternative with only 3 layers (commented out so it does not overwrite the
# deep hypothesis above; note W3 is [5, 5] here, not a valid [5, 1] output layer):
# L1 = tf.sigmoid(tf.matmul(X, W1) + b1)
# L2 = tf.sigmoid(tf.matmul(L1, W2) + b2)
# hypothesis = tf.sigmoid(tf.matmul(L2, W3) + b3)

cost = -tf.reduce_mean(Y * tf.log(hypothesis) + (1 - Y) *
                       tf.log(1 - hypothesis))

train = tf.train.GradientDescentOptimizer(learning_rate=0.1).minimize(cost)

predicted = tf.cast(hypothesis > 0.5, dtype=tf.float32)
accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, Y), dtype=tf.float32))

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    for step in range(10001):
        sess.run(train, feed_dict={X: x_data, Y: y_data})
        if step % 100 == 0:
            print(step, sess.run(cost, feed_dict={
                  X: x_data, Y: y_data}))

    h, c, a = sess.run([hypothesis, predicted, accuracy],
                       feed_dict={X: x_data, Y: y_data})
    print("\nHypothesis: ", h, "\nCorrect: ", c, "\nAccuracy: ", a)
 
 





*What is TensorBoard :
A TF logging/debugging tool

=> Visualize your TF graph

=> Plot quantitative metrics

=> Show additional data


*5 steps for using TensorBoard (a minimal sketch follows this list)

=> From TF graph, decide which tensors you want to log

=> Merge all summaries

=> Create writer and add graph

=> Run summary merge and add_summary

=> Launch TensorBoard
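
The sketch below is not from the original post; the toy cost and the log directory ./tmp/demo_logs are assumptions. It walks through the five steps in order:

import tensorflow as tf

x = tf.placeholder(tf.float32, name='x')
cost = tf.square(x, name='cost')

# 1) decide which tensors to log
tf.summary.scalar('cost', cost)

# 2) merge all summaries
merged = tf.summary.merge_all()

with tf.Session() as sess:
    # 3) create writer and add graph
    writer = tf.summary.FileWriter('./tmp/demo_logs', sess.graph)

    for step in range(10):
        # 4) run the merged summary op and add the result
        summary = sess.run(merged, feed_dict={x: 10.0 - step})
        writer.add_summary(summary, step)

# 5) launch TensorBoard:
#    $ tensorboard --logdir=./tmp/demo_logs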




*Producing TensorBoard output :


#$ tensorboard --logdir=/tmp/xor_logs2
import tensorflow as tf
import numpy as np

xy = np.loadtxt('./data/07train.txt', unpack=True)
x_data = np.transpose(xy[0:-1])
y_data = np.reshape(xy[-1], (4, 1))

print(x_data)
print(y_data)

X = tf.placeholder(tf.float32, name='x-input')
Y = tf.placeholder(tf.float32, name='y-input')

w1 = tf.Variable(tf.random_uniform([2, 10], -1.0, 1.0), name='weight1')
w2 = tf.Variable(tf.random_uniform([10, 10], -1.0, 1.0), name='weight2')
w3 = tf.Variable(tf.random_uniform([10, 10], -1.0, 1.0), name='weight3')
w4 = tf.Variable(tf.random_uniform([10, 10], -1.0, 1.0), name='weight4')
w5 = tf.Variable(tf.random_uniform([10, 10], -1.0, 1.0), name='weight5')
w6 = tf.Variable(tf.random_uniform([10, 10], -1.0, 1.0), name='weight6')
w7 = tf.Variable(tf.random_uniform([10, 10], -1.0, 1.0), name='weight7')
w8 = tf.Variable(tf.random_uniform([10, 1], -1.0, 1.0), name='weight8')

b1 = tf.Variable(tf.zeros([10]), name="Bias1")
b2 = tf.Variable(tf.zeros([10]), name="Bias2")
b3 = tf.Variable(tf.zeros([10]), name="Bias3")
b4 = tf.Variable(tf.zeros([10]), name="Bias4")
b5 = tf.Variable(tf.zeros([10]), name="Bias5")
b6 = tf.Variable(tf.zeros([10]), name="Bias6")
b7 = tf.Variable(tf.zeros([10]), name="Bias7")
b8 = tf.Variable(tf.zeros([1]), name="Bias8")

# ReLU alternative:
# L2 = tf.nn.relu(tf.matmul(X, w1) + b1)
# L3 = tf.nn.relu(tf.matmul(L2, w2) + b2)
# L4 = tf.nn.relu(tf.matmul(L3, w3) + b3)
# L5 = tf.nn.relu(tf.matmul(L4, w4) + b4)
# L6 = tf.nn.relu(tf.matmul(L5, w5) + b5)
# L7 = tf.nn.relu(tf.matmul(L6, w6) + b6)
# L8 = tf.nn.relu(tf.matmul(L7, w7) + b7)
with tf.name_scope("layer1") as scope:
    L2 = tf.sigmoid(tf.matmul(X, w1) + b1)
with tf.name_scope("layer2") as scope:
    L3 = tf.sigmoid(tf.matmul(L2, w2) + b2)
with tf.name_scope("layer3") as scope:
    L4 = tf.sigmoid(tf.matmul(L3, w3) + b3)
with tf.name_scope("layer4") as scope:
    L5 = tf.sigmoid(tf.matmul(L4, w4) + b4)
with tf.name_scope("layer5") as scope:
    L6 = tf.sigmoid(tf.matmul(L5, w5) + b5)
with tf.name_scope("layer6") as scope:
    L7 = tf.sigmoid(tf.matmul(L6, w6) + b6)
with tf.name_scope("layer7") as scope:
    L8 = tf.sigmoid(tf.matmul(L7, w7) + b7)
with tf.name_scope("layer8") as scope:
    hypothesis = tf.sigmoid(tf.matmul(L8, w8) + b8)

with tf.name_scope('cost') as scope:
    cost = -tf.reduce_mean(Y * tf.log(hypothesis) + (1-Y) * tf.log(1 - hypothesis))
    tf.summary.scalar("cost", cost)

with tf.name_scope('train') as scope:
    a = tf.Variable(0.003)
    optimizer = tf.train.GradientDescentOptimizer(a)
    train = optimizer.minimize(cost)

w1_hist = tf.summary.histogram("weights1", w1)
w2_hist = tf.summary.histogram("weights2", w2)
b1_hist = tf.summary.histogram("biases1", b1)
b2_hist = tf.summary.histogram("biases2", b2)
y_hist = tf.summary.histogram("y", Y)

with tf.name_scope('accuracy') as scope:
    correct_prediction = tf.equal(tf.floor(hypothesis + 0.5), Y)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    tf.summary.scalar("accuracy", accuracy)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)

    merged = tf.summary.merge_all()
    writer = tf.summary.FileWriter("./tmp/xor_logs3", sess.graph)

    for step in range(20000):
        sess.run(train, feed_dict={X: x_data, Y: y_data})
        if step % 200 == 0:
            summary = sess.run(merged, feed_dict={X: x_data, Y: y_data})
            writer.add_summary(summary, step)
            print(step, sess.run(cost, feed_dict={X: x_data, Y: y_data}), sess.run(w1), sess.run(w2))

    print(sess.run([hypothesis, tf.floor(hypothesis + 0.5), correct_prediction], feed_dict={X: x_data, Y: y_data}))
    print("accuracy", sess.run(accuracy, feed_dict={X: x_data, Y: y_data}))
 


After running the code above,

select View > Tool Windows > Terminal

and run the command below:


>>> tensorboard --logdir=./tmp/xor_logs3


Then open http://localhost:6006 in a browser.






*ReLU source code :


import tensorflow as tf
import numpy as np

xy = np.loadtxt('./data/07train.txt')
x_data = xy[:, 0:-1]
y_data = xy[:, [-1]]

X = tf.placeholder(tf.float32, [None, 2])
Y = tf.placeholder(tf.float32, [None, 1])

w1 = tf.Variable(tf.random_uniform([2, 10], -1.0, 1.0), name='weight1')
w2 = tf.Variable(tf.random_uniform([10, 10], -1.0, 1.0), name='weight2')
w3 = tf.Variable(tf.random_uniform([10, 10], -1.0, 1.0), name='weight3')
w4 = tf.Variable(tf.random_uniform([10, 10], -1.0, 1.0), name='weight4')
w5 = tf.Variable(tf.random_uniform([10, 10], -1.0, 1.0), name='weight5')
w6 = tf.Variable(tf.random_uniform([10, 10], -1.0, 1.0), name='weight6')
w7 = tf.Variable(tf.random_uniform([10, 10], -1.0, 1.0), name='weight7')
w8 = tf.Variable(tf.random_uniform([10, 10], -1.0, 1.0), name='weight8')
w9 = tf.Variable(tf.random_uniform([10, 10], -1.0, 1.0), name='weight9')
w10 = tf.Variable(tf.random_uniform([10, 10], -1.0, 1.0), name='weight10')
w11 = tf.Variable(tf.random_uniform([10, 1], -1.0, 1.0), name='weight11')

b1 = tf.Variable(tf.zeros([10]), name="Bias1")
b2 = tf.Variable(tf.zeros([10]), name="Bias2")
b3 = tf.Variable(tf.zeros([10]), name="Bias3")
b4 = tf.Variable(tf.zeros([10]), name="Bias4")
b5 = tf.Variable(tf.zeros([10]), name="Bias5")
b6 = tf.Variable(tf.zeros([10]), name="Bias6")
b7 = tf.Variable(tf.zeros([10]), name="Bias7")
b8 = tf.Variable(tf.zeros([10]), name="Bias8")
b9 = tf.Variable(tf.zeros([10]), name="Bias9")
b10 = tf.Variable(tf.zeros([10]), name="Bias10")
b11 = tf.Variable(tf.zeros([1]), name="Bias11")

# L1 = tf.sigmoid(tf.matmul(X, w1) + b1)
L1 = tf.nn.relu(tf.matmul(X, w1) + b1)
L2 = tf.nn.relu(tf.matmul(L1, w2) + b2)
L3 = tf.nn.relu(tf.matmul(L2, w3) + b3)
L4 = tf.nn.relu(tf.matmul(L3, w4) + b4)
L5 = tf.nn.relu(tf.matmul(L4, w5) + b5)
L6 = tf.nn.relu(tf.matmul(L5, w6) + b6)
L7 = tf.nn.relu(tf.matmul(L6, w7) + b7)
L8 = tf.nn.relu(tf.matmul(L7, w8) + b8)
L9 = tf.nn.relu(tf.matmul(L8, w9) + b9)
L10 = tf.nn.relu(tf.matmul(L9, w10) + b10)
hypothesis = tf.sigmoid(tf.matmul(L10, w11) + b11)

cost = -tf.reduce_mean(Y * tf.log(hypothesis) + (1-Y) * tf.log(1 - hypothesis))

a = tf.Variable(0.01)
optimizer = tf.train.GradientDescentOptimizer(a)
train = optimizer.minimize(cost)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)

    for step in range(10000):
        sess.run(train, feed_dict={X: x_data, Y: y_data})
        if step % 200 == 0:
            print(step, sess.run(cost, feed_dict={X: x_data, Y: y_data}), sess.run(w1), sess.run(w2))

    correct_prediction = tf.equal(tf.floor(hypothesis + 0.5), Y)

    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    print(sess.run([hypothesis, tf.floor(hypothesis + 0.5), correct_prediction], feed_dict={X: x_data, Y: y_data}))
    print("accuracy", accuracy.eval({X: x_data, Y: y_data}))
 



*Activation Function : decides whether or not to respond to a stimulus (the same role a neuron plays)


X -> W -> S -> Y (prediction)

Many neurons each extract their own features well, so that together they make sense of the input.
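
A small illustration (not from the original post) of two common activation functions applied to the pre-activation value S = X·W:

import numpy as np

def sigmoid(s):
    return 1.0 / (1.0 + np.exp(-s))  # squashes S into (0, 1)

def relu(s):
    return np.maximum(0.0, s)        # passes positive S through, zeroes out the rest

s = np.array([-2.0, -0.5, 0.0, 0.5, 2.0])  # example pre-activation values
print(sigmoid(s))
print(relu(s))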


*CNN algorithm (Convolutional Neural Networks)

1980 (LeCun)

=> Big problem :

To model something like the human brain, layers have to be stacked deeply (on the order of 15-16 layers),

but it turned out that such deep stacks of plain neural network layers could not be trained well.

=> Breakthrough :

Neural networks with many layers really could be trained well, if the weights are initialized in a clever way rather than randomly.




*Geoffrey Hinton's summary of findings up to today

- Our labeled datasets were thousands of times too small

- Our computers were too slow

- We initialized the weights poorly

- We used the wrong type of non-linearity


To stack a 2-layer NN,

the output of layer 1 must be connected as the input of layer 2:


1) K(x) = sigmoid(X W1 + B1)

2) Y = H(x) = sigmoid(K(x) W2 + B2)




*XOR With logistic regression :


07train.txt

# xor
# x1 x2 y
0   0   0
0   1   1
1   0   1
1   1   0



import tensorflow as tf
import numpy as np
tf.set_random_seed(777)  # for reproducibility

xy = np.loadtxt('./data/07train.txt')

x_data = xy[:, 0:-1]
y_data = xy[:, [-1]]

X = tf.placeholder(tf.float32, [None, 2])
Y = tf.placeholder(tf.float32, [None, 1])

W = tf.Variable(tf.random_uniform([2, 1], -1., 1.))
b = tf.Variable(tf.random_uniform([1], -1., 1.))

hypothesis = tf.sigmoid(tf.matmul(X, W) + b)

cost = -tf.reduce_mean(Y * tf.log(hypothesis) + (1 - Y) *
                       tf.log(1 - hypothesis))

train = tf.train.GradientDescentOptimizer(learning_rate=0.1).minimize(cost)

predicted = tf.cast(hypothesis > 0.5, dtype=tf.float32)
accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, Y), dtype=tf.float32))

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    for step in range(10001):
        sess.run(train, feed_dict={X: x_data, Y: y_data})
        if step % 100 == 0:
            print(step, sess.run(cost, feed_dict={
                  X: x_data, Y: y_data}), sess.run(W))

    h, c, a = sess.run([hypothesis, predicted, accuracy],
                       feed_dict={X: x_data, Y: y_data})
    print("\nHypothesis: ", h, "\nCorrect: ", c, "\nAccuracy: ", a)
 
 





*Source code with 2 neurons :


import tensorflow as tf
import numpy as np
tf.set_random_seed(777)  # for reproducibility

xy = np.loadtxt('./data/07train.txt')

x_data = xy[:, 0:-1]
y_data = xy[:, [-1]]

X = tf.placeholder(tf.float32, [None, 2])
Y = tf.placeholder(tf.float32, [None, 1])

# Stack 2 layers: two W's and two b's. The W shapes are [2, 2] and [2, 1];
# the inner dimensions must match diagonally (2 = 2).
# Each bias size must match the last dimension of its W.
W1 = tf.Variable(tf.random_uniform([2, 2], -1., 1.))
W2 = tf.Variable(tf.random_uniform([2, 1], -1., 1.))
b1 = tf.Variable(tf.random_uniform([2], -1., 1.))
b2 = tf.Variable(tf.random_uniform([1], -1., 1.))

L1 = tf.sigmoid(tf.matmul(X, W1) + b1)
hypothesis = tf.sigmoid(tf.matmul(L1, W2) + b2)

cost = -tf.reduce_mean(Y * tf.log(hypothesis) + (1 - Y) *
                       tf.log(1 - hypothesis))

train = tf.train.GradientDescentOptimizer(learning_rate=0.1).minimize(cost)

predicted = tf.cast(hypothesis > 0.5, dtype=tf.float32)
accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, Y), dtype=tf.float32))

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    for step in range(10001):
        sess.run(train, feed_dict={X: x_data, Y: y_data})
        if step % 100 == 0:
            print(step, sess.run(cost, feed_dict={
                  X: x_data, Y: y_data}))

    h, c, a = sess.run([hypothesis, predicted, accuracy],
                       feed_dict={X: x_data, Y: y_data})
    print("\nHypothesis: ", h, "\nCorrect: ", c, "\nAccuracy: ", a)
 
 



*Source code with 2 layers and 10 neurons :

import tensorflow as tf
import numpy as np
tf.set_random_seed(777)  # for reproducibility

xy = np.loadtxt('./data/07train.txt')

x_data = xy[:, 0:-1]
y_data = xy[:, [-1]]

X = tf.placeholder(tf.float32, [None, 2])
Y = tf.placeholder(tf.float32, [None, 1])

# Stack 2 layers: two W's and two b's. The inner dimensions of W1 [2, 10]
# and W2 [10, 1] must match diagonally (10 = 10).
# Each bias size must match the last dimension of its W.
# 2 layers with 10 neurons in the hidden layer:
W1 = tf.Variable(tf.random_uniform([2, 10], -1., 1.))
W2 = tf.Variable(tf.random_uniform([10, 1], -1., 1.))
b1 = tf.Variable(tf.random_uniform([10], -1., 1.))
b2 = tf.Variable(tf.random_uniform([1], -1., 1.))

L1 = tf.sigmoid(tf.matmul(X, W1) + b1)
hypothesis = tf.sigmoid(tf.matmul(L1, W2) + b2)

cost = -tf.reduce_mean(Y * tf.log(hypothesis) + (1 - Y) *
                       tf.log(1 - hypothesis))

train = tf.train.GradientDescentOptimizer(learning_rate=0.1).minimize(cost)

predicted = tf.cast(hypothesis > 0.5, dtype=tf.float32)
accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, Y), dtype=tf.float32))

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    for step in range(10001):
        sess.run(train, feed_dict={X: x_data, Y: y_data})
        if step % 100 == 0:
            print(step, sess.run(cost, feed_dict={
                  X: x_data, Y: y_data}))

    h, c, a = sess.run([hypothesis, predicted, accuracy],
                       feed_dict={X: x_data, Y: y_data})
    print("\nHypothesis: ", h, "\nCorrect: ", c, "\nAccuracy: ", a)
 
 


*Source code with 3 layers and 5 neurons :


import tensorflow as tf
import numpy as np
tf.set_random_seed(777)  # for reproducibility

xy = np.loadtxt('./data/07train.txt')

x_data = xy[:, 0:-1]
y_data = xy[:, [-1]]

X = tf.placeholder(tf.float32, [None, 2])
Y = tf.placeholder(tf.float32, [None, 1])

# 3 layers, with 5 neurons in the first hidden layer (and 4 in the second):
W1 = tf.Variable(tf.random_uniform([2, 5], -1., 1.))
W2 = tf.Variable(tf.random_uniform([5, 4], -1., 1.))
W3 = tf.Variable(tf.random_uniform([4, 1], -1., 1.))
b1 = tf.Variable(tf.random_uniform([5], -1., 1.))
b2 = tf.Variable(tf.random_uniform([4], -1., 1.))
b3 = tf.Variable(tf.random_uniform([1], -1., 1.))

# Stacking 3 layers:
L1 = tf.sigmoid(tf.matmul(X, W1) + b1)
L2 = tf.sigmoid(tf.matmul(L1, W2) + b2)
hypothesis = tf.sigmoid(tf.matmul(L2, W3) + b3)

cost = -tf.reduce_mean(Y * tf.log(hypothesis) + (1 - Y) *
                       tf.log(1 - hypothesis))

train = tf.train.GradientDescentOptimizer(learning_rate=0.1).minimize(cost)

predicted = tf.cast(hypothesis > 0.5, dtype=tf.float32)
accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, Y), dtype=tf.float32))

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    for step in range(10001):
        sess.run(train, feed_dict={X: x_data, Y: y_data})
        if step % 100 == 0:
            print(step, sess.run(cost, feed_dict={
                  X: x_data, Y: y_data}))

    h, c, a = sess.run([hypothesis, predicted, accuracy],
                       feed_dict={X: x_data, Y: y_data})
    print("\nHypothesis: ", h, "\nCorrect: ", c, "\nAccuracy: ", a)
 
 




*Learning Rate :

A large learning rate causes overshooting,

while one that is too small makes training take a long time.
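
A tiny illustration (not from the original post) of both failure modes, using plain gradient descent on f(w) = w^2:

def descend(lr, steps=10):
    w = 5.0
    for _ in range(steps):
        w -= lr * 2 * w  # gradient of w^2 is 2w
    return w

print(descend(0.01))  # too small: after 10 steps w has barely moved toward 0
print(descend(1.1))   # too large: each step overshoots 0 and |w| grows (diverges)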


*Why data normalization is needed (standardization) :


x_data[:, 1] = (x_data[:, 1] - x_data[:, 1].mean()) / x_data[:, 1].std()  # standardize column 1 (assumes x_data is a NumPy array)
 





*Overfitting :

One of the biggest problems in machine learning:

- Our model is very good with the training data set (through memorization)

- Not good at test datasets or in real use


*Solutions for overfitting (a regularization sketch follows this list):

- The more data, the better

- Reduce the number of features

- Regularization

=> don't let the numbers in the weights get too big
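
A hedged sketch of L2 regularization on a softmax classifier; the placeholder shapes and the lambda value are assumptions, not from the original post:

import tensorflow as tf

X = tf.placeholder(tf.float32, [None, 784])
Y = tf.placeholder(tf.float32, [None, 10])
W = tf.Variable(tf.random_normal([784, 10]))
b = tf.Variable(tf.zeros([10]))
logits = tf.matmul(X, W) + b

base_cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=Y))

# Penalize large weights: add lambda * sum(W^2) to the cost,
# so the optimizer keeps the weights small.
l2_lambda = 0.001  # assumed regularization strength
cost = base_cost + l2_lambda * tf.reduce_sum(tf.square(W))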


*Underfit vs Overfit


- A model that only fits the training data well: training error < test error

- Check whether the model also handles new data well

- Check what level of complexity is appropriate -> the model selection problem


*Bias-Variance Tradeoff

A model with high variance is not a good model:

the best model has low variance and low bias.

bias: the error away from the center; h-bar is the mean of the predictions

variance: the spread of the predictions around their mean


Bias error decreases as model complexity increases,

while variance error increases as model complexity increases.

The optimal model is where the combined error is smallest (at a moderate model complexity).
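
For reference (a standard result, not from the original post), the decomposition behind this tradeoff for squared error, where $y = f(x) + \varepsilon$, $\bar{h}(x) = E[\hat{h}(x)]$ is the mean prediction, and $\sigma^2$ is the irreducible noise:

$$E\left[(y - \hat{h}(x))^2\right] = \underbrace{\left(f(x) - \bar{h}(x)\right)^2}_{\text{Bias}^2} + \underbrace{E\left[(\hat{h}(x) - \bar{h}(x))^2\right]}_{\text{Variance}} + \sigma^2$$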





*Online Learning :
A method of training on large data in smaller chunks, or of continuing to train on additional data after the initial training has finished.


*MNIST Example :
=> An example that trains on handwritten digit images and predicts them.


from tensorflow.examples.tutorials.mnist import input_data

import tensorflow as tf
import random
import matplotlib.pylab as plt

mnist = input_data.read_data_sets('./MNIST_data/', one_hot=True)

sess = tf.InteractiveSession()

# Create the model
x = tf.placeholder(tf.float32, [None, 784])  # fix the number of columns to 784
y = tf.placeholder(tf.float32, [None, 10])   # fix the number of columns to 10

W = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))
hypothesis = tf.nn.softmax(tf.matmul(x, W) + b)

# Define loss and optimizer
cross_entropy = tf.reduce_mean(-tf.reduce_sum(y * tf.log(hypothesis), axis=1))  # sum per row
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)

# Train
tf.global_variables_initializer().run()

for i in range(5500):  # 5500
    batch_xs, batch_ys = mnist.train.next_batch(100)  # fetch 100 examples at a time
    train_step.run({x: batch_xs, y: batch_ys})
    print("cost:", cross_entropy.eval({x: batch_xs, y: batch_ys}))

# Test trained model
correct_prediction = tf.equal(tf.argmax(hypothesis, 1), tf.argmax(y, 1))  # compare the hypothesis with the labels
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))  # average to get the accuracy
print(accuracy.eval({x: mnist.test.images, y: mnist.test.labels}))

r = random.randint(0, mnist.test.num_examples - 1)
print('Label:', sess.run(tf.argmax(mnist.test.labels[r:r+1], 1)))
print('Prediction:', sess.run(tf.argmax(hypothesis, 1), {x: mnist.test.images[r:r+1]}))

plt.imshow(mnist.test.images[r:r+1].reshape(28, 28)
           , cmap='Greys', interpolation='nearest')
plt.show()
 
 







