*Sequence Data :

순서에 의미가 있는 data (순서가 바뀌면 의미도 달라진다)

예) 주식 Data : 6일차의 Data를 보유하고 있을 때, 전 날 Data와 전전 날 Data가 영향을 준다(각각의 단계 마다 시퀀스별 관계를 담는다).


=> Recurrent Data(이전 Data가 현재 Data에 영향을 미친다)


*RNN 활용분야:
- Language Modeling

- Speech Recognition

- Machine Translation

- Conversation Modeling / Question Answering

- Image /Video Captioning

- Image /Music /Dance Generation



* LSTM(Long Short-Term Memory Units) :

RNNs의 변형으로 90년대 중반에 처음으로 등장

-> Back propagation 하는 과정에서 오차의 값이 더 잘 유지되는데, 결과적으로 1000단계가 넘게 거슬러 올라갈 수 있음


*LSTM 학습단계 :

1) Forget gate layer(시그모이드 층)가 이전 cell 상태에서 어떤 정보를 잊을지(버릴지) 결정

2) Input gate layer인 시그모이드 층이 어떤 값들을 갱신할지 결정

3) 이전 cell 상태 Ct-1을 새 cell 상태 Ct로 갱신하는 단계

4) 무엇을 출력할지 결정하는 단계




*RNN 구현 소스(teach hello):


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import tensorflow as tf
import numpy as np
tf.set_random_seed(777)  # reproducibility

# "Teach hello": train a one-layer RNN to read "hihell" and emit the same
# text shifted one character ahead, "ihello".
idx2char = ['h', 'i', 'e', 'l', 'o']  # class index -> character
x_data = [[0, 1, 0, 2, 3, 3]]   # hihell
x_one_hot = [[[1, 0, 0, 0, 0],   # h 0
              [0, 1, 0, 0, 0],   # i 1
              [1, 0, 0, 0, 0],   # h 0
              [0, 0, 1, 0, 0],   # e 2
              [0, 0, 0, 1, 0],   # l 3
              [0, 0, 0, 1, 0]]]  # l 3

y_data = [[1, 0, 2, 3, 3, 4]]    # ihello

input_dim = 5  # one-hot size
hidden_size = 5  # output from the RNN cell. 5 to directly predict one-hot
batch_size = 1   # one sentence
sequence_length = 6  # |ihello| == 6

# The last input axis is the one-hot width, so it is sized by input_dim
# (numerically equal to hidden_size in this example).
X = tf.placeholder(tf.float32, [None, sequence_length, input_dim])  # X one-hot
Y = tf.placeholder(tf.int32, [None, sequence_length])  # Y label

# Alternative cell: uncomment to train with an LSTM instead of a basic RNN.
# cell = tf.contrib.rnn.BasicLSTMCell(num_units=hidden_size, state_is_tuple=True)
cell = tf.contrib.rnn.BasicRNNCell(num_units=hidden_size)

initial_state = cell.zero_state(batch_size, tf.float32)
outputs, _states = tf.nn.dynamic_rnn(
    cell, X, initial_state=initial_state, dtype=tf.float32)

# The cell outputs are used directly as per-step logits; every time step
# carries the same weight in the loss.
weights = tf.ones([batch_size, sequence_length])
sequence_loss = tf.contrib.seq2seq.sequence_loss(
    logits=outputs, targets=Y, weights=weights)
loss = tf.reduce_mean(sequence_loss)
train = tf.train.AdamOptimizer(learning_rate=0.1).minimize(loss)

prediction = tf.argmax(outputs, axis=2)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(2000):
        l, _ = sess.run([loss, train], feed_dict={X: x_one_hot, Y: y_data})
        result = sess.run(prediction, feed_dict={X: x_one_hot})
        print(i, "loss:", l, "prediction: ", result, "true Y: ", y_data)

        # Map the predicted class indices back to characters.
        result_str = [idx2char[c] for c in np.squeeze(result)]
        # The label and the joined text are separate print arguments; gluing
        # the two literals together would turn the label into the separator.
        print("\tPrediction str: ", ''.join(result_str))
 
'''
0 loss: 1.55474 prediction:  [[3 3 3 3 4 4]] true Y:  [[1, 0, 2, 3, 3, 4]]
    Prediction str:  lllloo
1 loss: 1.55081 prediction:  [[3 3 3 3 4 4]] true Y:  [[1, 0, 2, 3, 3, 4]]
    Prediction str:  lllloo
2 loss: 1.54704 prediction:  [[3 3 3 3 4 4]] true Y:  [[1, 0, 2, 3, 3, 4]]
    Prediction str:  lllloo
3 loss: 1.54342 prediction:  [[3 3 3 3 4 4]] true Y:  [[1, 0, 2, 3, 3, 4]]
    Prediction str:  lllloo
...
1998 loss: 0.75305 prediction:  [[1 0 2 3 3 4]] true Y:  [[1, 0, 2, 3, 3, 4]]
    Prediction str:  ihello
1999 loss: 0.752973 prediction:  [[1 0 2 3 3 4]] true Y:  [[1, 0, 2, 3, 3, 4]]
    Prediction str:  ihello
'''
 
cs


*RNN - LSTM 구현 소스 (아리아리아리랑)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
#-*- coding: utf-8 -*-
import tensorflow as tf
import numpy as np
tf.set_random_seed(777)  # reproducibility

# Train a one-layer LSTM to read "아리아리아리" and emit the same text shifted
# one character ahead, "리아리아리랑".
idx2char = ['아', '리', '랑']  # class index -> character
x_data = [[0, 1, 0, 1, 0, 1]]   # 아리아리아리
x_one_hot = [[[1, 0, 0],   # 아 0
              [0, 1, 0],   # 리 1
              [1, 0, 0],   # 아 0
              [0, 1, 0],   # 리 1
              [1, 0, 0],   # 아 0
              [0, 1, 0]]]  # 리 1
y_data = [[1, 0, 1, 0, 1, 2]]    # 리아리아리랑

input_dim = 3  # one-hot size
hidden_size = 3  # output from the LSTM.
batch_size = 1   # one sentence
sequence_length = 6  # |리아리아리랑| == 6

# The last input axis is the one-hot width, so it is sized by input_dim
# (numerically equal to hidden_size in this example).
X = tf.placeholder(tf.float32, [None, sequence_length, input_dim])  # X one-hot
Y = tf.placeholder(tf.int32, [None, sequence_length])  # Y label

cell = tf.contrib.rnn.BasicLSTMCell(num_units=hidden_size, state_is_tuple=True)
# Alternative cell: uncomment to train with a basic RNN instead of an LSTM.
# cell = tf.contrib.rnn.BasicRNNCell(num_units=hidden_size)

initial_state = cell.zero_state(batch_size, tf.float32)
outputs, _states = tf.nn.dynamic_rnn(
    cell, X, initial_state=initial_state, dtype=tf.float32)

# The cell outputs are used directly as per-step logits; every time step
# carries the same weight in the loss.
weights = tf.ones([batch_size, sequence_length])
sequence_loss = tf.contrib.seq2seq.sequence_loss(
    logits=outputs, targets=Y, weights=weights)
loss = tf.reduce_mean(sequence_loss)
train = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)

prediction = tf.argmax(outputs, axis=2)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(2000):
        l, _ = sess.run([loss, train], feed_dict={X: x_one_hot, Y: y_data})
        result = sess.run(prediction, feed_dict={X: x_one_hot})
        print(i, "loss:", l, "prediction: ", result, "true Y: ", y_data)

        # Map the predicted class indices back to characters.
        result_str = [idx2char[c] for c in np.squeeze(result)]
        # The label and the joined text are separate print arguments; gluing
        # the two literals together would turn the label into the separator.
        print("\tPrediction str: ", ''.join(result_str))
 
 
 
 
 
cs





*문장을 Char별로 분석해서 숫자 부여하기


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
import tensorflow as tf
import numpy as np
from tensorflow.contrib import rnn
tf.set_random_seed(777)  # reproducibility
 
# Build a character-level vocabulary for the sample sentence: every distinct
# character gets an integer index.
sentence = ("A recurrent neural network is a class of artificial neural network "
            "where connections between units form a directed cycle. "
            "This allows it to exhibit dynamic temporal behavior. Unlike feedforward neural networks,"
            "RNNs can use their internal memory to process arbitrary sequences of inputs.")

print(sentence)
# De-duplicate the characters with a set, then sort them: iterating a set of
# strings can yield a different order on each interpreter run, which would
# give each character a different index from run to run.
char_set = sorted(set(sentence))
print(char_set)

# character -> index (the inverse of char_set)
char_dic = {w: i for i, w in enumerate(char_set)}
print(char_dic)
cs


*Full Sentence 분석하기 :

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import tensorflow as tf
import numpy as np
from tensorflow.contrib import rnn
tf.set_random_seed(777)  # reproducibility
 
# Turn the sample sentence into overlapping training windows: each input is
# seq_length characters and its target is the same window shifted by one.
sentence = ("A recurrent neural network is a class of artificial neural network "
            "where connections between units form a directed cycle. "
            "This allows it to exhibit dynamic temporal behavior. Unlike feedforward neural networks,"
            "RNNs can use their internal memory to process arbitrary sequences of inputs.")

print(sentence)
# De-duplicate the characters with a set, then sort them so every character
# keeps the same index from run to run (set iteration order is not stable).
char_set = sorted(set(sentence))

# character -> index (the inverse of char_set)
char_dic = {w: i for i, w in enumerate(char_set)}
print(char_dic)

data_dim = len(char_set)
hidden_size = len(char_set)
num_classes = len(char_set)
seq_length = 10  # Any arbitrary number

dataX = []
dataY = []
print(len(sentence))
# Stop seq_length characters before the end so the shifted target window
# still fits inside the sentence.
for i in range(0, len(sentence) - seq_length):
    x_str = sentence[i:i + seq_length]
    y_str = sentence[i + 1: i + seq_length + 1]
    print(i, x_str, '->', y_str)

    x = [char_dic[c] for c in x_str]  # x str to index
    y = [char_dic[c] for c in y_str]  # y str to index

    dataX.append(x)
    dataY.append(y)


# All windows are fed as a single batch.
batch_size = len(dataX)
print('batch_size:', batch_size)
 
# Character indices for the input windows (X) and the shifted targets (Y).
X = tf.placeholder(tf.int32, [None, seq_length])
Y = tf.placeholder(tf.int32, [None, seq_length])

# One-hot encoding
X_one_hot = tf.one_hot(X, num_classes)  # tf.one_hot builds the one-hot vectors for us
print(X_one_hot)  # check out the shape

# Stack two LSTM layers, each with hidden_size units. Every layer gets its own
# cell object: repeating one object ([cell] * 2) makes both layers share a
# single cell, which TensorFlow either rejects or treats as weight sharing
# depending on the version.
cell = rnn.MultiRNNCell(
    [rnn.BasicLSTMCell(hidden_size, state_is_tuple=True) for _ in range(2)],
    state_is_tuple=True)

# outputs: unfolding size x hidden size, state = hidden size
# Run the stacked cell over the sequence to obtain the outputs and final state.
outputs, _states = tf.nn.dynamic_rnn(cell, X_one_hot, dtype=tf.float32)

print('output', outputs)
# (optional) softmax layer
X_for_softmax = tf.reshape(outputs, [-1, hidden_size])
softmax_w = tf.get_variable("softmax_w", [hidden_size, num_classes])
softmax_b = tf.get_variable("softmax_b", [num_classes])
outputs = tf.matmul(X_for_softmax, softmax_w) + softmax_b

# reshape out for sequence_loss
outputs = tf.reshape(outputs, [batch_size, seq_length, num_classes])
# All weights are 1 (equal weights)
weights = tf.ones([batch_size, seq_length])
sequence_loss = tf.contrib.seq2seq.sequence_loss(
    logits=outputs, targets=Y, weights=weights)
mean_loss = tf.reduce_mean(sequence_loss)
train_op = tf.train.AdamOptimizer(learning_rate=0.1).minimize(mean_loss)

sess = tf.Session()
sess.run(tf.global_variables_initializer())

for i in range(500):
    _, l, results = sess.run(
        [train_op, mean_loss, outputs], feed_dict={X: dataX, Y: dataY})
    # Show the decoded prediction for every window together with the loss.
    for j, result in enumerate(results):
        index = np.argmax(result, axis=1)
        print(i, j, ''.join([char_set[t] for t in index]), l)

# Let's print the last char of each result to check it works
results = sess.run(outputs, feed_dict={X: dataX})

# Print the full sentence: the whole first window, then only the last
# character of every following window (the windows overlap by all but one).
for j, result in enumerate(results):
    index = np.argmax(result, axis=1)
    if j == 0:  # print all for the first result to make a sentence
        print(''.join([char_set[t] for t in index]), end='')
    else:
        print(char_set[index[-1]], end='')
 
 
cs






아래와 같은 폴더 구조로 만든다(GoogleNet/workspace)




predict.py

readme.txt

retrain.py


readme.txt 파일을 참고하여 

이미지 파일을 다운로드 받고, Training을 위해 학습 명령어를 Terminal에 입력한다.

1
python retrain.py --bottleneck_dir=./workspace/bottlenecks --model_dir=./workspace/inception --output_graph=./workspace/flowers_graph.pb --output_labels=./workspace/flowers_labels.txt --image_dir ./workspace/flower_photos --how_many_training_steps 1000
cs


Training이 완료되면 Terminal에서 아래와 같이 predict.py 파일을 실행해서 예측을 해본다.

1
python predict.py ./workspace/flower_photos/daisy/267148092_4bb874af58.jpg
cs






예제 코드 :

cnn_basic_3x3.ipynb




*Max Pooling : 

가장 큰 값을 가져온다

코드상에서는 

1
2
# 2x2 max pooling moved one step at a time, with no padding.
# ksize and strides are ordered [batch, height, width, channel]; the batch
# and channel entries stay at 1.
pool = tf.nn.max_pool(image, ksize=[1, 2, 2, 1],
                      strides=[1, 1, 1, 1], padding='VALID')
cs


*Padding(원본과 Output이 같음) :


Padding을 적용하면 원본과 Output이 기본적으로 같지만

Strides를 2로 적용할 경우 사이즈가 반으로 줄어든다.


1, 2, 3, 0

4, 5, 6, 0

7, 8, 9, 0

0, 0, 0, 0


1
2
3
4
5
6
7
8
9
10
11
12
13
14
 
 
(1, 3, 3, 1)
[[[[5.]
   [6.]
   [6.]]
 
  [[8.]
   [9.]
   [9.]]
 
  [[8.]
   [9.]
   [9.]]]]
cs





*CNN 예제 코드 :


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import tensorflow as tf
 
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("./MNIST_data/", one_hot=True)

# Images are fed as [batch, 28, 28, 1]; labels are one-hot vectors over the
# 10 digit classes.
X = tf.placeholder(tf.float32, [None, 28, 28, 1])
Y = tf.placeholder(tf.float32, [None, 10])
keep_prob = tf.placeholder(tf.float32)  # dropout keep probability

# L1 Conv shape=(?, 28, 28, 32)
#    Pool     ->(?, 14, 14, 32)
W1 = tf.Variable(tf.random_normal([3, 3, 1, 32], stddev=0.01))
L1 = tf.nn.conv2d(X, W1, strides=[1, 1, 1, 1], padding='SAME')
L1 = tf.nn.relu(L1)
L1 = tf.nn.max_pool(L1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
                    padding='SAME')
# L1 = tf.nn.dropout(L1, keep_prob)

# L2 Conv shape=(?, 14, 14, 64)
#    Pool     ->(?, 7, 7, 64)
W2 = tf.Variable(tf.random_normal([3, 3, 32, 64], stddev=0.01))
L2 = tf.nn.conv2d(L1, W2, strides=[1, 1, 1, 1], padding='SAME')
L2 = tf.nn.relu(L2)
L2 = tf.nn.max_pool(L2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
                    padding='SAME')
# L2 = tf.nn.dropout(L2, keep_prob)

#  (?, 7, 7, 64) Reshape  ->(?, 256)
# Flatten the feature maps and pass them through a 256-unit dense layer.
W3 = tf.Variable(tf.random_normal([7 * 7 * 64, 256], stddev=0.01))
L3 = tf.reshape(L2, [-1, 7 * 7 * 64])
L3 = tf.matmul(L3, W3)
L3 = tf.nn.relu(L3)
L3 = tf.nn.dropout(L3, keep_prob)

# Output layer: 10 logits, one per digit class.
W4 = tf.Variable(tf.random_normal([256, 10], stddev=0.01))
model = tf.matmul(L3, W4)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=model, labels=Y))
optimizer = tf.train.AdamOptimizer(0.001).minimize(cost)
# optimizer = tf.train.RMSPropOptimizer(0.001, 0.9).minimize(cost)

init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

batch_size = 100
total_batch = int(mnist.train.num_examples / batch_size)

for epoch in range(15):
    total_cost = 0

    for i in range(total_batch):
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        # Reshape the batch to the [batch, 28, 28, 1] layout the
        # placeholder X expects.
        batch_xs = batch_xs.reshape(-1, 28, 28, 1)

        _, cost_val = sess.run([optimizer, cost],
                               feed_dict={X: batch_xs,
                                          Y: batch_ys,
                                          keep_prob: 0.7})
        total_cost += cost_val

    print('Epoch:', '%04d' % (epoch + 1),
          'Avg. cost =', '{:.3f}'.format(total_cost / total_batch))

# Evaluate on the test set with dropout disabled (keep_prob = 1).
is_correct = tf.equal(tf.argmax(model, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))
print('accuracy', sess.run(accuracy,
                           feed_dict={X: mnist.test.images.reshape(-1, 28, 28, 1),
                                      Y: mnist.test.labels,
                                      keep_prob: 1}))
 
 
cs





*CNN 예제코드2 :


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import tensorflow as tf
 
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("./MNIST_data/", one_hot=True)

# Images are fed as [batch, 28, 28, 1]; labels are one-hot vectors over the
# 10 digit classes.
X = tf.placeholder(tf.float32, [None, 28, 28, 1])
Y = tf.placeholder(tf.float32, [None, 10])
keep_prob = tf.placeholder(tf.float32)  # dropout keep probability

# L1 Conv shape=(?, 28, 28, 32)
#    Pool     ->(?, 14, 14, 32)
W1 = tf.Variable(tf.random_normal([3, 3, 1, 32], stddev=0.01))
L1 = tf.nn.conv2d(X, W1, strides=[1, 1, 1, 1], padding='SAME')
L1 = tf.nn.relu(L1)
L1 = tf.nn.max_pool(L1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
                    padding='SAME')
# L1 = tf.nn.dropout(L1, keep_prob)

# L2 Conv shape=(?, 14, 14, 64)
#    Pool     ->(?, 7, 7, 64)
W2 = tf.Variable(tf.random_normal([3, 3, 32, 64], stddev=0.01))
L2 = tf.nn.conv2d(L1, W2, strides=[1, 1, 1, 1], padding='SAME')
L2 = tf.nn.relu(L2)
L2 = tf.nn.max_pool(L2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
                    padding='SAME')
# L2 = tf.nn.dropout(L2, keep_prob)

##########################################################
# conv: 128 filters of size 3x3, stride=1, with padding
# relu
# maxpool: 2x2 filter, stride=2, with padding
##########################################################

# L3 Conv shape=(?, 7, 7, 128)
#    Pool     ->(?, 4, 4, 128)
W3 = tf.Variable(tf.random_normal([3, 3, 64, 128], stddev=0.01))
L3 = tf.nn.conv2d(L2, W3, strides=[1, 1, 1, 1], padding='SAME')
L3 = tf.nn.relu(L3)
L3 = tf.nn.max_pool(L3, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
                    padding='SAME')
print(L3)

#  (?, 4, 4, 128) Reshape  ->(?, 256)
# Flatten the feature maps and pass them through a 256-unit dense layer.
W4 = tf.Variable(tf.random_normal([4 * 4 * 128, 256], stddev=0.01))
L4 = tf.reshape(L3, [-1, 4 * 4 * 128])
L4 = tf.matmul(L4, W4)
L4 = tf.nn.relu(L4)
L4 = tf.nn.dropout(L4, keep_prob)

# Output layer: 10 logits, one per digit class.
W5 = tf.Variable(tf.random_normal([256, 10], stddev=0.01))
model = tf.matmul(L4, W5)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=model, labels=Y))
optimizer = tf.train.AdamOptimizer(0.001).minimize(cost)
# optimizer = tf.train.RMSPropOptimizer(0.001, 0.9).minimize(cost)

init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

batch_size = 100
total_batch = int(mnist.train.num_examples / batch_size)

for epoch in range(15):
    total_cost = 0

    for i in range(total_batch):
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        # Reshape the batch to the [batch, 28, 28, 1] layout the
        # placeholder X expects.
        batch_xs = batch_xs.reshape(-1, 28, 28, 1)

        _, cost_val = sess.run([optimizer, cost],
                               feed_dict={X: batch_xs,
                                          Y: batch_ys,
                                          keep_prob: 0.7})
        total_cost += cost_val

    print('Epoch:', '%04d' % (epoch + 1),
          'Avg. cost =', '{:.3f}'.format(total_cost / total_batch))

# Evaluate on the test set with dropout disabled (keep_prob = 1).
is_correct = tf.equal(tf.argmax(model, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))
print('accuracy', sess.run(accuracy,
                           feed_dict={X: mnist.test.images.reshape(-1, 28, 28, 1),
                                      Y: mnist.test.labels,
                                      keep_prob: 1}))
 
cs


*Neural Nets(NN) for MNIST :


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
from tensorflow.examples.tutorials.mnist import input_data
 
import tensorflow as tf
import random
import matplotlib.pylab as plt
 
# Three-layer fully connected network for MNIST with ReLU activations and
# randomly (normal) initialised weights.
learning_rate = 0.001
training_epochs = 15
batch_size = 100
display_step = 1  # print the average cost every display_step epochs

mnist = input_data.read_data_sets('./MNIST_data/', one_hot=True)

# Create the model
X = tf.placeholder(tf.float32, [None, 784])  # any number of rows, 784 columns
Y = tf.placeholder(tf.float32, [None, 10])   # any number of rows, 10 columns

W1 = tf.Variable(tf.random_normal([784, 256]))
W2 = tf.Variable(tf.random_normal([256, 256]))
W3 = tf.Variable(tf.random_normal([256, 10]))

b1 = tf.Variable(tf.random_normal([256]))
b2 = tf.Variable(tf.random_normal([256]))
b3 = tf.Variable(tf.random_normal([10]))

L1 = tf.nn.relu(tf.add(tf.matmul(X, W1), b1))
L2 = tf.nn.relu(tf.add(tf.matmul(L1, W2), b2))
hypothesis = tf.add(tf.matmul(L2, W3), b3)  # logits; softmax is applied inside the loss

cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=hypothesis, labels=Y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

init = tf.global_variables_initializer()

# A single session, scoped by the with-block, runs both training and the
# final evaluation; inside the block it is also the default session that
# accuracy.eval() relies on.
with tf.Session() as sess:
    sess.run(init)

    total_batch = int(mnist.train.num_examples / batch_size)
    for epoch in range(training_epochs):
        avg_cost = 0.
        for i in range(total_batch):
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)
            # One run fetches the batch cost and applies the update step.
            c, _ = sess.run([cost, optimizer],
                            feed_dict={X: batch_xs, Y: batch_ys})
            avg_cost += c / total_batch
        if epoch % display_step == 0:
            print("Epoch:", '%04d' % (epoch + 1), "cost=", "{:.9f}".format(avg_cost))

    print("Optimization Finished")

    # Fraction of test images whose arg-max logit matches the one-hot label.
    correct_prediction = tf.equal(tf.argmax(hypothesis, 1), tf.argmax(Y, 1))

    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    print("Accuracy:", accuracy.eval({X: mnist.test.images, Y: mnist.test.labels}))
cs


*Xavier initialization :


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
from tensorflow.examples.tutorials.mnist import input_data
 
import tensorflow as tf
import random
import matplotlib.pylab as plt
 
# Three-layer fully connected network for MNIST whose weights use Xavier
# initialisation instead of a plain random normal.
learning_rate = 0.001
training_epochs = 15
batch_size = 100
display_step = 1  # print the average cost every display_step epochs

mnist = input_data.read_data_sets('./MNIST_data/', one_hot=True)


def xavier_init(n_inputs, n_outputs, uniform=True):
    """Return a Xavier-style initializer for a weight matrix.

    Args:
        n_inputs: number of input units of the layer (fan-in).
        n_outputs: number of output units of the layer (fan-out).
        uniform: if True, sample uniformly from
            [-sqrt(6 / (n_inputs + n_outputs)), +sqrt(6 / (n_inputs + n_outputs))];
            otherwise sample from a truncated normal with standard deviation
            sqrt(3 / (n_inputs + n_outputs)).

    Returns:
        A TensorFlow initializer usable as ``initializer=`` in tf.get_variable.
    """
    fan_sum = n_inputs + n_outputs
    if uniform:
        # Plain Python floats keep the bounds out of the graph.
        init_range = (6.0 / fan_sum) ** 0.5
        return tf.random_uniform_initializer(-init_range, init_range)
    stddev = (3.0 / fan_sum) ** 0.5
    return tf.truncated_normal_initializer(stddev=stddev)


# Create the model
X = tf.placeholder(tf.float32, [None, 784])  # any number of rows, 784 columns
Y = tf.placeholder(tf.float32, [None, 10])   # any number of rows, 10 columns

# W1 = tf.Variable(tf.random_normal([784, 256]))
# W2 = tf.Variable(tf.random_normal([256, 256]))
# W3 = tf.Variable(tf.random_normal([256, 10]))
# Each initializer receives the fan-in / fan-out of its own layer.
W1 = tf.get_variable("W1", shape=[784, 256], initializer=xavier_init(784, 256))
W2 = tf.get_variable("W2", shape=[256, 256], initializer=xavier_init(256, 256))
W3 = tf.get_variable("W3", shape=[256, 10], initializer=xavier_init(256, 10))

b1 = tf.Variable(tf.random_normal([256]))
b2 = tf.Variable(tf.random_normal([256]))
b3 = tf.Variable(tf.random_normal([10]))

L1 = tf.nn.relu(tf.add(tf.matmul(X, W1), b1))
L2 = tf.nn.relu(tf.add(tf.matmul(L1, W2), b2))
hypothesis = tf.add(tf.matmul(L2, W3), b3)  # logits; softmax is applied inside the loss

cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=hypothesis, labels=Y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

init = tf.global_variables_initializer()

# A single session, scoped by the with-block, runs both training and the
# final evaluation; inside the block it is also the default session that
# accuracy.eval() relies on.
with tf.Session() as sess:
    sess.run(init)

    total_batch = int(mnist.train.num_examples / batch_size)
    for epoch in range(training_epochs):
        avg_cost = 0.
        for i in range(total_batch):
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)
            # One run fetches the batch cost and applies the update step.
            c, _ = sess.run([cost, optimizer],
                            feed_dict={X: batch_xs, Y: batch_ys})
            avg_cost += c / total_batch
        if epoch % display_step == 0:
            print("Epoch:", '%04d' % (epoch + 1), "cost=", "{:.9f}".format(avg_cost))

    print("Optimization Finished")

    # Fraction of test images whose arg-max logit matches the one-hot label.
    correct_prediction = tf.equal(tf.argmax(hypothesis, 1), tf.argmax(Y, 1))

    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    print("Accuracy:", accuracy.eval({X: mnist.test.images, Y: mnist.test.labels}))
cs





* Mnist 5NN :


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import tensorflow as tf
import random
import matplotlib.pyplot as plt
 
from tensorflow.examples.tutorials.mnist import input_data
 
tf.set_random_seed(777)  # reproducibility

mnist = input_data.read_data_sets("./MNIST_data/", one_hot=True)

# parameters
learning_rate = 0.001
training_epochs = 15
batch_size = 100

# input place holders
X = tf.placeholder(tf.float32, [None, 784])
Y = tf.placeholder(tf.float32, [None, 10])

# dropout (keep_prob) rate  0.7 on training, but should be 1 for testing
keep_prob = tf.placeholder(tf.float32)

# weights & bias for nn layers
# http://stackoverflow.com/questions/33640581/how-to-do-xavier-initialization-on-tensorflow
# Four hidden layers of 512 ReLU units, each followed by dropout, then a
# 10-way output layer.
W1 = tf.get_variable("W1", shape=[784, 512],
                     initializer=tf.contrib.layers.xavier_initializer())
b1 = tf.Variable(tf.random_normal([512]))
L1 = tf.nn.relu(tf.matmul(X, W1) + b1)
L1 = tf.nn.dropout(L1, keep_prob=keep_prob)

W2 = tf.get_variable("W2", shape=[512, 512],
                     initializer=tf.contrib.layers.xavier_initializer())
b2 = tf.Variable(tf.random_normal([512]))
L2 = tf.nn.relu(tf.matmul(L1, W2) + b2)
L2 = tf.nn.dropout(L2, keep_prob=keep_prob)

W3 = tf.get_variable("W3", shape=[512, 512],
                     initializer=tf.contrib.layers.xavier_initializer())
b3 = tf.Variable(tf.random_normal([512]))
L3 = tf.nn.relu(tf.matmul(L2, W3) + b3)
L3 = tf.nn.dropout(L3, keep_prob=keep_prob)

W4 = tf.get_variable("W4", shape=[512, 512],
                     initializer=tf.contrib.layers.xavier_initializer())
b4 = tf.Variable(tf.random_normal([512]))
L4 = tf.nn.relu(tf.matmul(L3, W4) + b4)
L4 = tf.nn.dropout(L4, keep_prob=keep_prob)

W5 = tf.get_variable("W5", shape=[512, 10],
                     initializer=tf.contrib.layers.xavier_initializer())
b5 = tf.Variable(tf.random_normal([10]))
hypothesis = tf.matmul(L4, W5) + b5  # logits; softmax is applied inside the loss

# define cost/loss & optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
    logits=hypothesis, labels=Y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# initialize
sess = tf.Session()
sess.run(tf.global_variables_initializer())

# train my model
for epoch in range(training_epochs):
    avg_cost = 0
    total_batch = int(mnist.train.num_examples / batch_size)

    for i in range(total_batch):
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        feed_dict = {X: batch_xs, Y: batch_ys, keep_prob: 0.7}
        c, _ = sess.run([cost, optimizer], feed_dict=feed_dict)
        avg_cost += c / total_batch

    print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.9f}'.format(avg_cost))

print('Learning Finished!')

# Test model and check accuracy
correct_prediction = tf.equal(tf.argmax(hypothesis, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
print('Accuracy:', sess.run(accuracy, feed_dict={
      X: mnist.test.images, Y: mnist.test.labels, keep_prob: 1}))

# Get one and predict
r = random.randint(0, mnist.test.num_examples - 1)
print("Label: ", sess.run(tf.argmax(mnist.test.labels[r:r + 1], 1)))
print("Prediction: ", sess.run(
    tf.argmax(hypothesis, 1), feed_dict={X: mnist.test.images[r:r + 1], keep_prob: 1}))

# Show the chosen test image as a 28x28 greyscale picture.
plt.imshow(mnist.test.images[r:r + 1].reshape(28, 28),
           cmap='Greys', interpolation='nearest')
plt.show()
 
'''
Epoch: 0001 cost = 0.447322626
Epoch: 0002 cost = 0.157285590
Epoch: 0003 cost = 0.121884535
Epoch: 0004 cost = 0.098128681
Epoch: 0005 cost = 0.082901778
Epoch: 0006 cost = 0.075337573
Epoch: 0007 cost = 0.069752543
Epoch: 0008 cost = 0.060884363
Epoch: 0009 cost = 0.055276413
Epoch: 0010 cost = 0.054631256
Epoch: 0011 cost = 0.049675195
Epoch: 0012 cost = 0.049125314
Epoch: 0013 cost = 0.047231930
Epoch: 0014 cost = 0.041290121
Epoch: 0015 cost = 0.043621063
Learning Finished!
Accuracy: 0.9804
'''
 
cs


* More Deep & Dropout :


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf
import random
import matplotlib.pylab as plt
 
tf.set_random_seed(777)

mnist = input_data.read_data_sets('../MNIST_data/', one_hot=True)

# The interactive session becomes the default session, which the .run() and
# .eval() calls below rely on.
sess = tf.InteractiveSession()

X = tf.placeholder(tf.float32, [None, 784])
Y = tf.placeholder(tf.float32, [None, 10])

W1 = tf.get_variable("W1", shape=[784, 256],
                     initializer=tf.contrib.layers.xavier_initializer())
W2 = tf.get_variable("W2", shape=[256, 256],
                     initializer=tf.contrib.layers.xavier_initializer())
W3 = tf.get_variable("W3", shape=[256, 10],
                     initializer=tf.contrib.layers.xavier_initializer())

b1 = tf.Variable(tf.zeros([256]))
b2 = tf.Variable(tf.zeros([256]))
b3 = tf.Variable(tf.zeros([10]))

# NOTE: despite its name, dropout_rate is passed as keep_prob, i.e. it is the
# probability of KEEPING a unit (0.7 while training, 1 while evaluating).
dropout_rate = tf.placeholder(tf.float32)
_L1 = tf.nn.relu(tf.matmul(X, W1) + b1)
L1 = tf.nn.dropout(_L1, keep_prob=dropout_rate)
_L2 = tf.nn.relu(tf.matmul(L1, W2) + b2)
L2 = tf.nn.dropout(_L2, keep_prob=dropout_rate)
hypothesis = tf.matmul(L2, W3) + b3  # logits; softmax is applied inside the loss

cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=hypothesis, labels=Y))
train = tf.train.AdamOptimizer(0.001).minimize(cost)

tf.global_variables_initializer().run()

for i in range(5500):  # 5500
    batch_xs, batch_ys = mnist.train.next_batch(100)
    train.run({X: batch_xs, Y: batch_ys, dropout_rate: 0.7})
    print("cost:", cost.eval({X: batch_xs, Y: batch_ys, dropout_rate: 0.7}))

# Evaluate on the test set with dropout disabled (keep probability 1).
correct_prediction = tf.equal(tf.argmax(hypothesis, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
print(accuracy.eval({X: mnist.test.images, Y: mnist.test.labels, dropout_rate: 1}))
print(hypothesis.eval({X: mnist.test.images, Y: mnist.test.labels, dropout_rate: 1}))

# Pick one random test example and compare its label with the prediction.
r = random.randint(0, mnist.test.num_examples - 1)
print('Label:', sess.run(tf.argmax(mnist.test.labels[r:r + 1], 1)))
print('Prediction:', sess.run(tf.argmax(hypothesis, 1), {X: mnist.test.images[r:r + 1], dropout_rate: 1}))
print(mnist.test.images[r:r + 1])

# Show the chosen test image as a 28x28 greyscale picture.
plt.imshow(mnist.test.images[r:r + 1].reshape(28, 28),
           cmap='Greys', interpolation='nearest')
plt.show()
 
cs






*9 Hidden Layer with tensorboard :

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import tensorflow as tf
import numpy as np
tf.set_random_seed(777)  # for reproducibility

xy = np.loadtxt('./data/07train.txt')

# Every column except the last is a feature; the last column is the label,
# kept two-dimensional so it lines up with the [None, 1] placeholder Y.
x_data = xy[:, 0:-1]
y_data = xy[:, [-1]]

X = tf.placeholder(tf.float32, [None, 2])
Y = tf.placeholder(tf.float32, [None, 1])

# Deep network: 2 inputs -> ten hidden layers of 5 units -> 1 output.
W1 = tf.Variable(tf.random_uniform([2, 5], -1., 1.))
W2 = tf.Variable(tf.random_uniform([5, 5], -1., 1.))
W3 = tf.Variable(tf.random_uniform([5, 5], -1., 1.))
W4 = tf.Variable(tf.random_uniform([5, 5], -1., 1.))
W5 = tf.Variable(tf.random_uniform([5, 5], -1., 1.))
W6 = tf.Variable(tf.random_uniform([5, 5], -1., 1.))
W7 = tf.Variable(tf.random_uniform([5, 5], -1., 1.))
W8 = tf.Variable(tf.random_uniform([5, 5], -1., 1.))
W9 = tf.Variable(tf.random_uniform([5, 5], -1., 1.))
W10 = tf.Variable(tf.random_uniform([5, 5], -1., 1.))
W11 = tf.Variable(tf.random_uniform([5, 1], -1., 1.))

b1 = tf.Variable(tf.zeros([5]))
b2 = tf.Variable(tf.zeros([5]))
b3 = tf.Variable(tf.zeros([5]))
b4 = tf.Variable(tf.zeros([5]))
b5 = tf.Variable(tf.zeros([5]))
b6 = tf.Variable(tf.zeros([5]))
b7 = tf.Variable(tf.zeros([5]))
b8 = tf.Variable(tf.zeros([5]))
b9 = tf.Variable(tf.zeros([5]))
b10 = tf.Variable(tf.zeros([5]))
b11 = tf.Variable(tf.zeros([1]))

# Hypothesis: each layer sits in its own name scope so the graph view groups
# its ops together.
with tf.name_scope("layer1"):
    L1 = tf.sigmoid(tf.matmul(X, W1) + b1)
with tf.name_scope("layer2"):
    L2 = tf.sigmoid(tf.matmul(L1, W2) + b2)
with tf.name_scope("layer3"):
    L3 = tf.sigmoid(tf.matmul(L2, W3) + b3)
with tf.name_scope("layer4"):
    L4 = tf.sigmoid(tf.matmul(L3, W4) + b4)
with tf.name_scope("layer5"):
    L5 = tf.sigmoid(tf.matmul(L4, W5) + b5)
with tf.name_scope("layer6"):
    L6 = tf.sigmoid(tf.matmul(L5, W6) + b6)
with tf.name_scope("layer7"):
    L7 = tf.sigmoid(tf.matmul(L6, W7) + b7)
with tf.name_scope("layer8"):
    L8 = tf.sigmoid(tf.matmul(L7, W8) + b8)
with tf.name_scope("layer9"):
    L9 = tf.sigmoid(tf.matmul(L8, W9) + b9)
with tf.name_scope("layer10"):
    L10 = tf.sigmoid(tf.matmul(L9, W10) + b10)
with tf.name_scope("layer11"):
    hypothesis = tf.sigmoid(tf.matmul(L10, W11) + b11)

# Alternative 3-layer network. It is left disabled: assigning it here would
# replace the deep hypothesis built above, and it needs W3 / b3 shaped
# [5, 1] / [1] to produce one output per example.
# L1 = tf.sigmoid(tf.matmul(X, W1) + b1)
# L2 = tf.sigmoid(tf.matmul(L1, W2) + b2)
# hypothesis = tf.sigmoid(tf.matmul(L2, W3) + b3)

# Binary cross-entropy between the sigmoid output and the 0/1 label.
cost = -tf.reduce_mean(Y * tf.log(hypothesis) + (1 - Y) *
                       tf.log(1 - hypothesis))

train = tf.train.GradientDescentOptimizer(learning_rate=0.1).minimize(cost)

# Threshold the output at 0.5 to get a 0/1 prediction.
predicted = tf.cast(hypothesis > 0.5, dtype=tf.float32)
accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, Y), dtype=tf.float32))

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    for step in range(10001):
        sess.run(train, feed_dict={X: x_data, Y: y_data})
        if step % 100 == 0:
            print(step, sess.run(cost, feed_dict={
                  X: x_data, Y: y_data}))

    h, c, a = sess.run([hypothesis, predicted, accuracy],
                       feed_dict={X: x_data, Y: y_data})
    print("\nHypothesis: ", h, "\nCorrect: ", c, "\nAccuracy: ", a)
 
 
cs





*TensorBoard 란 :
TF logging / debugging tool

=> Visualize your TF Graph

=> Plot quantitative metrics

=> Show additional data


*Tensorboard를 사용하는 5단계

=> From TF Graph, decide which tensors you want to log

=> Merge all summaries

=> Create writer and add graph

=> Run summary merge and add_summary

=> Launch TensorBoard




*Tensorboard 출력하기 :


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
# View the logs with:  tensorboard --logdir=./tmp/xor_logs3
import tensorflow as tf
import numpy as np

# 07train.txt holds one "x1 x2 y" sample per row; unpack=True hands the
# columns back as rows, so the last row is the label vector.
xy = np.loadtxt('./data/07train.txt', unpack=True)
x_data = np.transpose(xy[0:-1])
# Column vector of labels; -1 lets the row count follow the data file.
y_data = np.reshape(xy[-1], (-1, 1))

print(x_data)
print(y_data)

X = tf.placeholder(tf.float32, name='x-input')
Y = tf.placeholder(tf.float32, name='y-input')

# 2 inputs -> seven hidden layers of 10 units -> 1 output.
w1 = tf.Variable(tf.random_uniform([2, 10], -1.0, 1.0), name='weight1')
w2 = tf.Variable(tf.random_uniform([10, 10], -1.0, 1.0), name='weight2')
w3 = tf.Variable(tf.random_uniform([10, 10], -1.0, 1.0), name='weight3')
w4 = tf.Variable(tf.random_uniform([10, 10], -1.0, 1.0), name='weight4')
w5 = tf.Variable(tf.random_uniform([10, 10], -1.0, 1.0), name='weight5')
w6 = tf.Variable(tf.random_uniform([10, 10], -1.0, 1.0), name='weight6')
w7 = tf.Variable(tf.random_uniform([10, 10], -1.0, 1.0), name='weight7')
w8 = tf.Variable(tf.random_uniform([10, 1], -1.0, 1.0), name='weight8')

# Each bias length equals the column count of the matching weight matrix.
b1 = tf.Variable(tf.zeros([10]), name="Bias1")
b2 = tf.Variable(tf.zeros([10]), name="Bias2")
b3 = tf.Variable(tf.zeros([10]), name="Bias3")
b4 = tf.Variable(tf.zeros([10]), name="Bias4")
b5 = tf.Variable(tf.zeros([10]), name="Bias5")
b6 = tf.Variable(tf.zeros([10]), name="Bias6")
b7 = tf.Variable(tf.zeros([10]), name="Bias7")
b8 = tf.Variable(tf.zeros([1]), name="Bias8")

# One name scope per layer so the graph view groups each layer's ops.
# (Swap tf.sigmoid for tf.nn.relu in layers 1-7 to try ReLU activations.)
with tf.name_scope("layer1"):
    L2 = tf.sigmoid(tf.matmul(X, w1) + b1)
with tf.name_scope("layer2"):
    L3 = tf.sigmoid(tf.matmul(L2, w2) + b2)
with tf.name_scope("layer3"):
    L4 = tf.sigmoid(tf.matmul(L3, w3) + b3)
with tf.name_scope("layer4"):
    L5 = tf.sigmoid(tf.matmul(L4, w4) + b4)
with tf.name_scope("layer5"):
    L6 = tf.sigmoid(tf.matmul(L5, w5) + b5)
with tf.name_scope("layer6"):
    L7 = tf.sigmoid(tf.matmul(L6, w6) + b6)
with tf.name_scope("layer7"):
    L8 = tf.sigmoid(tf.matmul(L7, w7) + b7)
with tf.name_scope("layer8"):
    hypothesis = tf.sigmoid(tf.matmul(L8, w8) + b8)

# Binary cross-entropy, logged as a scalar summary.
with tf.name_scope('cost'):
    cost = -tf.reduce_mean(Y * tf.log(hypothesis) + (1 - Y) * tf.log(1 - hypothesis))
    tf.summary.scalar("cost", cost)

with tf.name_scope('train'):
    a = tf.Variable(0.003)  # learning rate
    optimizer = tf.train.GradientDescentOptimizer(a)
    train = optimizer.minimize(cost)

# Histogram summaries for the first two layers and the labels.
w1_hist = tf.summary.histogram("weights1", w1)
w2_hist = tf.summary.histogram("weights2", w2)
b1_hist = tf.summary.histogram("biases1", b1)
b2_hist = tf.summary.histogram("biases2", b2)
y_hist = tf.summary.histogram("y", Y)

with tf.name_scope('accuracy'):
    # floor(h + 0.5) rounds the sigmoid output to 0 or 1.
    correct_prediction = tf.equal(tf.floor(hypothesis + 0.5), Y)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    tf.summary.scalar("accuracy", accuracy)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)

    merged = tf.summary.merge_all()
    writer = tf.summary.FileWriter("./tmp/xor_logs3", sess.graph)

    feed = {X: x_data, Y: y_data}
    for step in range(20000):
        sess.run(train, feed_dict=feed)
        if step % 200 == 0:
            # Fetch the summary and the printed values in a single run.
            summary, cost_val, w1_val, w2_val = sess.run(
                [merged, cost, w1, w2], feed_dict=feed)
            writer.add_summary(summary, step)
            print(step, cost_val, w1_val, w2_val)

    print(sess.run([hypothesis, tf.floor(hypothesis + 0.5), correct_prediction], feed_dict=feed))
    print("accuracy", sess.run(accuracy, feed_dict=feed))

    # Flush buffered events to disk; without this the last summaries can be lost.
    writer.close()
 
cs


위코드를 실행 후 

View > Tool Windows > Terminal 선택 후 

아래 Command 실행 :


$ tensorboard --logdir=./tmp/xor_logs3


http://localhost:6006 브라우저에서 열기






*ReLU 소스코드 :


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import tensorflow as tf
import numpy as np

# Each row of 07train.txt is "x1 x2 y": every column but the last is a
# feature, and the last column is the label (kept 2-D via the [-1] index).
xy = np.loadtxt('./data/07train.txt')
x_data = xy[:, 0:-1]
y_data = xy[:, [-1]]

X = tf.placeholder(tf.float32, [None, 2])
Y = tf.placeholder(tf.float32, [None, 1])

# 2 inputs -> ten hidden layers of 10 units -> 1 output.
w1 = tf.Variable(tf.random_uniform([2, 10], -1.0, 1.0), name='weight1')
w2 = tf.Variable(tf.random_uniform([10, 10], -1.0, 1.0), name='weight2')
w3 = tf.Variable(tf.random_uniform([10, 10], -1.0, 1.0), name='weight3')
w4 = tf.Variable(tf.random_uniform([10, 10], -1.0, 1.0), name='weight4')
w5 = tf.Variable(tf.random_uniform([10, 10], -1.0, 1.0), name='weight5')
w6 = tf.Variable(tf.random_uniform([10, 10], -1.0, 1.0), name='weight6')
w7 = tf.Variable(tf.random_uniform([10, 10], -1.0, 1.0), name='weight7')
w8 = tf.Variable(tf.random_uniform([10, 10], -1.0, 1.0), name='weight8')
w9 = tf.Variable(tf.random_uniform([10, 10], -1.0, 1.0), name='weight9')
w10 = tf.Variable(tf.random_uniform([10, 10], -1.0, 1.0), name='weight10')
w11 = tf.Variable(tf.random_uniform([10, 1], -1.0, 1.0), name='weight11')

# Each bias length equals the column count of the matching weight matrix.
b1 = tf.Variable(tf.zeros([10]), name="Bias1")
b2 = tf.Variable(tf.zeros([10]), name="Bias2")
b3 = tf.Variable(tf.zeros([10]), name="Bias3")
b4 = tf.Variable(tf.zeros([10]), name="Bias4")
b5 = tf.Variable(tf.zeros([10]), name="Bias5")
b6 = tf.Variable(tf.zeros([10]), name="Bias6")
b7 = tf.Variable(tf.zeros([10]), name="Bias7")
b8 = tf.Variable(tf.zeros([10]), name="Bias8")
b9 = tf.Variable(tf.zeros([10]), name="Bias9")
b10 = tf.Variable(tf.zeros([10]), name="Bias10")
b11 = tf.Variable(tf.zeros([1]), name="Bias11")

# Hidden layers use ReLU; only the output layer keeps a sigmoid so the
# hypothesis stays in (0, 1) for the cross-entropy cost.
L1 = tf.nn.relu(tf.matmul(X, w1) + b1)
L2 = tf.nn.relu(tf.matmul(L1, w2) + b2)
L3 = tf.nn.relu(tf.matmul(L2, w3) + b3)
L4 = tf.nn.relu(tf.matmul(L3, w4) + b4)
L5 = tf.nn.relu(tf.matmul(L4, w5) + b5)
L6 = tf.nn.relu(tf.matmul(L5, w6) + b6)
L7 = tf.nn.relu(tf.matmul(L6, w7) + b7)
L8 = tf.nn.relu(tf.matmul(L7, w8) + b8)
L9 = tf.nn.relu(tf.matmul(L8, w9) + b9)
L10 = tf.nn.relu(tf.matmul(L9, w10) + b10)
hypothesis = tf.sigmoid(tf.matmul(L10, w11) + b11)

# Binary cross-entropy.
cost = -tf.reduce_mean(Y * tf.log(hypothesis) + (1 - Y) * tf.log(1 - hypothesis))

a = tf.Variable(0.01)  # learning rate
optimizer = tf.train.GradientDescentOptimizer(a)
train = optimizer.minimize(cost)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)

    feed = {X: x_data, Y: y_data}
    for step in range(10000):
        sess.run(train, feed_dict=feed)
        if step % 200 == 0:
            print(step, sess.run(cost, feed_dict=feed), sess.run(w1), sess.run(w2))

    # floor(h + 0.5) rounds the sigmoid output to 0 or 1.
    correct_prediction = tf.equal(tf.floor(hypothesis + 0.5), Y)

    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    print(sess.run([hypothesis, tf.floor(hypothesis + 0.5), correct_prediction], feed_dict=feed))
    print("accuracy", accuracy.eval(feed))
 
cs



*Activation Function : 자극에 반응을 할지 안 할지 결정(뉴런이 하는 역할과 동일)


X-> W-> S -> Y(예측값)

여러 뉴런들이 각 특징들을 잘 뽑아내서 이해를 잘 시킨다.


*CNN 알고리즘(Convolutional Neural Networks)

1980(LeCun)

=>Big Problem : 

사람의 두뇌를 구성하려면, 15층 16층 정도로 깊게 쌓아야 하는데

Neural Network로는 레이어 구성으로 동작이 잘 안된다는 것을 깨달음

=> Breakthrough :

Neural networks with many layers really could be trained well, if the weights are initialized in a clever way rather than randomly




*Geoffrey Hinton's Summary of findings up to today

- Our labeled datasets were thousands of times too small

- 컴퓨터가 너무 느렸다.

- 초기값을 잘못 줬다.

- We used the wrong type of non-linearity


2단의 NN을 쌓으려면 

1단의 출력값을 2단의 입력값으로 쓰는 방식으로 연결해야 한다.


1) K(x) = sigmoid(X W1 + B1)

2) Y = H(x) = sigmoid(K(x) W2 + B2)




*XOR With logistic regression :


07train.txt

1
2
3
4
5
6
# xor
# x1 x2 y
0   0   0
0   1   1
1   0   1
1   1   0
cs



1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import tensorflow as tf
import numpy as np
tf.set_random_seed(777)  # for reproducibility

# Each row of 07train.txt is "x1 x2 y": every column but the last is a
# feature, and the last column is the label (kept 2-D via the [-1] index).
xy = np.loadtxt('./data/07train.txt')

x_data = xy[:, 0:-1]
y_data = xy[:, [-1]]

X = tf.placeholder(tf.float32, [None, 2])
Y = tf.placeholder(tf.float32, [None, 1])

# A single logistic unit: 2 inputs -> 1 output.
W = tf.Variable(tf.random_uniform([2, 1], -1., 1.))
b = tf.Variable(tf.random_uniform([1], -1., 1.))

hypothesis = tf.sigmoid(tf.matmul(X, W) + b)

# Binary cross-entropy.
cost = -tf.reduce_mean(Y * tf.log(hypothesis) + (1 - Y) *
                       tf.log(1 - hypothesis))

train = tf.train.GradientDescentOptimizer(learning_rate=0.1).minimize(cost)

# Threshold the hypothesis at 0.5 and compare against the labels.
predicted = tf.cast(hypothesis > 0.5, dtype=tf.float32)
accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, Y), dtype=tf.float32))

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    feed = {X: x_data, Y: y_data}
    for step in range(10001):
        sess.run(train, feed_dict=feed)
        if step % 100 == 0:
            print(step, sess.run(cost, feed_dict=feed), sess.run(W))

    h, c, a = sess.run([hypothesis, predicted, accuracy], feed_dict=feed)
    print("\nHypothesis: ", h, "\nCorrect: ", c, "\nAccuracy: ", a)
 
 
cs





*Neuron이 2개일 때 소스코드 : 


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import tensorflow as tf
import numpy as np
tf.set_random_seed(777)  # for reproducibility

# Each row of 07train.txt is "x1 x2 y": every column but the last is a
# feature, and the last column is the label (kept 2-D via the [-1] index).
xy = np.loadtxt('./data/07train.txt')

x_data = xy[:, 0:-1]
y_data = xy[:, [-1]]

X = tf.placeholder(tf.float32, [None, 2])
Y = tf.placeholder(tf.float32, [None, 1])

# Two stacked layers need two W's and two b's. The shapes are [2, 2] and
# [2, 1]: the column count of W1 must equal the row count of W2.
# Each bias length must equal the column count of its layer's W.
W1 = tf.Variable(tf.random_uniform([2, 2], -1., 1.))
W2 = tf.Variable(tf.random_uniform([2, 1], -1., 1.))
b1 = tf.Variable(tf.random_uniform([2], -1., 1.))
b2 = tf.Variable(tf.random_uniform([1], -1., 1.))

# The first layer's output is the second layer's input.
L1 = tf.sigmoid(tf.matmul(X, W1) + b1)
hypothesis = tf.sigmoid(tf.matmul(L1, W2) + b2)

# Binary cross-entropy.
cost = -tf.reduce_mean(Y * tf.log(hypothesis) + (1 - Y) *
                       tf.log(1 - hypothesis))

train = tf.train.GradientDescentOptimizer(learning_rate=0.1).minimize(cost)

# Threshold the hypothesis at 0.5 and compare against the labels.
predicted = tf.cast(hypothesis > 0.5, dtype=tf.float32)
accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, Y), dtype=tf.float32))

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    feed = {X: x_data, Y: y_data}
    for step in range(10001):
        sess.run(train, feed_dict=feed)
        if step % 100 == 0:
            print(step, sess.run(cost, feed_dict=feed))

    h, c, a = sess.run([hypothesis, predicted, accuracy], feed_dict=feed)
    print("\nHypothesis: ", h, "\nCorrect: ", c, "\nAccuracy: ", a)
 
 
cs



*계층은 2단에 뉴런 10개를 사용한다고 했을 때 소스 코드 :

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import tensorflow as tf
import numpy as np
tf.set_random_seed(777)  # for reproducibility

# Each row of 07train.txt is "x1 x2 y": every column but the last is a
# feature, and the last column is the label (kept 2-D via the [-1] index).
xy = np.loadtxt('./data/07train.txt')

x_data = xy[:, 0:-1]
y_data = xy[:, [-1]]

X = tf.placeholder(tf.float32, [None, 2])
Y = tf.placeholder(tf.float32, [None, 1])

# Two layers with 10 hidden neurons. The shapes are [2, 10] and [10, 1]:
# the column count of W1 must equal the row count of W2.
# Each bias length must equal the column count of its layer's W.
W1 = tf.Variable(tf.random_uniform([2, 10], -1., 1.))
W2 = tf.Variable(tf.random_uniform([10, 1], -1., 1.))
b1 = tf.Variable(tf.random_uniform([10], -1., 1.))
b2 = tf.Variable(tf.random_uniform([1], -1., 1.))

# The first layer's output is the second layer's input.
L1 = tf.sigmoid(tf.matmul(X, W1) + b1)
hypothesis = tf.sigmoid(tf.matmul(L1, W2) + b2)

# Binary cross-entropy.
cost = -tf.reduce_mean(Y * tf.log(hypothesis) + (1 - Y) *
                       tf.log(1 - hypothesis))

train = tf.train.GradientDescentOptimizer(learning_rate=0.1).minimize(cost)

# Threshold the hypothesis at 0.5 and compare against the labels.
predicted = tf.cast(hypothesis > 0.5, dtype=tf.float32)
accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, Y), dtype=tf.float32))

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    feed = {X: x_data, Y: y_data}
    for step in range(10001):
        sess.run(train, feed_dict=feed)
        if step % 100 == 0:
            print(step, sess.run(cost, feed_dict=feed))

    h, c, a = sess.run([hypothesis, predicted, accuracy], feed_dict=feed)
    print("\nHypothesis: ", h, "\nCorrect: ", c, "\nAccuracy: ", a)
 
 
cs


*계층은 3단에 뉴런 5개를 사용한다고 했을 때 소스 코드 :


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import tensorflow as tf
import numpy as np
tf.set_random_seed(777)  # for reproducibility

# Each row of 07train.txt is "x1 x2 y": every column but the last is a
# feature, and the last column is the label (kept 2-D via the [-1] index).
xy = np.loadtxt('./data/07train.txt')

x_data = xy[:, 0:-1]
y_data = xy[:, [-1]]

X = tf.placeholder(tf.float32, [None, 2])
Y = tf.placeholder(tf.float32, [None, 1])

# Three layers: 2 inputs -> 5 units -> 4 units -> 1 output.
# Adjacent weight shapes must chain ([2, 5] -> [5, 4] -> [4, 1]), and each
# bias length equals the column count of its layer's W.
W1 = tf.Variable(tf.random_uniform([2, 5], -1., 1.))
W2 = tf.Variable(tf.random_uniform([5, 4], -1., 1.))
W3 = tf.Variable(tf.random_uniform([4, 1], -1., 1.))
b1 = tf.Variable(tf.random_uniform([5], -1., 1.))
b2 = tf.Variable(tf.random_uniform([4], -1., 1.))
b3 = tf.Variable(tf.random_uniform([1], -1., 1.))

# Stack the three layers: each layer's output feeds the next.
L1 = tf.sigmoid(tf.matmul(X, W1) + b1)
L2 = tf.sigmoid(tf.matmul(L1, W2) + b2)
hypothesis = tf.sigmoid(tf.matmul(L2, W3) + b3)

# Binary cross-entropy.
cost = -tf.reduce_mean(Y * tf.log(hypothesis) + (1 - Y) *
                       tf.log(1 - hypothesis))

train = tf.train.GradientDescentOptimizer(learning_rate=0.1).minimize(cost)

# Threshold the hypothesis at 0.5 and compare against the labels.
predicted = tf.cast(hypothesis > 0.5, dtype=tf.float32)
accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, Y), dtype=tf.float32))

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    feed = {X: x_data, Y: y_data}
    for step in range(10001):
        sess.run(train, feed_dict=feed)
        if step % 100 == 0:
            print(step, sess.run(cost, feed_dict=feed))

    h, c, a = sess.run([hypothesis, predicted, accuracy], feed_dict=feed)
    print("\nHypothesis: ", h, "\nCorrect: ", c, "\nAccuracy: ", a)
 
 
cs




+ Recent posts