*Binary Classification :


- Applications:
=> Spam email detection: spam or ham

=> Facebook feed: show or hide

=> Credit card fraud detection: fraud or not


Online graph plotting:

https://www.desmos.com/calculator



=> The larger the value of w, the steeper the curve becomes (its slope tilts toward the y-axis):
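
A quick way to see this numerically, assuming the curve being plotted is the sigmoid 1/(1+e^(-wx)) used as the hypothesis later in this post (a minimal sketch; the x grid and the w values 0.5, 1, 5 are arbitrary picks):

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

x = np.linspace(-3, 3, 7)          # a few sample points around 0
for w in [0.5, 1.0, 5.0]:          # arbitrary w values for comparison
    print(w, np.round(sigmoid(w * x), 3))
# As w grows, the outputs jump from near 0 to near 1 over a narrower
# range of x, i.e. the curve becomes steeper.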









I = log(1/p)


I = amount of information (in bits when the log is base 2)

1/p = the total number of equally likely outcomes


2^x = 6  =>  x ≈ 2.58, about 3 bits (e.g. a fair six-sided die)

- about 3 bits: the uncertainty is high


2^x = 2  =>  x = 1 bit (e.g. a fair coin)

- 1 bit: the uncertainty is low


=> Lower uncertainty is better! (lower entropy)
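
As a quick numeric check of the bit counts above (a small sketch; the uniform die/coin probabilities are the usual assumptions):

import math

def info_bits(p):
    # I = log2(1/p): information content, in bits, of an event with probability p
    return math.log2(1.0 / p)

print(info_bits(1.0 / 6))  # die, 6 equally likely outcomes -> ~2.58 bits (about 3)
print(info_bits(1.0 / 2))  # coin, 2 equally likely outcomes -> 1.0 bit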


*Cost Function  :


c(H(x), y) = -y log(H(x)) - (1-y) log(1-H(x))


=> cost(W) = -(1/m) Σ [ y log(H(x)) + (1-y) log(1-H(x)) ]
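
To see how this cost behaves per sample (a small sketch; the H(x) values 0.9 and 0.1 are just illustrative):

import math

def cost_point(h, y):
    # c(H(x), y) = -y*log(H(x)) - (1-y)*log(1-H(x))
    return -y * math.log(h) - (1 - y) * math.log(1 - h)

print(cost_point(0.9, 1))  # confident and correct -> ~0.105 (small penalty)
print(cost_point(0.1, 1))  # confident but wrong   -> ~2.303 (large penalty)
print(cost_point(0.1, 0))  # confident and correct -> ~0.105 (small penalty)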


*Logistic Regression (Hypothesis, Cost Function & Cost Minimization):
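
For reference, the three pieces named in this heading, written as a minimal NumPy sketch that mirrors the TensorFlow code below (the toy data is made up for illustration):

import numpy as np

# Hypothetical toy data: 4 samples, 2 features, 0/1 labels
X = np.array([[-2., -1.], [-1., -1.], [1., 1.], [2., 1.]])
y = np.array([[0.], [0.], [1.], [1.]])

W = np.zeros((2, 1))
b = 0.0
lr = 0.1

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

for step in range(1000):
    H = sigmoid(X.dot(W) + b)                                 # hypothesis H(x)
    cost = -np.mean(y * np.log(H) + (1 - y) * np.log(1 - H))  # cross-entropy cost
    W -= lr * X.T.dot(H - y) / len(X)                         # gradient descent on W
    b -= lr * np.mean(H - y)                                  # gradient descent on b

print(cost, (H > 0.5).astype(float).ravel())  # cost has shrunk; thresholded H recovers y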





*Logistic Regression source code example:


import tensorflow as tf
import numpy as np
tf.set_random_seed(888)
 
xy = np.loadtxt('./data/04train.txt', dtype='float32')
x_data = xy[:, 0:-1] # all rows; every column except the last one -> x_data
y_data = xy[:, [-1]] # all rows; only the last column -> y_data
 
print(x_data.shape, y_data.shape) # (6, 3) (6, 1), so W is shaped [3, 1]
 
X = tf.placeholder(tf.float32)
Y = tf.placeholder(tf.float32)
 
W = tf.Variable(tf.random_uniform([3, 1], -1., 1.))  # (6, 3) (6, 1), so W is shaped [3, 1]
h = tf.matmul(X, W)  # matrix multiplication
hypothesis = tf.div(1., 1. + tf.exp(-h))  # sigmoid: 1 / (1 + e^(-h))
 
cost = -tf.reduce_mean(Y * tf.log(hypothesis) +
                       (1 - Y) * tf.log(1 - hypothesis))
optimizer = tf.train.GradientDescentOptimizer(0.1)
train = optimizer.minimize(cost)
 
sess = tf.Session()
sess.run(tf.global_variables_initializer())
 
for step in range(2001):
    sess.run(train, feed_dict={X:x_data, Y:y_data})
    if step % 20 == 0:
        print(step, sess.run(cost, feed_dict={X:x_data, Y:y_data})
              , sess.run(W))






* Logistic Regression (with Accuracy) :


import tensorflow as tf
import numpy as np
tf.set_random_seed(888)
 
xy = np.loadtxt('./data/04train.txt', dtype='float32')
x_data = xy[:, 0:-1]
y_data = xy[:, [-1]]
 
print(x_data.shape, y_data.shape) # (6, 3) (6, 1), so W is shaped [3, 1]
 
X = tf.placeholder(tf.float32)
Y = tf.placeholder(tf.float32)
 
W = tf.Variable(tf.random_uniform([3, 1], -1., 1.))  # (6, 3) (6, 1), so W is shaped [3, 1]
h = tf.matmul(X, W)  # matrix multiplication
hypothesis = tf.div(1., 1. + tf.exp(-h))  # sigmoid: 1 / (1 + e^(-h))
 
cost = -tf.reduce_mean(Y * tf.log(hypothesis) +
                       (1 - Y) * tf.log(1 - hypothesis))
optimizer = tf.train.GradientDescentOptimizer(0.1)
train = optimizer.minimize(cost)
 
sess = tf.Session()
sess.run(tf.global_variables_initializer())
 
for step in range(2001):
    sess.run(train, feed_dict={X:x_data, Y:y_data})
    if step % 20 == 0:
        print(step, sess.run(cost, feed_dict={X:x_data, Y:y_data})
              , sess.run(W))
 
a = sess.run(hypothesis, feed_dict={X: [[1, 2, 2]]})
print(a > 0.5)
 
# Predictions vs. answers
# 0     0
# 0     1
# 1     1
# The correct ratio is 2/3.
# Expressed as a formula:
 
predicted = tf.cast(hypothesis > 0.5, dtype = tf.float32)
accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, Y)
                                  , dtype=tf.float32))
acc = sess.run(accuracy, feed_dict={X:x_data, Y:y_data})
print(acc)
 
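
The predicted/accuracy lines above do the following, shown here in plain NumPy (the hypothesis values are made up so they reproduce the 2/3 case from the comments):

import numpy as np

hypothesis = np.array([[0.2], [0.4], [0.8]])   # hypothetical model outputs
Y = np.array([[0.], [1.], [1.]])               # true labels

predicted = (hypothesis > 0.5).astype(np.float32)        # threshold at 0.5, like tf.cast(hypothesis > 0.5, ...)
accuracy = np.mean((predicted == Y).astype(np.float32))  # fraction of matches, like tf.reduce_mean(tf.cast(tf.equal(...)))
print(predicted.ravel(), accuracy)   # [0. 0. 1.] ≈ 0.667 (2 of 3 correct)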




*Analyzing a diabetes-related dataset:
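
The CSV is assumed here to hold 8 feature columns followed by a single 0/1 label column (diabetic or not), which is why the code below slices off the last column as y_data and shapes W as [8, 1].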



★Attempt :

import tensorflow as tf
import numpy as np
 
xy = np.loadtxt('./data/diabetes.csv', delimiter=',', dtype='float32')
print(xy)
 
x_data = xy[:, 0:-1]
y_data = xy[:, [-1]]
 
# print(x_data, y_data)
print(x_data.shape, y_data.shape)  # 8 feature columns, 1 label column
 
X = tf.placeholder(tf.float32)
Y = tf.placeholder(tf.float32)
 
W = tf.Variable(tf.random_uniform([8, 1], -1., 1.))
 
h = tf.matmul(X, W)  # matrix multiplication
hypothesis = tf.div(1., 1. + tf.exp(-h))  # sigmoid: 1 / (1 + e^(-h))
 
cost = -tf.reduce_mean(Y * tf.log(hypothesis) +
                       (1 - Y) * tf.log(1 - hypothesis))
optimizer = tf.train.GradientDescentOptimizer(0.1)
train = optimizer.minimize(cost)
 
sess = tf.Session()
sess.run(tf.global_variables_initializer())
 
for step in range(200001):
    sess.run(train, feed_dict={X:x_data, Y:y_data})
    if step % 20 == 0:
        print(step, sess.run(cost, feed_dict={X:x_data, Y:y_data})
              , sess.run(W))
 
predicted = tf.cast(hypothesis > 0.5, dtype = tf.float32)
accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, Y)
                                  , dtype=tf.float32))
acc = sess.run(accuracy, feed_dict={X:x_data, Y:y_data})
print(acc)
 


★Answer :

import tensorflow as tf
import numpy as np
tf.set_random_seed(777)  
 
xy = np.loadtxt('./data/diabetes.csv', delimiter=',', dtype=np.float32)
x_data = xy[:, 0:-1]
y_data = xy[:, [-1]]
 
print(x_data.shape, y_data.shape)
 
X = tf.placeholder(tf.float32, shape=[None, 8])
Y = tf.placeholder(tf.float32, shape=[None, 1])
 
W = tf.Variable(tf.random_uniform([8, 1], -1., 1.))
b = tf.Variable(tf.random_uniform([1], -1., 1.))
 
hypothesis = tf.sigmoid(tf.matmul(X, W) + b)
 
cost = -tf.reduce_mean(Y * tf.log(hypothesis) + (1 - Y) *
                       tf.log(1 - hypothesis))
 
train = tf.train.GradientDescentOptimizer(learning_rate=0.1).minimize(cost)
 
predicted = tf.cast(hypothesis > 0.5, dtype=tf.float32)
accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, Y), dtype=tf.float32))
 
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
 
    for step in range(30001):
        cost_val, _ = sess.run([cost, train], feed_dict={X: x_data, Y: y_data})
        if step % 200 == 0:
            print(step, cost_val)
 
    h, c, a = sess.run([hypothesis, predicted, accuracy],
                       feed_dict={X: x_data, Y: y_data})
    print("\nHypothesis: ", h, "\nCorrect (Y): ", c, "\nAccuracy: ", a)
 
 
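
Compared to the attempt above, the answer gives the placeholders explicit shapes ([None, 8] and [None, 1]), adds a bias term b, uses the built-in tf.sigmoid instead of composing tf.div and tf.exp, and evaluates hypothesis, predicted, and accuracy in a single sess.run inside a with block so the session is closed automatically.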
