forked from lazyprogrammer/machine_learning_examples
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathprobit.py
More file actions
129 lines (105 loc) · 3.66 KB
/
probit.py
File metadata and controls
129 lines (105 loc) · 3.66 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
from __future__ import print_function
# probit regression
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import norm
# from scipy.stats import t
from sortedcontainers import SortedList
class ProbitRegression:
def fit(self, X, Y, sigma=1.5, lam=1, show_w=set(), Q=None):
# setup
N, D = X.shape
self.w = np.random.randn(D) / np.sqrt(D) # does not work if you don't scale first!
Eq = np.zeros(N)
idx1 = (Y == 1)
idx0 = (Y == 0)
A = lam*np.eye(D) + X.T.dot(X) / sigma**2
costs = []
for t in xrange(100):
# calculate ln(p(Y, w | X))
cdf = norm.cdf(X.dot(self.w) / sigma)
cost = -lam/2 * self.w.dot(self.w) + Y.dot(np.log(cdf)) + (1 - Y).dot(np.log(1 - cdf))
costs.append(cost)
# E step
Xw = X.dot(self.w)
pdf = norm.pdf(-Xw / sigma)
cdf = norm.cdf(-Xw / sigma)
Eq[idx1] = Xw[idx1] + sigma*(pdf[idx1] / (1 - cdf[idx1]))
Eq[idx0] = Xw[idx0] + sigma*(-pdf[idx0] / cdf[idx0])
# M step
b = X.T.dot(Eq) / sigma**2
self.w = np.linalg.solve(A, b)
if show_w and t in show_w:
plot_image(self.w, Q, "iteration: %s" % (t+1))
plt.plot(costs)
plt.show()
self.sigma = sigma
self.lam = lam
def predict_proba(self, X):
N, D = X.shape
return norm.cdf(X.dot(self.w) / self.sigma)
def predict(self, X):
return np.round(self.predict_proba(X))
def score(self, X, Y):
return np.mean(self.predict(X) == Y)
def confusion_matrix(self, X, Y):
P = self.predict(X)
M = np.zeros((2, 2))
M[0,0] = np.sum(P[Y == 0] == Y[Y == 0])
M[0,1] = np.sum(P[Y == 0] != Y[Y == 0])
M[1,0] = np.sum(P[Y == 1] != Y[Y == 1])
M[1,1] = np.sum(P[Y == 1] == Y[Y == 1])
return M
def get_3_misclassified(self, X, Y):
P = self.predict(X)
N = len(Y)
samples = np.random.choice(N, 3, replace=False, p=(P != Y)/float(np.sum(P != Y)))
return X[samples], Y[samples], P[samples]
def get_3_most_ambiguous(self, X, Y):
P = self.predict_proba(X)
N = len(X)
sl = SortedList(load=3) # stores (distance, sample index) tuples
for n in xrange(N):
p = P[n]
dist = np.abs(p - 0.5)
if len(sl) < 3:
sl.add( (dist, n) )
else:
if dist < sl[-1][0]:
del sl[-1]
sl.add( (dist, n) )
indexes = [v for k, v in sl]
return X[indexes], Y[indexes]
def plot_image(x, Q, title):
im = Q.dot(x)
plt.imshow(im.reshape(28,28), cmap='gray')
plt.title(title)
plt.show()
if __name__ == '__main__':
Xtrain = pd.read_csv('Xtrain.csv', header=None).as_matrix()
Xtest = pd.read_csv('Xtest.csv', header=None).as_matrix()
Ytrain = pd.read_csv('ytrain.csv', header=None).as_matrix().flatten()
# print "Ytrain.shape:", Ytrain.shape
Ytest = pd.read_csv('ytest.csv', header=None).as_matrix().flatten()
model = ProbitRegression()
model.fit(Xtrain, Ytrain)
print("train accuracy:", model.score(Xtrain, Ytrain))
print("test accuracy:", model.score(Xtest, Ytest))
# confusion matrix
M = model.confusion_matrix(Xtest, Ytest)
print("confusion matrix:")
print(M)
print("N:", len(Ytest))
print("sum(M):", M.sum())
# plot 3 misclassified
Q = pd.read_csv('Q.csv', header=None).as_matrix()
misclassified, targets, predictions = model.get_3_misclassified(Xtrain, Ytrain)
for x, y, p in zip(misclassified, targets, predictions):
plot_image(x, Q, 'misclassified target=%s prediction=%s' % (y, int(p)))
# ambiguous
ambiguous, targets = model.get_3_most_ambiguous(Xtrain, Ytrain)
for x, y in zip(ambiguous, targets):
plot_image(x, Q, 'ambiguous target=%s' % y)
# show w
model.fit(Xtrain, Ytrain, show_w=set([0, 4, 9, 24, 49, 99]), Q=Q)