machine_learning_examples/bayesian_ml/2/probit.py at 1d2be2c7071562c551d6235ae2da18123a66d479 · cclauss/machine_learning_examples · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
from __future__ import print_function
# probit regression

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import norm
# from scipy.stats import t
from sortedcontainers import SortedList


class ProbitRegression:
  def fit(self, X, Y, sigma=1.5, lam=1, show_w=set(), Q=None):

    # setup
    N, D = X.shape
    self.w = np.random.randn(D) / np.sqrt(D) # does not work if you don't scale first!
    Eq = np.zeros(N)
    idx1 = (Y == 1)
    idx0 = (Y == 0)
    A = lam*np.eye(D) + X.T.dot(X) / sigma**2
    costs = []

    for t in xrange(100):
      # calculate ln(p(Y, w | X))
      cdf = norm.cdf(X.dot(self.w) / sigma)
      cost = -lam/2 * self.w.dot(self.w) + Y.dot(np.log(cdf)) + (1 - Y).dot(np.log(1 - cdf))
      costs.append(cost)

      # E step
      Xw = X.dot(self.w)
      pdf = norm.pdf(-Xw / sigma)
      cdf = norm.cdf(-Xw / sigma)
      Eq[idx1] = Xw[idx1] + sigma*(pdf[idx1] / (1 - cdf[idx1]))
      Eq[idx0] = Xw[idx0] + sigma*(-pdf[idx0] / cdf[idx0])

      # M step
      b = X.T.dot(Eq) / sigma**2
      self.w = np.linalg.solve(A, b)

      if show_w and t in show_w:
        plot_image(self.w, Q, "iteration: %s" % (t+1))


    plt.plot(costs)
    plt.show()

    self.sigma = sigma
    self.lam = lam


  def predict_proba(self, X):
    N, D = X.shape
    return norm.cdf(X.dot(self.w) / self.sigma)

  def predict(self, X):
    return np.round(self.predict_proba(X))

  def score(self, X, Y):
    return np.mean(self.predict(X) == Y)

  def confusion_matrix(self, X, Y):
    P = self.predict(X)
    M = np.zeros((2, 2))
    M[0,0] = np.sum(P[Y == 0] == Y[Y == 0])
    M[0,1] = np.sum(P[Y == 0] != Y[Y == 0])
    M[1,0] = np.sum(P[Y == 1] != Y[Y == 1])
    M[1,1] = np.sum(P[Y == 1] == Y[Y == 1])
    return M

  def get_3_misclassified(self, X, Y):
    P = self.predict(X)
    N = len(Y)
    samples = np.random.choice(N, 3, replace=False, p=(P != Y)/float(np.sum(P != Y)))
    return X[samples], Y[samples], P[samples]

  def get_3_most_ambiguous(self, X, Y):
    P = self.predict_proba(X)
    N = len(X)
    sl = SortedList(load=3) # stores (distance, sample index) tuples
    for n in xrange(N):
      p = P[n]
      dist = np.abs(p - 0.5)
      if len(sl) < 3:
        sl.add( (dist, n) )
      else:
        if dist < sl[-1][0]:
          del sl[-1]
          sl.add( (dist, n) )
    indexes = [v for k, v in sl]
    return X[indexes], Y[indexes]

def plot_image(x, Q, title):
  im = Q.dot(x)
  plt.imshow(im.reshape(28,28), cmap='gray')
  plt.title(title)
  plt.show()

if __name__ == '__main__':
  Xtrain = pd.read_csv('Xtrain.csv', header=None).as_matrix()
  Xtest = pd.read_csv('Xtest.csv', header=None).as_matrix()
  Ytrain = pd.read_csv('ytrain.csv', header=None).as_matrix().flatten()
  # print "Ytrain.shape:", Ytrain.shape
  Ytest = pd.read_csv('ytest.csv', header=None).as_matrix().flatten()
  model = ProbitRegression()
  model.fit(Xtrain, Ytrain)
  print("train accuracy:", model.score(Xtrain, Ytrain))
  print("test accuracy:", model.score(Xtest, Ytest))

  # confusion matrix
  M = model.confusion_matrix(Xtest, Ytest)
  print("confusion matrix:")
  print(M)
  print("N:", len(Ytest))
  print("sum(M):", M.sum())

  # plot 3 misclassified
  Q = pd.read_csv('Q.csv', header=None).as_matrix()
  misclassified, targets, predictions = model.get_3_misclassified(Xtrain, Ytrain)
  for x, y, p in zip(misclassified, targets, predictions):
    plot_image(x, Q, 'misclassified target=%s prediction=%s' % (y, int(p)))

  # ambiguous
  ambiguous, targets = model.get_3_most_ambiguous(Xtrain, Ytrain)
  for x, y in zip(ambiguous, targets):
    plot_image(x, Q, 'ambiguous target=%s' % y)

  # show w
  model.fit(Xtrain, Ytrain, show_w=set([0, 4, 9, 24, 49, 99]), Q=Q)