44from theano .sandbox .rng_mrg import MRG_RandomStreams as RandomStreams
55from collections import OrderedDict
66
7+
78class RBM (object ):
89 """Bernoulli-bernoulli restricted Boltzmann machine (RBM) """
910
1011 def __init__ (self , input = None , n_visible = 1024 , n_hidden = 1024 ,
1112 W = None , hbias = None , vbias = None , numpy_rng = None ,
12- theano_rng = None , activation = T . nnet . sigmoid ):
13+ theano_rng = None ):
1314
1415 self .n_visible = n_visible
1516 self .n_hidden = n_hidden
16- self . activation = activation
17+
1718
1819 if numpy_rng is None :
1920 numpy_rng = numpy .random .RandomState (1234 )
@@ -44,9 +45,12 @@ def __init__(self, input=None, n_visible=1024, n_hidden=1024,
4445 if not input :
4546 self .input = T .matrix ('input' )
4647
47- self .delta_W = theano .shared (value = numpy .zeros_like (W .get_value (borrow = True ), dtype = theano .config .floatX ), name = 'delta_W' )
48- self .delta_hbias = theano .shared (value = numpy .zeros_like (hbias .get_value (borrow = True ), dtype = theano .config .floatX ), name = 'delta_hbias' )
49- self .delta_vbias = theano .shared (value = numpy .zeros_like (vbias .get_value (borrow = True ), dtype = theano .config .floatX ), name = 'delta_vbias' )
48+ self .delta_W = theano .shared (value = numpy .zeros_like (W .get_value (borrow = True ),
49+ dtype = theano .config .floatX ), name = 'delta_W' )
50+ self .delta_hbias = theano .shared (value = numpy .zeros_like (hbias .get_value (borrow = True ),
51+ dtype = theano .config .floatX ), name = 'delta_hbias' )
52+ self .delta_vbias = theano .shared (value = numpy .zeros_like (vbias .get_value (borrow = True ),
53+ dtype = theano .config .floatX ), name = 'delta_vbias' )
5054
5155 self .W = W
5256 self .hbias = hbias
@@ -67,7 +71,7 @@ def free_energy(self, v_sample):
6771 def propup (self , vis ):
6872 ''' Propagate the visible activations up to the hidden units '''
6973 pre_sigmoid_activation = T .dot (vis , self .W ) + self .hbias
70- return [pre_sigmoid_activation , self . activation (pre_sigmoid_activation )]
74+ return [pre_sigmoid_activation , T . nnet . sigmoid (pre_sigmoid_activation )]
7175
7276 def sample_h_given_v (self , v0_sample ):
7377 ''' Generates hidden unit outputs given visible inputs '''
@@ -82,7 +86,7 @@ def sample_h_given_v(self, v0_sample):
8286 def propdown (self , hid ):
8387 '''Propagates the hidden activation downwards to the visible units'''
8488 pre_sigmoid_activation = T .dot (hid , self .W .T ) + self .vbias
85- return [pre_sigmoid_activation , self . activation (pre_sigmoid_activation )]
89+ return [pre_sigmoid_activation , T . nnet . sigmoid (pre_sigmoid_activation )]
8690
8791 def sample_v_given_h (self , h0_sample ):
8892 ''' Generates visible units given hidden units '''
@@ -106,7 +110,8 @@ def gibbs_vhv(self, v0_sample):
106110 pre_sigmoid_v1 , v1_mean , v1_sample = self .sample_v_given_h (h1_sample )
107111 return [pre_sigmoid_h1 , h1_mean , h1_sample , pre_sigmoid_v1 , v1_mean , v1_sample ]
108112
109- # def get_cost_updates(self, batch_size = 128, lr = 0.0001, momentum=0.5, weight_cost=0.00001, persistent=None, k=1):
113+ # def get_cost_updates(self, batch_size = 128, lr = 0.0001, momentum=0.5,
114+ # weight_cost=0.00001, persistent=None, k=1):
110115 def get_cost_updates (self , batch_size = 128 , lr = 0.0001 , momentum = 0.5 , weight_cost = 0.00001 ):
111116 """
112117 get the cost and the gradient corresponding to one step of CD-k (k=1)
@@ -117,9 +122,15 @@ def get_cost_updates(self, batch_size = 128, lr = 0.0001, momentum=0.5, weight_c
117122
118123 # gradient of parameters
119124 updates = OrderedDict ()
120- updates [self .delta_W ] = momentum * self .delta_W + lr * (1.0 / batch_size ) * (T .dot (self .input .T , hp_data ) - T .dot (v_rec_sigm .T , hp_rec )) - lr * weight_cost * self .W
121- updates [self .delta_hbias ] = momentum * self .delta_hbias + lr * (1.0 / batch_size ) * (T .sum (h_data , axis = 0 ) - T .sum (hp_rec , axis = 0 ))
122- updates [self .delta_vbias ] = momentum * self .delta_vbias + lr * (1.0 / batch_size ) * (T .sum (self .input , axis = 0 ) - T .sum (v_rec_sigm , axis = 0 ))
125+ updates [self .delta_W ] = (
126+ momentum * self .delta_W + lr * (1.0 / batch_size ) * (T .dot (self .input .T , hp_data ) -
127+ T .dot (v_rec_sigm .T , hp_rec )) - lr * weight_cost * self .W )
128+ updates [self .delta_hbias ] = (
129+ momentum * self .delta_hbias + lr * (1.0 / batch_size ) *
130+ (T .sum (h_data , axis = 0 ) - T .sum (hp_rec , axis = 0 )))
131+ updates [self .delta_vbias ] = (
132+ momentum * self .delta_vbias + lr * (1.0 / batch_size ) *
133+ (T .sum (self .input , axis = 0 ) - T .sum (v_rec_sigm , axis = 0 )))
123134
124135 for param , dparam in zip (self .params , self .delta_params ):
125136 updates [param ] = param + updates [dparam ]
@@ -138,14 +149,13 @@ class GBRBM(RBM):
138149
139150 def __init__ (self , input = None , n_visible = 351 , n_hidden = 1000 ,
140151 W = None , hbias = None , vbias = None , numpy_rng = None ,
141- theano_rng = None , activation = T . nnet . sigmoid ):
152+ theano_rng = None ):
142153
143154 super (GBRBM , self ).__init__ (input = input , n_visible = n_visible ,
144155 n_hidden = n_hidden ,
145156 W = W , hbias = hbias ,
146157 vbias = vbias , numpy_rng = numpy_rng ,
147- theano_rng = theano_rng ,
148- activation = activation )
158+ theano_rng = theano_rng )
149159
150160 def free_energy (self , v_sample ):
151161 ''' Compute the free energy '''
@@ -163,7 +173,8 @@ def sample_v_given_h(self, h0_sample):
163173
164174 return [pre_sigmoid_v1 , v1_mean , v1_sample ]
165175
166- # def get_cost_updates(self, batch_size = 128, lr = 0.0001, momentum=0.5, weight_cost=0.00001, persistent=None, k = 1):
176+ # def get_cost_updates(self, batch_size = 128, lr = 0.0001,
177+ # momentum=0.5, weight_cost=0.00001, persistent=None, k = 1):
167178 def get_cost_updates (self , batch_size = 128 , lr = 0.0001 , momentum = 0.5 , weight_cost = 0.00001 ):
168179
169180 x , hp_data , h_data = self .sample_h_given_v (self .input )
@@ -172,9 +183,15 @@ def get_cost_updates(self, batch_size = 128, lr = 0.0001, momentum=0.5, weight_c
172183
173184 updates = OrderedDict ()
174185
175- updates [self .delta_W ] = momentum * self .delta_W + lr * (1.0 / batch_size ) * (T .dot (self .input .T , hp_data ) - T .dot (v_rec .T , hp_rec )) - lr * weight_cost * self .W
176- updates [self .delta_hbias ] = momentum * self .delta_hbias + lr * (1.0 / batch_size ) * (T .sum (h_data , axis = 0 ) - T .sum (hp_rec , axis = 0 ))
177- updates [self .delta_vbias ] = momentum * self .delta_vbias + lr * (1.0 / batch_size ) * (T .sum (self .input , axis = 0 ) - T .sum (v_rec , axis = 0 ))
186+ updates [self .delta_W ] = (
187+ momentum * self .delta_W + lr * (1.0 / batch_size ) * (T .dot (self .input .T , hp_data ) -
188+ T .dot (v_rec .T , hp_rec )) - lr * weight_cost * self .W )
189+ updates [self .delta_hbias ] = (
190+ momentum * self .delta_hbias + lr * (1.0 / batch_size ) *
191+ (T .sum (h_data , axis = 0 ) - T .sum (hp_rec , axis = 0 )))
192+ updates [self .delta_vbias ] = (
193+ momentum * self .delta_vbias + lr * (1.0 / batch_size ) *
194+ (T .sum (self .input , axis = 0 ) - T .sum (v_rec , axis = 0 )))
178195
179196 updates [self .W ] = self .W + updates [self .delta_W ]
180197 updates [self .hbias ] = self .hbias + updates [self .delta_hbias ]
0 commit comments