Skip to content

Commit d1628ff

Browse files
committed
Added: Activation Fn
1 parent 3857a92 commit d1628ff

8 files changed

Lines changed: 63 additions & 26 deletions

File tree

layers/dA.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ class dA(object):
6262

6363
def __init__(self, numpy_rng, theano_rng=None, input=None,
6464
n_visible=784, n_hidden=500,
65-
W=None, bhid=None, bvis=None):
65+
W=None, bhid=None, bvis=None,activation=T.nnet.sigmoid):
6666
"""
6767
Initialize the dA class by specifying the number of visible units (the
6868
dimension d of the input ), the number of hidden units ( the dimension
@@ -107,10 +107,13 @@ def __init__(self, numpy_rng, theano_rng=None, input=None,
107107
visible units) that should be shared between dA and another
108108
architecture; if dA should be standalone set this to None
109109
110+
:type activation: <theano.tensor.elemwise.Elemwise object>
111+
:param activation: activation function
110112
111113
"""
112114
self.n_visible = n_visible
113115
self.n_hidden = n_hidden
116+
self.activation = activation
114117

115118
# create a Theano random generator that gives symbolic random values
116119
if not theano_rng:
@@ -186,14 +189,14 @@ def get_corrupted_input(self, input, corruption_level):
186189

187190
def get_hidden_values(self, input):
188191
""" Computes the values of the hidden layer """
189-
return T.nnet.sigmoid(T.dot(input, self.W) + self.b)
192+
return self.activation(T.dot(input, self.W) + self.b)
190193

191194
def get_reconstructed_input(self, hidden):
192195
"""Computes the reconstructed input given the values of the
193196
hidden layer
194197
195198
"""
196-
return T.nnet.sigmoid(T.dot(hidden, self.W_prime) + self.b_prime)
199+
return self.activation(T.dot(hidden, self.W_prime) + self.b_prime)
197200

198201
def get_cost_updates(self, corruption_level, learning_rate):
199202
""" This function computes the cost and the updates for one trainng

layers/rbm.py

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,11 @@ class RBM(object):
99

1010
def __init__(self, input=None, n_visible=1024, n_hidden=1024,
1111
W = None, hbias = None, vbias = None, numpy_rng = None,
12-
theano_rng = None):
12+
theano_rng = None,activation=T.nnet.sigmoid):
1313

1414
self.n_visible = n_visible
1515
self.n_hidden = n_hidden
16+
self.activation = activation
1617

1718
if numpy_rng is None:
1819
numpy_rng = numpy.random.RandomState(1234)
@@ -66,7 +67,7 @@ def free_energy(self, v_sample):
6667
def propup(self, vis):
6768
''' Propagate the visible activations up to the hidden units '''
6869
pre_sigmoid_activation = T.dot(vis, self.W) + self.hbias
69-
return [pre_sigmoid_activation, T.nnet.sigmoid(pre_sigmoid_activation)]
70+
return [pre_sigmoid_activation, self.activation(pre_sigmoid_activation)]
7071

7172
def sample_h_given_v(self, v0_sample):
7273
''' Generates hidden unit outputs given visible inputs '''
@@ -81,7 +82,7 @@ def sample_h_given_v(self, v0_sample):
8182
def propdown(self, hid):
8283
'''Propagates the hidden activation downwards to the visible units'''
8384
pre_sigmoid_activation = T.dot(hid, self.W.T) + self.vbias
84-
return [pre_sigmoid_activation, T.nnet.sigmoid(pre_sigmoid_activation)]
85+
return [pre_sigmoid_activation, self.activation(pre_sigmoid_activation)]
8586

8687
def sample_v_given_h(self, h0_sample):
8788
''' Generates visible units given hidden units '''
@@ -136,11 +137,15 @@ class GBRBM(RBM):
136137
"""Gaussian-bernoulli restricted Boltzmann machine"""
137138

138139
def __init__(self, input=None, n_visible=351, n_hidden=1000,
139-
W = None, hbias = None, vbias = None,
140-
numpy_rng = None, theano_rng = None):
140+
W = None, hbias = None, vbias = None, numpy_rng = None,
141+
theano_rng = None,activation=T.nnet.sigmoid):
141142

142-
super(GBRBM, self).__init__(input=input, n_visible=n_visible, n_hidden=n_hidden,
143-
W=W, hbias=hbias, vbias=vbias, numpy_rng=numpy_rng, theano_rng=theano_rng)
143+
super(GBRBM, self).__init__(input=input, n_visible=n_visible,
144+
n_hidden=n_hidden,
145+
W=W, hbias=hbias,
146+
vbias=vbias, numpy_rng=numpy_rng,
147+
theano_rng=theano_rng,
148+
activation=activation)
144149

145150
def free_energy(self, v_sample):
146151
''' Compute the free energy '''

models/dbn.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ class DBN(nnet):
2525

2626
def __init__(self, numpy_rng, theano_rng=None, n_ins=784,
2727
hidden_layers_sizes=[500, 500], n_outs=10,
28-
first_layer_gb = True,pretrainedLayers=None):
28+
first_layer_gb = True,pretrainedLayers=None,activation=T.nnet.sigmoid):
2929
"""This class is made to support a variable number of layers.
3030
3131
:type numpy_rng: numpy.random.RandomState
@@ -101,7 +101,7 @@ def __init__(self, numpy_rng, theano_rng=None, n_ins=784,
101101
input=layer_input,
102102
n_in=input_size,
103103
n_out=hidden_layers_sizes[i],
104-
activation=T.nnet.sigmoid)
104+
activation=activation)
105105

106106
# add the layer to our list of layers
107107
self.sigmoid_layers.append(sigmoid_layer)
@@ -122,15 +122,17 @@ def __init__(self, numpy_rng, theano_rng=None, n_ins=784,
122122
n_visible=input_size,
123123
n_hidden=hidden_layers_sizes[i],
124124
W=sigmoid_layer.W,
125-
hbias=sigmoid_layer.b)
125+
hbias=sigmoid_layer.b,
126+
activation=activation)
126127
else:
127128
rbm_layer = RBM(numpy_rng=numpy_rng,
128129
theano_rng=theano_rng,
129130
input=layer_input,
130131
n_visible=input_size,
131132
n_hidden=hidden_layers_sizes[i],
132133
W=sigmoid_layer.W,
133-
hbias=sigmoid_layer.b)
134+
hbias=sigmoid_layer.b,
135+
activation=activation)
134136
self.rbm_layers.append(rbm_layer)
135137

136138
# We now need to add a logistic layer on top of the MLP

models/sda.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ class SDA(nnet):
5656

5757
def __init__(self, numpy_rng, theano_rng=None, n_ins=784,
5858
hidden_layers_sizes=[500, 500], n_outs=10,
59-
corruption_levels=[0.1, 0.1]):
59+
corruption_levels=[0.1, 0.1],activation=T.nnet.sigmoid):
6060
""" This class is made to support a variable number of layers.
6161
6262
:type numpy_rng: numpy.random.RandomState
@@ -147,7 +147,8 @@ def __init__(self, numpy_rng, theano_rng=None, n_ins=784,
147147
n_visible=input_size,
148148
n_hidden=hidden_layers_sizes[i],
149149
W=sigmoid_layer.W,
150-
bhid=sigmoid_layer.b)
150+
bhid=sigmoid_layer.b,
151+
activation=T.nnet.sigmoid)
151152
self.dA_layers.append(dA_layer)
152153

153154
# We now need to add a logistic layer on top of the MLP

run/run_DBN.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
from io_modules.file_reader import read_dataset
3030
from io_modules import setLogger
3131
from utils.learn_rates import LearningRate
32+
from utils.utils import parse_activation
3233
from io_modules.model_io import _nnet2file, _file2nnet
3334

3435
from models import fineTunning,testing
@@ -104,11 +105,14 @@ def runRBM(arg):
104105
numpy_rng = numpy.random.RandomState(rbm_config['random_seed'])
105106
theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
106107

108+
activationFn = parse_activation(rbm_config['activation']);
109+
107110

108111
dbn = DBN(numpy_rng=numpy_rng, theano_rng = theano_rng, n_ins=rbm_config['n_ins'],
109112
hidden_layers_sizes=rbm_config['hidden_layers'],n_outs=rbm_config['n_outs'],
110113
first_layer_gb = rbm_config['first_layer_gb'],
111-
pretrainedLayers=rbm_config['pretrained_layers'])
114+
pretrainedLayers=rbm_config['pretrained_layers'],
115+
activation=activationFn)
112116

113117
train_sets, train_xy, train_x, train_y = read_dataset(data_spec['training'])
114118

run/run_SDA.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,12 +22,14 @@
2222
import theano
2323

2424
#module imports
25-
from utils.load_conf import load_model,load_sda_spec,load_data_spec
25+
from io_modules.model_io import _nnet2file, _file2nnet
2626
from io_modules.file_reader import read_dataset
2727
from io_modules import setLogger
2828
from utils.learn_rates import LearningRate
29+
from utils.utils import parse_activation
30+
from utils.load_conf import load_model,load_sda_spec,load_data_spec
31+
2932
from models.sda import SDA
30-
from io_modules.model_io import _nnet2file, _file2nnet
3133
from models import fineTunning,testing
3234

3335

@@ -75,11 +77,14 @@ def runSdA(arg):
7577
numpy_rng = numpy.random.RandomState(sda_config['random_seed'])
7678
#theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
7779

80+
#get Activation function
81+
activationFn = parse_activation(sda_config['activation']);
82+
7883
logger.info('building the model')
7984
# construct the stacked denoising autoencoder class
8085
sda = SDA(numpy_rng=numpy_rng, n_ins=sda_config['n_ins'],
8186
hidden_layers_sizes=sda_config['hidden_layers'],
82-
n_outs=sda_config['n_outs'])
87+
n_outs=sda_config['n_outs'],activation=activationFn)
8388

8489

8590

utils/load_conf.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,10 +98,22 @@ def load_data_spec(input_file):
9898
logger.info("Loading data specification properties from %s..",input_file)
9999
data = load_json(input_file);
100100
for x in ['training','testing','validation']:
101+
logger.debug('Validating data specification: %s',x)
102+
requiredKeys=['base_path','filename','partition','reader_type']
103+
101104
if not data.has_key(x):
102105
continue;
103106
if not data[x].has_key('keep_flatten') or not type(data[x]['keep_flatten']) is bool:
104-
data[x]['keep_flatten']=False
107+
data[x]['keep_flatten'] = False
108+
109+
if not data[x]['keep_flatten'] :
110+
requiredKeys.append('dim_shuffle');
111+
if not data[x].has_key('random') or not type(data[x]['keep_flatten']) is bool:
112+
data[x]['keep_flatten'] = True
113+
114+
if not isKeysPresents(data[x],requiredKeys):
115+
logger.critical("The mandatory arguments are missing in data spec(%s)",x)
116+
exit(1)
105117
return data
106118

107119

utils/utils.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -35,12 +35,17 @@ def dimshuffle(a,shuffle):
3535
return a
3636

3737

38-
def parse_activation(act_str):
39-
if act_str == 'sigmoid':
40-
return T.nnet.sigmoid
41-
if act_str == 'tanh':
38+
def parse_activation(activation):
39+
if activation == 'tanh':
4240
return T.tanh
43-
return T.nnet.sigmoid
41+
elif activation == 'sigmoid':
42+
return T.nnet.sigmoid
43+
elif activation == 'relu':
44+
return lambda x: x * (x > 0)
45+
elif activation == 'cappedrelu':
46+
return lambda x: T.minimum(x * (x > 0), 6)
47+
else:
48+
raise NotImplementedError
4449

4550
def activation_to_txt(act_func):
4651
if act_func == T.nnet.sigmoid:

0 commit comments

Comments
 (0)