Commit 02a7036

Unstable:DNN

1 parent 1fdec06 commit 02a7036

5 files changed

Lines changed: 185 additions & 89 deletions


config/DNN/README

Whitespace-only changes.

config/DNN/data_spec.json

Lines changed: 31 additions & 0 deletions
{
    "validation" : {
        "base_path" : "data/NPFILE",
        "filename" : "val.dat",
        "partition" : 200,
        "random" : true,
        "random_seed" : 123,
        "keep_flatten" : true,
        "reader_type" : "NP"
    },

    "training" : {
        "base_path" : "data/NPFILE",
        "filename" : "train.dat",
        "partition" : 200,
        "random" : true,
        "random_seed" : 123,
        "keep_flatten" : true,
        "reader_type" : "NP"
    },

    "testing" : {
        "base_path" : "data/NPFILE",
        "filename" : "train.dat",
        "partition" : 200,
        "random" : true,
        "random_seed" : 123,
        "keep_flatten" : true,
        "reader_type" : "NP"
    }
}
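
For orientation, a minimal sketch of a loader for this specification. In this commit, load_data_spec is called with both the path and the batch size (see run/run_DNN.py below); the body here is an assumption for illustration, not the repository's confirmed behavior.

import json

def load_data_spec(path, batch_size=128):
    """Read data_spec.json and return one settings dict per split (a sketch).

    The repository's load_data_spec presumably does more (reader
    construction, validation); batch_size is threaded through only
    because run_DNN.py passes it alongside the path.
    """
    with open(path) as fp:
        spec = json.load(fp)
    for conf in spec.values():
        conf['batch_size'] = batch_size   # make the batch size visible to each reader
    return spec

if __name__ == '__main__':
    spec = load_data_spec('config/DNN/data_spec.json')
    print(spec['training']['filename'])   # -> train.dat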

config/DNN/model_conf.json

Lines changed: 62 additions & 0 deletions
{
    "comment" : "nnetType :: (Mandatory) specify the type of network (CNN, RBM)",
    "nnetType" : "RBM",

    "comment" : "wdir :: (Mandatory) specify the working directory containing the data configuration and output",
    "wdir" : "wdir",

    "comment" : "data_spec :: (Mandatory) specify the path of the data specification relative to the working directory",
    "data_spec" : "data_spec.json",

    "comment" : "nnet_spec :: (Mandatory) specify the path of the RBM network specification relative to the working directory",
    "nnet_spec" : "rbm_spec.json",

    "comment" : "output_file :: (Mandatory) specify the path of the RBM network output file relative to the working directory",
    "output_file" : "rbm_out.model",

    "comment" : "batch_size :: specify the mini-batch size while training, default 128",
    "batch_size" : 128,

    "comment" : "n_ins :: number of input dimensions",
    "n_ins" : 2352,

    "comment" : "n_outs :: number of output targets",
    "n_outs" : 200,

    "comment" : "pre-training learning rates and epochs (gbrbm_learning_rate applies to the Gaussian-Bernoulli first layer)",
    "gbrbm_learning_rate" : 0.005,
    "learning_rate" : 0.08,
    "pretraining_epochs" : 10,

    "comment" : "initial_momentum, final_momentum, initial_momentum_epoch :: specify the momentum factor while training, default 0.5, 0.9, 5",
    "initial_momentum" : 0.5,
    "final_momentum" : 0.9,
    "initial_momentum_epoch" : 5,

    "comment" : "finetune_method :: two methods are supported, C: constant learning rate and E: exponential decay",
    "finetune_method" : "C",

    "comment" : "finetune_rate :: learning rate configuration (learning_rate/epoch_num for method C; start_rate, scale_by and the min_* thresholds for method E)",
    "finetune_rate" : {
        "learning_rate" : 0.08,
        "epoch_num" : 10,

        "start_rate" : 0.08,
        "scale_by" : 0.5,
        "min_derror_decay_start" : 0.05,
        "min_derror_stop" : 0.05,
        "min_epoch_decay_start" : 15,
        "init_error" : 100
    },

    "comment" : "finetune_momentum :: specify the momentum factor while fine-tuning",
    "finetune_momentum" : 0.5,

    "processes" : {
        "pretraining" : false,
        "finetuning" : false,
        "testing" : true,
        "export_data" : false
    }
}
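
The two finetune_method values map to two learning-rate schedules. Below is a self-contained sketch of the constant schedule ('C'), written against the interface that run/run_DNN.py appears to expect (LearningRate.get_instance / get_rate / get_next_rate); it is an illustration of the assumed contract, not the repository's class.

class ConstantLearningRate(object):
    """Constant schedule ('C'): a fixed rate for epoch_num epochs, then 0."""

    def __init__(self, learning_rate=0.08, epoch_num=10):
        self.rate = learning_rate
        self.epoch = 1
        self.epoch_num = epoch_num

    def get_rate(self):
        # The training loop stops once the rate reaches 0.
        return self.rate if self.epoch <= self.epoch_num else 0

    def get_next_rate(self, current_error):
        # current_error is ignored by the constant method; the
        # exponential-decay method ('E') would compare it against the
        # min_derror_* thresholds before multiplying the rate by scale_by.
        self.epoch += 1
        return self.get_rate()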

config/DNN/rbm_spec.json

Lines changed: 18 additions & 0 deletions
{
    "comment" : "hidden_layers :: RBM layer configuration (no. of nodes per layer)",
    "hidden_layers" : [ 2350, 1024, 1024, 1024, 1024, 1901 ],

    "comment" : "activation :: sigmoid or tanh",
    "activation" : "sigmoid",

    "comment" : "pretrained_layers :: number of layers to be pre-trained",
    "pretrained_layers" : 5,

    "comment" : "first_layer_type :: type of the first layer; either 'bb' (Bernoulli-Bernoulli) or 'gb' (Gaussian-Bernoulli)",
    "first_layer_type" : "gb",

    "comment" : "random_seed :: seed for the random number generator",
    "random_seed" : 89677
}
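
run/run_DNN.py (below) replaces an inline if/else on this 'activation' string with a parse_activation helper. A NumPy stand-in for what such a helper might look like; the repository's version presumably returns the Theano ops (T.nnet.sigmoid / T.tanh) that the old inline branch selected.

import numpy

def parse_activation(name):
    """Map an activation name from the spec to a callable (sketch only)."""
    activations = {
        'sigmoid': lambda x: 1.0 / (1.0 + numpy.exp(-x)),   # stand-in for T.nnet.sigmoid
        'tanh': numpy.tanh,                                  # stand-in for T.tanh
    }
    try:
        return activations[name]
    except KeyError:
        raise ValueError('unsupported activation: %r' % (name,))

# parse_activation('sigmoid')(0.0)  -> 0.5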

run/run_DNN.py

Lines changed: 74 additions & 89 deletions
@@ -40,13 +40,20 @@
 def runDNN(configFile):
 
     model_config = load_model(configFile)
-    dnn_config = load_dnn_spec(model_config['dnn_nnet_spec'])
-    data_spec = load_data_spec(model_config['data_spec']);
+    dnn_config = load_dnn_spec(model_config['nnet_spec'])
+    data_spec = load_data_spec(model_config['data_spec'], model_config['batch_size'])
 
+
+    # seed the random number generators
+    numpy_rng = numpy.random.RandomState(dnn_config['random_seed'])
+    theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
 
-    #generating Random
-    numpy_rng = numpy.random.RandomState(dnn_config['random_seed'])
-    theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
+    activationFn = parse_activation(dnn_config['activation'])
+
+    # create the working directory
+    createDir(model_config['wdir'])
+
+    batch_size = model_config['batch_size']
 
     # pretraining
     ptr_file = dnn_config['ptr_file']
@@ -57,113 +64,91 @@ def runDNN(configFile):
     l2_reg = dnn_config['l2_reg']
 
 
-    # learning rate
-    lrate = LearningRate.get_instance(model_configs['l_rate_method'],
-                                      model_configs['l_rate']);
-
-    # batch_size and momentum
-    batch_size = model_configs['batch_size'];
-    momentum = model_configs['momentum']
-
 
-    n_ins = dnn_configs['n_ins']
+    n_ins = model_config['n_ins']
     hidden_layers_sizes = dnn_config['hidden_layers']
-    n_outs = dnn_configs['n_outs']
+    n_outs = model_config['n_outs']
 
-    if dnn_configs['activation'] == 'sigmoid':
-        activation = T.nnet.sigmoid
-    else:
-        activation = T.tanh
-
-    do_maxout = dnn_configs['do_maxout']
-    pool_size = dnn_configs['pool_size']
-    do_pnorm = dnn_configs['do_pnorm']
-    pnorm_order = dnn_configs['pnorm_order']
+    do_maxout = dnn_config['do_maxout']
+    pool_size = dnn_config['pool_size']
+    do_pnorm = dnn_config['do_pnorm']
+    pnorm_order = dnn_config['pnorm_order']
 
-    do_dropout = dnn_configs['do_dropout']
-    dropout_factor = dnn_configs['dropout_factor']
-    input_dropout_factor = dnn_configs['input_dropout_factor']
-
-    train_sets, train_xy, train_x, train_y = read_dataset(data_spec['training'])
-    valid_sets, valid_xy, valid_x, valid_y = read_dataset(data_spec['validation'])
+    do_dropout = dnn_config['do_dropout']
+    dropout_factor = dnn_config['dropout_factor']
+    input_dropout_factor = dnn_config['input_dropout_factor']
 
     numpy_rng = numpy.random.RandomState(89677)
     theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
 
     logger.info('Building the model')
+
     if do_dropout:
         dnn = DNN_Dropout(numpy_rng=numpy_rng, theano_rng = theano_rng, n_ins=n_ins,
                           hidden_layers_sizes=hidden_layers_sizes, n_outs=n_outs,
-                          activation = activation, dropout_factor = dropout_factor, input_dropout_factor = input_dropout_factor,
+                          activation = activationFn, dropout_factor = dropout_factor,
+                          input_dropout_factor = input_dropout_factor,
                           do_maxout = do_maxout, pool_size = pool_size,
                           max_col_norm = max_col_norm, l1_reg = l1_reg, l2_reg = l2_reg)
     else:
         dnn = DNN(numpy_rng=numpy_rng, theano_rng = theano_rng, n_ins=n_ins,
                   hidden_layers_sizes=hidden_layers_sizes, n_outs=n_outs,
-                  activation = activation, do_maxout = do_maxout, pool_size = pool_size,
+                  activation = activationFn, do_maxout = do_maxout, pool_size = pool_size,
                   do_pnorm = do_pnorm, pnorm_order = pnorm_order,
                   max_col_norm = max_col_norm, l1_reg = l1_reg, l2_reg = l2_reg)
 
-    if ptr_layer_number > 0:
-        _file2nnet(dnn.sigmoid_layers, set_layer_num = ptr_layer_number, filename = ptr_file, withfinal=False)
-
-    # get the training, validation and testing function for the model
-    logger.info('Getting the finetuning functions')
-    train_fn, valid_fn = dnn.build_finetune_functions(
-        (train_x, train_y), (valid_x, valid_y),
-        batch_size=batch_size)
-
-    logger.info('Finetunning the model')
-    start_time = time.clock()
-    while (lrate.get_rate() != 0):
-        train_error = []
-        while (not train_sets.is_finish()):
-            train_sets.load_next_partition(train_xy)
-            for batch_index in xrange(train_sets.cur_frame_num / batch_size): # loop over mini-batches
-                train_error.append(train_fn(index=batch_index, learning_rate = lrate.get_rate(), momentum = momentum))
-            train_sets.initialize_read()
-        logger.info('Epoch %d, training error %f' % (lrate.epoch, numpy.mean(train_error)))
-
-        valid_error = []
-        while (not valid_sets.is_finish()):
-            valid_sets.load_next_partition(valid_xy)
-            for batch_index in xrange(valid_sets.cur_frame_num / batch_size): # loop over mini-batches
-                valid_error.append(valid_fn(index=batch_index))
-            valid_sets.initialize_read()
-        logger.info('Epoch %d, lrate %f, validation error %f' % (lrate.epoch, lrate.get_rate(), numpy.mean(valid_error)))
-
-        lrate.get_next_rate(current_error = 100 * numpy.mean(valid_error))
-
+    try:
+        _file2nnet(dnn.sigmoid_layers, set_layer_num = ptr_layer_number,
+                   filename = ptr_file, withfinal=False)
+    except Exception as e:
+        logger.error(str(e))
+        logger.error('Model could not be initialized from the input file')
+
+    ########################
+    # FINETUNING THE MODEL #
+    ########################
+    if model_config['processes']['finetuning']:
+        try:
+            train_sets, train_xy, train_x, train_y = read_dataset(data_spec['training'])
+            valid_sets, valid_xy, valid_x, valid_y = read_dataset(data_spec['validation'])
+        except KeyError:
+            logger.error('No training/validation set: skipping fine-tuning')
+        else:
+            try:
+                finetune_method = model_config['finetune_method']
+                finetune_config = model_config['finetune_rate']
+                momentum = model_config['finetune_momentum']
+                lrate = LearningRate.get_instance(finetune_method, finetune_config)
+            except KeyError as e:
+                print('Key missing: ' + str(e))
+                print('Fine-tuning parameters missing')
+                sys.exit(2)
+
+            fineTunning(dnn, train_sets, train_xy, train_x, train_y,
+                        valid_sets, valid_xy, valid_x, valid_y,
+                        lrate, momentum, batch_size)
+
+    ########################
+    #  TESTING THE MODEL   #
+    ########################
+    if model_config['processes']['testing']:
+        try:
+            test_sets, test_xy, test_x, test_y = read_dataset(data_spec['testing'])
+        except KeyError:
+            logger.info('No testing set: skipping testing')
+        else:
+            testing(dnn, test_sets, test_xy, test_x, test_y, batch_size)
+
+    logger.info('Saving model to ' + str(model_config['output_file']) + ' ...')
     if do_dropout:
-        _nnet2file(dnn.sigmoid_layers, filename=wdir + '/nnet.finetune.tmp', input_factor = input_dropout_factor, factor = dropout_factor)
+        _nnet2file(dnn.sigmoid_layers, filename=model_config['output_file'],
+                   input_factor = input_dropout_factor, factor = dropout_factor)
     else:
-        _nnet2file(dnn.sigmoid_layers, filename=wdir + '/nnet.finetune.tmp')
-
-    # determine whether it's BNF based on layer sizes
-    set_layer_num = -1
-    withfinal = True
-    bnf_layer_index = 1
-    while bnf_layer_index < len(hidden_layers_sizes):
-        if hidden_layers_sizes[bnf_layer_index] < hidden_layers_sizes[bnf_layer_index - 1]:
-            break
-        bnf_layer_index = bnf_layer_index + 1
-
-    if bnf_layer_index < len(hidden_layers_sizes): # is bottleneck
-        set_layer_num = bnf_layer_index + 1
-        withfinal = False
-
-    end_time = time.clock()
-
-    logger.info('The Training ran for %.2fm' % ((end_time - start_time) / 60.))
-
-    if do_maxout:
-        _nnet2janus_maxout(nnet_spec, pool_size = pool_size, set_layer_num = set_layer_num, filein = wdir + '/nnet.finetune.tmp', fileout = output_file, withfinal=withfinal)
-    else:
-        _nnet2janus(nnet_spec, set_layer_num = set_layer_num, filein = wdir + '/nnet.finetune.tmp', fileout = output_file, withfinal=withfinal)
-
+        _nnet2file(dnn.sigmoid_layers, filename=model_config['output_file'])
 
 if __name__ == '__main__':
     import sys
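
The new control flow gates each stage on both a processes flag in model_conf.json and the presence of the matching split in data_spec.json, using try/except/else: a missing key skips the stage, and the else branch runs only when the lookup succeeded. A standalone illustration of that pattern (the names here are hypothetical, not the repository's API):

def run_stage(data_spec, split, stage):
    # Look up the split first; KeyError means the spec omits it.
    try:
        settings = data_spec[split]
    except KeyError:
        print('No %s set: skipping %s' % (split, stage.__name__))
    else:
        # Reached only if the lookup above raised nothing.
        stage(settings)

def testing(settings):
    print('testing on ' + settings['filename'])

run_stage({'training': {'filename': 'train.dat'}}, 'testing', testing)
# -> No testing set: skipping testing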
