Skip to content

Commit 2392e3a

Browse files
committed
Put the CNN model into a single file; feature exporting done
1 parent a9be9b7 commit 2392e3a

12 files changed

Lines changed: 123 additions & 139 deletions

File tree

data/TFILE/val.dat

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
50 2
2-
val_c1.dat
3-
val_c2.dat
2+
c1/val_c1.dat
3+
c2/val_c2.dat
44

55

io_modules/file_reader.py

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,13 @@
66
import logging
77
logger = logging.getLogger(__name__)
88

9-
def read_dataset(options,batch_size,pad_zeros=False):
9+
def read_dataset(options,pad_zeros=False):
1010
filepath = options['base_path'] + os.sep + options['filename'];
1111
logger.info("%s dataset will be initialized to reader to %s",
1212
options['reader_type'],filepath);
1313
logger.debug("options : %s" % str(options))
1414

15-
file_reader = FileReader.get_instance(filepath,batch_size,options)
15+
file_reader = FileReader.get_instance(filepath,options)
1616
file_header = file_reader.read_file_info()
1717

1818
shared_xy = file_reader.create_shared(pad_zeros)
@@ -39,16 +39,16 @@ class FileReader(object):
3939
num_pad_frames = 0;
4040

4141
@staticmethod
42-
def get_instance(filepath,batch_size,options):
42+
def get_instance(filepath,options):
4343
file_reader = None;
4444
if options['reader_type']=='NP':
45-
file_reader = NPFileReader(filepath,batch_size,options);
45+
file_reader = NPFileReader(filepath,options);
4646
elif options['reader_type']=='TD':
47-
file_reader = TDFileReader(filepath,batch_size,options);
47+
file_reader = TDFileReader(filepath,options);
4848
elif options['reader_type']=='T1':
49-
file_reader = T1FileReader(filepath,batch_size,options);
49+
file_reader = T1FileReader(filepath,options);
5050
elif options['reader_type']=='T2':
51-
file_reader = T2FileReader(filepath,batch_size,options);
51+
file_reader = T2FileReader(filepath,options);
5252
else:
5353
logger.critical('\'%s\' reader_type is not defined...'\
5454
%options['reader_type'])
@@ -125,10 +125,10 @@ def initialize_read(self):
125125

126126
class TDFileReader(FileReader):
127127
''' Reads the data stored in as Simple Text File'''
128-
def __init__(self,path,batch_size,options):
128+
def __init__(self,path,options):
129129
self.filepath = path;
130130
self.options = options;
131-
self.batch_size = batch_size
131+
self.batch_size = options['batch_size']
132132
self.lbl = options['label'];
133133
self.filehandle = open(self.filepath,'rb')
134134

@@ -192,10 +192,10 @@ def read_next_partition_data(self,already_read=0,pad_zeros=False):
192192

193193
class T2FileReader(FileReader):
194194
''' Reads the data stored in as Simple Text File With Two level header structure'''
195-
def __init__(self,path,batch_size,options):
195+
def __init__(self,path,options):
196196
self.filepath = path;
197197
self.options=options;
198-
self.batch_size=batch_size
198+
self.batch_size = options['batch_size']
199199
self.filehandle = open(self.filepath,'rb')
200200

201201
def read_file_info(self):
@@ -300,10 +300,10 @@ def read_next_partition_data(self,already_read=0,pad_zeros=False):
300300

301301
class T1FileReader(FileReader):
302302
''' Reads the data stored in as Simple Text File With One level header structure'''
303-
def __init__(self,path,batch_size,options):
303+
def __init__(self,path,options):
304304
self.filepath = path;
305-
self.batch_size = batch_size
306305
self.options=options;
306+
self.batch_size = options['batch_size']
307307
self.filehandle = open(self.filepath,'rb')
308308

309309
def read_file_info(self):
@@ -334,7 +334,7 @@ def read_file_info(self):
334334
child_options['label']= i;
335335
child_options['keep_flatten'] = True
336336
data_file = child_options['base_path'] + os.sep + child_options['filename']
337-
self.filehandles.append(TDFileReader(data_file,self.batch_size,child_options));
337+
self.filehandles.append(TDFileReader(data_file,child_options));
338338
self.filehandles[-1].read_file_info();
339339

340340
if self.frames_per_partition < self.classes:
@@ -393,10 +393,10 @@ def read_next_partition_data(self,already_read=0,pad_zeros=False):
393393

394394
class NPFileReader(FileReader):
395395
''' Reads the data stored in as Numpy Array'''
396-
def __init__(self,path,batch_size,options):
396+
def __init__(self,path,options):
397397
self.filepath = path;
398-
self.batch_size = batch_size
399398
self.options=options;
399+
self.batch_size = options['batch_size']
400400
self.filehandle = open(self.filepath,'rb')
401401

402402
def read_file_info(self):

io_modules/file_writer.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ def write_data(self,vector_array,labels):
7171
flatten_vector = vector.flatten();
7272
if self.feat_dim!=len(flatten_vector):
7373
logger.critical('Feature dimension mentioned in header and vector length are mismatching');
74+
exit(0)
7475
else:
7576
data['d']=flatten_vector; data['l']=label;
7677
data.tofile(self.filehandle);
@@ -97,7 +98,8 @@ def write_data(self,vector_array,labels):
9798
for vector,label in zip(vector_array,labels):
9899
flatten_vector = vector.flatten();
99100
if self.feat_dim!=len(flatten_vector):
100-
logger.critical('Feature dimension mentioned in header and vector length are mismatching');
101+
logger.critical('Feature dimension mentioned in header and vector length are mismatching');
102+
exit(0)
101103
else:
102104
for element in vector:
103105
self.filehandle.write('%f ' % element)

io_modules/model_io.py

Lines changed: 61 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ def array_2_string(array):
1515
def string_2_array(string):
1616
str_in = StringIO(string)
1717
return np.loadtxt(str_in)
18+
1819

1920
def _nnet2file(layers, set_layer_num = -1, filename='nnet.out', activation='sigmoid', start_layer = 0, withfinal=True,
2021
input_factor = 0.0, factor=[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0]):
@@ -64,47 +65,93 @@ def _file2nnet(layers, set_layer_num = -1, filename='nnet.in', activation='sigmo
6465
layers[-1].params[1].set_value(np.asarray(string_2_array(nnet_dict[dict_key]), dtype=theano.config.floatX))
6566
logger.info('Loaded the neural_net model in %s',(filename))
6667

67-
def _cnn2file(conv_layers, filename='nnet.out', activation='sigmoid', withfinal=True, input_factor = 1.0, factor=1.0):
68+
def _cnn2file(conv_layers,mlp_layers,filename='nnet.out',
69+
start_layer = 0,set_layer_num=-1,withfinal=True, input_factor = 1.0, factor=1.0):
70+
#Dumping CNN Configuration
6871
n_layers = len(conv_layers)
69-
nnet_dict = {}
72+
cnn_dict = {}
7073
for i in xrange(n_layers):
7174
conv_layer = conv_layers[i]
7275
filter_shape = conv_layer.filter_shape
73-
7476
for next_X in xrange(filter_shape[0]):
7577
for this_X in xrange(filter_shape[1]):
7678
dict_a = 'W ' + str(i) + ' ' + str(next_X) + ' ' + str(this_X)
7779
if i == 0:
78-
nnet_dict[dict_a] = array_2_string(input_factor * (conv_layer.W.get_value())[next_X, this_X])
80+
cnn_dict[dict_a] = array_2_string(input_factor * (conv_layer.W.get_value())[next_X, this_X])
7981
else:
80-
nnet_dict[dict_a] = array_2_string(factor * (conv_layer.W.get_value())[next_X, this_X])
82+
cnn_dict[dict_a] = array_2_string(factor * (conv_layer.W.get_value())[next_X, this_X])
8183

8284
dict_a = 'b ' + str(i)
83-
nnet_dict[dict_a] = array_2_string(conv_layer.b.get_value())
85+
cnn_dict[dict_a] = array_2_string(conv_layer.b.get_value())
86+
87+
#Dumping MLP Configuration
88+
n_layers = len(mlp_layers)
89+
mlp_dict = {}
90+
if set_layer_num == -1:
91+
set_layer_num = n_layers - 1
92+
for i in range(start_layer,set_layer_num):
93+
dict_a = str(i) +' W'
94+
if i == 0:
95+
mlp_dict[dict_a] = array_2_string(input_factor * mlp_layers[i].params[0].get_value())
96+
else:
97+
print mlp_layers[i].params[0]
98+
mlp_dict[dict_a] = array_2_string(factor * mlp_layers[i].params[0].get_value())
99+
dict_a = str(i) + ' b'
100+
mlp_dict[dict_a] = array_2_string(mlp_layers[i].params[1].get_value())
101+
102+
if withfinal:
103+
dict_a = 'logreg W'
104+
mlp_dict[dict_a] = array_2_string(factor * mlp_layers[-1].params[0].get_value())
105+
dict_a = 'logreg b'
106+
mlp_dict[dict_a] = array_2_string(mlp_layers[-1].params[1].get_value())
84107

108+
nnet_dict = {};
109+
nnet_dict['cnn'] = cnn_dict;
110+
nnet_dict['mlp'] = mlp_dict;
111+
85112
with open(filename, 'wb') as fp:
86113
json.dump(nnet_dict, fp, indent=2, sort_keys = True)
87114
fp.flush()
88115
logger.info('Dumped the conv_net model in %s',(filename))
89116

90-
def _file2cnn(conv_layers, filename='nnet.in', activation='sigmoid', withfinal=True, factor=1.0):
91-
n_layers = len(conv_layers)
117+
def _file2cnn(conv_layers,mlp_layers, filename='nnet.in',set_layer_num=-1,withfinal=True, factor=1.0):
92118
nnet_dict = {}
93-
119+
94120
with open(filename, 'rb') as fp:
95121
nnet_dict = json.load(fp)
122+
123+
##Loading CNN Configuration
124+
n_layers = len(conv_layers)
125+
cnn_dict = nnet_dict['cnn'];
96126
for i in xrange(n_layers):
97127
conv_layer = conv_layers[i]
98128
filter_shape = conv_layer.filter_shape
99129
W_array = conv_layer.W.get_value()
100-
130+
101131
for next_X in xrange(filter_shape[0]):
102132
for this_X in xrange(filter_shape[1]):
103133
dict_a = 'W ' + str(i) + ' ' + str(next_X) + ' ' + str(this_X)
104-
W_array[next_X, this_X, :, :] = factor * np.asarray(string_2_array(nnet_dict[dict_a]))
105-
134+
W_array[next_X, this_X, :, :] = factor * np.asarray(string_2_array(cnn_dict[dict_a]))
135+
106136
conv_layer.W.set_value(W_array)
107-
108137
dict_a = 'b ' + str(i)
109-
conv_layer.b.set_value(np.asarray(string_2_array(nnet_dict[dict_a]), dtype=theano.config.floatX))
138+
conv_layer.b.set_value(np.asarray(string_2_array(cnn_dict[dict_a]), dtype=theano.config.floatX))
139+
140+
##Loading MLP Configuration
141+
if not mlp_layers is None:
142+
n_layers = len(mlp_layers)
143+
mlp_dict = nnet_dict['mlp'];
144+
if set_layer_num == -1:
145+
set_layer_num = n_layers - 1
146+
for i in xrange(set_layer_num):
147+
dict_key = str(i) + ' W'
148+
mlp_layers[i].params[0].set_value(factor * np.asarray(string_2_array(mlp_dict[dict_key]), dtype=theano.config.floatX))
149+
dict_key = str(i) + ' b'
150+
mlp_layers[i].params[1].set_value(np.asarray(string_2_array(mlp_dict[dict_key]), dtype=theano.config.floatX))
151+
if withfinal:
152+
dict_key = 'logreg W'
153+
mlp_layers[-1].params[0].set_value(np.asarray(string_2_array(mlp_dict[dict_key]), dtype=theano.config.floatX))
154+
dict_key = 'logreg b'
155+
mlp_layers[-1].params[1].set_value(np.asarray(string_2_array(mlp_dict[dict_key]), dtype=theano.config.floatX))
156+
110157
logger.info('Loaded the conv_net model in %s',(filename))

models/__init__.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ def __init__(self):
1919
#placeholders
2020
self.output = None
2121
self.features = None
22+
self.features_dim = None
2223
self.errors = None
2324
self.finetune_cost = None
2425

@@ -129,7 +130,7 @@ def getFeaturesFunction(self):
129130
A function takes input features
130131
"""
131132
#in_x = T.matrix('in_x');
132-
in_x = x.type('in_x');
133-
fn = theano.function(inputs=[in_x],outputs=[self.features],
133+
in_x = self.x.type('in_x');
134+
fn = theano.function(inputs=[in_x],outputs=self.features,
134135
givens={self.x: in_x},name='features')#,on_unused_input='warn')
135-
return fn
136+
return fn

models/cnn.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -77,10 +77,12 @@ def __init__(self, numpy_rng, theano_rng, batch_size, n_outs,conv_layer_configs,
7777
self.logLayer = LogisticRegression(input=self.layers[-1].output,n_in=hidden_layers_sizes[-1],n_out=n_outs)
7878

7979
self.layers.append(self.logLayer)
80-
self.params.extend(self.logLayer.params)
81-
self.delta_params.extend(self.logLayer.delta_params)
80+
self.params.extend(self.logLayer.params)
81+
self.delta_params.extend(self.logLayer.delta_params)
8282

8383
self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
84-
8584
self.errors = self.logLayer.errors(self.y)
86-
self.output = self.logLayer.prediction();
85+
self.output = self.logLayer.prediction()
86+
87+
self.features = self.conv_layers[-1].output;
88+
self.features_dim = self.conv_output_dim;

run/__init__.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -81,13 +81,11 @@ def valid_score():
8181
return best_validation_loss
8282

8383

84-
def getFeatures(nnetModel,data_spec_testing):
84+
def exportFeatures(nnetModel,export_path,data_spec_testing):
85+
from io_modules.data_exporter import export_data
86+
print nnetModel.features_dim
8587
out_function = nnetModel.getFeaturesFunction()
86-
test_sets, test_xy, test_x, test_y = read_dataset(data_spec_testing)
87-
while (not test_sets.is_finish()):
88-
data = out_function(test_sets.feat)
89-
test_sets.read_next_partition_data()
90-
#TODO write data
88+
export_data(data_spec_testing,export_path,out_function,nnetModel.features_dim);
9189

9290
def createDir(wdir):
9391
"""create working dir"""

run/run_CNN.py

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,10 @@
2727
from io_modules.file_reader import read_dataset
2828
from utils.learn_rates import LearningRate
2929
from utils.utils import parse_activation
30-
from io_modules.model_io import _cnn2file,_nnet2file
30+
from io_modules.model_io import _cnn2file,_file2cnn
3131
from io_modules import setLogger
3232

33-
from run import fineTunning,testing,createDir
33+
from run import fineTunning,testing,exportFeatures,createDir
3434

3535
import logging
3636
logger = logging.getLogger(__name__)
@@ -46,7 +46,7 @@ def runCNN(arg):
4646
conv_config,conv_layer_config,mlp_config = load_conv_spec(model_config['nnet_spec'],model_config['batch_size'],
4747
model_config['input_shape'])
4848

49-
data_spec = load_data_spec(model_config['data_spec']);
49+
data_spec = load_data_spec(model_config['data_spec'],model_config['batch_size']);
5050

5151

5252
numpy_rng = numpy.random.RandomState(89677)
@@ -68,20 +68,19 @@ def runCNN(arg):
6868
n_outs=model_config['n_outs'],hidden_layers_sizes=mlp_config['layers'], conv_activation = conv_activation,
6969
hidden_activation = hidden_activation,use_fast = conv_config['use_fast'])
7070

71-
train_sets, train_xy, train_x, train_y = read_dataset(data_spec['training'],model_config['batch_size'])
72-
valid_sets, valid_xy, valid_x, valid_y = read_dataset(data_spec['validation'],model_config['batch_size'])
71+
train_sets, train_xy, train_x, train_y = read_dataset(data_spec['training'])
72+
valid_sets, valid_xy, valid_x, valid_y = read_dataset(data_spec['validation'])
7373

7474
err=fineTunning(cnn,train_sets,train_xy,train_x,train_y,
75-
valid_sets,valid_xy,valid_x,valid_y,lrate,momentum,batch_size);
75+
valid_sets,valid_xy,valid_x,valid_y,lrate,momentum,batch_size);
7676

77-
_cnn2file(cnn.layers[0:cnn.conv_layer_num], filename=model_config['output_file'],activation=conv_config['activation']);
78-
_nnet2file(cnn.layers[cnn.conv_layer_num:], filename=model_config['output_file'],activation=mlp_config['activation']);
77+
_cnn2file(cnn.layers[0:cnn.conv_layer_num],cnn.layers[cnn.conv_layer_num:], filename=model_config['output_file']);
7978

8079
####################
8180
## TESTING ##
8281
####################
8382
try:
84-
test_sets, test_xy, test_x, test_y = read_dataset(data_spec['testing'],model_config['batch_size'])
83+
test_sets, test_xy, test_x, test_y = read_dataset(data_spec['testing'])
8584
except KeyError:
8685
#raise e
8786
logger.info("No testing set:Skiping Testing");
@@ -90,7 +89,13 @@ def runCNN(arg):
9089

9190
pred,err=testing(cnn,test_sets, test_xy, test_x, test_y,batch_size)
9291

92+
####################
93+
## Export Features ##
94+
####################
95+
mlp_layers = cnn.layers[cnn.conv_layer_num:]
96+
_file2cnn(cnn.conv_layers,mlp_layers, filename=model_config['output_file'])
97+
98+
exportFeatures(cnn,model_config['export_path'],data_spec['testing'])
9399

94100
if __name__ == '__main__':
95-
setLogger(level="DEBUG");
96101
runCNN(sys.argv[1])

0 commit comments

Comments (0)