66import logging
77logger = logging .getLogger (__name__ )
88
9- def read_dataset (options ,pad_zeros = False ):
9+ def read_dataset (options ,batch_size , pad_zeros = False ):
1010 filepath = options ['base_path' ] + os .sep + options ['filename' ];
1111 logger .info ("%s dataset will be initialized to reader to %s" ,
1212 options ['reader_type' ],filepath );
1313 logger .debug ("options : %s" % str (options ))
1414
15- file_reader = FileReader .get_instance (filepath ,options )
15+ file_reader = FileReader .get_instance (filepath ,batch_size , options )
1616 file_header = file_reader .read_file_info ()
1717
1818 shared_xy = file_reader .create_shared (pad_zeros )
@@ -39,16 +39,16 @@ class FileReader(object):
3939 num_pad_frames = 0 ;
4040
4141 @staticmethod
42- def get_instance (filepath ,options ):
42+ def get_instance (filepath ,batch_size , options ):
4343 file_reader = None ;
4444 if options ['reader_type' ]== 'NP' :
45- file_reader = NPFileReader (filepath ,options );
45+ file_reader = NPFileReader (filepath ,batch_size , options );
4646 elif options ['reader_type' ]== 'TD' :
47- file_reader = TDFileReader (filepath ,options );
47+ file_reader = TDFileReader (filepath ,batch_size , options );
4848 elif options ['reader_type' ]== 'T1' :
49- file_reader = T1FileReader (filepath ,options );
49+ file_reader = T1FileReader (filepath ,batch_size , options );
5050 elif options ['reader_type' ]== 'T2' :
51- file_reader = T2FileReader (filepath ,options );
51+ file_reader = T2FileReader (filepath ,batch_size , options );
5252 else :
5353 logger .critical ('\' %s\' reader_type is not defined...' \
5454 % options ['reader_type' ])
@@ -125,9 +125,10 @@ def initialize_read(self):
125125
126126class TDFileReader (FileReader ):
127127 ''' Reads data stored as a simple text file'''
128- def __init__ (self ,path ,options ):
128+ def __init__ (self ,path ,batch_size , options ):
129129 self .filepath = path ;
130- self .options = options ;
130+ self .options = options ;
131+ self .batch_size = batch_size
131132 self .lbl = options ['label' ];
132133 self .filehandle = open (self .filepath ,'rb' )
133134
@@ -141,7 +142,7 @@ def read_file_info(self):
141142 #self.frames_remaining = long(self.header[1])
142143 # 'partition' specifies the approximate amount of data to be loaded in one operation
143144 self .frames_per_partition = self .options ['partition' ] * 1000 * 1000 / (self .feat_dim * 4 )
144- batch_residual = self .frames_per_partition % self .options [ ' batch_size' ]
145+ batch_residual = self .frames_per_partition % self .batch_size
145146 self .frames_per_partition = self .frames_per_partition - batch_residual
146147 return self .header
147148
@@ -191,9 +192,10 @@ def read_next_partition_data(self,already_read=0,pad_zeros=False):
191192
192193class T2FileReader (FileReader ):
193194 ''' Reads data stored as a simple text file with a two-level header structure'''
194- def __init__ (self ,path ,options ):
195+ def __init__ (self ,path ,batch_size , options ):
195196 self .filepath = path ;
196197 self .options = options ;
198+ self .batch_size = batch_size
197199 self .filehandle = open (self .filepath ,'rb' )
198200
199201 def read_file_info (self ):
@@ -213,7 +215,7 @@ def read_file_info(self):
213215 self .header ['featdim' ] = self .feat_dim
214216 self .header ['classes' ] = self .classes
215217
216- batch_size = self .options [ ' batch_size' ]
218+ batch_size = self .batch_size
217219
218220 # 'partition' specifies the approximate amount of data to be loaded in one operation
219221 self .frames_per_partition = self .options ['partition' ] * 1000 * 1000 / (self .feat_dim * 4 )
@@ -276,6 +278,7 @@ def read_next_partition_data(self,already_read=0,pad_zeros=False):
276278 self .partition_num = self .partition_num + 1
277279
278280 if not self .options ['keep_flatten' ] : #reshape the vector if needed
281+ logger .debug ('T2 Filereader : Reshape input...' )
279282 shape = [self .cur_frame_num ];
280283 shape .extend (self .options ['input_shape' ]);
281284 self .feat = self .feat .reshape (shape );
@@ -297,8 +300,9 @@ def read_next_partition_data(self,already_read=0,pad_zeros=False):
297300
298301class T1FileReader (FileReader ):
299302 ''' Reads data stored as a simple text file with a one-level header structure'''
300- def __init__ (self ,path ,options ):
303+ def __init__ (self ,path ,batch_size , options ):
301304 self .filepath = path ;
305+ self .batch_size = batch_size
302306 self .options = options ;
303307 self .filehandle = open (self .filepath ,'rb' )
304308
@@ -314,7 +318,7 @@ def read_file_info(self):
314318
315319 self .header = {};
316320 self .header ['featdim' ] = self .feat_dim
317- batch_size = self .options [ ' batch_size' ]
321+ batch_size = self .batch_size
318322
319323 logger .debug ('T1 Filereader : feat : %d' % self .feat_dim )
320324
@@ -370,6 +374,7 @@ def read_next_partition_data(self,already_read=0,pad_zeros=False):
370374 self .partition_num = self .partition_num + 1
371375
372376 if not self .options ['keep_flatten' ] : #reshape the vector if needed
377+ logger .debug ('T1 Filereader : Reshape input...' )
373378 shape = [self .cur_frame_num ];
374379 shape .extend (self .options ['input_shape' ]);
375380 self .feat = self .feat .reshape (shape );
@@ -388,8 +393,9 @@ def read_next_partition_data(self,already_read=0,pad_zeros=False):
388393
389394class NPFileReader (FileReader ):
390395 ''' Reads data stored as a NumPy array'''
391- def __init__ (self ,path ,options ):
396+ def __init__ (self ,path ,batch_size , options ):
392397 self .filepath = path ;
398+ self .batch_size = batch_size
393399 self .options = options ;
394400 self .filehandle = open (self .filepath ,'rb' )
395401
@@ -400,7 +406,7 @@ def read_file_info(self):
400406 logger .debug ("NP Filereader : feats : %d" % self .feat_dim );
401407 # 'partition' specifies the approximate amount of data to be loaded in one operation
402408 self .frames_per_partition = self .options ['partition' ] * 1000 * 1000 / (self .feat_dim * 4 )
403- batch_residual = self .frames_per_partition % self .options [ ' batch_size' ]
409+ batch_residual = self .frames_per_partition % self .batch_size
404410 self .frames_per_partition = self .frames_per_partition - batch_residual
405411 self .dtype = numpy .dtype ({'names' : ['d' ,'l' ],'formats' : [('>f2' ,self .feat_dim ),'>i2' ]})
406412 return self .header
@@ -417,9 +423,11 @@ def read_next_partition_data(self,already_read=0,pad_zeros=False):
417423 for x in xrange (self .num_pad_frames ):
418424 self .label = numpy .append (self .label ,[0 ]* self .feat_dim )
419425
420- logger .debug ('NP Filereader : from file %s, %d partition has %d frames' % (self .filepath ,self .partition_num ,self .cur_frame_num ));
426+ logger .debug ('NP Filereader : from file %s, %d partition has %d frames' ,
427+ self .filepath ,self .partition_num ,self .cur_frame_num );
421428
422429 if not self .options ['keep_flatten' ] : #reshape the vector if needed
430+ logger .debug ('NP Filereader : Reshape input...' )
423431 shape = [self .cur_frame_num ];
424432 shape .extend (self .header ['input_shape' ]);
425433 self .feat = self .feat .reshape (shape );
0 commit comments