@@ -46,6 +46,7 @@ def location_choice_model(
4646 settings_file = "{name}_model_settings.yaml" ,
4747 landuse_file = "{name}_landuse.csv" ,
4848 return_data = False ,
49+ alt_values_to_feather = False ,
4950 chunking_size = None ,
5051):
5152 model_selector = name .replace ("_location" , "" )
@@ -85,12 +86,15 @@ def _file_exists(filename):
8586 index_col = "coefficient_name" ,
8687 )
8788 spec = _read_csv (spec_file , comment = "#" )
89+
90+ # read alternative values either as csv or feather file
8891 alt_values_fea_file = alt_values_file .replace (".csv" , ".fea" )
8992 if os .path .exists (os .path .join (edb_directory , alt_values_fea_file .format (name = name ))):
9093 alt_values = _read_feather (alt_values_fea_file )
9194 else :
9295 alt_values = _read_csv (alt_values_file )
93- _to_feather (df = alt_values , filename = alt_values_fea_file )
96+ if alt_values_to_feather :
97+ _to_feather (df = alt_values , filename = alt_values_fea_file )
9498 chooser_data = _read_csv (chooser_file )
9599 landuse = _read_csv (landuse_file , index_col = "zone_id" )
96100 master_size_spec = _read_csv (size_spec_file )
@@ -181,10 +185,12 @@ def split(a, n):
181185 k , m = divmod (len (a ), n )
182186 return (a [i * k + min (i , m ):(i + 1 ) * k + min (i + 1 , m )] for i in range (n ))
183187
188+ # process x_ca with cv_to_ca with or without chunking
184189 x_ca_pickle_file = "{name}_x_ca.pkl"
185190 if chunking_size == None :
186191 x_ca = cv_to_ca (alt_values .set_index ([chooser_index_name , alt_values .columns [1 ]]))
187192 elif _file_exists (x_ca_pickle_file ):
193+ # if a pickle file from previous x_ca processing exists, load it to save time
188194 time_start = datetime .now ()
189195 x_ca = _read_pickle (x_ca_pickle_file )
190196 print (
@@ -208,6 +214,7 @@ def split(a, n):
208214 )
209215 i = i + 1
210216 x_ca = pd .concat (x_ca_list , axis = 0 )
217+ # save final x_ca result as pickle file to save time for future data loading
211218 _to_pickle (df = x_ca , filename = x_ca_pickle_file )
212219 print (
213220 f"x_ca compute done - time elapsed { (datetime .now () - time_start ).total_seconds ()} " )
0 commit comments