"""Generate a YAML configuration file for optimization_process.py.

The configuration is read from an Excel workbook whose path is given on the
command line via ``--config_excel``.
"""
3+
4+ import pandas as pd
5+ import ast
6+ import logging
7+ import numpy as np
8+ import datetime
9+ import argparse
10+ import yaml
11+
12+
# Configure root logging at import time: INFO and above, each record
# prefixed with a timestamp and its severity level.
_LOG_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)

# Module-level logger used by the functions in this script.
logger = logging.getLogger(__name__)
18+
def parse_arguments(argv=None) -> argparse.Namespace:
    """Parse the command-line options of the Excel-to-YAML converter.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse reads ``sys.argv[1:]``, which is the
            behaviour of the original zero-argument call.

    Returns:
        The parsed namespace; ``config_excel`` holds the path to the Excel
        configuration file.

    Raises:
        SystemExit: Raised by argparse when ``--config_excel`` is missing or
            the arguments are otherwise invalid.
    """
    parser = argparse.ArgumentParser(
        description="Convert Excel config to YAML for optimization pipeline."
    )
    parser.add_argument(
        "--config_excel",
        type=str,
        required=True,
        help="Path to Excel configuration file (e.g., data/test_config.xlsx)",
    )
    return parser.parse_args(argv)
28+
29+
# Settings that must be present in the 'optimization_settings' sheet.
# 'output_filepath' and 'output_name' are listed as well because main()
# needs them to build the path of the YAML file it writes.
_REQUIRED_OPTIMIZATION_SETTINGS = (
    "model_filepath", "model_name", "target", "n_cycles",
    "noise_type", "noise_percentage", "t_start", "t_end", "timepoints",
    "output_filepath", "output_name",
)


def _is_missing(value) -> bool:
    """Return True when a cell value is absent (None or a float NaN)."""
    # pandas reads empty Excel cells as NaN; restrict the NaN test to floats
    # so that strings and lists never reach np.isnan (which would raise).
    return value is None or (isinstance(value, float) and np.isnan(value))


def _load_excel_sheets(config_excel):
    """Read the three configuration sheets from the Excel workbook.

    Returns:
        A tuple ``(optimization_settings, cycle_info, parameter_values)``:
        a name -> setting dict, the ``cycle_info`` sheet as a
        ``{column: {row_label: value}}`` dict, and a parameter name -> value
        dict whose values were parsed with ``ast.literal_eval``.
    """
    settings_sheet = pd.read_excel(config_excel, sheet_name='optimization_settings')
    optimization_settings = dict(
        zip(settings_sheet['names'].values, settings_sheet['settings'].values)
    )

    # First sheet column becomes the row labels; to_dict() then yields one
    # inner dict per remaining column, which main() indexes by cycle number.
    cycle_info = pd.read_excel(
        config_excel, sheet_name='cycle_info', index_col=0
    ).to_dict()

    parameter_sheet = pd.read_excel(config_excel, sheet_name='parameter_names_and_values')
    # The 'values' cells hold Python literals as text; literal_eval parses
    # them without executing arbitrary code.
    parameter_values = {
        name: ast.literal_eval(raw)
        for name, raw in zip(
            parameter_sheet['parameter_name'].values,
            parameter_sheet['values'].values,
        )
    }
    return optimization_settings, cycle_info, parameter_values


def _build_cycle_entry(cycle_index, cycle, noise_percentage, noise_type):
    """Build the ``design_build_test`` / ``learn_recommend`` entry of one cycle.

    Raises:
        KeyError: If a mandatory row (e.g. ``n_strains``) is absent.
        ValueError: If the design method is ``ml_assisted_library_transform``
            and the sheet has no ``beta`` row at all.
    """
    design_build_test = {
        "cycle_status": cycle_index,
        "n_strains": cycle["n_strains"],
        "n_engineered_positions": cycle['n_engineered_positions'],
        "design_method": cycle['design_method'],  # this should be a list input
        "noise_percentage": noise_percentage,
        "noise_type": noise_type,
    }

    method_hyperparams = {
        'sequencing_selection_method': cycle['sequencing_selection_method'],
        'n_screened_strains': cycle['n_screened_strains'],
    }

    # NOTE(review): indentation was lost in the reviewed source; the beta,
    # ml_method and data_strategy handling is assumed to apply only to
    # ml_assisted_library_transform cycles -- confirm against the original.
    if cycle['design_method'] == "ml_assisted_library_transform":
        if 'beta' not in cycle:
            # Explicit raise instead of assert so the check survives `python -O`.
            raise ValueError(
                "cycle_info sheet needs a 'beta' row for ml_assisted_library_transform"
            )

        beta = cycle['beta']
        if _is_missing(beta):
            # Kept as a logged error (not an exception), as in the original.
            logger.error("For ml_assisted_library_transform, beta needs to be specified in excel file")
        else:
            method_hyperparams['beta'] = 2 ** beta

        # The original tested for 'ml_method' but read 'ml_model', which
        # raised KeyError. Read 'ml_method' and fall back to the legacy
        # 'ml_model' row; an empty cell counts as "not specified".
        ml_method = cycle.get('ml_method')
        if _is_missing(ml_method):
            ml_method = cycle.get('ml_model')
        if _is_missing(ml_method):
            logger.info("ml_method not specified in cycle_info, automatically set to xgboost.")
            ml_method = "xgboost"
        method_hyperparams['ml_method'] = ml_method

        data_strategy = cycle.get('data_strategy')
        if _is_missing(data_strategy):
            data_strategy = "all"
            logger.info("data_strategy not specified in cycle_info, automatically set to all strategy.")
        method_hyperparams['data_strategy'] = data_strategy

    design_build_test['design_method_hyperparams'] = method_hyperparams

    learn_recommend = {
        "recommender_method": cycle['strain_recommendation'],
        "recommender_method_hyperparams": None,
    }

    return {
        "design_build_test": design_build_test,
        "learn_recommend": learn_recommend,
    }


def main():
    """Convert the Excel configuration into a YAML file for the pipeline.

    Reads the sheets ``optimization_settings``, ``cycle_info`` and
    ``parameter_names_and_values`` from the workbook passed via
    ``--config_excel``, assembles the configuration dictionary and writes it
    to ``<output_filepath>/<output_name>.yml``.

    Raises:
        ValueError: If a required optimization setting is missing, or an
            ``ml_assisted_library_transform`` cycle has no ``beta`` row.
        KeyError: If a cycle column or a mandatory cycle row is missing from
            the ``cycle_info`` sheet.
    """
    args = parse_arguments()
    logger.info("Reading Excel configuration from %s", args.config_excel)

    optimization_settings, cycle_info, parameter_names_and_values = _load_excel_sheets(
        args.config_excel
    )

    # Validate before any setting is used, and name what is missing.
    missing = [
        key for key in _REQUIRED_OPTIMIZATION_SETTINGS
        if key not in optimization_settings
    ]
    if missing:
        raise ValueError(
            f"optimization_settings sheet is missing required entries: {missing}"
        )

    output_config_name = (
        f"{optimization_settings['output_filepath']}/{optimization_settings['output_name']}"
    )

    # model_filepath and model_name are merged into a single path and do not
    # appear as separate entries in the generated configuration.
    model_directory = optimization_settings.pop("model_filepath")
    model_name = optimization_settings.pop("model_name")
    model_filepath = f"{model_directory}/{model_name}.xml"

    # Key order matters for the YAML output (sort_keys=False): model path
    # first, then the remaining settings in sheet order, then the parameters.
    settings = {"model_filepath": model_filepath, **optimization_settings}
    settings['parameters_perturbation_values'] = parameter_names_and_values
    config = {'optimization_settings': settings}

    # Excel may deliver the cycle count as a float (e.g. 3.0); range() needs an int.
    n_cycles = int(settings['n_cycles'])

    config['cycles'] = {
        f"cycle_{i}": _build_cycle_entry(
            i,
            cycle_info[i],
            settings['noise_percentage'],
            settings['noise_type'],
        )
        for i in range(n_cycles)
    }

    with open(f"{output_config_name}.yml", "w", encoding="utf-8") as file:
        yaml.dump(config, file, sort_keys=False)
113+
114+
# Run the conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
0 commit comments