Skip to content

Commit c5c9ce7

Browse files
committed
initial analysis experiments
1 parent 0088ac3 commit c5c9ce7

5 files changed

Lines changed: 974 additions & 6 deletions

File tree

scripts/analysis_experiments.ipynb

Lines changed: 441 additions & 0 deletions
Large diffs are not rendered by default.

scripts/analysis_experiments_r2.ipynb

Lines changed: 406 additions & 0 deletions
Large diffs are not rendered by default.

scripts/process_excel_config.py

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
"""through command line, generate a config yml file
2+
for optimization_process.py"""
3+
4+
import pandas as pd
5+
import ast
6+
import logging
7+
import numpy as np
8+
import datetime
9+
import argparse
10+
import yaml
11+
12+
13+
logging.basicConfig(
14+
level=logging.INFO,
15+
format='%(asctime)s - %(levelname)s - %(message)s'
16+
)
17+
logger = logging.getLogger(__name__)
18+
19+
def parse_arguments(argv=None):
    """Parse the command-line options of the Excel-to-YAML converter.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse falls back to ``sys.argv[1:]``, which is
            the behaviour of the original zero-argument call.

    Returns:
        argparse.Namespace: namespace with a single attribute,
        ``config_excel``, the path to the Excel configuration file.

    Raises:
        SystemExit: raised by argparse when ``--config_excel`` is missing or
            the arguments cannot be parsed.
    """
    parser = argparse.ArgumentParser(
        description="Convert Excel config to YAML for optimization pipeline."
    )
    parser.add_argument(
        "--config_excel",
        type=str,
        required=True,
        help="Path to Excel configuration file (e.g., data/test_config.xlsx)",
    )
    return parser.parse_args(argv)
28+
29+
30+
# Entries that must be present in the 'optimization_settings' sheet. The last
# two are not part of the original check but are read unconditionally to build
# the output path, so they are validated here as well.
_REQUIRED_SETTINGS = (
    "model_filepath", "model_name", "target", "n_cycles",
    "noise_type", "noise_percentage", "t_start", "t_end", "timepoints",
    "output_filepath", "output_name",
)


def _build_design_hyperparams(cycle, cycle_index):
    """Return the ``design_method_hyperparams`` mapping for a single cycle.

    Args:
        cycle: dict holding the ``cycle_info`` sheet values of one cycle.
        cycle_index: index of the cycle, used only in error messages.

    Raises:
        KeyError: if ``sequencing_selection_method`` or
            ``n_screened_strains`` is missing from ``cycle``.
        ValueError: if the cycle uses ``ml_assisted_library_transform`` but
            ``beta`` is absent or empty.
    """
    hyperparams = {
        "sequencing_selection_method": cycle["sequencing_selection_method"],
        "n_screened_strains": cycle["n_screened_strains"],
    }
    if cycle["design_method"] != "ml_assisted_library_transform":
        return hyperparams

    beta = cycle.get("beta")
    # An empty Excel cell is read as NaN; a missing row yields None here.
    if pd.isna(beta):
        logger.error("For ml_assisted_library_transform, beta needs to be specified in excel file")
        # Previously execution continued and a config without beta was written.
        raise ValueError(
            f"cycle {cycle_index}: beta is required for ml_assisted_library_transform"
        )
    # The sheet stores the exponent; the config stores 2**beta.
    hyperparams["beta"] = 2 ** beta

    if "ml_method" in cycle:
        # Read the same key that is tested (the original read 'ml_model').
        hyperparams["ml_method"] = cycle["ml_method"]
    else:
        logger.info("ml_method not specified in cycle_info, automatically set to xgboost.")
        hyperparams["ml_method"] = "xgboost"

    if "data_strategy" in cycle:
        hyperparams["data_strategy"] = cycle["data_strategy"]
    else:
        hyperparams["data_strategy"] = "all"
        logger.info("data_strategy not specified in cycle_info, automatically set to all strategy.")

    return hyperparams


def main():
    """Convert the Excel configuration workbook into a YAML config file.

    Reads three sheets from the workbook passed via ``--config_excel``:

    * ``optimization_settings``: columns ``names`` and ``settings``.
    * ``cycle_info``: first column used as index; each remaining column is
      looked up by cycle number (``0 .. n_cycles - 1``).
    * ``parameter_names_and_values``: columns ``parameter_name`` and
      ``values``; each value is parsed with ``ast.literal_eval``.

    The resulting config is written to
    ``<output_filepath>/<output_name>.yml`` with keys in insertion order.

    Raises:
        KeyError: if a required entry is missing from the
            ``optimization_settings`` sheet or a cycle column is missing.
        ValueError: if an ``ml_assisted_library_transform`` cycle has no
            ``beta``.
    """
    args = parse_arguments()
    logger.info("Reading Excel configuration from %s", args.config_excel)

    settings_sheet = pd.read_excel(args.config_excel, sheet_name='optimization_settings')
    optimization_settings = dict(
        zip(settings_sheet['names'].values, settings_sheet['settings'].values)
    )
    cycle_info = pd.read_excel(
        args.config_excel, sheet_name='cycle_info', index_col=0
    ).to_dict()
    parameter_sheet = pd.read_excel(args.config_excel, sheet_name='parameter_names_and_values')
    raw_parameters = dict(
        zip(parameter_sheet['parameter_name'].values, parameter_sheet['values'].values)
    )
    # Cell contents are Python literals written as text (e.g. "[0.5, 1, 2]").
    parameter_names_and_values = {
        name: ast.literal_eval(raw) for name, raw in raw_parameters.items()
    }

    # Explicit validation instead of `assert`, which is skipped under `python -O`.
    missing = [key for key in _REQUIRED_SETTINGS if key not in optimization_settings]
    if missing:
        raise KeyError(
            f"optimization_settings sheet is missing required entries: {missing}"
        )

    output_config_name = (
        f"{optimization_settings['output_filepath']}/{optimization_settings['output_name']}"
    )

    # model_filepath and model_name are merged into one path to the .xml model
    # and removed from the settings that are copied verbatim.
    model_dir = optimization_settings.pop("model_filepath")
    model_name = optimization_settings.pop("model_name")
    settings = {"model_filepath": f"{model_dir}/{model_name}.xml", **optimization_settings}
    settings['parameters_perturbation_values'] = parameter_names_and_values

    config = {'optimization_settings': settings}

    # int() tolerates a cycle count stored as a float cell (e.g. 5.0).
    n_cycles = int(settings['n_cycles'])

    cycle_information = {}
    for i in range(n_cycles):
        cycle = cycle_info[i]
        design_build_test = {
            "cycle_status": i,
            "n_strains": cycle["n_strains"],
            "n_engineered_positions": cycle['n_engineered_positions'],
            "design_method": cycle['design_method'],  # this should be a list input
            "noise_percentage": settings['noise_percentage'],
            "noise_type": settings['noise_type'],
            "design_method_hyperparams": _build_design_hyperparams(cycle, i),
        }
        learn_recommend = {
            "recommender_method": cycle['strain_recommendation'],
            "recommender_method_hyperparams": None,
        }
        cycle_information[f"cycle_{i}"] = {
            "design_build_test": design_build_test,
            "learn_recommend": learn_recommend,
        }
    config['cycles'] = cycle_information

    with open(f"{output_config_name}.yml", "w") as file:
        # sort_keys=False keeps the insertion order built above.
        yaml.dump(config, file, sort_keys=False)
113+
114+
115+
if __name__=="__main__":
116+
main()

scripts/run_processconfig_simulations.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@ def main():
9595

9696
cycle_names = optimization_process.data.keys()
9797
data = pd.concat([optimization_process.data[i] for i in cycle_names])
98+
file = file.replace(".yml","")
9899
data.to_csv(f"{output_dir}/"
99100
f"{file}_run{k}.csv")
100101

scripts/setupconfigfile.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,16 +7,16 @@
77

88
target = "product_A"
99

10-
output_dir = "data/config_files_yml/pathwayA/experiment3"
10+
output_dir = "data/config_files_yml/pathwayA/experiment6"
1111
Path(output_dir).mkdir(parents=True, exist_ok=True)
1212

13-
run_id = 1
13+
run_id = 5
1414
n_cycles = 5
1515
n_experiments = [50]*n_cycles
1616
n_screened = n_experiments[0]*2
1717
screening_sampling_strategy = "stratified_sampling"
1818
n_features = 10 # maximum number
19-
n_engineered_positions = [10]*n_cycles
19+
n_engineered_positions = [6]*n_cycles
2020
design_method_per_cycle = ["library_transform",
2121
"ml_assisted_library_transform",
2222
"ml_assisted_library_transform",
@@ -28,13 +28,13 @@
2828
print(len(design_method_per_cycle))
2929
assert len(design_method_per_cycle) == n_cycles
3030
noise_percentage = 0.1 # not a percentage
31-
beta = 10
31+
beta = 30
3232
noise_type = "homoscedastic"
3333
model_filepath = "models/bioprocess_models"
3434
model_name = "batch_model_pathwayA"
3535

3636

37-
promoter_values = [0.25, 0.5, 0.75, 1, 1.25, 1.5, 1.75, 2]
37+
promoter_values = [0.5, 1, 1.5, 2]
3838

3939

4040

@@ -46,7 +46,8 @@
4646
f"X{len(promoter_values)}"
4747
f"N{n_experiments[0]}"
4848
f"F{n_features}"
49-
f"P{n_engineered_positions[0]}")
49+
f"P{n_engineered_positions[0]}_"
50+
f"{run_id}")
5051

5152
# stratified_sampling
5253

@@ -69,14 +70,17 @@
6970
"enzyme_17": promoter_values,
7071
"enzyme_4": promoter_values,
7172
"enzyme_5": promoter_values,
73+
7274
"enzyme_12": promoter_values,
7375
"enzyme_14": promoter_values,
7476
"enzyme_2": promoter_values,
7577
"enzyme_16": promoter_values,
78+
7679
"enzyme_15": promoter_values,
7780
"enzyme_7": promoter_values,
7881
# "enzyme_6": promoter_values,
7982
# "enzyme_10": promoter_values,
83+
#
8084
# "enzyme_11": promoter_values,
8185
# "enzyme_9": promoter_values,
8286
# "enzyme_8": promoter_values,

0 commit comments

Comments
 (0)