Skip to content

Commit a3e8bce

Browse files
committed
Small fix to data (SHK0073) and other analysis files
1 parent 27d3b2b commit a3e8bce

15 files changed

Lines changed: 2788 additions & 1745 deletions
35.3 KB
Loading

figures/optimization_results/sample_size_exp0.svg

Lines changed: 741 additions & 299 deletions
Loading
-2.47 KB
Loading

figures/optimization_results/screening_sequencing_control_exp1.svg

Lines changed: 285 additions & 606 deletions
Loading
-193 Bytes
Loading

figures/optimization_results/screening_sequencing_exp1.svg

Lines changed: 377 additions & 738 deletions
Loading
Lines changed: 179 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,179 @@
1+
"""Runs specific pathway model combinatorial experiment analysis (linear modelling)"""
2+
import pandas as pd
3+
import numpy as np
4+
import matplotlib.pyplot as plt
5+
import os
6+
from source.visualization import plot_dbtl_cycles
7+
from matplotlib.colors import ListedColormap
8+
from sklearn.linear_model import LinearRegression
9+
from sklearn.preprocessing import PolynomialFeatures
10+
from sklearn.linear_model import LinearRegression
11+
from sklearn.pipeline import make_pipeline
12+
import statsmodels.formula.api as smf
13+
import pandas as pd
14+
import statsmodels.formula.api as smf
15+
16+
name = "pathwayA"
target = 'Y_product_A'
base_dir = f"results/optimization_results/stratified_sampling/{name}/experiment1"
output_dir = f"results/statistics_combinatorial_exp/stratified_sampling/{name}/"
# Results are written into output_dir further below; create it up front so
# the run does not crash only after the (slow) data-collection loop.
os.makedirs(output_dir, exist_ok=True)
dirs = os.listdir(base_dir)


# pathway A settings
Number_of_Samples = [50, 100, 200]
Screening_Ratio = [1, 2, 4, 6]
Positions = [4, 6, 8, 10]
Features = [6, 12, 19]

# pathway B settings
# Number_of_Samples = [50, 100, 200]
# Screening_Ratio = [2, 4, 6]
# Positions = [4, 6, 8, 10]
# Features = [6, 10, 12]

# pathway C settings
# Number_of_Samples = [50, 100, 200]
# Screening_Ratio = [2, 4, 6]
# Positions = [4, 6, 8, 10]
# Features = [6, 10, 12, 17]

# pathway D settings
# Number_of_Samples = [50, 100, 200, 300]
# Screening_Ratio = [2, 4, 6]
# Positions = [4, 6, 8, 10]
# Features = [5, 10, 15, 21]

# pathway E settings
# Number_of_Samples = [50, 100, 200, 300]
# Screening_Ratio = [1, 2, 4, 6]
# Positions = [4, 6, 8, 10]
# Features = [5, 10, 15, 20, 25]


# Enumerate every scenario of the grid search. The string pattern
# "S{screened}X4N{samples}F{features}P{positions}" must match the result
# file names produced by the optimization runs found under base_dir
# (it is used as a substring filter when collecting results below).
option_combinations = []
list_of_list_combinations = []
for F in Features:
    for P in Positions:
        for N in Number_of_Samples:
            for R in Screening_Ratio:
                grid_str = f"S{R*N}X4N{N}F{F}P{P}"
                option_combinations.append(grid_str)
                # stored as [screened samples S = R*N, sample size N, features F, positions P]
                list_of_list_combinations.append([R*N, N, F, P])

print("number of combinations", len(list_of_list_combinations))
65+
66+
# Collect the per-repeat performance metric for every scenario by scanning
# each repeat directory under base_dir for a file whose name contains the
# scenario's grid string.
results = {}
for option in option_combinations:  # the enumerate index was unused; a plain loop suffices
    option_values = []
    for rep_dir in dirs:  # renamed from `dir`, which shadows the builtin
        working_dir = os.path.join(base_dir, rep_dir)
        files = os.listdir(working_dir)
        for fname in files:
            if option in fname:
                file_path = os.path.join(working_dir, fname)
                process = pd.read_csv(file_path, index_col=0)

                # Metric: an "AUC" over DBTL cycles — the sum over cycles of
                # the per-cycle mean of the target column. Rows are grouped
                # by the "cycleN" prefix extracted from the index.
                # NOTE(review): an earlier variant used the cycle-4 median;
                # in the original it was computed and then immediately
                # overwritten, so it is kept only as a commented alternative:
                # alt_metric = process[process.index.str.startswith('cycle4')][target].median()
                test = process.copy()
                test.index = test.index.astype(str)
                test["cycle"] = test.index.str.extract(r"(cycle\d+)", expand=False)
                cycle_auc = test.groupby("cycle")[target].mean().sum()

                option_values.append(cycle_auc)
    results[option] = option_values

gridsearch_results = pd.DataFrame.from_dict(
    results,
    orient='index').iloc[:, :10]  # only takes 10 repeats (sometimes more experiments were done)


print("number of missing experiments", sum(gridsearch_results.isna().sum(axis=1)))
95+
96+
97+
98+
99+
# Build the scenario table: one row per grid-search combination, holding the
# design factors (S, N, F, P) plus the mean performance across repeats,
# sorted best-first. Positional alignment between the two inputs is valid
# because the rows of gridsearch_results were inserted in the same order as
# list_of_list_combinations.
mean_performance = gridsearch_results.mean(axis=1)
scenarios = (
    pd.DataFrame(np.array(list_of_list_combinations), columns=['S', 'N', 'F', 'P'])
    .assign(mean_performance=np.array(mean_performance))
    .sort_values('mean_performance', ascending=False)
)
scenarios['SNratio'] = scenarios['S'] / scenarios['N']
105+
106+
107+
df = scenarios.copy()

# Linear model of mean performance on the design factors (SNratio, F, P).
# Interaction terms were explored (see the commented formula) but the model
# actually fitted below is main-effects only.
# NOTE(review): the output file names below still say "interaction"; they are
# kept byte-identical so downstream paths keep resolving, but the name no
# longer matches the formula — confirm before renaming.
features = ['SNratio', 'F', 'P']
target = 'mean_performance'
# formula = f"{target} ~ " + " + ".join(features) + " + " + " + ".join(
#     [f"{a}:{b}" for i, a in enumerate(features) for b in features[i+1:]])
formula = 'mean_performance ~ SNratio + F + P'

model = smf.ols(formula, data=df).fit()
print(model.summary())
with open(f"{output_dir}/{name}_lm_SNratio_interaction.txt", "w") as f:
    f.write(model.summary().as_text())

# (a bare `model.summary()` expression statement stood here; it had no
# effect in a script and was removed)
from statsmodels.stats.multitest import multipletests

# Correct the model's coefficient p-values for multiple testing.
pvals = model.pvalues

# Apply Benjamini-Hochberg (FDR) correction
corrected = multipletests(pvals, alpha=0.05, method='fdr_bh')  # or method='bonferroni'

# Output table: raw coefficients, raw and adjusted p-values, FDR significance flag
adjusted_pvals = pd.DataFrame({
    'coef': model.params,
    'raw_pval': pvals,
    'adj_pval': corrected[1],
    'significant (FDR)': corrected[0]
})

adjusted_pvals.to_csv(f"{output_dir}/mht_{name}_lm_with_interaction.csv")
141+
142+
143+
import statsmodels.formula.api as smf
144+
145+
# First, make a DataFrame with column names
146+
df = scenarios.copy()
147+
148+
# Build formula with interactions
149+
# Example: 'target ~ A + B + C + A:B + A:C + B:C'
150+
features = df.columns[:-1]
151+
target = df.columns[-1]
152+
formula = 'mean_performance ~ S + N + F + P'
153+
154+
155+
model = smf.ols(formula, data=df).fit()
156+
print(model.summary())
157+
with open(f"{output_dir}/{name}_lm_SandN_interaction.txt", "w") as f:
158+
f.write(model.summary().as_text())
159+
160+
model.summary()
161+
from statsmodels.stats.multitest import multipletests
162+
163+
# Assume you already have a fitted model
164+
pvals = model.pvalues
165+
166+
# Apply Benjamini-Hochberg (FDR) or Bonferroni correction
167+
corrected = multipletests(pvals, alpha=0.05, method='fdr_bh') # or method='bonferroni'
168+
169+
# Output adjusted p-values
170+
import pandas as pd
171+
adjusted_pvals = pd.DataFrame({
172+
'coef': model.params,
173+
'raw_pval': pvals,
174+
'adj_pval': corrected[1],
175+
'significant (FDR)': corrected[0]
176+
})
177+
178+
179+
adjusted_pvals.to_csv(f"{output_dir}/mht_{name}_lm_without_interaction.csv")

scripts/analysis/analysis_experiments.ipynb

Lines changed: 28 additions & 23 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)