Skip to content

Commit 41b4cb0

Browse files
authored
Merge pull request #85 from openproblems-bio/jalil
multiple new metrics added
2 parents f5628fc + 72e4570 commit 41b4cb0

28 files changed

Lines changed: 1200 additions & 1279 deletions

File tree

scripts/sync_resources.sh

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,4 +25,5 @@ set -e
2525
# aws s3 sync resources/grn_benchmark/prior s3://openproblems-data/resources/grn/grn_benchmark/prior --delete
2626
# aws s3 sync resources/extended_data/ s3://openproblems-data/resources/grn/extended_data --delete
2727
# aws s3 sync resources/results/experiment s3://openproblems-data/resources/grn/results/experiment --delete
28-
aws s3 sync resources_test s3://openproblems-data/resources_test/grn/ --delete
28+
# aws s3 sync resources_test s3://openproblems-data/resources_test/grn/ --delete
29+
aws s3 sync resources/grn_benchmark/ground_truth s3://openproblems-data/resources/grn/grn_benchmark/ground_truth

src/methods/geneformer/config.vsh.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ resources:
5353
path: script.py
5454
- path: /src/utils/util.py
5555
dest: util.py
56+
- path: helper.py
5657

5758
engines:
5859
- type: docker

src/methods/scenicplus/config.vsh.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,4 +54,4 @@ runners:
5454
- type: executable
5555
- type: nextflow
5656
directives:
57-
label: [twodaytime, veryhighmem, highcpu]
57+
label: [twodaytime, veryveryhighmem, highcpu]

src/metrics/all_metrics/helper.py

Lines changed: 29 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,11 @@
1111

1212
from regression_2.helper import main as main_reg2
1313
from ws_distance.helper import main as main_ws_distance
14-
from sem.helper import main as main_sem
15-
from vc_v2.helper import main as main_vc_v2
16-
from tf_recovery import main as main_tf_rec
14+
from experimental.sem.helper import main as main_sem
15+
# from vc_v2.helper import main as main_vc_v2
16+
from tf_recovery.helper import main as main_tf_rec
17+
from tf_binding.helper import main as main_tf_binding
18+
from replica_consistency.helper import main as main_replica_consistency
1719

1820

1921
def main(par):
@@ -27,6 +29,22 @@ def main(par):
2729
tf_rec = pd.DataFrame()
2830
print("tf_rec done: ", tf_rec)
2931
rr_store.append(tf_rec)
32+
if True:
33+
try:
34+
tf_binding = main_tf_binding(par)
35+
except Exception as e:
36+
print(f"Error in main_tf_binding metrics: {e}")
37+
tf_binding = pd.DataFrame()
38+
print("tf_binding done: ", tf_binding)
39+
rr_store.append(tf_binding)
40+
if True:
41+
try:
42+
rr_replica = main_replica_consistency(par)
43+
except Exception as e:
44+
print(f"Error in replica consistency metrics: {e}")
45+
rr_replica = pd.DataFrame()
46+
print("replica consistency done: ", rr_replica)
47+
rr_store.append(rr_replica)
3048

3149

3250
# if True:
@@ -47,14 +65,14 @@ def main(par):
4765
# rr_store.append(rr_reg2)
4866
# print("reg2 done: ", rr_reg2)
4967

50-
# if False:
51-
# try:
52-
# rr_sem = main_sem(par)
53-
# except Exception as e:
54-
# print(f"Error in sem metrics: {e}")
55-
# rr_sem = pd.DataFrame()
56-
# print("sem done: ", rr_sem)
57-
# rr_store.append(rr_sem)
68+
if True:
69+
try:
70+
rr_sem = main_sem(par)
71+
except Exception as e:
72+
print(f"Error in sem metrics: {e}")
73+
rr_sem = pd.DataFrame()
74+
print("sem done: ", rr_sem)
75+
rr_store.append(rr_sem)
5876

5977

6078
# try:

src/metrics/experimental/recovery_2/helper.py

Lines changed: 0 additions & 182 deletions
This file was deleted.

src/metrics/experimental/regression_3/helper.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ def main(par):
7373
if dataset_id not in DATASET_GROUPS:
7474
raise ValueError(f"Dataset {dataset_id} not found in DATASET_GROUPS")
7575

76-
anchor_cols = DATASET_GROUPS[dataset_id].get('anchors', ['donor_id', 'plate_name'])
76+
anchor_cols = DATASET_GROUPS[dataset_id]['anchors']
7777
print(f"Using anchor variables: {anchor_cols}")
7878

7979
# Manage layer
@@ -115,7 +115,7 @@ def main(par):
115115
gene_mask = np.logical_or(np.any(A, axis=1), np.any(A, axis=0))
116116
in_degrees = np.sum(A != 0, axis=0)
117117
out_degrees = np.sum(A != 0, axis=1)
118-
idx = np.argsort(np.maximum(out_degrees, in_degrees))[:-1000]
118+
idx = np.argsort(np.maximum(out_degrees, in_degrees))[:-2000]
119119
gene_mask[idx] = False
120120
X = X[:, gene_mask]
121121
X = X.toarray() if isinstance(X, csr_matrix) else X
@@ -142,9 +142,9 @@ def main(par):
142142
X_test = X[~mask, :]
143143

144144
# Standardize features
145-
#scaler = StandardScaler()
146-
#X_train = scaler.fit_transform(X_train)
147-
#X_test = scaler.transform(X_test)
145+
scaler = StandardScaler()
146+
X_train = scaler.fit_transform(X_train)
147+
X_test = scaler.transform(X_test)
148148

149149
for j in tqdm.tqdm(range(X_train.shape[1])):
150150

@@ -178,7 +178,7 @@ def main(par):
178178
baseline_scores.append(np.mean(coefs))
179179
scores = np.array(scores)
180180
baseline_scores = np.array(baseline_scores)
181-
181+
reg3_lift = np.mean(scores) / (np.mean(baseline_scores) + 1e-6)
182182
p_value = wilcoxon(baseline_scores, scores, alternative="greater").pvalue
183183
p_value = max(p_value, 1e-300)
184184

@@ -189,7 +189,8 @@ def main(par):
189189

190190
# Return results as DataFrame
191191
results = {
192-
'regression_3': [final_score]
192+
'reg3_precision': [reg3_lift],
193+
'reg3_balanced': [final_score]
193194
}
194195

195196
df_results = pd.DataFrame(results)

src/metrics/experimental/regression_3/run_local.sh

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,8 @@ datasets=('op' 'parsebioscience' "300BCG" "adamson" "replogle" "xaira_HEK293T
2020
datasets=('op' ) #"300BCG" "ibd" 'parsebioscience', 'xaira_HEK293T'
2121

2222
# methods to process
23-
methods=( "pearson_corr" "positive_control" "negative_control" "ppcor" "portia" "scenic" "grnboost" "scprint" "scenicplus" "celloracle" "scglue" "figr" "granie")
24-
methods=( "pearson_corr" "negative_control" "positive_control" )
23+
methods=( "scprint" "pearson_corr" "positive_control" "negative_control" "ppcor" "portia" "scenic" "grnboost" "scenicplus" "celloracle" "scglue" "figr" "granie")
24+
# methods=( "pearson_corr" "negative_control" "positive_control" )
2525

2626
# temporary file to collect CSV rows
2727
combined_csv="${save_dir}/reg2_scores.csv"
@@ -47,7 +47,6 @@ for dataset in "${datasets[@]}"; do
4747
--prediction "$prediction" \
4848
--evaluation_data "$evaluation_data" \
4949
--regulators_consensus "resources/grn_benchmark/prior/regulators_consensus_${dataset}.json" \
50-
--group_specific cell_type \
5150
--score "$score"
5251

5352
# Extract metrics from the .h5ad and append to CSV

0 commit comments

Comments (0)