Skip to content

Commit 5e64224

Browse files
committed
tf binding updated to only have top-k based evaluation
1 parent 1d736c9 commit 5e64224

20 files changed

Lines changed: 1521 additions & 1099 deletions

File tree

scripts/run_all.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
set -e
22

3-
datasets=( 'replogle') #'replogle' 'op' 'nakatake' 'adamson' 'norman' 'xaira_HEK293T' 'xaira_HCT116' 'parsebioscience' 'ibd' '300BCG'
3+
datasets=( 'op' 'replogle' '300BCG') #'replogle' 'op' 'nakatake' 'adamson' 'norman' 'xaira_HEK293T' 'xaira_HCT116' 'parsebioscience' 'ibd' '300BCG'
44

5-
run_local=true # set to true to run locally, false to run on AWS
5+
run_local=false # set to true to run locally, false to run on AWS
66

77
run_grn_inference=false
88
run_grn_evaluation=true

scripts/run_grn_inference.sh

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -115,13 +115,20 @@ HERE
115115
}
116116

117117
if [[ "$DATASET" =~ ^(replogle|parsescience|xaira_HEK293T|xaira_HCT116)$ ]]; then
118-
append_entry "$DATASET" "[pearson_corr, negative_control, positive_control, grnboost, ppcor, portia, scenic]"
118+
methods="[pearson_corr, negative_control, positive_control, grnboost, ppcor, portia, scenic]"
119+
append_entry "$DATASET" "$methods"
119120
append_entry "$DATASET" "[scprint]" "true"
121+
echo $methods
120122
elif [ "$DATASET" = "op" ] || [ "$DATASET" = "ibd" ]; then
121-
append_entry "$DATASET" "[pearson_corr, spearman_corr, negative_control, positive_control, grnboost, ppcor, portia, scenic, scprint, geneformer, scgpt, figr, scenicplus, celloracle, granie, scglue]"
123+
methods="[geneformer, scgpt]"
124+
# append_entry "$DATASET" "[pearson_corr, spearman_corr, negative_control, positive_control, grnboost, ppcor, portia, scenic, scprint, geneformer, scgpt, figr, scenicplus, celloracle, granie, scglue]"
125+
append_entry "$DATASET" "$methods"
126+
echo $methods
122127

123128
else
124-
append_entry "$DATASET" "[pearson_corr, negative_control, positive_control, grnboost, ppcor, portia, scenic, scprint]"
129+
methods="[pearson_corr, negative_control, positive_control, grnboost, ppcor, portia, scenic, scprint]"
130+
append_entry "$DATASET" "$methods"
131+
echo $methods
125132
fi
126133
# append_entry "$DATASET" "[pearson_corr, negative_control, positive_control, scprint, portia, scgpt]"
127134
# append_entry "$DATASET" "[scenicplus, figr, celloracle]"

scripts/sync_resources.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,6 @@ set -e
2727
# aws s3 sync resources/results/experiment s3://openproblems-data/resources/grn/results/experiment --delete
2828
# aws s3 sync resources_test s3://openproblems-data/resources_test/grn/ --delete
2929
# aws s3 sync resources/grn_benchmark/ground_truth s3://openproblems-data/resources/grn/grn_benchmark/ground_truth
30-
aws s3 sync resources/grn_benchmark/evaluation_data s3://openproblems-data/resources/grn/grn_benchmark/evaluation_data --delete
30+
aws s3 sync resources/grn_benchmark/ s3://openproblems-data/resources/grn/grn_benchmark/ --delete
3131

3232
# aws s3 sync s3://openproblems-data/resources/grn/grn_benchmark/ground_truth resources/grn_benchmark/ground_truth --no-sign-request

src/exp_analysis/helper.py

Lines changed: 0 additions & 501 deletions
This file was deleted.

src/exp_analysis/peak_annotation/config.vsh.yaml

Lines changed: 0 additions & 35 deletions
This file was deleted.

src/exp_analysis/peak_annotation/run.sh

Lines changed: 0 additions & 7 deletions
This file was deleted.

src/exp_analysis/peak_annotation/script.R

Lines changed: 0 additions & 82 deletions
This file was deleted.

src/methods/geneformer/helper.py

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
12
import logging
23
import pickle
34
from collections import Counter
@@ -978,7 +979,10 @@ def compute_geneformer_network(
978979
token=None,
979980
model_dir=None,
980981
tokenized_dir=None,
981-
embedding_dir=None
982+
embedding_dir=None,
983+
model_details=None,
984+
gene_median=None,
985+
gene_mapping_file=None
982986
):
983987
if not all([gene_mapping_dict, token, model_dir, tokenized_dir, embedding_dir]):
984988
raise ValueError("Missing required parameters for compute_geneformer_network")
@@ -1002,8 +1006,17 @@ def compute_geneformer_network(
10021006
if os.path.exists(tokenized_data_path):
10031007
shutil.rmtree(tokenized_data_path)
10041008

1005-
# Note: This would need proper model_details, gene_median, gene_mapping_file parameters
1006-
tryParallelFunction(tokenize_data, "Tokenizing data", temp_dir=geneformer_folder)
1009+
# Pass all required parameters to tokenize_data
1010+
tryParallelFunction(
1011+
tokenize_data,
1012+
"Tokenizing data",
1013+
temp_dir=geneformer_folder,
1014+
model_details=model_details,
1015+
gene_median=gene_median,
1016+
token=token,
1017+
gene_mapping_file=gene_mapping_file,
1018+
tokenized_dir=tokenized_dir
1019+
)
10071020

10081021
embex = EmbExtractor(
10091022
model_type="Pretrained", # CellClassifier
@@ -1162,7 +1175,10 @@ def main(par):
11621175
token=token,
11631176
model_dir=model_dir,
11641177
tokenized_dir=tokenized_dir,
1165-
embedding_dir=embedding_dir
1178+
embedding_dir=embedding_dir,
1179+
model_details=model_details,
1180+
gene_median=gene_median,
1181+
gene_mapping_file=gene_mapping_file
11661182
)
11671183
gene_names = subadata.var["symbol"].values
11681184
print(net.shape, net.sum(), len(gene_names), gene_names[:10])

src/methods/geneformer/script.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
"rna": "resources_test/grn_benchmark/inference_data/op_rna.h5ad",
1111
"tf_all": "resources_test/prior/tf_all.csv",
1212
"prediction": "output/geneformer/prediction.h5ad",
13+
"model": "Geneformer-V2-104M",
14+
1315
"max_n_links": 50000,
1416
"batch_size": 16,
1517
"temp_dir": "output/geneformer",

src/metrics/all_metrics/helper.py

Lines changed: 9 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,7 @@
1111

1212
from regression_2.helper import main as main_reg2
1313
from ws_distance.helper import main as main_ws_distance
14-
from experimental.sem.helper import main as main_sem
15-
# from vc_v2.helper import main as main_vc_v2
14+
from sem.helper import main as main_sem
1615
from tf_recovery.helper import main as main_tf_rec
1716
from tf_binding.helper import main as main_tf_binding
1817
from replica_consistency.helper import main as main_replica_consistency
@@ -45,25 +44,14 @@ def main(par):
4544
rr_replica = pd.DataFrame()
4645
print("replica consistency done: ", rr_replica)
4746
rr_store.append(rr_replica)
48-
49-
50-
# if True:
51-
# try:
52-
# rr_vc = main_vc_v2(par)
53-
# except Exception as e:
54-
# print(f"Error in vc metrics: {e}")
55-
# rr_vc = pd.DataFrame()
56-
# print("vc done: ", rr_vc)
57-
# rr_store.append(rr_vc)
58-
59-
60-
# try:
61-
# rr_reg2 = main_reg2(par)
62-
# except Exception as e:
63-
# print(f"Error in regression 2 metrics: {e}")
64-
# rr_reg2 = pd.DataFrame()
65-
# rr_store.append(rr_reg2)
66-
# print("reg2 done: ", rr_reg2)
47+
if True:
48+
try:
49+
rr_reg2 = main_reg2(par)
50+
except Exception as e:
51+
print(f"Error in regression 2 metrics: {e}")
52+
rr_reg2 = pd.DataFrame()
53+
rr_store.append(rr_reg2)
54+
print("reg2 done: ", rr_reg2)
6755

6856
if True:
6957
try:

0 commit comments

Comments (0)