Skip to content

Commit 8e6b9e0

Browse files
committed
small changes to the workflow
1 parent 336175e commit 8e6b9e0

19 files changed

Lines changed: 78 additions & 85 deletions

File tree

scripts/run_all.sh

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
set -e
22

3-
datasets=( 'replogle' 'op' 'nakatake' 'adamson' 'norman' 'xaira_HEK293T' 'xaira_HCT116' 'parsebioscience' 'ibd_uc' 'ibd_cd' '300BCG' ) #'replogle' 'op' 'nakatake' 'adamson' 'norman' 'xaira_HEK293T' 'xaira_HCT116' 'parsebioscience' 'ibd_uc' 'ibd_cd' '300BCG') #
4-
datasets=( 'replogle' 'op' ) #'replogle' 'op' 'nakatake' 'adamson' 'norman' 'xaira_HEK293T' 'xaira_HCT116' 'parsebioscience' 'ibd_uc' 'ibd_cd' '300BCG') #
3+
# datasets=( 'replogle' 'op' 'nakatake' 'adamson' 'norman' 'xaira_HEK293T' 'xaira_HCT116' 'parsebioscience' 'ibd_uc' 'ibd_cd' '300BCG' ) #'replogle' 'op' 'nakatake' 'adamson' 'norman' 'xaira_HEK293T' 'xaira_HCT116' 'parsebioscience' 'ibd_uc' 'ibd_cd' '300BCG') #
4+
datasets=( 'op' 'replogle' ) #'replogle' 'op' 'nakatake' 'adamson' 'norman' 'xaira_HEK293T' 'xaira_HCT116' 'parsebioscience' 'ibd_uc' 'ibd_cd' '300BCG') #
55
run_local=false # set to true to run locally, false to run on AWS
66

77
run_grn_inference=false
@@ -31,29 +31,29 @@ for dataset in "${datasets[@]}"; do
3131
fi
3232

3333
if [ "$run_grn_evaluation" = true ]; then
34-
if [ -f "$trace_file" ]; then
35-
dir=$(dirname "$trace_file")
36-
base=$(basename "$trace_file" .txt)
37-
today=$(date +%Y-%m-%d)
38-
cp "$trace_file" "${dir}/${base}_${today}.txt"
39-
fi
34+
# if [ -f "$trace_file" ]; then
35+
# dir=$(dirname "$trace_file")
36+
# base=$(basename "$trace_file" .txt)
37+
# today=$(date +%Y-%m-%d)
38+
# cp "$trace_file" "${dir}/${base}_${today}.txt"
39+
# fi
4040

4141
# if [ "$run_local" = false ]; then
4242
# echo "Uploading inference results to AWS"
4343
# aws s3 sync resources/results/$dataset s3://openproblems-data/resources/grn/results/$dataset
4444
# aws s3 sync s3://openproblems-data/resources/grn/results/$dataset resources/results/$dataset
4545
# fi
46-
if [ "$run_local" = false ]; then
47-
echo "Downloading inference results from AWS"
48-
aws s3 sync s3://openproblems-data/resources/grn/results/$dataset resources/results/$dataset
49-
fi
50-
echo "Running consensus for dataset: $dataset"
51-
bash scripts/prior/run_consensus.sh $dataset # run consensus for Regression and ws distance -> needs to be run after adding each method and dataset
46+
# if [ "$run_local" = false ]; then
47+
# echo "Downloading inference results from AWS"
48+
# aws s3 sync s3://openproblems-data/resources/grn/results/$dataset resources/results/$dataset
49+
# fi
50+
# echo "Running consensus for dataset: $dataset"
51+
# bash scripts/prior/run_consensus.sh $dataset # run consensus for Regression and ws distance -> needs to be run after adding each method and dataset
5252

53-
if [ "$run_local" = false ]; then
54-
echo "Syncing prior results to AWS"
55-
aws s3 sync resources/grn_benchmark/prior s3://openproblems-data/resources/grn/grn_benchmark/prior
56-
fi
53+
# if [ "$run_local" = false ]; then
54+
# echo "Syncing prior results to AWS"
55+
# aws s3 sync resources/grn_benchmark/prior s3://openproblems-data/resources/grn/grn_benchmark/prior
56+
# fi
5757

5858
echo "Running GRN evaluation for dataset: $dataset"
5959
bash scripts/run_grn_evaluation.sh --dataset=$dataset --run_local=$run_local --build_images=false

src/methods/geneformer/helper.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -928,11 +928,6 @@ def plot_embs(
928928
plot_heatmap(embs, emb_dims, label, output_file, kwargs_dict)
929929

930930

931-
def parse_args(par):
932-
"""Parse arguments - placeholder function"""
933-
return par
934-
935-
936931
def efficient_melting(matrix, gene_names, symmetric=False):
937932
"""Convert matrix to long format"""
938933
import pandas as pd

src/metrics/all_metrics/helper.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from regression.helper import main as regression
1111
from ws_distance.helper import main as ws_distance
1212
from sem.helper import main as sem
13-
from ar.helper import main as ar
13+
from anchor_regression.helper import main as ar
1414
from tf_recovery.helper import main as tf_recovery
1515
from tf_binding.helper import main as tf_binding
1616
from rc_tf_act.helper import main as rc_tf_act
@@ -19,7 +19,6 @@
1919

2020
from config import DATASETS_METRICS
2121

22-
# Create a mapping from metric name (string) to function
2322
METRIC_FUNCTIONS = {
2423
'regression': regression,
2524
'ws_distance': ws_distance,
@@ -36,14 +35,14 @@ def main(par):
3635
dataset_id = ad.read_h5ad(par['evaluation_data'], backed='r').uns['dataset_id']
3736
rr_store = []
3837
metrics = DATASETS_METRICS[dataset_id]
38+
# metrics = ['gs_recovery', 'tf_binding']
3939

4040
for metric_name in metrics:
4141
print(f"Computing metric: {metric_name}")
4242
metric_func = METRIC_FUNCTIONS.get(metric_name)
4343
if metric_func is None:
4444
print(f"Warning: No function found for metric '{metric_name}'")
4545
continue
46-
4746

4847
rr = metric_func(par)
4948

src/metrics/ar/config.vsh.yaml renamed to src/metrics/anchor_regression/config.vsh.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
__merge__: ../../api/comp_metric.yaml
22

3-
name: ar
3+
name: anchor_regression
44
namespace: "metrics"
55
info:
66
label: Anchor Regression
File renamed without changes.
File renamed without changes.

src/metrics/gs_recovery/config.vsh.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,12 @@ arguments:
9191
required: false
9292
default: "zero_centered"
9393
description: "Method for determining pathway activity baseline (zero_centered, permutation, or random_genesets)"
94+
95+
- name: "--output_detailed_metrics"
96+
type: boolean
97+
required: false
98+
default: false
99+
description: Whether to output detailed per-geneset metrics or only summary metrics
94100

95101
resources:
96102
- type: python_script

src/metrics/gs_recovery/helper.py

Lines changed: 28 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -792,7 +792,20 @@ def main(par: dict) -> pd.DataFrame:
792792
"""
793793

794794
# Load data
795-
print("\n[1/5] Loading data...")
795+
pathway_files = {}
796+
geneset_mapping = {
797+
'geneset_hallmark_2020': 'hallmark_2020',
798+
'geneset_kegg_2021': 'kegg_2021',
799+
'geneset_reactome_2022': 'reactome_2022',
800+
'geneset_go_bp_2023': 'go_bp_2023',
801+
'geneset_bioplanet_2019': 'bioplanet_2019',
802+
'geneset_wikipathways_2019': 'wikipathways_2019',
803+
}
804+
805+
for arg_name, geneset_name in geneset_mapping.items():
806+
pathway_files[geneset_name] = par[arg_name]
807+
808+
par['pathway_files'] = pathway_files
796809
evaluation_data = ad.read_h5ad(par['evaluation_data'], backed='r')
797810
all_genes = set(evaluation_data.var_names.tolist())
798811
prediction = read_prediction(par)
@@ -846,21 +859,25 @@ def main(par: dict) -> pd.DataFrame:
846859
all_results.append(result_dict)
847860

848861

849-
final_dict = {}
862+
detailed_dict = {}
850863
for result in all_results:
851864
geneset_name = result['geneset_name']
852-
final_dict[f'{geneset_name}_gs_precision'] = result['precision']
853-
final_dict[f'{geneset_name}_gs_recall'] = result['recall']
854-
final_dict[f'{geneset_name}_gs_f1'] = result['f1']
855-
final_dict[f'{geneset_name}_gs_n_active'] = result['n_active_pathways']
865+
detailed_dict[f'{geneset_name}_gs_precision'] = result['precision']
866+
detailed_dict[f'{geneset_name}_gs_recall'] = result['recall']
867+
detailed_dict[f'{geneset_name}_gs_f1'] = result['f1']
868+
detailed_dict[f'{geneset_name}_gs_n_active'] = result['n_active_pathways']
856869

857870
# Calculate mean across all gene sets
871+
short_dict = {}
858872
if all_results:
859-
final_dict['gs_precision'] = np.mean([r['precision'] for r in all_results])
860-
final_dict['gs_recall'] = np.mean([r['recall'] for r in all_results])
861-
final_dict['gs_f1'] = np.mean([r['f1'] for r in all_results])
862-
final_dict['gs_n_active'] = np.mean([r['n_active_pathways'] for r in all_results])
863-
873+
short_dict['gs_precision'] = np.mean([r['precision'] for r in all_results])
874+
short_dict['gs_recall'] = np.mean([r['recall'] for r in all_results])
875+
short_dict['gs_f1'] = np.mean([r['f1'] for r in all_results])
876+
short_dict['gs_n_active'] = np.mean([r['n_active_pathways'] for r in all_results])
877+
if par['output_detailed_metrics']:
878+
final_dict = {**short_dict, **detailed_dict}
879+
else:
880+
final_dict = short_dict
864881
summary_df = pd.DataFrame([final_dict])
865882
print(summary_df)
866883
return summary_df

0 commit comments

Comments
 (0)