Skip to content

Commit 4ed3a61

Browse files
committed
consensus run receives a model
1 parent 891a037 commit 4ed3a61

12 files changed

Lines changed: 316 additions & 148 deletions

File tree

docs/source/images/datasets.png

-222 KB
Binary file not shown.
25.6 KB
Binary file not shown.

src/local_workflows/run_evaluation.sh renamed to scripts/local_workflows/run_grn_evaluation_local.sh

Lines changed: 6 additions & 100 deletions
Original file line numberDiff line numberDiff line change
@@ -82,34 +82,12 @@ mkdir -p "$TEMP_DIR"
8282

8383
# Generate and source dataset configuration
8484
echo "Generating dataset configuration..."
85-
python src/utils/config.py --output src/utils/dataset_config.env
86-
source src/utils/dataset_config.env
85+
python src/utils/config.py
86+
source src/utils/config.env
8787

8888
# Get list of datasets from config
89-
DATASETS=($(python -c "from src.utils.config import DATASET_GROUPS; print(' '.join(DATASET_GROUPS.keys()))"))
90-
# DATASETS=('norman')
91-
92-
echo "Datasets to evaluate: ${DATASETS[@]}"
93-
94-
# Method names to check
95-
GRN_METHODS=(
96-
"positive_control"
97-
"pearson_corr"
98-
"negative_control"
99-
"spearman_corr"
100-
"scglue"
101-
"scenicplus"
102-
"celloracle"
103-
"granie"
104-
"figr"
105-
"grnboost"
106-
"portia"
107-
"scenic"
108-
"scprint"
109-
"geneformer"
110-
"scgpt"
111-
"ppcor"
112-
)
89+
DATASETS=(${DATASETS//,/ })
90+
METHODS=(${METHODS//,/ })
11391

11492
# Function to submit a metric evaluation job
11593
submit_metric_job() {
@@ -256,78 +234,7 @@ EOF
256234
# Function to run consensus for a dataset
257235
run_consensus() {
258236
local dataset=$1
259-
260-
echo ""
261-
echo "=========================================="
262-
echo "Running Consensus for Dataset: $dataset"
263-
echo "=========================================="
264-
265-
# Build list of available predictions
266-
local models_dir="resources/results/${dataset}"
267-
local predictions=()
268-
269-
for method in "${GRN_METHODS[@]}"; do
270-
local file="${models_dir}/${dataset}.${method}.${method}.prediction.h5ad"
271-
if [[ -f "$file" ]]; then
272-
predictions+=("$file")
273-
echo " Found: ${method}"
274-
fi
275-
done
276-
277-
if [[ ${#predictions[@]} -eq 0 ]]; then
278-
echo " [WARNING] No prediction files found for ${dataset}"
279-
return
280-
fi
281-
282-
echo " Total predictions: ${#predictions[@]}"
283-
284-
# Run Regression consensus
285-
echo ""
286-
echo "Running Regression consensus..."
287-
python src/metrics/regression/consensus/script.py \
288-
--dataset "$dataset" \
289-
--regulators_consensus "resources/grn_benchmark/prior/regulators_consensus_${dataset}.json" \
290-
--evaluation_data "resources/grn_benchmark/evaluation_data/${dataset}_bulk.h5ad" \
291-
--predictions "${predictions[@]}"
292-
293-
# Run WS Distance consensus (only for applicable datasets)
294-
local applicable_datasets=("norman" "adamson" "replogle" "xaira_HEK293T" "xaira_HCT116")
295-
local skip=true
296-
297-
for d in "${applicable_datasets[@]}"; do
298-
if [[ "$dataset" == "$d" ]]; then
299-
skip=false
300-
break
301-
fi
302-
done
303-
304-
if $skip; then
305-
echo ""
306-
echo "Skipping WS Distance consensus for ${dataset} (not applicable)"
307-
else
308-
echo ""
309-
echo "Running WS Distance consensus..."
310-
311-
# Extract model names for ws consensus
312-
local models=()
313-
for method in "${GRN_METHODS[@]}"; do
314-
local file="${models_dir}/${dataset}.${method}.${method}.prediction.h5ad"
315-
if [[ -f "$file" ]]; then
316-
models+=("$method")
317-
fi
318-
done
319-
320-
python src/metrics/ws_distance/consensus/script.py \
321-
--dataset "$dataset" \
322-
--models_dir "$models_dir" \
323-
--ws_consensus "resources/grn_benchmark/prior/ws_consensus_${dataset}.csv" \
324-
--tf_all "resources/grn_benchmark/prior/tf_all.csv" \
325-
--evaluation_data_sc "resources/processed_data/${dataset}_evaluation_sc.h5ad" \
326-
--models "${models[@]}"
327-
fi
328-
329-
echo ""
330-
echo "Consensus completed for ${dataset}"
237+
bash scripts/prior/run_consensus.sh --dataset "$dataset"
331238
}
332239

333240
# Main execution
@@ -359,12 +266,11 @@ if [[ "$RUN_METRICS" == "true" ]]; then
359266
# echo "Looking in: $models_folder"
360267

361268
# Check each method for this dataset
362-
for method in "${GRN_METHODS[@]}"; do
269+
for method in "${METHODS[@]}"; do
363270
prediction_file="${models_folder}/${dataset}.${method}.${method}.prediction.h5ad"
364271

365272
if [[ -f "$prediction_file" ]]; then
366273
submit_metric_job "$dataset" "$method" "$prediction_file"
367-
# echo " Submitting job: ${dataset}_${method}"
368274
((job_count++))
369275
else
370276
echo " [NOT FOUND] ${prediction_file}"

src/local_workflows/run_inference.sh renamed to scripts/local_workflows/run_grn_inference_local.sh

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,14 @@
33
set -e
44

55
run_prefix='sbatch' #bash
6-
DATASETS=('op' 'adamson' 'replogle' 'norman' 'nakatake' 'parsebioscience' '300BCG' 'xaira_HCT116' 'xaira_HEK293T') #'op' 'adamson' 'replogle' 'norman' 'nakatake' 'parsebioscience' '300BCG' 'xaira_HCT116' 'xaira_HEK293T' 'ibd_uc' 'ibd_cd'
7-
DATASETS=('parsebioscience') #'op' 'adamson' 'replogle' 'norman' 'nakatake' 'parsebioscience' '300BCG' 'xaira_HCT116' 'xaira_HEK293T' 'ibd_uc' 'ibd_cd'
6+
python src/utils/config.py
7+
source src/utils/config.env
88

9-
# METHODS=('negative_control' 'positive_control' 'pearson_corr' 'portia' 'ppcor' 'grnboost' 'scenic' 'scenicplus' 'scglue' 'figr' 'granie')
10-
METHODS=( 'scenic' 'grnboost') #'negative_control' 'positive_control' 'pearson_corr' 'portia' 'ppcor' 'grnboost' 'scenic' 'scenicplus' 'scglue' 'figr' 'granie'
9+
DATASETS=(${DATASETS//,/ })
10+
# DATASETS=('parsebioscience') #'op' 'adamson' 'replogle' 'norman' 'nakatake' 'parsebioscience' '300BCG' 'xaira_HCT116' 'xaira_HEK293T' 'ibd_uc' 'ibd_cd'
11+
12+
METHODS=(${METHODS//,/ })
13+
# METHODS=( 'scenic' 'grnboost') #'negative_control' 'positive_control' 'pearson_corr' 'portia' 'ppcor' 'grnboost' 'scenic' 'scenicplus' 'scglue' 'figr' 'granie'
1114

1215
methods_dir='src/methods/'
1316
ctr_methods_dir='src/methods/'
@@ -33,10 +36,8 @@ run_func() {
3336
if [[ "$run_prefix" == "bash" ]]; then
3437
bash "$script" $arguments
3538
elif [[ "$run_prefix" == "sbatch" ]]; then
36-
# submit the job and capture the job ID
3739
output=$(sbatch "$script" $arguments)
3840
echo "$output"
39-
# sbatch usually returns: "Submitted batch job 12345678"
4041
jobid=$(echo "$output" | awk '{print $4}')
4142
echo "Job ID: $jobid"
4243
else

scripts/prior/run_consensus.sh

Lines changed: 37 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,21 +13,55 @@
1313

1414
set -e
1515

16-
DATASET=$1
16+
DATASET=""
17+
NEW_MODEL_PATH=""
18+
19+
while [[ $# -gt 0 ]]; do
20+
case $1 in
21+
--dataset)
22+
DATASET="$2"
23+
shift 2
24+
;;
25+
--new_model)
26+
NEW_MODEL_PATH="$2"
27+
shift 2
28+
;;
29+
*)
30+
echo "Unknown option: $1"
31+
echo "Usage: sbatch run_consensus.sh --dataset <dataset> [--new_model <path>]"
32+
exit 1
33+
;;
34+
esac
35+
done
36+
1737
if [ -z "$DATASET" ]; then
18-
echo "Usage: sbatch run_consensus.sh <dataset>"
38+
echo "Usage: sbatch run_consensus.sh --dataset <dataset> [--new_model <path>]"
1939
exit 1
2040
fi
2141

2242
models_dir="resources/results/$DATASET"
2343
models=("pearson_corr" "positive_control" "portia" "ppcor" "scenic" "scprint" "grnboost" "scenicplus" "scglue" "granie" "figr" "celloracle" "scgpt" "geneformer" "spearman_corr")
44+
python src/utils/config.py
45+
source src/utils/config.env
46+
METHODS=(${METHODS//,/ })
47+
2448
predictions=()
25-
for model in "${models[@]}"; do
49+
for model in "${METHODS[@]}"; do
2650
file="${models_dir}/${DATASET}.${model}.${model}.prediction.h5ad"
2751
if [ -e "$file" ]; then
2852
predictions+=("$file")
2953
fi
3054
done
55+
56+
if [ -n "$NEW_MODEL_PATH" ]; then
57+
if [ -e "$NEW_MODEL_PATH" ]; then
58+
echo "Adding new model: $NEW_MODEL_PATH"
59+
predictions+=("$NEW_MODEL_PATH")
60+
else
61+
echo "Warning: New model path does not exist: $NEW_MODEL_PATH"
62+
fi
63+
fi
64+
3165
printf '%s\n' "${predictions[@]}"
3266

3367
echo "Running consensus for Regression"

scripts/run_all.sh

Lines changed: 17 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,19 @@
11
set -e
22

3-
# datasets=( 'replogle' 'op' 'nakatake' 'norman' 'xaira_HEK293T' 'xaira_HCT116' 'parsebioscience' 'ibd_uc' 'ibd_cd' '300BCG' ) #'replogle' 'op' 'nakatake' 'norman' 'xaira_HEK293T' 'xaira_HCT116' 'parsebioscience' 'ibd_uc' 'ibd_cd' '300BCG') #
4-
datasets=( 'op' ) #'replogle' 'op' 'nakatake' 'norman' 'xaira_HEK293T' 'xaira_HCT116' 'parsebioscience' 'ibd_uc' 'ibd_cd' '300BCG') #
5-
run_local=false # set to true to run locally, false to run on AWS
3+
python src/utils/config.py
4+
source src/utils/config.env
5+
DATASETS=(${DATASETS//,/ })
66

7-
run_grn_inference=false
8-
run_grn_evaluation=true
9-
run_download=false
7+
run_local=false
8+
run_grn_inference=false #arg
9+
run_consensus=true
10+
run_grn_evaluation=true #arg
11+
run_sync=false
1012

1113
num_workers=20
1214

1315

14-
for dataset in "${datasets[@]}"; do
16+
for dataset in "${DATASETS[@]}"; do
1517
trace_file="resources/results/$dataset/trace.txt"
1618

1719
if [ "$run_grn_inference" = true ]; then
@@ -48,12 +50,14 @@ for dataset in "${datasets[@]}"; do
4850
# fi
4951

5052

51-
if [ "$run_local" = false ]; then
52-
echo "Downloading inference results from AWS"
53-
aws s3 sync s3://openproblems-data/resources/grn/results/$dataset resources/results/$dataset
53+
# if [ "$run_local" = false ]; then
54+
# echo "Downloading inference results from AWS"
55+
# aws s3 sync s3://openproblems-data/resources/grn/results/$dataset resources/results/$dataset
56+
# fi
57+
if [ "$run_consensus" = true ]; then
58+
echo "Running consensus for dataset: $dataset"
59+
bash scripts/prior/run_consensus.sh --dataset $dataset # run consensus for Regression and ws distance -> needs to be run after adding each method and dataset
5460
fi
55-
echo "Running consensus for dataset: $dataset"
56-
bash scripts/prior/run_consensus.sh $dataset # run consensus for Regression and ws distance -> needs to be run after adding each method and dataset
5761

5862
if [ "$run_local" = false ]; then
5963
echo "Syncing prior results to AWS"
@@ -64,7 +68,7 @@ for dataset in "${datasets[@]}"; do
6468
bash scripts/run_grn_evaluation.sh --dataset=$dataset --run_local=$run_local --build_images=false --num_workers=$num_workers
6569
fi
6670

67-
if [ "$run_download" = true ]; then
71+
if [ "$run_sync" = true ]; then
6872
if [ "$run_local" = false ]; then
6973
echo "Downloading evaluation results from AWS"
7074
aws s3 sync s3://openproblems-data/resources/grn/results/$dataset resources/results/$dataset

scripts/run_consensus.sh

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
#!/bin/bash
# Consensus Calculation Script
#
# Runs the consensus calculations for the Regression metric and, for the
# datasets listed in `applicable_datasets` below, the WS distance metric.
# Must be run from the repository root: all paths are relative, and the
# inline Python snippet imports `src.utils.config`.
#
# Usage: bash scripts/run_consensus.sh <dataset> [run_mode]
#   dataset:  name of the dataset (e.g., replogle, op, norman)
#   run_mode: 'local' (default) or 'aws'. With 'aws' the prior directory is
#             synced to S3 after the calculations. If the argument is
#             omitted, a RUN_MODE value from the environment is honoured.

set -euo pipefail

DATASET=${1:-}
# The second positional argument wins; otherwise fall back to an inherited
# RUN_MODE environment variable, and finally to the documented default.
RUN_MODE=${2:-${RUN_MODE:-local}}

# Print the command-line synopsis.
usage() {
  echo "Usage: bash scripts/run_consensus.sh <dataset> [run_mode]"
  echo "  dataset: name of the dataset (required)"
  echo "  run_mode: 'local' (default) or 'aws'"
}

if [[ -z "$DATASET" ]]; then
  usage >&2
  exit 1
fi

case "$RUN_MODE" in
  local | aws) ;;
  *)
    # Anything other than 'aws' simply skips the sync step; warn so that a
    # typo such as 'AWS' does not go unnoticed.
    echo "Warning: unknown run_mode '$RUN_MODE'; results will not be synced to AWS" >&2
    ;;
esac

echo "=========================================="
echo "Running Consensus Calculation"
echo "Dataset: $DATASET"
echo "Run mode: $RUN_MODE"
echo "=========================================="

# Resource locations, relative to the repository root.
resources_dir="resources"
models_dir="${resources_dir}/results/${DATASET}"
prior_dir="${resources_dir}/grn_benchmark/prior"

# Ask the project config for its METHODS and keep only those that have a
# prediction file for this dataset. The directory and dataset name are passed
# as arguments (sys.argv) rather than spliced into the Python source, so
# unusual characters in the dataset name cannot alter the code.
# NOTE(review): assumes METHODS iterates over plain method-name strings that
# contain no whitespace -- confirm against src/utils/config.py.
echo "Checking available methods..."
available_methods=$(python -c '
import os
import sys

from src.utils.config import METHODS

models_dir, dataset = sys.argv[1], sys.argv[2]
found = [
    method
    for method in METHODS
    if os.path.exists(
        os.path.join(models_dir, f"{dataset}.{method}.{method}.prediction.h5ad")
    )
]
print(" ".join(found))
' "$models_dir" "$DATASET")

if [[ -z "$available_methods" ]]; then
  echo "No prediction files found for dataset: $DATASET" >&2
  exit 1
fi

echo "Available methods: $available_methods"

# Split the space-separated list into an array without pathname expansion.
read -r -a methods_array <<< "$available_methods"

# Every method in the array was just confirmed to have a prediction file, so
# the file list is derived from it directly.
predictions=()
for method in "${methods_array[@]}"; do
  predictions+=("${models_dir}/${DATASET}.${method}.${method}.prediction.h5ad")
done

echo "Found ${#predictions[@]} prediction files for consensus calculation"
printf '%s\n' "${predictions[@]}"

# Run Regression consensus
echo ""
echo "Running Regression consensus..."
python src/metrics/regression/consensus/script.py \
  --dataset "$DATASET" \
  --regulators_consensus "${prior_dir}/regulators_consensus_${DATASET}.json" \
  --evaluation_data "${resources_dir}/grn_benchmark/evaluation_data/${DATASET}_bulk.h5ad" \
  --predictions "${predictions[@]}"

echo "Regression consensus completed successfully"

# Run WS distance consensus (only for applicable datasets)
applicable_datasets=("norman" "adamson" "replogle" "xaira_HEK293T" "xaira_HCT116")
skip_ws=true
for d in "${applicable_datasets[@]}"; do
  if [[ "$DATASET" == "$d" ]]; then
    skip_ws=false
    break
  fi
done

if [[ "$skip_ws" == true ]]; then
  echo ""
  echo "Skipping WS distance consensus (not applicable for dataset: $DATASET)"
else
  echo ""
  echo "Running WS distance consensus..."
  python src/metrics/ws_distance/consensus/script.py \
    --dataset "$DATASET" \
    --models_dir "$models_dir" \
    --ws_consensus "${prior_dir}/ws_consensus_${DATASET}.csv" \
    --tf_all "${prior_dir}/tf_all.csv" \
    --evaluation_data_sc "${resources_dir}/processed_data/${DATASET}_evaluation_sc.h5ad" \
    --models "${methods_array[@]}"

  echo "WS distance consensus completed successfully"
fi

# Sync results to AWS if needed
if [[ "$RUN_MODE" == "aws" ]]; then
  echo ""
  echo "Syncing consensus results to AWS..."
  aws s3 sync "$prior_dir" s3://openproblems-data/resources/grn/grn_benchmark/prior
  echo "Sync completed"
fi

echo ""
echo "=========================================="
echo "Consensus calculation completed for $DATASET"
echo "=========================================="

0 commit comments

Comments
 (0)