#!/bin/bash
# Consensus Calculation Script
#
# Runs the consensus calculations for the Regression and WS distance metrics
# over every method that has a prediction file for the given dataset.
#
# Usage: bash scripts/run_consensus.sh <dataset> [run_mode]
#   dataset:  name of the dataset (e.g., replogle, op, norman)
#   run_mode: 'local' (default) or 'aws'. With 'aws' the contents of
#             resources/grn_benchmark/prior are synced to S3 after the
#             calculations finish. When the argument is omitted, the
#             RUN_MODE environment variable is used if it is set.
#
# All paths are relative and the method list is imported from
# src.utils.config, so run this from the directory that contains both
# resources/ and src/.

set -euo pipefail

readonly RESOURCES_DIR="resources"
readonly PRIOR_DIR="${RESOURCES_DIR}/grn_benchmark/prior"
readonly S3_PRIOR_URI="s3://openproblems-data/resources/grn/grn_benchmark/prior"
# Datasets for which the WS distance consensus is computed; every other
# dataset only gets the Regression consensus.
readonly WS_DATASETS=("norman" "adamson" "replogle" "xaira_HEK293T" "xaira_HCT116")

# Print the command-line synopsis to stdout (callers redirect as needed).
usage() {
  echo "Usage: bash scripts/run_consensus.sh <dataset> [run_mode]"
  echo "  dataset: name of the dataset (required)"
  echo "  run_mode: 'local' (default) or 'aws'"
}

# Print a message to stderr and abort with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Succeed (status 0) when dataset $1 is listed in WS_DATASETS.
is_ws_dataset() {
  local candidate
  for candidate in "${WS_DATASETS[@]}"; do
    [[ "$1" == "$candidate" ]] && return 0
  done
  return 1
}

#######################################
# Script entry point.
# Arguments: $1 - dataset name (required)
#            $2 - run mode, 'local' or 'aws' (optional)
# Returns:   exits non-zero on bad arguments, when no prediction files
#            exist, or when any invoked command fails.
#######################################
main() {
  local dataset="${1:-}"
  # Precedence: explicit argument, then a RUN_MODE inherited from the
  # environment, then 'local'. Without this assignment the documented
  # second argument would be ignored and the S3 sync could never be
  # requested from the command line.
  local run_mode="${2:-${RUN_MODE:-local}}"

  if [[ -z "$dataset" ]]; then
    usage >&2
    exit 1
  fi

  # Reject bad modes before any expensive work starts, so a typo cannot
  # silently skip the sync at the very end.
  case "$run_mode" in
    local) ;;
    aws)
      command -v aws >/dev/null \
        || die "run_mode 'aws' requires the aws CLI to be installed"
      ;;
    *)
      usage >&2
      die "Unknown run_mode: $run_mode"
      ;;
  esac

  echo "=========================================="
  echo "Running Consensus Calculation"
  echo "Dataset: $dataset"
  echo "Run mode: $run_mode"
  echo "=========================================="

  local models_dir="${RESOURCES_DIR}/results/${dataset}"

  # Ask the project config which methods exist and keep those that have a
  # prediction file. The dataset is passed through argv rather than being
  # spliced into the Python source, so unusual names cannot alter the code.
  echo "Checking available methods..."
  local available_methods
  available_methods=$(python -c '
import os
import sys

from src.utils.config import METHODS

models_dir, dataset = sys.argv[1:3]
found = [
    method
    for method in METHODS
    if os.path.exists(
        os.path.join(models_dir, f"{dataset}.{method}.{method}.prediction.h5ad")
    )
]
print(" ".join(found))
' "$models_dir" "$dataset")

  if [[ -z "$available_methods" ]]; then
    die "No prediction files found for dataset: $dataset"
  fi

  echo "Available methods: $available_methods"

  # Split on whitespace without glob expansion. With -d '' read consumes
  # the whole string and reports EOF as a non-zero status, hence '|| true'.
  local -a methods_array=()
  read -r -d '' -a methods_array <<<"$available_methods" || true

  # Build the list of prediction files handed to the Regression consensus.
  local -a predictions=()
  local method file
  for method in "${methods_array[@]}"; do
    file="${models_dir}/${dataset}.${method}.${method}.prediction.h5ad"
    if [[ -e "$file" ]]; then
      predictions+=("$file")
    fi
  done

  if (( ${#predictions[@]} == 0 )); then
    die "No prediction files found for consensus calculation"
  fi

  echo "Found ${#predictions[@]} prediction files for consensus calculation"
  printf '%s\n' "${predictions[@]}"

  # Run Regression consensus
  echo ""
  echo "Running Regression consensus..."
  python src/metrics/regression/consensus/script.py \
    --dataset "$dataset" \
    --regulators_consensus "${PRIOR_DIR}/regulators_consensus_${dataset}.json" \
    --evaluation_data "${RESOURCES_DIR}/grn_benchmark/evaluation_data/${dataset}_bulk.h5ad" \
    --predictions "${predictions[@]}"

  echo "Regression consensus completed successfully"

  # Run WS distance consensus (only for applicable datasets)
  echo ""
  if is_ws_dataset "$dataset"; then
    echo "Running WS distance consensus..."
    python src/metrics/ws_distance/consensus/script.py \
      --dataset "$dataset" \
      --models_dir "$models_dir" \
      --ws_consensus "${PRIOR_DIR}/ws_consensus_${dataset}.csv" \
      --tf_all "${PRIOR_DIR}/tf_all.csv" \
      --evaluation_data_sc "${RESOURCES_DIR}/processed_data/${dataset}_evaluation_sc.h5ad" \
      --models "${methods_array[@]}"

    echo "WS distance consensus completed successfully"
  else
    echo "Skipping WS distance consensus (not applicable for dataset: $dataset)"
  fi

  # Sync results to AWS if needed
  if [[ "$run_mode" == "aws" ]]; then
    echo ""
    echo "Syncing consensus results to AWS..."
    aws s3 sync "$PRIOR_DIR" "$S3_PRIOR_URI"
    echo "Sync completed"
  fi

  echo ""
  echo "=========================================="
  echo "Consensus calculation completed for $dataset"
  echo "=========================================="
}

main "$@"