Skip to content

Commit f94ae04

Browse files
committed
feat: add interaction analysis script and configuration for method-dataset preprocessing with SLURM
1 parent 30d79d9 commit f94ae04

2 files changed

Lines changed: 156 additions & 0 deletions

File tree

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
# run arguments:
2+
method: diffdock # the method for which to preprocess interactions as H5 files
3+
dataset: astex_diverse # the dataset to use - NOTE: must be one of (`astex_diverse`, `casp15`, `dockgen`, `posebusters_benchmark`)
4+
repeat_index: 1 # the repeat index to preprocess - NOTE: currently only repeat_index=1 is supported
5+
output_script_dir: ${oc.env:PROJECT_ROOT}/scripts/inference # the directory in which to save the output script
6+
# sweep arguments:
7+
sweep: true # whether to build all combinations of method-dataset preprocessing scripts
8+
methods_to_sweep: [
9+
"vina_p2rank",
10+
"diffdock",
11+
"dynamicbind",
12+
"neuralplexer",
13+
"rfaa",
14+
"chai-lab_ss",
15+
"chai-lab",
16+
"boltz_ss",
17+
"boltz",
18+
"alphafold3_ss",
19+
"alphafold3",
20+
] # the methods to sweep
21+
datasets_to_sweep: [
22+
"astex_diverse",
23+
"casp15",
24+
"dockgen",
25+
"posebusters_benchmark",
26+
] # the datasets to sweep
Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
# -------------------------------------------------------------------------------------------------------------------------------------
2+
# Following code curated for PoseBench: (https://github.com/BioinfoMachineLearning/PoseBench)
3+
# -------------------------------------------------------------------------------------------------------------------------------------
4+
5+
import logging
6+
import os
7+
from typing import List, Literal
8+
9+
import hydra
10+
import rootutils
11+
from beartype import beartype
12+
from omegaconf import DictConfig
13+
14+
rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True)
15+
16+
from posebench.models.ensemble_generation import insert_hpc_headers
17+
18+
logging.basicConfig(format="[%(asctime)s] {%(filename)s:%(lineno)d} %(levelname)s - %(message)s")
19+
logger = logging.getLogger(__name__)
20+
21+
22+
# Method names accepted by the script builders in this module.
INTERACTION_ANALYSIS_METHODS = Literal[
    "vina_p2rank",
    "diffdock",
    "dynamicbind",
    "neuralplexer",
    "rfaa",
    "chai-lab_ss",
    "chai-lab",
    "boltz_ss",
    "boltz",
    "alphafold3_ss",
    "alphafold3",
]

# Dataset names accepted by the script builders in this module.
INTERACTION_ANALYSIS_DATASETS = Literal[
    "astex_diverse",
    "casp15",
    "dockgen",
    "posebusters_benchmark",
]

# Maps each dataset name to the relative path of the Python script that the
# generated SLURM job runs for it. Every path shares one filename pattern and
# differs only in a dataset-specific prefix.
DATASET_TO_NOTEBOOK = {
    dataset_name: f"notebooks/{file_prefix}_method_interaction_analysis_plotting_slurm.py"
    for dataset_name, file_prefix in (
        ("astex_diverse", "astex"),
        ("casp15", "casp15"),
        ("dockgen", "dockgen"),
        ("posebusters_benchmark", "posebusters"),
    )
}
48+
49+
50+
@beartype
def build_interaction_analysis_script(
    method: INTERACTION_ANALYSIS_METHODS,
    dataset: INTERACTION_ANALYSIS_DATASETS,
    output_script_dir: str,
    repeat_index: int = 1,
):
    """Write an executable SLURM script that preprocesses one method's interactions for one dataset.

    The generated script exports and creates the `TORCH_HOME` and `HF_HOME`
    cache directories, launches the dataset's script from `DATASET_TO_NOTEBOOK`
    with `--method <method> --exit-after-preprocessing`, and finally echoes a
    completion message.

    :param method: Name of the method whose interactions are preprocessed.
    :param dataset: Name of the dataset; selects the script to launch.
    :param output_script_dir: Directory to write the script into (created if missing).
    :param repeat_index: Repeat index embedded in the script filename; only `1` is accepted.
    :raises ValueError: If `repeat_index` is not `1`.
    """
    if repeat_index != 1:
        raise ValueError("Only repeat_index=1 is supported for interaction preprocessing.")

    os.makedirs(output_script_dir, exist_ok=True)
    script_name = f"{method}_{dataset}_hpc_interaction_analysis_{repeat_index}.sh"
    output_script = os.path.join(output_script_dir, script_name)

    # Static shell preamble that points the model caches at scratch storage.
    cache_setup = "".join(
        (
            "# Store model weights in a larger storage location\n",
            'export TORCH_HOME="/pscratch/sd/a/$USER/torch_cache"\n',
            'export HF_HOME="/pscratch/sd/a/$USER/hf_cache"\n\n',
            'mkdir -p "$TORCH_HOME"\n',
            'mkdir -p "$HF_HOME"\n\n',
        )
    )

    with open(output_script, "w") as script_file:
        script_file.write("#!/bin/bash -l\n\n")
        # NOTE(review): the HPC header is always requested for "diffdock",
        # regardless of `method` - confirm this is intended.
        script_file.write(insert_hpc_headers(method="diffdock"))
        script_file.write(cache_setup)
        script_file.write("# Preprocess method interaction H5 files only\n")
        notebook_script = DATASET_TO_NOTEBOOK[dataset]
        launch_command = (
            f"srun --kill-on-bad-exit=1 shifter python3 {notebook_script}"
            f" --method {method} --exit-after-preprocessing\n\n"
        )
        script_file.write(launch_command)
        script_file.write(f"echo 'Interaction preprocessing for {method} on {dataset} completed.'\n")

    # Make the script directly executable (rwxr-xr-x).
    os.chmod(output_script, 0o755)
    logger.info(f"Script {output_script} created successfully.")
86+
87+
88+
@beartype
def build_interaction_analysis_scripts(
    methods_to_sweep: List[INTERACTION_ANALYSIS_METHODS],
    datasets_to_sweep: List[INTERACTION_ANALYSIS_DATASETS],
    output_script_dir: str,
    repeat_index: int = 1,
):
    """Build one interaction preprocessing script per method-dataset combination.

    :param methods_to_sweep: Methods to generate scripts for.
    :param datasets_to_sweep: Datasets to generate scripts for.
    :param output_script_dir: Directory passed through to each script build.
    :param repeat_index: Repeat index passed through to each script build.
    """
    # Methods vary slowest, datasets fastest.
    method_dataset_pairs = [
        (swept_method, swept_dataset)
        for swept_method in methods_to_sweep
        for swept_dataset in datasets_to_sweep
    ]
    for swept_method, swept_dataset in method_dataset_pairs:
        build_interaction_analysis_script(
            method=swept_method,
            dataset=swept_dataset,
            output_script_dir=output_script_dir,
            repeat_index=repeat_index,
        )
104+
105+
106+
@hydra.main(
    version_base="1.3",
    config_path="../configs/scripts",
    config_name="build_interaction_analysis_script.yaml",
)
def main(cfg: DictConfig):
    """Generate interaction analysis script(s) from the Hydra configuration.

    When `cfg.sweep` is falsy, a single script is built for `cfg.method` and
    `cfg.dataset`; otherwise one script is built for every combination of
    `cfg.methods_to_sweep` and `cfg.datasets_to_sweep`.

    :param cfg: Hydra configuration holding the run and sweep arguments.
    """
    if not cfg.sweep:
        # Single-run mode: exactly one method-dataset pair.
        build_interaction_analysis_script(
            method=cfg.method,
            dataset=cfg.dataset,
            output_script_dir=cfg.output_script_dir,
            repeat_index=cfg.repeat_index,
        )
        return

    # Sweep mode: convert the config sequences to plain lists before handing them on.
    build_interaction_analysis_scripts(
        methods_to_sweep=list(cfg.methods_to_sweep),
        datasets_to_sweep=list(cfg.datasets_to_sweep),
        output_script_dir=cfg.output_script_dir,
        repeat_index=cfg.repeat_index,
    )
127+
128+
129+
if __name__ == "__main__":
130+
main()

0 commit comments

Comments
 (0)