Skip to content

Commit 30d79d9

Browse files
committed
update: refactor code structure for improved readability and maintainability
1 parent 6a2be10 commit 30d79d9

4 files changed

Lines changed: 288 additions & 292 deletions

File tree

Dockerfile

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -46,6 +46,7 @@ WORKDIR /app/posebench
4646
ARG GIT_TAG=main
4747
RUN git clone https://github.com/BioinfoMachineLearning/posebench . --branch ${GIT_TAG} \
4848
&& conda env update -f environments/posebench_environment.yaml \
49+
&& conda install -y -c conda-forge openff-toolkit=0.16.0 \
4950
&& pip install -e . \
5051
&& pip install numpy==1.26.4 --no-dependencies \
5152
&& pip install prody==2.4.1 --no-dependencies \
Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1,37 +1,37 @@
11
# run arguments:
2-
method: diffdock # the method for which to score predictions - NOTE: must be one of (`diffdock`, `fabind`, `dynamicbind`, `neuralplexer`, `flowdock`, `rfaa`, `chai-lab`, `boltz`, `vina`, `ensemble`)
3-
vina_binding_site_method: p2rank # the method to use for Vina binding site prediction - NOTE: must be one of (`diffdock`, `fabind`, `dynamicbind`, `neuralplexer`, `flowdock`, `rfaa`, `chai-lab`, `boltz`, `p2rank`)
2+
method: diffdock # the method for which to score predictions - NOTE: must be one of (`diffdock`, `fabind`, `dynamicbind`, `neuralplexer`, `flowdock`, `rfaa`, `chai-lab`, `boltz`, `alphafold3`, `vina`, `ensemble`)
3+
vina_binding_site_method: p2rank # the method to use for Vina binding site prediction - NOTE: must be one of (`diffdock`, `fabind`, `dynamicbind`, `neuralplexer`, `flowdock`, `rfaa`, `chai-lab`, `boltz`, `alphafold3`, `p2rank`)
44
ensemble_ranking_method: consensus # the method to use for ensemble ranking - NOTE: must be one of (`consensus`, `ff`)
55
dataset: astex_diverse # the dataset to use - NOTE: must be one of (`posebusters_benchmark`, `astex_diverse`, `dockgen`, `casp15`)
66
repeat_index: 1 # the repeat index which was used for inference
77
cuda_device_index: 0 # the CUDA device index to use for inference (for all methods except AutoDock-Vina)
88
output_script_dir: ${oc.env:PROJECT_ROOT}/scripts/inference # the directory in which to save the output script
9-
pocket_only_baseline: null # whether to perform a pocket-only baseline for the PoseBusters Benchmark set - NOTE: not applicable only to `tulip`
9+
pocket_only_baseline: false # whether to perform a pocket-only baseline for the PoseBusters Benchmark set - NOTE: applicable to all methods except `tulip`
1010
v1_baseline: false # whether to perform the V1 baseline for DiffDock
11-
no_ilcl: null # whether to use model weights trained with an inter-ligand clash loss (ILCL) for the CASP15 set - NOTE: only applicable to `neuralplexer`
12-
relax_protein: null # whether to relax the protein structure before scoring - NOTE: currently in an experimental state
11+
no_ilcl: false # whether to use model weights trained with an inter-ligand clash loss (ILCL) for the CASP15 set - NOTE: only applicable to `neuralplexer`
12+
relax_protein: false # whether to relax the protein structure before scoring - NOTE: currently in an experimental state
1313
export_hpc_headers: true # whether to insert high-performance computing (by default, SLURM) headers into the output script
1414
verbose: false # whether to print verbose (e.g., invalid configuration) output
1515
# sweep arguments:
1616
sweep: false # whether to build all combinations of method-dataset run scripts
1717
methods_to_sweep: [
1818
"diffdock",
19-
"fabind",
2019
"dynamicbind",
2120
"neuralplexer",
22-
"flowdock",
2321
"rfaa",
22+
# "chai-lab_ss",
2423
"chai-lab",
24+
# "boltz_ss",
2525
"boltz",
26+
# "alphafold3_ss",
27+
"alphafold3",
2628
"vina",
27-
"ensemble",
2829
] # the methods to sweep
29-
vina_binding_site_methods_to_sweep: ["diffdock", "p2rank"] # the Vina binding site prediction methods to sweep
30+
vina_binding_site_methods_to_sweep: ["p2rank"] # the Vina binding site prediction methods to sweep
3031
ensemble_ranking_methods_to_sweep: ["consensus"] # the ensemble ranking methods to sweep - NOTE: must be one of (`consensus`, `ff`)
3132
datasets_to_sweep: [
3233
"posebusters_benchmark",
3334
"astex_diverse",
3435
"dockgen",
35-
"casp15",
3636
] # the datasets to sweep
3737
num_sweep_repeats: 3 # the number of repeats to run for each method-dataset sweep (if the method is a generative method)

posebench/models/ensemble_generation.py

Lines changed: 21 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@
3838
from posebench.analysis.complex_alignment import align_complex_to_protein_only
3939
from posebench.data.components.protein_apo_to_holo_alignment import read_molecule
4040
from posebench.models.inference_relaxation import relax_single_filepair
41-
from posebench.models.minimize_energy import minimize_energy
41+
# from posebench.models.minimize_energy import minimize_energy
4242
from posebench.utils.data_utils import (
4343
extract_sequences_from_protein_structure_file,
4444
renumber_biopython_structure_residues,
@@ -145,32 +145,26 @@ def insert_hpc_headers(
145145
:return: Batch headers string for SLURM job scheduling.
146146
"""
147147
return f"""######################### Batch Headers #########################
148-
#SBATCH --partition {gpu_partition} # use reserved partition `chengji-lab-gpu`
149-
#SBATCH --account {gpu_account} # NOTE: this must be specified to use the reserved partition above
150-
#SBATCH --nodes=1 # NOTE: this needs to match Lightning's `Trainer(num_nodes=...)`
151-
#SBATCH --gres gpu:{f'{gpu_type}:' if gpu_type else ''}1 # request {gpu_type} GPU resource(s)
152-
#SBATCH --ntasks-per-node=1 # NOTE: this needs to be `1` on SLURM clusters when using Lightning's `ddp_spawn` strategy`; otherwise, set to match Lightning's quantity of `Trainer(devices=...)`
153-
#SBATCH --mem={cpu_memory_in_gb}G # NOTE: use `--mem=0` to request all memory "available" on the assigned node
154-
#SBATCH -t {time_limit} # time limit for the job (up to two days: `2-00:00:00`)
155-
#SBATCH -J posebench_{method}_ensembling # job name
156-
#SBATCH --output=R-%x.%j.out # output log file
157-
#SBATCH --error=R-%x.%j.err # error log file
158-
159-
module purge
160-
module load cuda/11.8.0_gcc_9.5.0
161-
162-
# determine location of the project directory
163-
use_private_project_dir=false # NOTE: customize as needed
164-
if [ "$use_private_project_dir" = true ]; then
165-
project_dir="/home/$USER/data/Repositories/Lab_Repositories/PoseBench"
166-
else
167-
project_dir="/cluster/pixstor/chengji-lab/$USER/Repositories/Lab_Repositories/PoseBench"
168-
fi
169-
170-
# shellcheck source=/dev/null
171-
source /home/$USER/mambaforge/etc/profile.d/conda.sh
172-
173-
cd "$project_dir" || exit"""
148+
#SBATCH --qos=shared # use specified partition for job
149+
#SBATCH --image=registry.nersc.gov/m5008/acmwhb/posebench:0.0.1 # use specified container image
150+
#SBATCH --account=m5008 # use specified account for billing (e.g., `m5008` for AI4Science projects)
151+
#SBATCH --nodes=1 # NOTE: this needs to match Lightning's `Trainer(num_nodes=...)`
152+
#SBATCH --ntasks-per-node=1 # NOTE: this needs to be `1` on SLURM clusters when using Lightning's `ddp_spawn` strategy`; otherwise, set to match Lightning's quantity of `Trainer(devices=...)`
153+
#SBATCH --time=00-05:00:00 # time limit for the job (up to 2 days: `02-00:00:00`)
154+
#SBATCH --job-name=inference_analysis_sweep # job name
155+
#SBATCH --output=scripts/perlmutter/regular/logs/inference_analysis_sweep%j.out # output log file
156+
#SBATCH --error=scripts/perlmutter/regular/logs/inference_analysis_sweep%j.err # error log file
157+
158+
# Wait for 5-10 seconds randomly to avoid race condition
159+
sleep $((RANDOM % 6 + 5))
160+
161+
# Determine location of the project's directory
162+
# PROJECT_ID="m5008"
163+
# PROJECT_DIR="/global/cfs/cdirs/$PROJECT_ID/$USER/Repositories/posebench" # long term storage community drive
164+
PROJECT_DIR="/pscratch/sd/a/$USER/Repositories/posebench" # high-performance storage scratch drive with an 8-week purge policy
165+
cd "$PROJECT_DIR" || exit
166+
167+
"""
174168

175169

176170
def create_diffdock_bash_script(

0 commit comments

Comments
 (0)