|
4 | 4 | #SBATCH -A BIF135 |
5 | 5 | #SBATCH -p batch |
6 | 6 | #SBATCH -J make_evcoupling_dataset |
7 | | -#SBATCH -t 0-12:00 |
| 7 | +#SBATCH -t 0-24:00 |
8 | 8 | #SBATCH --mem 224G |
9 | 9 | #SBATCH --nodes 4 |
10 | 10 | #SBATCH --ntasks-per-node 1 |
11 | 11 | ############################################################### |
12 | 12 |
|
| 13 | +# Remote paths # |
| 14 | +export PROJDIR=/gpfs/alpine/scratch/"$USER"/bif135/Repositories/Lab_Repositories/DIPS-Plus |
| 15 | +export PSAIADIR=/ccs/home/"$USER"/Programs/PSAIA_1.0_source/bin/linux/psa |
| 16 | +export OMP_NUM_THREADS=8 |
| 17 | + |
13 | 18 | # Remote Conda environment # |
14 | 19 | source "$PROJDIR"/miniconda3/bin/activate |
15 | 20 | conda activate DIPS-Plus |
16 | 21 |
|
17 | 22 | # Load CUDA module for DGL |
18 | 23 | module load cuda/10.2.89 |
19 | 24 |
|
20 | | -# Remote paths # |
21 | | -export PROJDIR=/gpfs/alpine/scratch/"$USER"/bif135/Repositories/Lab_Repositories/DIPS-Plus |
22 | | -export PSAIADIR=/ccs/home/"$USER"/Programs/PSAIA_1.0_source/bin/linux/psa |
23 | | -export OMP_NUM_THREADS=8 |
24 | | - |
25 | 25 | # Default to using the Big Fantastic Database (BFD) of protein sequences (approx. 270GB compressed) |
26 | 26 | export HHSUITE_DB=/gpfs/alpine/scratch/$USER/bif132/Data/Databases/bfd_metaclust_clu_complete_id30_c90_final_seq |
27 | 27 |
|
28 | 28 | # Run dataset compilation scripts |
29 | 29 | cd "$PROJDIR"/project || exit |
30 | 30 |
|
31 | | -srun python3 "$PROJDIR"/project/datasets/builder/generate_hhsuite_features.py "$PROJDIR"/project/datasets/DB5/interim/parsed "$PROJDIR"/project/datasets/DB5/interim/parsed "$HHSUITE_DB" "$PROJDIR"/project/datasets/DB5/interim/external_feats --rank "$1" --size "$2" --num_cpu_jobs 4 --num_cpus_per_job 8 --num_iter 2 --source_type evcoupling --write_file |
| 31 | +srun python3 "$PROJDIR"/project/datasets/builder/generate_hhsuite_features.py "$PROJDIR"/project/datasets/EVCoupling/interim/parsed "$PROJDIR"/project/datasets/EVCoupling/interim/parsed "$HHSUITE_DB" "$PROJDIR"/project/datasets/EVCoupling/interim/external_feats --rank "$1" --size "$2" --num_cpu_jobs 4 --num_cpus_per_job 8 --num_iter 2 --source_type evcoupling --read_file |
32 | 32 |
|
33 | | -#srun python3 "$PROJDIR"/project/datasets/builder/postprocess_pruned_pairs.py "$PROJDIR"/project/datasets/DB5/raw "$PROJDIR"/project/datasets/DB5/interim/pairs "$PROJDIR"/project/datasets/DB5/interim/external_feats "$PROJDIR"/project/datasets/DB5/final/raw --num_cpus 32 --rank "$1" --size "$2" --source_type db5 |
| 33 | +#srun python3 "$PROJDIR"/project/datasets/builder/postprocess_pruned_pairs.py "$PROJDIR"/project/datasets/EVCoupling/raw "$PROJDIR"/project/datasets/EVCoupling/interim/pairs "$PROJDIR"/project/datasets/EVCoupling/interim/external_feats "$PROJDIR"/project/datasets/EVCoupling/final/raw --num_cpus 32 --rank "$1" --size "$2" --source_type evcoupling |
34 | 34 |
|
35 | | -#python3 "$PROJDIR"/project/datasets/builder/partition_dataset_filenames.py "$PROJDIR"/project/datasets/DB5/final/raw --source_type db5 --rank "$1" --size "$2" |
36 | | -#python3 "$PROJDIR"/project/datasets/builder/collect_dataset_statistics.py "$PROJDIR"/project/datasets/DB5/final/raw --rank "$1" --size "$2" |
37 | | -#python3 "$PROJDIR"/project/datasets/builder/log_dataset_statistics.py "$PROJDIR"/project/datasets/DB5/final/raw --rank "$1" --size "$2" |
38 | | -#python3 "$PROJDIR"/project/datasets/builder/impute_missing_feature_values.py "$PROJDIR"/project/datasets/DB5/final/raw --impute_atom_features False --num_cpus 32 --rank "$1" --size "$2" |
| 35 | +#python3 "$PROJDIR"/project/datasets/builder/partition_dataset_filenames.py "$PROJDIR"/project/datasets/EVCoupling/final/raw --source_type evcoupling --rank "$1" --size "$2" |
| 36 | +#python3 "$PROJDIR"/project/datasets/builder/collect_dataset_statistics.py "$PROJDIR"/project/datasets/EVCoupling/final/raw --rank "$1" --size "$2" |
| 37 | +#python3 "$PROJDIR"/project/datasets/builder/log_dataset_statistics.py "$PROJDIR"/project/datasets/EVCoupling/final/raw --rank "$1" --size "$2" |
| 38 | +#python3 "$PROJDIR"/project/datasets/builder/impute_missing_feature_values.py "$PROJDIR"/project/datasets/EVCoupling/final/raw --impute_atom_features False --num_cpus 32 --rank "$1" --size "$2" |
39 | 39 |
|
40 | 40 | # Optionally convert each postprocessed (final 'raw') complex into a pair of DGL graphs (final 'processed') with labels |
41 | | -#python3 "$PROJDIR"/project/datasets/builder/convert_complexes_to_graphs.py "$PROJDIR"/project/datasets/DB5/final/raw "$PROJDIR"/project/datasets/DB5/final/processed --num_cpus 32 --edge_dist_cutoff 15.0 --edge_limit 5000 --self_loops True --rank "$1" --size "$2" |
| 41 | +#python3 "$PROJDIR"/project/datasets/builder/convert_complexes_to_graphs.py "$PROJDIR"/project/datasets/EVCoupling/final/raw "$PROJDIR"/project/datasets/EVCoupling/final/processed --num_cpus 32 --edge_dist_cutoff 15.0 --edge_limit 5000 --self_loops True --rank "$1" --size "$2" |
0 commit comments