-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathtrain_supervised.sh
More file actions
62 lines (40 loc) · 2.87 KB
/
train_supervised.sh
File metadata and controls
62 lines (40 loc) · 2.87 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#!/bin/bash
# SLURM batch script: trains ResNet-50 supervised baselines on QMNIST
# defender data as a 6-way job array (one hyperparameter setting per task,
# see the args array below).
#SBATCH --gres=gpu:1
#SBATCH --job-name=supervDef
#SBATCH --partition=all
#SBATCH --qos=default
#SBATCH --cpus-per-task=8
# Append to existing log files on requeue instead of truncating them.
#SBATCH --open-mode=append
# Send SIGUSR1 to the batch shell (B:) 120 s before the time limit, so the
# script's USR1 trap can requeue the job before it is killed.
#SBATCH --signal=B:USR1@120
#SBATCH --requeue
#SBATCH --output=logs/%A_%a.stdout
#SBATCH --error=logs/%A_%a.stderr
# One task per entry in the args array (indices 0..5).
#SBATCH --array=0-5
#SBATCH --exclude=n[1-5]
# Bash builtin counter: seconds elapsed since this assignment; read at the
# end of the script for the runtime report.
SECONDS=0
# Trap handler for SIGUSR1 (sent by SLURM ~120 s before the time limit):
# asks the scheduler to requeue this job so training can resume later.
restart(){
  echo "Calling restart"
  # Quote the job id so an unset or odd value cannot word-split (SC2086).
  scontrol requeue "$SLURM_JOB_ID"
  echo "Scheduled job for restart"
}
# Trap handler for SIGTERM: log the signal and deliberately do nothing, so
# SLURM's TERM during shutdown/requeue does not abort the script early.
ignore(){
  printf '%s\n' "Ignored SIGTERM"
}
# Requeue on USR1 (the pre-timeout warning requested via --signal) and
# swallow TERM so the job survives long enough to be requeued.
trap restart USR1
trap ignore TERM
# Log the start timestamp to stdout.
date
# Command lines for the job array: each entry is a whole "script + flags"
# string, selected by SLURM_ARRAY_TASK_ID and intentionally word-split when
# handed to srun further down.
args=()
# regular supervised learning retraining on defender data, w. reasonable HP setting, no overfit
args+=("supervised_train_resnet50_defender.py --train_path data/QMNIST_ppml_ImageFolder/defender --val_path data/QMNIST_ppml_ImageFolder/reserve --batch_size 64 --weight_decay 1e-4 --scheduler_patience 4 --scheduler_factor 0.1 --epochs 40 --random_seed 68 --train_mode fc --num_workers 8")
args+=("supervised_train_resnet50_defender.py --train_path data/QMNIST_ppml_ImageFolder/defender --val_path data/QMNIST_ppml_ImageFolder/reserve --batch_size 64 --weight_decay 1e-4 --scheduler_patience 4 --scheduler_factor 0.1 --epochs 40 --random_seed 68 --train_mode whole --num_workers 8")
# regular supervised learning, train to maximum overfitting, train a long time, no regularization
args+=("supervised_train_resnet50_defender.py --train_path data/QMNIST_ppml_ImageFolder/defender --val_path data/QMNIST_ppml_ImageFolder/reserve --batch_size 64 --weight_decay 0 --scheduler_patience 4 --scheduler_factor 0.1 --epochs 80 --random_seed 68 --train_mode fc --overfit --num_workers 8")
args+=("supervised_train_resnet50_defender.py --train_path data/QMNIST_ppml_ImageFolder/defender --val_path data/QMNIST_ppml_ImageFolder/reserve --batch_size 64 --weight_decay 0 --scheduler_patience 4 --scheduler_factor 0.1 --epochs 80 --random_seed 68 --train_mode whole --overfit --num_workers 8")
# same as the previous one but with labels flipped in both defender and reserve datasets
args+=("supervised_train_resnet50_defender.py --train_path data/QMNIST_ppml_flipped_ImageFolder/defender --val_path data/QMNIST_ppml_flipped_ImageFolder/reserve --batch_size 64 --weight_decay 0 --scheduler_patience 4 --scheduler_factor 0.1 --epochs 80 --random_seed 68 --train_mode fc --overfit --random_labels --num_workers 8")
args+=("supervised_train_resnet50_defender.py --train_path data/QMNIST_ppml_flipped_ImageFolder/defender --val_path data/QMNIST_ppml_flipped_ImageFolder/reserve --batch_size 64 --weight_decay 0 --scheduler_patience 4 --scheduler_factor 0.1 --epochs 80 --random_seed 68 --train_mode whole --overfit --random_labels --num_workers 8")
# Launch the selected command line. The expansion is intentionally unquoted:
# each args entry is a full "script + flags" string that must word-split into
# separate arguments for srun (shellcheck SC2086 is expected here).
echo "Starting python ${args[${SLURM_ARRAY_TASK_ID}]}"
# Run srun in the background and wait on it: bash defers signal traps until
# the current foreground command finishes, so a foreground srun would delay
# the USR1 requeue handler until the job is already being killed at the time
# limit. Waiting on a background child lets the trap fire immediately.
srun python ${args[${SLURM_ARRAY_TASK_ID}]} &
srun_pid=$!
wait "$srun_pid"
echo "End python ${args[${SLURM_ARRAY_TASK_ID}]}"
# Report wall-clock runtime from the SECONDS counter set at the top.
DURATION=$SECONDS
echo "End of the program! $(($DURATION / 60)) minutes and $(($DURATION % 60)) seconds elapsed."