Skip to content

Commit f017a90

Browse files
author
dmoi
committed
add monodecoder config tester nb
1 parent 348aa2e commit f017a90

29 files changed

Lines changed: 4175 additions & 189616 deletions

GITHUB_PAGES_SETUP.md

Lines changed: 0 additions & 45 deletions
This file was deleted.
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
# FoldTree2 Configuration: 10 Embeddings
2+
# Decoders: Sequence + Geometry (FoldX decoder is disabled below via output_foldx: false)
3+
4+
model_name: "ft2_10_embeddings"
5+
output_dir: "./models/embedding_comparison/final/"
6+
run_name: "10_embeddings_all_decoders_final"
7+
8+
# Dataset
9+
dataset: "structs_train_final.h5"
10+
data_dir: "../../datasets/foldtree2/"
11+
aapropcsv: "config/aaindex1.csv"
12+
13+
# Training Parameters
14+
epochs: 100
15+
batch_size: 10
16+
gradient_accumulation_steps: 2
17+
18+
use_muon: true
19+
muon_lr: 0.02 # Muon learning rate for hidden weights
20+
adamw_lr: 0.0001 # AdamW learning rate (1e-4 from notebook)
21+
22+
seed: 42
23+
# Model Dimensions - 10 EMBEDDINGS
24+
hidden_size: 150
25+
num_embeddings: 10
26+
embedding_dim: 128
27+
28+
29+
hetero_gae: false # Use MultiMonoDecoder
30+
overwrite: true
31+
32+
# Encoder Configuration
33+
EMA: true
34+
35+
# Commitment cost scheduling (disabled below; the fixed commitment_cost is used throughout)
36+
commitment_cost: 0.90 # Final commitment cost (high value for strong regularization)
37+
use_commitment_scheduling: false # Scheduling disabled; only the fixed commitment_cost above applies
38+
commitment_schedule: linear # Smooth linear warmup
39+
commitment_warmup_steps: 10000 # Warmup over 10k steps (only used when scheduling is enabled; note batch_size here is 10)
40+
commitment_start: 0.05 # Start with very low commitment cost
41+
42+
43+
# Decoder Outputs
44+
output_fft: false
45+
output_rt: false
46+
output_foldx: false
47+
hetero_gae: false # NOTE(review): duplicate key — hetero_gae is already set above; remove one occurrence (strict YAML parsers reject duplicate keys)
48+
49+
# Optimization
50+
clip_grad: true
51+
gradient_accumulation_steps: 2
52+
mixed_precision: true
53+
tensor_core_precision: "high"
54+
55+
# Learning Rate Schedule
56+
lr_schedule: "plateau"
57+
lr_warmup_ratio: 0.1
58+
lr_min: 0.000001
59+
60+
# Loss Weights
61+
edgeweight: 0.1
62+
logitweight: 0.01
63+
xweight: 0.1
64+
# fft2weight = 0.01
65+
vqweight: 0.001
66+
angles_weight: 0.001
67+
ss_weight: 0.1
68+
69+
70+
71+
# pLDDT Masking
72+
mask_plddt: true
73+
plddt_threshold: 0.3
74+
75+
# Early Stopping
76+
early_stopping: true
77+
early_stopping_metric: "val/loss"
78+
early_stopping_mode: "min"
79+
early_stopping_patience: 20
80+
early_stopping_min_delta: 0.0
81+
early_stopping_warmup_epochs: 0
82+
83+
# Logging
84+
tensorboard_dir: "./runs/"
85+
device: null
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
# FoldTree2 Configuration: 15 Embeddings
2+
# Decoders: Sequence + Geometry (FoldX decoder is disabled below via output_foldx: false)
3+
4+
model_name: "ft2_15_embeddings"
5+
output_dir: "./models/embedding_comparison/final/"
6+
run_name: "15_embeddings_all_decoders_final"
7+
8+
9+
overwrite: true
10+
11+
# Dataset
12+
dataset: "structs_train_final.h5"
13+
data_dir: "../../datasets/foldtree2/"
14+
aapropcsv: "config/aaindex1.csv"
15+
16+
# Training Parameters
17+
epochs: 100
18+
batch_size: 10
19+
gradient_accumulation_steps: 2
20+
21+
use_muon: true
22+
muon_lr: 0.02 # Muon learning rate for hidden weights
23+
adamw_lr: 0.0001 # AdamW learning rate (1e-4 from notebook)
24+
25+
seed: 42
26+
27+
# Model Dimensions - 15 EMBEDDINGS
28+
hidden_size: 150
29+
num_embeddings: 15
30+
embedding_dim: 128
31+
32+
33+
hetero_gae: false # Use MultiMonoDecoder
34+
35+
# Encoder Configuration
36+
EMA: true
37+
38+
# Commitment cost scheduling (disabled below; the fixed commitment_cost is used throughout)
39+
commitment_cost: 0.90 # Final commitment cost (high value for strong regularization)
40+
use_commitment_scheduling: false # Scheduling disabled; only the fixed commitment_cost above applies
41+
commitment_schedule: linear # Smooth linear warmup
42+
commitment_warmup_steps: 1000 # Warmup over 1k steps (only used when scheduling is enabled; note batch_size here is 10)
43+
commitment_start: 0.05 # Start with very low commitment cost
44+
45+
46+
# Decoder Outputs
47+
output_fft: false
48+
output_rt: false
49+
output_foldx: false
50+
hetero_gae: false # NOTE(review): duplicate key — hetero_gae is already set above; remove one occurrence (strict YAML parsers reject duplicate keys)
51+
52+
# Optimization
53+
clip_grad: true
54+
gradient_accumulation_steps: 2
55+
mixed_precision: true
56+
tensor_core_precision: "high"
57+
58+
# Learning Rate Schedule
59+
lr_schedule: "plateau"
60+
lr_warmup_ratio: 0.1
61+
lr_min: 0.000001
62+
63+
# Loss Weights
64+
edgeweight: 0.1
65+
logitweight: 0.01
66+
xweight: 0.1
67+
# fft2weight = 0.01
68+
vqweight: 0.001
69+
angles_weight: 0.001
70+
ss_weight: 0.1
71+
72+
73+
74+
# pLDDT Masking
75+
mask_plddt: true
76+
plddt_threshold: 0.3
77+
78+
# Early Stopping
79+
early_stopping: true
80+
early_stopping_metric: "val/loss"
81+
early_stopping_mode: "min"
82+
early_stopping_patience: 20
83+
early_stopping_min_delta: 0.0
84+
early_stopping_warmup_epochs: 0
85+
86+
# Logging
87+
tensorboard_dir: "./runs/"
88+
device: null
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
# FoldTree2 Configuration: 20 Embeddings
2+
# Decoders: Sequence + Geometry (FoldX decoder is disabled below via output_foldx: false)
3+
4+
model_name: "ft2_20_embeddings"
5+
output_dir: "./models/embedding_comparison/final/"
6+
run_name: "20_embeddings_all_decoders_final"
7+
8+
# Dataset
9+
dataset: "structs_train_final.h5"
10+
data_dir: "../../datasets/foldtree2/"
11+
aapropcsv: "config/aaindex1.csv"
12+
13+
# Training Parameters
14+
epochs: 100
15+
batch_size: 10
16+
gradient_accumulation_steps: 2
17+
18+
use_muon: true
19+
muon_lr: 0.02 # Muon learning rate for hidden weights
20+
adamw_lr: 0.0001 # AdamW learning rate (1e-4 from notebook)
21+
22+
seed: 42
23+
24+
# Model Dimensions - 20 EMBEDDINGS
25+
hidden_size: 200
26+
num_embeddings: 20
27+
embedding_dim: 128
28+
29+
30+
hetero_gae: false # Use MultiMonoDecoder
31+
overwrite: true
32+
33+
# Encoder Configuration
34+
EMA: true
35+
36+
# Commitment cost scheduling (disabled below; the fixed commitment_cost is used throughout)
37+
commitment_cost: 0.90 # Final commitment cost (high value for strong regularization)
38+
use_commitment_scheduling: false # Scheduling disabled; only the fixed commitment_cost above applies
39+
commitment_schedule: linear # Smooth linear warmup
40+
commitment_warmup_steps: 1000 # Warmup over 1k steps (only used when scheduling is enabled; note batch_size here is 10)
41+
commitment_start: 0.05 # Start with very low commitment cost
42+
43+
44+
# Decoder Outputs
45+
output_fft: false
46+
output_rt: false
47+
output_foldx: false
48+
hetero_gae: false # NOTE(review): duplicate key — hetero_gae is already set above; remove one occurrence (strict YAML parsers reject duplicate keys)
49+
50+
# Optimization
51+
clip_grad: true
52+
gradient_accumulation_steps: 2
53+
mixed_precision: true
54+
tensor_core_precision: "high"
55+
56+
# Learning Rate Schedule
57+
lr_schedule: "plateau"
58+
lr_warmup_ratio: 0.1
59+
lr_min: 0.000001
60+
61+
# Loss Weights
62+
edgeweight: 0.1
63+
logitweight: 0.01
64+
xweight: 0.1
65+
# fft2weight = 0.01
66+
vqweight: 0.001
67+
angles_weight: 0.001
68+
ss_weight: 0.1
69+
70+
71+
72+
# pLDDT Masking
73+
mask_plddt: true
74+
plddt_threshold: 0.3
75+
76+
# Early Stopping
77+
early_stopping: true
78+
early_stopping_metric: "val/loss"
79+
early_stopping_mode: "min"
80+
early_stopping_patience: 20
81+
early_stopping_min_delta: 0.0
82+
early_stopping_warmup_epochs: 0
83+
84+
# Logging
85+
tensorboard_dir: "./runs/"
86+
device: null
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
# FoldTree2 Configuration: 25 Embeddings
2+
# Decoders: Sequence + Geometry (FoldX decoder is disabled below via output_foldx: false)
3+
4+
model_name: "ft2_25_embeddings"
5+
output_dir: "./models/embedding_comparison/final/"
6+
run_name: "25_embeddings_all_decoders_final"
7+
8+
# Dataset
9+
dataset: "structs_train_final.h5"
10+
data_dir: "../../datasets/foldtree2/"
11+
aapropcsv: "config/aaindex1.csv"
12+
13+
# Training Parameters
14+
epochs: 100
15+
batch_size: 10
16+
gradient_accumulation_steps: 2
17+
18+
use_muon: true
19+
muon_lr: 0.02 # Muon learning rate for hidden weights
20+
adamw_lr: 0.0001 # AdamW learning rate (1e-4 from notebook)
21+
22+
23+
seed: 42
24+
25+
# Model Dimensions - 25 EMBEDDINGS
26+
hidden_size: 200
27+
num_embeddings: 25
28+
embedding_dim: 128
29+
30+
31+
hetero_gae: false # Use MultiMonoDecoder
32+
overwrite: true
33+
34+
# Encoder Configuration
35+
EMA: true
36+
37+
# Commitment cost scheduling (disabled below; the fixed commitment_cost is used throughout)
38+
commitment_cost: 0.90 # Final commitment cost (high value for strong regularization)
39+
use_commitment_scheduling: false # Scheduling disabled; only the fixed commitment_cost above applies
40+
commitment_schedule: linear # Smooth linear warmup
41+
commitment_warmup_steps: 1000 # Warmup over 1k steps (only used when scheduling is enabled; note batch_size here is 10)
42+
commitment_start: 0.05 # Start with very low commitment cost
43+
44+
45+
# Decoder Outputs
46+
output_fft: false
47+
output_rt: false
48+
output_foldx: false
49+
hetero_gae: false # NOTE(review): duplicate key — hetero_gae is already set above; remove one occurrence (strict YAML parsers reject duplicate keys)
50+
51+
# Optimization
52+
clip_grad: true
53+
gradient_accumulation_steps: 2
54+
mixed_precision: true
55+
tensor_core_precision: "high"
56+
57+
# Learning Rate Schedule
58+
lr_schedule: "plateau"
59+
lr_warmup_ratio: 0.1
60+
lr_min: 0.000001
61+
62+
# Loss Weights
63+
edgeweight: 0.1
64+
logitweight: 0.01
65+
xweight: 0.1
66+
# fft2weight = 0.01
67+
vqweight: 0.001
68+
angles_weight: 0.001
69+
ss_weight: 0.1
70+
71+
72+
# pLDDT Masking
73+
mask_plddt: true
74+
plddt_threshold: 0.3
75+
76+
# Early Stopping
77+
early_stopping: true
78+
early_stopping_metric: "val/loss"
79+
early_stopping_mode: "min"
80+
early_stopping_patience: 20
81+
early_stopping_min_delta: 0.0
82+
early_stopping_warmup_epochs: 0
83+
84+
# Logging
85+
tensorboard_dir: "./runs/"
86+
device: null

0 commit comments

Comments
 (0)