Skip to content

Commit bf458f6

Browse files
committed
adding submats
1 parent 68fdf92 commit bf458f6

55 files changed

Lines changed: 10676 additions & 3309 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the searchbox below for content that may be hidden.

benchmark_configs/config_10_embeddings.yaml

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,9 @@ gradient_accumulation_steps: 2
1717

1818
use_muon: true
1919
muon_lr: 0.02 # Muon learning rate for hidden weights
20-
adamw_lr: 0.0001 # AdamW learning rate (1e-4 from notebook)
20+
adamw_lr: 0.0001 # AdamW learning rate (1e-4 from notebook)
21+
learning_rate: 0.0001
22+
2123

2224
seed: 42
2325
# Model Dimensions - 10 EMBEDDINGS
@@ -27,16 +29,16 @@ embedding_dim: 128
2729

2830

2931
hetero_gae: false # Use MultiMonoDecoder
30-
overwrite : true
32+
overwrite : false
3133

3234
# Encoder Configuration
3335
EMA: true
3436

3537
# Commitment cost scheduling - THE FOCUS OF THIS CONFIG
3638
commitment_cost: 0.90 # Final commitment cost (high value for strong regularization)
37-
use_commitment_scheduling: false # Enable scheduling
39+
use_commitment_scheduling: true # Enable scheduling
3840
commitment_schedule: linear # Smooth linear warmup
39-
commitment_warmup_steps: 10000 # Warmup over 1k steps (~5 epochs with batch_size=20)
41+
commitment_warmup_steps: 1000 # Warmup over 1k steps (~5 epochs with batch_size=20)
4042
commitment_start: 0.05 # Start with very low commitment cost
4143

4244

benchmark_configs/config_15_embeddings.yaml

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@ output_dir: "./models/embedding_comparison/final/"
66
run_name: "15_embeddings_all_decoders_final"
77

88

9-
overwrite : true
9+
overwrite : false
1010

1111
# Dataset
1212
dataset: "structs_train_final.h5"
@@ -20,7 +20,8 @@ gradient_accumulation_steps: 2
2020

2121
use_muon: true
2222
muon_lr: 0.02 # Muon learning rate for hidden weights
23-
adamw_lr: 0.0001 # AdamW learning rate (1e-4 from notebook)
23+
adamw_lr: 0.0001 # AdamW learning rate (1e-4 from notebook)
24+
learning_rate: 0.0001
2425

2526
seed: 42
2627

@@ -37,7 +38,7 @@ EMA: true
3738

3839
# Commitment cost scheduling - THE FOCUS OF THIS CONFIG
3940
commitment_cost: 0.90 # Final commitment cost (high value for strong regularization)
40-
use_commitment_scheduling: false # Enable scheduling
41+
use_commitment_scheduling: true # Enable scheduling
4142
commitment_schedule: linear # Smooth linear warmup
4243
commitment_warmup_steps: 1000 # Warmup over 1k steps (~5 epochs with batch_size=20)
4344
commitment_start: 0.05 # Start with very low commitment cost

benchmark_configs/config_20_embeddings.yaml

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,8 @@ gradient_accumulation_steps: 2
1717

1818
use_muon: true
1919
muon_lr: 0.02 # Muon learning rate for hidden weights
20-
adamw_lr: 0.0001 # AdamW learning rate (1e-4 from notebook)
20+
adamw_lr: 0.0001 # AdamW learning rate (1e-4 from notebook)
21+
learning_rate: 0.0001
2122

2223
seed: 42
2324

@@ -28,14 +29,14 @@ embedding_dim: 128
2829

2930

3031
hetero_gae: false # Use MultiMonoDecoder
31-
overwrite : true
32+
overwrite : false
3233

3334
# Encoder Configuration
3435
EMA: true
3536

3637
# Commitment cost scheduling - THE FOCUS OF THIS CONFIG
3738
commitment_cost: 0.90 # Final commitment cost (high value for strong regularization)
38-
use_commitment_scheduling: false # Enable scheduling
39+
use_commitment_scheduling: true # Enable scheduling
3940
commitment_schedule: linear # Smooth linear warmup
4041
commitment_warmup_steps: 1000 # Warmup over 1k steps (~5 epochs with batch_size=20)
4142
commitment_start: 0.05 # Start with very low commitment cost

benchmark_configs/config_25_embeddings.yaml

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -17,8 +17,8 @@ gradient_accumulation_steps: 2
1717

1818
use_muon: true
1919
muon_lr: 0.02 # Muon learning rate for hidden weights
20-
adamw_lr: 0.0001 # AdamW learning rate (1e-4 from notebook)
21-
20+
adamw_lr: 0.0001 # AdamW learning rate (1e-4 from notebook)
21+
learning_rate: 0.0001
2222

2323
seed: 42
2424

@@ -29,14 +29,14 @@ embedding_dim: 128
2929

3030

3131
hetero_gae: false # Use MultiMonoDecoder
32-
overwrite : true
32+
overwrite : false
3333

3434
# Encoder Configuration
3535
EMA: true
3636

3737
# Commitment cost scheduling - THE FOCUS OF THIS CONFIG
3838
commitment_cost: 0.90 # Final commitment cost (high value for strong regularization)
39-
use_commitment_scheduling: false # Enable scheduling
39+
use_commitment_scheduling: true # Enable scheduling
4040
commitment_schedule: linear # Smooth linear warmup
4141
commitment_warmup_steps: 1000 # Warmup over 1k steps (~5 epochs with batch_size=20)
4242
commitment_start: 0.05 # Start with very low commitment cost

benchmark_configs/config_30_embeddings.yaml

Lines changed: 4 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,9 @@
11
# FoldTree2 Configuration: 30 Embeddings
22
# All three decoders: Sequence + Geometry + FoldX
33

4-
model_name: "ft2_30_embeddings_big"
4+
model_name: "ft2_30_embeddings"
55
output_dir: "./models/embedding_comparison/final/"
6-
run_name: "30_embeddings_all_decoders_big_final"
6+
run_name: "30_embeddings_all_decoders_final"
77

88
overwrite : false
99

@@ -19,8 +19,8 @@ gradient_accumulation_steps: 2
1919

2020
use_muon: true
2121
muon_lr: 0.02 # Muon learning rate for hidden weights
22-
adamw_lr: 0.0001 # AdamW learning rate (1e-4 from notebook)
23-
learning_rate: 0.00001
22+
adamw_lr: 0.0001 # AdamW learning rate (1e-4 from notebook)
23+
learning_rate: 0.0001
2424

2525
seed: 42
2626

@@ -31,7 +31,6 @@ embedding_dim: 128
3131

3232

3333
hetero_gae: false # Use MultiMonoDecoder
34-
overwrite : true
3534

3635
# Encoder Configuration
3736
EMA: true

benchmark_configs/config_35_embeddings.yaml

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,9 @@
11
# FoldTree2 Configuration: 35 Embeddings
22
# All three decoders: Sequence + Geometry + FoldX
33

4-
model_name: "ft2_35_embeddings_big"
4+
model_name: "ft2_35_embeddings"
55
output_dir: "./models/embedding_comparison/final/"
6-
run_name: "35_embeddings_all_decoders_big_final"
6+
run_name: "35_embeddings_all_decoders_final"
77

88
overwrite : false
99

@@ -19,7 +19,8 @@ gradient_accumulation_steps: 2
1919

2020
use_muon: true
2121
muon_lr: 0.02 # Muon learning rate for hidden weights
22-
adamw_lr: 0.0001 # AdamW learning rate (1e-4 from notebook)
22+
adamw_lr: 0.0001 # AdamW learning rate (1e-4 from notebook)
23+
learning_rate: 0.0001
2324

2425
seed: 42
2526

@@ -30,16 +31,15 @@ embedding_dim: 128
3031

3132

3233
hetero_gae: false # Use MultiMonoDecoder
33-
overwrite : true
3434

3535
# Encoder Configuration
3636
EMA: true
3737

3838
# Commitment cost scheduling - THE FOCUS OF THIS CONFIG
3939
commitment_cost: 0.90 # Final commitment cost (high value for strong regularization)
40-
use_commitment_scheduling: false # Enable scheduling
40+
use_commitment_scheduling: true # Enable scheduling
4141
commitment_schedule: linear # Smooth linear warmup
42-
commitment_warmup_steps: 10000 # Warmup over 1k steps (~5 epochs with batch_size=20)
42+
commitment_warmup_steps: 1000 # Warmup over 1k steps (~5 epochs with batch_size=20)
4343
commitment_start: 0.05 # Start with very low commitment cost
4444

4545

benchmark_configs/config_40_embeddings.yaml

Lines changed: 6 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,9 @@ gradient_accumulation_steps: 2
1717

1818
use_muon: true
1919
muon_lr: 0.02 # Muon learning rate for hidden weights
20-
adamw_lr: 0.0001 # AdamW learning rate (1e-4 from notebook)
20+
adamw_lr: 0.0001 # AdamW learning rate (1e-4 from notebook)
21+
learning_rate: 0.0001
22+
2123

2224
seed: 42
2325

@@ -26,18 +28,17 @@ hidden_size: 150
2628
num_embeddings: 40
2729
embedding_dim: 128
2830

29-
3031
hetero_gae: false # Use MultiMonoDecoder
31-
overwrite : true
32+
overwrite : false
3233

3334
# Encoder Configuration
3435
EMA: true
3536

3637
# Commitment cost scheduling - THE FOCUS OF THIS CONFIG
3738
commitment_cost: 0.90 # Final commitment cost (high value for strong regularization)
38-
use_commitment_scheduling: false # Enable scheduling
39+
use_commitment_scheduling: true # Enable scheduling
3940
commitment_schedule: linear # Smooth linear warmup
40-
commitment_warmup_steps: 10000 # Warmup over 1k steps (~5 epochs with batch_size=20)
41+
commitment_warmup_steps: 1000 # Warmup over 1k steps (~5 epochs with batch_size=20)
4142
commitment_start: 0.05 # Start with very low commitment cost
4243

4344

config_notebook_1k_epochs.yaml

Lines changed: 15 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -7,21 +7,22 @@ dataset: "structs_train_final.h5"
77
output_dir: "./models/notebook_1k_training_nopositions_small/"
88
run_name: "notebook_replication_1k_epochs_small"
99

10-
overwrite: true
10+
overwrite: false
1111

1212
# Training hyperparameters (from notebook)
1313
epochs: 1000
14-
batch_size: 16
15-
gradient_accumulation_steps: 1
14+
batch_size: 8
15+
gradient_accumulation_steps: 2
1616
seed: 0
1717

1818
# Model architecture (from notebook)
19-
hidden_size: 150
19+
hidden_size: 200
2020
num_embeddings: 30
2121
embedding_dim: 128
2222

2323
# Encoder configuration (mk1_Encoder from notebook)
2424
EMA: true
25+
use_uncertainty_weighting: true
2526

2627
# Decoder configuration (MultiMonoDecoder from notebook)
2728
hetero_gae: false # Use MultiMonoDecoder
@@ -38,6 +39,9 @@ mixed_precision: true
3839
mask_plddt: true
3940
plddt_threshold: 0.3
4041

42+
jump_aa_loss: 25 # Ramp up amino acid loss weight after 25 epochs for stable training (from notebook)
43+
jump_ss_loss: 45 # Ramp up secondary structure loss weight after 45 epochs for stable training (from notebook)
44+
4145
# Learning rate scheduling (from notebook)
4246
lr_schedule: "plateau" # Notebook uses plateau scheduler
4347
lr_warmup_steps: 20
@@ -48,13 +52,15 @@ clip_grad: true
4852

4953
# Loss weights (from notebook cell)
5054
# Note: These are initial weights - notebook has weight schedulers (currently commented out)
51-
edgeweight: 0.1
52-
logitweight: 0.01
53-
xweight: 0.5
55+
edgeweight: 0.5
56+
logitweight: 0.1
57+
xweight: 0.1
5458
# fft2weight = 0.01
5559
vqweight: 0.1
56-
angles_weight: 0.1
57-
ss_weight: 0.1
60+
angles_weight: 0.01
61+
ss_weight: 0.01
62+
63+
5864

5965
# Commitment cost scheduling (from notebook encoder config)
6066
use_commitment_scheduling: true

0 commit comments

Comments
 (0)