-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathconfig_2060.yaml
More file actions
27 lines (27 loc) · 1.08 KB
/
config_2060.yaml
File metadata and controls
27 lines (27 loc) · 1.08 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
# lightning.pytorch==2.5.5
#
# Reduced config for RTX 2060 (6GB VRAM):
# > uv run python -m tiny_recursive_model.main fit --config config_base.yaml --config config_2060.yaml
#
# Fast fail mode for testing:
# > --trainer.fast_dev_run=true
#
# Memory usage on RTX 2060 with batch_size=192, accumulate_grad_batches=4:
# - n=2, activation_checkpointing=false: ~4.9 GB, ~1.0 it/s
# - n=3-6, activation_checkpointing=true: ~5.1 GB, ~0.75-0.63 it/s (OOMs without checkpointing)
#
# NOTE(review): leading indentation was lost in extraction; nesting below is
# reconstructed per Lightning CLI conventions (trainer/data/model sections,
# top-level lr_scheduler) — verify against config_base.yaml.
compile: false  # No benefit with this setup
trainer:
  accumulate_grad_batches: 4  # Process 4×192=768 samples before optimizer step
  logger: null  # use default logger (TensorBoard)
data:
  batch_size: 192  # Smaller batches for 6GB VRAM
model:
  n_layers: 2
  T: 2  # Paper uses T=3, which doesn't increase memory but slows training
  n: 2  # Paper uses n=6, see above re memory usage
  N_supervision: 1  # Paper uses 16, which doesn't increase memory but slows training
  activation_checkpointing: false  # Set to true for n=3-6 (saves memory but slows training)
lr_scheduler:  # dummy, no warmup
  init_args:
    start_factor: 1.0