Skip to content

Commit d2c4a6c

Browse files
committed
sweep sweep
1 parent ef48223 commit d2c4a6c

1 file changed

Lines changed: 15 additions & 40 deletions

File tree

pufferlib/config/boss_fight.ini

Lines changed: 15 additions & 40 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,6 @@
22
package = ocean
33
env_name = puffer_boss_fight
44
policy_name = Policy
5-
# rnn_name = Recurrent # Uncomment if adding LSTM/GRU
65

76
[vec]
87
num_envs = 1024
@@ -12,59 +11,40 @@ zero_copy = True
1211
seed = 42
1312

1413
[env]
15-
# Environment-specific params (passed to env constructor)
16-
# None needed - using defaults from README
1714

1815
[policy]
19-
# Policy constructor args (e.g., hidden_size)
20-
# hidden_size = 64 # Experiment: 32, 64, 128
2116

2217
[train]
23-
# Experiment tracking
2418
name = boss_fight
2519
project = boss_fight_experiments
2620
data_dir = experiments
2721
checkpoint_interval = 200
28-
29-
# Reproducibility
3022
seed = 42
31-
# TODO: disable for sweep or speed
3223
torch_deterministic = True
3324
device = cpu
34-
35-
# Optimization
36-
# TODO: try muon with 0.015 lr
3725
optimizer = adam
3826
precision = float32
3927
compile = False
40-
41-
# Core PPO hyperparameters
4228
total_timesteps = 5_000_000
43-
learning_rate = 0.0003
29+
learning_rate = 0.000864
4430
anneal_lr = True
45-
min_lr_ratio = 0.0
46-
gamma = 0.99
47-
gae_lambda = 0.95
31+
min_lr_ratio = 0.437
32+
gamma = 0.983
33+
gae_lambda = 0.902
4834
update_epochs = 4
49-
clip_coef = 0.2
50-
vf_coef = 0.5
51-
vf_clip_coef = 0.2
52-
max_grad_norm = 0.5
53-
ent_coef = 0.01
54-
55-
# Batch sizes
56-
minibatch_size = 2048
35+
clip_coef = 0.421
36+
vf_coef = 4.38
37+
vf_clip_coef = 0.303
38+
max_grad_norm = 2.28
39+
ent_coef = 0.00623
40+
minibatch_size = 2048
5741
max_minibatch_size = 32768
5842
bptt_horizon = 32
59-
60-
# Adam parameters (if optimizer = adam)
61-
adam_beta1 = 0.9
62-
adam_beta2 = 0.999
63-
adam_eps = 1e-8
64-
65-
# V-trace (for off-policy correction)
66-
# vtrace_rho_clip = 1.0
67-
# vtrace_c_clip = 1.0
43+
adam_beta1 = 0.991
44+
adam_beta2 = 0.998
45+
adam_eps = 1e-14
46+
vtrace_rho_clip = 2.72
47+
vtrace_c_clip = 2.13
6848

6949
[sweep]
7050
goal = maximize
@@ -74,31 +54,26 @@ metric_distribution = linear
7454
max_suggestion_cost = 3600
7555
use_gpu = True
7656

77-
# Learning rate sweep
7857
[sweep.train.learning_rate]
7958
distribution = log_normal
8059
min = 0.0001
8160
max = 0.003
8261

83-
# Entropy coefficient sweep (exploration vs exploitation)
8462
[sweep.train.ent_coef]
8563
distribution = log_normal
8664
min = 0.0001
8765
max = 0.05
8866

89-
# Discount factor sweep
9067
[sweep.train.gamma]
9168
distribution = logit_normal
9269
min = 0.95
9370
max = 0.999
9471

95-
# GAE lambda sweep
9672
[sweep.train.gae_lambda]
9773
distribution = logit_normal
9874
min = 0.9
9975
max = 0.99
10076

101-
# Minibatch size sweep
10277
[sweep.train.minibatch_size]
10378
distribution = uniform_pow2
10479
min = 1024

0 commit comments

Comments
 (0)