Skip to content

Commit 82346a3

Browse files
committed
odd config
1 parent aed88e5 commit 82346a3

1 file changed

Lines changed: 31 additions & 34 deletions

File tree

config/boxoban.ini

Lines changed: 31 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -3,45 +3,42 @@ env_name = boxoban
3 3

4 4
[vec]
5 5
total_agents = 16384
6-
num_buffers = 8
7-
num_threads = 8
8-
9-
[policy]
10-
num_layers = 1
11-
hidden_size = 256
6+
num_buffers = 1.83193
7+
num_threads = 1
12 8

13 9
[env]
14 10
num_agents = 1
15-
#0 basic, 1 easy, 2 medium, 3 hard, 4 unfiltered
16-
difficulty = 1
17-
#reward per intermediate target (once per episode)
11+
difficulty = 2
18 12
int_r_coeff = 0.25
19-
#moving box off target
20-
target_loss_pen_coeff = 0.0
13+
target_loss_pen_coeff = 0
21 14
max_steps = 300
22-
15+
16+
[policy]
17+
hidden_size = 1024
18+
num_layers = 7.61945
19+
expansion_factor = 1
20+
23 21
[train]
24-
anneal_lr = 1
25-
beta1 = 0.9774372816193448
26-
beta2 = 0.9659403664380584
27-
clip_coef = 0.6046560670053024
28-
ent_coef = 0.00002079831529141607
29-
eps = 0.00000000000001
30-
gae_lambda = 0.9258914518467392
31-
gamma = 0.9772998708784648
32 22
gpus = 1
33-
horizon = 64
34-
learning_rate = 0.004480255741933225
35-
max_grad_norm = 1.221684008665154
36-
min_lr_ratio = 0.37872027027338984
37-
minibatch_size = 8192
38-
prio_alpha = 1
39-
prio_beta0 = 0.8789921736378042
40-
replay_ratio = 3.210300031048168
41 23
seed = 42
42-
total_timesteps = 55504884
43-
use_rnn = true
44-
vf_clip_coef = 4.339748010438874
45-
vf_coef = 4.240274862679744
46-
vtrace_c_clip = 1.3625779006162615
47-
vtrace_rho_clip = 3.17260199042977
24+
total_timesteps = 1310719
25+
learning_rate = 0.0453785
26+
anneal_lr = 1
27+
min_lr_ratio = 0.37872
28+
gamma = 0.991084
29+
gae_lambda = 0.2
30+
replay_ratio = 4
31+
clip_coef = 0.01
32+
vf_coef = 3.98419
33+
vf_clip_coef = 5
34+
max_grad_norm = 0.1
35+
ent_coef = 0.000305154
36+
beta1 = 0.986866
37+
beta2 = 0.9
38+
eps = 1.03398e-14
39+
minibatch_size = 16384
40+
horizon = 32
41+
vtrace_rho_clip = 2.06753
42+
vtrace_c_clip = 2.50303
43+
prio_alpha = 0.907571
44+
prio_beta0 = 0.989874

0 commit comments

Comments (0)