@@ -7,10 +7,10 @@ rnn_name = Recurrent
77[vec]
88total_agents = 16384
99num_buffers = 8
10- num_threads = 4
10+ num_threads = 2
1111
1212[policy]
13- num_layers = 3
13+ num_layers = 2
1414hidden_size = 256
1515
1616[env]
@@ -26,29 +26,29 @@ max_steps = 300
2626
2727[train]
2828anneal_lr = 1
29- beta1 = 0.9552949759646844
30- beta2 = 0.9953139311721624
31- clip_coef = 0.01
32- ent_coef = 0.00013933689574860213
33- eps = 0.00000000000153820701
34- gae_lambda = 0.19999999999999996
35- gamma = 0.986184084546895
29+ beta1 = 0.9524080120922038
30+ beta2 = 0.9938151282804136
31+ clip_coef = 0.4770373231152628
32+ ent_coef = 0.00001
33+ eps = 0.00000000000001
34+ gae_lambda = 0.8032589838049962
35+ gamma = 0.965698161834
3636gpus = 1
37- horizon = 32
38- learning_rate = 0.003776364420407994
39- max_grad_norm = 1.8486959069159363
37+ horizon = 64
38+ learning_rate = 0.002557505571664044
39+ max_grad_norm = 0.1
4040min_lr_ratio = 0.37872027027338984
41- minibatch_size = 4096
42- prio_alpha = 0.8646505783469447
43- prio_beta0 = 0.8287716342217886
44- replay_ratio = 3.5359047237524988
41+ minibatch_size = 8192
42+ prio_alpha = 0.9556021765385548
43+ prio_beta0 = 1
44+ replay_ratio = 4
4545seed = 42
46- total_timesteps = 41864993
46+ total_timesteps = 53251895
4747use_rnn = true
48- vf_clip_coef = 5
49- vf_coef = 4.444818925590201
50- vtrace_c_clip = 4.2719952740551355
51- vtrace_rho_clip = 3.84374084727298
48+ vf_clip_coef = 4.856116827195164
49+ vf_coef = 5
50+ vtrace_c_clip = 2.5648427343944147
51+ vtrace_rho_clip = 3.258932255953648
5252
5353[sweep]
5454metric = perf
0 commit comments