File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -3,25 +3,48 @@ env_name = cartpole
33
44[vec]
55total_agents = 4096
6-
7- [policy]
8- num_layers = 2
9- hidden_size = 64
6+ num_buffers = 4.78896
7+ num_threads = 16
108
119[env]
12- cart_mass = 1.0
10+ cart_mass = 1
1311pole_mass = 0.1
1412pole_length = 0.5
1513gravity = 9.8
16- force_mag = 10.0
14+ force_mag = 10
1715dt = 0.02
1816continuous = 0
1917
18+ [policy]
19+ hidden_size = 32
20+ num_layers = 2.11327
21+ expansion_factor = 1
22+
2023[train]
21- total_timesteps = 20_000_000
22- gamma = 0.95
23- learning_rate = 0.05
24- minibatch_size = 32768
24+ gpus = 1
25+ seed = 42
26+ total_timesteps = 5642560
27+ learning_rate = 0.1
28+ anneal_lr = 1
29+ min_lr_ratio = 0
30+ gamma = 0.8
31+ gae_lambda = 0.922151
32+ replay_ratio = 0.381289
33+ clip_coef = 0.143548
34+ vf_coef = 1.77975
35+ vf_clip_coef = 3.90833
36+ max_grad_norm = 0.329667
37+ ent_coef = 0.0367726
38+ beta1 = 0.942691
39+ beta2 = 0.907572
40+ eps = 4.6046e-09
41+ minibatch_size = 16384
42+ horizon = 32
43+ vtrace_rho_clip = 2.91145
44+ vtrace_c_clip = 1.66148
45+ prio_alpha = 0.786776
46+ prio_beta0 = 0.348617
47+ use_rnn = 1
2548
2649[sweep]
2750method = Protein
Original file line number Diff line number Diff line change @@ -3,34 +3,44 @@ env_name = connect4
33
44[vec]
55total_agents = 4096
6-
7- [policy]
8- num_layers = 2
9- hidden_size = 64
6+ num_buffers = 8
7+ num_threads = 2
108
119[env]
1210num_agents = 1
1311player_pieces = 0
1412env_pieces = 0
1513
14+ [policy]
15+ hidden_size = 256
16+ num_layers = 1
17+ expansion_factor = 1
18+
1619[train]
17- total_timesteps = 22_000_000
18- beta1 = 0.7332525176640032
19- beta2 = 0.9992588002434659
20- eps = 0.0001
21- clip_coef = 0.3344358533613167
22- ent_coef = 0.00004214003802569246
23- gae_lambda = 0.8969790930039623
24- gamma = 0.9945932652529774
25- learning_rate = 0.1
26- max_grad_norm = 1.0219144411399215
27- minibatch_size = 32768
28- prio_alpha = 0.9057091953725436
29- prio_beta0 = 0.6320607520016285
30- vf_clip_coef = 1.9948775471721416
31- vf_coef = 2.3734839181925462
32- vtrace_c_clip = 0.5659747235622431
33- vtrace_rho_clip = 1.4499061438546799
20+ gpus = 1
21+ seed = 42
22+ total_timesteps = 13272299
23+ learning_rate = 0.00847027
24+ anneal_lr = 1
25+ min_lr_ratio = 0
26+ gamma = 0.8
27+ gae_lambda = 0.962627
28+ replay_ratio = 3.16619
29+ clip_coef = 0.511829
30+ vf_coef = 5
31+ vf_clip_coef = 1.99178
32+ max_grad_norm = 0.552251
33+ ent_coef = 3.24222e-05
34+ beta1 = 0.878636
35+ beta2 = 0.986336
36+ eps = 3.02623e-07
37+ minibatch_size = 8192
38+ horizon = 32
39+ vtrace_rho_clip = 2.49786
40+ vtrace_c_clip = 1.52028
41+ prio_alpha = 1
42+ prio_beta0 = 0.746205
43+ use_rnn = 1
3444
3545[sweep.train.total_timesteps]
3646distribution = log_normal
Original file line number Diff line number Diff line change 22env_name = drone
33
44[vec]
5- total_agents = 1024
5+ total_agents = 2048
66num_buffers = 8
7- num_threads = 8
8-
9- [policy]
10- hidden_size = 64
11- num_layers = 2
7+ num_threads = 1
128
139[env]
1410task = 1
1511num_drones = 64
1612max_rings = 10
17- alpha_dist = 0.5602899637895572
18- alpha_hover = 0.18691658256215232
19- alpha_omega = 0.00010000000000000021
20- alpha_shaping = 10
21- hover_target_dist = 5.0
13+ alpha_dist = 0.782192
14+ alpha_hover = 0.071445
15+ alpha_omega = 0.00135588
16+ alpha_shaping = 3.9754
17+ hover_target_dist = 5
2218hover_dist = 0.1
2319hover_omega = 0.1
2420hover_vel = 0.1
2521
22+ [policy]
23+ hidden_size = 128
24+ num_layers = 3.80354
25+ expansion_factor = 1
26+
2627[train]
27- beta1 = 0.9441482023028404
28- beta2 = 0.9999652591777111
29- clip_coef = 0.01
30- ent_coef = 0.0020037723526687536
31- eps = 3.587424560914664e-09
32- gae_lambda = 0.830816124241041
33- gamma = 0.9754204650932019
34- horizon = 64
35- learning_rate = 0.007916609535671186
36- max_grad_norm = 0.1
28+ gpus = 1
29+ seed = 42
30+ total_timesteps = 40000000
31+ learning_rate = 0.00975033
32+ anneal_lr = 1
3733min_lr_ratio = 0
38- minibatch_size = 4096
39- prio_alpha = 0.6044847010385197
34+ gamma = 0.981094
35+ gae_lambda = 0.883828
36+ replay_ratio = 2.25498
37+ clip_coef = 0.067834
38+ vf_coef = 4.42222
39+ vf_clip_coef = 3.42278
40+ max_grad_norm = 0.692201
41+ ent_coef = 9.62597e-05
42+ beta1 = 0.914207
43+ beta2 = 0.99988
44+ eps = 1e-14
45+ minibatch_size = 8192
46+ horizon = 32
47+ vtrace_rho_clip = 3.32822
48+ vtrace_c_clip = 4.37219
49+ prio_alpha = 0.427293
4050prio_beta0 = 1
41- replay_ratio = 2.0931513062144527
42- total_timesteps = 5.9104024e+07
43- vf_clip_coef = 1.9085544982750444
44- vf_coef = 5
45- vtrace_c_clip = 3.061192803089336
46- vtrace_rho_clip = 1.3582992315224223
47-
4851
4952[sweep.train.total_timesteps]
5053distribution = log_normal
Original file line number Diff line number Diff line change 22env_name = enduro
33
44[vec]
5- total_agents = 4096
6-
7- [policy]
8- num_layers = 2
9- hidden_size = 64
5+ total_agents = 256
6+ num_buffers = 4.00702
7+ num_threads = 2
108
119[env]
1210width = 152
@@ -16,26 +14,37 @@ car_height = 11
1614max_enemies = 10
1715continuous = 0
1816
17+ [policy]
18+ hidden_size = 128
19+ num_layers = 2.68359
20+ expansion_factor = 1
21+
1922[train]
20- total_timesteps = 400_000_000
21- beta1 = 0.9602226117399812
22- beta2 = 0.999983918771099
23- eps = 2.109767652202695e-9
23+ gpus = 1
24+ seed = 42
25+ total_timesteps = 58957801
26+ learning_rate = 0.0136314
27+ anneal_lr = 1
28+ min_lr_ratio = 0
29+ gamma = 0.979826
30+ gae_lambda = 0.908362
31+ replay_ratio = 1.54521
32+ clip_coef = 0.915672
33+ vf_coef = 0.977532
34+ vf_clip_coef = 1.75503
35+ max_grad_norm = 1.08789
36+ ent_coef = 0.00246004
37+ beta1 = 0.840696
38+ beta2 = 0.999975
39+ eps = 1.78423e-12
40+ minibatch_size = 16384
2441horizon = 64
25- clip_coef = 0.5716251062832933
26- ent_coef = 0.009778379693175061
27- gae_lambda = 0.9924829173144767
28- gamma = 0.9433427558493771
29- learning_rate = 0.014263349414255656
30- max_grad_norm = 0.42249653686869115
42+ vtrace_rho_clip = 2.58586
43+ vtrace_c_clip = 5
44+ prio_alpha = 1
45+ prio_beta0 = 0.161561
3146max_minibatch_size = 32768
32- minibatch_size = 65536
33- prio_alpha = 0.22253503344197678
34- prio_beta0 = 0.7866639848626998
35- vf_clip_coef = 0.01
36- vf_coef = 3.2952964839081016
37- vtrace_c_clip = 3.060525785199293
38- vtrace_rho_clip = 5
47+ use_rnn = 1
3948
4049[sweep]
4150metric = days_completed
Original file line number Diff line number Diff line change 22env_name = freeway
33
44[vec]
5+ total_agents = 16384
6+ num_buffers = 6.8738
7+ num_threads = 2
58num_agents = 4096
69
7- [policy]
8- num_layers = 2
9- num_units = 64
10-
1110[env]
1211frameskip = 4
1312width = 1216
@@ -23,9 +22,43 @@ enable_human_player = 0
2322env_randomization = 1
2423use_dense_rewards = 1
2524
25+ [policy]
26+ hidden_size = 128
27+ num_layers = 7.44076
28+ expansion_factor = 1
29+ num_units = 64
30+
31+ [legacy]
32+ torch_deterministic = 1
33+ cpu_offload = 0
34+ compile = 0
35+ compile_fullgraph = 1
36+
2637[train]
27- total_timesteps = 500_000_000
38+ gpus = 1
39+ seed = 42
40+ total_timesteps = 403702026
41+ learning_rate = 0.00357256
42+ anneal_lr = 1
43+ min_lr_ratio = 0
44+ gamma = 0.988734
45+ gae_lambda = 0.759081
46+ replay_ratio = 2.08083
47+ clip_coef = 0.168047
48+ vf_coef = 3.51248
49+ vf_clip_coef = 0.179612
50+ max_grad_norm = 5
51+ ent_coef = 0.00023521
52+ beta1 = 0.973725
53+ beta2 = 0.99942
54+ eps = 4.24651e-14
2855minibatch_size = 32768
56+ horizon = 64
57+ vtrace_rho_clip = 2.3679
58+ vtrace_c_clip = 1.29213
59+ prio_alpha = 0.741968
60+ prio_beta0 = 0.654176
61+ use_rnn = 1
2962
3063[sweep.train.total_timesteps]
3164distribution = log_normal
You can’t perform that action at this time.
0 commit comments