Skip to content

Commit c00352e

Browse files
committed
Some tuned runs
1 parent 4126c9b commit c00352e

11 files changed

Lines changed: 190 additions & 115 deletions

File tree

config/cartpole.ini

Lines changed: 33 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,25 +3,48 @@ env_name = cartpole
33

44
[vec]
55
total_agents = 4096
6-
7-
[policy]
8-
num_layers = 2
9-
hidden_size = 64
6+
num_buffers = 4.78896
7+
num_threads = 16
108

119
[env]
12-
cart_mass = 1.0
10+
cart_mass = 1
1311
pole_mass = 0.1
1412
pole_length = 0.5
1513
gravity = 9.8
16-
force_mag = 10.0
14+
force_mag = 10
1715
dt = 0.02
1816
continuous = 0
1917

18+
[policy]
19+
hidden_size = 32
20+
num_layers = 2.11327
21+
expansion_factor = 1
22+
2023
[train]
21-
total_timesteps = 20_000_000
22-
gamma = 0.95
23-
learning_rate = 0.05
24-
minibatch_size = 32768
24+
gpus = 1
25+
seed = 42
26+
total_timesteps = 5642560
27+
learning_rate = 0.1
28+
anneal_lr = 1
29+
min_lr_ratio = 0
30+
gamma = 0.8
31+
gae_lambda = 0.922151
32+
replay_ratio = 0.381289
33+
clip_coef = 0.143548
34+
vf_coef = 1.77975
35+
vf_clip_coef = 3.90833
36+
max_grad_norm = 0.329667
37+
ent_coef = 0.0367726
38+
beta1 = 0.942691
39+
beta2 = 0.907572
40+
eps = 4.6046e-09
41+
minibatch_size = 16384
42+
horizon = 32
43+
vtrace_rho_clip = 2.91145
44+
vtrace_c_clip = 1.66148
45+
prio_alpha = 0.786776
46+
prio_beta0 = 0.348617
47+
use_rnn = 1
2548

2649
[sweep]
2750
method = Protein

config/connect4.ini

Lines changed: 31 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -3,34 +3,44 @@ env_name = connect4
33

44
[vec]
55
total_agents = 4096
6-
7-
[policy]
8-
num_layers = 2
9-
hidden_size = 64
6+
num_buffers = 8
7+
num_threads = 2
108

119
[env]
1210
num_agents = 1
1311
player_pieces = 0
1412
env_pieces = 0
1513

14+
[policy]
15+
hidden_size = 256
16+
num_layers = 1
17+
expansion_factor = 1
18+
1619
[train]
17-
total_timesteps = 22_000_000
18-
beta1 = 0.7332525176640032
19-
beta2 = 0.9992588002434659
20-
eps = 0.0001
21-
clip_coef = 0.3344358533613167
22-
ent_coef = 0.00004214003802569246
23-
gae_lambda = 0.8969790930039623
24-
gamma = 0.9945932652529774
25-
learning_rate = 0.1
26-
max_grad_norm = 1.0219144411399215
27-
minibatch_size = 32768
28-
prio_alpha = 0.9057091953725436
29-
prio_beta0 = 0.6320607520016285
30-
vf_clip_coef = 1.9948775471721416
31-
vf_coef = 2.3734839181925462
32-
vtrace_c_clip = 0.5659747235622431
33-
vtrace_rho_clip = 1.4499061438546799
20+
gpus = 1
21+
seed = 42
22+
total_timesteps = 13272299
23+
learning_rate = 0.00847027
24+
anneal_lr = 1
25+
min_lr_ratio = 0
26+
gamma = 0.8
27+
gae_lambda = 0.962627
28+
replay_ratio = 3.16619
29+
clip_coef = 0.511829
30+
vf_coef = 5
31+
vf_clip_coef = 1.99178
32+
max_grad_norm = 0.552251
33+
ent_coef = 3.24222e-05
34+
beta1 = 0.878636
35+
beta2 = 0.986336
36+
eps = 3.02623e-07
37+
minibatch_size = 8192
38+
horizon = 32
39+
vtrace_rho_clip = 2.49786
40+
vtrace_c_clip = 1.52028
41+
prio_alpha = 1
42+
prio_beta0 = 0.746205
43+
use_rnn = 1
3444

3545
[sweep.train.total_timesteps]
3646
distribution = log_normal

config/drone.ini

Lines changed: 33 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -2,49 +2,52 @@
22
env_name = drone
33

44
[vec]
5-
total_agents = 1024
5+
total_agents = 2048
66
num_buffers = 8
7-
num_threads = 8
8-
9-
[policy]
10-
hidden_size = 64
11-
num_layers = 2
7+
num_threads = 1
128

139
[env]
1410
task = 1
1511
num_drones = 64
1612
max_rings = 10
17-
alpha_dist = 0.5602899637895572
18-
alpha_hover = 0.18691658256215232
19-
alpha_omega = 0.00010000000000000021
20-
alpha_shaping = 10
21-
hover_target_dist = 5.0
13+
alpha_dist = 0.782192
14+
alpha_hover = 0.071445
15+
alpha_omega = 0.00135588
16+
alpha_shaping = 3.9754
17+
hover_target_dist = 5
2218
hover_dist = 0.1
2319
hover_omega = 0.1
2420
hover_vel = 0.1
2521

22+
[policy]
23+
hidden_size = 128
24+
num_layers = 3.80354
25+
expansion_factor = 1
26+
2627
[train]
27-
beta1 = 0.9441482023028404
28-
beta2 = 0.9999652591777111
29-
clip_coef = 0.01
30-
ent_coef = 0.0020037723526687536
31-
eps = 3.587424560914664e-09
32-
gae_lambda = 0.830816124241041
33-
gamma = 0.9754204650932019
34-
horizon = 64
35-
learning_rate = 0.007916609535671186
36-
max_grad_norm = 0.1
28+
gpus = 1
29+
seed = 42
30+
total_timesteps = 40000000
31+
learning_rate = 0.00975033
32+
anneal_lr = 1
3733
min_lr_ratio = 0
38-
minibatch_size = 4096
39-
prio_alpha = 0.6044847010385197
34+
gamma = 0.981094
35+
gae_lambda = 0.883828
36+
replay_ratio = 2.25498
37+
clip_coef = 0.067834
38+
vf_coef = 4.42222
39+
vf_clip_coef = 3.42278
40+
max_grad_norm = 0.692201
41+
ent_coef = 9.62597e-05
42+
beta1 = 0.914207
43+
beta2 = 0.99988
44+
eps = 1e-14
45+
minibatch_size = 8192
46+
horizon = 32
47+
vtrace_rho_clip = 3.32822
48+
vtrace_c_clip = 4.37219
49+
prio_alpha = 0.427293
4050
prio_beta0 = 1
41-
replay_ratio = 2.0931513062144527
42-
total_timesteps = 5.9104024e+07
43-
vf_clip_coef = 1.9085544982750444
44-
vf_coef = 5
45-
vtrace_c_clip = 3.061192803089336
46-
vtrace_rho_clip = 1.3582992315224223
47-
4851

4952
[sweep.train.total_timesteps]
5053
distribution = log_normal

config/enduro.ini

Lines changed: 31 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,9 @@
22
env_name = enduro
33

44
[vec]
5-
total_agents = 4096
6-
7-
[policy]
8-
num_layers = 2
9-
hidden_size = 64
5+
total_agents = 256
6+
num_buffers = 4.00702
7+
num_threads = 2
108

119
[env]
1210
width = 152
@@ -16,26 +14,37 @@ car_height = 11
1614
max_enemies = 10
1715
continuous = 0
1816

17+
[policy]
18+
hidden_size = 128
19+
num_layers = 2.68359
20+
expansion_factor = 1
21+
1922
[train]
20-
total_timesteps = 400_000_000
21-
beta1 = 0.9602226117399812
22-
beta2 = 0.999983918771099
23-
eps = 2.109767652202695e-9
23+
gpus = 1
24+
seed = 42
25+
total_timesteps = 58957801
26+
learning_rate = 0.0136314
27+
anneal_lr = 1
28+
min_lr_ratio = 0
29+
gamma = 0.979826
30+
gae_lambda = 0.908362
31+
replay_ratio = 1.54521
32+
clip_coef = 0.915672
33+
vf_coef = 0.977532
34+
vf_clip_coef = 1.75503
35+
max_grad_norm = 1.08789
36+
ent_coef = 0.00246004
37+
beta1 = 0.840696
38+
beta2 = 0.999975
39+
eps = 1.78423e-12
40+
minibatch_size = 16384
2441
horizon = 64
25-
clip_coef = 0.5716251062832933
26-
ent_coef = 0.009778379693175061
27-
gae_lambda = 0.9924829173144767
28-
gamma = 0.9433427558493771
29-
learning_rate = 0.014263349414255656
30-
max_grad_norm = 0.42249653686869115
42+
vtrace_rho_clip = 2.58586
43+
vtrace_c_clip = 5
44+
prio_alpha = 1
45+
prio_beta0 = 0.161561
3146
max_minibatch_size = 32768
32-
minibatch_size = 65536
33-
prio_alpha = 0.22253503344197678
34-
prio_beta0 = 0.7866639848626998
35-
vf_clip_coef = 0.01
36-
vf_coef = 3.2952964839081016
37-
vtrace_c_clip = 3.060525785199293
38-
vtrace_rho_clip = 5
47+
use_rnn = 1
3948

4049
[sweep]
4150
metric = days_completed

config/freeway.ini

Lines changed: 38 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,11 @@
22
env_name = freeway
33

44
[vec]
5+
total_agents = 16384
6+
num_buffers = 6.8738
7+
num_threads = 2
58
num_agents = 4096
69

7-
[policy]
8-
num_layers = 2
9-
num_units = 64
10-
1110
[env]
1211
frameskip = 4
1312
width = 1216
@@ -23,9 +22,43 @@ enable_human_player = 0
2322
env_randomization = 1
2423
use_dense_rewards = 1
2524

25+
[policy]
26+
hidden_size = 128
27+
num_layers = 7.44076
28+
expansion_factor = 1
29+
num_units = 64
30+
31+
[legacy]
32+
torch_deterministic = 1
33+
cpu_offload = 0
34+
compile = 0
35+
compile_fullgraph = 1
36+
2637
[train]
27-
total_timesteps = 500_000_000
38+
gpus = 1
39+
seed = 42
40+
total_timesteps = 403702026
41+
learning_rate = 0.00357256
42+
anneal_lr = 1
43+
min_lr_ratio = 0
44+
gamma = 0.988734
45+
gae_lambda = 0.759081
46+
replay_ratio = 2.08083
47+
clip_coef = 0.168047
48+
vf_coef = 3.51248
49+
vf_clip_coef = 0.179612
50+
max_grad_norm = 5
51+
ent_coef = 0.00023521
52+
beta1 = 0.973725
53+
beta2 = 0.99942
54+
eps = 4.24651e-14
2855
minibatch_size = 32768
56+
horizon = 64
57+
vtrace_rho_clip = 2.3679
58+
vtrace_c_clip = 1.29213
59+
prio_alpha = 0.741968
60+
prio_beta0 = 0.654176
61+
use_rnn = 1
2962

3063
[sweep.train.total_timesteps]
3164
distribution = log_normal

0 commit comments

Comments
 (0)