Skip to content

Commit a059fa6

Browse files
committed
env updates for 4.0
1 parent 46c9d20 commit a059fa6

32 files changed

Lines changed: 319 additions & 293 deletions

config/cartpole.ini

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,16 @@
11
[base]
22
package = ocean
33
env_name = cartpole
4-
policy_name = Policy
4+
policy_name = MinGRU
55
rnn_name = Recurrent
66

77
[vec]
88
total_agents = 4096
99

10+
[policy]
11+
num_layers = 2
12+
hidden_size = 64
13+
1014
[env]
1115
cart_mass = 1.0
1216
pole_mass = 0.1
@@ -24,11 +28,10 @@ minibatch_size = 32768
2428

2529
[sweep]
2630
method = Protein
27-
metric = episode_length
31+
metric = perf
2832

2933
[sweep.train.total_timesteps]
3034
distribution = log_normal
31-
min = 1e6
32-
max = 1e7
33-
mean = 5e6
34-
scale = 0.5
35+
min = 5e6
36+
max = 2e7
37+
mean = 1e7

config/connect4.ini

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,21 @@
11
[base]
22
package = ocean
33
env_name = connect4
4-
policy_name = Policy
4+
policy_name = MinGRU
55
rnn_name = Recurrent
66

7+
[vec]
8+
total_agents = 4096
9+
10+
[policy]
11+
num_layers = 2
12+
hidden_size = 64
13+
14+
[env]
15+
num_agents = 1
16+
player_pieces = 0
17+
env_pieces = 0
18+
719
[train]
820
total_timesteps = 22_000_000
921
beta1 = 0.7332525176640032

config/enduro.ini

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,16 @@
11
[base]
22
package = ocean
33
env_name = enduro
4-
policy_name = Policy
4+
policy_name = MinGRU
55
rnn_name = Recurrent
66

7+
[vec]
8+
total_agents = 4096
9+
10+
[policy]
11+
num_layers = 2
12+
hidden_size = 64
13+
714
[env]
815
width = 152
916
height = 210
@@ -12,9 +19,6 @@ car_height = 11
1219
max_enemies = 10
1320
continuous = 0
1421

15-
[vec]
16-
total_agents = 1024
17-
1822
[train]
1923
total_timesteps = 400_000_000
2024
beta1 = 0.9602226117399812
@@ -41,7 +45,7 @@ metric = days_completed
4145

4246
[sweep.train.total_timesteps]
4347
distribution = log_normal
44-
min = 5e7
45-
max = 4e8
46-
mean = 2e8
48+
min = 2e8
49+
max = 6e8
50+
mean = 4e8
4751
scale = auto

config/freeway.ini

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,17 @@
11
[base]
22
package = ocean
33
env_name = freeway
4-
policy_name = Policy
4+
policy_name = MinGRU
55
rnn_name = Recurrent
66

77
[vec]
8-
num_envs = 8
8+
total_agents = 4096
9+
10+
[policy]
11+
num_layers = 2
12+
hidden_size = 64
913

1014
[env]
11-
num_envs = 1024
1215
frameskip = 4
1316
width = 1216
1417
height = 720
@@ -30,6 +33,6 @@ minibatch_size = 32768
3033
[sweep.train.total_timesteps]
3134
distribution = log_normal
3235
min = 3e8
33-
max = 4e8
34-
mean = 3e8
36+
max = 6e8
37+
mean = 4e8
3538
scale = auto

config/grid.ini renamed to config/maze.ini

Lines changed: 9 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,20 @@
11
[base]
22
package = ocean
3-
env_name = grid
4-
policy_name = Policy
3+
env_name = maze
4+
policy_name = MinGRU
55
rnn_name = Recurrent
66

77
[vec]
8-
total_agents = 512
9-
num_buffers = 2
10-
num_threads = 2
11-
seed = 73
8+
total_agents = 4096
129

1310
[env]
1411
max_size = 47
1512
num_maps = 8192
1613
map_size = -1
1714

1815
[policy]
19-
hidden_size = 1024
20-
num_layers = 4.621958
16+
hidden_size = 64
17+
num_layers = 2
2118
expansion_factor = 1
2219

2320
[train]
@@ -38,7 +35,7 @@ ent_coef = 0.000063
3835
beta1 = 0.989472
3936
beta2 = 0.994822
4037
eps = 0.000001
41-
minibatch_size = 8192
38+
minibatch_size = 32768
4239
horizon = 64
4340
vtrace_rho_clip = 5
4441
vtrace_c_clip = 2.007307
@@ -47,9 +44,9 @@ prio_beta0 = 0.976698
4744
use_rnn = 0
4845
env = 0
4946

50-
[environment]
51-
score = 0.931234
52-
perf = 0.931234
47+
#[environment]
48+
#score = 0.931234
49+
#perf = 0.931234
5350

5451

5552
[sweep]
@@ -62,9 +59,3 @@ max = 1e9
6259
mean = 3e8
6360
scale = time
6461

65-
[sweep.policy.hidden_size]
66-
distribution = uniform_pow2
67-
min = 16
68-
max = 1024
69-
mean = 128
70-
scale = auto

config/pacman.ini

Lines changed: 1 addition & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -38,25 +38,11 @@ vtrace_rho_clip = 1.5301756939690652
3838

3939
[sweep]
4040
downsample = 10
41-
max_cost = 300
4241

4342
[sweep.train.total_timesteps]
4443
distribution = log_normal
45-
min = 2e7
44+
min = 5e7
4645
max = 5e8
4746
mean = 1e8
4847
scale = auto
4948

50-
[sweep.policy.hidden_size]
51-
distribution = uniform_pow2
52-
min = 16
53-
max = 1024
54-
mean = 128
55-
scale = auto
56-
57-
[sweep.env.num_envs]
58-
distribution = uniform_pow2
59-
min = 1
60-
max = 4096
61-
mean = 2048
62-
scale = auto

config/slimevolley.ini

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
[base]
2+
package = ocean
3+
env_name = slimevolley
4+
policy_name = MinGRU
5+
rnn_name = Recurrent
6+
7+
[vec]
8+
total_agents = 4096
9+
10+
[policy]
11+
num_layers = 2
12+
hidden_size = 64
13+
14+
[env]
15+
; 1 for single-agent (vs bot), 2 for two-agent (self-play)
16+
num_agents = 1
17+
gamma = 0.99
18+
[train]
19+
total_timesteps = 500_000_000
20+
21+
[sweep]
22+
downsample = 5
23+
24+
[sweep.train.total_timesteps]
25+
distribution = log_normal
26+
min = 1e8
27+
max = 2e9
28+
mean = 3e8
29+
scale = time
30+

config/tetris.ini

Lines changed: 24 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,6 @@ vtrace_rho_clip = 0.70
4242
[sweep]
4343
metric = score
4444
goal = maximize
45-
max_cost = 3600
4645

4746
[sweep.train.total_timesteps]
4847
distribution = log_normal
@@ -51,30 +50,30 @@ max = 3_000_000_000
5150
mean = 200_000_000
5251
scale = auto
5352

54-
[sweep.train.gae_lambda]
55-
distribution = logit_normal
56-
min = 0.01
57-
mean = 0.6
58-
max = 0.995
59-
scale = auto
53+
#[sweep.train.gae_lambda]
54+
#distribution = logit_normal
55+
#min = 0.01
56+
#mean = 0.6
57+
#max = 0.995
58+
#scale = auto
6059

61-
[sweep.train.clip_coef]
62-
distribution = uniform
63-
min = 0.01
64-
max = 1.0
65-
mean = 0.1
66-
scale = auto
60+
#[sweep.train.clip_coef]
61+
#distribution = uniform
62+
#min = 0.01
63+
#max = 1.0
64+
#mean = 0.1
65+
#scale = auto
6766

68-
[sweep.train.adam_beta1]
69-
distribution = logit_normal
70-
min = 0.5
71-
mean = 0.95
72-
max = 0.999
73-
scale = auto
67+
#[sweep.train.adam_beta1]
68+
#distribution = logit_normal
69+
#min = 0.5
70+
#mean = 0.95
71+
#max = 0.999
72+
#scale = auto
7473

75-
[sweep.env.num_envs]
76-
distribution = uniform_pow2
77-
min = 1
78-
max = 4096
79-
mean = 2048
80-
scale = auto
74+
#[sweep.env.num_envs]
75+
#distribution = uniform_pow2
76+
#min = 1
77+
#max = 4096
78+
#mean = 2048
79+
#scale = auto

config/trash_pickup.ini

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,16 @@
11
[base]
22
package = ocean
33
env_name = trash_pickup
4-
policy_name = TrashPickup
4+
policy_name = MinGRU
55
rnn_name = Recurrent
66

7+
[vec]
8+
total_agents = 4096
9+
10+
[policy]
11+
num_layers = 2
12+
hidden_size = 64
13+
714
[env]
815
grid_size = 20
916
num_agents = 8

config/tripletriad.ini

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,16 @@
11
[base]
22
package = ocean
33
env_name = tripletriad
4-
policy_name = Policy
4+
policy_name = MinGRU
55
rnn_name = Recurrent
66

7+
[vec]
8+
total_agents = 4096
9+
10+
[policy]
11+
num_layers = 2
12+
hidden_size = 64
13+
714
[env]
815
width = 990
916
height = 690
@@ -14,6 +21,10 @@ card_height = 224
1421
total_timesteps = 20_000_000
1522
gamma = 0.95
1623

24+
[sweep]
25+
method = Protein
26+
metric = perf
27+
1728
[sweep.train.total_timesteps]
1829
distribution = log_normal
1930
min = 1e7

0 commit comments

Comments
 (0)