Skip to content

Commit 7039a9e

Browse files
committed
conflicts
2 parents 579f978 + a059fa6 commit 7039a9e

36 files changed

Lines changed: 429 additions & 289 deletions

config/cartpole.ini

Lines changed: 8 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,10 @@ env_name = cartpole
44
[vec]
55
total_agents = 4096
66

7+
[policy]
8+
num_layers = 2
9+
hidden_size = 64
10+
711
[env]
812
cart_mass = 1.0
913
pole_mass = 0.1
@@ -21,11 +25,10 @@ minibatch_size = 32768
2125

2226
[sweep]
2327
method = Protein
24-
metric = episode_length
28+
metric = perf
2529

2630
[sweep.train.total_timesteps]
2731
distribution = log_normal
28-
min = 1e6
29-
max = 1e7
30-
mean = 5e6
31-
scale = 0.5
32+
min = 5e6
33+
max = 2e7
34+
mean = 1e7

config/connect4.ini

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,18 @@
11
[base]
22
env_name = connect4
33

4+
[vec]
5+
total_agents = 4096
6+
7+
[policy]
8+
num_layers = 2
9+
hidden_size = 64
10+
11+
[env]
12+
num_agents = 1
13+
player_pieces = 0
14+
env_pieces = 0
15+
416
[train]
517
total_timesteps = 22_000_000
618
beta1 = 0.7332525176640032

config/enduro.ini

Lines changed: 10 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,13 @@
11
[base]
22
env_name = enduro
33

4+
[vec]
5+
total_agents = 4096
6+
7+
[policy]
8+
num_layers = 2
9+
hidden_size = 64
10+
411
[env]
512
width = 152
613
height = 210
@@ -9,9 +16,6 @@ car_height = 11
916
max_enemies = 10
1017
continuous = 0
1118

12-
[vec]
13-
total_agents = 1024
14-
1519
[train]
1620
total_timesteps = 400_000_000
1721
beta1 = 0.9602226117399812
@@ -38,7 +42,7 @@ metric = days_completed
3842

3943
[sweep.train.total_timesteps]
4044
distribution = log_normal
41-
min = 5e7
42-
max = 4e8
43-
mean = 2e8
45+
min = 2e8
46+
max = 6e8
47+
mean = 4e8
4448
scale = auto

config/freeway.ini

Lines changed: 7 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -2,10 +2,13 @@
22
env_name = freeway
33

44
[vec]
5-
num_envs = 8
5+
num_agents = 4096
6+
7+
[policy]
8+
num_layers = 2
9+
num_units = 64
610

711
[env]
8-
num_envs = 1024
912
frameskip = 4
1013
width = 1216
1114
height = 720
@@ -27,6 +30,6 @@ minibatch_size = 32768
2730
[sweep.train.total_timesteps]
2831
distribution = log_normal
2932
min = 3e8
30-
max = 4e8
31-
mean = 3e8
33+
max = 6e8
34+
mean = 4e8
3235
scale = auto

config/grid.ini renamed to config/maze.ini

Lines changed: 9 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -1,20 +1,18 @@
11
[base]
2-
env_name = grid
2+
package = ocean
3+
env_name = maze
34

45
[vec]
5-
total_agents = 512
6-
num_buffers = 2
7-
num_threads = 2
8-
seed = 73
6+
total_agents = 4096
97

108
[env]
119
max_size = 47
1210
num_maps = 8192
1311
map_size = -1
1412

1513
[policy]
16-
hidden_size = 1024
17-
num_layers = 4.621958
14+
hidden_size = 64
15+
num_layers = 2
1816
expansion_factor = 1
1917

2018
[train]
@@ -35,17 +33,17 @@ ent_coef = 0.000063
3533
beta1 = 0.989472
3634
beta2 = 0.994822
3735
eps = 0.000001
38-
minibatch_size = 8192
36+
minibatch_size = 32768
3937
horizon = 64
4038
vtrace_rho_clip = 5
4139
vtrace_c_clip = 2.007307
4240
prio_alpha = 0.664124
4341
prio_beta0 = 0.976698
4442
env = 0
4543

46-
[environment]
47-
score = 0.931234
48-
perf = 0.931234
44+
#[environment]
45+
#score = 0.931234
46+
#perf = 0.931234
4947

5048

5149
[sweep]
@@ -58,9 +56,3 @@ max = 1e9
5856
mean = 3e8
5957
scale = time
6058

61-
[sweep.policy.hidden_size]
62-
distribution = uniform_pow2
63-
min = 16
64-
max = 1024
65-
mean = 128
66-
scale = auto

config/ocean/drive.ini

Lines changed: 98 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,98 @@
1+
[base]
2+
package = ocean
3+
env_name = puffer_drive
4+
policy_name = MinGRU
5+
rnn_name = Recurrent
6+
7+
[vec]
8+
total_agents = 8192
9+
num_buffers = 8
10+
11+
[policy]
12+
input_size = 64
13+
hidden_size = 256
14+
15+
[rnn]
16+
input_size = 256
17+
hidden_size = 256
18+
19+
[env]
20+
width = 1280
21+
height = 1024
22+
human_agent_idx = 0
23+
reward_vehicle_collision = 0
24+
reward_offroad_collision = 0
25+
spawn_immunity_timer = 50
26+
reward_goal_post_respawn = 0.0
27+
reward_vehicle_collision_post_respawn = 0.0
28+
resample_frequency = 910
29+
num_maps = 10000
30+
31+
[train]
32+
total_timesteps = 2_000_000_000
33+
anneal_lr = True
34+
batch_size = auto
35+
minibatch_size = 32768
36+
num_minibatches = 16
37+
horizon = 128
38+
adam_beta1 = 0.9
39+
adam_beta2 = 0.999
40+
adam_eps = 1e-8
41+
clip_coef = 0.2
42+
ent_coef = 0.001
43+
gae_lambda = 0.95
44+
gamma = 0.98
45+
learning_rate = 0.005
46+
max_grad_norm = 1
47+
prio_alpha = 0.8499999999999999
48+
prio_beta0 = 0.8499999999999999
49+
update_epochs = 1
50+
vf_clip_coef = 0.1999999999999999
51+
vf_coef = 2
52+
vtrace_c_clip = 1
53+
vtrace_rho_clip = 1
54+
checkpoint_interval = 1000
55+
56+
57+
58+
[sweep.train.total_timesteps]
59+
distribution = log_normal
60+
min = 1e8
61+
max = 4e8
62+
mean = 2e8
63+
scale = time
64+
65+
[sweep.env.reward_vehicle_collision]
66+
distribution = uniform
67+
min = -1.0
68+
max = 0.0
69+
mean = -0.2
70+
scale = auto
71+
72+
[sweep.env.reward_offroad_collision]
73+
distribution = uniform
74+
min = -1.0
75+
max = 0.0
76+
mean = -0.2
77+
scale = auto
78+
79+
[sweep.env.spawn_immunity_timer]
80+
distribution = uniform
81+
min = 1
82+
max = 91
83+
mean = 30
84+
scale = auto
85+
86+
[sweep.env.reward_goal_post_respawn]
87+
distribution = uniform
88+
min = 0.0
89+
max = 1.0
90+
mean = 0.5
91+
scale = auto
92+
93+
[sweep.env.reward_vehicle_collision_post_respawn]
94+
distribution = uniform
95+
min = -1.0
96+
max = 0.0
97+
mean = -0.2
98+
scale = auto

config/pacman.ini

Lines changed: 1 addition & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -35,25 +35,11 @@ vtrace_rho_clip = 1.5301756939690652
3535

3636
[sweep]
3737
downsample = 10
38-
max_cost = 300
3938

4039
[sweep.train.total_timesteps]
4140
distribution = log_normal
42-
min = 2e7
41+
min = 5e7
4342
max = 5e8
4443
mean = 1e8
4544
scale = auto
4645

47-
[sweep.policy.hidden_size]
48-
distribution = uniform_pow2
49-
min = 16
50-
max = 1024
51-
mean = 128
52-
scale = auto
53-
54-
[sweep.env.num_envs]
55-
distribution = uniform_pow2
56-
min = 1
57-
max = 4096
58-
mean = 2048
59-
scale = auto

config/slimevolley.ini

Lines changed: 30 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,30 @@
1+
[base]
2+
package = ocean
3+
env_name = slimevolley
4+
policy_name = MinGRU
5+
rnn_name = Recurrent
6+
7+
[vec]
8+
total_agents = 4096
9+
10+
[policy]
11+
num_layers = 2
12+
hidden_size = 64
13+
14+
[env]
15+
; 1 for single-agent (vs bot), 2 for two-agent (self-play)
16+
num_agents=1
17+
gamma = 0.99
18+
[train]
19+
total_timesteps = 500_000_000
20+
21+
[sweep]
22+
downsample = 5
23+
24+
[sweep.train.total_timesteps]
25+
distribution = log_normal
26+
min = 1e8
27+
max = 2e9
28+
mean = 3e8
29+
scale = time
30+

config/tetris.ini

Lines changed: 24 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -39,7 +39,6 @@ vtrace_rho_clip = 0.70
3939
[sweep]
4040
metric = score
4141
goal = maximize
42-
max_cost = 3600
4342

4443
[sweep.train.total_timesteps]
4544
distribution = log_normal
@@ -48,30 +47,30 @@ max = 3_000_000_000
4847
mean = 200_000_000
4948
scale = auto
5049

51-
[sweep.train.gae_lambda]
52-
distribution = logit_normal
53-
min = 0.01
54-
mean = 0.6
55-
max = 0.995
56-
scale = auto
50+
#[sweep.train.gae_lambda]
51+
#distribution = logit_normal
52+
#min = 0.01
53+
#mean = 0.6
54+
#max = 0.995
55+
#scale = auto
5756

58-
[sweep.train.clip_coef]
59-
distribution = uniform
60-
min = 0.01
61-
max = 1.0
62-
mean = 0.1
63-
scale = auto
57+
#[sweep.train.clip_coef]
58+
#distribution = uniform
59+
#min = 0.01
60+
#max = 1.0
61+
#mean = 0.1
62+
#scale = auto
6463

65-
[sweep.train.adam_beta1]
66-
distribution = logit_normal
67-
min = 0.5
68-
mean = 0.95
69-
max = 0.999
70-
scale = auto
64+
#[sweep.train.adam_beta1]
65+
#distribution = logit_normal
66+
#min = 0.5
67+
#mean = 0.95
68+
#max = 0.999
69+
#scale = auto
7170

72-
[sweep.env.num_envs]
73-
distribution = uniform_pow2
74-
min = 1
75-
max = 4096
76-
mean = 2048
77-
scale = auto
71+
#[sweep.env.num_envs]
72+
#distribution = uniform_pow2
73+
#min = 1
74+
#max = 4096
75+
#mean = 2048
76+
#scale = auto

config/trash_pickup.ini

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,13 @@
11
[base]
22
env_name = trash_pickup
33

4+
[vec]
5+
total_agents = 4096
6+
7+
[policy]
8+
num_layers = 2
9+
hidden_size = 64
10+
411
[env]
512
grid_size = 20
613
num_agents = 8

0 commit comments

Comments
 (0)