--- cartpole config ---
@@ -4,6 +4,10 @@ env_name = cartpole
 [vec]
 total_agents = 4096
 
+[policy]
+num_layers = 2
+hidden_size = 64
+
 [env]
 cart_mass = 1.0
 pole_mass = 0.1
@@ -21,11 +25,10 @@ minibatch_size = 32768
 
 [sweep]
 method = Protein
-metric = episode_length
+metric = perf
 
 [sweep.train.total_timesteps]
 distribution = log_normal
-min = 1e6
-max = 1e7
-mean = 5e6
-scale = 0.5
+min = 5e6
+max = 2e7
+mean = 1e7
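For intuition, here is a minimal Python sketch of how a [sweep.train.total_timesteps] prior like the one above might be sampled. The semantics assumed here (a normal draw in log10 space around mean with stddev scale, clipped to [min, max]) are an assumption, not the Protein sweep's actual implementation:

    import math
    import random

    def sample_log_normal(lo, hi, mean, scale=0.5):
        # Assumed semantics, not taken from the sweep code: draw a normal
        # in log10 space centered on log10(mean) with stddev `scale`,
        # then clip back into [lo, hi].
        x = random.gauss(math.log10(mean), scale)
        return min(max(10 ** x, lo), hi)

    # e.g. the new cartpole prior: min = 5e6, max = 2e7, mean = 1e7
    print(sample_log_normal(5e6, 2e7, 1e7))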
--- connect4 config ---
 [base]
 env_name = connect4
 
+[vec]
+total_agents = 4096
+
+[policy]
+num_layers = 2
+hidden_size = 64
+
+[env]
+num_agents = 1
+player_pieces = 0
+env_pieces = 0
+
 [train]
 total_timesteps = 22_000_000
 beta1 = 0.7332525176640032
--- enduro config ---
 [base]
 env_name = enduro
 
+[vec]
+total_agents = 4096
+
+[policy]
+num_layers = 2
+hidden_size = 64
+
 [env]
 width = 152
 height = 210
@@ -9,9 +16,6 @@ car_height = 11
 max_enemies = 10
 continuous = 0
 
-[vec]
-total_agents = 1024
-
 [train]
 total_timesteps = 400_000_000
 beta1 = 0.9602226117399812
@@ -38,7 +42,7 @@ metric = days_completed
 
 [sweep.train.total_timesteps]
 distribution = log_normal
-min = 5e7
-max = 4e8
-mean = 2e8
+min = 2e8
+max = 6e8
+mean = 4e8
 scale = auto
--- freeway config ---
 env_name = freeway
 
 [vec]
-num_envs = 8
+num_agents = 4096
+
+[policy]
+num_layers = 2
+num_units = 64
 
 [env]
-num_envs = 1024
 frameskip = 4
 width = 1216
 height = 720
@@ -27,6 +30,6 @@ minibatch_size = 32768
 [sweep.train.total_timesteps]
 distribution = log_normal
 min = 3e8
-max = 4e8
-mean = 3e8
+max = 6e8
+mean = 4e8
 scale = auto
--- maze config (renamed from grid) ---
 [base]
-env_name = grid
+package = ocean
+env_name = maze
 
 [vec]
-total_agents = 512
-num_buffers = 2
-num_threads = 2
-seed = 73
+total_agents = 4096
 
 [env]
 max_size = 47
 num_maps = 8192
 map_size = -1
 
 [policy]
-hidden_size = 1024
-num_layers = 4.621958
+hidden_size = 64
+num_layers = 2
 expansion_factor = 1
 
 [train]
@@ -35,17 +33,17 @@ ent_coef = 0.000063
 beta1 = 0.989472
 beta2 = 0.994822
 eps = 0.000001
-minibatch_size = 8192
+minibatch_size = 32768
 horizon = 64
 vtrace_rho_clip = 5
 vtrace_c_clip = 2.007307
 prio_alpha = 0.664124
 prio_beta0 = 0.976698
 env = 0
 
-[environment]
-score = 0.931234
-perf = 0.931234
+# [environment]
+# score = 0.931234
+# perf = 0.931234
 
 
 [sweep]
@@ -58,9 +56,3 @@ max = 1e9
 mean = 3e8
 scale = time
 
-[sweep.policy.hidden_size]
-distribution = uniform_pow2
-min = 16
-max = 1024
-mean = 128
-scale = auto
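The deleted [sweep.policy.hidden_size] block used a uniform_pow2 distribution. One plausible reading, assumed here rather than taken from the sweep code, is a uniform choice among the powers of two inside [min, max]:

    import math
    import random

    def sample_uniform_pow2(lo, hi):
        # Assumed semantics of `uniform_pow2`: pick uniformly among the
        # powers of two lying in [lo, hi].
        lo_exp = math.ceil(math.log2(lo))
        hi_exp = math.floor(math.log2(hi))
        return 2 ** random.randint(lo_exp, hi_exp)

    print(sample_uniform_pow2(16, 1024))  # 16, 32, 64, 128, 256, 512, or 1024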
--- puffer_drive config (new file) ---
+[base]
+package = ocean
+env_name = puffer_drive
+policy_name = MinGRU
+rnn_name = Recurrent
+
+[vec]
+total_agents = 8192
+num_buffers = 8
+
+[policy]
+input_size = 64
+hidden_size = 256
+
+[rnn]
+input_size = 256
+hidden_size = 256
+
+[env]
+width = 1280
+height = 1024
+human_agent_idx = 0
+reward_vehicle_collision = 0
+reward_offroad_collision = 0
+spawn_immunity_timer = 50
+reward_goal_post_respawn = 0.0
+reward_vehicle_collision_post_respawn = 0.0
+resample_frequency = 910
+num_maps = 10000
+
+[train]
+total_timesteps = 2_000_000_000
+anneal_lr = True
+batch_size = auto
+minibatch_size = 32768
+num_minibatches = 16
+horizon = 128
+adam_beta1 = 0.9
+adam_beta2 = 0.999
+adam_eps = 1e-8
+clip_coef = 0.2
+ent_coef = 0.001
+gae_lambda = 0.95
+gamma = 0.98
+learning_rate = 0.005
+max_grad_norm = 1
+prio_alpha = 0.8499999999999999
+prio_beta0 = 0.8499999999999999
+update_epochs = 1
+vf_clip_coef = 0.1999999999999999
+vf_coef = 2
+vtrace_c_clip = 1
+vtrace_rho_clip = 1
+checkpoint_interval = 1000
+
+[sweep.train.total_timesteps]
+distribution = log_normal
+min = 1e8
+max = 4e8
+mean = 2e8
+scale = time
+
+[sweep.env.reward_vehicle_collision]
+distribution = uniform
+min = -1.0
+max = 0.0
+mean = -0.2
+scale = auto
+
+[sweep.env.reward_offroad_collision]
+distribution = uniform
+min = -1.0
+max = 0.0
+mean = -0.2
+scale = auto
+
+[sweep.env.spawn_immunity_timer]
+distribution = uniform
+min = 1
+max = 91
+mean = 30
+scale = auto
+
+[sweep.env.reward_goal_post_respawn]
+distribution = uniform
+min = 0.0
+max = 1.0
+mean = 0.5
+scale = auto
+
+[sweep.env.reward_vehicle_collision_post_respawn]
+distribution = uniform
+min = -1.0
+max = 0.0
+mean = -0.2
+scale = auto
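These files are plain INI-style configs, so Python's standard configparser can read them; the sketch below is illustrative only (the trainer's real loader is not shown in this diff, and the file name used here is hypothetical). Note that values such as 2_000_000_000 and auto arrive as strings and need coercion:

    import configparser

    parser = configparser.ConfigParser()
    parser.read("puffer_drive.ini")  # hypothetical file name

    def coerce(raw):
        # Best-effort typing for values like '2_000_000_000', '0.98', 'auto'.
        for cast in (int, float):
            try:
                return cast(raw)
            except ValueError:
                pass
        return raw  # leave strings such as 'auto' or 'True' untouched

    train = {key: coerce(value) for key, value in parser["train"].items()}
    print(train["total_timesteps"], train["batch_size"])  # 2000000000 auto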
--- (config; file name not shown) ---
@@ -35,25 +35,11 @@ vtrace_rho_clip = 1.5301756939690652
 
 [sweep]
 downsample = 10
-max_cost = 300
 
 [sweep.train.total_timesteps]
 distribution = log_normal
-min = 2e7
+min = 5e7
 max = 5e8
 mean = 1e8
 scale = auto
 
-[sweep.policy.hidden_size]
-distribution = uniform_pow2
-min = 16
-max = 1024
-mean = 128
-scale = auto
-
-[sweep.env.num_envs]
-distribution = uniform_pow2
-min = 1
-max = 4096
-mean = 2048
-scale = auto
--- slimevolley config (new file) ---
+[base]
+package = ocean
+env_name = slimevolley
+policy_name = MinGRU
+rnn_name = Recurrent
+
+[vec]
+total_agents = 4096
+
+[policy]
+num_layers = 2
+hidden_size = 64
+
+[env]
+; 1 for single-agent (vs bot), 2 for two-agent (self-play)
+num_agents = 1
+gamma = 0.99
+
+[train]
+total_timesteps = 500_000_000
+
+[sweep]
+downsample = 5
+
+[sweep.train.total_timesteps]
+distribution = log_normal
+min = 1e8
+max = 2e9
+mean = 3e8
+scale = time
--- (config; file name not shown) ---
@@ -39,7 +39,6 @@ vtrace_rho_clip = 0.70
 [sweep]
 metric = score
 goal = maximize
-max_cost = 3600
 
 [sweep.train.total_timesteps]
 distribution = log_normal
@@ -48,30 +47,30 @@ max = 3_000_000_000
 mean = 200_000_000
 scale = auto
 
-[sweep.train.gae_lambda]
-distribution = logit_normal
-min = 0.01
-mean = 0.6
-max = 0.995
-scale = auto
+# [sweep.train.gae_lambda]
+# distribution = logit_normal
+# min = 0.01
+# mean = 0.6
+# max = 0.995
+# scale = auto
 
-[sweep.train.clip_coef]
-distribution = uniform
-min = 0.01
-max = 1.0
-mean = 0.1
-scale = auto
+# [sweep.train.clip_coef]
+# distribution = uniform
+# min = 0.01
+# max = 1.0
+# mean = 0.1
+# scale = auto
 
-[sweep.train.adam_beta1]
-distribution = logit_normal
-min = 0.5
-mean = 0.95
-max = 0.999
-scale = auto
+# [sweep.train.adam_beta1]
+# distribution = logit_normal
+# min = 0.5
+# mean = 0.95
+# max = 0.999
+# scale = auto
 
-[sweep.env.num_envs]
-distribution = uniform_pow2
-min = 1
-max = 4096
-mean = 2048
-scale = auto
+# [sweep.env.num_envs]
+# distribution = uniform_pow2
+# min = 1
+# max = 4096
+# mean = 2048
+# scale = auto
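Dotted section names like [sweep.train.gae_lambda], commented out above, read naturally as nested override paths (sweep -> train -> gae_lambda). A small sketch of that mapping, as an assumption about intent rather than the sweep code itself:

    import configparser

    def nest(parser: configparser.ConfigParser) -> dict:
        # Turn dotted INI sections into a nested dict, e.g.
        # [sweep.train.gae_lambda] -> tree['sweep']['train']['gae_lambda']
        tree = {}
        for section in parser.sections():
            node = tree
            for part in section.split("."):
                node = node.setdefault(part, {})
            node.update(parser[section])
        return tree

    # usage: cfg = configparser.ConfigParser(); cfg.read("some_env.ini"); nest(cfg)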
--- trash_pickup config ---
 [base]
 env_name = trash_pickup
 
+[vec]
+total_agents = 4096
+
+[policy]
+num_layers = 2
+hidden_size = 64
+
 [env]
 grid_size = 20
 num_agents = 8
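Most files in this diff gain the same [policy] block (num_layers = 2, hidden_size = 64). As a rough sketch of what such a block plausibly parameterizes — an assumption, since the actual policy classes are not part of this diff — a matching MLP in PyTorch would look like:

    import torch.nn as nn

    def make_mlp(obs_dim, act_dim, num_layers=2, hidden_size=64):
        # Hypothetical policy body sized by the recurring [policy] block.
        layers, in_dim = [], obs_dim
        for _ in range(num_layers):
            layers += [nn.Linear(in_dim, hidden_size), nn.Tanh()]
            in_dim = hidden_size
        layers.append(nn.Linear(in_dim, act_dim))
        return nn.Sequential(*layers)

    policy = make_mlp(obs_dim=4, act_dim=2)  # cartpole-sized dims, for example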