PufferAI
diff --git a/config/cartpole.ini b/config/cartpole.ini
Lines changed: 33 additions & 10 deletions
diff --git a/config/connect4.ini b/config/connect4.ini
Lines changed: 31 additions & 21 deletions
diff --git a/config/drone.ini b/config/drone.ini
Lines changed: 33 additions & 30 deletions
diff --git a/config/enduro.ini b/config/enduro.ini
Lines changed: 31 additions & 22 deletions
diff --git a/config/freeway.ini b/config/freeway.ini
Lines changed: 38 additions & 5 deletions
@@ -3,25 +3,48 @@ env_name = cartpole
 
 [vec]
 total_agents = 4096
-
-[policy]
-num_layers = 2
-hidden_size = 64
+num_buffers = 4.78896
+num_threads = 16
 
 [env]
-cart_mass = 1.0
+cart_mass = 1
 pole_mass = 0.1
 pole_length = 0.5
 gravity = 9.8
-force_mag = 10.0
+force_mag = 10
 dt = 0.02
 continuous = 0
 
+[policy]
+hidden_size = 32
+num_layers = 2.11327
+expansion_factor = 1
+
 [train]
-total_timesteps = 20_000_000
-gamma = 0.95
-learning_rate = 0.05
-minibatch_size = 32768
+gpus = 1
+seed = 42
+total_timesteps = 5642560
+learning_rate = 0.1
+anneal_lr = 1
+min_lr_ratio = 0
+gamma = 0.8
+gae_lambda = 0.922151
+replay_ratio = 0.381289
+clip_coef = 0.143548
+vf_coef = 1.77975
+vf_clip_coef = 3.90833
+max_grad_norm = 0.329667
+ent_coef = 0.0367726
+beta1 = 0.942691
+beta2 = 0.907572
+eps = 4.6046e-09
+minibatch_size = 16384
+horizon = 32
+vtrace_rho_clip = 2.91145
+vtrace_c_clip = 1.66148
+prio_alpha = 0.786776
+prio_beta0 = 0.348617
+use_rnn = 1
 
 [sweep]
 method = Protein
 
@@ -3,34 +3,44 @@ env_name = connect4
 
 [vec]
 total_agents = 4096
-
-[policy]
-num_layers = 2
-hidden_size = 64
+num_buffers = 8
+num_threads = 2
 
 [env]
 num_agents = 1
 player_pieces = 0
 env_pieces = 0
 
+[policy]
+hidden_size = 256
+num_layers = 1
+expansion_factor = 1
+
 [train]
-total_timesteps = 22_000_000
-beta1 = 0.7332525176640032
-beta2 = 0.9992588002434659
-eps = 0.0001
-clip_coef = 0.3344358533613167
-ent_coef = 0.00004214003802569246
-gae_lambda = 0.8969790930039623
-gamma = 0.9945932652529774
-learning_rate = 0.1
-max_grad_norm = 1.0219144411399215
-minibatch_size = 32768
-prio_alpha = 0.9057091953725436
-prio_beta0 = 0.6320607520016285
-vf_clip_coef = 1.9948775471721416
-vf_coef = 2.3734839181925462
-vtrace_c_clip = 0.5659747235622431
-vtrace_rho_clip = 1.4499061438546799
+gpus = 1
+seed = 42
+total_timesteps = 13272299
+learning_rate = 0.00847027
+anneal_lr = 1
+min_lr_ratio = 0
+gamma = 0.8
+gae_lambda = 0.962627
+replay_ratio = 3.16619
+clip_coef = 0.511829
+vf_coef = 5
+vf_clip_coef = 1.99178
+max_grad_norm = 0.552251
+ent_coef = 3.24222e-05
+beta1 = 0.878636
+beta2 = 0.986336
+eps = 3.02623e-07
+minibatch_size = 8192
+horizon = 32
+vtrace_rho_clip = 2.49786
+vtrace_c_clip = 1.52028
+prio_alpha = 1
+prio_beta0 = 0.746205
+use_rnn = 1
 
 [sweep.train.total_timesteps]
 distribution = log_normal
 
@@ -2,49 +2,52 @@
 env_name = drone
 
 [vec]
-total_agents = 1024
+total_agents = 2048
 num_buffers = 8
-num_threads = 8
-
-[policy]
-hidden_size = 64
-num_layers = 2
+num_threads = 1
 
 [env]
 task = 1
 num_drones = 64
 max_rings = 10
-alpha_dist = 0.5602899637895572
-alpha_hover = 0.18691658256215232
-alpha_omega = 0.00010000000000000021
-alpha_shaping = 10
-hover_target_dist = 5.0
+alpha_dist = 0.782192
+alpha_hover = 0.071445
+alpha_omega = 0.00135588
+alpha_shaping = 3.9754
+hover_target_dist = 5
 hover_dist = 0.1
 hover_omega = 0.1
 hover_vel = 0.1
 
+[policy]
+hidden_size = 128
+num_layers = 3.80354
+expansion_factor = 1
+
 [train]
-beta1 = 0.9441482023028404
-beta2 = 0.9999652591777111
-clip_coef = 0.01
-ent_coef = 0.0020037723526687536
-eps = 3.587424560914664e-09
-gae_lambda = 0.830816124241041
-gamma = 0.9754204650932019
-horizon = 64
-learning_rate = 0.007916609535671186
-max_grad_norm = 0.1
+gpus = 1
+seed = 42
+total_timesteps = 40000000
+learning_rate = 0.00975033
+anneal_lr = 1
 min_lr_ratio = 0
-minibatch_size = 4096
-prio_alpha = 0.6044847010385197
+gamma = 0.981094
+gae_lambda = 0.883828
+replay_ratio = 2.25498
+clip_coef = 0.067834
+vf_coef = 4.42222
+vf_clip_coef = 3.42278
+max_grad_norm = 0.692201
+ent_coef = 9.62597e-05
+beta1 = 0.914207
+beta2 = 0.99988
+eps = 1e-14
+minibatch_size = 8192
+horizon = 32
+vtrace_rho_clip = 3.32822
+vtrace_c_clip = 4.37219
+prio_alpha = 0.427293
 prio_beta0 = 1
-replay_ratio = 2.0931513062144527
-total_timesteps = 5.9104024e+07
-vf_clip_coef = 1.9085544982750444
-vf_coef = 5
-vtrace_c_clip = 3.061192803089336
-vtrace_rho_clip = 1.3582992315224223
-
 
 [sweep.train.total_timesteps]
 distribution = log_normal
 
@@ -2,11 +2,9 @@
 env_name = enduro
 
 [vec]
-total_agents = 4096
-
-[policy]
-num_layers = 2
-hidden_size = 64
+total_agents = 256
+num_buffers = 4.00702
+num_threads = 2
 
 [env]
 width = 152
@@ -16,26 +14,37 @@ car_height = 11
 max_enemies = 10
 continuous = 0
 
+[policy]
+hidden_size = 128
+num_layers = 2.68359
+expansion_factor = 1
+
 [train]
-total_timesteps = 400_000_000
-beta1 = 0.9602226117399812
-beta2 = 0.999983918771099
-eps = 2.109767652202695e-9
+gpus = 1
+seed = 42
+total_timesteps = 58957801
+learning_rate = 0.0136314
+anneal_lr = 1
+min_lr_ratio = 0
+gamma = 0.979826
+gae_lambda = 0.908362
+replay_ratio = 1.54521
+clip_coef = 0.915672
+vf_coef = 0.977532
+vf_clip_coef = 1.75503
+max_grad_norm = 1.08789
+ent_coef = 0.00246004
+beta1 = 0.840696
+beta2 = 0.999975
+eps = 1.78423e-12
+minibatch_size = 16384
 horizon = 64
-clip_coef = 0.5716251062832933
-ent_coef = 0.009778379693175061
-gae_lambda = 0.9924829173144767
-gamma = 0.9433427558493771
-learning_rate = 0.014263349414255656
-max_grad_norm = 0.42249653686869115
+vtrace_rho_clip = 2.58586
+vtrace_c_clip = 5
+prio_alpha = 1
+prio_beta0 = 0.161561
 max_minibatch_size = 32768
-minibatch_size = 65536
-prio_alpha = 0.22253503344197678
-prio_beta0 = 0.7866639848626998
-vf_clip_coef = 0.01
-vf_coef = 3.2952964839081016
-vtrace_c_clip = 3.060525785199293
-vtrace_rho_clip = 5
+use_rnn = 1
 
 [sweep]
 metric = days_completed
 
@@ -2,12 +2,11 @@
 env_name = freeway
 
 [vec]
+total_agents = 16384
+num_buffers = 6.8738
+num_threads = 2
 num_agents = 4096
 
-[policy]
-num_layers = 2
-num_units = 64
-
 [env]
 frameskip = 4
 width = 1216
@@ -23,9 +22,43 @@ enable_human_player = 0
 env_randomization = 1
 use_dense_rewards = 1
 
+[policy]
+hidden_size = 128
+num_layers = 7.44076
+expansion_factor = 1
+num_units = 64
+
+[legacy]
+torch_deterministic = 1
+cpu_offload = 0
+compile = 0
+compile_fullgraph = 1
+
 [train]
-total_timesteps = 500_000_000
+gpus = 1
+seed = 42
+total_timesteps = 403702026
+learning_rate = 0.00357256
+anneal_lr = 1
+min_lr_ratio = 0
+gamma = 0.988734
+gae_lambda = 0.759081
+replay_ratio = 2.08083
+clip_coef = 0.168047
+vf_coef = 3.51248
+vf_clip_coef = 0.179612
+max_grad_norm = 5
+ent_coef = 0.00023521
+beta1 = 0.973725
+beta2 = 0.99942
+eps = 4.24651e-14
 minibatch_size = 32768
+horizon = 64
+vtrace_rho_clip = 2.3679
+vtrace_c_clip = 1.29213
+prio_alpha = 0.741968
+prio_beta0 = 0.654176
+use_rnn = 1
 
 [sweep.train.total_timesteps]
 distribution = log_normal