Skip to content

Commit f95a39b

Browse files
author
Tim-phant
committed
revert nmmo3.ini
1 parent 3cc928d commit f95a39b

1 file changed

Lines changed: 90 additions & 0 deletions

File tree

pufferlib/config/ocean/nmmo3.ini

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
[base]
2+
package = ocean
3+
env_name = puffer_nmmo3
4+
policy_name = NMMO3MinGRU
5+
rnn_name = NMMO3LSTM
6+
7+
[vec]
8+
total_agents = 8192
9+
num_buffers = 4
10+
num_threads = 4
11+
12+
[env]
13+
num_agents = 1024
14+
width = 512
15+
height = 512
16+
num_enemies = 2048
17+
num_resources = 2048
18+
num_weapons = 1024
19+
num_gems = 512
20+
tiers = 5
21+
levels = 40
22+
teleportitis_prob = 0.001
23+
enemy_respawn_ticks = 2
24+
item_respawn_ticks = 100
25+
x_window = 7
26+
y_window = 5
27+
reward_combat_level = 1.0
28+
reward_prof_level = 1.0
29+
reward_item_level = 1.0
30+
reward_market = 0.0
31+
reward_death = -1.0
32+
33+
[policy]
34+
hidden_size = 512
35+
36+
[train]
37+
# full-scale run budget, kept for reference; active value is set below
#total_timesteps = 642_000_000_000
38+
total_timesteps = 20_000_000_000
39+
checkpoint_interval = 10000
40+
learning_rate = 0.0004573146765703167
41+
gamma = 0.7647543366891623
42+
gae_lambda = 0.996005622445478
43+
max_grad_norm = 0.6075578331947327
44+
vf_coef = 0.3979089612467003
45+
horizon = 64
46+
ent_coef = 0.01210084358004069
47+
minibatch_size = 32768
48+
49+
[sweep]
50+
downsample = 50
51+
52+
[sweep.train.total_timesteps]
53+
distribution = log_normal
54+
min = 5e9
55+
max = 5e10
56+
scale = time
57+
58+
[sweep.vec.total_agents]
59+
distribution = uniform_pow2
60+
min = 1024
61+
max = 16384
62+
scale = auto
63+
64+
#[sweep.env.reward_combat_level]
# NOTE(review): keys commented out together with the header above — leaving them
# active would make them parse as duplicate keys under [sweep.vec.total_agents].
65+
#distribution = uniform
66+
#min = 0.0
67+
#max = 1.0
68+
#mean = 0.5
69+
#scale = auto
70+
71+
#[sweep.env.reward_prof_level]
# NOTE(review): keys commented out together with the header above — leaving them
# active would make them parse as duplicate keys in the preceding live section.
72+
#distribution = uniform
73+
#min = 0.0
74+
#max = 1.0
75+
#mean = 0.5
76+
#scale = auto
77+
78+
[sweep.env.reward_item_level]
79+
distribution = uniform
80+
min = 0.0
81+
max = 1.0
82+
mean = 1.0
83+
scale = auto
84+
85+
[sweep.env.reward_death]
86+
distribution = uniform
87+
min = -1.0
88+
max = 0.0
89+
mean = -1.0
90+
scale = auto

0 commit comments

Comments
 (0)