Skip to content

Commit f95a39b

Browse files
author
Tim-phant
committed
revert nmmo3.ini
1 parent 3cc928d commit f95a39b

1 file changed

Lines changed: 90 additions & 0 deletions

File tree

pufferlib/config/ocean/nmmo3.ini

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
[base]
2+
package = ocean
3+
env_name = puffer_nmmo3
4+
policy_name = NMMO3MinGRU
5+
rnn_name = NMMO3LSTM
6+
7+
[vec]
8+
total_agents = 8192
9+
num_buffers = 4
10+
num_threads = 4
11+
12+
[env]
13+
num_agents = 1024
14+
width = 512
15+
height = 512
16+
num_enemies = 2048
17+
num_resources = 2048
18+
num_weapons = 1024
19+
num_gems = 512
20+
tiers = 5
21+
levels = 40
22+
teleportitis_prob = 0.001
23+
enemy_respawn_ticks = 2
24+
item_respawn_ticks = 100
25+
x_window = 7
26+
y_window = 5
27+
reward_combat_level = 1.0
28+
reward_prof_level = 1.0
29+
reward_item_level = 1.0
30+
reward_market = 0.0
31+
reward_death = -1.0
32+
33+
[policy]
34+
hidden_size = 512
35+
36+
[train]
37+
# full-scale run budget, kept for reference; active value is set below
#total_timesteps = 642_000_000_000
38+
total_timesteps = 20_000_000_000
39+
checkpoint_interval = 10000
40+
learning_rate = 0.0004573146765703167
41+
gamma = 0.7647543366891623
42+
gae_lambda = 0.996005622445478
43+
max_grad_norm = 0.6075578331947327
44+
vf_coef = 0.3979089612467003
45+
horizon = 64
46+
ent_coef = 0.01210084358004069
47+
minibatch_size = 32768
48+
49+
[sweep]
50+
downsample = 50
51+
52+
[sweep.train.total_timesteps]
53+
distribution = log_normal
54+
min = 5e9
55+
max = 5e10
56+
scale = time
57+
58+
[sweep.vec.total_agents]
59+
distribution = uniform_pow2
60+
min = 1024
61+
max = 16384
62+
scale = auto
63+
64+
#[sweep.env.reward_combat_level]
# NOTE(review): keys commented out together with the header above — leaving them
# active would make them parse as duplicate keys under [sweep.vec.total_agents].
65+
#distribution = uniform
66+
#min = 0.0
67+
#max = 1.0
68+
#mean = 0.5
69+
#scale = auto
70+
71+
#[sweep.env.reward_prof_level]
# NOTE(review): keys commented out together with the header above — leaving them
# active would make them parse as duplicate keys in the preceding live section.
72+
#distribution = uniform
73+
#min = 0.0
74+
#max = 1.0
75+
#mean = 0.5
76+
#scale = auto
77+
78+
[sweep.env.reward_item_level]
79+
distribution = uniform
80+
min = 0.0
81+
max = 1.0
82+
mean = 1.0
83+
scale = auto
84+
85+
[sweep.env.reward_death]
86+
distribution = uniform
87+
min = -1.0
88+
max = 0.0
89+
mean = -1.0
90+
scale = auto

0 commit comments

Comments
 (0)