Skip to content

Commit 3268630

Browse files
author
Tim-phant
committed
load map changes to remove duplication across many init calls, and latest ini values
1 parent 1266be1 commit 3268630

2 files changed

Lines changed: 28 additions & 22 deletions

File tree

pufferlib/config/ocean/boxoban.ini

Lines changed: 20 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ num_buffers = 8
1010
num_threads = 8
1111

1212
[policy]
13-
num_layers = 2
13+
num_layers = 1
1414
hidden_size = 256
1515

1616
[env]
@@ -25,31 +25,33 @@ max_steps = 300
2525

2626

2727
[train]
28-
#EASY
2928
anneal_lr = 1
30-
beta1 = 0.9524080120922038
31-
beta2 = 0.9938151282804136
32-
clip_coef = 0.4770373231152628
33-
ent_coef = 0.00001
29+
beta1 = 0.9774372816193448
30+
beta2 = 0.9659403664380584
31+
clip_coef = 0.6046560670053024
32+
ent_coef = 0.00002079831529141607
3433
eps = 0.00000000000001
35-
gae_lambda = 0.8032589838049962
36-
gamma = 0.965698161834
34+
gae_lambda = 0.9258914518467392
35+
gamma = 0.9772998708784648
3736
gpus = 1
3837
horizon = 64
39-
learning_rate = 0.002557505571664044
40-
max_grad_norm = 0.1
38+
learning_rate = 0.004480255741933225
39+
max_grad_norm = 1.221684008665154
4140
min_lr_ratio = 0.37872027027338984
4241
minibatch_size = 8192
43-
prio_alpha = 0.9556021765385548
44-
prio_beta0 = 1
45-
replay_ratio = 4
42+
prio_alpha = 1
43+
prio_beta0 = 0.8789921736378042
44+
replay_ratio = 3.210300031048168
4645
seed = 42
47-
total_timesteps = 53251895
46+
total_timesteps = 55504884
4847
use_rnn = true
49-
vf_clip_coef = 4.856116827195164
50-
vf_coef = 5
51-
vtrace_c_clip = 2.5648427343944147
52-
vtrace_rho_clip = 3.258932255953648
48+
vf_clip_coef = 4.339748010438874
49+
vf_coef = 4.240274862679744
50+
vtrace_c_clip = 1.3625779006162615
51+
vtrace_rho_clip = 3.17260199042977
52+
53+
54+
#EASY
5355

5456
[sweep]
5557
metric = perf

pufferlib/ocean/boxoban/boxoban.h

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -113,11 +113,15 @@ static inline const uint32_t get_random_puzzle_idx(const Boxoban *env) {
113113

114114

115115
void init (Boxoban* env) {
116-
if (boxoban_configure_maps_from_env(env) != 0) {
117-
fprintf(stderr, "Failed to configure Boxoban maps\n");
118-
abort();
116+
static int boxoban_maps_ready = 0;
117+
if (!boxoban_maps_ready) {
118+
if (boxoban_configure_maps_from_env(env) != 0) {
119+
fprintf(stderr, "Failed to configure Boxoban maps\n");
120+
abort();
121+
}
122+
ensure_map_loaded();
123+
boxoban_maps_ready = 1;
119124
}
120-
ensure_map_loaded();
121125
env->intermediate_rewards = calloc(env->size*env->size, sizeof(unsigned char));
122126
env->win = 0;
123127
}

0 commit comments

Comments
 (0)