Skip to content

Commit 8371961

Browse files
committed
boxoban port
2 parents d21a161 + 3268630 commit 8371961

7 files changed

Lines changed: 1747 additions & 0 deletions

File tree

config/boxoban.ini

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
[base]
2+
env_name = boxoban
3+
4+
[vec]
5+
total_agents = 16384
6+
num_buffers = 8
7+
num_threads = 8
8+
9+
[policy]
10+
num_layers = 1
11+
hidden_size = 256
12+
13+
[env]
14+
num_agents = 1
15+
# difficulty ids: 0 = basic, 1 = easy, 2 = medium, 3 = hard, 4 = unfiltered
16+
difficulty = 1
17+
#reward per intermediate target (once per episode)
18+
int_r_coeff = 0.25
19+
# penalty coefficient applied when a box is moved off a target
20+
target_loss_pen_coeff = 0.0
21+
max_steps = 300
22+
23+
[train]
24+
anneal_lr = 1
25+
beta1 = 0.9774372816193448
26+
beta2 = 0.9659403664380584
27+
clip_coef = 0.6046560670053024
28+
ent_coef = 0.00002079831529141607
29+
eps = 0.00000000000001
30+
gae_lambda = 0.9258914518467392
31+
gamma = 0.9772998708784648
32+
gpus = 1
33+
horizon = 64
34+
learning_rate = 0.004480255741933225
35+
max_grad_norm = 1.221684008665154
36+
min_lr_ratio = 0.37872027027338984
37+
minibatch_size = 8192
38+
prio_alpha = 1
39+
prio_beta0 = 0.8789921736378042
40+
replay_ratio = 3.210300031048168
41+
seed = 42
42+
total_timesteps = 55504884
43+
use_rnn = true
44+
vf_clip_coef = 4.339748010438874
45+
vf_coef = 4.240274862679744
46+
vtrace_c_clip = 1.3625779006162615
47+
vtrace_rho_clip = 3.17260199042977

ocean/boxoban/binding.c

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
#define BOXOBAN_MAPS_IMPLEMENTATION //enables mmap
2+
#include "boxoban.h"
3+
#define OBS_SIZE 400
4+
#define NUM_ATNS 1
5+
#define ACT_SIZES {5}
6+
#define OBS_TENSOR_T ByteTensor
7+
8+
9+
#define Env Boxoban
10+
#include "vecenv.h"
11+
12+
13+
/* Configure a Boxoban environment from the config kwargs, then initialize it.
 *
 * Reads "difficulty", "max_steps", "int_r_coeff" and "target_loss_pen_coeff"
 * from `kwargs`. `size` is fixed at 10 and `num_agents` at 1 here; neither is
 * taken from `kwargs`.
 */
void my_init(Env* env, Dict* kwargs) {
    const int difficulty = (int)dict_get(kwargs, "difficulty")->value;
    const int step_limit = (int)dict_get(kwargs, "max_steps")->value;
    const float intermediate_coeff = (float)dict_get(kwargs, "int_r_coeff")->value;
    const float target_loss_coeff = (float)dict_get(kwargs, "target_loss_pen_coeff")->value;

    /* Fixed settings. */
    env->size = 10;
    env->num_agents = 1;

    /* Settings supplied by the config. */
    env->difficulty_id = difficulty;
    env->max_steps = step_limit;
    env->int_r_coeff = intermediate_coeff;
    env->target_loss_pen_coeff = target_loss_coeff;

    init(env);
}
22+
23+
/* Copy the aggregated log fields into the output dict.
 *
 * Every field is exported under its own name except `on_targets`, which is
 * published under the key "targets_hit".
 */
void my_log(Log* log, Dict* out) {
    dict_set(out, "perf",           log->perf);
    dict_set(out, "score",          log->score);
    dict_set(out, "episode_return", log->episode_return);
    dict_set(out, "episode_length", log->episode_length);
    dict_set(out, "targets_hit",    log->on_targets);
}

ocean/boxoban/boxoban.c

Lines changed: 195 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,195 @@
1+
/* Pure C demo file for Boxoban. Usage:
2+
* bash scripts/build_ocean.sh boxoban
3+
* ./boxoban [difficulty|path_to_bin]
4+
*
5+
* If you pass one of the known difficulty names (basic, easy, medium,
6+
* hard, unfiltered) the demo looks for pufferlib/ocean/boxoban/boxoban_maps_<difficulty>.bin
7+
* Otherwise an argument containing '/' is used as an explicit path to a bin
* file; any other name is expanded to pufferlib/ocean/boxoban/boxoban_maps_<name>.bin.
8+
*/
9+
10+
#define BOXOBAN_MAPS_IMPLEMENTATION
11+
#include <time.h>
12+
#include "boxoban.h"
13+
14+
/* Report whether `arg` is exactly one of the known difficulty names
 * ("basic", "easy", "medium", "hard", "unfiltered").
 *
 * Returns 1 on a case-sensitive exact match and 0 otherwise. A NULL `arg` is
 * reported as "not a difficulty" instead of being passed to strcmp(), which
 * would be undefined behaviour.
 */
static int is_named_difficulty(const char* arg) {
    static const char* const known[] = {
        "basic", "easy", "medium", "hard", "unfiltered",
    };

    if (arg == NULL) {
        return 0;
    }
    for (size_t i = 0; i < sizeof(known) / sizeof(known[0]); i++) {
        if (strcmp(arg, known[i]) == 0) {
            return 1;
        }
    }
    return 0;
}
21+
22+
/* Choose the map file for this run from the command line.
 *
 *   no argument             -> maps for the "easy" difficulty
 *   argument containing '/' -> returned verbatim as the path (`buffer` unused)
 *   known difficulty name   -> path produced by
 *                              boxoban_prepare_maps_for_difficulty()
 *   any other argument      -> pufferlib/ocean/boxoban/boxoban_maps_<arg>.bin
 *
 * `buffer` (of `buf_sz` bytes) receives the path whenever one has to be built.
 *
 * Returns a pointer to either argv[1] or `buffer`, or NULL when map
 * preparation fails or the formatted path does not fit in `buffer`.
 */
static const char* resolve_map_path(int argc, char** argv, char* buffer, size_t buf_sz) {
    const char* arg = argc > 1 ? argv[1] : NULL;

    /* Anything that looks like a path is passed through untouched. */
    if (arg != NULL && strchr(arg, '/')) {
        return arg;
    }

    /* No argument defaults to "easy"; named difficulties share the same route. */
    if (arg == NULL || is_named_difficulty(arg)) {
        const char* difficulty = arg != NULL ? arg : "easy";
        if (boxoban_prepare_maps_for_difficulty(difficulty, buffer, buf_sz) != 0) {
            return NULL;
        }
        return buffer;
    }

    /* Unknown bare name: expand it to the conventional bin-file location.
     * A truncated path would point at the wrong file, so report it as a failure. */
    int written = snprintf(buffer, buf_sz, "pufferlib/ocean/boxoban/boxoban_maps_%s.bin", arg);
    if (written < 0 || (size_t)written >= buf_sz) {
        return NULL;
    }
    return buffer;
}
42+
43+
44+
int demo(int argc, char** argv) {
45+
char path_buffer[512];
46+
const char* chosen_path = resolve_map_path(argc, argv, path_buffer, sizeof(path_buffer));
47+
if (chosen_path == NULL) {
48+
fprintf(stderr, "Failed to prepare map path\n");
49+
return 1;
50+
}
51+
if (boxoban_set_map_path(chosen_path) != 0) {
52+
fprintf(stderr, "Failed to set map path: %s\n", chosen_path);
53+
return 1;
54+
}
55+
56+
Boxoban env = {
57+
.size = 10,
58+
.observations = NULL,
59+
.actions = NULL,
60+
.rewards = NULL,
61+
.terminals = NULL,
62+
.max_steps = 500,
63+
.int_r_coeff = 0.1f,
64+
.target_loss_pen_coeff = 0.5f,
65+
.tick = 0,
66+
.agent_x = 0,
67+
.agent_y = 0,
68+
.intermediate_rewards = NULL,
69+
.on_target = 0,
70+
.n_boxes = 0,
71+
.win = 0,
72+
.difficulty_id = -1,
73+
.client = NULL,
74+
.n_targets = 0,
75+
76+
};
77+
78+
size_t obs_count = 4u * (size_t)env.size * (size_t)env.size;
79+
env.observations = calloc(obs_count, sizeof(unsigned char));
80+
env.actions = calloc(1, sizeof(int));
81+
env.rewards = calloc(1, sizeof(float));
82+
env.terminals = calloc(1, sizeof(unsigned char));
83+
84+
init(&env);
85+
c_reset(&env);
86+
c_render(&env);
87+
while (!WindowShouldClose()) {
88+
if (IsKeyPressed(KEY_LEFT_SHIFT) || IsKeyPressed(KEY_RIGHT_SHIFT)) {
89+
TraceLog(LOG_INFO, "Shift key pressed");
90+
}
91+
bool manual = IsKeyDown(KEY_LEFT_SHIFT) || IsKeyDown(KEY_RIGHT_SHIFT);
92+
bool stepped = false;
93+
if (manual) {
94+
int new_action = -1;
95+
if (IsKeyDown(KEY_UP) || IsKeyDown(KEY_W)) new_action = UP;
96+
if (IsKeyDown(KEY_DOWN) || IsKeyDown(KEY_S)) new_action = DOWN;
97+
if (IsKeyDown(KEY_LEFT) || IsKeyDown(KEY_A)) new_action = LEFT;
98+
if (IsKeyDown(KEY_RIGHT) || IsKeyDown(KEY_D)) new_action = RIGHT;
99+
100+
if (new_action >= 0) {
101+
env.actions[0] = new_action;
102+
c_step(&env);
103+
stepped = true;
104+
}
105+
} else {
106+
env.actions[0] = rand() % 5;
107+
c_step(&env);
108+
stepped = true;
109+
}
110+
111+
if (!stepped) {
112+
// Manual mode with no direction: stay paused
113+
}
114+
c_render(&env);
115+
}
116+
free(env.observations);
117+
free(env.actions);
118+
free(env.rewards);
119+
free(env.terminals);
120+
c_close(&env);
121+
return 0;
122+
}
123+
124+
void test_performance(int argc, char** argv, int timeout) {
125+
char path_buffer[512];
126+
const char* chosen_path = resolve_map_path(argc, argv, path_buffer, sizeof(path_buffer));
127+
if (chosen_path == NULL) {
128+
fprintf(stderr, "Failed to prepare map path\n");
129+
return;
130+
}
131+
if (boxoban_set_map_path(chosen_path) != 0) {
132+
fprintf(stderr, "Failed to set map path: %s\n", chosen_path);
133+
return;
134+
}
135+
printf("Loaded map: %s\n", chosen_path);
136+
137+
Boxoban env = {
138+
.size = 10,
139+
.observations = NULL,
140+
.actions = NULL,
141+
.rewards = NULL,
142+
.terminals = NULL,
143+
.max_steps = 500,
144+
.int_r_coeff = 0.1f,
145+
.target_loss_pen_coeff = 0.5f,
146+
.tick = 0,
147+
.agent_x = 0,
148+
.agent_y = 0,
149+
.intermediate_rewards = NULL,
150+
.on_target = 0,
151+
.n_boxes = 0,
152+
.win = 0,
153+
.difficulty_id = -1,
154+
.client = NULL,
155+
.n_targets = 0,
156+
};
157+
158+
size_t obs_count = 4u * (size_t)env.size * (size_t)env.size;
159+
env.observations = calloc(obs_count, sizeof(unsigned char));
160+
env.actions = calloc(1, sizeof(int));
161+
env.rewards = calloc(1, sizeof(float));
162+
env.terminals = calloc(1, sizeof(unsigned char));
163+
164+
printf("Initializing...\n");
165+
init(&env);
166+
printf("Resetting...\n");
167+
c_reset(&env);
168+
printf("Starting test...\n");
169+
170+
int start = time(NULL);
171+
int num_steps = 0;
172+
while (time(NULL) - start < timeout) {
173+
env.actions[0] = rand() % 5;
174+
c_step(&env);
175+
num_steps++;
176+
}
177+
178+
int end = time(NULL);
179+
float sps = num_steps / (end - start);
180+
printf("Test Environment SPS: %f\n", sps);
181+
free(env.observations);
182+
free(env.actions);
183+
free(env.rewards);
184+
free(env.terminals);
185+
c_close(&env);
186+
}
187+
188+
/* Entry point: run the interactive demo and exit with its status
 * (0 on success, 1 if the demo could not be set up).
 */
int main(int argc, char** argv) {
    /* Buffering must be configured before the stream is used, so do it
     * before the demo produces any output. */
    setbuf(stdout, NULL);
    fprintf(stderr, "Entered main\n");
    fflush(stderr);

    /* To benchmark instead of running the demo, call
     * test_performance(argc, argv, 10) here. */
    return demo(argc, argv);
}

0 commit comments

Comments
 (0)