Skip to content

Commit ff8e6c6

Browse files
author
Tim-phant
committed
adjust scoring metrics
1 parent c3c0de8 commit ff8e6c6

1 file changed

Lines changed: 8 additions & 3 deletions

File tree

pufferlib/ocean/boxoban/boxoban.h

Lines changed: 8 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -60,6 +60,7 @@ typedef struct {
6060
int difficulty_id; // 0=basic,1=easy,2=medium,3=hard,4=unfiltered
6161
Client* client;
6262
int win;
63+
float episode_return;
6364
} Boxoban;
6465

6566
void ensure_map_loaded(void);
@@ -126,9 +127,9 @@ void add_log(Boxoban* env) {
126127
float denom = (float)env->n_boxes;
127128
float num = (float)env->on_target;
128129
env->log.perf += (env->win== 1) ? 1.0 : num/denom;
129-
env->log.score += env->rewards[0];
130+
env->log.score += env->log.perf;
130131
env->log.episode_length += env->tick;
131-
env->log.episode_return += env->rewards[0];
132+
env->log.episode_return += env->episode_return;
132133
env->log.on_targets += env->on_target;
133134
env->log.n++;
134135
}
@@ -159,6 +160,7 @@ void c_reset(Boxoban* env) {
159160

160161
env->tick = 0;
161162
env->win = 0;
163+
env->episode_return = 0;
162164

163165
}
164166

@@ -250,18 +252,21 @@ void c_step(Boxoban* env) {
250252
env->terminals[0] = 1;
251253
env->rewards[0] += 1.0;
252254
env->win = 1;
255+
env->episode_return += env->rewards[0];
253256
add_log(env);
254257
c_reset(env);
255258
return;
256259
}
257260

258261
if (env->tick >= env->max_steps) {
259262
env->terminals[0] = 1;
260-
env->rewards[0] -= 1.0;
263+
env->rewards[0] -= 1.0;
264+
env->episode_return += env->rewards[0];
261265
add_log(env);
262266
c_reset(env);
263267
return;
264268
}
269+
env->episode_return += env->rewards[0];
265270

266271
}
267272

0 commit comments

Comments
 (0)