Skip to content

Commit 0896d21

Browse files
committed
normalize observation space data
1 parent d2c4a6c commit 0896d21

4 files changed

Lines changed: 91 additions & 53 deletions

File tree

pufferlib/ocean/boss_fight/README.md

Lines changed: 17 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ All hitboxes are circles (collision = circles overlap).
88

99
## Game rules
1010

11-
- **Arena:** square `[-ARENA_HALF_SIZE, ARENA_HALF_SIZE]^2` (default `5.0`)
11+
- **Arena:** square `[-ARENA_HALF_SIZE, ARENA_HALF_SIZE]^2` (default `500.0`)
1212
- **Boss:** stationary at `(0, 0)`
1313
- **Episode ends on:**
1414
- win: boss HP reaches 0
@@ -49,23 +49,22 @@ During **ACTIVE**, the boss deals damage if the player overlaps the AOE circle.
4949

5050
## Observation space
5151

52-
`Box(shape=(13,), dtype=float32)` (see `update_observations` in `boss_fight.h`):
53-
54-
| idx | meaning |
55-
| --: | ------------------------------------------------------ |
56-
| 0 | `boss_x - player_x` |
57-
| 1 | `boss_y - player_y` |
58-
| 2 | `player_x` |
59-
| 3 | `player_y` |
60-
| 4 | `boss_x` |
61-
| 5 | `boss_y` |
62-
| 6 | `player_hp` |
63-
| 7 | `boss_hp` |
64-
| 8 | `player_state` (`0=idle, 1=dodge, 2=attack`) |
65-
| 9 | `player_dodge_cooldown` |
66-
| 10 | `player_state_ticks` (remaining) |
67-
| 11 | `boss_state` (`0=idle, 1=windup, 2=attack, 3=recover`) |
68-
| 12 | `boss_phase_ticks` (remaining) |
52+
`Box(shape=(12,), dtype=float32)` — every entry is normalized: player position to [-1, 1], all other entries to [0, 1] (see `update_observations` in `boss_fight.h`):
53+
54+
| idx | meaning | range |
55+
| --: | ---------------------------- | ------- |
56+
| 0 | `player_x` normalized | [-1, 1] |
57+
| 1 | `player_y` normalized | [-1, 1] |
58+
| 2 | `dist_to_boss` normalized | [0, 1] |
59+
| 3 | `player_hp` normalized | [0, 1] |
60+
| 4 | `boss_hp` normalized | [0, 1] |
61+
| 5 | `dodge_cooldown` normalized | [0, 1] |
62+
| 6 | `dodge_remaining` | [0, 1] |
63+
| 7 | `iframe_remaining` | [0, 1] |
64+
| 8 | `attack_remaining` | [0, 1] |
65+
| 9 | `time_until_aoe` (0 while the AOE is active) | [0, 1] |
66+
| 10 | `aoe_remaining` | [0, 1] |
67+
| 11 | `episode_time_remaining` | [0, 1] |
6968

7069
## Rewards (defaults)
7170

pufferlib/ocean/boss_fight/boss_fight.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
#include "raylib.h"
33

44
int main() {
5-
int num_obs = 13;
5+
int num_obs = 12;
66
int num_actions = 1;
77
int num_agents = 1;
88

pufferlib/ocean/boss_fight/boss_fight.h

Lines changed: 72 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -3,28 +3,29 @@
33
#include <stdio.h>
44
#include <stdlib.h>
55

6-
#define ARENA_HALF_SIZE 5.0f
7-
#define MAX_HP 1.0f
8-
#define PLAYER_SPEED_PER_TICK 0.25f
9-
#define PLAYER_SIZE 0.3f
10-
#define BOSS_SIZE 0.5f
11-
#define PLAYER_ATTACK_RADIUS 0.4f
6+
#define ARENA_HALF_SIZE 500.0f
7+
#define MAX_HP 100.0f
8+
#define PLAYER_SPEED_PER_TICK 25.0f
9+
#define PLAYER_SIZE 30.0f
10+
#define BOSS_SIZE 50.0f
11+
#define PLAYER_ATTACK_RADIUS 40.0f
1212
#define PLAYER_ATTACK_TICKS 3
1313
#define PLAYER_DODGE_TICKS 4
1414
#define PLAYER_IFRAME_TICKS 2
1515
#define PLAYER_DODGE_COOLDOWN 15
16-
#define PLAYER_DODGE_SPEED_PER_TICK 0.35f
17-
#define PLAYER_ATTACK_DMG 0.05f
18-
#define BOSS_ATTACK_DMG 0.15f
19-
#define BOSS_AOE_ATTACK_RADIUS 0.8f
16+
#define PLAYER_DODGE_SPEED_PER_TICK 35.0f
17+
#define PLAYER_ATTACK_DMG 5.0f
18+
#define BOSS_ATTACK_DMG 15.0f
19+
#define BOSS_AOE_ATTACK_RADIUS 80.0f
2020
#define BOSS_IDLE_TICKS 7
2121
#define BOSS_WINDUP_TICKS 5
2222
#define BOSS_ACTIVE_TICKS 5
2323
#define BOSS_RECOVERY_TICKS 5
24+
2425
#define HP_BAR_WIDTH 40
2526
#define HP_BAR_HEIGHT 5
2627

27-
#define REWARD_APPROACH 0.05f
28+
#define REWARD_APPROACH 0.7f
2829
#define REWARD_HIT_WALL -0.05f
2930
#define REWARD_PLAYER_HIT_BOSS 0.07f
3031
#define REWARD_BOSS_HIT_PLAYER -0.05f
@@ -72,7 +73,7 @@ typedef struct {
7273
float player_y;
7374
float boss_x;
7475
float boss_y;
75-
float prev_distance;
76+
float dist_to_boss;
7677

7778
PlayerState player_state;
7879
float player_hp;
@@ -111,19 +112,59 @@ void add_log(BossFight *env) {
111112

112113
void update_observations(BossFight *env) {
113114
int obs_idx = 0;
114-
env->observations[obs_idx++] = env->boss_x - env->player_x;
115-
env->observations[obs_idx++] = env->boss_y - env->player_y;
116-
env->observations[obs_idx++] = env->player_x;
117-
env->observations[obs_idx++] = env->player_y;
118-
env->observations[obs_idx++] = env->boss_x;
119-
env->observations[obs_idx++] = env->boss_y;
120-
env->observations[obs_idx++] = (float)env->player_hp;
121-
env->observations[obs_idx++] = (float)env->boss_hp;
122-
env->observations[obs_idx++] = (float)env->player_state;
123-
env->observations[obs_idx++] = (float)env->player_dodge_cooldown;
124-
env->observations[obs_idx++] = (float)env->player_state_ticks;
125-
env->observations[obs_idx++] = (float)env->boss_state;
126-
env->observations[obs_idx++] = (float)env->boss_phase_ticks;
115+
116+
env->observations[obs_idx++] = env->player_x / ARENA_HALF_SIZE;
117+
env->observations[obs_idx++] = env->player_y / ARENA_HALF_SIZE;
118+
119+
float dist = distance(env->player_x, env->player_y, env->boss_x, env->boss_y);
120+
float max_dist = sqrtf(2.0f) * ARENA_HALF_SIZE;
121+
env->observations[obs_idx++] = dist / max_dist;
122+
123+
env->observations[obs_idx++] = env->player_hp / MAX_HP;
124+
env->observations[obs_idx++] = env->boss_hp / MAX_HP;
125+
126+
env->observations[obs_idx++] =
127+
(float)env->player_dodge_cooldown / PLAYER_DODGE_COOLDOWN;
128+
129+
float dodge_remaining =
130+
(env->player_state == PLAYER_DODGING)
131+
? (float)env->player_state_ticks / PLAYER_DODGE_TICKS
132+
: 0.0f;
133+
env->observations[obs_idx++] = dodge_remaining;
134+
135+
int iframe_ticks =
136+
env->player_state_ticks - (PLAYER_DODGE_TICKS - PLAYER_IFRAME_TICKS);
137+
float iframe_remaining =
138+
(env->player_state == PLAYER_DODGING && iframe_ticks > 0)
139+
? fminf((float)iframe_ticks / PLAYER_IFRAME_TICKS, 1.0f)
140+
: 0.0f;
141+
env->observations[obs_idx++] = iframe_remaining;
142+
143+
float attack_remaining =
144+
(env->player_state == PLAYER_ATTACKING)
145+
? (float)env->player_state_ticks / PLAYER_ATTACK_TICKS
146+
: 0.0f;
147+
env->observations[obs_idx++] = attack_remaining;
148+
149+
float cycle_len = BOSS_IDLE_TICKS + BOSS_WINDUP_TICKS + BOSS_ACTIVE_TICKS +
150+
BOSS_RECOVERY_TICKS;
151+
float time_until_aoe = 0.0f;
152+
if (env->boss_state == BOSS_IDLING)
153+
time_until_aoe = env->boss_phase_ticks + BOSS_WINDUP_TICKS;
154+
else if (env->boss_state == BOSS_WINDING_UP)
155+
time_until_aoe = env->boss_phase_ticks;
156+
else if (env->boss_state == BOSS_RECOVERING)
157+
time_until_aoe =
158+
env->boss_phase_ticks + BOSS_IDLE_TICKS + BOSS_WINDUP_TICKS;
159+
env->observations[obs_idx++] = time_until_aoe / cycle_len;
160+
161+
float aoe_remaining = (env->boss_state == BOSS_ATTACKING)
162+
? (float)env->boss_phase_ticks / BOSS_ACTIVE_TICKS
163+
: 0.0f;
164+
env->observations[obs_idx++] = aoe_remaining;
165+
166+
env->observations[obs_idx++] =
167+
(float)(EPISODE_LENGTH - env->tick) / EPISODE_LENGTH;
127168
}
128169

129170
void c_reset(BossFight *env) {
@@ -152,7 +193,7 @@ void c_reset(BossFight *env) {
152193
env->player_y = rand_uniform(-ARENA_HALF_SIZE, ARENA_HALF_SIZE);
153194
}
154195

155-
env->prev_distance =
196+
env->dist_to_boss =
156197
distance(env->player_x, env->player_y, env->boss_x, env->boss_y);
157198

158199
update_observations(env);
@@ -204,11 +245,6 @@ void c_step(BossFight *env) {
204245
env->player_state != PLAYER_DODGING && env->player_dodge_cooldown == 0;
205246
bool can_attack = env->player_state == PLAYER_IDLING;
206247

207-
if (wanna_attack && can_attack) {
208-
env->player_state_ticks = PLAYER_ATTACK_TICKS;
209-
env->player_state = PLAYER_ATTACKING;
210-
}
211-
212248
float aoe_dist = BOSS_SIZE + PLAYER_SIZE + BOSS_AOE_ATTACK_RADIUS;
213249
bool boss_threatening =
214250
env->boss_state == BOSS_WINDING_UP || env->boss_state == BOSS_ATTACKING;
@@ -249,8 +285,9 @@ void c_step(BossFight *env) {
249285

250286
float dist = distance(env->player_x, env->player_y, env->boss_x, env->boss_y);
251287

252-
reward += REWARD_APPROACH * (env->prev_distance - dist);
253-
env->prev_distance = dist;
288+
float max_dist = sqrtf(2.0f) * ARENA_HALF_SIZE;
289+
reward += REWARD_APPROACH * ((env->dist_to_boss - dist) / max_dist);
290+
env->dist_to_boss = dist;
254291

255292
// Push player out if clipping into boss
256293
if (dist < BOSS_SIZE + PLAYER_SIZE && dist > 1e-6f) {
@@ -265,6 +302,8 @@ void c_step(BossFight *env) {
265302
bool close_enough = dist <= BOSS_SIZE + PLAYER_ATTACK_RADIUS + PLAYER_SIZE;
266303

267304
if (wanna_attack && can_attack && close_enough) {
305+
env->player_state_ticks = PLAYER_ATTACK_TICKS;
306+
env->player_state = PLAYER_ATTACKING;
268307
env->boss_hp -= PLAYER_ATTACK_DMG;
269308
reward += REWARD_PLAYER_HIT_BOSS;
270309
}

pufferlib/ocean/boss_fight/boss_fight.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ def __init__(
1212
self, num_envs=1, render_mode=None, log_interval=1, size=5, buf=None, seed=0
1313
):
1414
self.single_observation_space = gymnasium.spaces.Box(
15-
low=-10, high=110, shape=(13,), dtype=np.float32
15+
low=-1, high=1, shape=(12,), dtype=np.float32
1616
)
1717
self.single_action_space = gymnasium.spaces.Discrete(7)
1818
self.render_mode = render_mode

0 commit comments

Comments
 (0)