33#include <stdio.h>
44#include <stdlib.h>
55
/* Lengths: arena extent, movement per tick, body sizes and attack reach. */
#define ARENA_HALF_SIZE 500.0f
#define MAX_HP 100.0f
#define PLAYER_SPEED_PER_TICK 25.0f
#define PLAYER_SIZE 30.0f
#define BOSS_SIZE 50.0f
#define PLAYER_ATTACK_RADIUS 40.0f

/* Player action durations and dodge cooldown, in ticks. */
#define PLAYER_ATTACK_TICKS 3
#define PLAYER_DODGE_TICKS 4
#define PLAYER_IFRAME_TICKS 2
#define PLAYER_DODGE_COOLDOWN 15
#define PLAYER_DODGE_SPEED_PER_TICK 35.0f

/* Damage is expressed on the same scale as MAX_HP. */
#define PLAYER_ATTACK_DMG 5.0f
#define BOSS_ATTACK_DMG 15.0f
#define BOSS_AOE_ATTACK_RADIUS 80.0f

/* Boss phase durations, in ticks. */
#define BOSS_IDLE_TICKS 7
#define BOSS_WINDUP_TICKS 5
#define BOSS_ACTIVE_TICKS 5
#define BOSS_RECOVERY_TICKS 5

#define HP_BAR_WIDTH 40
#define HP_BAR_HEIGHT 5

/*
 * Reward shaping terms. Negative values are parenthesized so the macros
 * expand safely inside larger expressions.
 */
#define REWARD_APPROACH 0.7f
#define REWARD_HIT_WALL (-0.05f)
#define REWARD_PLAYER_HIT_BOSS 0.07f
#define REWARD_BOSS_HIT_PLAYER (-0.05f)
@@ -72,7 +73,7 @@ typedef struct {
7273 float player_y ;
7374 float boss_x ;
7475 float boss_y ;
75- float prev_distance ;
76+ float dist_to_boss ;
7677
7778 PlayerState player_state ;
7879 float player_hp ;
@@ -111,19 +112,59 @@ void add_log(BossFight *env) {
111112
112113void update_observations (BossFight * env ) {
113114 int obs_idx = 0 ;
114- env -> observations [obs_idx ++ ] = env -> boss_x - env -> player_x ;
115- env -> observations [obs_idx ++ ] = env -> boss_y - env -> player_y ;
116- env -> observations [obs_idx ++ ] = env -> player_x ;
117- env -> observations [obs_idx ++ ] = env -> player_y ;
118- env -> observations [obs_idx ++ ] = env -> boss_x ;
119- env -> observations [obs_idx ++ ] = env -> boss_y ;
120- env -> observations [obs_idx ++ ] = (float )env -> player_hp ;
121- env -> observations [obs_idx ++ ] = (float )env -> boss_hp ;
122- env -> observations [obs_idx ++ ] = (float )env -> player_state ;
123- env -> observations [obs_idx ++ ] = (float )env -> player_dodge_cooldown ;
124- env -> observations [obs_idx ++ ] = (float )env -> player_state_ticks ;
125- env -> observations [obs_idx ++ ] = (float )env -> boss_state ;
126- env -> observations [obs_idx ++ ] = (float )env -> boss_phase_ticks ;
115+
116+ env -> observations [obs_idx ++ ] = env -> player_x / ARENA_HALF_SIZE ;
117+ env -> observations [obs_idx ++ ] = env -> player_y / ARENA_HALF_SIZE ;
118+
119+ float dist = distance (env -> player_x , env -> player_y , env -> boss_x , env -> boss_y );
120+ float max_dist = sqrtf (2.0f ) * ARENA_HALF_SIZE ;
121+ env -> observations [obs_idx ++ ] = dist / max_dist ;
122+
123+ env -> observations [obs_idx ++ ] = env -> player_hp / MAX_HP ;
124+ env -> observations [obs_idx ++ ] = env -> boss_hp / MAX_HP ;
125+
126+ env -> observations [obs_idx ++ ] =
127+ (float )env -> player_dodge_cooldown / PLAYER_DODGE_COOLDOWN ;
128+
129+ float dodge_remaining =
130+ (env -> player_state == PLAYER_DODGING )
131+ ? (float )env -> player_state_ticks / PLAYER_DODGE_TICKS
132+ : 0.0f ;
133+ env -> observations [obs_idx ++ ] = dodge_remaining ;
134+
135+ int iframe_ticks =
136+ env -> player_state_ticks - (PLAYER_DODGE_TICKS - PLAYER_IFRAME_TICKS );
137+ float iframe_remaining =
138+ (env -> player_state == PLAYER_DODGING && iframe_ticks > 0 )
139+ ? fminf ((float )iframe_ticks / PLAYER_IFRAME_TICKS , 1.0f )
140+ : 0.0f ;
141+ env -> observations [obs_idx ++ ] = iframe_remaining ;
142+
143+ float attack_remaining =
144+ (env -> player_state == PLAYER_ATTACKING )
145+ ? (float )env -> player_state_ticks / PLAYER_ATTACK_TICKS
146+ : 0.0f ;
147+ env -> observations [obs_idx ++ ] = attack_remaining ;
148+
149+ float cycle_len = BOSS_IDLE_TICKS + BOSS_WINDUP_TICKS + BOSS_ACTIVE_TICKS +
150+ BOSS_RECOVERY_TICKS ;
151+ float time_until_aoe = 0.0f ;
152+ if (env -> boss_state == BOSS_IDLING )
153+ time_until_aoe = env -> boss_phase_ticks + BOSS_WINDUP_TICKS ;
154+ else if (env -> boss_state == BOSS_WINDING_UP )
155+ time_until_aoe = env -> boss_phase_ticks ;
156+ else if (env -> boss_state == BOSS_RECOVERING )
157+ time_until_aoe =
158+ env -> boss_phase_ticks + BOSS_IDLE_TICKS + BOSS_WINDUP_TICKS ;
159+ env -> observations [obs_idx ++ ] = time_until_aoe / cycle_len ;
160+
161+ float aoe_remaining = (env -> boss_state == BOSS_ATTACKING )
162+ ? (float )env -> boss_phase_ticks / BOSS_ACTIVE_TICKS
163+ : 0.0f ;
164+ env -> observations [obs_idx ++ ] = aoe_remaining ;
165+
166+ env -> observations [obs_idx ++ ] =
167+ (float )(EPISODE_LENGTH - env -> tick ) / EPISODE_LENGTH ;
127168}
128169
129170void c_reset (BossFight * env ) {
@@ -152,7 +193,7 @@ void c_reset(BossFight *env) {
152193 env -> player_y = rand_uniform (- ARENA_HALF_SIZE , ARENA_HALF_SIZE );
153194 }
154195
155- env -> prev_distance =
196+ env -> dist_to_boss =
156197 distance (env -> player_x , env -> player_y , env -> boss_x , env -> boss_y );
157198
158199 update_observations (env );
@@ -204,11 +245,6 @@ void c_step(BossFight *env) {
204245 env -> player_state != PLAYER_DODGING && env -> player_dodge_cooldown == 0 ;
205246 bool can_attack = env -> player_state == PLAYER_IDLING ;
206247
207- if (wanna_attack && can_attack ) {
208- env -> player_state_ticks = PLAYER_ATTACK_TICKS ;
209- env -> player_state = PLAYER_ATTACKING ;
210- }
211-
212248 float aoe_dist = BOSS_SIZE + PLAYER_SIZE + BOSS_AOE_ATTACK_RADIUS ;
213249 bool boss_threatening =
214250 env -> boss_state == BOSS_WINDING_UP || env -> boss_state == BOSS_ATTACKING ;
@@ -249,8 +285,9 @@ void c_step(BossFight *env) {
249285
250286 float dist = distance (env -> player_x , env -> player_y , env -> boss_x , env -> boss_y );
251287
252- reward += REWARD_APPROACH * (env -> prev_distance - dist );
253- env -> prev_distance = dist ;
288+ float max_dist = sqrtf (2.0f ) * ARENA_HALF_SIZE ;
289+ reward += REWARD_APPROACH * ((env -> dist_to_boss - dist ) / max_dist );
290+ env -> dist_to_boss = dist ;
254291
255292 // Push player out if clipping into boss
256293 if (dist < BOSS_SIZE + PLAYER_SIZE && dist > 1e-6f ) {
@@ -265,6 +302,8 @@ void c_step(BossFight *env) {
265302 bool close_enough = dist <= BOSS_SIZE + PLAYER_ATTACK_RADIUS + PLAYER_SIZE ;
266303
267304 if (wanna_attack && can_attack && close_enough ) {
305+ env -> player_state_ticks = PLAYER_ATTACK_TICKS ;
306+ env -> player_state = PLAYER_ATTACKING ;
268307 env -> boss_hp -= PLAYER_ATTACK_DMG ;
269308 reward += REWARD_PLAYER_HIT_BOSS ;
270309 }
0 commit comments