Skip to content

Commit 9bb5839

Browse files
committed
add OSRS PvP + Zulrah environments
1 parent 38b6821 commit 9bb5839

56 files changed

Lines changed: 33994 additions & 0 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

pufferlib/ocean/osrs_pvp/README.md

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
# OSRS PvP Environment
2+
3+
C implementation of Old School RuneScape NH PvP for reinforcement learning.
4+
5+
## Build
6+
7+
```bash
8+
python setup.py build_osrs_pvp --inplace --force
9+
```
10+
11+
Zulrah encounter is a separate env that shares headers with osrs_pvp:
12+
```bash
13+
python setup.py build_osrs_zulrah --inplace --force
14+
```
15+
16+
## Data assets
17+
18+
Binary assets are not checked into git; they are exported from the OSRS game cache:
19+
20+
1. Download a modern cache from https://archive.openrs2.org/ ("flat file" export)
21+
2. `cd pufferlib/ocean/osrs_pvp && ./scripts/export_all.sh /path/to/cache`
22+
23+
The export scripts are pure Python with no third-party dependencies.
24+
25+
## Spaces
26+
27+
**Obs:** 373 = 334 features + 39 action mask, normalized in C.
28+
29+
**Actions:** MultiDiscrete `[9, 13, 6, 2, 5, 2, 2]` — loadout, combat, prayer, food, potion, karambwan, veng.
30+
31+
**Timing:** tick N actions apply at tick N+1 (OSRS-accurate async).
32+
33+
## Opponents
34+
35+
28 scripted policies from trivial (`true_random`) to boss (`nightmare_nh` — onetick + 50% action reading). Curriculum mixes and PFSP supported.
36+
37+
## Encounters
38+
39+
Vtable interface (`osrs_encounter.h`). Current: NH PvP, Zulrah (81 obs, 6 heads, 3 forms, venom, clouds, collision).
40+
41+
## Files
42+
43+
Core env: `osrs_pvp_types/items/gear/combat/collision/pathfinding/movement/observations/actions/opponents/api.h`
44+
45+
Visual: `osrs_pvp_render/gui/anim/models/terrain/objects/effects/human_input.h`
46+
47+
Encounters: `encounters/encounter_nh_pvp.h`, `encounters/encounter_zulrah.h`
48+
49+
Data: `data/` (gitignored binaries + C model headers), `scripts/` (cache exporters)

pufferlib/ocean/osrs_pvp/binding.c

Lines changed: 217 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,217 @@
1+
/**
2+
* @file binding.c
3+
* @brief Static-native binding for OSRS PVP environment
4+
*
5+
* Bridges vecenv.h's contract (double actions, float terminals) with the PVP
6+
* env's internal types (int actions, unsigned char terminals) using a wrapper
7+
* struct. PVP source headers are untouched.
8+
*/
9+
10+
#include "osrs_pvp.h"
11+
12+
/* Wrapper struct: vecenv-compatible fields at top + embedded OsrsPvp.
13+
* vecenv.h's create_static_vec assigns to env->observations, env->actions,
14+
* env->rewards, env->terminals directly. These fields must match vecenv's
15+
* expected types (void*, double*, float*, float*). The embedded OsrsPvp has
16+
* its own identically-named fields with different types — pvp_init sets those
17+
* to internal inline buffers, so there's no conflict. */
18+
typedef struct {
19+
void* observations;
20+
double* actions;
21+
float* rewards;
22+
float* terminals;
23+
int num_agents;
24+
int rng;
25+
Log log;
26+
27+
OsrsPvp pvp;
28+
29+
/* staging buffers for type conversion */
30+
int ocean_acts_staging[NUM_ACTION_HEADS];
31+
unsigned char ocean_term_staging;
32+
} OsrsPvpEnv;
33+
34+
/* Binding configuration. `Env` and `NUM_ATNS` are used directly in this file;
 * the rest are presumably read by vecenv.h, which is included further down
 * (NOTE(review): confirm against vecenv.h). */
#define Env       OsrsPvpEnv
#define OBS_TYPE  FLOAT
#define ACT_TYPE  DOUBLE
#define OBS_SIZE  OCEAN_OBS_SIZE
#define NUM_ATNS  NUM_ACTION_HEADS
/* One entry per action head, in head order. */
#define ACT_SIZES {LOADOUT_DIM, COMBAT_DIM, OVERHEAD_DIM, FOOD_DIM, POTION_DIM, KARAMBWAN_DIM, VENG_DIM}
40+
41+
/* c_step/c_reset/c_close/c_render must be defined BEFORE including vecenv.h
42+
* because vecenv.h calls them inside its implementation section without
43+
* forward-declaring them (they're expected to come from the env header). */
44+
45+
/**
 * Advance the wrapped PVP environment by one step.
 *
 * Casts each double in env->actions to an int in the staging array, calls
 * pvp_step, then publishes the staged terminal flag as a float in
 * env->terminals[0]. When the flag is set, six statistics are copied from the
 * PVP log into the wrapper log and the PVP log is zeroed; if auto_reset is
 * also set, observations are written once more via ocean_write_obs.
 */
void c_step(Env *env) {
    OsrsPvp *pvp = &env->pvp;

    /* vecenv hands over doubles; the staging array stores ints */
    for (int head = 0; head < NUM_ATNS; ++head) {
        env->ocean_acts_staging[head] = (int)env->actions[head];
    }

    pvp_step(pvp);

    /* unsigned char flag -> float for vecenv */
    const unsigned char done = env->ocean_term_staging;
    env->terminals[0] = (float)done;
    if (!done) {
        return;
    }

    /* Episode ended: move the stats into the wrapper log, then clear the source.
     * NOTE(review): these are plain assignments, so an earlier episode's stats
     * still sitting in env->log are overwritten — confirm that matches how
     * vecenv drains env->log. */
    env->log.episode_return  = pvp->log.episode_return;
    env->log.episode_length  = pvp->log.episode_length;
    env->log.wins            = pvp->log.wins;
    env->log.damage_dealt    = pvp->log.damage_dealt;
    env->log.damage_received = pvp->log.damage_received;
    env->log.n               = pvp->log.n;
    memset(&pvp->log, 0, sizeof(pvp->log));

    if (pvp->auto_reset) {
        ocean_write_obs(pvp);
    }
}
71+
72+
/**
 * Wire the PVP env's ocean_* pointers to the wrapper's buffers, reset the
 * PVP env, write the initial observation, and clear reward/terminal outputs.
 */
void c_reset(Env *env) {
    OsrsPvp *pvp = &env->pvp;

    /* Wiring happens here rather than in my_init: create_static_vec assigns
     * env->observations / env->rewards only after my_vec_init has run. */
    pvp->ocean_acts = env->ocean_acts_staging;
    pvp->ocean_term = &env->ocean_term_staging;
    pvp->ocean_rew  = env->rewards;
    pvp->ocean_obs  = (float *)env->observations;

    pvp_reset(pvp);
    ocean_write_obs(pvp);

    /* start the episode with zero reward and a cleared terminal flag */
    pvp->ocean_rew[0]  = 0.0f;
    pvp->ocean_term[0] = 0;
    env->terminals[0]  = 0.0f;
}
86+
87+
/** Tear down the wrapped PVP environment by delegating to pvp_close. */
void c_close(Env *env) {
    pvp_close(&env->pvp);
}
88+
/** Render hook; this binding performs no rendering. */
void c_render(Env *env) {
    (void)env; /* parameter intentionally unused */
}
89+
90+
#include "vecenv.h"
91+
92+
/**
 * Initialise one wrapped environment.
 *
 * Runs pvp_init, points the ocean_* pointers at safe internal storage, applies
 * fixed mode flags, reads three optional settings from `kwargs`
 * ("opponent_type", "shaping_scale", "shaping_enabled"), installs the default
 * reward-shaping coefficients and gear-tier weights, and finally calls
 * pvp_reset to build the initial game state.
 *
 * @param env    wrapper to initialise
 * @param kwargs configuration dictionary; a missing key selects the default
 */
void my_init(Env *env, Dict *kwargs) {
    OsrsPvp *pvp = &env->pvp;

    env->num_agents = 1;
    pvp_init(pvp);

    /* The vecenv shared buffers (env->observations, env->actions, env->rewards,
     * env->terminals) are still NULL here: my_init runs inside my_vec_init,
     * before create_static_vec assigns them. c_reset does the real wiring.
     * Until then, reward/terminal/action pointers target internal storage so
     * the pvp_reset call below has somewhere valid to write. */
    pvp->ocean_obs           = NULL;
    pvp->ocean_obs_p1        = NULL;
    pvp->ocean_selfplay_mask = NULL;
    pvp->ocean_rew           = pvp->_rews_buf;
    pvp->ocean_term          = &env->ocean_term_staging;
    pvp->ocean_acts          = env->ocean_acts_staging;

    /* fixed mode flags */
    pvp->use_c_opponent = 1;
    pvp->auto_reset     = 1;
    pvp->is_lms         = 1;

    /* optional settings from the Dict (all Dict values are doubles) */
    DictItem *opp_item = dict_get_unsafe(kwargs, "opponent_type");
    if (opp_item) {
        pvp->opponent.type = (OpponentType)(int)opp_item->value;
    } else {
        pvp->opponent.type = OPP_IMPROVED;
    }

    DictItem *scale_item = dict_get_unsafe(kwargs, "shaping_scale");
    if (scale_item) {
        pvp->shaping.shaping_scale = (float)scale_item->value;
    } else {
        pvp->shaping.shaping_scale = 0.0f;
    }

    DictItem *enabled_item = dict_get_unsafe(kwargs, "shaping_enabled");
    if (enabled_item) {
        pvp->shaping.enabled = (int)enabled_item->value;
    } else {
        pvp->shaping.enabled = 0;
    }

    /* reward shaping coefficients (same defaults as ocean_binding.c) */

    /* -- bonuses -- */
    pvp->shaping.damage_dealt_coef      = 0.005f;
    pvp->shaping.correct_prayer_bonus   = 0.03f;
    pvp->shaping.off_prayer_hit_bonus   = 0.03f;
    pvp->shaping.spec_off_prayer_bonus  = 0.02f;
    pvp->shaping.spec_low_defence_bonus = 0.01f;
    pvp->shaping.spec_low_hp_bonus      = 0.02f;
    pvp->shaping.smart_triple_eat_bonus = 0.05f;
    pvp->shaping.damage_burst_bonus     = 0.002f;
    pvp->shaping.ko_bonus               = 0.15f;

    /* -- penalties -- */
    pvp->shaping.damage_received_coef            = -0.005f;
    pvp->shaping.wrong_prayer_penalty            = -0.02f;
    pvp->shaping.prayer_switch_no_attack_penalty = -0.01f;
    pvp->shaping.melee_frozen_penalty            = -0.05f;
    pvp->shaping.wasted_eat_penalty              = -0.001f;
    pvp->shaping.premature_eat_penalty           = -0.02f;
    pvp->shaping.magic_no_staff_penalty          = -0.05f;
    pvp->shaping.gear_mismatch_penalty           = -0.05f;
    pvp->shaping.wasted_triple_eat_penalty       = -0.0005f;
    pvp->shaping.wasted_resources_penalty        = -0.07f;
    pvp->shaping.click_penalty_coef              = -0.003f;

    /* -- thresholds -- */
    pvp->shaping.damage_burst_threshold  = 30;
    pvp->shaping.premature_eat_threshold = 0.7071f;
    pvp->shaping.click_penalty_threshold = 5;

    /* -- toggles -- */
    pvp->shaping.prayer_penalty_enabled = 1;
    pvp->shaping.click_penalty_enabled  = 0;

    /* gear: all weight on tier 0 (basic LMS), none on tiers 1..3 */
    pvp->gear_tier_weights[0] = 1.0f;
    for (int tier = 1; tier <= 3; ++tier) {
        pvp->gear_tier_weights[tier] = 0.0f;
    }

    /* pvp_reset sets up game state (players, positions, gear, etc.) but does
     * NOT write to the ocean buffers — that happens in c_reset. */
    pvp_reset(pvp);
}
162+
163+
/**
 * Publish five statistics from `log` into the output dictionary under the
 * keys "episode_return", "episode_length", "wins", "damage_dealt" and
 * "damage_received". The `n` counter of the log is not exported here.
 */
void my_log(Log *log, Dict *out) {
    /* episode-level summary */
    dict_set(out, "episode_return",  log->episode_return);
    dict_set(out, "episode_length",  log->episode_length);
    dict_set(out, "wins",            log->wins);

    /* combat totals */
    dict_set(out, "damage_dealt",    log->damage_dealt);
    dict_set(out, "damage_received", log->damage_received);
}
170+
171+
/* ========================================================================
172+
* PFSP: set/get opponent pool weights across all envs
173+
* ======================================================================== */
174+
175+
/**
 * Install a PFSP opponent pool and its cumulative weights in every env.
 *
 * @param vec          vector of envs to update
 * @param pool         opponent types, at least `pool_size` entries
 * @param cum_weights  cumulative sampling weights, parallel to `pool`
 * @param pool_size    number of entries; clamped to [0, MAX_OPPONENT_POOL]
 *
 * An env whose pool was empty before this call is reset via c_reset, so the
 * episode started during env creation (before any pool was set) is restarted.
 * Envs that already had a pool are NOT reset: periodic weight updates must not
 * interrupt a running rollout.
 *
 * Does nothing if `pool_size` is positive but `pool` or `cum_weights` is NULL.
 */
void binding_set_pfsp_weights(StaticVec* vec, int* pool, int* cum_weights, int pool_size) {
    /* A negative size would otherwise be stored verbatim in every env. */
    if (pool_size < 0) {
        pool_size = 0;
    }
    if (pool_size > MAX_OPPONENT_POOL) {
        pool_size = MAX_OPPONENT_POOL;
    }
    /* Refuse to read from missing arrays rather than dereference NULL. */
    if (pool_size > 0 && (pool == NULL || cum_weights == NULL)) {
        return;
    }

    Env* envs = (Env*)vec->envs;
    for (int e = 0; e < vec->size; e++) {
        Env* env = &envs[e];
        int was_unconfigured = (env->pvp.pfsp.pool_size == 0);

        env->pvp.pfsp.pool_size = pool_size;
        for (int i = 0; i < pool_size; i++) {
            env->pvp.pfsp.pool[i] = (OpponentType)pool[i];
            env->pvp.pfsp.cum_weights[i] = cum_weights[i];
        }

        /* Only reset on first configuration (see function comment). */
        if (was_unconfigured) {
            c_reset(env);
        }
    }
}
193+
194+
/**
 * Sum per-opponent win and episode counters over all envs, then zero them.
 *
 * @param vec            vector of envs to read and reset
 * @param out_wins       receives summed wins per pool slot
 * @param out_episodes   receives summed episodes per pool slot
 * @param out_pool_size  receives the largest pool_size found among the envs
 *
 * Only the first *out_pool_size entries of the output arrays are written.
 * Each env's wins/episodes arrays are cleared after being read.
 */
void binding_get_pfsp_stats(StaticVec *vec, float *out_wins, float *out_episodes, int *out_pool_size) {
    Env *envs = (Env *)vec->envs;

    /* pass 1: find the widest pool among the envs */
    int widest = 0;
    for (int e = 0; e < vec->size; ++e) {
        if (envs[e].pvp.pfsp.pool_size > widest) {
            widest = envs[e].pvp.pfsp.pool_size;
        }
    }
    *out_pool_size = widest;

    /* clear the output slots that will be accumulated into */
    for (int slot = 0; slot < widest; ++slot) {
        out_wins[slot] = 0.0f;
        out_episodes[slot] = 0.0f;
    }

    /* pass 2: accumulate each env's counters, then zero them (read-and-reset) */
    for (int e = 0; e < vec->size; ++e) {
        Env *env = &envs[e];
        for (int slot = 0; slot < env->pvp.pfsp.pool_size; ++slot) {
            out_wins[slot] += env->pvp.pfsp.wins[slot];
            out_episodes[slot] += env->pvp.pfsp.episodes[slot];
        }
        memset(env->pvp.pfsp.wins, 0, sizeof(env->pvp.pfsp.wins));
        memset(env->pvp.pfsp.episodes, 0, sizeof(env->pvp.pfsp.episodes));
    }
}
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# all binary assets — regenerated from OSRS cache via scripts/
2+
# run: scripts/export_all.sh <cache_path>
3+
*.models
4+
*.anims
5+
*.objects
6+
*.terrain
7+
*.atlas
8+
*.npcs
9+
*.cmap
10+
*.bin
11+
sprites/
Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
/* generated by scripts/export_models.py — do not edit */
2+
#ifndef ITEM_MODELS_H
3+
#define ITEM_MODELS_H
4+
5+
#include <stdint.h>
6+
7+
/* One row of the item -> model lookup table. */
typedef struct {
    uint16_t item_id;     /* item identifier */
    uint32_t inv_model;   /* model id for the inventory view */
    uint32_t wield_model; /* model id when wielded, or ITEM_MODEL_NONE */
    uint8_t has_sleeves;  /* 0 or 1 */
} ItemModelMapping;

/* Number of rows in ITEM_MODEL_MAP (checked at compile time below). */
#define ITEM_MODEL_COUNT 94

/* Sentinel stored in wield_model (value 4294967295); presumably marks items
 * that have no wield model — NOTE(review): confirm against
 * scripts/export_models.py, which should emit this name as well. */
#define ITEM_MODEL_NONE UINT32_MAX

static const ItemModelMapping ITEM_MODEL_MAP[] = {
    /* item_id, inv_model, wield_model, has_sleeves */
    { 10828, 21938, 917504, 0 },
    { 21795, 34166, 917505, 0 },
    { 1712, 2796, 917506, 0 },
    { 2503, 2745, 917507, 0 },
    { 4091, 5043, 917508, 1 },
    { 1079, 2582, 917509, 0 },
    { 4093, 5042, 917510, 0 },
    { 4151, 5412, 917511, 0 },
    { 9185, 16876, 917512, 0 },
    { 4710, 6590, 917513, 0 },
    { 5698, 2718, 917514, 0 },
    { 12954, 10422, 917515, 0 },
    { 12829, 11308, 917516, 0 },
    { 7462, 13631, 917517, 0 },
    { 3105, 2837, 917518, 0 },
    { 6737, 9931, ITEM_MODEL_NONE, 0 },
    { 9243, 16856, ITEM_MODEL_NONE, 0 },
    { 22324, 35739, 917521, 0 },
    { 24417, 39068, 917522, 0 },
    { 11791, 2810, 917523, 0 },
    { 21006, 32789, 917524, 0 },
    { 24424, 39072, 917525, 0 },
    { 11785, 19967, 917527, 0 },
    { 26374, 43246, 917528, 0 },
    { 13652, 32784, 917529, 0 },
    { 11802, 28075, 917530, 0 },
    { 25730, 4845, ITEM_MODEL_NONE, 0 },
    { 4153, 5413, 917532, 0 },
    { 21003, 32792, 917533, 0 },
    { 11235, 26386, 917534, 0 },
    { 19481, 31523, 917535, 0 },
    { 22613, 35995, 917536, 0 },
    { 27690, 47422, 917537, 0 },
    { 22622, 35986, 917538, 0 },
    { 22636, 35997, 917539, 0 },
    { 21018, 32794, 917540, 0 },
    { 21021, 32790, 917541, 1 },
    { 21024, 32787, 917542, 0 },
    { 4712, 6578, 917543, 1 },
    { 4714, 6577, 917544, 0 },
    { 4736, 6588, 917545, 1 },
    { 11834, 28047, 917546, 0 },
    { 12831, 11307, 917547, 0 },
    { 6585, 9633, 917548, 0 },
    { 12002, 28438, 917549, 0 },
    { 21295, 33144, 917550, 0 },
    { 13235, 29394, 917551, 0 },
    { 11770, 21850, ITEM_MODEL_NONE, 0 },
    { 25975, 46473, ITEM_MODEL_NONE, 0 },
    { 6889, 10573, 917554, 0 },
    { 11212, 26306, ITEM_MODEL_NONE, 0 },
    { 4751, 6584, 917556, 0 },
    { 4722, 6581, 917557, 0 },
    { 4759, 6595, 917558, 0 },
    { 4745, 6592, 917559, 0 },
    { 4716, 6580, 917560, 0 },
    { 4753, 6597, 917561, 0 },
    { 4724, 6583, 917562, 0 },
    { 21932, 16856, ITEM_MODEL_NONE, 0 },
    { 21791, 34261, 917564, 0 },
    { 31113, 56713, 917565, 0 },
    { 27251, 46472, 917566, 0 },
    { 31106, 56703, 917567, 0 },
    { 31097, 56694, 917568, 0 },
    { 20657, 31519, ITEM_MODEL_NONE, 0 },
    { 20997, 32799, 917570, 0 },
    { 27235, 46466, 917571, 0 },
    { 27238, 46469, 917572, 0 },
    { 27241, 46475, 917573, 0 },
    { 19547, 31510, 917574, 0 },
    { 28947, 52244, 917575, 0 },
    { 26235, 43237, 917576, 0 },
    { 12926, 19219, 917577, 0 },
    { 4708, 5419, 917578, 0 },
    { 19544, 31515, 917579, 0 },
    { 22481, 35744, 917580, 0 },
    { 6920, 10580, 917581, 0 },
    { 20220, 31976, ITEM_MODEL_NONE, 0 },
    { 2550, 2677, ITEM_MODEL_NONE, 0 },
    { 23971, 38761, 917584, 0 },
    { 22109, 35041, 917585, 0 },
    { 23975, 38766, 917586, 0 },
    { 23979, 38765, 917587, 0 },
    { 25865, 42605, 917588, 0 },
    { 19921, 32033, 917589, 0 },
    { 4089, 5040, 917590, 0 },
    { 12899, 19223, 917591, 0 },
    { 12612, 2543, 917592, 0 },
    { 21326, 2711, ITEM_MODEL_NONE, 0 },
    { 4097, 5038, 917594, 0 },
    { 10382, 20231, 917595, 1 },
    { 2497, 2507, 917596, 0 },
    { 12788, 48061, 917597, 0 },
    { 10499, 20454, 917598, 0 },
};

/* Compile-time guard: fails to compile (negative array size) if
 * ITEM_MODEL_COUNT ever drifts from the real number of rows. */
typedef char item_model_count_matches_table[
    (sizeof(ITEM_MODEL_MAP) / sizeof(ITEM_MODEL_MAP[0]) == ITEM_MODEL_COUNT) ? 1 : -1];
112+
113+
#endif /* ITEM_MODELS_H */

0 commit comments

Comments
 (0)