PufferAI
diff --git a/‎build.sh‎
Lines changed: 9 additions & 1 deletion b/‎build.sh‎
Lines changed: 9 additions & 1 deletion
diff --git a/‎config/craftax.ini‎
Lines changed: 7 additions & 0 deletions b/‎config/craftax.ini‎
Lines changed: 7 additions & 0 deletions
diff --git a/‎config/craftax_classic.ini‎
Lines changed: 12 additions & 0 deletions b/‎config/craftax_classic.ini‎
Lines changed: 12 additions & 0 deletions
diff --git a/‎ocean/craftax/PORT_NOTES.md‎
Lines changed: 543 additions & 0 deletions b/‎ocean/craftax/PORT_NOTES.md‎
Lines changed: 543 additions & 0 deletions
diff --git a/‎ocean/craftax/binding.c‎
Lines changed: 40 additions & 15 deletions b/‎ocean/craftax/binding.c‎
Lines changed: 40 additions & 15 deletions
diff --git a/‎ocean/craftax/craftax.c‎
Lines changed: 76 additions & 0 deletions b/‎ocean/craftax/craftax.c‎
Lines changed: 76 additions & 0 deletions
@@ -208,6 +208,13 @@ if [ -z "$NCCL_LFLAG" ]; then
     NCCL_LFLAG=$(python -c "import nvidia.nccl, os; print('-L' + os.path.join(nvidia.nccl.__path__[0], 'lib'))" 2>/dev/null || echo "")
 fi
 
+# Turn each -L<dir> search path held in CUDNN_LFLAG / NCCL_LFLAG into a
+# matching -Wl,-rpath,<dir> linker flag, so the directory is also recorded as
+# a runtime search path in the linked shared object. Values that are empty or
+# do not start with -L contribute nothing.
+WHEEL_RPATH_FLAGS=()
+for lib_flag in "$CUDNN_LFLAG" "$NCCL_LFLAG"; do
+    if [[ "$lib_flag" == -L* ]]; then
+        WHEEL_RPATH_FLAGS+=("-Wl,-rpath,${lib_flag#-L}")
+    fi
+done
+
 export CCACHE_DIR="${CCACHE_DIR:-$HOME/.ccache}"
 export CCACHE_BASEDIR="$(pwd)"
 export CCACHE_COMPILERCHECK=content
@@ -232,7 +239,7 @@ if [ ! -f "$BINDING_SRC" ]; then
 fi
 
 echo "Compiling static library for $ENV..."
-${CC:-clang} -c "${CLANG_OPT[@]}" \
+${CC:-clang} -c "${CLANG_OPT[@]}" $EXTRA_CFLAGS \
     -I. -Isrc -I$SRC_DIR -Ivendor \
     -I./$RAYLIB_NAME/include -I$CUDA_HOME/include \
     -DPLATFORM_DESKTOP \
@@ -268,6 +275,7 @@ if [ -z "$MODE" ]; then
         ${CXX:-g++} -shared -fPIC -fopenmp
         build/bindings.o "$STATIC_LIB" "$RAYLIB_A"
         -L$CUDA_HOME/lib64 $CUDNN_LFLAG $NCCL_LFLAG
+        "${WHEEL_RPATH_FLAGS[@]}"
         -lcudart -lnccl -lnvidia-ml -lcublas -lcusolver -lcurand -lcudnn
         $OMP_LIB $LINK_OPT
         "${SHARED_LDFLAGS[@]}"
 
@@ -7,6 +7,13 @@ num_buffers = 4
 num_threads = 16
 
 [env]
+seed_offset = 0
+# Pre-generated world pool. Each reset memcpys from a pool entry
+# instead of re-running generate_world (~ms -> ~us per reset).
+# Bounds world diversity: at most reset_pool_size unique maps are
+# ever seen per process. Set to 0 to disable (required for the
+# parity harness to maintain exact per-seed determinism).
+reset_pool_size = 1024
 
 [train]
 total_timesteps = 200_000_000
@@ -0,0 +1,12 @@
+[base]
+env_name = craftax_classic
+
+[vec]
+total_agents = 8192
+num_buffers = 4
+num_threads = 16
+
+[env]
+
+[train]
+total_timesteps = 200_000_000
@@ -1,34 +1,59 @@
+#define CRAFTAX_ENABLE_ENV_IMPL
 #include "craftax.h"
+#include "step_crafting.h"
+#include "step_update_mobs.h"
+#include "step_spawn_mobs.h"
 
-#define OBS_SIZE 1345
+#define OBS_SIZE CRAFTAX_OBS_SIZE
 #define NUM_ATNS 1
-#define ACT_SIZES {17}
+#define ACT_SIZES {CRAFTAX_NUM_ACTIONS}
 #define OBS_TENSOR_T FloatTensor
 
 #define Env Craftax
 #include "vecenv.h"
 
+// Configures one Craftax env from `kwargs`, then hands off to c_init.
+//   seed_offset      optional; added to env->rng to form env->seed
+//                    (treated as 0 when the key is absent).
+//   reset_pool_size  optional; forwarded to craftax_set_reset_pool_size
+//                    (treated as 0 when the key is absent).
+// NOTE(review): env->rng is read here without being written first --
+// presumably the caller seeds it before my_init runs; verify.
+// NOTE(review): item->value is cast straight to an integer type; this
+// assumes the Dict holds non-negative numeric values for both keys -- confirm.
 void my_init(Env* env, Dict* kwargs) {
-    // No per-env kwargs for Craftax-Classic: the 64x64 map, inventory sizes,
-    // mob caps, etc. are all compile-time constants.
+    env->num_agents = 1;
+
+    uint64_t seed_offset = 0;
+    DictItem* item = dict_get_unsafe(kwargs, "seed_offset");
+    if (item != NULL) {
+        seed_offset = (uint64_t)item->value;
+    }
+    env->seed = seed_offset + (uint64_t)env->rng;
+
+    // Process-wide reset pool (first caller wins, rest block until ready).
+    // 0 disables caching -- regenerate every reset (exact parity mode).
+    int reset_pool_size = 0;
+    DictItem* pool_item = dict_get_unsafe(kwargs, "reset_pool_size");
+    if (pool_item != NULL) reset_pool_size = (int)pool_item->value;
+    craftax_set_reset_pool_size(reset_pool_size);
+
     c_init(env);
 }
 
+// Copies the aggregate metrics (perf, score, episode_return, episode_length)
+// and a fixed subset of per-achievement values from `log` into `out`.
 void my_log(Log* log, Dict* out) {
-    dict_set(out, "perf",           log->perf);
-    dict_set(out, "score",          log->score);
+    dict_set(out, "perf", log->perf);
+    dict_set(out, "score", log->score);
     dict_set(out, "episode_return", log->episode_return);
     dict_set(out, "episode_length", log->episode_length);
 
-    static const char* ACH_NAMES[NUM_ACHIEVEMENTS] = {
-        "collect_wood",   "place_table",    "eat_cow",       "collect_sapling",
-        "collect_drink",  "make_wood_pick", "make_wood_sword","place_plant",
-        "defeat_zombie",  "collect_stone",  "place_stone",   "eat_plant",
-        "defeat_skeleton","make_stone_pick","make_stone_sword","wake_up",
-        "place_furnace",  "collect_coal",   "collect_iron",  "collect_diamond",
-        "make_iron_pick", "make_iron_sword",
+    // Log 8 checkpoint achievements that form the tech / exploration curve.
+    // perf (above) already aggregates all 67 into a normalized score; the
+    // individual lines here are the milestones worth watching on a dashboard.
+    // The env still tracks all 67 internally for reward and perf; we just
+    // don't send every one through the log Dict.
+    //
+    // `idx` is the slot read from log->achievements; the table is ordered by
+    // progression, not by index.
+    // NOTE(review): the indices are hard-coded -- presumably they mirror the
+    // achievement numbering declared in craftax.h; confirm if that changes.
+    struct { const char* name; int idx; } checkpoints[] = {
+        {"collect_wood",         0},
+        {"make_wood_pickaxe",    5},
+        {"make_stone_pickaxe",  13},
+        {"collect_iron",        18},
+        {"make_iron_pickaxe",   20},
+        {"collect_diamond",     19},
+        {"enter_gnomish_mines", 28},
+        {"defeat_necromancer",  48},
     };
-    for (int i = 0; i < NUM_ACHIEVEMENTS; i++) {
-        dict_set(out, ACH_NAMES[i], log->achievements[i]);
+    for (int i = 0; i < (int)(sizeof(checkpoints) / sizeof(checkpoints[0])); i++) {
+        dict_set(out, checkpoints[i].name, log->achievements[checkpoints[i].idx]);
     }
 }
@@ -0,0 +1,76 @@
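+// Standalone viewer for Craftax. Runs a random-action policy by default;
+// press H to toggle keyboard control.
+//
+// Build:
+//   ./build.sh craftax --fast         # optimized
+//   ./build.sh craftax --local        # debug with sanitizers
+// Run:
+//   ./craftax [seed]                  # seed defaults to the current time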
+
+#define CRAFTAX_ENABLE_ENV_IMPL
+#include "craftax.h"
+#include "step_crafting.h"
+#include "step_update_mobs.h"
+#include "step_spawn_mobs.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+
+// Advances the xorshift32 state at *s by one step (shifts 13 / 17 / 5) and
+// returns the freshly computed value.
+static uint32_t xorshift32(uint32_t* s) {
+    uint32_t x = *s;
+    x ^= x << 13; x ^= x >> 17; x ^= x << 5;
+    // Zero is a fixed point of the shifts above, so never store it: reseed the
+    // state with a constant instead. The value returned for this call is
+    // still 0.
+    *s = x ? x : 0xdeadbeef;
+    return x;
+}
+
+// Entry point of the standalone viewer.
+//
+// Usage: ./craftax [seed]
+//   seed  optional decimal integer; defaults to time(NULL).
+//
+// Drives a single-agent env with a xorshift32 random policy and renders once
+// per loop iteration. H toggles human control: A/D/W/S or the arrow keys
+// move, SPACE = DO, Z = SLEEP, '.' steps with NOOP. Under human control the
+// env only steps on iterations where one of those keys was pressed.
+//
+// Returns 0 on normal exit, 1 on a malformed seed argument or when a buffer
+// allocation fails.
+int main(int argc, char** argv) {
+    uint64_t seed = (uint64_t)time(NULL);
+    if (argc > 1) {
+        // Reject non-numeric input instead of silently running with seed 0.
+        char* end = NULL;
+        seed = strtoull(argv[1], &end, 10);
+        if (end == argv[1] || *end != '\0') {
+            fprintf(stderr, "usage: %s [seed]  (seed must be a decimal integer)\n", argv[0]);
+            return 1;
+        }
+    }
+
+    Craftax env = {0};
+    env.num_agents = 1;
+    env.seed = seed;
+    env.rng = (uint32_t)seed;
+
+    // Minimal buffers for a single agent
+    env.observations = calloc(CRAFTAX_OBS_SIZE, sizeof(float));
+    env.actions = calloc(1, sizeof(float));
+    env.rewards = calloc(1, sizeof(float));
+    env.terminals = calloc(1, sizeof(float));
+    if (!env.observations || !env.actions || !env.rewards || !env.terminals) {
+        fprintf(stderr, "craftax: out of memory\n");
+        free(env.observations);
+        free(env.actions);
+        free(env.rewards);
+        free(env.terminals);
+        return 1;
+    }
+
+    c_init(&env);
+    c_reset(&env);
+
+    // Separate stream for the random policy, decorrelated from env.rng.
+    uint32_t action_rng = (uint32_t)(seed ^ 0x9E3779B9u);
+    bool human_control = false;
+    int human_action = CRAFTAX_ACTION_NOOP;
+
+    // NOTE(review): raylib's WindowShouldClose() reports true while no window
+    // exists. Confirm that c_init/c_reset create the window; if it is only
+    // created lazily inside c_render, render once before entering this loop.
+    while (!WindowShouldClose()) {
+        // Toggle human control
+        if (IsKeyPressed(KEY_H)) human_control = !human_control;
+
+        if (human_control) {
+            // Later checks override earlier ones when several keys are
+            // pressed on the same frame.
+            human_action = CRAFTAX_ACTION_NOOP;
+            if (IsKeyPressed(KEY_A) || IsKeyPressed(KEY_LEFT))  human_action = CRAFTAX_ACTION_LEFT;
+            if (IsKeyPressed(KEY_D) || IsKeyPressed(KEY_RIGHT)) human_action = CRAFTAX_ACTION_RIGHT;
+            if (IsKeyPressed(KEY_W) || IsKeyPressed(KEY_UP))    human_action = CRAFTAX_ACTION_UP;
+            if (IsKeyPressed(KEY_S) || IsKeyPressed(KEY_DOWN))  human_action = CRAFTAX_ACTION_DOWN;
+            if (IsKeyPressed(KEY_SPACE)) human_action = CRAFTAX_ACTION_DO;
+            if (IsKeyPressed(KEY_Z)) human_action = CRAFTAX_ACTION_SLEEP;
+            env.actions[0] = (float)human_action;
+            // '.' lets the player pass a turn explicitly with NOOP.
+            if (human_action != CRAFTAX_ACTION_NOOP || IsKeyPressed(KEY_PERIOD)) c_step(&env);
+        } else {
+            env.actions[0] = (float)(xorshift32(&action_rng) % CRAFTAX_NUM_ACTIONS);
+            c_step(&env);
+        }
+
+        c_render(&env);
+    }
+
+    c_close(&env);
+    free(env.observations);
+    free(env.actions);
+    free(env.rewards);
+    free(env.terminals);
+    return 0;
+}