diff --git a/Cargo.toml b/Cargo.toml index 4d9004a..32b83c2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -42,6 +42,7 @@ no-atomic-cas = [] multi-core = [] error-msg = [] defmt = ["dep:defmt", "dep:defmt-rtt"] +metrics = ["hal_api/metrics"] [build-dependencies] cbindgen = "0.28.0" @@ -53,7 +54,7 @@ rand = "0.8.5" cfg_aliases = "0.2.1" [lints.rust] -unexpected_cfgs = { level = "warn", check-cfg = ['cfg(kani)'] } +unexpected_cfgs = { level = "warn", check-cfg = ['cfg(kani)', 'cfg(metrics)'] } [profile.dev] panic = "abort" diff --git a/build.rs b/build.rs index 9807319..7a49783 100644 --- a/build.rs +++ b/build.rs @@ -15,6 +15,10 @@ extern crate cbindgen; fn main() { println!("cargo::rerun-if-changed=src"); println!("cargo::rerun-if-changed=build.rs"); + println!("cargo::rerun-if-env-changed=OSIRIS_METRICS"); + if std::env::var("OSIRIS_METRICS").map_or(false, |v| v == "true" || v == "1") { + println!("cargo::rustc-cfg=metrics"); + } let out_dir = std::env::var("OUT_DIR").unwrap(); if gen_syscall_match(Path::new("src/syscalls"), Path::new(&out_dir)).is_err() { diff --git a/machine/api/Cargo.toml b/machine/api/Cargo.toml index e4eab78..27d9322 100644 --- a/machine/api/Cargo.toml +++ b/machine/api/Cargo.toml @@ -3,5 +3,11 @@ name = "hal-api" version = "0.1.0" edition = "2024" +[features] +metrics = [] + +[lints.rust] +unexpected_cfgs = { level = "warn", check-cfg = ['cfg(metrics)'] } + [dependencies] seq-macro = "0.3.6" \ No newline at end of file diff --git a/machine/api/build.rs b/machine/api/build.rs new file mode 100644 index 0000000..667df70 --- /dev/null +++ b/machine/api/build.rs @@ -0,0 +1,6 @@ +fn main() { + println!("cargo::rerun-if-env-changed=OSIRIS_METRICS"); + if std::env::var("OSIRIS_METRICS").map_or(false, |v| v == "true" || v == "1") { + println!("cargo::rustc-cfg=metrics"); + } +} diff --git a/machine/api/src/stack.rs b/machine/api/src/stack.rs index 4e7e46e..449072d 100644 --- a/machine/api/src/stack.rs +++ b/machine/api/src/stack.rs @@ -12,6 +12,21 @@ 
pub struct Descriptor { pub fin: Option, } +/// Per-stack resource snapshot. Available when the `metrics` feature is enabled. +/// Backends that do not override `Stacklike::metrics` return all-zero values. +#[cfg(any(feature = "metrics", metrics))] +#[derive(Debug, Clone, Copy)] +pub struct StackMetrics { + /// Total bytes allocated for this stack. + pub total_bytes: usize, + /// Bytes currently consumed (from stack top down to current SP). + pub used_bytes: usize, + /// Bytes still available for use. + pub free_bytes: usize, + /// Peak bytes ever used since the stack was created (high-water mark). + pub peak_used_bytes: usize, +} + pub trait Stacklike { type ElemSize: Copy; type StackPtr; @@ -25,6 +40,18 @@ pub trait Stacklike { fn sp(&self) -> *mut c_void; + /// Returns a metrics snapshot for this stack. + /// Backends that do not implement full metrics tracking return all-zero values. + #[cfg(any(feature = "metrics", metrics))] + fn metrics(&self) -> StackMetrics { + StackMetrics { + total_bytes: 0, + used_bytes: 0, + free_bytes: 0, + peak_used_bytes: 0, + } + } + //fn push_tinit(&mut self, init: &ThreadInitializer) -> Result; // Pushes a function context onto the stack, which will be executed when the IRQ returns. 
diff --git a/machine/cortex-m/Cargo.toml b/machine/cortex-m/Cargo.toml index 5871c24..dadb37a 100644 --- a/machine/cortex-m/Cargo.toml +++ b/machine/cortex-m/Cargo.toml @@ -25,6 +25,7 @@ syn = { version = "2.0.36", features = ["full"] } [features] panic-exit = [] panic-uart = [] +metrics = ["hal-api/metrics"] [lints.rust] -unexpected_cfgs = { level = "warn", check-cfg = ['cfg(kani)', 'cfg(cortex_m)', 'cfg(disabled)'] } +unexpected_cfgs = { level = "warn", check-cfg = ['cfg(kani)', 'cfg(cortex_m)', 'cfg(disabled)', 'cfg(metrics)'] } diff --git a/machine/cortex-m/build.rs b/machine/cortex-m/build.rs index 573a0c3..da1c593 100644 --- a/machine/cortex-m/build.rs +++ b/machine/cortex-m/build.rs @@ -329,6 +329,11 @@ mod vector_table { /// /// Exits with error code 1 if any critical build step fails fn main() { + println!("cargo::rerun-if-env-changed=OSIRIS_METRICS"); + if env::var("OSIRIS_METRICS").map_or(false, |v| v == "true" || v == "1") { + println!("cargo::rustc-cfg=metrics"); + } + if !hal_builder::check_enabled("cortex-m") || !check_cortex_m() { return; } diff --git a/machine/cortex-m/src/native/sched.rs b/machine/cortex-m/src/native/sched.rs index 82b7ffa..1e6476a 100644 --- a/machine/cortex-m/src/native/sched.rs +++ b/machine/cortex-m/src/native/sched.rs @@ -7,7 +7,7 @@ use core::{ ptr::NonNull, }; -use hal_api::{Result, stack::Descriptor}; +use hal_api::{Result, stack::{Descriptor, Stacklike}}; // A default finalizer used if none is supplied: just spins forever. #[inline(never)] @@ -62,6 +62,9 @@ pub struct ArmStack { sp: StackPtr, /// The size of the stack size: NonZero, + /// High-water mark: largest sp offset ever recorded via set_sp. + #[cfg(any(feature = "metrics", metrics))] + peak_offset: usize, } impl ArmStack { @@ -168,16 +171,99 @@ impl ArmStack { // We should have written exactly FRAME_WORDS words. 
+    // NOTE(review): BUF_A is a single `static mut` shared by several tests, so parallel test runs alias the same buffer (and `&mut` to a `static mut` is rejected in edition 2024) — give each test its own local buffer or use `&raw mut`, or run with --test-threads=1.
+ assert_eq!(stack.metrics().peak_used_bytes, stack.metrics().used_bytes); + assert!(stack.metrics().peak_used_bytes >= 18 * word); + } + + #[test] + fn metrics_peak_tracks_high_water_mark() { + let mut stack = make_stack(unsafe { &mut BUF_A }); + let word = core::mem::size_of::(); + + // Simulate two context saves at increasing depths. + let sp_deep = StackPtr { offset: 50 }; + stack.set_sp(sp_deep); + assert_eq!(stack.metrics().peak_used_bytes, 50 * word); + + let sp_shallow = StackPtr { offset: 20 }; + stack.set_sp(sp_shallow); + // Peak must not decrease. + assert_eq!(stack.metrics().peak_used_bytes, 50 * word); + assert_eq!(stack.metrics().used_bytes, 20 * word); + } + + #[test] + fn metrics_free_plus_used_equals_total() { + let mut stack = make_stack(unsafe { &mut BUF_B }); + stack.set_sp(StackPtr { offset: 100 }); + let m = stack.metrics(); + assert_eq!(m.used_bytes + m.free_bytes, m.total_bytes); + } +} + impl hal_api::stack::Stacklike for ArmStack { type ElemSize = u32; type StackPtr = StackPtr; @@ -202,6 +288,8 @@ impl hal_api::stack::Stacklike for ArmStack { top, sp: StackPtr { offset: 0 }, size, + #[cfg(any(feature = "metrics", metrics))] + peak_offset: 0, }; stack.push_irq_ret_fn(entry, ctx, fin)?; @@ -217,9 +305,26 @@ impl hal_api::stack::Stacklike for ArmStack { } fn set_sp(&mut self, sp: StackPtr) { + #[cfg(any(feature = "metrics", metrics))] + if sp.offset > self.peak_offset { + self.peak_offset = sp.offset; + } self.sp = sp; } + #[cfg(any(feature = "metrics", metrics))] + fn metrics(&self) -> hal_api::stack::StackMetrics { + let word = core::mem::size_of::(); + let total_bytes = self.size.get() * word; + let used_bytes = self.sp.offset * word; + hal_api::stack::StackMetrics { + total_bytes, + used_bytes, + free_bytes: total_bytes.saturating_sub(used_bytes), + peak_used_bytes: self.peak_offset * word, + } + } + fn sp(&self) -> *mut c_void { self.sp.as_ptr(self.top).as_ptr() as *mut c_void } diff --git a/options.toml b/options.toml index 
088cdb3..7180dd5 100644 --- a/options.toml +++ b/options.toml @@ -10,6 +10,12 @@ name = "Runtime Symbols" description = "Enables runtime symbols for debugging. This will increase the binary size by potentially a lot. When enabled e.g. backtraces can display function names." type = "Boolean" +[debug.metrics] +name = "Metrics" +description = "Enables runtime metrics collection for heap allocator and stack usage. Increases binary size and adds small per-alloc/free overhead." +type = "Boolean" +default = false + [debug.uart] name = "Debug UART" description = "Select the UART peripheral to use for debug output." diff --git a/presets/stm32l4r5zi_def.toml b/presets/stm32l4r5zi_def.toml index ee3bf89..04b8885 100644 --- a/presets/stm32l4r5zi_def.toml +++ b/presets/stm32l4r5zi_def.toml @@ -8,6 +8,7 @@ OSIRIS_MACHINE = "cortex-m" # Debugging configuration OSIRIS_DEBUG_UART = "LPUART1" OSIRIS_DEBUG_RUNTIMESYMBOLS = "false" +OSIRIS_METRICS = "false" # Tuning parameters OSIRIS_TUNING_ENABLEFPU = "false" diff --git a/src/lib.rs b/src/lib.rs index 15c260d..2e195e5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -20,6 +20,9 @@ mod sync; mod syscalls; mod time; +#[cfg(any(feature = "metrics", metrics))] +pub mod metrics; + // Public, for now. pub mod drivers; pub mod uapi; diff --git a/src/mem.rs b/src/mem.rs index 36d610e..9b42bc5 100644 --- a/src/mem.rs +++ b/src/mem.rs @@ -91,6 +91,12 @@ pub unsafe fn free(ptr: NonNull, size: usize) { unsafe { allocator.free(ptr, size) }; } +/// Returns a metrics snapshot of the global kernel heap. +#[cfg(any(feature = "metrics", metrics))] +pub(crate) fn global_metrics() -> alloc::Metrics { + GLOBAL_ALLOCATOR.lock().metrics() +} + /// Aligns a size to be a multiple of the u128 alignment. /// /// `size` - The size to align. diff --git a/src/mem/alloc.rs b/src/mem/alloc.rs index bde5db2..b633ab7 100644 --- a/src/mem/alloc.rs +++ b/src/mem/alloc.rs @@ -9,6 +9,52 @@ use crate::error::Result; pub mod bestfit; +/// Snapshot of allocator resource usage. 
Available when the `metrics` feature is enabled. +#[cfg(any(feature = "metrics", metrics))] +#[derive(Debug, Clone, Copy, Default)] +pub struct Metrics { + pub total_bytes: usize, + pub free_bytes: usize, + pub free_blocks: usize, + pub alloc_count: u64, + pub free_count: u64, +} + +#[cfg(any(feature = "metrics", metrics))] +impl Metrics { + pub const fn new() -> Self { + Self { + total_bytes: 0, + free_bytes: 0, + free_blocks: 0, + alloc_count: 0, + free_count: 0, + } + } + + pub fn allocated_bytes(&self) -> usize { + self.total_bytes.saturating_sub(self.free_bytes) + } + + pub(crate) fn record_add_range(&mut self, total: usize, free: usize) { + self.total_bytes = self.total_bytes.saturating_add(total); + self.free_bytes = self.free_bytes.saturating_add(free); + self.free_blocks += 1; + } + + pub(crate) fn record_alloc(&mut self, consumed_bytes: usize, blocks_removed: usize) { + self.free_bytes = self.free_bytes.saturating_sub(consumed_bytes); + self.free_blocks = self.free_blocks.saturating_sub(blocks_removed); + self.alloc_count += 1; + } + + pub(crate) fn record_free(&mut self, added_bytes: usize) { + self.free_bytes = self.free_bytes.saturating_add(added_bytes); + self.free_blocks += 1; + self.free_count += 1; + } +} + #[cfg(target_pointer_width = "64")] pub const MAX_ADDR: usize = 2_usize.pow(48); diff --git a/src/mem/alloc/bestfit.rs b/src/mem/alloc/bestfit.rs index 32d3d64..aadbea6 100644 --- a/src/mem/alloc/bestfit.rs +++ b/src/mem/alloc/bestfit.rs @@ -20,6 +20,8 @@ struct BestFitMeta { pub struct BestFitAllocator { /// Head of the free block list. head: Option>, + #[cfg(any(feature = "metrics", metrics))] + metrics: super::Metrics, } // Safety: BestFitAllocator is not Copy or Clone. @@ -37,7 +39,11 @@ impl BestFitAllocator { /// /// Returns the new BestFitAllocator. 
pub const fn new() -> Self { - Self { head: None } + Self { + head: None, + #[cfg(any(feature = "metrics", metrics))] + metrics: super::Metrics::new(), + } } /// Adds a range of memory to the allocator. @@ -70,9 +76,11 @@ impl BestFitAllocator { // The user pointer is the pointer to the user memory. So we need to add the size of the meta data and possibly add padding. let user_pointer = ptr + size_of::() + Self::align_up(); + let usable = range.end.diff(user_pointer); + // Set the current head as the next block, so we can add the new block to the head. let meta = BestFitMeta { - size: range.end.diff(user_pointer), + size: usable, next: self.head, }; @@ -81,6 +89,11 @@ impl BestFitAllocator { // Set the head to the new block. self.head = Some(unsafe { NonNull::new_unchecked(ptr.as_mut_ptr::()) }); + + #[cfg(any(feature = "metrics", metrics))] + self.metrics + .record_add_range(range.end.diff(range.start), usable); + Ok(()) } @@ -242,6 +255,12 @@ impl super::Allocator for BestFitAllocator { debug_assert!(aligned_size >= size); debug_assert!(aligned_size <= isize::MAX as usize); + // Tracking variables for O(1) metrics update after the allocation. + #[cfg(any(feature = "metrics", metrics))] + let mut free_sub: usize = 0; + #[cfg(any(feature = "metrics", metrics))] + let mut blocks_sub: usize = 0; + // Find the best fit block. let (split, block, prev) = match self.select_block(aligned_size, request) { Ok((block, prev)) => { @@ -272,6 +291,13 @@ impl super::Allocator for BestFitAllocator { // If the block is big enough to split. Then it also needs to be big enough to store the metadata + align of the next block. if meta.size > min { + // Split: old free block (meta.size) leaves, remainder (meta.size - min) stays. + // Net free_bytes change: -min. free_blocks unchanged (one out, one in). + #[cfg(any(feature = "metrics", metrics))] + { + free_sub = min; + } + // Calculate the remaining size of the block and thus the next metadata. 
let remaining_meta = BestFitMeta { size: meta.size - min, @@ -302,11 +328,25 @@ impl super::Allocator for BestFitAllocator { (true, block, prev) } else { + // No split: entire free block (meta.size) is consumed. + #[cfg(any(feature = "metrics", metrics))] + { + free_sub = meta.size; + blocks_sub = 1; + } + (false, block, prev) } } Err(_) => { let (block, prev) = self.select_block(size, request)?; + // Retry succeeded with original size; always no-split. + #[cfg(any(feature = "metrics", metrics))] + { + let meta = unsafe { block.cast::().as_ref() }; + free_sub = meta.size; + blocks_sub = 1; + } (false, block, prev) } }; @@ -334,6 +374,9 @@ impl super::Allocator for BestFitAllocator { }); } + #[cfg(any(feature = "metrics", metrics))] + self.metrics.record_alloc(free_sub, blocks_sub); + // Return the user pointer. Ok(unsafe { Self::user_ptr(block).cast() }) } @@ -359,11 +402,179 @@ impl super::Allocator for BestFitAllocator { // Set the block as the new head. self.head = Some(block); + + #[cfg(any(feature = "metrics", metrics))] + self.metrics.record_free(meta.size); + } +} + +#[cfg(any(feature = "metrics", metrics))] +impl BestFitAllocator { + pub fn metrics(&self) -> super::Metrics { + self.metrics } } // TESTING ------------------------------------------------------------------------------------------------------------ +#[cfg(all(test, any(feature = "metrics", metrics)))] +mod metrics_tests { + use super::super::*; + use super::*; + use core::mem::size_of; + + fn alloc_range(length: usize) -> std::ops::Range { + use crate::hal::mem::PhysAddr; + let layout = std::alloc::Layout::from_size_align(length, align_of::()).unwrap(); + let ptr = unsafe { std::alloc::alloc(layout) }; + if ptr.is_null() { + std::alloc::handle_alloc_error(layout); + } + PhysAddr::new(ptr as usize)..PhysAddr::new(ptr as usize + length) + } + + #[test] + fn metrics_fresh_allocator_is_zero() { + let allocator = BestFitAllocator::new(); + let m = allocator.metrics(); + assert_eq!(m.total_bytes, 0); 
+ assert_eq!(m.free_bytes, 0); + assert_eq!(m.allocated_bytes(), 0); + assert_eq!(m.free_blocks, 0); + assert_eq!(m.alloc_count, 0); + assert_eq!(m.free_count, 0); + } + + #[test] + fn metrics_after_add_range() { + let mut allocator = BestFitAllocator::new(); + let range_len = 4096usize; + let range = alloc_range(range_len); + unsafe { allocator.add_range(&range).unwrap() }; + + let m = allocator.metrics(); + assert_eq!(m.total_bytes, range_len); + assert_eq!(m.free_blocks, 1); + assert!(m.free_bytes > 0); + assert!(m.free_bytes < range_len, "metadata must consume some bytes"); + assert_eq!(m.allocated_bytes(), range_len - m.free_bytes); + assert_eq!(m.alloc_count, 0); + assert_eq!(m.free_count, 0); + } + + #[test] + fn metrics_alloc_increments_count_and_reduces_free() { + let mut allocator = BestFitAllocator::new(); + let range = alloc_range(4096); + unsafe { allocator.add_range(&range).unwrap() }; + let before = allocator.metrics(); + + let _ptr = unsafe { allocator.malloc::(128, 1, None).unwrap() }; + let after = allocator.metrics(); + + assert_eq!(after.alloc_count, 1); + assert_eq!(after.free_count, 0); + assert!(after.free_bytes < before.free_bytes); + } + + #[test] + fn metrics_free_increments_count_and_restores_free_bytes() { + let mut allocator = BestFitAllocator::new(); + let range = alloc_range(4096); + unsafe { allocator.add_range(&range).unwrap() }; + + let ptr = unsafe { allocator.malloc::(128, 1, None).unwrap() }; + let after_alloc = allocator.metrics(); + + unsafe { allocator.free(ptr, 128) }; + let after_free = allocator.metrics(); + + assert_eq!(after_free.alloc_count, 1); + assert_eq!(after_free.free_count, 1); + // Freeing must return bytes to the free pool. 
+ assert!(after_free.free_bytes > after_alloc.free_bytes); + } + + #[test] + fn metrics_free_blocks_count() { + let mut allocator = BestFitAllocator::new(); + let range = alloc_range(4096); + unsafe { allocator.add_range(&range).unwrap() }; + + let p1 = unsafe { allocator.malloc::(128, 1, None).unwrap() }; + let p2 = unsafe { allocator.malloc::(128, 1, None).unwrap() }; + let after_two_allocs = allocator.metrics(); + + unsafe { allocator.free(p1, 128) }; + let after_free1 = allocator.metrics(); + + unsafe { allocator.free(p2, 128) }; + let after_free2 = allocator.metrics(); + + // Each free prepends one block to the free list. + assert_eq!(after_free1.free_blocks, after_two_allocs.free_blocks + 1); + assert_eq!(after_free2.free_blocks, after_two_allocs.free_blocks + 2); + assert_eq!(after_free2.alloc_count, 2); + assert_eq!(after_free2.free_count, 2); + } + + #[test] + fn metrics_largest_free_block_single_range() { + let mut allocator = BestFitAllocator::new(); + let range = alloc_range(4096); + unsafe { allocator.add_range(&range).unwrap() }; + + let m = allocator.metrics(); + // Single block: all free bytes in one block. + assert_eq!(m.free_blocks, 1); + + let _p = unsafe { allocator.malloc::(128, 1, None).unwrap() }; + let m2 = allocator.metrics(); + // Free bytes shrink after allocation. + assert!(m2.free_bytes <= m.free_bytes); + } + + #[test] + fn metrics_multiple_ranges_total_bytes() { + let mut allocator = BestFitAllocator::new(); + const RANGE_LEN: usize = 1024; + const RANGES: usize = 3; + + for _ in 0..RANGES { + let range = alloc_range(RANGE_LEN); + unsafe { allocator.add_range(&range).unwrap() }; + } + + let m = allocator.metrics(); + assert_eq!(m.total_bytes, RANGE_LEN * RANGES); + assert_eq!(m.free_blocks, RANGES); + } + + #[test] + fn metrics_exact_fit_no_split() { + // Allocate the entire usable space of a single-block range so no split occurs. 
+ let mut allocator = BestFitAllocator::new(); + let overhead = size_of::() + BestFitAllocator::align_up(); + let user_size = 128usize; + let range = alloc_range(user_size + overhead); + unsafe { allocator.add_range(&range).unwrap() }; + + let before = allocator.metrics(); + assert_eq!(before.free_blocks, 1); + + let ptr = unsafe { allocator.malloc::(user_size, 1, None).unwrap() }; + let after_alloc = allocator.metrics(); + // Exact fit: no remainder block left. + assert_eq!(after_alloc.free_blocks, 0); + assert_eq!(after_alloc.free_bytes, 0); + + unsafe { allocator.free(ptr, user_size) }; + let after_free = allocator.metrics(); + assert_eq!(after_free.free_blocks, 1); + assert_eq!(after_free.free_bytes, before.free_bytes); + } +} + #[cfg(test)] mod tests { use crate::mem::align_up; diff --git a/src/mem/vmm/nommu.rs b/src/mem/vmm/nommu.rs index e47533a..027c591 100644 --- a/src/mem/vmm/nommu.rs +++ b/src/mem/vmm/nommu.rs @@ -17,6 +17,13 @@ pub struct AddressSpace { allocator: bestfit::BestFitAllocator, } +#[cfg(any(feature = "metrics", metrics))] +impl AddressSpace { + pub(crate) fn metrics(&self) -> crate::mem::alloc::Metrics { + self.allocator.metrics() + } +} + impl vmm::AddressSpacelike for AddressSpace { fn new(pgs: usize) -> Result { let begin = pfa::alloc_page(pgs).ok_or(kerr!(ENOMEM))?; diff --git a/src/metrics.rs b/src/metrics.rs new file mode 100644 index 0000000..04ef82e --- /dev/null +++ b/src/metrics.rs @@ -0,0 +1,2 @@ +pub mod store; +pub use store::{HeapSnapshot, StackSnapshot}; diff --git a/src/metrics/store.rs b/src/metrics/store.rs new file mode 100644 index 0000000..a503ec1 --- /dev/null +++ b/src/metrics/store.rs @@ -0,0 +1,98 @@ +use crate::hal::stack::StackMetrics; +use crate::mem::alloc::Metrics as AllocMetrics; +use crate::sync::seqlock::Seqlock; + +impl From for HeapSnapshot { + fn from(m: AllocMetrics) -> Self { + Self { + total_bytes: m.total_bytes, + free_bytes: m.free_bytes, + used_bytes: m.allocated_bytes(), + alloc_count: 
m.alloc_count, + free_count: m.free_count, + } + } +} + +impl From for StackSnapshot { + fn from(m: StackMetrics) -> Self { + Self { + total_bytes: m.total_bytes, + used_bytes: m.used_bytes, + free_bytes: m.free_bytes, + peak_used_bytes: m.peak_used_bytes, + } + } +} + +pub(crate) const SLOTS: usize = crate::sched::THREAD_COUNT; + +#[derive(Debug, Clone, Copy)] +pub struct HeapSnapshot { + pub total_bytes: usize, + pub free_bytes: usize, + pub used_bytes: usize, + pub alloc_count: u64, + pub free_count: u64, +} + +#[derive(Debug, Clone, Copy)] +pub struct StackSnapshot { + pub total_bytes: usize, + pub used_bytes: usize, + pub free_bytes: usize, + pub peak_used_bytes: usize, +} + +static GLOBAL_HEAP: Seqlock> = Seqlock::new(None); +static TASK_HEAPS: [Seqlock>; SLOTS] = [const { Seqlock::new(None) }; SLOTS]; +static THREAD_STACKS: [Seqlock>; SLOTS] = + [const { Seqlock::new(None) }; SLOTS]; + +pub(crate) fn write_global_heap(s: HeapSnapshot) { + GLOBAL_HEAP.write(Some(s)); +} + +pub(crate) fn write_task_heap(slot: usize, s: HeapSnapshot) { + if slot < SLOTS { + TASK_HEAPS[slot].write(Some(s)); + } +} + +pub(crate) fn clear_task_heap(slot: usize) { + if slot < SLOTS { + TASK_HEAPS[slot].write(None); + } +} + +pub(crate) fn write_thread_stack(slot: usize, s: StackSnapshot) { + if slot < SLOTS { + THREAD_STACKS[slot].write(Some(s)); + } +} + +pub(crate) fn clear_thread_stack(slot: usize) { + if slot < SLOTS { + THREAD_STACKS[slot].write(None); + } +} + +pub fn global_heap() -> Option { + GLOBAL_HEAP.read() +} + +pub fn task_heap(slot: usize) -> Option { + if slot < SLOTS { + TASK_HEAPS[slot].read() + } else { + None + } +} + +pub fn thread_stack(slot: usize) -> Option { + if slot < SLOTS { + THREAD_STACKS[slot].read() + } else { + None + } +} diff --git a/src/sched.rs b/src/sched.rs index c5f01d6..c39c7d0 100644 --- a/src/sched.rs +++ b/src/sched.rs @@ -29,7 +29,7 @@ use crate::{ type ThreadMap = BitReclaimMap; type TaskMap = BitReclaimMap; -const THREAD_COUNT: usize 
= 32; +pub(crate) const THREAD_COUNT: usize = 32; type GlobalScheduler = Scheduler; static SCHED: SpinLocked = SpinLocked::new(GlobalScheduler::new()); @@ -342,10 +342,20 @@ impl Scheduler { } pub fn create_task(&mut self, attrs: task::Attributes) -> Result { - self.tasks.insert_with(|idx| { + let task_id = self.tasks.insert_with(|idx| { let task = task::Task::new(task::UId::new(idx), attrs); task.map(|t| (task::UId::new(idx), t)) - }) + })?; + + #[cfg(any(feature = "metrics", metrics))] + if let Some(task) = self.tasks.get(task_id) { + crate::metrics::store::write_task_heap( + task_id.as_usize(), + task.allocator_metrics().into(), + ); + } + + Ok(task_id) } /// Dequeues all threads of the task and removes the task. If the current thread belongs to the task, reschedule will be triggered. @@ -367,6 +377,9 @@ impl Scheduler { bug!("failed to remove thread {} from thread list.", id); } + #[cfg(any(feature = "metrics", metrics))] + crate::metrics::store::clear_thread_stack(id.as_usize()); + if Some(id) == self.current { self.current = None; reschedule(); @@ -374,6 +387,10 @@ impl Scheduler { } self.tasks.remove(&uid).ok_or(kerr!(EINVAL))?; + + #[cfg(any(feature = "metrics", metrics))] + crate::metrics::store::clear_task_heap(uid.as_usize()); + Ok(()) } @@ -388,7 +405,8 @@ impl Scheduler { }; let task = self.tasks.get_mut(task).ok_or(kerr!(EINVAL))?; - self.threads + let uid = self + .threads .insert_with(|idx| { let uid = task.allocate_tid().get_uid(idx); let stack = task.allocate_stack(attrs)?; @@ -398,7 +416,17 @@ impl Scheduler { .and_then(|k| { task.register_thread(k, &mut self.threads)?; Ok(k) - }) + })?; + + #[cfg(any(feature = "metrics", metrics))] + if let Some(thread) = self.threads.get(uid) { + crate::metrics::store::write_thread_stack( + uid.as_usize(), + thread.stack_metrics().into(), + ); + } + + Ok(uid) } /// Dequeues a thread and removes it from its corresponding task. If the thread is currently running, reschedule will be triggered. 
@@ -421,12 +449,35 @@ impl Scheduler { self.threads.remove(&uid).ok_or(kerr!(EINVAL))?; + #[cfg(any(feature = "metrics", metrics))] + crate::metrics::store::clear_thread_stack(uid.as_usize()); + if Some(uid) == self.current { self.current = None; reschedule(); } Ok(()) } + + /// Updates the lock-free mirror for the currently scheduled thread and its task. + /// Called on every reschedule; only the thread that just ran needs updating. + #[cfg(any(feature = "metrics", metrics))] + fn mirror_stats(&self) { + use crate::metrics::store; + + store::write_global_heap(crate::mem::global_metrics().into()); + + if let Some(uid) = self.current { + if let Some(thread) = self.threads.get(uid) { + store::write_thread_stack(uid.as_usize(), thread.stack_metrics().into()); + + let task_id = thread.task_id(); + if let Some(task) = self.tasks.get(task_id) { + store::write_task_heap(task_id.as_usize(), task.allocator_metrics().into()); + } + } + } + } } /// This function provides safe access to the global scheduler. @@ -510,6 +561,12 @@ pub extern "C" fn sched_enter(mut ctx: *mut c_void) -> *mut c_void { let old = sched.current.map(|c| c.owner()); sched.land(ctx); + // Mirror stats while self.current still points to the outgoing thread — + // its stack context was just saved by land() and its task reflects any + // allocations made since the last reschedule. 
+ #[cfg(any(feature = "metrics", metrics))] + sched.mirror_stats(); + if let Some((new, task)) = sched.do_sched(time::tick()) { if old != Some(task.id) { dispch::prepare(task); diff --git a/src/sched/task.rs b/src/sched/task.rs index 09e7296..eec12da 100644 --- a/src/sched/task.rs +++ b/src/sched/task.rs @@ -37,6 +37,10 @@ impl UId { Self { uid } } + pub fn as_usize(&self) -> usize { + self.uid + } + pub fn is_kernel(&self) -> bool { self.uid == 0 } @@ -137,4 +141,9 @@ impl Task { pub fn threads(&self) -> &list::List { &self.threads } + + #[cfg(any(feature = "metrics", metrics))] + pub(crate) fn allocator_metrics(&self) -> crate::mem::alloc::Metrics { + self.address_space.metrics() + } } diff --git a/src/sched/thread.rs b/src/sched/thread.rs index 77d1321..7f6d52c 100644 --- a/src/sched/thread.rs +++ b/src/sched/thread.rs @@ -342,6 +342,11 @@ impl Thread { self.state.stack.sp() } + #[cfg(any(feature = "metrics", metrics))] + pub fn stack_metrics(&self) -> crate::hal::stack::StackMetrics { + self.state.stack.metrics() + } + pub fn uid(&self) -> UId { self.uid } diff --git a/src/sync.rs b/src/sync.rs index 85005f8..2c6ae5f 100644 --- a/src/sync.rs +++ b/src/sync.rs @@ -1,4 +1,6 @@ pub mod atomic; pub mod once; +#[cfg(any(feature = "metrics", metrics))] +pub mod seqlock; pub mod spinlock; pub mod waiter; diff --git a/src/sync/seqlock.rs b/src/sync/seqlock.rs new file mode 100644 index 0000000..07f446b --- /dev/null +++ b/src/sync/seqlock.rs @@ -0,0 +1,48 @@ +use core::cell::UnsafeCell; +use core::hint::spin_loop; +use core::sync::atomic::{AtomicUsize, Ordering}; + +/// Single-writer, multi-reader seqlock. +/// +/// Odd `seq` means a write is in progress; even means data is stable. +/// Readers spin on odd seq and retry if seq changes while reading. 
+            // Safety: seq1 was even, so no write was in progress at the check; if a
+            // write begins afterwards, the seq1 == seq2 recheck below detects it and
+            // discards this (possibly torn) read before returning.