From f26e8675966ff2a3abbf190c4ee0dd04a6746f40 Mon Sep 17 00:00:00 2001 From: Ralf Anton Beier Date: Sat, 30 May 2026 20:35:34 +0200 Subject: [PATCH] =?UTF-8?q?fix(inline):=20map=20full=E2=86=92local=20funct?= =?UTF-8?q?ion=20index=20so=20imported=20calls=20don't=20break=20inlining?= =?UTF-8?q?=20=E2=80=94=20closes=20#153?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit gale follow-up (#153): on v1.1.4, `inline_functions` reverted the inline of a local callee whenever the CALLER also called an imported function. This blocked the gale C↔Rust seam (z_impl_k_sem_give inlines gale_k_sem_give_decide but also calls 5 env:: kernel imports). Root cause: a function-INDEX-SPACE confusion, not a verifier gap. `Call(func_idx)` uses the FULL WebAssembly index space — imported functions occupy 0..num_imported_funcs, local functions follow. But the inliner: - built `function_sizes` keyed by LOCAL index (module.functions, from 0) while `call_counts` (from count_calls_recursive) keys by FULL index; - indexed `all_functions` (local-only) with the FULL `func_idx`. With an import present the spaces diverge: the import's index (0) collides with local function 0, so the inliner treated a VOID imported call as a call to local function 0, emitted a `local.set` to bind its params with NO argument on the stack, and produced a malformed body. The verifier correctly rejected the bad encode ("Stack underflow in LocalSet") → the whole inline reverted. (The reporter hypothesised a missing verifier model for imports; the real bug was upstream in the inliner emitting nonsense.) Fix (loom-core/src/lib.rs, inline_functions / inline_calls_in_block): - compute num_imported_funcs once per iteration; - key `function_sizes` by FULL index (local idx + offset) to match call_counts; - exclude imported indices (< offset) from inline candidates — they have no body to inline; - in inline_calls_in_block, map func_idx → local via checked_sub(offset); an imported index yields None → keep the original call untouched; thread the offset through the Block/Loop/If recursion. Modules with no imported functions are unaffected (offset 0), so all existing inline behaviour is preserved. Regression test test_inline_caller_with_imported_call: a caller that calls an import AND a local i64 callee now inlines the local callee (call removed, verified) while preserving the import call. 386 lib tests pass; #151 i64/i32 repros still inline; gale wasm + full pipeline dogfood clean. The 7 pre-existing LICM/DCE integration failures (#150) are unrelated and unchanged. Closes #153. Trace: REQ-8 --- CHANGELOG.md | 31 ++++++++++++ Cargo.toml | 2 +- loom-core/src/lib.rs | 110 +++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 138 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 94f0823..2d9a037 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,37 @@ All notable changes to LOOM will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [1.1.5] - 2026-05-30 + +**Bug-fix release: inline a local callee even when the caller also calls +an imported function (#153).** Closes the v1.1.4 follow-up that blocked +the gale C↔Rust seam — `z_impl_k_sem_give` inlines `gale_k_sem_give_decide` +but also calls 5 `env::` kernel imports, and the inline was reverted. + +### Fixed + +- **#153: `inline_functions` reverted the inline whenever the *caller* + also called an imported function.** Root cause was a + **function-index-space confusion**, not a verifier gap. `Call(func_idx)` + uses the *full* WebAssembly index space — imported functions occupy + indices `0..num_imported_funcs`, local functions follow — but the + inliner indexed `module.functions` (local functions only, from 0) and + built its candidate set from `call_counts` (full space) against + `function_sizes` (local space). With an import present the spaces + diverge: the import's index collides with a local function's slot, so + the inliner treated a *void imported call* as a call to local function + 0, emitted a `local.set` to bind its parameters with **no argument on + the stack**, and produced a malformed body. The verifier correctly + rejected the bad encoding (`Stack underflow in LocalSet`) — so the + *whole* inline reverted. Fix: account for the import offset + everywhere — key `function_sizes` and the candidate gate by the full + index, exclude imported indices from candidates (they have no body to + inline), and map full→local (`func_idx - num_imported_funcs`) when + indexing the local-function table, keeping imported calls untouched. + Modules with no imported functions are unaffected (offset 0). Regression + test `test_inline_caller_with_imported_call` asserts the import call is + preserved while the local callee is inlined and verified. + ## [1.1.4] - 2026-05-30 **Bug-fix release: verified i64 (and all-type) inlining (#151).** Closes diff --git a/Cargo.toml b/Cargo.toml index 5d75d29..7aa588f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,7 +9,7 @@ members = [ ] [workspace.package] -version = "1.1.4" +version = "1.1.5" authors = ["PulseEngine "] edition = "2024" license = "Apache-2.0" diff --git a/loom-core/src/lib.rs b/loom-core/src/lib.rs index 5806484..8be8736 100644 --- a/loom-core/src/lib.rs +++ b/loom-core/src/lib.rs @@ -13237,14 +13237,35 @@ pub mod optimize { } // Build context at start of each iteration (after possible function changes) let ctx = ValidationContext::from_module(module); + + // loom#153: `Call(func_idx)` uses the FULL WebAssembly function + // index space — imported functions occupy indices + // 0..num_imported_funcs, local functions follow. But + // `module.functions` (and `all_functions`) holds ONLY local + // functions, indexed from 0. Without accounting for the import + // offset, an imported call's index collides with a local + // function's slot: the inliner would treat a void imported call + // as a call to local function 0, emit a `local.set` for its + // params with no argument on the stack, and produce a malformed + // body the verifier rejects (Stack underflow) → the whole inline + // reverts. Key all index-space maps by the FULL index and map + // back to local only when indexing `all_functions`. + let num_imported_funcs = module + .imports + .iter() + .filter(|imp| matches!(imp.kind, crate::ImportKind::Func(_))) + .count() as u32; + // Phase 1: Build call graph and analyze functions let mut call_counts: BTreeMap = BTreeMap::new(); let mut function_sizes: BTreeMap = BTreeMap::new(); - // Calculate function sizes (instruction count) + // Calculate function sizes (instruction count). Keyed by FULL + // index (local idx + import offset) to match `call_counts`, + // which `count_calls_recursive` records in the full index space. for (idx, func) in module.functions.iter().enumerate() { let size = count_instructions_recursive(&func.instructions); - function_sizes.insert(idx as u32, size); + function_sizes.insert(idx as u32 + num_imported_funcs, size); } // Count call sites for each function @@ -13255,6 +13276,13 @@ pub mod optimize { // Phase 2: Identify inlining candidates let mut inline_candidates = Vec::new(); for (func_idx, &call_count) in &call_counts { + // loom#153: imported functions (full index < import count) + // have no body to inline — never candidates. Without this + // they'd pass the `size < 10` gate (size defaults to 0) and + // the inliner would try to inline the import. + if *func_idx < num_imported_funcs { + continue; + } let size = function_sizes.get(func_idx).copied().unwrap_or(0); // Heuristic: inline if: @@ -13313,6 +13341,7 @@ pub mod optimize { &func.instructions, &inline_set, &all_functions, + num_imported_funcs, func.signature.params.len() as u32, &mut func.locals, ); @@ -13345,6 +13374,7 @@ pub mod optimize { instructions: &[Instruction], inline_set: &std::collections::HashSet, all_functions: &[super::Function], + num_imported_funcs: u32, base_local_count: u32, caller_locals: &mut Vec<(u32, super::ValueType)>, ) -> Vec { @@ -13353,8 +13383,13 @@ pub mod optimize { for instr in instructions { match instr { Instruction::Call(func_idx) if inline_set.contains(func_idx) => { - // Inline this function call - if let Some(callee) = all_functions.get(*func_idx as usize) { + // Inline this function call. loom#153: `func_idx` is in + // the FULL index space (imports first); map it to the + // local-function slot. An imported index (< import count) + // has no body — `checked_sub` yields None and we keep the + // original call rather than inline a nonexistent body. + let local_idx = func_idx.checked_sub(num_imported_funcs); + if let Some(callee) = local_idx.and_then(|li| all_functions.get(li as usize)) { // Calculate local index offset to avoid conflicts let current_local_count = base_local_count + caller_locals.iter().map(|(count, _)| count).sum::(); @@ -13406,6 +13441,7 @@ pub mod optimize { body, inline_set, all_functions, + num_imported_funcs, base_local_count, caller_locals, ), @@ -13419,6 +13455,7 @@ pub mod optimize { body, inline_set, all_functions, + num_imported_funcs, base_local_count, caller_locals, ), @@ -13436,6 +13473,7 @@ pub mod optimize { then_body, inline_set, all_functions, + num_imported_funcs, base_local_count, caller_locals, ), @@ -13443,6 +13481,7 @@ pub mod optimize { else_body, inline_set, all_functions, + num_imported_funcs, base_local_count, caller_locals, ), @@ -18372,6 +18411,69 @@ mod tests { ); } + #[test] + fn test_inline_caller_with_imported_call() { + // loom#153: when the caller also calls an IMPORTED function, the + // inline of a *local* callee was reverted. `Call(func_idx)` uses the + // full index space (imports first), but the inliner indexed the + // local-only `all_functions` with it — so the import's index (0) + // collided with local function 0, and the inliner tried to inline + // the void import call, emitting a `local.set` with no stack + // argument → malformed body → verifier "Stack underflow" → revert. + // After the fix: the import call ($ext) is preserved and the local + // callee ($dec) is inlined (call to it removed) and verified. + let wat = r#"(module + (import "env" "ext" (func $ext)) + (func $dec (param i32) (result i64) + (i64.extend_i32_u (local.get 0))) + (func $z (export "z") (param i32) (result i64) + (call $ext) + (call $dec (local.get 0))) + )"#; + let mut module = parse::parse_wat(wat).expect("parse"); + + // Full index space: $ext = 0 (import), $dec = 1, $z = 2. + // module.functions holds locals only: [0]=$dec, [1]=$z. + let z = &module.functions[1]; + let ext_calls_before = z + .instructions + .iter() + .filter(|i| matches!(i, Instruction::Call(0))) + .count(); + let dec_calls_before = z + .instructions + .iter() + .filter(|i| matches!(i, Instruction::Call(1))) + .count(); + assert_eq!(ext_calls_before, 1, "caller starts with one import call"); + assert_eq!(dec_calls_before, 1, "caller starts with one local call"); + + optimize::inline_functions(&mut module).expect("must not panic"); + + let z = &module.functions[1]; + let ext_calls_after = z + .instructions + .iter() + .filter(|i| matches!(i, Instruction::Call(0))) + .count(); + let dec_calls_after = z + .instructions + .iter() + .filter(|i| matches!(i, Instruction::Call(1))) + .count(); + assert_eq!( + ext_calls_after, 1, + "imported call must be preserved (never inlined — it has no body)" + ); + assert_eq!( + dec_calls_after, 0, + "local callee must be inlined even though the caller also calls an import" + ); + + let wasm_bytes = encode::encode_wasm(&module).expect("encode"); + wasmparser::validate(&wasm_bytes).expect("output validates"); + } + #[test] fn test_inline_i64_loop_kinduction_no_panic() { // loom#145 regression: the prior i64 inline tests are loopless, so