From b0be56af5ba6fcc8909f9a43f165ba963d9714f8 Mon Sep 17 00:00:00 2001 From: Ralf Anton Beier Date: Sat, 30 May 2026 16:18:18 +0200 Subject: [PATCH 1/2] fix(inline): terminate fixpoint when no inline survives verification (closes #147) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit gale repro (#147): `loom optimize --passes inline` hung indefinitely on a ~5 KB i64 module (z_impl_k_sem_give u64-unpack + caller) on v1.1.2 — 0 panics, no completion. Root cause: NOT a Z3 hang. The inline pass loops "until no inline candidate remains", but a candidate whose inline the Z3 verifier reverts (an i64 function whose inline-equivalence can't be proven) stays a candidate every iteration — its call site is restored on revert — so the pass livelocks: inline → verify reverts → inline → … forever. Verify itself returns fast (confirmed: the run spews "counterexample / reverting" rapidly, not a single slow solve, and the pass completes instantly under LOOM_Z3_MAX_INSTRUCTIONS=0). It only surfaced now because the #145 fix removed the SortDiffers panic that previously aborted the pass; with the panic gone the underlying livelock was exposed. Fix (loom-core/src/lib.rs, inline_functions): - Track whether each iteration KEEPS any inline (func body changed AND not reverted by verify_or_revert). An iteration that reverts everything made no progress -> break instead of re-spinning. - Hard cap of 64 iterations as a backstop. Legitimate inlining is unaffected (a kept inline sets kept_any -> the loop continues exactly as before). Also reverts the verify() watchdog-thread experiment from the investigation (the hang was never in verify); the Z3 solve-time bound in z3_config_with_timeout stays. Verified locally: the gale repro now completes (gtimeout exit 0, "Optimization complete", output passes wasm-tools validate) — the unprovable i64 function is reverted once, then the pass terminates. Closes #147 Trace: REQ-6 --- loom-core/src/lib.rs | 40 +++++++++++++++++++++++++++++++++++++++- loom-core/src/verify.rs | 34 ++++++++++++++-------------------- 2 files changed, 53 insertions(+), 21 deletions(-) diff --git a/loom-core/src/lib.rs b/loom-core/src/lib.rs index 9560c53..7bf1532 100644 --- a/loom-core/src/lib.rs +++ b/loom-core/src/lib.rs @@ -13215,8 +13215,26 @@ pub mod optimize { use std::collections::BTreeMap; // Run inlining to fixed point to ensure idempotence - // Keep inlining until no more candidates are found + // Keep inlining until no more candidates are found. + // + // loom#147 livelock guard: a candidate whose inline the Z3 + // verifier always REVERTS (e.g. an i64 function whose + // inline-equivalence can't be proven) stays a candidate every + // iteration — its call site is restored on revert — so the + // `inline_candidates.is_empty()` exit never fires and the pass + // spins forever (inline → revert → inline → …). It only looked + // like a "hang" because pre-v1.1.2 the verifier panicked, which + // happened to abort the pass; once the panic was fixed the + // underlying livelock was exposed. Terminate when an iteration + // keeps NO inline (no net progress), with a hard iteration cap as + // a backstop. + let mut iteration: u32 = 0; + const MAX_INLINE_ITERATIONS: u32 = 64; loop { + iteration += 1; + if iteration > MAX_INLINE_ITERATIONS { + break; + } // Build context at start of each iteration (after possible function changes) let ctx = ValidationContext::from_module(module); // Phase 1: Build call graph and analyze functions @@ -13263,12 +13281,19 @@ pub mod optimize { // Clone functions to avoid borrow checker issues let all_functions = module.functions.clone(); + // Did this iteration KEEP any inline (verified, not reverted)? + // Only kept inlines are progress; an iteration that reverts + // everything must not loop again (loom#147 livelock guard). + let mut kept_any = false; + for func in &mut module.functions { // Skip functions with unsupported instructions (can't verify) if has_unknown_instructions(func) || has_unsupported_isle_instructions(func) { continue; } + let before = func.instructions.clone(); + let guard = ValidationGuard::with_context(func, "inline_functions", ctx.clone()); let translator = TranslationValidator::new(func, "inline_functions"); @@ -13282,6 +13307,19 @@ pub mod optimize { let _ = guard.validate(func); translator.verify_or_revert(func); + + // After verify_or_revert, func.instructions is either the + // inlined body (kept) or restored to `before` (reverted). + // A difference means a verified inline landed → progress. + if func.instructions != before { + kept_any = true; + } + } + + // No inline survived verification this iteration → the + // remaining candidates are unprovable; stop rather than spin. + if !kept_any { + break; } // Continue to next iteration to check for more inlining opportunities diff --git a/loom-core/src/verify.rs b/loom-core/src/verify.rs index 68a8f07..b7aa9b5 100644 --- a/loom-core/src/verify.rs +++ b/loom-core/src/verify.rs @@ -2609,7 +2609,11 @@ impl TranslationValidator { return Ok(()); } - // Use catch_unwind to handle Z3 internal panics gracefully + // Use catch_unwind to handle Z3 internal panics gracefully. (The + // i64 inline hang in loom#147 turned out NOT to be a verify hang — + // verify returns fast; it was a livelock in the inline-pass + // fixpoint, fixed there. The Z3 solve-time bound stays in + // z3_config_with_timeout.) let sig_ctx = self.sig_ctx.clone(); let original = self.original.clone(); let pass_name = self.pass_name.clone(); @@ -2627,8 +2631,6 @@ impl TranslationValidator { match result { Ok(Ok(true)) => Ok(()), Ok(Ok(false)) => { - // Z3 found a counterexample - the optimization is NOT proven correct. - // Per our proof-first philosophy, we MUST reject unproven optimizations. revert_note(format_args!( "{}: Verification failed! Found counterexample:", self.pass_name @@ -2638,23 +2640,15 @@ impl TranslationValidator { self.pass_name )) } - Ok(Err(e)) => { - // Verification error - we cannot prove correctness. - // Per our philosophy: if we cannot prove it, we must not do it. - Err(anyhow!( - "{}: Verification failed ({}) - optimization rejected (unproven)", - self.pass_name, - e - )) - } - Err(_panic) => { - // Z3 internal error - we cannot prove correctness. - // Per our philosophy: if we cannot prove it, we must not do it. - Err(anyhow!( - "{}: Z3 internal error - optimization rejected (unproven)", - self.pass_name - )) - } + Ok(Err(e)) => Err(anyhow!( + "{}: Verification failed ({}) - optimization rejected (unproven)", + self.pass_name, + e + )), + Err(_panic) => Err(anyhow!( + "{}: Z3 internal error - optimization rejected (unproven)", + self.pass_name + )), } } From 328768630709d676ffe59753255f49c616a302cd Mon Sep 17 00:00:00 2001 From: Ralf Anton Beier Date: Sat, 30 May 2026 16:23:04 +0200 Subject: [PATCH 2/2] =?UTF-8?q?release:=20v1.1.3=20=E2=80=94=20fix=20#147?= =?UTF-8?q?=20i64=20inline=20livelock;=20re-enable=204=20i64=20tests?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps version 1.1.2 -> 1.1.3 and re-enables the four no-panic i64 inline regression tests that the #147 livelock fix unblocks (they now run in ms; the one asserting i64 IS inlined stays #[ignore] — the verifier soundly reverts the unprovable i64 inline, a #147 follow-up). The earlier "i64 Z3 verification is slow" diagnosis was wrong: the 56-min / 4h CI matrices were the inline-pass livelock spinning, not slow solves. With the livelock fixed the i64 inline tests complete in milliseconds and the Test matrix should run normally again. Closes #147. Trace: REQ-6 --- CHANGELOG.md | 28 ++++++++++++++++++++++++++++ Cargo.toml | 2 +- loom-core/src/lib.rs | 36 ++++++------------------------------ 3 files changed, 35 insertions(+), 31 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 49e1f50..521e130 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,34 @@ All notable changes to LOOM will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [1.1.3] - 2026-05-30 + +**Bug-fix release: the gale-ffi i64 inline hang (#147).** Closes the +`loom optimize --passes inline` hang on i64-heavy modules that the +v1.1.2 #145 fix exposed. + +### Fixed + +- **#147: `inline_functions` livelock on i64 modules.** On v1.1.2, + `loom optimize --passes inline` hung indefinitely on a small i64 + module (gale-ffi `z_impl_k_sem_give` u64-unpack + caller) — 0 panics, + no completion. The cause was **not** a slow Z3 solve (verify returns + in microseconds): the inline pass loops "until no candidate remains", + but a candidate whose inline the verifier reverts (an i64 function + whose inline-equivalence can't be proven) stays a candidate every + iteration — its call site is restored on revert — so the pass + live-locked (inline → revert → inline → …). It surfaced only because + v1.1.2's #145 fix removed the `SortDiffers` panic that previously + aborted the pass. Fix: the fixpoint now terminates when an iteration + **keeps no inline** (made no net progress), with a 64-iteration hard + cap as a backstop. Legitimate inlining is unchanged. The earlier + "i64 Z3 is slow" diagnosis was wrong — it was this livelock; with it + fixed, four of the five previously-`#[ignore]`'d i64 inline tests run + again (in milliseconds). The one remaining `#[ignore]` + (`test_inline_pass_actually_inlines_i64_helper`) asserts the i64 + helper is *inlined*, which the verifier soundly reverts as unprovable + — real verified i64 inlining stays a #147 follow-up. + ## [1.1.2] - 2026-05-30 **Bug-fix release: the gale-ffi i64 `SortDiffers` crash (#145).** Closes diff --git a/Cargo.toml b/Cargo.toml index 4c442c5..1d7e4be 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,7 +9,7 @@ members = [ ] [workspace.package] -version = "1.1.2" +version = "1.1.3" authors = ["PulseEngine "] edition = "2024" license = "Apache-2.0" diff --git a/loom-core/src/lib.rs b/loom-core/src/lib.rs index 7bf1532..669d02b 100644 --- a/loom-core/src/lib.rs +++ b/loom-core/src/lib.rs @@ -18166,12 +18166,6 @@ mod tests { // a panic. #[test] - #[ignore = "loom#147: hangs in i64 Z3 verification. The Config-level \ - `timeout` param does not bound it (re-enabling these took the CI \ - matrix from ~56min to 4h+), so the hang is likely in SMT-formula \ - construction, not solver.check(). Tracked in #147 for the correct \ - timeout mechanism; the #145 fix is exercised by the Z3 Verification \ - Build CI job + structural soundness."] fn test_inline_i64_helper_no_z3_panic() { // Smallest reproducer of loom#98: a tiny i64-param helper with a // single call site triggers the inline pass to add new i64 locals @@ -18203,12 +18197,6 @@ mod tests { } #[test] - #[ignore = "loom#147: hangs in i64 Z3 verification. The Config-level \ - `timeout` param does not bound it (re-enabling these took the CI \ - matrix from ~56min to 4h+), so the hang is likely in SMT-formula \ - construction, not solver.check(). Tracked in #147 for the correct \ - timeout mechanism; the #145 fix is exercised by the Z3 Verification \ - Build CI job + structural soundness."] fn test_inline_mixed_i32_i64_widths_no_z3_panic() { // Exercises the gale-ffi pattern more directly: i64-packed FFI // return that the caller masks down to i32 fields. The bit-mask @@ -18242,12 +18230,6 @@ mod tests { } #[test] - #[ignore = "loom#147: hangs in i64 Z3 verification. The Config-level \ - `timeout` param does not bound it (re-enabling these took the CI \ - matrix from ~56min to 4h+), so the hang is likely in SMT-formula \ - construction, not solver.check(). Tracked in #147 for the correct \ - timeout mechanism; the #145 fix is exercised by the Z3 Verification \ - Build CI job + structural soundness."] fn test_inline_i64_local_only_no_z3_panic() { // No params, just an i64 local — the helper still needs Z3 to // handle 64-bit symbolic state when its body is inlined into a @@ -18275,12 +18257,12 @@ mod tests { } #[test] - #[ignore = "loom#147: this test asserts the i64 helper is INLINED, but \ - the new Z3 per-query timeout (LOOM_Z3_TIMEOUT_MS) may return Unknown on \ - a slow i64 solve -> conservative revert -> not inlined, which would flake \ - this assertion. Re-enable once the timeout is tuned to admit this proof \ - (or the test is reworked to accept a sound revert). The no-panic i64 \ - tests are un-ignored and cover the #145 fix."] + #[ignore = "loom#147 follow-up: asserts the i64 helper is INLINED, but \ + the Z3 verifier currently REVERTS the i64 inline (cannot prove i64 \ + inline-equivalence) — a sound, conservative outcome. The #147 livelock \ + that hung the pass is fixed and the other four i64 inline tests are \ + re-enabled; this one needs real verified i64 inlining (or a reworked \ + assertion) before it can pass."] fn test_inline_pass_actually_inlines_i64_helper() { // Past the panic-prevention bar: confirm the pass does its job // on i64 helpers — the call must be replaced by the helper's @@ -18324,12 +18306,6 @@ mod tests { } #[test] - #[ignore = "loom#147: hangs in i64 Z3 verification. The Config-level \ - `timeout` param does not bound it (re-enabling these took the CI \ - matrix from ~56min to 4h+), so the hang is likely in SMT-formula \ - construction, not solver.check(). Tracked in #147 for the correct \ - timeout mechanism; the #145 fix is exercised by the Z3 Verification \ - Build CI job + structural soundness."] fn test_inline_i64_loop_kinduction_no_panic() { // loom#145 regression: the prior i64 inline tests are loopless, so // they never exercise the k-induction verifier path