Skip to content

Commit ff6160d

Browse files
committed
[stack-switching] X86_64Stack compression
1 parent 29c4d16 commit ff6160d

5 files changed

Lines changed: 198 additions & 4 deletions

File tree

src/engine/Debug.v3

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,10 @@ component Debug {
1010
def runtime = false;
1111
def compiler = false;
1212
def pregen = false;
13-
def stack = false;
13+
def stack = true;
1414
def memory = false;
1515
def diagnostic = false;
16+
def compression = true;
1617

1718
// Prevents arguments from being dead-code-eliminated.
1819
def keepAlive<T>(x: T) { }

src/engine/WasmStack.v3

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,10 +51,14 @@ class ExecStack {
5151
}
5252
}
5353

54+
// Represents a suspendable stack that can be used to instantiate a continuation.
// Adds a {version} field on top of {ExecStack}.
55+
class VersionedStack extends ExecStack {
56+
// Version number of this stack.
// NOTE(review): presumably compared against a version recorded by a continuation to
// detect stale references -- confirm against the users of {version}.
var version: u64;
57+
}
58+
5459
// Represents a stack on which Wasm code can be executed.
55-
class WasmStack extends ExecStack {
60+
class WasmStack extends VersionedStack {
5661
var parent: WasmStack;
57-
var version: u64;
5862

5963
// ext:stack-switching
6064
// Denotes the bottom stack of a suspended continuation (with {this} as the top stack).

src/engine/x86-64/X86_64Runtime.v3

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,10 @@ component X86_64Runtime {
135135
// is found). Then, the tag parameters and the continuation are pushed onto
136136
// the handler stack.
137137
def runtime_handle_suspend(stack: X86_64Stack, instance: Instance, tag_id: u31) -> Throwable {
138+
// TODO[sc]: remove this check
139+
var compressed = X86_64Compression.compress(stack);
140+
X86_64Compression.decompress(stack, compressed);
141+
138142
var tag = instance.tags[tag_id];
139143
var vals = stack.popN(tag.sig.params);
140144
var cont = Runtime.unwindStackChain(stack, instance, tag_id, WasmStack.tryHandleSuspension);

src/engine/x86-64/X86_64Stack.v3

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ class X86_64Stack extends WasmStack {
120120
// stop the stackwalk. Otherwise stop at the return-parent stub. In any case, return the last
121121
// valid stack pointer, and a boolean indicating if the walk stopped early (due to {f} returning {false}).
122122
// (XXX: takes a function {f} with an additional parameter, and the parameter, to avoid a closure).
123-
private def walk<P>(f: (Pointer, RiUserCode, StackFramePos, P) -> bool, param: P, start_sp: Pointer, continue_to_parent: bool) -> (bool, StackFramePos) {
123+
def walk<P>(f: (Pointer, RiUserCode, StackFramePos, P) -> bool, param: P, start_sp: Pointer, continue_to_parent: bool) -> (bool, StackFramePos) {
124124
var stack = this;
125125
var sp = start_sp;
126126
if (Trace.stack && Debug.stack) {
Lines changed: 185 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,185 @@
1+
// Copyright 2025 Wizard authors. All rights reserved.
2+
// See LICENSE for details of Apache 2.0 license.
3+
4+
// A single frame in compressed form: the executing function {func}, its bytecode
// position {pc}, the number of values {n_vals} that belong to the frame, and the
// recorded return address {ret_addr}, which is a pointer into the code of {func}.
// The values themselves are not stored in the frame; they are passed to {render}.
// TODO: make it more compact
type CompressedFrame(func: WasmFunction, pc: int, n_vals: int, ret_addr: Pointer) #unboxed {

	// Renders a header line, a value-count line, and (if there are any) the first
	// {n_vals} entries of {value_slice} into {buf}; returns {buf}.
	// {value_slice} should start at the value index of this frame.
	def render(buf: StringBuilder, value_slice: Range<Value>) -> StringBuilder {
		// Header: function, pc, and the return address printed as a hex offset from null.
		var retip = ret_addr - Pointer.NULL;
		buf.put3(" Frame(func=%q, pc=%d, retip=0x%x):", func.decl.render(null, _), pc, retip).ln();
		buf.put1(" Values(count=%d):", n_vals).ln();

		// Nothing more to print for a frame without values.
		if (n_vals <= 0) return buf;

		buf.puts(" ");
		for (j = 0; j < n_vals; j++) {
			var v = value_slice[j];
			buf.put1("%q ", Values.render(v, _));
		}
		return buf;
	}
}
27+
28+
// Converts between an {X86_64Stack} and its compressed form, {X86_64CompressedStack}.
// {compress} records the frames and values of a stack; {decompress} rebuilds frames
// laid out as {X86_64InterpreterFrame}s on a target stack from that record.
component X86_64Compression {
	// Shared helpers, reused across calls to {compress}.
	def collector = StackFrameCollector.new();
	def builder = CompressedStackBuilder.new();

	// Walks {stack} from its current {rsp} (without continuing to the parent stack),
	// collects its frames, and returns them as a new {X86_64CompressedStack}.
	// Frames are handed to the builder in the reverse of the order in which the walk
	// visited them.
	def compress(stack: X86_64Stack) -> X86_64CompressedStack {
		if (Debug.compression) {
			var addr = Pointer.atObject(stack) - Pointer.NULL;
			Trace.OUT.put1("Compressing X86-64Stack @ 0x%x", addr).ln();
			Trace.OUT.put1("Stack info:\n%q", renderStackBounds(stack, _)).ln();
		}

		collector.reset();
		stack.walk<void>(collector.visitFrame, void, stack.rsp, false);

		// Feed the collected frames to the builder, last-visited first.
		var collected = collector.frames;
		var i = collected.length;
		while (i > 0) {
			i--;
			builder.addFrame(collected[i]);
		}

		var result = X86_64CompressedStack.new();
		builder.build(result);
		if (Debug.compression) result.render(Trace.OUT);
		return result;
	}

	// Clears {to} and rebuilds on it one frame per entry of {from.frames}, pushing each
	// frame's values onto {to} and storing the recorded return address below the frame.
	// Does nothing if {from} has no frames.
	// NOTE(review): the enter-func-stub is popped before {to.reset} is called; confirm
	// that this is the intended order relative to what {clear} and {reset} push.
	def decompress(to: X86_64Stack, from: X86_64CompressedStack) {
		if (from.numFrames() == 0) return; // TODO: empty stack behavior?
		if (Debug.compression) {
			var addr = Pointer.atObject(to) - Pointer.NULL;
			Trace.OUT.put1("Decompressing into X86-64Stack @ 0x%x", addr).ln();
		}

		to.clear();
		to.rsp += Pointer.SIZE; // Pop the enter-func-stub off (only needed for first entry).
		var root = from.frames[0].func;
		var root_instance = root.instance;
		to.reset(root_instance.functions[root.decl.func_index]);
		if (Debug.compression) {
			Trace.OUT.put1(" Root function: %q", root.decl.render(null, _)).ln();
		}

		// {first_val} is the index in {from.values} of the current frame's first value.
		var first_val = 0;
		for (cf in from.frames) {
			// Look up the sidetable position for the frame's pc.
			var st_map = SidetableMap.new(cf.func.decl);
			var st_pos = st_map[cf.pc];
			if (Debug.compression) Trace.OUT.put1("%q", cf.render(_, from.values[first_val...])).ln();

			// Allocate interpreter stack frame and fill in its function/pc-dependent slots.
			to.rsp += -X86_64InterpreterFrame.size;
			setFrameContext(to.rsp, cf.func);
			setNewProgramLocation(to.rsp, cf.func.decl, cf.pc, st_pos);

			// Load values: %vfp is the value stack pointer before the frame's values are
			// pushed, %vsp the value stack pointer after.
			(to.rsp + X86_64InterpreterFrame.vfp.offset).store<Pointer>(to.vsp);
			var end_val = first_val + cf.n_vals;
			for (k = first_val; k < end_val; k++) to.push(from.values[k]);
			(to.rsp + X86_64InterpreterFrame.vsp.offset).store<Pointer>(to.vsp);

			// Store recorded machine address as retip, one pointer below the frame.
			to.rsp += -Pointer.SIZE;
			to.rsp.store<Pointer>(cf.ret_addr);

			first_val = end_val;
		}

		if (Debug.compression) Trace.OUT.put1("Stack info:\n%q", renderStackBounds(to, _)).ln();
	}

	// Renders the {vsp} and {rsp} of {stack} as hex offsets from null, one per line.
	private def renderStackBounds(stack: X86_64Stack, buf: StringBuilder) -> StringBuilder {
		var vsp = stack.vsp - Pointer.NULL;
		buf.put1(" vsp: 0x%x", vsp).ln();
		var rsp = stack.rsp - Pointer.NULL;
		buf.put1(" rsp: 0x%x", rsp).ln();
		return buf;
	}

	// Fills in the function-dependent slots of the interpreter frame at {sp}: the
	// function, its instance, and its sidetable entries. If the module declares at least
	// one memory, also stores the start of the instance's memory 0 as the memory base;
	// otherwise that slot is left untouched.
	def setFrameContext(sp: Pointer, wf: WasmFunction) {
		var instance = wf.instance;
		var module = instance.module;
		(sp + X86_64InterpreterFrame.wasm_func.offset).store<WasmFunction>(wf);
		(sp + X86_64InterpreterFrame.instance.offset).store<Instance>(instance);
		(sp + X86_64InterpreterFrame.sidetable.offset).store<Array<int>>(wf.decl.sidetable.entries);

		// Load instance.memories[0].start into MEM0_BASE
		if (module.memories.length == 0) return;
		var mem0 = NativeWasmMemory.!(instance.memories[0]);
		(sp + X86_64InterpreterFrame.mem0_base.offset).store<Pointer>(mem0.start);
	}

	// Fills in the location-dependent slots of the interpreter frame at {sp} so that it
	// refers to bytecode offset {pc} and sidetable index {stp} of {func}.
	// XXX: merge with functionality in FrameAccessor
	// TODO: forgeRange
	def setNewProgramLocation(sp: Pointer, func: FuncDecl, pc: int, stp: int) {
		var bytecode = func.cur_bytecode;
		(sp + X86_64InterpreterFrame.func_decl.offset).store<FuncDecl>(func);
		(sp + X86_64InterpreterFrame.curpc.offset).store<int>(pc);
		(sp + X86_64InterpreterFrame.code.offset).store<Array<byte>>(bytecode);
		(sp + X86_64InterpreterFrame.ip.offset).store<Pointer>(Pointer.atElement(bytecode, pc));
		(sp + X86_64InterpreterFrame.eip.offset).store<Pointer>(Pointer.atContents(bytecode) + bytecode.length);

		// An {stp} equal to the sidetable length has no element to point at; the pointer
		// to the start of the contents is stored in that case.
		var entries = func.sidetable.entries;
		var st_ptr: Pointer;
		if (stp == entries.length) st_ptr = Pointer.atContents(entries);
		else st_ptr = Pointer.atElement(entries, stp);
		(sp + X86_64InterpreterFrame.stp.offset).store<Pointer>(st_ptr);
	}
}
125+
126+
// Gathers the frames of an {X86_64Stack} while its stack frames are being walked
// during compression. Only frames whose code is {X86_64InterpreterCode} or
// {X86_64SpcModuleCode} are kept; they are stored in the order visited.
// XXX: merge with CompressedStackBuilder
class StackFrameCollector {
	def frames = Vector<TargetFrame>.new();

	// Forgets all previously collected frames.
	def reset() {
		frames.clear();
	}
	// Stack-walk callback: records {pos.frame} if {c} is interpreter or SPC module code.
	// Always returns {true}, so it never requests that the walk stop early.
	def visitFrame(p: Pointer, c: RiUserCode, pos: StackFramePos, v: void) -> bool {
		var collect = X86_64InterpreterCode.?(c) || X86_64SpcModuleCode.?(c);
		if (collect) frames.put(pos.frame);
		return true;
	}
}
138+
139+
// Accumulates {CompressedFrame}s and their values, frame by frame, and hands the
// accumulated arrays over to an {X86_64CompressedStack}.
class CompressedStackBuilder {
	def frames = Vector<CompressedFrame>.new();
	def values = Vector<Value>.new();

	// Appends a compressed record of {frame}: its function, pc, value count and return
	// address go into {frames}; its locals followed by its operands go into {values}.
	def addFrame(frame: TargetFrame) {
		var acc = frame.getFrameAccessor();
		var func = acc.func();
		var pc = acc.pc();

		var n_locals = acc.numLocals();
		var n_operands = acc.numOperands();

		// The return address is read from the slot one pointer below the frame's sp.
		// XXX: a neater way to access ret_addr pointer?
		var ret_slot = frame.sp + (-Pointer.SIZE);
		var ret_addr = ret_slot.load<Pointer>();

		frames.put(CompressedFrame(func, pc, n_locals + n_operands, ret_addr));

		for (i = 0; i < n_locals; i++) values.put(acc.getLocal(i));
		// Operands are requested with indices {1 - n_operands} up to {0}.
		// NOTE(review): presumably 0 denotes the top of the operand stack, so operands are
		// recorded bottom-to-top -- confirm against FrameAccessor.getOperand.
		for (depth = 1 - n_operands; depth <= 0; depth++) values.put(acc.getOperand(depth));
	}

	// Moves the accumulated frames and values into {to} via {Vector.extract}.
	def build(to: X86_64CompressedStack) {
		var fs = frames.extract();
		var vs = values.extract();
		to.frames = fs;
		to.values = vs;
	}
}
165+
166+
// Representation of a compressed stack: an array of {CompressedFrame}s plus one flat
// array of {Value}s shared by all frames. The values of a frame are the {n_vals}
// consecutive entries of {values} that follow the values of all preceding frames.
// Both arrays are unset (null) until they are assigned.
class X86_64CompressedStack extends WasmStack {
	var frames: Array<CompressedFrame>;
	var values: Array<Value>;

	// Returns the number of frames; an unset {frames} array counts as zero frames, so
	// callers can test for an empty stack without dereferencing null.
	def numFrames() -> int {
		return if(frames == null, 0, frames.length);
	}

	// Renders a header line followed by every frame (with its slice of {values}) into
	// {buf}; returns {buf}. With no frames only the header line is rendered.
	def render(buf: StringBuilder) -> StringBuilder {
		var n = numFrames();
		buf.put1("CompressedStack(n_frames=%d):", n).ln();
		// {val_offset} is the index in {values} of the current frame's first value.
		var val_offset = 0;
		for (i < n) {
			var f = frames[i];
			buf.put1("%q", f.render(_, values[val_offset...])).ln();
			val_offset += f.n_vals;
		}
		return buf;
	}
}

0 commit comments

Comments
 (0)