Skip to content

Commit 3729a05

Browse files
ryanbreen and claude committed
feat: GPU-composited window manager with floating bounce window
Multi-window compositing via the VirGL 3D pipeline:
- BWM renders its terminal + tab bar to a pixel buffer, composites registered client windows on top, then displays via TRANSFER_TO_HOST_3D on the 3D resource (direct blit to display at ~8 FPS)
- bounce.rs creates a MAP_SHARED window buffer, renders colored rectangles at 42 FPS, and registers with BWM for compositing
- Window buffer syscalls: create_window_buffer (op=11) returns buffer_id via syscall result and writes full 64-bit mmap address to userspace output pointer, fixing the previous 32-bit truncation bug
- BWM discovers windows via list_windows (op=13), reads pixel data via read_window_buffer (op=14), and blits with title bar chrome
- quick-test.sh now rebuilds userspace + ext2 disk on every run
- Preserved virgl_composite_frame_textured() for future shader-based compositing (texture sampling not yet working on Parallels)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 3a40b14 commit 3729a05

6 files changed

Lines changed: 361 additions & 131 deletions

File tree

kernel/src/drivers/virtio/gpu_pci.rs

Lines changed: 128 additions & 101 deletions
Original file line numberDiff line numberDiff line change
@@ -2935,197 +2935,224 @@ pub fn virgl_composite_frame(
29352935
width: u32,
29362936
height: u32,
29372937
) -> Result<(), &'static str> {
2938-
use super::virgl::{CommandBuffer, format as vfmt, pipe, swizzle};
2939-
29402938
static COMPOSITE_FRAME_COUNT: core::sync::atomic::AtomicU32 = core::sync::atomic::AtomicU32::new(0);
29412939
let frame = COMPOSITE_FRAME_COUNT.fetch_add(1, core::sync::atomic::Ordering::Relaxed);
29422940
let verbose = frame < 3 || frame % 500 == 0;
29432941

29442942
if !is_virgl_enabled() {
29452943
return Err("VirGL not enabled");
29462944
}
2947-
if !COMPOSITE_TEX_READY.load(Ordering::Acquire) {
2948-
return Err("Compositor texture not initialized");
2949-
}
29502945

2951-
let tex_w = COMPOSITE_TEX_W.load(Ordering::Relaxed);
2952-
let tex_h = COMPOSITE_TEX_H.load(Ordering::Relaxed);
29532946
let (display_w, display_h) = dimensions().ok_or("GPU not initialized")?;
29542947

2955-
// Clamp source dimensions to texture capacity
2956-
let copy_w = width.min(tex_w);
2957-
let copy_h = height.min(tex_h);
2948+
// Clamp source dimensions to display
2949+
let copy_w = width.min(display_w);
2950+
let copy_h = height.min(display_h);
29582951
let expected_pixels = (copy_w * copy_h) as usize;
29592952
if pixels.len() < expected_pixels {
29602953
return Err("Pixel buffer too small");
29612954
}
29622955

29632956
if verbose {
2957+
let mut first_nonzero = 0u32;
2958+
let mut nonzero_count = 0u32;
2959+
for i in 0..expected_pixels.min(1000) {
2960+
if pixels[i] != 0 {
2961+
if first_nonzero == 0 { first_nonzero = pixels[i]; }
2962+
nonzero_count += 1;
2963+
}
2964+
}
29642965
crate::serial_println!(
2965-
"[virgl-composite] Frame #{}: {}x{} pixels → {}x{} texture",
2966-
frame, copy_w, copy_h, tex_w, tex_h
2966+
"[virgl-composite] Frame #{}: {}x{} → {}x{} display (first1k: {} nonzero, first={:#010x})",
2967+
frame, copy_w, copy_h, display_w, display_h, nonzero_count, first_nonzero
29672968
);
29682969
}
29692970

2970-
// Step 1: Copy pixel data into the compositor texture backing.
2971-
// The backing is a contiguous BGRA buffer matching the texture dimensions.
2972-
// If source is smaller than texture, we copy row by row with correct stride.
2973-
unsafe {
2974-
let dst = COMPOSITE_TEX_PTR;
2975-
let dst_stride = tex_w as usize * 4; // bytes per row in texture
2976-
let src_stride = width as usize * 4; // bytes per row in source
2971+
// Direct blit: copy pixel data into the 3D framebuffer backing, then upload.
2972+
// No VirGL SUBMIT_3D needed — just TRANSFER_TO_HOST_3D + SET_SCANOUT + RESOURCE_FLUSH.
2973+
let fb_ptr = unsafe { PCI_3D_FB_PTR };
2974+
if fb_ptr.is_null() {
2975+
return Err("3D framebuffer not initialized");
2976+
}
2977+
let fb_len = unsafe { PCI_3D_FB_LEN };
2978+
let dst_stride = display_w as usize * 4;
2979+
let src_stride = width as usize * 4;
29772980

2978-
if copy_w == tex_w {
2979-
// Fast path: source width matches texture width, bulk copy
2980-
let copy_bytes = (copy_w as usize) * (copy_h as usize) * 4;
2981+
unsafe {
2982+
if copy_w == display_w {
2983+
let copy_bytes = (copy_w as usize * copy_h as usize * 4).min(fb_len);
29812984
core::ptr::copy_nonoverlapping(
29822985
pixels.as_ptr() as *const u8,
2983-
dst,
2984-
copy_bytes.min(COMPOSITE_TEX_LEN),
2986+
fb_ptr,
2987+
copy_bytes,
29852988
);
29862989
} else {
2987-
// Slow path: different widths, copy row by row
29882990
for y in 0..copy_h as usize {
2989-
let src_offset = y * src_stride;
2990-
let dst_offset = y * dst_stride;
2991+
let src_off = y * src_stride;
2992+
let dst_off = y * dst_stride;
29912993
let row_bytes = (copy_w as usize) * 4;
2992-
if dst_offset + row_bytes <= COMPOSITE_TEX_LEN {
2994+
if dst_off + row_bytes <= fb_len {
29932995
core::ptr::copy_nonoverlapping(
2994-
(pixels.as_ptr() as *const u8).add(src_offset),
2995-
dst.add(dst_offset),
2996+
(pixels.as_ptr() as *const u8).add(src_off),
2997+
fb_ptr.add(dst_off),
29962998
row_bytes,
29972999
);
29983000
}
29993001
}
30003002
}
30013003
}
30023004

3003-
// Step 2: Upload texture to host GPU via TRANSFER_TO_HOST_3D.
3004-
let tex_bytes = (tex_w as usize) * (tex_h as usize) * 4;
3005-
dma_cache_clean(unsafe { COMPOSITE_TEX_PTR }, tex_bytes);
3005+
// Cache clean the 3D FB backing before DMA upload
3006+
let upload_bytes = (copy_w as usize * copy_h as usize * 4).min(fb_len);
3007+
dma_cache_clean(fb_ptr, upload_bytes);
3008+
3009+
// Upload to host via TRANSFER_TO_HOST_3D on the 3D resource
30063010
with_device_state(|state| {
3007-
transfer_to_host_3d(
3008-
state,
3009-
RESOURCE_COMPOSITE_TEX_ID,
3010-
0, 0,
3011-
copy_w, copy_h,
3012-
tex_w * 4,
3013-
)
3011+
// Need to send the transfer command manually since transfer_to_host_3d
3012+
// skips cache clean for RESOURCE_3D_ID (we already did it above)
3013+
let offset = 0u64;
3014+
unsafe {
3015+
let cmd_ptr = &raw mut PCI_CMD_BUF;
3016+
let cmd = &mut *((*cmd_ptr).data.as_mut_ptr() as *mut VirtioGpuTransferHost3d);
3017+
*cmd = VirtioGpuTransferHost3d {
3018+
hdr: VirtioGpuCtrlHdr {
3019+
type_: cmd::TRANSFER_TO_HOST_3D,
3020+
flags: 0,
3021+
fence_id: 0,
3022+
ctx_id: VIRGL_CTX_ID,
3023+
padding: 0,
3024+
},
3025+
box_x: 0,
3026+
box_y: 0,
3027+
box_z: 0,
3028+
box_w: copy_w,
3029+
box_h: copy_h,
3030+
box_d: 1,
3031+
offset,
3032+
resource_id: RESOURCE_3D_ID,
3033+
level: 0,
3034+
stride: display_w * 4,
3035+
layer_stride: 0,
3036+
};
3037+
}
3038+
send_command_expect_ok(state, core::mem::size_of::<VirtioGpuTransferHost3d>() as u32)
30143039
})?;
30153040

3016-
// Ensure scanout is on the 3D render target
3017-
if !VIRGL_SCANOUT_ACTIVE.load(Ordering::Acquire) {
3018-
with_device_state(|state| {
3019-
set_scanout_resource(state, RESOURCE_3D_ID)
3020-
}).ok();
3021-
VIRGL_SCANOUT_ACTIVE.store(true, Ordering::Release);
3041+
// SET_SCANOUT + RESOURCE_FLUSH
3042+
with_device_state(|state| {
3043+
set_scanout_resource(state, RESOURCE_3D_ID)?;
3044+
resource_flush_3d(state, RESOURCE_3D_ID)
3045+
})?;
3046+
3047+
Ok(())
3048+
}
3049+
3050+
/// GPU-composited frame via textured quad rendering.
3051+
///
3052+
/// Uploads pixel data as a VirGL texture, then renders a full-screen textured quad.
3053+
/// This is the end-goal approach for real GPU compositing (window decorations,
3054+
/// alpha blending, transforms). Currently not working on Parallels — texture
3055+
/// sampling produces black output despite successful SUBMIT_3D. Kept for future
3056+
/// debugging and enablement.
3057+
#[allow(dead_code)]
3058+
pub fn virgl_composite_frame_textured(
3059+
pixels: &[u32],
3060+
width: u32,
3061+
height: u32,
3062+
) -> Result<(), &'static str> {
3063+
use super::virgl::{CommandBuffer, format as vfmt, pipe, swizzle};
3064+
3065+
static TEX_FRAME_COUNT: core::sync::atomic::AtomicU32 = core::sync::atomic::AtomicU32::new(0);
3066+
let frame = TEX_FRAME_COUNT.fetch_add(1, core::sync::atomic::Ordering::Relaxed);
3067+
3068+
if !is_virgl_enabled() { return Err("VirGL not enabled"); }
3069+
if !COMPOSITE_TEX_READY.load(Ordering::Acquire) {
3070+
return Err("Compositor texture not initialized");
3071+
}
3072+
3073+
let tex_w = COMPOSITE_TEX_W.load(Ordering::Relaxed);
3074+
let tex_h = COMPOSITE_TEX_H.load(Ordering::Relaxed);
3075+
let (display_w, display_h) = dimensions().ok_or("GPU not initialized")?;
3076+
let copy_w = width.min(tex_w);
3077+
let copy_h = height.min(tex_h);
3078+
let expected_pixels = (copy_w * copy_h) as usize;
3079+
if pixels.len() < expected_pixels { return Err("Pixel buffer too small"); }
3080+
3081+
// Copy pixel data into the compositor texture backing
3082+
unsafe {
3083+
let dst = COMPOSITE_TEX_PTR;
3084+
let copy_bytes = (copy_w as usize) * (copy_h as usize) * 4;
3085+
core::ptr::copy_nonoverlapping(
3086+
pixels.as_ptr() as *const u8, dst, copy_bytes.min(COMPOSITE_TEX_LEN),
3087+
);
30223088
}
30233089

3024-
// Step 3: Build VirGL command batch — full pipeline + textured quad.
3025-
// CRITICAL: create_sub_ctx required per SUBMIT_3D on Parallels.
3090+
// Upload texture via TRANSFER_TO_HOST_3D
3091+
let tex_bytes = (tex_w as usize) * (tex_h as usize) * 4;
3092+
dma_cache_clean(unsafe { COMPOSITE_TEX_PTR }, tex_bytes);
3093+
with_device_state(|state| {
3094+
transfer_to_host_3d(state, RESOURCE_COMPOSITE_TEX_ID, 0, 0, copy_w, copy_h, tex_w * 4)
3095+
})?;
3096+
3097+
// Build VirGL batch: full pipeline + textured quad
30263098
let mut cmdbuf = CommandBuffer::new();
30273099
cmdbuf.create_sub_ctx(1);
30283100
cmdbuf.set_sub_ctx(1);
30293101
cmdbuf.set_tweaks(1, 1);
30303102
cmdbuf.set_tweaks(2, display_w);
30313103

3032-
// Surface wrapping the render target
30333104
cmdbuf.create_surface(1, RESOURCE_3D_ID, vfmt::B8G8R8X8_UNORM, 0, 0);
30343105
cmdbuf.set_framebuffer_state(0, &[1]);
3035-
3036-
// Pipeline state objects
30373106
cmdbuf.create_blend_simple(1);
30383107
cmdbuf.bind_object(1, super::virgl::OBJ_BLEND);
30393108
cmdbuf.create_dsa_default(1);
30403109
cmdbuf.bind_object(1, super::virgl::OBJ_DSA);
30413110
cmdbuf.create_rasterizer_default(1);
30423111
cmdbuf.bind_object(1, super::virgl::OBJ_RASTERIZER);
30433112

3044-
// Texture vertex shader: passes position + texcoord to fragment shader
3113+
// Texture shaders
30453114
let tex_vs = b"VERT\nDCL IN[0]\nDCL IN[1]\nDCL OUT[0], POSITION\nDCL OUT[1], GENERIC[0]\n 0: MOV OUT[0], IN[0]\n 1: MOV OUT[1], IN[1]\n 2: END\n";
30463115
cmdbuf.create_shader(1, pipe::SHADER_VERTEX, 300, tex_vs);
30473116
cmdbuf.bind_shader(1, pipe::SHADER_VERTEX);
3048-
3049-
// Texture fragment shader: samples from SAMP[0]
30503117
let tex_fs = b"FRAG\nDCL IN[0], GENERIC[0], LINEAR\nDCL OUT[0], COLOR\nDCL SAMP[0]\nDCL SVIEW[0], 2D, FLOAT\n 0: TEX OUT[0], IN[0], SAMP[0], 2D\n 1: END\n";
30513118
cmdbuf.create_shader(2, pipe::SHADER_FRAGMENT, 300, tex_fs);
30523119
cmdbuf.bind_shader(2, pipe::SHADER_FRAGMENT);
30533120

3054-
// Vertex elements: two attributes (position + texcoord), each R32G32B32A32_FLOAT
3055-
// Attribute 0: position at byte offset 0
3056-
// Attribute 1: texcoord at byte offset 16 (4 × f32)
30573121
cmdbuf.create_vertex_elements(1, &[
3058-
(0, 0, 0, vfmt::R32G32B32A32_FLOAT), // position
3059-
(16, 0, 0, vfmt::R32G32B32A32_FLOAT), // texcoord
3122+
(0, 0, 0, vfmt::R32G32B32A32_FLOAT),
3123+
(16, 0, 0, vfmt::R32G32B32A32_FLOAT),
30603124
]);
30613125
cmdbuf.bind_object(1, super::virgl::OBJ_VERTEX_ELEMENTS);
30623126

3063-
// Sampler view: bind compositor texture for shader sampling
3064-
cmdbuf.create_sampler_view(
3065-
2, // handle
3066-
RESOURCE_COMPOSITE_TEX_ID,
3067-
vfmt::B8G8R8X8_UNORM,
3068-
0, 0,
3069-
swizzle::IDENTITY,
3070-
);
3071-
3072-
// Sampler state: nearest filtering, clamp-to-edge
3073-
cmdbuf.create_sampler_state(
3074-
3, // handle
3075-
pipe::TEX_WRAP_CLAMP_TO_EDGE,
3076-
pipe::TEX_WRAP_CLAMP_TO_EDGE,
3077-
pipe::TEX_WRAP_CLAMP_TO_EDGE,
3078-
pipe::TEX_FILTER_NEAREST,
3079-
pipe::TEX_MIPFILTER_NONE,
3080-
pipe::TEX_FILTER_NEAREST,
3081-
);
3082-
3127+
cmdbuf.create_sampler_view(2, RESOURCE_COMPOSITE_TEX_ID, vfmt::B8G8R8X8_UNORM, 0, 0, swizzle::IDENTITY);
3128+
cmdbuf.create_sampler_state(3, pipe::TEX_WRAP_CLAMP_TO_EDGE, pipe::TEX_WRAP_CLAMP_TO_EDGE, pipe::TEX_WRAP_CLAMP_TO_EDGE, pipe::TEX_FILTER_NEAREST, pipe::TEX_MIPFILTER_NONE, pipe::TEX_FILTER_NEAREST);
30833129
cmdbuf.set_min_samples(1);
30843130
cmdbuf.set_viewport(display_w as f32, display_h as f32);
3085-
3086-
// Bind sampler view + state to fragment shader
30873131
cmdbuf.set_sampler_views(pipe::SHADER_FRAGMENT, 0, &[2]);
30883132
cmdbuf.bind_sampler_states(pipe::SHADER_FRAGMENT, 0, &[3]);
3089-
3090-
// Clear to black (background behind textured quad)
30913133
cmdbuf.clear_color(0.0, 0.0, 0.0, 1.0);
30923134

3093-
// Full-screen textured quad: 4 vertices × 32 bytes each (position + texcoord)
3094-
// Compute UV range: if source is smaller than display, scale UVs
30953135
let u_max = copy_w as f32 / tex_w as f32;
30963136
let v_max = copy_h as f32 / tex_h as f32;
3097-
30983137
let quad_verts: [u32; 32] = [
3099-
// v0: top-left — clip (-1, 1), texcoord (0, 0)
31003138
(-1.0f32).to_bits(), (1.0f32).to_bits(), 0f32.to_bits(), 1.0f32.to_bits(),
31013139
0f32.to_bits(), 0f32.to_bits(), 0f32.to_bits(), 0f32.to_bits(),
3102-
// v1: bottom-left — clip (-1, -1), texcoord (0, v_max)
31033140
(-1.0f32).to_bits(), (-1.0f32).to_bits(), 0f32.to_bits(), 1.0f32.to_bits(),
31043141
0f32.to_bits(), v_max.to_bits(), 0f32.to_bits(), 0f32.to_bits(),
3105-
// v2: bottom-right — clip (1, -1), texcoord (u_max, v_max)
31063142
1.0f32.to_bits(), (-1.0f32).to_bits(), 0f32.to_bits(), 1.0f32.to_bits(),
31073143
u_max.to_bits(), v_max.to_bits(), 0f32.to_bits(), 0f32.to_bits(),
3108-
// v3: top-right — clip (1, 1), texcoord (u_max, 0)
31093144
1.0f32.to_bits(), (1.0f32).to_bits(), 0f32.to_bits(), 1.0f32.to_bits(),
31103145
u_max.to_bits(), 0f32.to_bits(), 0f32.to_bits(), 0f32.to_bits(),
31113146
];
3112-
3113-
// Upload quad vertices inline
31143147
cmdbuf.resource_inline_write(RESOURCE_VB_ID, 0, 128, &quad_verts);
3115-
cmdbuf.set_vertex_buffers(&[(32, 0, RESOURCE_VB_ID)]); // stride=32 (8 floats)
3116-
3117-
// Draw as triangle fan (TL, BL, BR, TR)
3148+
cmdbuf.set_vertex_buffers(&[(32, 0, RESOURCE_VB_ID)]);
31183149
cmdbuf.draw_vbo(0, 4, pipe::PRIM_TRIANGLE_FAN, 3);
31193150

3120-
if verbose {
3121-
crate::serial_println!(
3122-
"[virgl-composite] Submitting {} DWORDs (frame #{})",
3123-
cmdbuf.as_slice().len(), frame
3124-
);
3151+
if frame < 3 {
3152+
crate::serial_println!("[virgl-composite-tex] Frame #{}: {} DWORDs", frame, cmdbuf.as_slice().len());
31253153
}
31263154
virgl_submit_sync(cmdbuf.as_slice())?;
31273155

3128-
// Display: SET_SCANOUT + RESOURCE_FLUSH
31293156
with_device_state(|state| {
31303157
set_scanout_resource(state, RESOURCE_3D_ID)?;
31313158
resource_flush_3d(state, RESOURCE_3D_ID)

0 commit comments

Comments (0)