Skip to content

Commit 9d3f815

Browse files
authored
Merge pull request #252 from ryanbreen/feat/compositor-perf
feat: MAP_SHARED compositor, cursor fix, mouse encoding fix
2 parents 5212ec3 + d310ee6 commit 9d3f815

12 files changed

Lines changed: 654 additions & 179 deletions

File tree

kernel/src/drivers/usb/hid.rs

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -176,9 +176,13 @@ pub fn process_keyboard_report(report: &[u8]) {
176176
let keys = &report[2..8];
177177

178178
// Update modifier state from the modifier byte
179-
// Bit 0: Left Ctrl, Bit 1: Left Shift, Bit 4: Right Ctrl, Bit 5: Right Shift
179+
// Bit 0: Left Ctrl, Bit 1: Left Shift, Bit 2: Left Alt
180+
// Bit 3: Left GUI, Bit 4: Right Ctrl, Bit 5: Right Shift
181+
// Bit 6: Right Alt, Bit 7: Right GUI
182+
// Map GUI (Command on macOS) to ctrl so Command+T/W/C work in terminal
180183
let shift = (modifiers & 0x02) != 0 || (modifiers & 0x20) != 0;
181-
let ctrl = (modifiers & 0x01) != 0 || (modifiers & 0x10) != 0;
184+
let ctrl = (modifiers & 0x01) != 0 || (modifiers & 0x10) != 0
185+
|| (modifiers & 0x08) != 0 || (modifiers & 0x80) != 0;
182186
SHIFT_PRESSED.store(shift, Ordering::Relaxed);
183187
CTRL_PRESSED.store(ctrl, Ordering::Relaxed);
184188

kernel/src/drivers/virtio/gpu_pci.rs

Lines changed: 159 additions & 92 deletions
Large diffs are not rendered by default.

kernel/src/fs/procfs/mod.rs

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -731,6 +731,7 @@ fn generate_stat() -> String {
731731
use crate::tracing::providers::counters::{
732732
SYSCALL_TOTAL, IRQ_TOTAL, CTX_SWITCH_TOTAL, TIMER_TICK_TOTAL,
733733
FORK_TOTAL, EXEC_TOTAL, COW_FAULT_TOTAL,
734+
GPU_BYTES_UPLOADED, GPU_FULL_UPLOADS, GPU_PARTIAL_UPLOADS,
734735
};
735736

736737
format!(
@@ -740,14 +741,20 @@ fn generate_stat() -> String {
740741
timer_ticks {}\n\
741742
forks {}\n\
742743
execs {}\n\
743-
cow_faults {}\n",
744+
cow_faults {}\n\
745+
gpu_bytes {}\n\
746+
gpu_full {}\n\
747+
gpu_partial {}\n",
744748
SYSCALL_TOTAL.aggregate(),
745749
IRQ_TOTAL.aggregate(),
746750
CTX_SWITCH_TOTAL.aggregate(),
747751
TIMER_TICK_TOTAL.aggregate(),
748752
FORK_TOTAL.aggregate(),
749753
EXEC_TOTAL.aggregate(),
750754
COW_FAULT_TOTAL.aggregate(),
755+
GPU_BYTES_UPLOADED.aggregate(),
756+
GPU_FULL_UPLOADS.aggregate(),
757+
GPU_PARTIAL_UPLOADS.aggregate(),
751758
)
752759
}
753760

kernel/src/syscall/graphics.rs

Lines changed: 138 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -965,6 +965,20 @@ fn handle_virgl_op(cmd: &FbDrawCmd) -> SyscallResult {
965965

966966
SyscallResult::Ok(count as u64)
967967
}
968+
20 => {
969+
// MapCompositorTexture: map COMPOSITE_TEX backing pages into caller's
970+
// address space for zero-copy compositor writes.
971+
// p1/p2 = output pointer (lo/hi) for mapped address (u64)
972+
// Returns: packed (width << 32 | height) on success.
973+
#[cfg(target_arch = "aarch64")]
974+
{
975+
handle_map_compositor_texture(cmd)
976+
}
977+
#[cfg(not(target_arch = "aarch64"))]
978+
{
979+
SyscallResult::Err(super::ErrorCode::InvalidArgument as u64)
980+
}
981+
}
968982
_ => {
969983
crate::serial_println!("[virgl-op] UNKNOWN op={}", cmd.op);
970984
SyscallResult::Err(super::ErrorCode::InvalidArgument as u64)
@@ -973,14 +987,23 @@ fn handle_virgl_op(cmd: &FbDrawCmd) -> SyscallResult {
973987
}
974988

975989
/// Descriptor for multi-window GPU compositing (passed from userspace).
990+
///
991+
/// bg_dirty modes:
992+
/// 0 = no background change (cursor/frame-pacing only)
993+
/// 1 = full background upload (entire buffer changed)
994+
/// 2 = partial background upload (only dirty_rect region changed)
976995
#[cfg(target_arch = "aarch64")]
977996
#[repr(C)]
978997
struct CompositeWindowsDesc {
979998
bg_pixels_ptr: u64,
980999
bg_width: u32,
9811000
bg_height: u32,
9821001
bg_dirty: u32,
983-
_reserved: u32,
1002+
num_dirty_rects: u32,
1003+
dirty_x: u32,
1004+
dirty_y: u32,
1005+
dirty_w: u32,
1006+
dirty_h: u32,
9841007
}
9851008

9861009
/// Handle multi-window GPU compositing (op=16).
@@ -991,11 +1014,27 @@ struct CompositeWindowsDesc {
9911014
fn handle_composite_windows(desc_ptr: u64) -> SyscallResult {
9921015
let desc: CompositeWindowsDesc = unsafe { core::ptr::read(desc_ptr as *const CompositeWindowsDesc) };
9931016

994-
if desc.bg_width == 0 || desc.bg_height == 0 || desc.bg_width > 4096 || desc.bg_height > 4096 {
995-
return SyscallResult::Err(super::ErrorCode::InvalidArgument as u64);
996-
}
1017+
// When bg_pixels_ptr=0 (direct-mapped compositor), use COMPOSITE_TEX dimensions
1018+
let (bg_width, bg_height) = if desc.bg_pixels_ptr == 0 {
1019+
match crate::drivers::virtio::gpu_pci::compositor_texture_info() {
1020+
Some((_phys, _pages, w, h)) => (w, h),
1021+
None => return SyscallResult::Err(super::ErrorCode::InvalidArgument as u64),
1022+
}
1023+
} else {
1024+
if desc.bg_width == 0 || desc.bg_height == 0 || desc.bg_width > 4096 || desc.bg_height > 4096 {
1025+
return SyscallResult::Err(super::ErrorCode::InvalidArgument as u64);
1026+
}
1027+
(desc.bg_width, desc.bg_height)
1028+
};
9971029

9981030
let bg_dirty = desc.bg_dirty != 0;
1031+
let dirty_rect = if desc.bg_dirty == 2 && desc.num_dirty_rects > 0
1032+
&& desc.dirty_w > 0 && desc.dirty_h > 0
1033+
{
1034+
Some((desc.dirty_x, desc.dirty_y, desc.dirty_w, desc.dirty_h))
1035+
} else {
1036+
None
1037+
};
9991038

10001039
// Fast path: quick dirty check under lock — no heap allocs if nothing changed
10011040
if !bg_dirty {
@@ -1058,7 +1097,7 @@ fn handle_composite_windows(desc_ptr: u64) -> SyscallResult {
10581097
}
10591098

10601099
let bg_pixels = if desc.bg_pixels_ptr != 0 && desc.bg_pixels_ptr < USER_SPACE_MAX {
1061-
let pixel_count = (desc.bg_width as usize) * (desc.bg_height as usize);
1100+
let pixel_count = (bg_width as usize) * (bg_height as usize);
10621101
let end = desc.bg_pixels_ptr + (pixel_count as u64) * 4;
10631102
if end > USER_SPACE_MAX {
10641103
return SyscallResult::Err(super::ErrorCode::Fault as u64);
@@ -1108,7 +1147,7 @@ fn handle_composite_windows(desc_ptr: u64) -> SyscallResult {
11081147
};
11091148

11101149
let result = match crate::drivers::virtio::gpu_pci::virgl_composite_windows(
1111-
bg_pixels, desc.bg_width, desc.bg_height, bg_dirty, None, &windows,
1150+
bg_pixels, bg_width, bg_height, bg_dirty, dirty_rect, &windows,
11121151
) {
11131152
Ok(()) => SyscallResult::Ok(0),
11141153
Err(e) => {
@@ -1279,6 +1318,99 @@ fn handle_create_window_buffer(width: u32, height: u32, out_addr_ptr: u64) -> Sy
12791318
SyscallResult::Ok(buffer_id as u64)
12801319
}
12811320

1321+
/// Handle `MapCompositorTexture` (virgl op 20): map the COMPOSITE_TEX backing
/// pages into the calling process's address space (read/write).
///
/// This lets BWM write pixels directly into the GPU texture backing,
/// eliminating the kernel-side copy in virgl_composite_windows Phase A.
///
/// # Arguments
/// * `cmd.p1` / `cmd.p2` - low / high 32 bits of a userspace pointer to a
///   `u64` that receives the base virtual address of the new mapping.
///
/// # Returns
/// * `Ok((width << 32) | height)` - packed texture dimensions.
/// * `Err(Fault)` - the output pointer is null or its 8-byte target does not
///   lie entirely below `USER_SPACE_MAX`.
/// * `Err(InvalidArgument)` - the compositor texture is not available or has
///   no backing pages.
/// * `Err(NoSuchProcess)` - the calling thread/process could not be resolved.
/// * `Err(OutOfMemory)` - no virtual range is available, the process has no
///   page table, or mapping a page failed.
#[cfg(target_arch = "aarch64")]
fn handle_map_compositor_texture(cmd: &FbDrawCmd) -> SyscallResult {
    use crate::memory::vma::{MmapFlags, Protection, Vma};
    use crate::syscall::memory_common::{
        get_current_thread_id, prot_to_page_flags, flush_tlb, round_down_to_page, PAGE_SIZE,
    };
    use crate::memory::arch_stub::{Page, Size4KiB, VirtAddr};

    // p1/p2 are signed 32-bit fields; go through u32 so the low half is not
    // sign-extended into the high half.
    let out_ptr = (cmd.p1 as u32 as u64) | ((cmd.p2 as u32 as u64) << 32);

    // A full u64 is written at out_ptr, so the whole 8-byte range (not just
    // its first byte) must lie inside user space.
    let out_end = match out_ptr.checked_add(core::mem::size_of::<u64>() as u64) {
        Some(end) => end,
        None => return SyscallResult::Err(super::ErrorCode::Fault as u64),
    };
    if out_ptr == 0 || out_end > USER_SPACE_MAX {
        return SyscallResult::Err(super::ErrorCode::Fault as u64);
    }

    // Get compositor texture info from GPU driver
    let (phys_base, num_pages, tex_w, tex_h) =
        match crate::drivers::virtio::gpu_pci::compositor_texture_info() {
            Some(info) => info,
            None => return SyscallResult::Err(super::ErrorCode::InvalidArgument as u64),
        };

    // An empty texture would otherwise "succeed" with a zero-length mapping.
    if num_pages == 0 {
        return SyscallResult::Err(super::ErrorCode::InvalidArgument as u64);
    }

    // Get current process
    let current_thread_id = match get_current_thread_id() {
        Some(id) => id,
        None => return SyscallResult::Err(super::ErrorCode::NoSuchProcess as u64),
    };

    let mut manager_guard = crate::process::manager();
    let manager = match *manager_guard {
        Some(ref mut m) => m,
        None => return SyscallResult::Err(super::ErrorCode::NoSuchProcess as u64),
    };

    let (_pid, process) = match manager.find_process_by_thread_mut(current_thread_id) {
        Some(p) => p,
        None => return SyscallResult::Err(super::ErrorCode::NoSuchProcess as u64),
    };

    // Allocate virtual address range just below the current mmap hint.
    let total_size = match (num_pages as u64).checked_mul(PAGE_SIZE) {
        Some(size) => size,
        None => return SyscallResult::Err(super::ErrorCode::OutOfMemory as u64),
    };
    let new_addr = round_down_to_page(process.mmap_hint.saturating_sub(total_size));
    if new_addr < 0x1000_0000 {
        return SyscallResult::Err(super::ErrorCode::OutOfMemory as u64);
    }
    process.mmap_hint = new_addr;

    let page_table = match process.page_table.as_mut() {
        Some(pt) => pt,
        None => return SyscallResult::Err(super::ErrorCode::OutOfMemory as u64),
    };

    // Map each physical page of COMPOSITE_TEX into the process
    let page_flags = prot_to_page_flags(Protection::from_bits_truncate(3)); // READ | WRITE
    for i in 0..num_pages as u64 {
        let offset = i * PAGE_SIZE;
        let frame = crate::memory::arch_stub::PhysFrame::<Size4KiB>::containing_address(
            crate::memory::arch_stub::PhysAddr::new(phys_base + offset),
        );
        let page_addr = new_addr + offset;
        let page = Page::<Size4KiB>::containing_address(VirtAddr::new(page_addr));

        if page_table.map_page(page, frame, page_flags).is_err() {
            // NOTE(review): pages mapped by earlier iterations stay mapped and
            // the reserved range is not returned to mmap_hint; a rollback
            // needs an unmap API that is not visible from this function.
            return SyscallResult::Err(super::ErrorCode::OutOfMemory as u64);
        }
        flush_tlb(VirtAddr::new(page_addr));
    }

    // Create VMA
    let vma = Vma::new(
        VirtAddr::new(new_addr),
        VirtAddr::new(new_addr + total_size),
        Protection::from_bits_truncate(3),
        MmapFlags::from_bits_truncate(0x21), // MAP_SHARED | MAP_ANONYMOUS
    );
    process.vmas.push(vma);

    crate::serial_println!(
        "[compositor] Mapped COMPOSITE_TEX into process: virt={:#x}, {}x{}, {} pages",
        new_addr, tex_w, tex_h, num_pages
    );

    // Write mapped address to userspace.
    // SAFETY: [out_ptr, out_ptr + 8) was range-checked against USER_SPACE_MAX
    // above. The address is caller-controlled and may be misaligned, so an
    // unaligned store is used.
    // NOTE(review): the range is not verified to be mapped and writable in the
    // caller's address space; if the kernel has a checked copy-to-user helper,
    // it should be used here instead.
    unsafe {
        core::ptr::write_unaligned(out_ptr as *mut u64, new_addr);
    }

    // Return packed dimensions
    SyscallResult::Ok(((tex_w as u64) << 32) | tex_h as u64)
}
1413+
12821414
/// sys_fbdraw - Draw to the left pane of the framebuffer
12831415
///
12841416
/// # Arguments

kernel/src/tracing/providers/counters.rs

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,27 @@ pub static COW_FAULT_TOTAL: TraceCounter = TraceCounter::new(
123123
"Total CoW fault operations",
124124
);
125125

126+
/// GPU compositor: total bytes uploaded to VRAM.
///
/// Registered in `init()` and reported by procfs `generate_stat` as the
/// `gpu_bytes` line.
127+
#[no_mangle]
128+
pub static GPU_BYTES_UPLOADED: TraceCounter = TraceCounter::new(
129+
"GPU_BYTES_UPLOADED",
130+
"GPU bytes uploaded to VRAM",
131+
);
132+
133+
/// GPU compositor: full-screen uploads (4.9MB each).
///
/// NOTE(review): the 4.9MB figure presumably assumes one specific display
/// resolution — confirm against the GPU driver.
/// Registered in `init()` and reported by procfs `generate_stat` as the
/// `gpu_full` line.
134+
#[no_mangle]
135+
pub static GPU_FULL_UPLOADS: TraceCounter = TraceCounter::new(
136+
"GPU_FULL_UPLOADS",
137+
"Full-screen GPU uploads",
138+
);
139+
140+
/// GPU compositor: partial rect uploads.
///
/// Registered in `init()` and reported by procfs `generate_stat` as the
/// `gpu_partial` line.
141+
#[no_mangle]
142+
pub static GPU_PARTIAL_UPLOADS: TraceCounter = TraceCounter::new(
143+
"GPU_PARTIAL_UPLOADS",
144+
"Partial rect GPU uploads",
145+
);
146+
126147
// =============================================================================
127148
// Boot Test Counters (BTRT feature)
128149
// =============================================================================
@@ -182,6 +203,9 @@ pub fn init() {
182203
register_counter(&FORK_TOTAL);
183204
register_counter(&EXEC_TOTAL);
184205
register_counter(&COW_FAULT_TOTAL);
206+
register_counter(&GPU_BYTES_UPLOADED);
207+
register_counter(&GPU_FULL_UPLOADS);
208+
register_counter(&GPU_PARTIAL_UPLOADS);
185209

186210
#[cfg(feature = "btrt")]
187211
{

libs/breengel/src/event.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -59,9 +59,9 @@ impl Event {
5959
},
6060
input_event_type::MOUSE_BUTTON => Event::MouseButton {
6161
button: raw.keycode as u8,
62-
pressed: raw.mouse_x != 0,
63-
x: raw.mouse_y as i32,
64-
y: 0,
62+
pressed: raw._pad != 0,
63+
x: raw.mouse_x as i32,
64+
y: raw.mouse_y as i32,
6565
},
6666
input_event_type::FOCUS_GAINED => Event::FocusGained,
6767
input_event_type::FOCUS_LOST => Event::FocusLost,

libs/libbreenix/src/graphics.rs

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,8 @@ pub mod draw_op {
130130
pub const WRITE_WINDOW_INPUT: u32 = 18;
131131
/// Read input events from a window's ring buffer (client ← BWM)
132132
pub const READ_WINDOW_INPUT: u32 = 19;
133+
/// Map compositor texture into caller's address space
134+
pub const MAP_COMPOSITOR_TEXTURE: u32 = 20;
133135
}
134136

135137
/// Ball descriptor for VirGL GPU rendering.
@@ -683,7 +685,7 @@ pub fn virgl_composite_windows_rect(
683685
dirty_mode: u32, dirty_x: u32, dirty_y: u32, dirty_w: u32, dirty_h: u32,
684686
) -> Result<(), Error> {
685687
let desc = CompositeWindowsDesc {
686-
bg_pixels_ptr: bg_pixels.as_ptr() as u64,
688+
bg_pixels_ptr: if bg_pixels.is_empty() { 0 } else { bg_pixels.as_ptr() as u64 },
687689
bg_width: bg_w,
688690
bg_height: bg_h,
689691
bg_dirty: dirty_mode,
@@ -705,6 +707,36 @@ pub fn virgl_composite_windows_rect(
705707
fbdraw(&cmd)
706708
}
707709

710+
/// Map the compositor texture backing into this process's address space.
711+
///
712+
/// Returns a pointer to the mapped memory along with the texture dimensions.
713+
/// BWM can write pixels directly into this memory — they are the GPU texture
714+
/// backing pages. This eliminates the kernel-side copy in the composite syscall.
715+
///
716+
/// # Returns
717+
/// * Ok((ptr, width, height)) - Mapped pointer + dimensions
718+
/// * Err(Error) - Error if compositor texture is not initialized
719+
pub fn map_compositor_texture() -> Result<(*mut u32, u32, u32), Error> {
720+
let mut mapped_addr: u64 = 0;
721+
let out_ptr = &mut mapped_addr as *mut u64 as u64;
722+
let cmd = FbDrawCmd {
723+
op: draw_op::MAP_COMPOSITOR_TEXTURE,
724+
p1: out_ptr as i32,
725+
p2: (out_ptr >> 32) as i32,
726+
p3: 0,
727+
p4: 0,
728+
color: 0,
729+
};
730+
let ret = unsafe { raw::syscall1(nr::FBDRAW, &cmd as *const FbDrawCmd as u64) as i64 };
731+
if ret < 0 {
732+
return Err(Error::Os(Errno::from_raw(-ret)));
733+
}
734+
let result = ret as u64;
735+
let width = (result >> 32) as u32;
736+
let height = (result & 0xFFFF_FFFF) as u32;
737+
Ok((mapped_addr as *mut u32, width, height))
738+
}
739+
708740
/// Get the current mouse cursor position.
709741
///
710742
/// # Returns

0 commit comments

Comments
 (0)