Skip to content

Commit 8417efd

Browse files
authored
Merge pull request #254 from ryanbreen/feat/compositor-wait-syscall
feat: event-driven compositor_wait syscall — BWM CPU 34% → 20%
2 parents d5e50f3 + 09fb6e3 commit 8417efd

4 files changed

Lines changed: 331 additions & 148 deletions

File tree

kernel/src/drivers/usb/hid.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -353,6 +353,7 @@ pub fn process_mouse_report(report: &[u8]) {
353353
}
354354
MOUSE_X.store(new_x, Ordering::Relaxed);
355355
MOUSE_Y.store(new_y, Ordering::Relaxed);
356+
crate::syscall::graphics::wake_compositor_if_waiting();
356357
return;
357358
}
358359

@@ -369,6 +370,7 @@ pub fn process_mouse_report(report: &[u8]) {
369370
let old_y = MOUSE_Y.load(Ordering::Relaxed) as i32;
370371
let new_y = (old_y + dy).clamp(0, sh as i32 - 1) as u32;
371372
MOUSE_Y.store(new_y, Ordering::Relaxed);
373+
crate::syscall::graphics::wake_compositor_if_waiting();
372374
}
373375

374376
// =============================================================================

kernel/src/syscall/graphics.rs

Lines changed: 167 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
//! - op=20: `map_compositor_texture` — map GPU texture into BWM's address space
1919
//! - op=21: `map_window_buffer` — map window buffer into BWM's address space (read-only)
2020
//! - op=22: `check_window_dirty` — lightweight generation check without pixel copy
21+
//! - op=23: `compositor_wait` — block until window dirty/mouse/keyboard/registry change
2122
2223
extern crate alloc;
2324

@@ -41,6 +42,34 @@ pub static FB_FLUSH_COUNT: core::sync::atomic::AtomicU64 = core::sync::atomic::A
4142
#[cfg(target_arch = "aarch64")]
4243
static COMPOSITOR_WAITING_THREAD: core::sync::atomic::AtomicU64 = core::sync::atomic::AtomicU64::new(0);
4344

45+
/// Registry generation counter — bumped when windows are registered/unregistered.
46+
/// compositor_wait (op=23) compares this against its saved value to detect changes.
47+
#[cfg(target_arch = "aarch64")]
48+
static REGISTRY_GENERATION: core::sync::atomic::AtomicU64 = core::sync::atomic::AtomicU64::new(0);
49+
50+
/// Last mouse state seen by compositor_wait, packed as (x << 32 | y << 16 | buttons).
51+
/// Compared against current mouse state to detect movement without a syscall.
52+
#[cfg(target_arch = "aarch64")]
53+
static COMPOSITOR_LAST_MOUSE: core::sync::atomic::AtomicU64 = core::sync::atomic::AtomicU64::new(0);
54+
55+
/// Set to `true` by mark_window_dirty (op=15) before it checks whether the compositor is waiting.
56+
/// compositor_wait checks this after waking to know if a dirty window caused the wake.
57+
#[cfg(target_arch = "aarch64")]
58+
static COMPOSITOR_DIRTY_WAKE: core::sync::atomic::AtomicBool = core::sync::atomic::AtomicBool::new(false);
59+
60+
/// Unblock the compositor thread when it is parked in compositor_wait (op=23).
///
/// Input paths (e.g. the USB HID mouse report handler) call this after
/// updating their state so the compositor reacts without polling. Does
/// nothing when no compositor thread is currently registered as waiting.
#[cfg(target_arch = "aarch64")]
pub fn wake_compositor_if_waiting() {
    use core::sync::atomic::Ordering;

    // A stored thread id of 0 means nobody is blocked in compositor_wait.
    let waiter = COMPOSITOR_WAITING_THREAD.load(Ordering::Acquire);
    if waiter == 0 {
        return;
    }

    crate::task::scheduler::with_scheduler(|sched| {
        sched.unblock(waiter);
    });
}
72+
4473
/// Restore TTBR0 to the current process's page tables after blocking.
4574
///
4675
/// After a blocking syscall (mark_window_dirty), TTBR0 may point to a different
@@ -625,6 +654,17 @@ fn handle_virgl_op(cmd: &FbDrawCmd) -> SyscallResult {
625654
}
626655
}
627656
}
657+
// Bump registry generation + wake compositor so it discovers the new window
658+
#[cfg(target_arch = "aarch64")]
659+
{
660+
REGISTRY_GENERATION.fetch_add(1, core::sync::atomic::Ordering::Relaxed);
661+
let compositor_tid = COMPOSITOR_WAITING_THREAD.load(core::sync::atomic::Ordering::Acquire);
662+
if compositor_tid != 0 {
663+
crate::task::scheduler::with_scheduler(|sched| {
664+
sched.unblock(compositor_tid);
665+
});
666+
}
667+
}
628668
SyscallResult::Ok(0)
629669
}
630670
None => SyscallResult::Err(super::ErrorCode::InvalidArgument as u64),
@@ -728,6 +768,7 @@ fn handle_virgl_op(cmd: &FbDrawCmd) -> SyscallResult {
728768
// This gives immediate frame delivery instead of waiting for a timer tick.
729769
#[cfg(target_arch = "aarch64")]
730770
{
771+
COMPOSITOR_DIRTY_WAKE.store(true, core::sync::atomic::Ordering::Relaxed);
731772
let compositor_tid = COMPOSITOR_WAITING_THREAD.load(core::sync::atomic::Ordering::Acquire);
732773
if compositor_tid != 0 {
733774
crate::task::scheduler::with_scheduler(|sched| {
@@ -1008,13 +1049,137 @@ fn handle_virgl_op(cmd: &FbDrawCmd) -> SyscallResult {
10081049
None => SyscallResult::Err(super::ErrorCode::InvalidArgument as u64),
10091050
}
10101051
}
1052+
23 => {
1053+
// CompositorWait: block until something needs compositing.
1054+
// p1=timeout_ms, p2=last_registry_gen (lo32)
1055+
// Returns bitmask: bit0=windows_dirty, bit1=mouse_changed, bit2=registry_changed
1056+
// Replaces BWM's poll+sleep loop with a single blocking syscall.
1057+
#[cfg(target_arch = "aarch64")]
1058+
{
1059+
handle_compositor_wait(cmd)
1060+
}
1061+
#[cfg(not(target_arch = "aarch64"))]
1062+
{
1063+
SyscallResult::Err(super::ErrorCode::InvalidArgument as u64)
1064+
}
1065+
}
10111066
_ => {
10121067
crate::serial_println!("[virgl-op] UNKNOWN op={}", cmd.op);
10131068
SyscallResult::Err(super::ErrorCode::InvalidArgument as u64)
10141069
}
10151070
}
10161071
}
10171072

1073+
/// Handle compositor_wait (op=23): block until the compositor has work to do.
///
/// Arguments (read from `cmd`):
///   p1: maximum time to block, in milliseconds
///   p2: registry generation returned by the previous call (bits 8-31 of that result)
///
/// Returns packed value: (registry_generation << 8) | ready_bitmask
///   bits 0-7:  ready bitmask (bit0=dirty, bit1=mouse, bit2=registry)
///   bits 8-31: current registry generation, truncated to 24 bits
///              (pass it back as p2 on the next call)
///
/// If the mouse state or the registry generation already differs from what the
/// caller last saw, returns immediately without blocking. Otherwise the calling
/// thread is blocked with the given timeout.
/// Woken by: mark_window_dirty (op=15), mouse interrupt handler, registry changes.
///
/// The registry generation is compared in its truncated 24-bit form, i.e. in
/// the same form it is handed to userspace. Comparing the full 64-bit counter
/// against the echoed 24-bit value would report "registry changed" on every
/// call once the counter passes 2^24, and the compositor would never block.
#[cfg(target_arch = "aarch64")]
fn handle_compositor_wait(cmd: &FbDrawCmd) -> SyscallResult {
    use core::sync::atomic::Ordering;

    const READY_DIRTY: u64 = 1;
    const READY_MOUSE: u64 = 2;
    const READY_REGISTRY: u64 = 4;
    // Only 24 bits of the registry generation fit in the packed result.
    const REGISTRY_GEN_MASK: u64 = 0x00FF_FFFF;

    // Current mouse state packed as (x << 32 | y << 16 | buttons), the same
    // layout stored in COMPOSITOR_LAST_MOUSE.
    let packed_mouse = || {
        let (x, y, buttons) = crate::drivers::usb::hid::mouse_state();
        ((x as u64) << 32) | ((y as u64) << 16) | (buttons as u64)
    };
    // Registry generation exactly as userspace sees it (low 24 bits).
    let registry_gen = || REGISTRY_GENERATION.load(Ordering::Relaxed) & REGISTRY_GEN_MASK;
    let packed_result =
        |ready: u64, generation: u64| SyscallResult::Ok(ready | (generation << 8));

    let timeout_ms = cmd.p1 as u32;
    let last_registry_gen = cmd.p2 as u32 as u64;

    let prev_mouse = COMPOSITOR_LAST_MOUSE.load(Ordering::Relaxed);
    let mouse_now = packed_mouse();
    let reg_gen_now = registry_gen();

    // Check non-dirty conditions first (mouse + registry are always non-blocking).
    let mut ready: u64 = 0;
    if mouse_now != prev_mouse {
        ready |= READY_MOUSE;
    }
    if reg_gen_now != last_registry_gen {
        ready |= READY_REGISTRY;
    }

    // If mouse or registry changed, return immediately (don't check dirty — BWM
    // will do its own per-window dirty check via check_window_dirty).
    if ready != 0 {
        COMPOSITOR_LAST_MOUSE.store(mouse_now, Ordering::Relaxed);
        return packed_result(ready, reg_gen_now);
    }

    // Nothing urgent — block until woken by mark_window_dirty, mouse, or registry change.
    // mark_window_dirty (op=15) wakes us immediately via COMPOSITOR_WAITING_THREAD.
    let compositor_tid = match crate::task::scheduler::current_thread_id() {
        Some(id) => id,
        // No current thread to block: report "nothing ready" but still hand
        // back the current generation so the caller's saved value is not
        // reset to 0.
        None => return packed_result(0, reg_gen_now),
    };
    COMPOSITOR_WAITING_THREAD.store(compositor_tid, Ordering::Release);
    // NOTE(review): an event that fires between the checks above and this
    // store sees no waiting thread and is only noticed after the timeout.
    // Closing that window needs cooperation from the scheduler's block/unblock
    // protocol, which is not visible here.

    // Absolute deadline in monotonic nanoseconds.
    let (s, n) = crate::time::get_monotonic_time_ns();
    let now_ns = (s as u64) * 1_000_000_000 + (n as u64);
    let timeout_ns = now_ns.saturating_add((timeout_ms as u64) * 1_000_000);

    crate::task::scheduler::with_scheduler(|sched| {
        sched.block_current_for_compositor(timeout_ns);
    });

    // Presumably preemption is disabled on syscall entry and must be re-enabled
    // so other threads can run while we wait — TODO confirm against the
    // syscall entry path.
    crate::per_cpu_aarch64::preempt_enable();

    // Yield/halt until the scheduler no longer has this thread in
    // BlockedOnTimer (either unblock() was called or the timeout expired).
    loop {
        let still_blocked = crate::task::scheduler::with_scheduler(|sched| {
            sched.wake_expired_timers();
            sched
                .current_thread_mut()
                .map(|t| t.state == crate::task::thread::ThreadState::BlockedOnTimer)
                .unwrap_or(false)
        });
        if !still_blocked.unwrap_or(false) {
            break;
        }
        crate::task::scheduler::yield_current();
        crate::arch_halt_with_interrupts();
    }

    crate::task::scheduler::with_scheduler(|sched| {
        if let Some(thread) = sched.current_thread_mut() {
            thread.blocked_in_syscall = false;
        }
    });

    crate::per_cpu_aarch64::preempt_disable();
    // After blocking, TTBR0 may point at another process's page tables;
    // restore ours before returning to userspace.
    ensure_current_address_space();

    COMPOSITOR_WAITING_THREAD.store(0, Ordering::Release);

    // Re-check conditions after waking — return bitmask of what woke us.
    let mut ready_after: u64 = 0;

    // Bit 0: mark_window_dirty ran since the flag was last consumed.
    if COMPOSITOR_DIRTY_WAKE.swap(false, Ordering::Relaxed) {
        ready_after |= READY_DIRTY;
    }

    // Bit 1: mouse moved or buttons changed while we were blocked.
    let mouse_after = packed_mouse();
    if mouse_after != prev_mouse {
        ready_after |= READY_MOUSE;
    }

    // Bit 2: a window was registered/unregistered while we were blocked.
    let reg_gen_after = registry_gen();
    if reg_gen_after != last_registry_gen {
        ready_after |= READY_REGISTRY;
    }

    COMPOSITOR_LAST_MOUSE.store(mouse_after, Ordering::Relaxed);

    packed_result(ready_after, reg_gen_after)
}
1182+
10181183
/// Descriptor for multi-window GPU compositing (passed from userspace).
10191184
///
10201185
/// bg_dirty modes:
@@ -1065,7 +1230,8 @@ fn handle_composite_windows(desc_ptr: u64) -> SyscallResult {
10651230
None
10661231
};
10671232

1068-
// Fast path: quick dirty check under lock — no heap allocs if nothing changed
1233+
// Fast path: quick dirty check under lock — no heap allocs if nothing changed.
1234+
// compositor_wait (op=23) handles blocking; op=16 just returns immediately.
10691235
if !bg_dirty {
10701236
let reg = WINDOW_REGISTRY.lock();
10711237
let any_window_dirty = reg.buffers.iter().any(|slot| {
@@ -1076,50 +1242,6 @@ fn handle_composite_windows(desc_ptr: u64) -> SyscallResult {
10761242
});
10771243
if !any_window_dirty {
10781244
drop(reg);
1079-
// Block the compositor until a window becomes dirty or timeout.
1080-
// mark_window_dirty (op=15) will wake us immediately via unblock().
1081-
// This eliminates spin-loop CPU waste when no windows need compositing.
1082-
let compositor_tid = match crate::task::scheduler::current_thread_id() {
1083-
Some(id) => id,
1084-
None => return SyscallResult::Ok(0),
1085-
};
1086-
COMPOSITOR_WAITING_THREAD.store(compositor_tid, core::sync::atomic::Ordering::Release);
1087-
1088-
let (s, n) = crate::time::get_monotonic_time_ns();
1089-
let now_ns = (s as u64) * 1_000_000_000 + (n as u64);
1090-
let timeout_ns = now_ns.saturating_add(5_000_000); // 5ms max wait
1091-
1092-
crate::task::scheduler::with_scheduler(|sched| {
1093-
sched.block_current_for_compositor(timeout_ns);
1094-
});
1095-
1096-
#[cfg(target_arch = "aarch64")]
1097-
crate::per_cpu_aarch64::preempt_enable();
1098-
1099-
loop {
1100-
let still_blocked = crate::task::scheduler::with_scheduler(|sched| {
1101-
sched.wake_expired_timers();
1102-
sched.current_thread_mut()
1103-
.map(|t| t.state == crate::task::thread::ThreadState::BlockedOnTimer)
1104-
.unwrap_or(false)
1105-
});
1106-
if !still_blocked.unwrap_or(false) { break; }
1107-
crate::task::scheduler::yield_current();
1108-
crate::arch_halt_with_interrupts();
1109-
}
1110-
1111-
crate::task::scheduler::with_scheduler(|sched| {
1112-
if let Some(thread) = sched.current_thread_mut() {
1113-
thread.blocked_in_syscall = false;
1114-
}
1115-
});
1116-
1117-
#[cfg(target_arch = "aarch64")]
1118-
crate::per_cpu_aarch64::preempt_disable();
1119-
#[cfg(target_arch = "aarch64")]
1120-
ensure_current_address_space();
1121-
1122-
COMPOSITOR_WAITING_THREAD.store(0, core::sync::atomic::Ordering::Release);
11231245
return SyscallResult::Ok(0);
11241246
}
11251247
drop(reg);

libs/libbreenix/src/graphics.rs

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,8 @@ pub mod draw_op {
136136
pub const MAP_WINDOW_BUFFER: u32 = 21;
137137
/// Lightweight dirty check without pixel copy
138138
pub const CHECK_WINDOW_DIRTY: u32 = 22;
139+
/// Block until compositor has work (dirty windows, mouse, registry change)
140+
pub const COMPOSITOR_WAIT: u32 = 23;
139141
}
140142

141143
/// Ball descriptor for VirGL GPU rendering.
@@ -796,6 +798,46 @@ pub fn check_window_dirty(buffer_id: u32) -> Result<bool, Error> {
796798
Ok(ret != 0)
797799
}
798800

801+
/// Bitmask: at least one window has dirty pixels
802+
pub const COMPOSITOR_READY_DIRTY: u32 = 1;
803+
/// Bitmask: mouse position or buttons changed
804+
pub const COMPOSITOR_READY_MOUSE: u32 = 2;
805+
/// Bitmask: window registry changed (new/removed windows)
806+
pub const COMPOSITOR_READY_REGISTRY: u32 = 4;
807+
808+
/// Block until the compositor has work to do.
809+
///
810+
/// Returns packed result: bits 0-7 = ready bitmask, bits 8-31 = registry generation.
811+
/// bit 0: window(s) have dirty pixels
812+
/// bit 1: mouse position or buttons changed
813+
/// bit 2: window registry changed (new/removed windows)
814+
///
815+
/// If nothing is pending, blocks the calling thread for up to `timeout_ms`
816+
/// milliseconds. Woken early by: window dirty, mouse movement, registry change.
817+
///
818+
/// `last_registry_gen` should be the registry generation returned by the
819+
/// previous call (bits 8-31 of the result).
820+
///
821+
/// Returns (ready_bitmask, registry_generation).
822+
pub fn compositor_wait(timeout_ms: u32, last_registry_gen: u32) -> Result<(u32, u32), Error> {
823+
let cmd = FbDrawCmd {
824+
op: draw_op::COMPOSITOR_WAIT,
825+
p1: timeout_ms as i32,
826+
p2: last_registry_gen as i32,
827+
p3: 0,
828+
p4: 0,
829+
color: 0,
830+
};
831+
let ret = unsafe { raw::syscall1(nr::FBDRAW, &cmd as *const FbDrawCmd as u64) as i64 };
832+
if ret < 0 {
833+
return Err(Error::Os(Errno::from_raw(-ret)));
834+
}
835+
let packed = ret as u32;
836+
let ready = packed & 0xFF;
837+
let reg_gen = (packed >> 8) & 0x00FF_FFFF;
838+
Ok((ready, reg_gen))
839+
}
840+
799841
/// Get the current mouse cursor position.
800842
///
801843
/// # Returns

0 commit comments

Comments
 (0)