From dd73cbfd4689f8d3bd41ce2b93d691d09a309b99 Mon Sep 17 00:00:00 2001 From: Jared Lunde Date: Thu, 4 Jun 2026 16:32:28 -0700 Subject: [PATCH] Support macOS and the BSDs, not just Linux MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The handoff mechanism is plain POSIX — fork/exec FD inheritance, flock, Unix-domain control sockets, signals — with no Linux-only syscalls. It was only incidentally Linux-bound. Make it build and run on macOS and the BSDs automatically (via cfg, not a Cargo feature — no opt-in for embedders). - supervisor.rs: cfg-gate SOCK_CLOEXEC on the control socketpair. macOS doesn't define the flag (nix won't compile the symbol there), so fall back to a follow-up fcntl(FD_CLOEXEC) on each end. Linux/BSD path unchanged. - fd.rs: drop SOCK_CLOEXEC from a test socketpair — throwaway source FDs don't need it, and dropping it keeps the test compiling on macOS. - stress.rs: read /dev/fd instead of /proc/self/fd for the FD-leak counter. /dev/fd is the portable spelling across Linux, macOS, and FreeBSD. - CI: add a macos-latest test job (exercises the fcntl fallback and /dev/fd for real) and a FreeBSD cross-compile check (no free BSD runner exists). - Docs: broaden the rename-atomicity note beyond Linux; add a Platforms section (Linux/macOS/BSD supported, Windows not). Windows is out of scope: no fork/exec FD inheritance and no flock, so the model would need a separate backend. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .github/workflows/ci.yml | 32 ++++++++++++++++++++++++++++ ARCHITECTURE.md | 2 +- README.md | 4 ++++ crates/handoff-tests/tests/stress.rs | 15 +++++++------ crates/handoff/src/fd.rs | 6 +++++- crates/handoff/src/supervisor.rs | 23 ++++++++++++++++++++ 6 files changed, 73 insertions(+), 9 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 399eb35..5ed4e01 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -37,6 +37,38 @@ jobs: - name: Doc tests run: cargo test --workspace --doc + test-macos: + name: Test (macOS) + runs-on: macos-latest + needs: check + timeout-minutes: 15 + steps: + - uses: actions/checkout@v4 + - uses: dtolnay/rust-toolchain@stable + - uses: Swatinem/rust-cache@v2 + - name: Unit + integration tests + # Exercises the macOS-specific paths that cross-compilation can't: + # the `fcntl(FD_CLOEXEC)` fallback for socketpair (no SOCK_CLOEXEC + # on macOS) and the `/dev/fd` FD-leak enumeration. + run: cargo test --workspace --lib --tests + - name: Stress test (FD stability) + run: cargo test -p handoff-tests --test stress + + cross-check: + name: Cross-compile (FreeBSD) + runs-on: ubuntu-latest + needs: check + steps: + - uses: actions/checkout@v4 + - uses: dtolnay/rust-toolchain@stable + with: + targets: x86_64-unknown-freebsd + - uses: Swatinem/rust-cache@v2 + - name: Type-check BSD target + # GitHub has no free FreeBSD runner; `cargo check` proves the BSD + # path stays buildable without one (type-check only, no linking). + run: cargo check --workspace --all-targets --target x86_64-unknown-freebsd + crash-matrix: name: Crash matrix (fault injection) runs-on: ubuntu-latest diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md index 37171c0..d9dc1ab 100644 --- a/ARCHITECTURE.md +++ b/ARCHITECTURE.md @@ -264,7 +264,7 @@ If `ChildGuard` were still armed when the `Commit` write failed (e.g. 
O crashed ### Why the state journal uses rename, not O_DSYNC write -`write tmp + rename` produces an atomic view: the on-disk file is always either the old complete state or the new complete state, never a partial write. `O_DSYNC` only ensures the write itself is durable — it doesn't prevent a torn record if the supervisor crashes mid-write. Rename on Linux ext4/XFS/btrfs is atomic with respect to crash consistency. +`write tmp + rename` produces an atomic view: the on-disk file is always either the old complete state or the new complete state, never a partial write. `O_DSYNC` only ensures the write itself is durable — it doesn't prevent a torn record if the supervisor crashes mid-write. `rename(2)` is atomic with respect to crash consistency on every supported filesystem — Linux ext4/XFS/btrfs, macOS APFS, and BSD UFS/ZFS — so the guarantee is not Linux-specific. ### Liveness: heartbeats during drain/seal + two-tier supervisor timeout diff --git a/README.md b/README.md index 3ff92a9..e5636ec 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,10 @@ Three roles: a **supervisor** that holds listener FDs and drives the swap, an ** See [ARCHITECTURE.md](./ARCHITECTURE.md) for the wire protocol, state machine, and correctness invariants. +## Platforms + +Linux, macOS, and the BSDs. The mechanism is plain POSIX — `fork`/`exec` FD inheritance, `flock`, Unix-domain control sockets, and signals — with no Linux-only syscalls. Windows is unsupported: it has no `fork`/`exec` FD inheritance and no `flock`, so the handoff model doesn't map without a separate backend. + ## Integrate your daemon ### 1. Implement `Drainable` diff --git a/crates/handoff-tests/tests/stress.rs b/crates/handoff-tests/tests/stress.rs index 07b7596..9432be9 100644 --- a/crates/handoff-tests/tests/stress.rs +++ b/crates/handoff-tests/tests/stress.rs @@ -61,13 +61,14 @@ fn many_handoffs_no_resource_leaks() { ); } -/// Count of open file descriptors in the test process. 
Reads -/// `/proc/self/fd` directly so it captures everything — not just FDs we -/// know about. The directory entry for `/proc/self/fd` itself opens an -/// FD during enumeration; we measure with the same method on both sides -/// so the bias cancels. +/// Count of open file descriptors in the test process. Reads `/dev/fd` +/// directly so it captures everything — not just FDs we know about. +/// `/dev/fd` is the portable spelling of the per-process FD directory: a +/// symlink to `/proc/self/fd` on Linux, kernel-provided on macOS, fdescfs on +/// FreeBSD (if unmounted, only 0-2 appear). Enumeration itself opens an FD; we +/// measure with the same method on both sides so the bias cancels. fn count_open_fds() -> usize { - std::fs::read_dir("/proc/self/fd") - .expect("Linux: /proc/self/fd should exist") + std::fs::read_dir("/dev/fd") + .expect("/dev/fd should exist on any supported Unix") .count() } diff --git a/crates/handoff/src/fd.rs b/crates/handoff/src/fd.rs index 8c6f49d..4c74dd2 100644 --- a/crates/handoff/src/fd.rs +++ b/crates/handoff/src/fd.rs @@ -150,11 +150,15 @@ mod tests { use std::os::fd::IntoRawFd; let mk = || { + // `SockFlag::empty()` (not SOCK_CLOEXEC): these are throwaway + // source FDs for the dup2-shuffle assertion, and the flag isn't + // defined on macOS — keeping it portable lets the test compile + // everywhere. let (a, b) = socketpair( AddressFamily::Unix, SockType::Stream, None, - SockFlag::SOCK_CLOEXEC, + SockFlag::empty(), ) .unwrap(); (a.into_raw_fd(), b.into_raw_fd()) diff --git a/crates/handoff/src/supervisor.rs b/crates/handoff/src/supervisor.rs index b7f8f9a..23343ca 100644 --- a/crates/handoff/src/supervisor.rs +++ b/crates/handoff/src/supervisor.rs @@ -775,12 +775,35 @@ fn send_best_effort_abort( } fn make_socketpair() -> Result<(UnixStream, UnixStream)> { + // Linux and the BSDs create the pair close-on-exec atomically via the + // SOCK_CLOEXEC flag.
macOS doesn't define that flag for socketpair (nix + // won't even compile the symbol there), so set FD_CLOEXEC with a + // follow-up fcntl on each end. The non-atomic window on macOS is + // theoretical: this runs on the rare swap path, not under a fork storm. + #[cfg(not(target_os = "macos"))] let (a, b) = socketpair( AddressFamily::Unix, SockType::Stream, None, SockFlag::SOCK_CLOEXEC, )?; + #[cfg(target_os = "macos")] + let (a, b) = { + use std::os::fd::AsFd; + let pair = socketpair( + AddressFamily::Unix, + SockType::Stream, + None, + SockFlag::empty(), + )?; + for fd in [pair.0.as_fd(), pair.1.as_fd()] { + nix::fcntl::fcntl( + fd, + nix::fcntl::FcntlArg::F_SETFD(nix::fcntl::FdFlag::FD_CLOEXEC), + )?; + } + pair + }; // SAFETY: both ends are freshly owned by us, valid, non-blocking unset. let s_a = unsafe { use std::os::fd::FromRawFd;