From a91180dd91fdc088fd03a29407b67b148897c1f9 Mon Sep 17 00:00:00 2001 From: Bugs5382 Date: Thu, 4 Jun 2026 07:38:34 -0400 Subject: [PATCH] build: boot the SquashFS root via a switch-root shim initramfs --- .gitignore | 1 + README.md | 2 + build/README.md | 17 ++-- build/kernel/cryptos.config | 2 + build/uki/assemble.sh | 46 ++++++--- cmd/cryptos-switchroot/main.go | 40 ++++++++ internal/switchroot/switchroot.go | 109 ++++++++++++++++++++ internal/switchroot/switchroot_linux.go | 94 ++++++++++++++++++ internal/switchroot/switchroot_other.go | 39 ++++++++ internal/switchroot/switchroot_test.go | 126 ++++++++++++++++++++++++ 10 files changed, 458 insertions(+), 18 deletions(-) create mode 100644 cmd/cryptos-switchroot/main.go create mode 100644 internal/switchroot/switchroot.go create mode 100644 internal/switchroot/switchroot_linux.go create mode 100644 internal/switchroot/switchroot_other.go create mode 100644 internal/switchroot/switchroot_test.go diff --git a/.gitignore b/.gitignore index 10ae821..0518ef6 100644 --- a/.gitignore +++ b/.gitignore @@ -18,6 +18,7 @@ dist/ /init /cryptos-install /cryptos-sbkey +/cryptos-switchroot # Secure Boot signing material (generated by cryptos-sbkey; never commit keys) sb.key diff --git a/README.md b/README.md index 5e536ca..85507bb 100644 --- a/README.md +++ b/README.md @@ -21,6 +21,7 @@ cmd/ cryptosctl/ # operator CLI (the only management surface on a standalone node) cryptos-install/ # bare-metal disk installer (GPT + ESP + UKI) cryptos-sbkey/ # Secure Boot signing key + cert generator (for db enrollment) + cryptos-switchroot/ # shim /init: loop-mounts the SquashFS root and pivots into it internal/ init/ # supervisor + boot bring-up netlink/ # NIC bring-up via rtnetlink @@ -34,6 +35,7 @@ internal/ grpc/ # mTLS gRPC server, RPC handlers node/ # typed etcd state layer + gRPC Identity/Status/Config providers install/ # bare-metal disk provisioning (partition plan + UKI install) + switchroot/ # SquashFS-root pivot sequence 
(loop-mount + switch_root) audit/ # hash-chained audit log config/ # machine config parser + validator bootstrap/ # bootstrap admin cert loading + first-ceremony rotation diff --git a/build/README.md b/build/README.md index e04a787..9ca969b 100644 --- a/build/README.md +++ b/build/README.md @@ -38,15 +38,18 @@ Driven by the `Taskfile.yml` targets: - `SB_KEY` / `SB_CERT` — the Secure Boot signing key + cert (ephemeral in CI smoke tests; hardware-token key for tagged releases). +## Rootfs delivery + +`uki/assemble.sh` defaults to `ROOTFS_MODE=squashfs` (the spec target): a tiny +shim initramfs — the `cryptos-switchroot` `/init` plus the SquashFS image — +loop-mounts the read-only SquashFS and `switch_root`s into it, so the real +PID 1 runs from an immutable, RAM-resident root. `ROOTFS_MODE=initramfs` is a +bring-up fallback that runs init directly from a writable cpio tree. The pivot +sequence is unit-tested; the boot itself is validated in QEMU on a real host. + ## Open decisions to finalize during Linux validation -1. **Rootfs delivery.** The spec target is a read-only **SquashFS** root - (`squashfs/build.sh` produces it). The draft `uki/assemble.sh` instead - packs the rootfs tree as a **cpio initramfs** and runs init from there - (initramfs-as-root) — the simplest first-bootable path. Wiring the - SquashFS as the real root needs a small switch-root shim initramfs; - layer it on once the initramfs-as-root path boots. -2. **arm64.** Scripts parameterize `arch`, but only amd64 is exercised first. +1. **arm64.** Scripts parameterize `arch`, but only amd64 is exercised first. ## Not covered here (separate issues) diff --git a/build/kernel/cryptos.config b/build/kernel/cryptos.config index 54c559e..e32e1fd 100644 --- a/build/kernel/cryptos.config +++ b/build/kernel/cryptos.config @@ -43,6 +43,8 @@ CONFIG_CRYPTO_SHA256=y CONFIG_SQUASHFS=y CONFIG_SQUASHFS_XZ=y CONFIG_OVERLAY_FS=y +# Loop device: the switch-root shim loop-mounts the RAM-resident SquashFS. 
+CONFIG_BLK_DEV_LOOP=y CONFIG_TMPFS=y CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y diff --git a/build/uki/assemble.sh b/build/uki/assemble.sh index d743f8a..7ba63f0 100755 --- a/build/uki/assemble.sh +++ b/build/uki/assemble.sh @@ -4,12 +4,15 @@ # Assemble an unsigned Unified Kernel Image (kernel + initrd + cmdline) # with ukify (systemd-stub). Output: build/out/cryptos-.uki.unsigned. # -# OPEN DECISION (finalize on Linux): the rootfs delivery. This draft packs -# the rootfs tree as a cpio initramfs and uses it as the initrd -# (initramfs-as-root) — the simplest first-bootable path. The spec target -# is the read-only SquashFS root (built by build/squashfs/build.sh); wiring -# it as root needs a small switch-root shim initramfs, layered on once the -# initramfs-as-root path boots. See build/README.md. +# Rootfs delivery is selected by ROOTFS_MODE: +# squashfs (default) — the spec target: a tiny shim initramfs (the +# cryptos-switchroot /init plus the SquashFS image) that +# loop-mounts the read-only SquashFS and switch_roots into it. +# initramfs — pack the rootfs tree directly as the initrd and run +# the real init from it (initramfs-as-root). A fallback for +# bring-up; the running root is then writable tmpfs, not the +# immutable SquashFS. +# Boot validation of either path is done in QEMU on a real host. set -euo pipefail here="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" @@ -35,11 +38,32 @@ esac SOURCE_DATE_EPOCH="$(git -C "$root" log -1 --format=%ct)" export SOURCE_DATE_EPOCH -# Build a reproducible cpio initramfs from the rootfs tree. +mode="${ROOTFS_MODE:-squashfs}" initrd="$root/build/.work/initrd-$arch.cpio.gz" -( cd "$tree" && find . -print0 | sort -z \ - | cpio --null --create --format=newc --owner=0:0 2>/dev/null \ - | gzip -n ) > "$initrd" + +case "$mode" in + squashfs) + sqfs="$out/rootfs-$arch.squashfs" + [ -f "$sqfs" ] || { echo "run rootfs build first (missing $sqfs)" >&2; exit 1; } + # Shim initramfs = the switch-root /init + the SquashFS image.
The shim + # loop-mounts the SquashFS read-only and pivots into it. GOOS=linux is pinned so /init is a Linux binary whatever OS the build host runs. + shim="$root/build/.work/shim-$arch" + rm -rf "$shim"; mkdir -p "$shim" + GOOS=linux GOARCH="$arch" CGO_ENABLED=0 go build -trimpath -ldflags="-s -w" \ + -o "$shim/init" "$root/cmd/cryptos-switchroot" + cp "$sqfs" "$shim/rootfs.squashfs" + ( cd "$shim" && find . -print0 | sort -z \ + | cpio --null --create --format=newc --owner=0:0 2>/dev/null \ + | gzip -n ) > "$initrd" + ;; + initramfs) + # Pack the rootfs tree directly; the real init runs from it. + ( cd "$tree" && find . -print0 | sort -z \ + | cpio --null --create --format=newc --owner=0:0 2>/dev/null \ + | gzip -n ) > "$initrd" + ;; + *) echo "unknown ROOTFS_MODE: $mode (want squashfs|initramfs)" >&2; exit 1 ;; +esac ukify build \ --linux="$out/vmlinuz-$arch" \ @@ -47,4 +71,4 @@ ukify build \ --cmdline="$cmdline" \ --os-release="@$root/build/uki/os-release" \ --output="$out/cryptos-$arch.uki.unsigned" -echo "uki: wrote $out/cryptos-$arch.uki.unsigned (profile=$profile)" +echo "uki: wrote $out/cryptos-$arch.uki.unsigned (profile=$profile, rootfs=$mode)" diff --git a/cmd/cryptos-switchroot/main.go b/cmd/cryptos-switchroot/main.go new file mode 100644 index 0000000..4e459d0 --- /dev/null +++ b/cmd/cryptos-switchroot/main.go @@ -0,0 +1,40 @@ +// Command cryptos-switchroot is the shim init for the SquashFS-root boot +// path. It is the /init of a tiny initramfs that also carries the read-only +// SquashFS rootfs image; it loop-mounts that image and switch_roots into it +// so the real PID 1 (the Go init baked into the SquashFS) runs from an +// immutable, RAM-resident read-only root. See internal/switchroot. +// +// It is only ever run as PID 1 on Linux. On failure there is nowhere to go, +// so it panics — PID 1 dying triggers a kernel panic and reboot, which is +// the correct fail-closed behavior for a trust anchor that can't boot.
+package main + +/* +Apache License 2.0 + +Copyright 2026 Shane + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +import ( + "os" + + "github.com/CryptOS-PKI/cryptos/internal/switchroot" +) + +func main() { + if err := switchroot.Run(switchroot.NewSystem(), os.Environ()); err != nil { + panic("cryptos-switchroot: " + err.Error()) + } +} diff --git a/internal/switchroot/switchroot.go b/internal/switchroot/switchroot.go new file mode 100644 index 0000000..1775505 --- /dev/null +++ b/internal/switchroot/switchroot.go @@ -0,0 +1,109 @@ +// Package switchroot implements the shim init for the SquashFS-root boot +// path. The UKI carries a tiny initramfs holding this shim as /init plus the +// read-only SquashFS rootfs image. The shim loop-mounts the SquashFS and +// switch_roots into it, so the real PID 1 (the Go init baked into the +// SquashFS) runs from an immutable, RAM-resident read-only root. +// +// The shim mounts only /dev (needed to set up the loop device); it leaves +// /proc, /sys, /run, and /tmp for the real init's EarlyMounts so the two +// never fight over the same mount. +package switchroot + +/* +Apache License 2.0 + +Copyright 2026 Shane + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License.
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +import ( + "errors" + "fmt" + "io/fs" +) + +const ( + // SquashFSPath is where the SquashFS image sits in the shim initramfs. + SquashFSPath = "/rootfs.squashfs" + // NewRoot is the mountpoint the SquashFS is mounted at before pivoting. + NewRoot = "/sysroot" + // InitPath is the real PID 1 inside the SquashFS, exec'd after pivot. + InitPath = "/init" + + // msRDONLY / msMOVE are the stable MS_* flag values used here (defined + // locally so the sequence logic stays OS-independent and unit-testable). + msRDONLY uintptr = 1 << 0 // MS_RDONLY + msMOVE uintptr = 1 << 13 // MS_MOVE +) + +// System is the set of OS operations the pivot needs, injected so the +// sequence can be unit-tested without touching real mounts or loop devices. +type System interface { + Mkdir(path string, perm uint32) error + Mount(source, target, fstype string, flags uintptr, data string) error + // AttachLoop binds backingFile to a free loop device and returns its + // path (e.g. /dev/loop0). + AttachLoop(backingFile string) (string, error) + Chdir(dir string) error + Chroot(dir string) error + // Exec replaces the current process image (execve); on success it does + // not return. + Exec(argv0 string, argv, envv []string) error +} + +// Run performs the SquashFS-root pivot: +// +// 1. mount devtmpfs at /dev so the loop device can be set up; +// 2. loop-mount the SquashFS read-only at /sysroot; +// 3. switch_root into /sysroot and exec the real /init. +// +// On success Exec does not return; any return value is an error. env is handed unchanged to the real init as its environment.
+func Run(sys System, env []string) error { + if err := sys.Mkdir("/dev", 0o755); err != nil && !errors.Is(err, fs.ErrExist) { + return fmt.Errorf("switchroot: mkdir /dev: %w", err) + } + if err := sys.Mount("devtmpfs", "/dev", "devtmpfs", 0, "mode=0755"); err != nil { + return fmt.Errorf("switchroot: mount /dev: %w", err) + } + + if err := sys.Mkdir(NewRoot, 0o755); err != nil && !errors.Is(err, fs.ErrExist) { + return fmt.Errorf("switchroot: mkdir %s: %w", NewRoot, err) + } + + loop, err := sys.AttachLoop(SquashFSPath) + if err != nil { + return fmt.Errorf("switchroot: attach loop for %s: %w", SquashFSPath, err) + } + if err := sys.Mount(loop, NewRoot, "squashfs", msRDONLY, ""); err != nil { + return fmt.Errorf("switchroot: mount %s on %s: %w", loop, NewRoot, err) + } + + // switch_root: from inside NewRoot, move its mount over /, chroot into it, and exec the real init there. + if err := sys.Chdir(NewRoot); err != nil { + return fmt.Errorf("switchroot: chdir %s: %w", NewRoot, err) + } + if err := sys.Mount(".", "/", "", msMOVE, ""); err != nil { + return fmt.Errorf("switchroot: move mount to /: %w", err) + } + if err := sys.Chroot("."); err != nil { + return fmt.Errorf("switchroot: chroot: %w", err) + } + if err := sys.Chdir("/"); err != nil { + return fmt.Errorf("switchroot: chdir /: %w", err) + } + if err := sys.Exec(InitPath, []string{InitPath}, env); err != nil { + return fmt.Errorf("switchroot: exec %s: %w", InitPath, err) + } + return errors.New("switchroot: exec returned without error") +} diff --git a/internal/switchroot/switchroot_linux.go b/internal/switchroot/switchroot_linux.go new file mode 100644 index 0000000..15700c7 --- /dev/null +++ b/internal/switchroot/switchroot_linux.go @@ -0,0 +1,94 @@ +//go:build linux + +package switchroot + +/* +Apache License 2.0 + +Copyright 2026 Shane + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License.
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +import ( + "errors" + "fmt" + "io/fs" + + "golang.org/x/sys/unix" +) + +// loopMajor is the device-node major number for loop devices. +const loopMajor = 7 + +// linuxSystem is the real System backed by Linux syscalls. +type linuxSystem struct{} + +// NewSystem returns the real Linux System. +func NewSystem() System { return linuxSystem{} } + +func (linuxSystem) Mkdir(path string, perm uint32) error { + return unix.Mkdir(path, perm) +} + +func (linuxSystem) Mount(source, target, fstype string, flags uintptr, data string) error { + return unix.Mount(source, target, fstype, flags, data) +} + +func (linuxSystem) Chdir(dir string) error { return unix.Chdir(dir) } + +func (linuxSystem) Chroot(dir string) error { return unix.Chroot(dir) } + +func (linuxSystem) Exec(argv0 string, argv, envv []string) error { + return unix.Exec(argv0, argv, envv) +} + +// AttachLoop binds backingFile (read-only) to the first free loop device, +// creating the device node if devtmpfs has not yet materialized it. On success it returns the device path, /dev/loopN.
+func (linuxSystem) AttachLoop(backingFile string) (string, error) { + backingFd, err := unix.Open(backingFile, unix.O_RDONLY|unix.O_CLOEXEC, 0) + if err != nil { + return "", fmt.Errorf("open backing file: %w", err) + } + defer func() { _ = unix.Close(backingFd) }() + + ctrl, err := unix.Open("/dev/loop-control", unix.O_RDWR|unix.O_CLOEXEC, 0) + if err != nil { + return "", fmt.Errorf("open /dev/loop-control: %w", err) + } + defer func() { _ = unix.Close(ctrl) }() + + num, err := unix.IoctlRetInt(ctrl, unix.LOOP_CTL_GET_FREE) + if err != nil { + return "", fmt.Errorf("LOOP_CTL_GET_FREE: %w", err) + } + + dev := fmt.Sprintf("/dev/loop%d", num) + node := int(unix.Mkdev(loopMajor, uint32(num))) + if err := unix.Mknod(dev, unix.S_IFBLK|0o600, node); err != nil && !errors.Is(err, fs.ErrExist) { + return "", fmt.Errorf("mknod %s: %w", dev, err) + } + + loopFd, err := unix.Open(dev, unix.O_RDONLY|unix.O_CLOEXEC, 0) + if err != nil { + return "", fmt.Errorf("open %s: %w", dev, err) + } + defer func() { _ = unix.Close(loopFd) }() + + // The kernel takes its own reference to backingFd, so it is safe to + // close ours afterward. + if err := unix.IoctlSetInt(loopFd, unix.LOOP_SET_FD, backingFd); err != nil { + return "", fmt.Errorf("LOOP_SET_FD: %w", err) + } + return dev, nil +} diff --git a/internal/switchroot/switchroot_other.go b/internal/switchroot/switchroot_other.go new file mode 100644 index 0000000..a8720c8 --- /dev/null +++ b/internal/switchroot/switchroot_other.go @@ -0,0 +1,39 @@ +//go:build !linux + +package switchroot + +/* +Apache License 2.0 + +Copyright 2026 Shane + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License.
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +import "errors" + +// errUnsupported is returned by the non-Linux stub; the shim only ever runs +// as PID 1 on Linux. The stub exists so the package builds on the dev host. +var errUnsupported = errors.New("switchroot: only supported on linux") + +type stubSystem struct{} + +// NewSystem returns the non-Linux stub, whose every method returns errUnsupported. +func NewSystem() System { return stubSystem{} } + +func (stubSystem) Mkdir(string, uint32) error { return errUnsupported } +func (stubSystem) Mount(string, string, string, uintptr, string) error { return errUnsupported } +func (stubSystem) AttachLoop(string) (string, error) { return "", errUnsupported } +func (stubSystem) Chdir(string) error { return errUnsupported } +func (stubSystem) Chroot(string) error { return errUnsupported } +func (stubSystem) Exec(string, []string, []string) error { return errUnsupported } diff --git a/internal/switchroot/switchroot_test.go b/internal/switchroot/switchroot_test.go new file mode 100644 index 0000000..c44264b --- /dev/null +++ b/internal/switchroot/switchroot_test.go @@ -0,0 +1,126 @@ +package switchroot + +/* +Apache License 2.0 + +Copyright 2026 Shane + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +import ( + "errors" + "io/fs" + "strings" + "testing" +) + +type mockSystem struct { + calls []string + loopDev string + // failOn simulates a failure at the named call: "attachloop", "exec", or "mount:<target>". + failOn string + mkdirErr error +} + +func (m *mockSystem) record(s string) { m.calls = append(m.calls, s) } + +func (m *mockSystem) Mkdir(path string, _ uint32) error { + m.record("mkdir " + path) + if m.mkdirErr != nil { + return m.mkdirErr + } + return nil +} + +func (m *mockSystem) Mount(source, target, fstype string, flags uintptr, _ string) error { + m.record("mount " + source + " " + target + " " + fstype) + if m.failOn == "mount:"+target { + return errors.New("boom") + } + return nil +} + +func (m *mockSystem) AttachLoop(backingFile string) (string, error) { + m.record("attachloop " + backingFile) + if m.failOn == "attachloop" { + return "", errors.New("boom") + } + return m.loopDev, nil +} + +func (m *mockSystem) Chdir(dir string) error { m.record("chdir " + dir); return nil } +func (m *mockSystem) Chroot(dir string) error { m.record("chroot " + dir); return nil } + +func (m *mockSystem) Exec(argv0 string, _, _ []string) error { + m.record("exec " + argv0) + if m.failOn == "exec" { + return errors.New("boom") + } + // A real Exec never returns; the mock returns nil and Run reports that + // as an error, which lets us assert exec was reached. + return nil +} + +func TestRun_Sequence(t *testing.T) { + m := &mockSystem{loopDev: "/dev/loop0"} + err := Run(m, nil) + // Exec "succeeds" in the mock, which Run treats as an error. + if err == nil || !strings.Contains(err.Error(), "exec returned") { + t.Fatalf("expected exec-returned error, got %v", err) + } + + want := []string{ + "mkdir /dev", + "mount devtmpfs /dev devtmpfs", + "mkdir /sysroot", + "attachloop /rootfs.squashfs", + "mount /dev/loop0 /sysroot squashfs", + "chdir /sysroot", + "mount .
/ ", + "chroot .", + "chdir /", + "exec /init", + } + got := strings.Join(m.calls, "|") + if got != strings.Join(want, "|") { + t.Errorf("call sequence:\n got %v\nwant %v", m.calls, want) + } +} + +func TestRun_LoopFailureStopsBeforePivot(t *testing.T) { + m := &mockSystem{loopDev: "/dev/loop0", failOn: "attachloop"} + if err := Run(m, nil); err == nil { + t.Fatal("expected error when AttachLoop fails") + } + for _, c := range m.calls { + if strings.HasPrefix(c, "chroot") || strings.HasPrefix(c, "exec") { + t.Errorf("pivot step %q ran despite loop failure", c) + } + } +} + +func TestRun_SquashFSMountFailure(t *testing.T) { + m := &mockSystem{loopDev: "/dev/loop0", failOn: "mount:/sysroot"} + if err := Run(m, nil); err == nil { + t.Fatal("expected error when the SquashFS mount fails") + } +} + +func TestRun_ExistingDirsAreOK(t *testing.T) { + // Mkdir returning fs.ErrExist must not abort the pivot. + m := &mockSystem{loopDev: "/dev/loop0", mkdirErr: fs.ErrExist} + if err := Run(m, nil); err == nil || !strings.Contains(err.Error(), "exec returned") { + t.Fatalf("ErrExist on mkdir should be tolerated, got %v", err) + } +}