Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ dist/
/init
/cryptos-install
/cryptos-sbkey
/cryptos-switchroot

# Secure Boot signing material (generated by cryptos-sbkey; never commit keys)
sb.key
Expand Down
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ cmd/
cryptosctl/ # operator CLI (the only management surface on a standalone node)
cryptos-install/ # bare-metal disk installer (GPT + ESP + UKI)
cryptos-sbkey/ # Secure Boot signing key + cert generator (for db enrollment)
cryptos-switchroot/ # shim /init: loop-mounts the SquashFS root and pivots into it
internal/
init/ # supervisor + boot bring-up
netlink/ # NIC bring-up via rtnetlink
Expand All @@ -34,6 +35,7 @@ internal/
grpc/ # mTLS gRPC server, RPC handlers
node/ # typed etcd state layer + gRPC Identity/Status/Config providers
install/ # bare-metal disk provisioning (partition plan + UKI install)
switchroot/ # SquashFS-root pivot sequence (loop-mount + switch_root)
audit/ # hash-chained audit log
config/ # machine config parser + validator
bootstrap/ # bootstrap admin cert loading + first-ceremony rotation
Expand Down
17 changes: 10 additions & 7 deletions build/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,15 +38,18 @@ Driven by the `Taskfile.yml` targets:
- `SB_KEY` / `SB_CERT` — the Secure Boot signing key + cert (ephemeral in
CI smoke tests; hardware-token key for tagged releases).

## Rootfs delivery

`uki/assemble.sh` defaults to `ROOTFS_MODE=squashfs` (the spec target): a tiny
shim initramfs — the `cryptos-switchroot` `/init` plus the SquashFS image —
loop-mounts the read-only SquashFS and `switch_root`s into it, so the real
PID 1 runs from an immutable, RAM-resident root. `ROOTFS_MODE=initramfs` is a
bring-up fallback that runs init directly from a writable cpio tree. The pivot
sequence is unit-tested; the boot itself is validated in QEMU on a real host.

## Open decisions to finalize during Linux validation

1. **Rootfs delivery.** The spec target is a read-only **SquashFS** root
(`squashfs/build.sh` produces it). The draft `uki/assemble.sh` instead
packs the rootfs tree as a **cpio initramfs** and runs init from there
(initramfs-as-root) — the simplest first-bootable path. Wiring the
SquashFS as the real root needs a small switch-root shim initramfs;
layer it on once the initramfs-as-root path boots.
2. **arm64.** Scripts parameterize `arch`, but only amd64 is exercised first.
1. **arm64.** Scripts parameterize `arch`, but only amd64 is exercised first.

## Not covered here (separate issues)

Expand Down
2 changes: 2 additions & 0 deletions build/kernel/cryptos.config
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ CONFIG_CRYPTO_SHA256=y
CONFIG_SQUASHFS=y
CONFIG_SQUASHFS_XZ=y
CONFIG_OVERLAY_FS=y
# Loop device: the switch-root shim loop-mounts the RAM-resident SquashFS.
CONFIG_BLK_DEV_LOOP=y
CONFIG_TMPFS=y
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
Expand Down
46 changes: 35 additions & 11 deletions build/uki/assemble.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,15 @@
# Assemble an unsigned Unified Kernel Image (kernel + initrd + cmdline)
# with ukify (systemd-stub). Output: build/out/cryptos-<arch>.uki.unsigned.
#
# OPEN DECISION (finalize on Linux): the rootfs delivery. This draft packs
# the rootfs tree as a cpio initramfs and uses it as the initrd
# (initramfs-as-root) — the simplest first-bootable path. The spec target
# is the read-only SquashFS root (built by build/squashfs/build.sh); wiring
# it as root needs a small switch-root shim initramfs, layered on once the
# initramfs-as-root path boots. See build/README.md.
# Rootfs delivery is selected by ROOTFS_MODE:
# squashfs (default) — the spec target: a tiny shim initramfs (the
# cryptos-switchroot /init plus the SquashFS image) that
# loop-mounts the read-only SquashFS and switch_roots into it.
# initramfs — pack the rootfs tree directly as the initrd and run
# the real init from it (initramfs-as-root). A fallback for
# bring-up; the running root is then writable tmpfs, not the
# immutable SquashFS.
# Boot validation of either path is done in QEMU on a real host.
set -euo pipefail

here="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
Expand All @@ -35,16 +38,37 @@ esac
SOURCE_DATE_EPOCH="$(git -C "$root" log -1 --format=%ct)"
export SOURCE_DATE_EPOCH

# Build a reproducible cpio initramfs from the rootfs tree.
mode="${ROOTFS_MODE:-squashfs}"
initrd="$root/build/.work/initrd-$arch.cpio.gz"
( cd "$tree" && find . -print0 | sort -z \
| cpio --null --create --format=newc --owner=0:0 2>/dev/null \
| gzip -n ) > "$initrd"

# pack_initrd DIR — write a deterministic newc cpio.gz of DIR to $initrd.
#   - LC_ALL=C sort: entry order must not depend on the builder's locale.
#   - --owner=0:0 / --reproducible: no host uid/gid, inode or device numbers
#     in the archive headers.
#   - --quiet: suppress only the "N blocks" chatter, so genuine cpio errors
#     still reach stderr (previously hidden by 2>/dev/null).
#   - gzip -n: no name/timestamp in the gzip header.
pack_initrd() {
  ( cd "$1" && find . -print0 | LC_ALL=C sort -z \
      | cpio --null --create --format=newc --owner=0:0 --reproducible --quiet \
      | gzip -n ) > "$initrd"
}

case "$mode" in
  squashfs)
    sqfs="$out/rootfs-$arch.squashfs"
    [ -f "$sqfs" ] || { echo "run rootfs build first (missing $sqfs)" >&2; exit 1; }
    # Shim initramfs = the switch-root /init + the SquashFS image. The shim
    # loop-mounts the SquashFS read-only and pivots into it.
    shim="$root/build/.work/shim-$arch"
    rm -rf "$shim"; mkdir -p "$shim"
    # The shim only ever runs as PID 1 on Linux, so pin GOOS rather than
    # inheriting it from the build host's environment.
    GOOS=linux GOARCH="$arch" CGO_ENABLED=0 go build -trimpath -ldflags="-s -w" \
      -o "$shim/init" "$root/cmd/cryptos-switchroot"
    cp "$sqfs" "$shim/rootfs.squashfs"
    # The shim tree is created fresh on every run; clamp its mtimes so the
    # build time does not leak into the cpio headers.
    find "$shim" -exec touch -d "@${SOURCE_DATE_EPOCH}" {} +
    pack_initrd "$shim"
    ;;
  initramfs)
    # Pack the rootfs tree directly; the real init runs from it.
    pack_initrd "$tree"
    ;;
  *) echo "unknown ROOTFS_MODE: $mode (want squashfs|initramfs)" >&2; exit 1 ;;
esac

ukify build \
--linux="$out/vmlinuz-$arch" \
--initrd="$initrd" \
--cmdline="$cmdline" \
--os-release="@$root/build/uki/os-release" \
--output="$out/cryptos-$arch.uki.unsigned"
echo "uki: wrote $out/cryptos-$arch.uki.unsigned (profile=$profile)"
echo "uki: wrote $out/cryptos-$arch.uki.unsigned (profile=$profile, rootfs=$mode)"
40 changes: 40 additions & 0 deletions cmd/cryptos-switchroot/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
// Command cryptos-switchroot is the shim init for the SquashFS-root boot
// path. It is the /init of a tiny initramfs that also carries the read-only
// SquashFS rootfs image; it loop-mounts that image and switch_roots into it
// so the real PID 1 (the Go init baked into the SquashFS) runs from an
// immutable, RAM-resident read-only root. See internal/switchroot.
//
// It is only ever run as PID 1 on Linux. On failure there is nowhere to go,
// so it panics: PID 1 exiting makes the kernel panic (and reboot, when a
// panic timeout is configured), which is the correct fail-closed behavior
// for a trust anchor that can't boot.
package main

/*
Apache License 2.0

Copyright 2026 Shane

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

import (
"os"

"github.com/CryptOS-PKI/cryptos/internal/switchroot"
)

// main runs the SquashFS-root pivot against the real system, handing the
// shim's environment on to the real init. Run only comes back on failure;
// the panic then takes PID 1 down with the reason attached.
func main() {
	err := switchroot.Run(switchroot.NewSystem(), os.Environ())
	if err == nil {
		return
	}
	panic("cryptos-switchroot: " + err.Error())
}
109 changes: 109 additions & 0 deletions internal/switchroot/switchroot.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
// Package switchroot is the shim init for the SquashFS-root boot path. The
// UKI carries a tiny initramfs whose /init is this shim plus the read-only
// SquashFS rootfs image. The shim loop-mounts the SquashFS and switch_roots
// into it, so the real PID 1 (the Go init baked into the SquashFS) runs from
// an immutable, RAM-resident read-only root.
//
// The shim mounts only /dev (needed to set up the loop device); it leaves
// /proc, /sys, /run, and /tmp for the real init's EarlyMounts so the two
// never fight over the same mount.
package switchroot

/*
Apache License 2.0

Copyright 2026 Shane

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

import (
"errors"
"fmt"
"io/fs"
)

// Fixed paths used by the pivot sequence.
const (
	// SquashFSPath is the location of the SquashFS image inside the shim
	// initramfs.
	SquashFSPath = "/rootfs.squashfs"
	// NewRoot is the directory the SquashFS is mounted on ahead of the pivot.
	NewRoot = "/sysroot"
	// InitPath is the real PID 1 within the SquashFS; it is exec'd once the
	// pivot is complete.
	InitPath = "/init"
)

// Mount flags, spelled out as the stable MS_* numeric values so the sequence
// logic stays OS-independent and unit-testable.
const (
	msRDONLY uintptr = 0x1    // MS_RDONLY
	msMOVE   uintptr = 0x2000 // MS_MOVE
)

// System abstracts the OS operations Run issues. It is injected so the pivot
// sequence can be exercised in unit tests without real mounts or loop
// devices.
type System interface {
	// Mkdir creates the directory path with permission bits perm.
	Mkdir(path string, perm uint32) error
	// Mount mounts source on target as fstype with the given flags and
	// filesystem-specific data string.
	Mount(source string, target string, fstype string, flags uintptr, data string) error
	// AttachLoop binds backingFile to a free loop device and reports the
	// device path (for example /dev/loop0).
	AttachLoop(backingFile string) (string, error)
	// Chdir changes the working directory to dir.
	Chdir(dir string) error
	// Chroot changes the root directory to dir.
	Chroot(dir string) error
	// Exec replaces the current process image (execve). It does not return
	// when it succeeds.
	Exec(argv0 string, argv []string, envv []string) error
}

// Run carries out the SquashFS-root pivot through sys:
//
//  1. make sure /dev exists and mount devtmpfs on it, so a loop device can
//     be set up;
//  2. make sure NewRoot exists, attach SquashFSPath to a loop device and
//     mount that device read-only on NewRoot;
//  3. switch_root: chdir into NewRoot, move the mount onto /, chroot, chdir
//     to /, then exec InitPath with env as its environment.
//
// A successful Exec never returns, so Run always returns a non-nil error:
// either the wrapped failure of the first step that failed, or a sentinel
// if Exec itself came back with nil.
func Run(sys System, env []string) error {
	if err := mkdirIfMissing(sys, "/dev"); err != nil {
		return err
	}
	if err := sys.Mount("devtmpfs", "/dev", "devtmpfs", 0, "mode=0755"); err != nil {
		return fmt.Errorf("switchroot: mount /dev: %w", err)
	}

	if err := mkdirIfMissing(sys, NewRoot); err != nil {
		return err
	}
	loopDev, err := sys.AttachLoop(SquashFSPath)
	if err != nil {
		return fmt.Errorf("switchroot: attach loop for %s: %w", SquashFSPath, err)
	}
	if err := sys.Mount(loopDev, NewRoot, "squashfs", msRDONLY, ""); err != nil {
		return fmt.Errorf("switchroot: mount %s on %s: %w", loopDev, NewRoot, err)
	}

	// switch_root: make NewRoot the new / and exec the real init there. The
	// steps run strictly in order and stop at the first failure.
	pivot := []struct {
		what string
		do   func() error
	}{
		{"chdir " + NewRoot, func() error { return sys.Chdir(NewRoot) }},
		{"move mount to /", func() error { return sys.Mount(".", "/", "", msMOVE, "") }},
		{"chroot", func() error { return sys.Chroot(".") }},
		{"chdir /", func() error { return sys.Chdir("/") }},
		{"exec " + InitPath, func() error { return sys.Exec(InitPath, []string{InitPath}, env) }},
	}
	for _, step := range pivot {
		if err := step.do(); err != nil {
			return fmt.Errorf("switchroot: %s: %w", step.what, err)
		}
	}
	return errors.New("switchroot: exec returned without error")
}

// mkdirIfMissing creates path with mode 0755 and treats "already exists" as
// success.
func mkdirIfMissing(sys System, path string) error {
	err := sys.Mkdir(path, 0o755)
	if err == nil || errors.Is(err, fs.ErrExist) {
		return nil
	}
	return fmt.Errorf("switchroot: mkdir %s: %w", path, err)
}
94 changes: 94 additions & 0 deletions internal/switchroot/switchroot_linux.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
//go:build linux

package switchroot

/*
Apache License 2.0

Copyright 2026 Shane

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

import (
"errors"
"fmt"
"io/fs"

"golang.org/x/sys/unix"
)

// loopMajor is the device-node major number for loop devices. AttachLoop
// pairs it with the minor returned by LOOP_CTL_GET_FREE when it has to
// create the /dev/loopN node itself.
const loopMajor = 7

// linuxSystem is the production System: each method forwards straight to
// the matching golang.org/x/sys/unix call. It holds no state.
type linuxSystem struct{}

// NewSystem returns the System backed by real Linux syscalls.
func NewSystem() System {
	var sys linuxSystem
	return sys
}

// Mkdir forwards to unix.Mkdir.
func (linuxSystem) Mkdir(path string, perm uint32) error {
	return unix.Mkdir(path, perm)
}

// Mount forwards to unix.Mount.
func (linuxSystem) Mount(source, target, fstype string, flags uintptr, data string) error {
	return unix.Mount(source, target, fstype, flags, data)
}

// Chdir forwards to unix.Chdir.
func (linuxSystem) Chdir(dir string) error {
	return unix.Chdir(dir)
}

// Chroot forwards to unix.Chroot.
func (linuxSystem) Chroot(dir string) error {
	return unix.Chroot(dir)
}

// Exec forwards to unix.Exec, replacing the current process image.
func (linuxSystem) Exec(argv0 string, argv, envv []string) error {
	return unix.Exec(argv0, argv, envv)
}

// AttachLoop attaches backingFile to the loop device the kernel reports as
// free and returns that device's path (/dev/loopN). Both the backing file
// and the loop device are opened read-only. If the device node is not
// present yet (devtmpfs has not materialized it), it is created with mknod;
// an already existing node is accepted as-is.
//
// Every descriptor opened here is closed again before returning.
func (linuxSystem) AttachLoop(backingFile string) (string, error) {
	// Close errors are deliberately ignored: nothing useful can be done with
	// them and they must not mask the primary result.
	closeFd := func(fd int) { _ = unix.Close(fd) }

	imageFd, err := unix.Open(backingFile, unix.O_RDONLY|unix.O_CLOEXEC, 0)
	if err != nil {
		return "", fmt.Errorf("open backing file: %w", err)
	}
	defer closeFd(imageFd)

	ctrlFd, err := unix.Open("/dev/loop-control", unix.O_RDWR|unix.O_CLOEXEC, 0)
	if err != nil {
		return "", fmt.Errorf("open /dev/loop-control: %w", err)
	}
	defer closeFd(ctrlFd)

	minor, err := unix.IoctlRetInt(ctrlFd, unix.LOOP_CTL_GET_FREE)
	if err != nil {
		return "", fmt.Errorf("LOOP_CTL_GET_FREE: %w", err)
	}

	devPath := fmt.Sprintf("/dev/loop%d", minor)
	devNum := unix.Mkdev(loopMajor, uint32(minor))
	switch err := unix.Mknod(devPath, unix.S_IFBLK|0o600, int(devNum)); {
	case err == nil, errors.Is(err, fs.ErrExist):
		// The node is in place, whether we just made it or it already existed.
	default:
		return "", fmt.Errorf("mknod %s: %w", devPath, err)
	}

	devFd, err := unix.Open(devPath, unix.O_RDONLY|unix.O_CLOEXEC, 0)
	if err != nil {
		return "", fmt.Errorf("open %s: %w", devPath, err)
	}
	defer closeFd(devFd)

	// The kernel takes its own reference to the backing file on LOOP_SET_FD,
	// so closing imageFd afterward is safe.
	if err := unix.IoctlSetInt(devFd, unix.LOOP_SET_FD, imageFd); err != nil {
		return "", fmt.Errorf("LOOP_SET_FD: %w", err)
	}
	return devPath, nil
}
Loading
Loading