diff --git a/.github/workflows/update-agent.yml b/.github/workflows/update-agent.yml index 614224ef1..d689e200e 100644 --- a/.github/workflows/update-agent.yml +++ b/.github/workflows/update-agent.yml @@ -25,7 +25,6 @@ jobs: sudo apt-get update sudo apt update && sudo apt install -y \ qemu-system-x86 qemu-utils \ - podman \ squashfs-tools \ parted e2fsprogs \ genisoimage \ @@ -56,3 +55,10 @@ jobs: ./qemu-runner.js mock mock_path ./qemu-runner.js run ../../target/debug/orb-update-agent mock_path ./qemu-runner.js check mock_path + + - name: Run bidiff cache corruption regression test + run: | + cd update-agent/t + ./qemu-runner.js mock-bidiff-cache-corruption mock_path_bidiff + ./qemu-runner.js run-bidiff-cache-corruption ../../target/debug/orb-update-agent mock_path_bidiff + ./qemu-runner.js check-bidiff-cache-corruption mock_path_bidiff diff --git a/update-agent/t/podman-runner.nu b/update-agent/t/podman-runner.nu index 3f47ec046..c823f0a75 100755 --- a/update-agent/t/podman-runner.nu +++ b/update-agent/t/podman-runner.nu @@ -6,11 +6,25 @@ # 1. Create a mockup directory: ./podman-runner.nu mock # 2. Run the OTA on the mockup directory: ./podman-runner.nu run # 3. Check that result of OTA is what is expected: ./podman-runner.nu check +# +# Reproducer for stale verified-marker bidiff corruption regression: +# 1. Create mockup: ./podman-runner.nu mock-bidiff-cache-corruption +# 2. Run update-agent: ./podman-runner.nu run-bidiff-cache-corruption +# 3. 
# Hash recorded in the claim for the `system` source: the SHA-256 of a fixed
# marker string, not of the patch bytes written by
# populate-mnt-bidiff-cache-corruption.
def bidiff-corruption-source-hash [] {
    "bidiff-corruption-expected-payload" | hash sha256
}

# File name of the source inside the downloads directory: `system-<hash>`,
# where <hash> is the value returned by bidiff-corruption-source-hash.
def bidiff-corruption-source-name [] {
    let hash = (bidiff-corruption-source-hash)
    $"system-($hash)"
}

# Populate directory `d` with the fixtures for the bidiff cache corruption
# reproducer and return `d`:
#   - bidiff-corrupt.zst: a fixed 22-byte blob saved raw
#   - claim.json: a claim with one `system` component and one `system` source
#     of mime type application/zstd-bidiff whose recorded hash comes from
#     bidiff-corruption-source-hash and whose size is the blob's size on disk
#   - updates/: an empty directory
def populate-mnt-bidiff-cache-corruption [d] {
    let source_hash = (bidiff-corruption-source-hash)
    let source_name = (bidiff-corruption-source-name)
    let corrupt_patch = 0x[28 b5 2f fd 24 2a 04 80 f2 18 61 62 63 01 00 2c dd 10 ce 0d df 0e]
    $corrupt_patch | save $"($d)/bidiff-corrupt.zst" --raw
    # The size is read back from the saved file rather than taken from the literal.
    let source_size = (ls $"($d)/bidiff-corrupt.zst" | get size.0 | into int)
    # NOTE(review): this looks like the SHA-256 of empty input, matching the
    # component's declared size of 0 -- confirm.
    let empty_hash = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"

    echo {
        "version": "6.3.0-LL-prod",
        "manifest": {
            "magic": "some magic",
            "type": "normal",
            "components": [
                {
                    "name": "system",
                    "version-assert": "none",
                    "version": "none",
                    "size": 0,
                    "hash": $empty_hash,
                    "installation_phase": "normal"
                }
            ]
        },
        "manifest-sig": "TBD",
        "sources": {
            "system": {
                "hash": $source_hash,
                "mime_type": "application/zstd-bidiff",
                "name": "system",
                "size": $source_size,
                "url": $"/var/mnt/scratch/downloads/($source_name)"
            },
        },
        "system_components": {
            "system": {
                "type": "gpt",
                "value": {
                    "device": "emmc",
                    "label": "ROOT",
                    "redundancy": "redundant"
                }
            },
        }
    } | save $"($d)/claim.json"

    mkdir $"($d)/updates"
    return $d
}
# Create the mock directory tree for the bidiff cache corruption reproducer
# under `mock_path`: efivars, usr_persistent, the sd image, the mnt fixtures
# (via populate-mnt-bidiff-cache-corruption) and the systemctl mock.
export def "main mock-bidiff-cache-corruption" [mock_path] {
    mkdir $mock_path
    mkdir $"($mock_path)/efivars"
    let mock_efivars = populate-mock-efivars $"($mock_path)/efivars"
    mkdir $"($mock_path)/usr_persistent"
    let mock_usr_persistent = populate-mock-usr-persistent $"($mock_path)/usr_persistent"
    let sd = populate-mock-sd $"($mock_path)/sd"
    mkdir $"($mock_path)/mnt"
    let mock_mnt = populate-mnt-bidiff-cache-corruption $"($mock_path)/mnt"
    let mock_mnt = mock-systemctl $"($mock_path)/systemctl"
}

# Run `prog` inside a podman container against the mock tree at `mock_path`.
# Before the program starts, the fixture blob is copied into the downloads
# directory under the name the claim points at and an empty `.verified` file
# is created next to it. Combined stdout/stderr is saved to
# bidiff-cache-corruption.log and the exit code to
# bidiff-cache-corruption.exit-code, both under `mock_path`.
def "main run-bidiff-cache-corruption" [prog, mock_path] {
    let absolute_path = ($prog | path expand)
    let mock_path = ($mock_path | path expand)
    let source_name = (bidiff-corruption-source-name)
    mkdir /tmp/work
    mkdir /tmp/upper
    # Shell script executed inside the container; ($source_name) is
    # interpolated by Nushell before bash sees it.
    let cmd = $"
set -euo pipefail
mkdir -p /var/mnt/scratch/downloads
cp /var/mnt/bidiff-corrupt.zst /var/mnt/scratch/downloads/($source_name)
touch /var/mnt/scratch/downloads/($source_name).verified
/var/mnt/program --nodbus --update-location /var/mnt/claim.json --workspace /var/mnt/scratch --downloads /var/mnt/scratch/downloads --skip-version-asserts
"

    # /var/mnt is bind-mounted read-only; /var/mnt/scratch is a writable tmpfs
    # on top of it. `complete` captures stdout, stderr and the exit code
    # instead of aborting on a non-zero exit.
    let res = (podman run
        --rm
        -v $"($absolute_path):/var/mnt/program:Z"
        -w /var/mnt
        --security-opt=unmask=ALL
        $"--mount=type=bind,src=($mock_path)/efivars,dst=/sys/firmware/efi/efivars/,rw,relabel=shared,unbindable"
        --mount=type=bind,src=./orb_update_agent.conf,dst=/etc/orb_update_agent.conf,relabel=shared,ro
        --mount=type=bind,src=./os-release,dst=/etc/os-release,relabel=shared,ro
        $"--mount=type=bind,src=($mock_path)/usr_persistent,dst=/usr/persistent/,rw,relabel=shared"
        $"--mount=type=bind,src=($mock_path)/mnt,dst=/var/mnt,ro,relabel=shared"
        $"--mount=type=bind,src=($mock_path)/systemctl,dst=/usr/bin/systemctl,ro,relabel=shared"
        --mount=type=tmpfs,dst=/var/mnt/scratch/,rw
        $"--mount=type=bind,src=($mock_path)/sd,dst=/dev/mmcblk0,rw,relabel=shared"
        --volume="test:/sys/firmware:O,upperdir=/tmp/upper,workdir=/tmp/work"
        -e RUST_BACKTRACE
        quay.io/fedora/fedora-bootc:latest
        /bin/bash -lc $cmd | complete
    )

    let full_log = $"($res.stdout)\n($res.stderr)"
    $full_log | save --force $"($mock_path)/bidiff-cache-corruption.log"
    ($res.exit_code | into string) | save --force $"($mock_path)/bidiff-cache-corruption.exit-code"
}

# Validate the artifacts written by `main run-bidiff-cache-corruption`.
# Exits with status 3 when the program exited with code 0, when either
# expected hash-verification message is absent from the log, or when the log
# contains a patch-processor or zstd block-size message.
export def "main check-bidiff-cache-corruption" [mock_path] {
    let log = open --raw $"($mock_path)/bidiff-cache-corruption.log"
    let exit_code = (open --raw $"($mock_path)/bidiff-cache-corruption.exit-code" | str trim | into int)

    if $exit_code == 0 {
        log error "expected update-agent to fail, but it exited successfully"
        exit 3
    }
    if not ($log | str contains "failed verifying source component `system` against claim") {
        log error "missing expected source hash verification failure in logs"
        exit 3
    }
    if not ($log | str contains "mismatch between recorded and actual hashes") {
        log error "missing expected hash mismatch details in logs"
        exit 3
    }
    if ($log | str contains "failed to run patch processor") {
        log error "unexpectedly reached patch processing path"
        exit 3
    }
    if ($log | str contains "Blocksize was bigger than the absolute maximum") {
        log error "unexpectedly reached zstd block-size corruption failure path"
        exit 3
    }
}
// Log lines that must appear for the bidiff cache corruption regression run
// to count as having failed in the expected place.
const BIDIFF_CACHE_CORRUPTION_REQUIRED_MARKERS = [
  'failed verifying source component `system` against claim',
  'mismatch between recorded and actual hashes',
];
// Log lines that must never appear in that run.
const BIDIFF_CACHE_CORRUPTION_FORBIDDEN_MARKERS = [
  'failed to run patch processor',
  'Blocksize was bigger than the absolute maximum',
];
// File name (inside the mock directory) the captured run output is saved to.
const BIDIFF_CACHE_CORRUPTION_LOG = 'bidiff-cache-corruption.log';

/**
 * Hash recorded in the claim for the `system` source.
 *
 * @returns {string} Hex SHA-256 of a fixed marker string (not of the patch
 *   bytes written by populateMockMntBidiffCacheCorruption).
 */
function bidiffCorruptionSourceHash() {
  const hasher = createHash('sha256');
  hasher.update('bidiff-corruption-expected-payload');
  return hasher.digest('hex');
}

/**
 * File name of the source inside the downloads directory.
 *
 * @returns {string} `system-<hash>` using bidiffCorruptionSourceHash().
 */
function bidiffCorruptionSourceName() {
  return ['system', bidiffCorruptionSourceHash()].join('-');
}

/**
 * Write the fixtures for the bidiff cache corruption regression test into
 * `<dir>/mnt`: the fixed 22-byte `bidiff-corrupt.zst` blob, a `claim.json`
 * describing one `system` component and one zstd-bidiff `system` source, and
 * an empty `updates` directory.
 *
 * @param {string} dir - Mock root directory; `mnt` is created beneath it.
 * @returns {Promise<void>}
 */
async function populateMockMntBidiffCacheCorruption(dir) {
  const mountRoot = join(dir, 'mnt');
  await fs.mkdir(mountRoot, { recursive: true });

  // Same 22 bytes as the original array literal, spelled as a hex string.
  const patchFile = join(mountRoot, 'bidiff-corrupt.zst');
  const patchBytes = Buffer.from(
    '28b52ffd242a0480f21861626301002cdd10ce0ddf0e',
    'hex',
  );
  await fs.writeFile(patchFile, patchBytes);

  // The size is read back from disk, as the claim must describe the file as stored.
  const { size: patchSize } = await fs.stat(patchFile);
  const downloadName = bidiffCorruptionSourceName();
  const recordedHash = bidiffCorruptionSourceHash();

  const systemComponent = {
    name: 'system',
    'version-assert': 'none',
    version: 'none',
    size: 0,
    hash: 'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855',
    installation_phase: 'normal',
  };
  const systemSource = {
    hash: recordedHash,
    mime_type: 'application/zstd-bidiff',
    name: 'system',
    size: patchSize,
    url: `/mnt/scratch/downloads/${downloadName}`,
  };
  const systemSlot = {
    type: 'gpt',
    value: {
      device: 'emmc',
      label: 'ROOT',
      redundancy: 'redundant',
    },
  };

  // Key order is kept identical so the serialized claim is byte-for-byte the same.
  const claim = {
    version: '6.3.0-LL-prod',
    manifest: {
      magic: 'some magic',
      type: 'normal',
      components: [systemComponent],
    },
    'manifest-sig': 'TBD',
    sources: { system: systemSource },
    system_components: { system: systemSlot },
  };

  const claimFile = join(mountRoot, 'claim.json');
  await fs.writeFile(claimFile, JSON.stringify(claim, null, 2));
  await fs.mkdir(join(mountRoot, 'updates'), { recursive: true });
}
[]; const cloudInitDir = join(dir, 'cloud-init'); await fs.mkdir(cloudInitDir, { recursive: true }); @@ -406,6 +485,7 @@ runcmd: - printf '\\x03\\x00\\x00\\x00' > /tmp/efi_retry_b && efivar -n 781e084c-a330-417c-b678-38e696380cb9-RootfsRetryCountB -w -f /tmp/efi_retry_b - systemctl daemon-reload - setenforce 0 +${preStartCommands.map((cmd) => ` - ${cmd}`).join('\n')} - systemctl start worldcoin-update-agent.service - journalctl -fu worldcoin-update-agent.service `; @@ -435,15 +515,67 @@ local-hostname: update-agent-test return cloudInitIso; } -async function waitForServiceCompletion(qemuProcess) { +async function waitForServiceCompletion(qemuProcess, options = {}) { + const successMarker = options.successMarker ?? 'Finished worldcoin-update-agent.service'; + const requiredMarkers = options.requiredMarkers ?? []; + const forbiddenMarkers = options.forbiddenMarkers ?? []; + const timeoutMs = options.timeoutMs ?? (5 * 60 * 1000); + const allowExitWithoutMarkers = options.allowExitWithoutMarkers ?? 
true; + + const hasRequiredMarkers = (output) => + requiredMarkers.length > 0 + && requiredMarkers.every((marker) => output.includes(marker)); + + const hasSuccessMarker = (output) => + successMarker && output.includes(successMarker); + + const forbiddenMarker = (output) => + forbiddenMarkers.find((marker) => output.includes(marker)); + // Happy path: wait for service completion const happyPath = new Promise(async (resolve, reject) => { let output = ''; + let settled = false; + let timeoutHandle = null; + let stdinHandler; + + const settleOk = () => { + if (settled) { + return; + } + settled = true; + if (timeoutHandle !== null) { + clearTimeout(timeoutHandle); + } + if (stdinHandler) { + process.stdin.off('data', stdinHandler); + } + resolve(output); + }; + + const settleErr = (err) => { + if (settled) { + return; + } + settled = true; + if (timeoutHandle !== null) { + clearTimeout(timeoutHandle); + } + if (stdinHandler) { + process.stdin.off('data', stdinHandler); + } + reject(err); + }; + + timeoutHandle = setTimeout(() => { + settleErr(new Error(`Timed out after ${timeoutMs}ms waiting for update-agent output`)); + }, timeoutMs); // Forward stdin to QEMU process - process.stdin.on('data', (data) => { + stdinHandler = (data) => { qemuProcess.stdin.write(data); - }); + }; + process.stdin.on('data', stdinHandler); // Read from stdout using ReadableStream const stdoutReader = qemuProcess.stdout.getReader(); @@ -460,15 +592,29 @@ async function waitForServiceCompletion(qemuProcess) { output += dataStr; process.stdout.write(dataStr); + const forbidden = forbiddenMarker(output); + if (forbidden) { + settleErr( + new Error(`Observed forbidden marker in logs: ${forbidden}`), + ); + return; + } + + if (hasRequiredMarkers(output)) { + Logger.info('Observed expected regression markers'); + settleOk(); + return; + } + // Check if completion marker exists - if (output.includes('Finished worldcoin-update-agent.service')) { + if (hasSuccessMarker(output)) { Logger.info('Service 
completed successfully'); - resolve('service-completed'); + settleOk(); return; } } } catch (error) { - reject(error); + settleErr(error); } }; @@ -480,7 +626,21 @@ async function waitForServiceCompletion(qemuProcess) { if (done) break; const dataStr = new TextDecoder().decode(value); + output += dataStr; process.stderr.write(dataStr); + + const forbidden = forbiddenMarker(output); + if (forbidden) { + settleErr( + new Error(`Observed forbidden marker in logs: ${forbidden}`), + ); + return; + } + + if (hasRequiredMarkers(output) || hasSuccessMarker(output)) { + settleOk(); + return; + } } } catch (error) { // Stderr errors are non-fatal @@ -489,14 +649,30 @@ async function waitForServiceCompletion(qemuProcess) { }; // Start both stream processors - Promise.all([processStdout(), processStderr()]).catch(reject); + Promise.all([processStdout(), processStderr()]).catch(settleErr); + + qemuProcess.exited + .then((exitCode) => { + if (hasRequiredMarkers(output) || hasSuccessMarker(output)) { + settleOk(); + return; + } + if (allowExitWithoutMarkers && exitCode === 0) { + Logger.info('QEMU exited cleanly before explicit success markers; treating as success'); + settleOk(); + return; + } + settleErr( + new Error(`QEMU exited before expected markers were observed (exit code ${exitCode})`), + ); + }) + .catch(settleErr); }); - // Wait for either the service to complete or the process to exit - await Promise.any([happyPath, qemuProcess.exited]); + return await happyPath; } -async function runQemu(programPath, mockPath) { +async function runQemu(programPath, mockPath, options = {}) { const absoluteProgramPath = resolve(programPath); const absoluteMockPath = resolve(mockPath); @@ -505,7 +681,9 @@ async function runQemu(programPath, mockPath) { const mntImg = join(absoluteMockPath, 'mnt.img'); // Recreate cloud-init ISO with the actual program path - const cloudInitIso = await createCloudInit(absoluteMockPath, absoluteProgramPath); + const cloudInitIso = await 
/**
 * Create the mock environment for the bidiff cache corruption regression
 * test: cloud image, OVMF files, persistent data, the corruption-specific
 * mnt fixtures, the mock disk, a cloud-init ISO and the mock filesystems.
 *
 * @param {string} mockPath - Directory to create the mock environment in.
 * @returns {Promise<void>}
 */
async function handleMockBidiffCacheCorruption(mockPath) {
  Logger.info(`Creating bidiff cache corruption mock environment at ${mockPath}`);

  await fs.mkdir(mockPath, { recursive: true });
  await downloadFedoraCloudImage(mockPath);
  await copyOvmfFiles(mockPath);
  const persistent = await createMockUsrPersistent(mockPath);
  await populateMockMntBidiffCacheCorruption(mockPath);
  await createMockDisk(mockPath, persistent);

  // No program path yet; runQemu recreates the ISO with the real one.
  await createCloudInit(mockPath, null, {
    preStartCommands: [],
  });
  await createMockFilesystems(mockPath);

  Logger.info('Bidiff cache corruption mock environment created successfully');
}

/**
 * Run the update-agent in QEMU against an existing mock environment.
 *
 * @param {string} programPath - Path to the update-agent binary.
 * @param {string} mockPath - Mock environment directory.
 * @returns {Promise<void>}
 */
async function handleRun(programPath, mockPath) {
  Logger.info(`Running update-agent test: ${programPath} in ${mockPath}`);
  await runQemu(programPath, mockPath);
}

/**
 * Inspect a captured regression log for marker violations.
 *
 * @param {string} log - Captured update-agent output.
 * @returns {string|null} Description of the first violation (a missing
 *   required marker is reported before a forbidden one), or null when the
 *   log contains every required marker and no forbidden marker.
 */
function findBidiffCacheCorruptionLogProblem(log) {
  const missing = BIDIFF_CACHE_CORRUPTION_REQUIRED_MARKERS.find(
    (marker) => !log.includes(marker),
  );
  if (missing !== undefined) {
    return `Missing expected marker in regression log: ${missing}`;
  }
  const forbidden = BIDIFF_CACHE_CORRUPTION_FORBIDDEN_MARKERS.find(
    (marker) => log.includes(marker),
  );
  if (forbidden !== undefined) {
    return `Observed forbidden marker in regression log: ${forbidden}`;
  }
  return null;
}

/**
 * Run the bidiff cache corruption regression scenario in QEMU.
 *
 * Before the service starts, the fixture blob is copied into the downloads
 * directory under the name the claim points at and an empty `.verified` file
 * is created next to it. The captured output is written to
 * BIDIFF_CACHE_CORRUPTION_LOG inside `mockPath`.
 *
 * @param {string} programPath - Path to the update-agent binary.
 * @param {string} mockPath - Mock environment directory.
 * @returns {Promise<void>}
 * @throws {Error} When the run ends without the required markers or with a
 *   forbidden one; also propagates any rejection from runQemu.
 */
async function handleRunBidiffCacheCorruption(programPath, mockPath) {
  Logger.info(`Running bidiff cache corruption regression test: ${programPath} in ${mockPath}`);
  const sourceName = bidiffCorruptionSourceName();
  const output = await runQemu(programPath, mockPath, {
    preStartCommands: [
      'mkdir -p /mnt/scratch/downloads',
      `cp /mnt/bidiff-corrupt.zst /mnt/scratch/downloads/${sourceName}`,
      `touch /mnt/scratch/downloads/${sourceName}.verified`,
    ],
    // Must be '' rather than null: the waiter resolves this option with
    // `?? <default marker>`, so null would silently re-enable the default
    // "Finished ..." success marker. An empty string is not nullish and is
    // falsy in the waiter's success check, which disables the marker.
    successMarker: '',
    requiredMarkers: BIDIFF_CACHE_CORRUPTION_REQUIRED_MARKERS,
    forbiddenMarkers: BIDIFF_CACHE_CORRUPTION_FORBIDDEN_MARKERS,
    timeoutMs: 8 * 60 * 1000,
    allowExitWithoutMarkers: false,
  });

  // Persist the log first so it is available for inspection even if the
  // validation below fails.
  await fs.writeFile(join(mockPath, BIDIFF_CACHE_CORRUPTION_LOG), output);

  // runQemu does not forward allowExitWithoutMarkers to the waiter, so a
  // clean QEMU exit without the markers would otherwise pass this step.
  // Enforce the expectation here instead of relying on that option.
  const problem = findBidiffCacheCorruptionLogProblem(output);
  if (problem !== null) {
    throw new Error(problem);
  }
}

/**
 * Validate the log written by handleRunBidiffCacheCorruption.
 *
 * @param {string} mockPath - Mock environment directory containing the log.
 * @returns {Promise<void>}
 * @throws {Error} When a required marker is missing or a forbidden marker is
 *   present; also propagates the read error when the log does not exist.
 */
async function handleCheckBidiffCacheCorruption(mockPath) {
  Logger.info(`Checking bidiff cache corruption regression results in ${mockPath}`);
  const logPath = join(mockPath, BIDIFF_CACHE_CORRUPTION_LOG);
  const log = await fs.readFile(logPath, 'utf8');

  const problem = findBidiffCacheCorruptionLogProblem(log);
  if (problem !== null) {
    throw new Error(problem);
  }
  Logger.info('Bidiff cache corruption regression check completed successfully');
}
-727,6 +968,27 @@ async function main() { await handleCheck(args[1]); break; + case 'mock-bidiff-cache-corruption': + if (args.length !== 2) { + throw new Error('Usage: ./qemu-runner.js mock-bidiff-cache-corruption '); + } + await handleMockBidiffCacheCorruption(args[1]); + break; + + case 'run-bidiff-cache-corruption': + if (args.length !== 3) { + throw new Error('Usage: ./qemu-runner.js run-bidiff-cache-corruption '); + } + await handleRunBidiffCacheCorruption(args[1], args[2]); + break; + + case 'check-bidiff-cache-corruption': + if (args.length !== 2) { + throw new Error('Usage: ./qemu-runner.js check-bidiff-cache-corruption '); + } + await handleCheckBidiffCacheCorruption(args[1]); + break; + case 'clean': if (args.length !== 2) { throw new Error('Usage: ./qemu-runner.js clean ');