# Source: GitHub workflow-run view for "VM Boot Smoke Test (Tier 2)" run #14
# (page chrome removed; actual workflow definition follows)

name: VM Boot Smoke Test (Tier 2)
# Tier 2: Real VM boot test on self-hosted KVM runner.
# All checks run inside the guest via SSH (not host-side).
#
# REQUIRES: self-hosted runner with KVM, QEMU, cloud-utils, and ssh-keygen.
# Gate: only runs when vars.HAS_KVM_RUNNER == 'true'.
#
# SECURITY: Never triggered by pull_request (fork safety for self-hosted runners).

on:
  workflow_dispatch:
    inputs:
      image_digest:
        description: "Exact image digest (sha256:...) to test. Required for manual runs."
        required: true
        type: string
  schedule:
    - cron: "17 3 * * *"

concurrency:
  group: vm-boot-smoke-${{ github.ref }}
  cancel-in-progress: true

permissions:
  contents: read
  packages: read

jobs:
  vm-boot-smoke:
    name: Boot VM & Verify Services
    if: vars.HAS_KVM_RUNNER == 'true'
    runs-on: [self-hosted, linux, x64, kvm]
    timeout-minutes: 30
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

      - name: Resolve image digest
        id: image
        env:
          GH_TOKEN: ${{ github.token }}
          # SECURITY: pass user-controllable expressions via env rather than
          # inlining ${{ }} into the script body — prevents shell injection
          # on this self-hosted runner.
          INPUT_DIGEST: ${{ inputs.image_digest }}
          REF_NAME: ${{ github.ref_name }}
        run: |
          if [ -n "$INPUT_DIGEST" ]; then
            DIGEST="$INPUT_DIGEST"
          else
            # Scheduled run: pull the digest recorded by the latest successful
            # build.yml run on this ref.
            echo "Fetching IMAGE_DIGEST artifact from latest build workflow run..."
            RUN_ID=$(gh api "repos/${{ github.repository }}/actions/workflows/build.yml/runs?branch=${REF_NAME}&status=success&per_page=1" \
              --jq '.workflow_runs[0].id' 2>/dev/null || echo "")
            if [ -z "$RUN_ID" ] || [ "$RUN_ID" = "null" ]; then
              echo "ERROR: No successful build.yml run found for ref ${REF_NAME}"
              exit 1
            fi
            echo "Using build run: ${RUN_ID}"
            gh run download "$RUN_ID" -n image-digest -D /tmp/image-digest || {
              echo "ERROR: Could not download image-digest artifact from build run ${RUN_ID}"
              exit 1
            }
            if [ ! -f /tmp/image-digest/IMAGE_DIGEST ]; then
              echo "ERROR: IMAGE_DIGEST file not found in downloaded artifact"
              exit 1
            fi
            DIGEST=$(tr -d '[:space:]' < /tmp/image-digest/IMAGE_DIGEST)
            if [ -z "$DIGEST" ] || [ "$DIGEST" = "unknown" ]; then
              echo "ERROR: IMAGE_DIGEST artifact contains invalid digest: '${DIGEST}'"
              exit 1
            fi
          fi
          # Whichever source it came from, only accept a well-formed digest —
          # this value is later spliced into an image reference.
          case "$DIGEST" in
            sha256:*) ;;
            *)
              echo "ERROR: digest must start with 'sha256:', got '${DIGEST}'"
              exit 1
              ;;
          esac
          echo "digest=${DIGEST}" >> "$GITHUB_OUTPUT"
          echo "Image under test: ${DIGEST}"

      - name: Generate SSH key pair
        run: |
          # Ephemeral keypair for this run only; injected into the guest via
          # cloud-init and deleted in the cleanup step.
          ssh-keygen -t ed25519 -f /tmp/vm-smoke-key -N "" -q
          echo "SSH_KEY=$(cat /tmp/vm-smoke-key.pub)" >> "$GITHUB_ENV"

      - name: Prepare cloud-init ISO
        run: |
          mkdir -p /tmp/vm-smoke
          # Unquoted EOF: ${SSH_KEY} must expand into the user-data.
          cat > /tmp/vm-smoke/user-data <<EOF
          #cloud-config
          ssh_authorized_keys:
            - ${SSH_KEY}
          runcmd:
            - systemctl is-system-running --wait || true
          EOF
          cat > /tmp/vm-smoke/meta-data <<'METAEOF'
          instance-id: secai-smoke-test
          local-hostname: secai-smoke
          METAEOF
          cloud-localds /tmp/vm-smoke/cloud-init.iso /tmp/vm-smoke/user-data /tmp/vm-smoke/meta-data

      - name: Build QCOW2 from image
        env:
          # SECURITY: digest originated from an input/artifact — keep it out of
          # the inline script text.
          DIGEST: ${{ steps.image.outputs.digest }}
        run: |
          bash scripts/vm/build-qcow2.sh --ci \
            --image-ref "ghcr.io/secai-hub/secai_os@${DIGEST}" \
            /tmp/vm-smoke

      - name: Boot VM in QEMU/KVM
        run: |
          # Guest SSH (22) is forwarded to host port 2222 via user-mode networking.
          qemu-system-x86_64 -enable-kvm -m 4G -smp 2 \
            -drive file=/tmp/vm-smoke/secai-os.qcow2,if=virtio,format=qcow2 \
            -cdrom /tmp/vm-smoke/cloud-init.iso \
            -netdev user,id=net0,hostfwd=tcp::2222-:22 \
            -device virtio-net-pci,netdev=net0 \
            -nographic &
          echo $! > /tmp/vm-smoke/qemu.pid
          echo "QEMU started (PID: $(cat /tmp/vm-smoke/qemu.pid))"

      - name: Wait for SSH readiness
        run: |
          echo "Waiting for guest SSH (up to 5 minutes)..."
          SSH_OPTS="-o ConnectTimeout=5 -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i /tmp/vm-smoke-key"
          READY=0
          for i in $(seq 1 60); do
            if ssh $SSH_OPTS -p 2222 root@localhost echo "SSH ready" 2>/dev/null; then
              echo "Guest SSH is up after $((i * 5)) seconds"
              READY=1
              break
            fi
            sleep 5
          done
          # BUGFIX: previously this step succeeded even when SSH never came up,
          # letting later check steps fail with confusing errors. Fail fast.
          if [ "$READY" -ne 1 ]; then
            echo "ERROR: guest SSH did not become ready within 5 minutes"
            exit 1
          fi

      - name: "Check: systemd system state"
        run: |
          SSH="ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i /tmp/vm-smoke-key -p 2222 root@localhost"
          STATE=$($SSH "systemctl is-system-running" 2>/dev/null || true)
          echo "System state: $STATE"
          # "degraded" is tolerated: some units may fail on first boot without
          # the wizard, but the system itself must be up.
          if [ "$STATE" != "running" ] && [ "$STATE" != "degraded" ]; then
            echo "FAIL: expected running or degraded, got $STATE"
            $SSH "systemctl --failed" || true
            exit 1
          fi

      - name: "Check: auth endpoint responds"
        run: |
          SSH="ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i /tmp/vm-smoke-key -p 2222 root@localhost"
          $SSH "curl -sf http://127.0.0.1:8480/api/auth/status" || {
            echo "FAIL: auth endpoint did not respond"
            exit 1
          }

      - name: "Check: health endpoint"
        run: |
          SSH="ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i /tmp/vm-smoke-key -p 2222 root@localhost"
          $SSH "curl -sf http://127.0.0.1:8480/health" || {
            echo "FAIL: health endpoint did not respond"
            exit 1
          }

      - name: "Check: disabled services stay inactive (offline_private profile)"
        run: |
          SSH="ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i /tmp/vm-smoke-key -p 2222 root@localhost"
          DISABLED=(
            secure-ai-diffusion.service
            secure-ai-airlock.service
            secure-ai-tor.service
            secure-ai-searxng.service
            secure-ai-search-mediator.service
            secure-ai-enable-diffusion.path
          )
          for svc in "${DISABLED[@]}"; do
            if $SSH "systemctl is-active --quiet $svc" 2>/dev/null; then
              echo "FAIL: $svc is active (should be disabled in offline_private)"
              exit 1
            fi
            echo "OK: $svc is inactive"
          done

      - name: "Check: default profile is offline_private"
        run: |
          SSH="ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i /tmp/vm-smoke-key -p 2222 root@localhost"
          PROFILE=$($SSH "cat /var/lib/secure-ai/state/profile.json 2>/dev/null || echo '{}'")
          echo "Profile state: $PROFILE"
          # On first boot without wizard, profile.json may not exist yet — fallback is offline_private

      - name: "Check: vault API responds"
        run: |
          SSH="ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i /tmp/vm-smoke-key -p 2222 root@localhost"
          $SSH "curl -sf http://127.0.0.1:8480/api/vault/status" || {
            echo "FAIL: vault status endpoint did not respond"
            exit 1
          }

      - name: "Check: quarantine directory exists"
        run: |
          SSH="ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i /tmp/vm-smoke-key -p 2222 root@localhost"
          $SSH "test -d /var/lib/secure-ai/quarantine" || {
            echo "FAIL: quarantine directory does not exist"
            exit 1
          }

      - name: "Check: rpm-ostree deployment"
        run: |
          SSH="ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i /tmp/vm-smoke-key -p 2222 root@localhost"
          $SSH "rpm-ostree status" || {
            echo "FAIL: rpm-ostree status failed"
            exit 1
          }

      - name: Cleanup VM
        if: always()
        run: |
          if [ -f /tmp/vm-smoke/qemu.pid ]; then
            kill "$(cat /tmp/vm-smoke/qemu.pid)" 2>/dev/null || true
          fi
          rm -rf /tmp/vm-smoke /tmp/vm-smoke-key /tmp/vm-smoke-key.pub