VM Boot Smoke Test (Tier 2) #14
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: VM Boot Smoke Test (Tier 2)

# Tier 2: Real VM boot test on self-hosted KVM runner.
# All checks run inside the guest via SSH (not host-side).
#
# REQUIRES: self-hosted runner with KVM, QEMU, cloud-utils, and ssh-keygen.
# Gate: only runs when vars.HAS_KVM_RUNNER == 'true'.
#
# SECURITY: Never triggered by pull_request (fork safety for self-hosted runners).

on:
  # Manual run: the caller pins the exact digest to test.
  workflow_dispatch:
    inputs:
      image_digest:
        description: "Exact image digest (sha256:...) to test. Required for manual runs."
        required: true
        type: string
  # Nightly run: the digest is resolved from the latest successful build.yml run.
  schedule:
    - cron: "17 3 * * *"

# One run per ref; a newer run cancels the one still in progress.
concurrency:
  group: vm-boot-smoke-${{ github.ref }}
  cancel-in-progress: true

# Any scope not listed here is set to "none" for GITHUB_TOKEN.
permissions:
  contents: read
  packages: read
  # The scheduled path lists workflow runs and downloads the image-digest
  # artifact through the Actions API, which needs read access to Actions.
  actions: read

jobs:
  vm-boot-smoke:
    name: Boot VM & Verify Services
    if: vars.HAS_KVM_RUNNER == 'true'
    runs-on: [self-hosted, linux, x64, kvm]
    timeout-minutes: 30
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          # No later step uses git credentials (gh is given GH_TOKEN via env),
          # so do not leave the token in .git/config on the self-hosted runner.
          persist-credentials: false
# Determines which image digest to test and exposes it as steps.image.outputs.digest.
# Manual runs use the workflow_dispatch input; scheduled runs read the
# IMAGE_DIGEST file from the image-digest artifact of the latest successful
# build.yml run on the current ref.
- name: Resolve image digest
  id: image
  shell: bash
  env:
    GH_TOKEN: ${{ github.token }}
    # Context values are passed through the environment instead of being
    # spliced into the script text, so they can never be parsed as shell code.
    INPUT_DIGEST: ${{ inputs.image_digest }}
    REPO: ${{ github.repository }}
    REF_NAME: ${{ github.ref_name }}
  run: |
    if [ -n "${INPUT_DIGEST}" ]; then
      DIGEST="${INPUT_DIGEST}"
    else
      echo "Fetching IMAGE_DIGEST artifact from latest build workflow run..."
      # -f/-F with --method GET become URL-encoded query parameters, so branch
      # names containing '/', '&' or '#' are sent intact.
      RUN_ID=$(gh api --method GET "repos/${REPO}/actions/workflows/build.yml/runs" \
        -f branch="${REF_NAME}" -f status=success -F per_page=1 \
        --jq '.workflow_runs[0].id' 2>/dev/null || echo "")
      if [ -z "$RUN_ID" ] || [ "$RUN_ID" = "null" ]; then
        echo "ERROR: No successful build.yml run found for ref ${REF_NAME}"
        exit 1
      fi
      echo "Using build run: ${RUN_ID}"
      # /tmp survives between jobs on a self-hosted runner; start from a clean
      # directory so a stale artifact is never read (or blocks the download).
      rm -rf /tmp/image-digest
      gh run download "$RUN_ID" -n image-digest -D /tmp/image-digest || {
        echo "ERROR: Could not download image-digest artifact from build run ${RUN_ID}"
        exit 1
      }
      if [ ! -f /tmp/image-digest/IMAGE_DIGEST ]; then
        echo "ERROR: IMAGE_DIGEST file not found in downloaded artifact"
        exit 1
      fi
      DIGEST=$(tr -d '[:space:]' < /tmp/image-digest/IMAGE_DIGEST)
      if [ -z "$DIGEST" ] || [ "$DIGEST" = "unknown" ]; then
        echo "ERROR: IMAGE_DIGEST artifact contains invalid digest: '${DIGEST}'"
        exit 1
      fi
    fi
    # Whole-string match: rejects anything that is not exactly sha256:<64 hex>,
    # including embedded newlines that could forge extra GITHUB_OUTPUT entries.
    if [[ ! "$DIGEST" =~ ^sha256:[0-9a-f]{64}$ ]]; then
      echo "ERROR: '${DIGEST}' is not a valid sha256 image digest"
      exit 1
    fi
    echo "digest=${DIGEST}" >> "$GITHUB_OUTPUT"
    echo "Image under test: ${DIGEST}"
# Creates a throw-away ed25519 key pair and exports the public half as
# SSH_KEY for the steps that follow.
- name: Generate SSH key pair
  run: |
    # /tmp persists on a self-hosted runner. If an earlier run was killed
    # before cleanup, ssh-keygen would stop at its "Overwrite (y/n)?" prompt
    # and fail without a terminal, so remove any leftover key first.
    rm -f /tmp/vm-smoke-key /tmp/vm-smoke-key.pub
    ssh-keygen -t ed25519 -f /tmp/vm-smoke-key -N "" -q
    echo "SSH_KEY=$(cat /tmp/vm-smoke-key.pub)" >> "$GITHUB_ENV"
# Writes the cloud-init user-data and meta-data files and packs them into a
# seed ISO with cloud-localds.
- name: Prepare cloud-init ISO
  run: |
    seed_dir=/tmp/vm-smoke
    mkdir -p "${seed_dir}"

    # Unquoted delimiter: ${SSH_KEY} (exported via GITHUB_ENV by the key
    # generation step) is expanded into the file.
    cat > "${seed_dir}/user-data" <<EOF
    #cloud-config
    ssh_authorized_keys:
      - ${SSH_KEY}
    runcmd:
      - systemctl is-system-running --wait || true
    EOF

    # Quoted delimiter: content is written verbatim.
    cat > "${seed_dir}/meta-data" <<'METAEOF'
    instance-id: secai-smoke-test
    local-hostname: secai-smoke
    METAEOF

    cloud-localds "${seed_dir}/cloud-init.iso" "${seed_dir}/user-data" "${seed_dir}/meta-data"
# Runs the repository's build-qcow2.sh against the pinned image digest.
# NOTE(review): presumably writes secai-os.qcow2 into /tmp/vm-smoke, since the
# boot step reads it from there — confirm against the script.
- name: Build QCOW2 from image
  env:
    # Passed through the environment rather than interpolated into the script,
    # so the step output is always treated as data by the shell.
    IMAGE_DIGEST: ${{ steps.image.outputs.digest }}
  run: |
    bash scripts/vm/build-qcow2.sh --ci \
      --image-ref "ghcr.io/secai-hub/secai_os@${IMAGE_DIGEST}" \
      /tmp/vm-smoke
# Starts the guest in the background with the QCOW2 disk and the cloud-init
# seed ISO attached, and records the QEMU PID for the cleanup step.
- name: Boot VM in QEMU/KVM
  run: |
    # hostfwd is bound to 127.0.0.1: with the host address omitted QEMU listens
    # on every host interface, exposing the guest's root SSH to the network.
    qemu-system-x86_64 -enable-kvm -m 4G -smp 2 \
      -drive file=/tmp/vm-smoke/secai-os.qcow2,if=virtio,format=qcow2 \
      -cdrom /tmp/vm-smoke/cloud-init.iso \
      -netdev user,id=net0,hostfwd=tcp:127.0.0.1:2222-:22 \
      -device virtio-net-pci,netdev=net0 \
      -nographic &
    QEMU_PID=$!
    echo "${QEMU_PID}" > /tmp/vm-smoke/qemu.pid
    # Bad arguments, a missing disk image or an unavailable /dev/kvm make QEMU
    # exit within moments; catch that here instead of after the SSH wait.
    sleep 2
    if ! kill -0 "${QEMU_PID}" 2>/dev/null; then
      echo "FAIL: QEMU exited immediately after launch"
      exit 1
    fi
    echo "QEMU started (PID: $(cat /tmp/vm-smoke/qemu.pid))"
# Polls the forwarded SSH port until a root login succeeds.
# Fails the step if the guest never becomes reachable.
- name: Wait for SSH readiness
  run: |
    echo "Waiting for guest SSH (up to 60 attempts, 5s apart)..."
    # BatchMode keeps ssh from ever waiting on a password/passphrase prompt.
    SSH_OPTS="-o BatchMode=yes -o ConnectTimeout=5 -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i /tmp/vm-smoke-key"
    for i in $(seq 1 60); do
      if ssh $SSH_OPTS -p 2222 root@localhost echo "SSH ready" 2>/dev/null; then
        echo "Guest SSH is up after ${i} attempt(s)"
        exit 0
      fi
      # Stop early when the recorded QEMU process is already gone.
      if [ -f /tmp/vm-smoke/qemu.pid ] && ! kill -0 "$(cat /tmp/vm-smoke/qemu.pid)" 2>/dev/null; then
        echo "FAIL: QEMU process is no longer running"
        exit 1
      fi
      sleep 5
    done
    # Without this the loop simply ends and the step is reported as a success.
    echo "FAIL: guest SSH did not become reachable after 60 attempts"
    exit 1
# Passes when the guest reports "running" or "degraded"; on any other state
# it lists the failed units and fails the step.
- name: "Check: systemd system state"
  run: |
    SSH="ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i /tmp/vm-smoke-key -p 2222 root@localhost"
    # sshd can accept logins while boot is still in progress, when the state
    # is "starting". --wait blocks until boot has settled; timeout bounds it.
    # is-system-running exits non-zero for "degraded", hence the "|| true".
    STATE=$($SSH "timeout 180 systemctl is-system-running --wait" 2>/dev/null || true)
    echo "System state: $STATE"
    if [ "$STATE" != "running" ] && [ "$STATE" != "degraded" ]; then
      echo "FAIL: expected running or degraded, got $STATE"
      $SSH "systemctl --failed" || true
      exit 1
    fi
# Requests the auth status endpoint from inside the guest; any curl failure
# (connection error or HTTP status >= 400, because of -f) fails the step.
- name: "Check: auth endpoint responds"
  run: |
    SSH="ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i /tmp/vm-smoke-key -p 2222 root@localhost"
    # -S keeps curl's error message visible despite -s; --max-time stops a
    # hung socket from stalling until the job-level timeout.
    $SSH "curl -fsS --max-time 15 http://127.0.0.1:8480/api/auth/status" || {
      echo "FAIL: auth endpoint did not respond"
      exit 1
    }
# Requests /health from inside the guest; any curl failure (connection error
# or HTTP status >= 400, because of -f) fails the step.
- name: "Check: health endpoint"
  run: |
    SSH="ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i /tmp/vm-smoke-key -p 2222 root@localhost"
    # -S keeps curl's error message visible despite -s; --max-time stops a
    # hung socket from stalling until the job-level timeout.
    $SSH "curl -fsS --max-time 15 http://127.0.0.1:8480/health" || {
      echo "FAIL: health endpoint did not respond"
      exit 1
    }
# Verifies that none of the listed units is active in the guest.
- name: "Check: disabled services stay inactive (offline_private profile)"
  shell: bash
  run: |
    SSH="ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i /tmp/vm-smoke-key -p 2222 root@localhost"
    DISABLED=(
      secure-ai-diffusion.service
      secure-ai-airlock.service
      secure-ai-tor.service
      secure-ai-searxng.service
      secure-ai-search-mediator.service
      secure-ai-enable-diffusion.path
    )
    for svc in "${DISABLED[@]}"; do
      rc=0
      $SSH "systemctl is-active --quiet $svc" 2>/dev/null || rc=$?
      if [ "$rc" -eq 0 ]; then
        echo "FAIL: $svc is active (should be disabled in offline_private)"
        exit 1
      fi
      # ssh itself exits 255 on a connection/authentication error. Treating
      # that as "inactive" would pass the check without asking the guest.
      if [ "$rc" -eq 255 ]; then
        echo "FAIL: could not query $svc (ssh connection error)"
        exit 1
      fi
      echo "OK: $svc is inactive"
    done
# Reads the guest's profile state file and fails when a profile is recorded
# that is not offline_private. A missing file is accepted.
- name: "Check: default profile is offline_private"
  run: |
    SSH="ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i /tmp/vm-smoke-key -p 2222 root@localhost"
    PROFILE=$($SSH "cat /var/lib/secure-ai/state/profile.json 2>/dev/null || echo '{}'")
    echo "Profile state: $PROFILE"
    COMPACT=$(printf '%s' "$PROFILE" | tr -d '[:space:]')
    if [ -z "$COMPACT" ] || [ "$COMPACT" = "{}" ]; then
      # On first boot without wizard, profile.json may not exist yet — fallback is offline_private
      echo "OK: no profile recorded yet (fallback is offline_private)"
    elif printf '%s' "$PROFILE" | grep -q 'offline_private'; then
      echo "OK: profile state references offline_private"
    else
      # NOTE(review): the JSON schema is not visible from this workflow, so
      # this is a substring match; tighten it to the exact key once confirmed.
      echo "FAIL: profile state does not reference offline_private"
      exit 1
    fi
# Requests the vault status endpoint from inside the guest; any curl failure
# (connection error or HTTP status >= 400, because of -f) fails the step.
- name: "Check: vault API responds"
  run: |
    SSH="ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i /tmp/vm-smoke-key -p 2222 root@localhost"
    # -S keeps curl's error message visible despite -s; --max-time stops a
    # hung socket from stalling until the job-level timeout.
    $SSH "curl -fsS --max-time 15 http://127.0.0.1:8480/api/vault/status" || {
      echo "FAIL: vault status endpoint did not respond"
      exit 1
    }
# Fails the step unless /var/lib/secure-ai/quarantine is a directory in the guest.
- name: "Check: quarantine directory exists"
  run: |
    GUEST="ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i /tmp/vm-smoke-key -p 2222 root@localhost"
    if ! $GUEST "test -d /var/lib/secure-ai/quarantine"; then
      echo "FAIL: quarantine directory does not exist"
      exit 1
    fi
# Runs `rpm-ostree status` in the guest (its output lands in the job log) and
# fails the step when the command exits non-zero.
- name: "Check: rpm-ostree deployment"
  run: |
    GUEST="ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i /tmp/vm-smoke-key -p 2222 root@localhost"
    if $GUEST "rpm-ostree status"; then
      exit 0
    fi
    echo "FAIL: rpm-ostree status failed"
    exit 1
# Always runs (also after failures and cancellation): stops the QEMU process
# recorded in the PID file and removes every temporary file this job creates.
- name: Cleanup VM
  if: always()
  run: |
    if [ -f /tmp/vm-smoke/qemu.pid ]; then
      QEMU_PID="$(cat /tmp/vm-smoke/qemu.pid)"
      kill "${QEMU_PID}" 2>/dev/null || true
      # Give QEMU up to 10 seconds to exit on SIGTERM before deleting the disk
      # image underneath it, then force-kill whatever is left.
      for _ in $(seq 1 10); do
        kill -0 "${QEMU_PID}" 2>/dev/null || break
        sleep 1
      done
      kill -9 "${QEMU_PID}" 2>/dev/null || true
    fi
    # /tmp/image-digest is written by the digest-resolution step on scheduled
    # runs; remove it too so nothing stale survives on the self-hosted runner.
    rm -rf /tmp/vm-smoke /tmp/vm-smoke-key /tmp/vm-smoke-key.pub /tmp/image-digest