@@ -3,13 +3,10 @@ name: VM Boot Smoke Test (Tier 2)
33# Tier 2: Real VM boot test on self-hosted KVM runner.
44# All checks run inside the guest via SSH (not host-side).
55#
6- # STATUS: DRAFT — verification commands are scaffolding only (echo placeholders).
7- # This workflow will pass without testing anything. Automatic triggers are
8- # disabled until the self-hosted KVM runner is provisioned and the SSH
9- # verification commands are implemented.
6+ # REQUIRES: self-hosted runner with KVM, QEMU, cloud-utils, and ssh-keygen.
7+ # Gate: only runs when vars.HAS_KVM_RUNNER == 'true'.
108#
119# SECURITY: Never triggered by pull_request (fork safety for self-hosted runners).
12- # To test manually: Actions → VM Boot Smoke Test → Run workflow
1310
1411on :
1512 workflow_dispatch :
1815 description : " Exact image digest (sha256:...) to test. Required for manual runs."
1916 required : true
2017 type : string
21- # Nightly and push triggers disabled until verification commands are real.
22- # Uncomment when ready:
23- # schedule:
24- # - cron: "17 3 * * *"
25- # push:
26- # branches: [main, stable, 'release/**']
27- paths-ignore :
28- - " **.md"
29- - " docs/**"
18+ schedule :
19+ - cron : " 17 3 * * *"
3020
3121concurrency :
3222 group : vm-boot-smoke-${{ github.ref }}
@@ -39,6 +29,7 @@ permissions:
3929jobs :
4030 vm-boot-smoke :
4131 name : Boot VM & Verify Services
32+ if : vars.HAS_KVM_RUNNER == 'true'
4233 runs-on : [self-hosted, linux, x64, kvm]
4334 timeout-minutes : 30
4435
@@ -51,36 +42,24 @@ jobs:
5142 GH_TOKEN : ${{ github.token }}
5243 run : |
5344 if [ -n "${{ inputs.image_digest }}" ]; then
54- # Manual dispatch: use the exact digest provided by the operator.
5545 DIGEST="${{ inputs.image_digest }}"
5646 else
57- # Schedule/push: download the exact IMAGE_DIGEST artifact from the
58- # most recent successful build.yml run on this ref.
59- # NO skopeo, NO tag heuristics, NO registry-state dependencies.
60- # The test always runs against exactly the image produced by the
61- # corresponding build pipeline run.
6247 echo "Fetching IMAGE_DIGEST artifact from latest build workflow run..."
6348 RUN_ID=$(gh api "repos/${{ github.repository }}/actions/workflows/build.yml/runs?branch=${{ github.ref_name }}&status=success&per_page=1" \
6449 --jq '.workflow_runs[0].id' 2>/dev/null || echo "")
6550 if [ -z "$RUN_ID" ] || [ "$RUN_ID" = "null" ]; then
6651 echo "ERROR: No successful build.yml run found for ref ${{ github.ref_name }}"
67- echo "Run the build workflow first, or provide an exact digest via workflow_dispatch."
6852 exit 1
6953 fi
7054 echo "Using build run: ${RUN_ID}"
71-
72- # Download the image-digest artifact published by the bluebuild job
7355 gh run download "$RUN_ID" -n image-digest -D /tmp/image-digest || {
7456 echo "ERROR: Could not download image-digest artifact from build run ${RUN_ID}"
75- echo "The build workflow must publish an IMAGE_DIGEST artifact."
7657 exit 1
7758 }
78-
7959 if [ ! -f /tmp/image-digest/IMAGE_DIGEST ]; then
8060 echo "ERROR: IMAGE_DIGEST file not found in downloaded artifact"
8161 exit 1
8262 fi
83-
8463 DIGEST=$(cat /tmp/image-digest/IMAGE_DIGEST | tr -d '[:space:]')
8564 if [ -z "$DIGEST" ] || [ "$DIGEST" = "unknown" ]; then
8665 echo "ERROR: IMAGE_DIGEST artifact contains invalid digest: '${DIGEST}'"
@@ -90,102 +69,137 @@ jobs:
9069 echo "digest=${DIGEST}" >> "$GITHUB_OUTPUT"
9170 echo "Image under test: ${DIGEST}"
9271
93- - name : Prepare cloud-init
# Create a throwaway ed25519 key pair for guest access and export the public
# half as SSH_KEY (through GITHUB_ENV) for the steps that follow.
- name: Generate SSH key pair
  run: |
    # Self-hosted runners keep /tmp between jobs. A key left behind by an
    # aborted run would make ssh-keygen stop at its overwrite prompt and fail.
    rm -f /tmp/vm-smoke-key /tmp/vm-smoke-key.pub
    ssh-keygen -t ed25519 -f /tmp/vm-smoke-key -N "" -q
    echo "SSH_KEY=$(cat /tmp/vm-smoke-key.pub)" >> "$GITHUB_ENV"
76+
# Write the cloud-init user-data / meta-data pair and pack both into the seed
# ISO that is attached to the VM as a CD-ROM.
- name: Prepare cloud-init ISO
  run: |
    # SSH_KEY is read from the environment; abort rather than build a seed
    # ISO that authorizes no key (every later SSH check would just time out).
    : "${SSH_KEY:?SSH_KEY is not set - the key generation step must run first}"
    mkdir -p /tmp/vm-smoke
    # Unquoted delimiter on purpose: ${SSH_KEY} must expand in the heredoc.
    # No runcmd entry: runcmd executes inside a boot-time unit, so calling
    # `systemctl is-system-running --wait` there waits for a boot that cannot
    # complete until runcmd itself returns. Boot state is checked from the
    # host over SSH instead.
    cat > /tmp/vm-smoke/user-data <<EOF
    #cloud-config
    ssh_authorized_keys:
      - ${SSH_KEY}
    EOF
    # Quoted delimiter: meta-data is static, nothing may expand.
    cat > /tmp/vm-smoke/meta-data <<'METAEOF'
    instance-id: secai-smoke-test
    local-hostname: secai-smoke
    METAEOF
    cloud-localds /tmp/vm-smoke/cloud-init.iso /tmp/vm-smoke/user-data /tmp/vm-smoke/meta-data
10792
108- # The actual QEMU boot + SSH verification steps are environment-specific.
109- # This template shows the verification checks that run inside the guest.
# Convert the image under test into a bootable QCOW2 disk in /tmp/vm-smoke.
- name: Build QCOW2 from image
  env:
    # Hand the digest over as data. Expanding ${{ }} inside the script body
    # would let a crafted workflow_dispatch input run arbitrary shell on the
    # self-hosted runner.
    IMAGE_DIGEST: ${{ steps.image.outputs.digest }}
  run: |
    # The image is pinned by digest, so anything that is not a well-formed
    # sha256 digest is rejected before it reaches the build script.
    if ! [[ "$IMAGE_DIGEST" =~ ^sha256:[0-9a-f]{64}$ ]]; then
      echo "ERROR: unexpected image digest: '${IMAGE_DIGEST}'"
      exit 1
    fi
    bash scripts/vm/build-qcow2.sh --ci \
      --image-ref "ghcr.io/secai-hub/secai_os@${IMAGE_DIGEST}" \
      /tmp/vm-smoke
11098
# Start the guest in the background with host port 2222 forwarded to guest
# port 22, and record the QEMU PID for the cleanup step.
- name: Boot VM in QEMU/KVM
  run: |
    # Console output goes to a log file so it can be shown if QEMU dies.
    qemu-system-x86_64 -enable-kvm -m 4G -smp 2 \
      -drive file=/tmp/vm-smoke/secai-os.qcow2,if=virtio,format=qcow2 \
      -cdrom /tmp/vm-smoke/cloud-init.iso \
      -netdev user,id=net0,hostfwd=tcp::2222-:22 \
      -device virtio-net-pci,netdev=net0 \
      -nographic > /tmp/vm-smoke/qemu.log 2>&1 &
    echo $! > /tmp/vm-smoke/qemu.pid
    # A backgrounded command never fails the step by itself. Give QEMU a
    # moment, then make sure it is still alive (missing /dev/kvm, a missing
    # disk image or a busy port all make it exit immediately).
    sleep 2
    if ! kill -0 "$(cat /tmp/vm-smoke/qemu.pid)" 2>/dev/null; then
      echo "FAIL: QEMU exited right after start"
      cat /tmp/vm-smoke/qemu.log || true
      exit 1
    fi
    echo "QEMU started (PID: $(cat /tmp/vm-smoke/qemu.pid))"
127109
# Poll the forwarded SSH port until the guest accepts the smoke-test key.
# 60 attempts, 5 seconds apart; the step fails if none of them succeeds.
- name: Wait for SSH readiness
  run: |
    echo "Waiting for guest SSH (up to 5 minutes)..."
    # The guest is freshly built on every run, so its host key is never
    # known in advance; skip host key checks and keep known_hosts untouched.
    SSH_OPTS=(-o ConnectTimeout=5 -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i /tmp/vm-smoke-key)
    for i in $(seq 1 60); do
      if ssh "${SSH_OPTS[@]}" -p 2222 root@localhost echo "SSH ready" 2>/dev/null; then
        echo "Guest SSH is up after $((i * 5)) seconds"
        exit 0
      fi
      sleep 5
    done
    # Reaching this point means every attempt failed. Without an explicit
    # failure the loop would simply end and the step would report success.
    echo "FAIL: guest SSH did not become ready within 5 minutes"
    exit 1
136121
137- - name : " Check: first-boot flow completed "
# Verify that the guest finished booting. "degraded" is accepted as well as
# "running"; any other state fails the step and lists the failed units.
- name: "Check: systemd system state"
  run: |
    SSH=(ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i /tmp/vm-smoke-key -p 2222 root@localhost)
    # sshd accepts logins before boot has finished, so an immediate query can
    # still return "starting". --wait blocks until startup completes; the
    # timeout keeps a stuck boot from hanging the job.
    STATE=$("${SSH[@]}" "timeout 300 systemctl is-system-running --wait" 2>/dev/null || true)
    if [ -z "$STATE" ]; then
      # The wait timed out without output; report whatever state it is in.
      STATE=$("${SSH[@]}" "systemctl is-system-running" 2>/dev/null || true)
    fi
    echo "System state: $STATE"
    if [ "$STATE" != "running" ] && [ "$STATE" != "degraded" ]; then
      echo "FAIL: expected running or degraded, got $STATE"
      "${SSH[@]}" "systemctl --failed" || true
      exit 1
    fi
141132
142- - name : " Check: auth/login endpoint responds"
# Request the auth status route on the guest's loopback interface; curl -f
# turns an HTTP error status into a failing exit code.
- name: "Check: auth endpoint responds"
  run: |
    guest=(ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i /tmp/vm-smoke-key -p 2222 root@localhost)
    if ! "${guest[@]}" "curl -sf http://127.0.0.1:8480/api/auth/status"; then
      echo "FAIL: auth endpoint did not respond"
      exit 1
    fi
146140
# Request the health route on the guest's loopback interface over SSH.
- name: "Check: health endpoint"
  run: |
    guest=(ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i /tmp/vm-smoke-key -p 2222 root@localhost)
    if ! "${guest[@]}" "curl -sf http://127.0.0.1:8480/health"; then
      echo "FAIL: health endpoint did not respond"
      exit 1
    fi
151148
152- - name : " Check: disabled services stay inactive"
# Every unit listed here must be inactive in the offline_private profile.
- name: "Check: disabled services stay inactive (offline_private profile)"
  run: |
    SSH=(ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i /tmp/vm-smoke-key -p 2222 root@localhost)
    DISABLED=(
      secure-ai-diffusion.service
      secure-ai-airlock.service
      secure-ai-tor.service
      secure-ai-searxng.service
      secure-ai-search-mediator.service
      secure-ai-enable-diffusion.path
    )
    for svc in "${DISABLED[@]}"; do
      rc=0
      "${SSH[@]}" "systemctl is-active --quiet $svc" 2>/dev/null || rc=$?
      if [ "$rc" -eq 0 ]; then
        echo "FAIL: $svc is active (should be disabled in offline_private)"
        exit 1
      fi
      # ssh itself exits 255 when the connection fails. That is "unknown",
      # not "inactive", and must not be reported as a pass.
      if [ "$rc" -eq 255 ]; then
        echo "FAIL: could not reach the guest to check $svc"
        exit 1
      fi
      echo "OK: $svc is inactive"
    done
167+
# Verify the persisted profile state. On first boot without the wizard,
# profile.json may not exist yet; the fallback is offline_private, so a
# missing file passes.
- name: "Check: default profile is offline_private"
  run: |
    SSH=(ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i /tmp/vm-smoke-key -p 2222 root@localhost)
    PROFILE=$("${SSH[@]}" "cat /var/lib/secure-ai/state/profile.json 2>/dev/null || echo '{}'")
    echo "Profile state: $PROFILE"
    if [ "$PROFILE" = "{}" ]; then
      echo "OK: no profile.json yet (fallback is offline_private)"
    # NOTE(review): assumes profile.json carries the profile name as the
    # literal string offline_private - confirm against the file's schema.
    elif printf '%s' "$PROFILE" | grep -q 'offline_private'; then
      echo "OK: profile is offline_private"
    else
      echo "FAIL: profile.json does not select offline_private"
      exit 1
    fi
176174
177- - name : " Check: model import/quarantine path "
# Request the vault status route on the guest's loopback interface over SSH.
- name: "Check: vault API responds"
  run: |
    guest=(ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i /tmp/vm-smoke-key -p 2222 root@localhost)
    if ! "${guest[@]}" "curl -sf http://127.0.0.1:8480/api/vault/status"; then
      echo "FAIL: vault status endpoint did not respond"
      exit 1
    fi
181182
182- - name : " Check: update/rollback mechanism "
# Confirm that the quarantine directory is present on the guest filesystem.
- name: "Check: quarantine directory exists"
  run: |
    guest=(ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i /tmp/vm-smoke-key -p 2222 root@localhost)
    if ! "${guest[@]}" "test -d /var/lib/secure-ai/quarantine"; then
      echo "FAIL: quarantine directory does not exist"
      exit 1
    fi
190+
# Run `rpm-ostree status` in the guest; a non-zero exit fails the step.
- name: "Check: rpm-ostree deployment"
  run: |
    guest=(ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i /tmp/vm-smoke-key -p 2222 root@localhost)
    if ! "${guest[@]}" "rpm-ostree status"; then
      echo "FAIL: rpm-ostree status failed"
      exit 1
    fi
186198
# Always runs: stop the guest and remove every file this workflow created.
- name: Cleanup VM
  if: always()
  run: |
    if [ -f /tmp/vm-smoke/qemu.pid ]; then
      QEMU_PID="$(cat /tmp/vm-smoke/qemu.pid)"
      kill "$QEMU_PID" 2>/dev/null || true
      # Give QEMU up to 10 seconds to exit, then force it. A guest that
      # survives cleanup keeps port 2222 bound and breaks the next run on
      # this runner.
      for _ in $(seq 1 10); do
        kill -0 "$QEMU_PID" 2>/dev/null || break
        sleep 1
      done
      kill -9 "$QEMU_PID" 2>/dev/null || true
    fi
    rm -rf /tmp/vm-smoke /tmp/vm-smoke-key /tmp/vm-smoke-key.pub
0 commit comments