From 939e2179336bfb90bc7236f5bd04c71b24f38798 Mon Sep 17 00:00:00 2001
From: Alex Newman <posix4e@gmail.com>
Date: Sat, 18 Apr 2026 17:46:44 +0000
Subject: [PATCH] ci: fold every deploy path into Release, drop scripts/
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Release now owns the full lifecycle: build → deploy-preview (PR) OR
deploy-production (main / dispatch) → relaunch-agent (blocking) →
verify agent re-registered with CP. A release is "done" only when the
local dd-local-{kind} VM is back online talking to the freshly-deployed
CP — that's the signal that tells us a PR is safe to merge or a merge
actually shipped.

Deleted:
  .github/workflows/production-deploy.yml — folded into release.yml
    as a deploy-production job with same deploy-cp.yml body.
  .github/workflows/local-agents.yml — manual-dispatch path gone;
    push a commit to trigger a relaunch via the cascade.

Deleted scripts/:
  scripts/gcp-deploy.sh     — inlined into deploy-cp.yml.
  scripts/dd-relaunch.sh    → apps/_infra/dd-relaunch.sh (host-side).
  scripts/local-agents.sh   → apps/_infra/local-agents.sh (host-side).
  scripts/workloads.sh      — dead after inline; only gcp-deploy
                              sourced it and local-agents.sh built
                              workloads via inline jq anyway.
  scripts/redeploy-workload.sh — unused helper, removed.

deploy-cp.yml's Relaunch step drops `continue-on-error: true`; the
relaunch-agent composite gains a "Verify agent registered with CP"
step that polls /api/agents for a freshly-registered dd-local-{kind}
entry with a 5-min budget.

Concurrency on release.yml becomes expression-driven: PR pushes cancel
in-progress runs; main / tag / dispatch queue so an in-flight prod
deploy finishes cleanly.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .github/actions/relaunch-agent/action.yml |  42 +++++-
 .github/workflows/deploy-cp.yml           |  97 +++++++++++---
 .github/workflows/local-agents.yml        |  47 -------
 .github/workflows/production-deploy.yml   |  53 --------
 .github/workflows/release.yml             |  71 ++++++++---
 README.md                                 |  12 +-
 apps/_infra/dd-relaunch.sh                |  52 ++++++++
 {scripts => apps/_infra}/local-agents.sh  |   6 +-
 scripts/dd-relaunch.sh                    |  57 ---------
 scripts/gcp-deploy.sh                     | 148 ----------------------
 scripts/redeploy-workload.sh              |  50 --------
 scripts/workloads.sh                      |  54 --------
 12 files changed, 227 insertions(+), 462 deletions(-)
 delete mode 100644 .github/workflows/local-agents.yml
 delete mode 100644 .github/workflows/production-deploy.yml
 create mode 100755 apps/_infra/dd-relaunch.sh
 rename {scripts => apps/_infra}/local-agents.sh (97%)
 delete mode 100755 scripts/dd-relaunch.sh
 delete mode 100755 scripts/gcp-deploy.sh
 delete mode 100755 scripts/redeploy-workload.sh
 delete mode 100755 scripts/workloads.sh

diff --git a/.github/actions/relaunch-agent/action.yml b/.github/actions/relaunch-agent/action.yml
index a58a048..b449289 100644
--- a/.github/actions/relaunch-agent/action.yml
+++ b/.github/actions/relaunch-agent/action.yml
@@ -1,9 +1,9 @@
 name: Relaunch local TDX agent
 description: >-
-  SSH into the tdx2 host and recreate the matching dd-local-{kind} libvirt
-  domain against the given CP url, pulling scripts from the given git ref.
-  Shared between Local Agents (push/PR/dispatch) and Deploy CP (cascading
-  relaunch after a successful CP deploy).
+  SSH into the tdx2 host, recreate the matching dd-local-{kind} libvirt
+  domain against the given CP url (pulling apps/ from the given git ref),
+  then block until the agent re-registers with the CP. A release is "done"
+  only when this action succeeds end-to-end.
 
 inputs:
   kind:
@@ -68,4 +68,36 @@ runs:
         ssh-keyscan -H "$HOST" >> ~/.ssh/known_hosts 2>/dev/null
         ssh -o BatchMode=yes -o StrictHostKeyChecking=yes \
             -i ~/.ssh/id_ed25519 "tdx2@$HOST" \
-            "DD_PAT='$DD_PAT' DD_ITA_API_KEY='$DD_ITA_API_KEY' /home/tdx2/src/dd/scripts/dd-relaunch.sh '$KIND' '$URL' '$REF'"
+            "$(printf 'DD_PAT=%q DD_ITA_API_KEY=%q /home/tdx2/src/dd/apps/_infra/dd-relaunch.sh %q %q %q' "$DD_PAT" "$DD_ITA_API_KEY" "$KIND" "$URL" "$REF")"
+        # ^ printf %q shell-quotes every value for the remote shell; REF may be a PR branch name and must not be spliced in raw.
+    # Block until the freshly-booted agent VM registers with the CP.
+    # This is the "I can see the local agent deployment worked" signal
+    # that gates the whole release. 5-min budget covers a cold VM boot
+    # (~60s) + cloudflared tunnel (~30s) + agent startup + register —
+    # plenty of headroom. Doesn't probe openclaw/ollama readiness —
+    # that first-boot pays a 30-min npm-install tax and isn't part
+    # of the release gate.
+    - name: Verify agent registered with CP
+      shell: bash
+      env:
+        URL:    ${{ inputs.url }}
+        DD_PAT: ${{ inputs.dd-pat }}
+        KIND:   ${{ inputs.kind }}
+      run: |
+        # Poll $URL/api/agents (30 tries, 10s apart) for a healthy dd-local-$KIND
+        # entry whose last_seen is newer than the moment this step started.
+        vm="dd-local-$KIND" since=$(date -u +%Y-%m-%dT%H:%M:%SZ)
+        for ((try = 1; try <= 30; try++)); do
+          host=$(curl -fsS --max-time 10 -H "Authorization: Bearer $DD_PAT" "$URL/api/agents" 2>/dev/null \
+            | jq -r --arg since "$since" --arg vm "$vm" '
+                map(select(.vm_name==$vm and .status=="healthy" and .last_seen > $since))
+                | sort_by(.last_seen) | last | .hostname // empty' 2>/dev/null || true)
+          if [[ -n "$host" && "$host" != "null" ]]; then
+            echo "$vm registered at https://$host"
+            exit 0
+          fi
+          echo "  waiting for $vm to register with $URL... (${try}/30)"
+          sleep 10
+        done
+        echo "::error::$vm never registered with $URL within 5 min"
+        exit 1
diff --git a/.github/workflows/deploy-cp.yml b/.github/workflows/deploy-cp.yml
index a6ab9e7..d21265c 100644
--- a/.github/workflows/deploy-cp.yml
+++ b/.github/workflows/deploy-cp.yml
@@ -1,18 +1,17 @@
 name: Deploy CP
 
 # Reusable workflow: provision the CP TDX VM on GCP, wait for it to be
-# healthy, verify attestation + dashboard + STONITH, and cascade a
-# relaunch of the matching dd-local agent VM. Called from release.yml
-# (preview path) and production-deploy.yml (prod path) with different
-# inputs — both paths share this exact set of verification steps, so
-# preview CI exercises the same code that prod runs.
+# healthy, verify attestation + dashboard + STONITH, then cascade a
+# relaunch of the matching dd-local agent VM and block until it
+# re-registers. Called from release.yml's deploy-preview (PR path) and
+# deploy-production (main / dispatch path) with env-specific inputs —
+# both paths share this exact set of verification steps so every PR
+# exercises the prod deploy code.
 #
 # GitHub Actions allows ≤4 levels of workflow_call nesting. Today's
-# chain is `release.yml → deploy-cp.yml` (2) and
-# `production-deploy.yml → deploy-cp.yml` (2) — deep enough headroom
-# that we can still call one more reusable workflow below us if needed.
-# The agent-relaunch cascade uses a composite action (same-job, no
-# nesting) to keep that headroom.
+# chain is `release.yml → deploy-cp.yml` (2). The agent-relaunch
+# cascade uses a composite action (same-job, no nesting) to keep
+# headroom for future wrapping.
 
 on:
   workflow_call:
@@ -95,15 +94,75 @@ jobs:
           CLOUDFLARE_ACCOUNT_ID: ${{ secrets.DD_CP_CF_ACCOUNT_ID }}
           CLOUDFLARE_ZONE_ID: ${{ secrets.DD_CP_CF_ZONE_ID }}
           # OAuth only in environments that have these set (production).
-          # When empty, gcp-deploy.sh omits the workload env vars →
-          # dd-web disables /auth/github/* and serves /auth/pat only.
+          # Empty placeholder values get stripped below before baking the
+          # workload spec, so dd-web disables /auth/github/* and serves
+          # /auth/pat only in those envs.
           DD_GITHUB_CLIENT_ID: ${{ inputs.oauth_enabled && (vars.DD_GITHUB_CLIENT_ID || secrets.DD_GITHUB_CLIENT_ID) || '' }}
           DD_GITHUB_CALLBACK_URL: ${{ inputs.oauth_enabled && vars.DD_GITHUB_CALLBACK_URL || '' }}
           DD_GITHUB_CLIENT_SECRET: ${{ inputs.oauth_enabled && secrets.DD_GITHUB_CLIENT_SECRET || '' }}
-          # ITA — optional. When set, the CP mints + verifies quotes.
           DD_ITA_API_KEY: ${{ secrets.DD_ITA_API_KEY }}
           DD_RELEASE_TAG: ${{ inputs.release_tag }}
-        run: scripts/gcp-deploy.sh
+          EE_IMAGE_FAMILY: easyenclave-staging
+          EE_IMAGE_PROJECT: easyenclave
+          VM_MACHINE_TYPE: c3-standard-4
+          VM_DISK_SIZE: 10GB
+          DD_ITA_BASE_URL: https://api.trustauthority.intel.com
+          DD_ITA_JWKS_URL: https://portal.trustauthority.intel.com/certs
+          DD_ITA_ISSUER: https://portal.trustauthority.intel.com
+        run: |
+          set -euo pipefail
+
+          VM_NAME="dd-${DD_ENV}-$(date +%s)"
+          : "${DD_ITA_API_KEY:?set DD_ITA_API_KEY via secrets.DD_ITA_API_KEY}"
+          export DD_GITHUB_CALLBACK_URL="${DD_GITHUB_CALLBACK_URL:-https://${DD_HOSTNAME}/auth/github/callback}"
+
+          # bake <workload-file>: print the workload spec as one compact JSON
+          # line on stdout. A *.json.tmpl file goes through envsubst first
+          # (${VAR} placeholders), then loses every "KEY=" env entry whose
+          # value came out empty (e.g. OAuth creds in non-prod envs). A plain
+          # *.json file is only compacted. Any other file name is reported
+          # as an error and the function returns 1.
+          bake() {
+            local spec=$1
+            if [[ $spec == *.json.tmpl ]]; then
+              envsubst < "$spec" \
+                | jq -c 'if .env then .env |= map(select(test("^[^=]+=.+"))) else . end'
+            elif [[ $spec == *.json ]]; then
+              jq -c . "$spec"
+            else
+              echo "::error::unknown workload file type: $spec" >&2
+              return 1
+            fi
+          }
+
+          # Boot workloads (apps/<name>/workload.{json,json.tmpl}): cloudflared fetches
+          # its binary onto PATH; dd-management runs DD_MODE=management (CP + dashboard).
+          # Chain with && — errexit is cleared inside $(...), so a failed bake must stop here.
+          EE_BOOT_WORKLOADS=$({
+            bake apps/cloudflared/workload.json &&
+            bake apps/dd-management/workload.json.tmpl
+          } | jq -cs '.')
+
+          # The config may embed the baked workload env (secrets): write it to
+          # a private temp file, removed on every exit incl. a gcloud failure.
+          ee_config=$(mktemp)
+          trap 'rm -f -- "$ee_config"' EXIT
+          jq -c -n --arg workloads "$EE_BOOT_WORKLOADS" \
+            '{ "EE_BOOT_WORKLOADS": $workloads, "EE_OWNER": "devopsdefender" }' > "$ee_config"
+
+          gcloud compute instances create "$VM_NAME" \
+            --project="$GCP_PROJECT_ID" \
+            --zone="$GCP_ZONE" \
+            --machine-type="$VM_MACHINE_TYPE" \
+            --confidential-compute-type=TDX \
+            --maintenance-policy=TERMINATE \
+            --boot-disk-size="$VM_DISK_SIZE" \
+            --image-family="$EE_IMAGE_FAMILY" \
+            --image-project="$EE_IMAGE_PROJECT" \
+            --metadata-from-file=ee-config="$ee_config" \
+            --labels=devopsdefender=managed,dd_env="${DD_ENV}" \
+            --tags=dd-management
+          echo "VM: $VM_NAME ($DD_HOSTNAME, release $DD_RELEASE_TAG)"
 
       - name: Wait for agent health (streams serial console)
         env:
@@ -287,15 +346,11 @@ jobs:
             }
 
       # Cascade a relaunch of the matching dd-local-{env} libvirt domain
-      # on the tdx2 host. Preview runs dd-local-preview against the PR's
-      # CP; prod runs dd-local-prod against app.devopsdefender.com.
-      # Non-blocking (`continue-on-error`) because the openclaw boot
-      # chain inside dd-local-preview can take 30 min on first boot —
-      # we want PR status reflecting the CP deploy, with the agent
-      # relaunch as a signal-only exercise until vdc is warm.
+      # on the tdx2 host, then block on it registering with the freshly-
+      # deployed CP. This is the gate: a release is "done" only when the
+      # local agent is back online talking to the new CP.
       - name: Relaunch dd-local-${{ inputs.env == 'production' && 'prod' || 'preview' }}
         if: inputs.relaunch_agent
-        continue-on-error: true
         uses: ./.github/actions/relaunch-agent
         with:
           kind: ${{ inputs.env == 'production' && 'prod' || 'preview' }}
diff --git a/.github/workflows/local-agents.yml b/.github/workflows/local-agents.yml
deleted file mode 100644
index 9a32b46..0000000
--- a/.github/workflows/local-agents.yml
+++ /dev/null
@@ -1,47 +0,0 @@
-name: Local Agents
-
-# Manual entry point for relaunching one of the local dd-local-{kind}
-# libvirt domains on the tdx2 host. The everyday path (prod redeploy,
-# preview PR push) now goes through deploy-cp.yml, which calls the
-# relaunch-agent composite action directly after a successful CP
-# deploy — so this workflow only exists for operator-driven one-shots:
-# iterating on scripts/dd-relaunch.sh, re-running a relaunch without
-# re-deploying the CP, etc.
-
-on:
-  workflow_dispatch:
-    inputs:
-      kind:
-        description: 'prod | preview'
-        required: true
-        default: 'prod'
-      cp_url:
-        description: 'CP URL (e.g. https://app.devopsdefender.com)'
-        required: true
-        default: 'https://app.devopsdefender.com'
-      ref:
-        description: 'git ref whose scripts/apps tree to check out on the host'
-        required: true
-        default: 'main'
-
-permissions:
-  contents: read
-
-concurrency:
-  group: local-agents-${{ github.event.inputs.kind }}
-  cancel-in-progress: false
-
-jobs:
-  relaunch:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v4
-      - uses: ./.github/actions/relaunch-agent
-        with:
-          kind: ${{ github.event.inputs.kind }}
-          url: ${{ github.event.inputs.cp_url }}
-          ref: ${{ github.event.inputs.ref }}
-          ssh-key: ${{ secrets.DD_LOCAL_SSH_KEY }}
-          host: ${{ secrets.DD_LOCAL_HOST }}
-          dd-pat: ${{ secrets.GITHUB_TOKEN }}
-          ita-api-key: ${{ secrets.DD_ITA_API_KEY }}
diff --git a/.github/workflows/production-deploy.yml b/.github/workflows/production-deploy.yml
deleted file mode 100644
index f804e02..0000000
--- a/.github/workflows/production-deploy.yml
+++ /dev/null
@@ -1,53 +0,0 @@
-name: Production Deploy
-
-# Two triggers:
-#   - workflow_run: fires automatically after a successful Release run
-#     on main. Release publishes the `latest` tag, then this workflow
-#     deploys it to production. Sequential by design — if Release fails,
-#     we don't promote.
-#   - workflow_dispatch: manual re-deploy of any existing tag (e.g. a
-#     known-good v0.2.0 after a bad main push).
-#
-# Body lives in deploy-cp.yml — same workflow PR previews use, so every
-# PR exercises the prod deploy path before it lands here.
-
-on:
-  workflow_run:
-    workflows: ["Release"]
-    types: [completed]
-    branches: [main]
-  workflow_dispatch:
-    inputs:
-      release_tag:
-        description: 'Release tag to deploy (e.g. latest, v0.2.0)'
-        required: false
-        default: 'latest'
-
-permissions:
-  contents: read
-
-jobs:
-  deploy:
-    # workflow_run fires on every Release completion, including failures.
-    # Only promote on success.
-    if: github.event_name == 'workflow_dispatch' || github.event.workflow_run.conclusion == 'success'
-    permissions:
-      contents: read
-      id-token: write
-      # Granted (though unused — inputs.comment_on_pr=false here) so the
-      # intersection with deploy-cp.yml's job-level permissions matches.
-      pull-requests: write
-    uses: ./.github/workflows/deploy-cp.yml
-    with:
-      env: production
-      hostname: app.${{ vars.DD_CF_DOMAIN || 'devopsdefender.com' }}
-      gcp_environment: production
-      workload_identity_provider: 'projects/779946350556/locations/global/workloadIdentityPools/github-actions-pool/providers/github-provider'
-      service_account: 'easyenclave-production-ci@easyenclave.iam.gserviceaccount.com'
-      # workflow_run has no `inputs`; fall back to `latest`, which
-      # release.yml just (re)published on push to main.
-      release_tag: ${{ inputs.release_tag || 'latest' }}
-      oauth_enabled: true
-      comment_on_pr: false
-      ref: main
-    secrets: inherit
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index efe4539..7d55bbc 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -1,13 +1,18 @@
 name: Release
 
-# Build the static musl binary, publish it as a GitHub release asset,
-# and (on PRs) deploy it to an ephemeral per-PR preview. Replaces the
-# Docker build+push pipeline — easyenclave fetches the asset directly
-# via its github_release workload source.
+# One workflow to rule them all: build the static musl binary, publish
+# it as a GitHub release asset, and deploy it to either the PR preview
+# (per-PR ephemeral CP at pr-N.domain) or production (app.domain). Both
+# paths cascade into a relaunch of the matching dd-local agent VM on
+# the tdx2 host, and the Release run only goes green when that agent
+# re-registers with the freshly-deployed CP.
 #
-# PR:             pre-release tagged pr-{sha12}, then full PR-preview deploy.
-# push to main:   rolling `latest` release (no deploy — that's production)
-# push v* tag:    versioned release (no deploy)
+# Paths:
+#   pull_request        → build → deploy-preview → dd-local-preview relaunch
+#   push main           → build → deploy-production → dd-local-prod relaunch
+#   push v*             → build only (versioned release, no deploy)
+#   workflow_dispatch   → build → deploy-production (rollback tool;
+#                          release_tag input picks which tag to deploy)
 
 on:
   push:
@@ -18,10 +23,18 @@ on:
   pull_request:
     paths-ignore:
       - "README.md"
+  workflow_dispatch:
+    inputs:
+      release_tag:
+        description: 'Release tag to deploy to production (rollback tool; default: latest)'
+        required: false
+        default: 'latest'
 
 concurrency:
   group: dd-release-${{ github.ref }}
-  cancel-in-progress: true
+  # PR pushes cancel old runs. Main / tag / dispatch runs wait instead (only the
+  # newest waiting run is kept), so an in-progress prod deploy is never cancelled.
+  cancel-in-progress: ${{ github.event_name == 'pull_request' }}
 
 permissions:
   contents: write
@@ -75,10 +88,7 @@ jobs:
       #  `https://github.com/devopsdefender/dd/.github/workflows/release.yml@<ref>`).
       # The attestation is stored on the repo's /attestations endpoint
       # and retrievable via `gh attestation verify` or the REST API.
-      #
-      # For now we're tracking (not enforcing) — the CP will eventually
-      # use this to verify that a registering agent's artifact came
-      # from this workflow. Skipped on fork PRs (they lack id-token).
+      # Skipped on fork PRs (they lack id-token).
       - name: Attest devopsdefender binary
         if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name == github.repository
         uses: actions/attest-build-provenance@v2
@@ -113,13 +123,8 @@ jobs:
             | tail -n +12 \
             | xargs -rI{} gh release delete {} --yes --cleanup-tag
 
-  # Deploy the freshly-built binary to the PR's ephemeral preview.
-  # Each PR gets its own env at pr-{N}.{domain} with DD_ENV=pr-{N}
-  # (hostname-isolated, no OAuth — browser access via /auth/pat).
-  # main/v* produce releases that production-deploy picks up separately.
-  #
-  # Body lives in deploy-cp.yml — same workflow prod uses, so every PR
-  # exercises the prod deploy path.
+  # Per-PR ephemeral preview at pr-{N}.{domain}. No OAuth (browser login
+  # via /auth/pat). Cascades into dd-local-preview relaunch.
   deploy-preview:
     if: github.event_name == 'pull_request'
     needs: build
@@ -139,3 +144,31 @@ jobs:
       comment_on_pr: true
       ref: ${{ github.event.pull_request.head.ref }}
     secrets: inherit
+
+  # Production deploy at app.{domain}. Fires on push-to-main OR on a
+  # manual workflow_dispatch (rollback to a specific release_tag).
+  # Tag pushes (v*) intentionally do not auto-deploy — they just
+  # publish the artifact. Cascades into dd-local-prod relaunch.
+  deploy-production:
+    if: >-
+      (github.event_name == 'push' && github.ref == 'refs/heads/main')
+      || github.event_name == 'workflow_dispatch'
+    needs: build
+    permissions:
+      contents: read
+      id-token: write
+      # Granted (though unused — comment_on_pr=false here) so the
+      # permissions intersection with deploy-cp.yml's job matches.
+      pull-requests: write
+    uses: ./.github/workflows/deploy-cp.yml
+    with:
+      env: production
+      hostname: app.${{ vars.DD_CF_DOMAIN || 'devopsdefender.com' }}
+      gcp_environment: production
+      workload_identity_provider: 'projects/779946350556/locations/global/workloadIdentityPools/github-actions-pool/providers/github-provider'
+      service_account: 'easyenclave-production-ci@easyenclave.iam.gserviceaccount.com'
+      release_tag: ${{ inputs.release_tag || 'latest' }}
+      oauth_enabled: true
+      comment_on_pr: false
+      ref: main
+    secrets: inherit
diff --git a/README.md b/README.md
index 289d19c..f8a1df1 100644
--- a/README.md
+++ b/README.md
@@ -39,7 +39,7 @@ The `devopsdefender` binary ships as a **GitHub release asset** — not an OCI i
 
 `cloudflared` is also pulled directly from `cloudflare/cloudflared`'s GitHub releases as a fetch-only boot workload — no bundling in our image, no Dockerfile step.
 
-Per-VM configuration (CF credentials, GitHub OAuth, the workload spec itself) is passed to easyenclave at boot via **GCE instance metadata** (`ee-config` attribute), read by `easyenclave::init::fetch_gce_metadata_config()` and applied as env vars. `scripts/gcp-deploy.sh` builds the spec and invokes `gcloud compute instances create --image-family=easyenclave-staging --metadata-from-file=ee-config=...`.
+Per-VM configuration (CF credentials, GitHub OAuth, the workload spec itself) is passed to easyenclave at boot via **GCE instance metadata** (`ee-config` attribute), read by `easyenclave::init::fetch_gce_metadata_config()` and applied as env vars. The CP-deploy step in `.github/workflows/deploy-cp.yml` builds the spec and invokes `gcloud compute instances create --image-family=easyenclave-staging --metadata-from-file=ee-config=...`.
 
 ## CI/CD
 
@@ -48,16 +48,18 @@ PR              → pre-release tagged pr-{sha12}, then ephemeral preview at pr-
 branch deleted  → pr-teardown.yml deletes the preview's VM, CF tunnel, and DNS
 push to main    → rolling `latest` release, then auto-deploy to production
 push v* tag     → versioned release (no auto-deploy)
-manual          → production-deploy.yml promotes any existing tag
+manual dispatch → redeploy any existing tag to production (rollback tool)
 ```
 
-Each PR gets its own isolated env at `pr-{N}.{domain}` with `DD_ENV=pr-{N}` — no more shared staging tier. `.github/workflows/release.yml` builds the static musl binary, publishes it as a GitHub release asset, deploys the PR's preview VM, and posts the URL back to the PR. The preview VM is verified via:
+Every path lives in `.github/workflows/release.yml`: one `build` job, then either `deploy-preview` (PR) or `deploy-production` (main / dispatch), both calling the reusable `deploy-cp.yml` with env-specific inputs. Each cascades into a relaunch of the matching `dd-local-{env}` VM on the tdx2 host — the Release run only goes green when that agent re-registers with the freshly-deployed CP. Verifications along the way:
 
 1. `/health` via the Cloudflare tunnel
 2. `/cp/attest` returning a real TDX MRTD (cryptographic proof the freshly-deployed VM is running — old VMs don't have the endpoint and return 404)
-3. No other `dd-pr-{N}-*` VM is RUNNING after deploy (STONITH must have halted the previous instance of this PR)
+3. Dashboard `/` returning HTTP 200 under a Bearer PAT
+4. No other `dd-{env}-*` VM is RUNNING after deploy (STONITH must have halted the previous instance)
+5. `dd-local-{env}` re-registers with the new CP within 5 min
 
-Browser access to a PR preview goes through `/auth/pat` (paste a GitHub PAT, validated against `DD_OWNER`). OAuth is only wired for production, which `production-deploy.yml` still targets at `app.{domain}`.
+Browser access to a PR preview goes through `/auth/pat` (paste a GitHub PAT, validated against `DD_OWNER`). OAuth is only wired for production, at `app.{domain}`.
 
 ## STONITH
 
diff --git a/apps/_infra/dd-relaunch.sh b/apps/_infra/dd-relaunch.sh
new file mode 100755
index 0000000..55d380d
--- /dev/null
+++ b/apps/_infra/dd-relaunch.sh
@@ -0,0 +1,52 @@
+#!/usr/bin/env bash
+# dd-relaunch.sh — destroy and recreate one local TDX agent VM.
+#
+# Invoked over SSH by .github/actions/relaunch-agent during a Release
+# cascade. Pulls the PR's (or main's) apps/ tree so local-agents.sh is
+# the one the caller authored (this script is refreshed too, but only
+# for the next run). Tears down the existing VM + overlay, runs
+# local-agents.sh to redefine, and starts the VM.
+#
+#   dd-relaunch.sh prod    https://app.devopsdefender.com    main
+#   dd-relaunch.sh preview https://pr-N.devopsdefender.com   feat/some-pr
+#
+# DD_PAT and DD_ITA_API_KEY must be set (non-empty) in the environment.
+
+set -euo pipefail
+# `:?` (not `?`) so an empty value fails here, before the old VM is destroyed.
+KIND="${1:?usage: dd-relaunch.sh <prod|preview> <cp-url> [ref]}"
+CP="${2:?cp url required}"
+REF="${3:-main}"
+: "${DD_PAT:?DD_PAT must be set}"
+: "${DD_ITA_API_KEY:?DD_ITA_API_KEY must be set}"
+
+case "$KIND" in
+  prod|preview) ;;
+  *) echo "unknown kind: $KIND (want prod|preview)" >&2; exit 2 ;;
+esac
+
+cd /home/tdx2/src/dd
+
+# Refresh apps/ (infra scripts included) from the caller's ref. The
+# checkout is limited to apps/ so a dirty working tree elsewhere doesn't
+# block the deploy. An updated dd-relaunch.sh is only relied on from the
+# *next* invocation.
+git fetch --quiet origin "$REF"
+git checkout --quiet "origin/$REF" -- apps/
+echo "dd-relaunch: refreshed apps/ from origin/$REF"
+
+vm="dd-local-$KIND"
+overlay="/var/lib/libvirt/images/$vm.qcow2"
+
+virsh destroy "$vm" 2>/dev/null || true
+virsh undefine "$vm" --managed-save --snapshots-metadata 2>/dev/null || true
+rm -f -- "$overlay"
+
+# Redefine via local-agents.sh; "" skips the other slot.
+case "$KIND" in
+  prod)    ./apps/_infra/local-agents.sh ""  "$CP" ;;
+  preview) ./apps/_infra/local-agents.sh "$CP" "" ;;
+esac
+
+virsh start "$vm"
+echo "relaunched $vm against $CP"
diff --git a/scripts/local-agents.sh b/apps/_infra/local-agents.sh
similarity index 97%
rename from scripts/local-agents.sh
rename to apps/_infra/local-agents.sh
index 341cc9e..20b772a 100755
--- a/scripts/local-agents.sh
+++ b/apps/_infra/local-agents.sh
@@ -12,11 +12,11 @@
 # Usage:
 #   export DD_PAT="$(gh auth token)"
 #   export DD_ITA_API_KEY="$(cat ~/.secrets/ita_api_key)"
-#   ./scripts/local-agents.sh https://pr-106.devopsdefender.com https://app.devopsdefender.com
+#   ./apps/_infra/local-agents.sh https://pr-106.devopsdefender.com https://app.devopsdefender.com
 #
 # Pass "" for either URL to skip defining that VM:
-#   ./scripts/local-agents.sh "" https://app.devopsdefender.com   # prod only
-#   ./scripts/local-agents.sh https://pr-N.devopsdefender.com ""  # preview only
+#   ./apps/_infra/local-agents.sh "" https://app.devopsdefender.com   # prod only
+#   ./apps/_infra/local-agents.sh https://pr-N.devopsdefender.com ""  # preview only
 #
 # After: virsh start dd-local-preview && virsh start dd-local-prod
 
diff --git a/scripts/dd-relaunch.sh b/scripts/dd-relaunch.sh
deleted file mode 100755
index a118618..0000000
--- a/scripts/dd-relaunch.sh
+++ /dev/null
@@ -1,57 +0,0 @@
-#!/usr/bin/env bash
-# dd-relaunch.sh — destroy and recreate one local TDX agent VM.
-#
-# Invoked over SSH by .github/workflows/local-agents.yml after a
-# Release / Production Deploy succeeds. Pulls the current main of dd
-# (so this script and local-agents.sh are always the latest), tears
-# down the existing VM + overlay, runs scripts/local-agents.sh to
-# redefine, and starts the VM.
-#
-#   dd-relaunch.sh prod    https://app.devopsdefender.com
-#   dd-relaunch.sh preview https://pr-N.devopsdefender.com
-#
-# DD_PAT and DD_ITA_API_KEY must be set in the environment.
-
-set -euo pipefail
-
-KIND="${1?usage: dd-relaunch.sh <prod|preview> <cp-url>}"
-CP="${2?cp url required}"
-REF="${3:-main}"
-: "${DD_PAT?DD_PAT must be set}"
-: "${DD_ITA_API_KEY?DD_ITA_API_KEY must be set}"
-
-case "$KIND" in
-  prod|preview) ;;
-  *) echo "unknown kind: $KIND (want prod|preview)" >&2; exit 2 ;;
-esac
-
-cd /home/tdx2/src/dd
-
-# Pull the latest scripts. Limit the checkout to the two scripts so a
-# dirty working tree elsewhere doesn't block the deploy. The relaunch
-# script itself has already been read into memory by bash, so the
-# update takes effect on the *next* invocation.
-git fetch --quiet origin "$REF"
-git checkout --quiet "origin/$REF" -- scripts/local-agents.sh scripts/dd-relaunch.sh
-git checkout --quiet "origin/$REF" -- scripts/workloads.sh 2>/dev/null || true
-git checkout --quiet "origin/$REF" -- apps/ 2>/dev/null || true
-echo "dd-relaunch: refreshed scripts + apps/ from origin/$REF"
-
-vm="dd-local-$KIND"
-overlay="/var/lib/libvirt/images/$vm.qcow2"
-
-virsh destroy "$vm" 2>/dev/null || true
-virsh undefine "$vm" --managed-save --snapshots-metadata 2>/dev/null || true
-rm -f "$overlay"
-
-# Redefine via local-agents.sh; "" skips the other slot.
-case "$KIND" in
-  prod)    ./scripts/local-agents.sh ""  "$CP" ;;
-  preview) ./scripts/local-agents.sh "$CP" "" ;;
-esac
-
-virsh start "$vm"
-echo "relaunched $vm against $CP"
-
-# ollama deploy + pull + query is driven from the workflow's HTTPS step
-# on ubuntu-latest, not here — see .github/workflows/local-agents.yml.
diff --git a/scripts/gcp-deploy.sh b/scripts/gcp-deploy.sh
deleted file mode 100755
index a65a378..0000000
--- a/scripts/gcp-deploy.sh
+++ /dev/null
@@ -1,148 +0,0 @@
-#!/bin/bash
-# gcp-deploy.sh — Create a TDX management VM on GCP that boots from a
-# sealed easyenclave image and runs dd management as a native process.
-#
-# Both the devopsdefender binary and cloudflared are fetched straight
-# from their GitHub releases by easyenclave's github_release workload
-# source — no OCI registry, no Dockerfile. Cloudflared is a fetch-only
-# boot workload: its binary lands in /var/lib/easyenclave/bin (now on
-# PATH) so dd-register can shell out to `cloudflared` by name.
-#
-# Agent-side mirror: a local TDX guest with a vfio-pci-passed GPU can
-# register against the CP this script deploys by using the same
-# easyenclave `github_release` workload source for the devopsdefender
-# binary, with `DD_REGISTER_URL=wss://{hostname}/register`. See the
-# local-GPU demo notes in the commit trail.
-#
-# Called by .github/workflows/{staging,production}-deploy.yml. Requires
-# gcloud CLI authenticated via Workload Identity Federation.
-#
-# Required env vars (set by the workflow):
-#   GCP_PROJECT_ID          — GCP project where the VM lives
-#   GCP_ZONE                — GCP zone (e.g. us-central1-c)
-#   DD_ENV                  — staging, production, or pr-{num} (ephemeral per-PR)
-#   DD_DOMAIN               — Public domain (e.g. devopsdefender.com)
-#   CLOUDFLARE_API_TOKEN    — CF API token (dd-register uses it)
-#   CLOUDFLARE_ACCOUNT_ID   — CF account ID
-#   CLOUDFLARE_ZONE_ID      — CF zone ID
-#
-# Optional env vars:
-#   DD_HOSTNAME             — public hostname override. If unset, derived
-#                             from DD_ENV (production → app.$DOMAIN,
-#                             anything else → app-staging.$DOMAIN). Set
-#                             explicitly for per-PR envs (pr-42.$DOMAIN).
-#   DD_GITHUB_CLIENT_ID     — GitHub OAuth client ID. If unset, dd-web
-#                             disables OAuth login and only PAT auth works.
-#                             Per-PR envs leave this unset.
-#   DD_GITHUB_CLIENT_SECRET — GitHub OAuth client secret (paired with above)
-#   DD_GITHUB_CALLBACK_URL  — OAuth callback, default https://{hostname}/auth/github/callback
-#   EE_IMAGE_FAMILY         — easyenclave GCP image family
-#   EE_IMAGE_PROJECT        — project hosting the image
-#   DD_RELEASE_TAG          — GitHub release tag on devopsdefender/dd
-#                             (defaults to 'latest'; PRs override with pr-{sha12})
-#   VM_MACHINE_TYPE         — default c3-standard-4
-#   VM_DISK_SIZE            — default 10GB
-
-set -euo pipefail
-
-# ── easyenclave image family ──────────────────────────────────────────────
-#   easyenclave-staging → rolling main, rotates on every push (5 kept)
-#   easyenclave-stable  → v* tags, kept forever
-EE_IMAGE_FAMILY="${EE_IMAGE_FAMILY:-easyenclave-staging}"
-EE_IMAGE_PROJECT="${EE_IMAGE_PROJECT:-easyenclave}"
-DD_RELEASE_TAG="${DD_RELEASE_TAG:-latest}"
-
-VM_NAME="dd-${DD_ENV}-$(date +%s)"
-VM_MACHINE_TYPE="${VM_MACHINE_TYPE:-c3-standard-4}"
-VM_DISK_SIZE="${VM_DISK_SIZE:-10GB}"
-
-if [ -z "${DD_HOSTNAME:-}" ]; then
-  if [ "${DD_ENV}" = "production" ]; then
-    DD_HOSTNAME="app.${DD_DOMAIN}"
-  else
-    DD_HOSTNAME="app-staging.${DD_DOMAIN}"
-  fi
-fi
-DD_GITHUB_CLIENT_ID="${DD_GITHUB_CLIENT_ID:-}"
-DD_GITHUB_CLIENT_SECRET="${DD_GITHUB_CLIENT_SECRET:-}"
-DD_GITHUB_CALLBACK_URL="${DD_GITHUB_CALLBACK_URL:-https://${DD_HOSTNAME}/auth/github/callback}"
-
-# Intel Trust Authority — mandatory. DD_ITA_API_KEY must be set in the
-# workflow (from secrets.DD_ITA_API_KEY). The CP will refuse to start
-# without one. Everything else has a default.
-if [ -z "${DD_ITA_API_KEY:-}" ]; then
-  echo "DD_ITA_API_KEY is required (configure secrets.DD_ITA_API_KEY)" >&2
-  exit 1
-fi
-DD_ITA_BASE_URL="${DD_ITA_BASE_URL:-https://api.trustauthority.intel.com}"
-DD_ITA_JWKS_URL="${DD_ITA_JWKS_URL:-https://portal.trustauthority.intel.com/certs}"
-DD_ITA_ISSUER="${DD_ITA_ISSUER:-https://portal.trustauthority.intel.com}"
-
-# ── Build the workload spec ──────────────────────────────────────────────
-# Boot workloads come from apps/<name>/workload.{json,json.tmpl}. Same
-# file per workload whether this CP runs in prod, staging, or a PR
-# preview; only the env-var substitutions differ.
-#
-#   cloudflared    — fetch-only, puts the binary on PATH for DD to spawn.
-#   dd-management  — devopsdefender in DD_MODE=management (CP + dashboard).
-#
-# Empty ${DD_GITHUB_CLIENT_ID} etc produce empty "KEY=" strings; the
-# bake helper strips those so the resulting spec matches the old
-# `if $gh_client_id == "" then [] else [...]` conditional.
-SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
-# shellcheck source=./workloads.sh
-source "$SCRIPT_DIR/workloads.sh"
-EE_BOOT_WORKLOADS=$(
-  DD_RELEASE_TAG="$DD_RELEASE_TAG" \
-  CLOUDFLARE_API_TOKEN="$CLOUDFLARE_API_TOKEN" \
-  CLOUDFLARE_ACCOUNT_ID="$CLOUDFLARE_ACCOUNT_ID" \
-  CLOUDFLARE_ZONE_ID="$CLOUDFLARE_ZONE_ID" \
-  DD_DOMAIN="$DD_DOMAIN" \
-  DD_HOSTNAME="$DD_HOSTNAME" \
-  DD_ENV="$DD_ENV" \
-  DD_GITHUB_CLIENT_ID="$DD_GITHUB_CLIENT_ID" \
-  DD_GITHUB_CLIENT_SECRET="$DD_GITHUB_CLIENT_SECRET" \
-  DD_GITHUB_CALLBACK_URL="$DD_GITHUB_CALLBACK_URL" \
-  DD_ITA_API_KEY="$DD_ITA_API_KEY" \
-  DD_ITA_BASE_URL="$DD_ITA_BASE_URL" \
-  DD_ITA_JWKS_URL="$DD_ITA_JWKS_URL" \
-  DD_ITA_ISSUER="$DD_ITA_ISSUER" \
-  join \
-    "$REPO_ROOT/apps/cloudflared/workload.json" \
-    "$REPO_ROOT/apps/dd-management/workload.json.tmpl"
-)
-# ollama + openclaw are NOT baked into the CP preview. EE's tmpfs
-# /var/lib/easyenclave is too small for the 900 MB container image,
-# and attaching a scratch PD here would duplicate what the local
-# dd-local-preview VM already provides via its vdc ext4 disk. The
-# preview CP stays slim; the ollama+openclaw demo registers from
-# dd-local-preview (scripts/local-agents.sh).
-
-# ── Wrap into ee-config ───────────────────────────────────────────────────
-jq -c -n \
-  --arg workloads "$EE_BOOT_WORKLOADS" \
-  '{ "EE_BOOT_WORKLOADS": $workloads, "EE_OWNER": "devopsdefender" }' \
-  > /tmp/ee-config.json
-
-trap 'rm -f /tmp/ee-config.json' EXIT
-
-# ── Create the VM ─────────────────────────────────────────────────────────
-gcloud compute instances create "$VM_NAME" \
-  --project="$GCP_PROJECT_ID" \
-  --zone="$GCP_ZONE" \
-  --machine-type="$VM_MACHINE_TYPE" \
-  --confidential-compute-type=TDX \
-  --maintenance-policy=TERMINATE \
-  --boot-disk-size="$VM_DISK_SIZE" \
-  --image-family="$EE_IMAGE_FAMILY" \
-  --image-project="$EE_IMAGE_PROJECT" \
-  --metadata-from-file=ee-config=/tmp/ee-config.json \
-  --labels=devopsdefender=managed,dd_env="${DD_ENV}" \
-  --tags=dd-management
-
-echo "VM: $VM_NAME"
-echo "  image:    family $EE_IMAGE_FAMILY ($EE_IMAGE_PROJECT)"
-echo "  hostname: $DD_HOSTNAME"
-echo "  dd release: $DD_RELEASE_TAG"
-echo "  workload: dd management"
diff --git a/scripts/redeploy-workload.sh b/scripts/redeploy-workload.sh
deleted file mode 100755
index e866d3f..0000000
--- a/scripts/redeploy-workload.sh
+++ /dev/null
@@ -1,50 +0,0 @@
-#!/usr/bin/env bash
-# redeploy-workload.sh — POST one baked workload spec to a live agent's
-# /deploy endpoint. Handy for iterating on apps/<name>/workload.json
-# without rebuilding the whole config.iso + restarting the VM.
-#
-# Usage:
-#   redeploy-workload.sh <cp_url> <agent_vm_name> <app_path>
-#
-# Example:
-#   DD_PAT=$(gh auth token) \
-#     ./scripts/redeploy-workload.sh \
-#       https://app.devopsdefender.com \
-#       dd-local-prod \
-#       apps/openclaw/workload.json.tmpl
-#
-# Requires DD_PAT in env. Template envs (MODEL, DD_CP_URL, …) must
-# also be exported if the referenced workload file is a .tmpl.
-
-set -euo pipefail
-
-CP_URL="${1?usage: redeploy-workload.sh <cp_url> <vm_name> <app_path>}"
-VM_NAME="${2?vm_name required (e.g. dd-local-prod)}"
-APP_PATH="${3?app_path required (e.g. apps/openclaw/workload.json.tmpl)}"
-: "${DD_PAT?set DD_PAT (e.g. DD_PAT=\$(gh auth token))}"
-
-SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-# shellcheck source=./workloads.sh
-source "$SCRIPT_DIR/workloads.sh"
-
-AUTH=(-H "Authorization: Bearer $DD_PAT")
-
-# Discover the agent's tunnel hostname via CP's fleet API.
-agent_host=$(
-  curl -fsS "${AUTH[@]}" "$CP_URL/api/agents" 2>/dev/null \
-    | jq -r --arg vm "$VM_NAME" '
-        [.[] | select(.vm_name==$vm and .status=="healthy")]
-        | sort_by(.last_seen) | reverse | .[0].hostname // empty'
-)
-if [ -z "$agent_host" ] || [ "$agent_host" = "null" ]; then
-  echo "ERROR: no healthy $VM_NAME in $CP_URL/api/agents" >&2
-  exit 1
-fi
-echo "agent: https://$agent_host"
-
-spec=$(bake "$APP_PATH")
-echo "redeploying $(echo "$spec" | jq -r .app_name)..."
-curl -fsS --max-time 60 "${AUTH[@]}" \
-  "https://$agent_host/deploy" \
-  -H 'Content-Type: application/json' \
-  -d "$spec" | jq -c .
diff --git a/scripts/workloads.sh b/scripts/workloads.sh
deleted file mode 100755
index c41fe47..0000000
--- a/scripts/workloads.sh
+++ /dev/null
@@ -1,54 +0,0 @@
-#!/usr/bin/env bash
-# workloads.sh — shared helpers for assembling EE workload specs.
-#
-# A DD "workload" is a JSON object with {app_name, github_release,
-# cmd, env} that EE's DeployRequest consumes. Each app lives in
-# apps/<name>/workload.json (literal) or apps/<name>/workload.json.tmpl
-# (with ${VAR} placeholders substituted at bake time from the caller's
-# environment).
-#
-# Public functions:
-#   bake <path>           — print one rendered workload to stdout.
-#                           Plain .json is emitted as-is; .json.tmpl
-#                           gets envsubst + empty-env-entry stripping.
-#   join <path> [path…]   — print a JSON array of rendered workloads.
-#
-# Sourced from scripts/local-agents.sh and scripts/gcp-deploy.sh so
-# both scripts share one source of truth for the workload shape.
-
-# Render a single workload file.
-# For .json files, passthrough.
-# For .json.tmpl files, substitute ${VAR} from the current env, then
-# remove any "KEY=" env array entries that ended up with an empty
-# value (matches the conditional-include pattern gcp-deploy.sh used
-# for DD_GITHUB_CLIENT_ID & co).
-bake() {
-  local path="$1"
-  if [[ "$path" == *.json ]]; then
-    jq -c . "$path"
-  elif [[ "$path" == *.json.tmpl ]]; then
-    envsubst < "$path" \
-      | jq -c 'if .env then .env |= map(select(. | test("^[^=]+=.+"))) else . end'
-  else
-    echo "workloads.sh: unknown workload file type: $path" >&2
-    return 1
-  fi
-}
-
-# Print a JSON array of rendered workloads.
-join() {
-  local out="["
-  local first=1
-  for p in "$@"; do
-    local rendered
-    rendered=$(bake "$p") || return 1
-    if [ $first -eq 1 ]; then
-      out+="$rendered"
-      first=0
-    else
-      out+=",$rendered"
-    fi
-  done
-  out+="]"
-  echo "$out"
-}