Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 102 additions & 0 deletions .github/actions/dispatch-suite/action.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
# Composite action metadata and inputs. Numeric defaults are kept as quoted
# strings; the step script reads them through environment variables.
name: "Dispatch scenario suite"
description: >-
  Dispatch a downstream cascade-example repo's scenario-suite.yaml on its own
  main, recover the run id it created, and watch that run to its conclusion.

inputs:
  # Where to dispatch, and with which credentials.
  repo:
    description: "Target repo slug, e.g. stablekernel/cascade-example-primary"
    required: true
  token:
    description: >-
      PAT with Actions read/write on the target repo. GITHUB_TOKEN cannot
      dispatch cross-repo, so a fleet-wide fine-grained PAT is mandatory.
    required: true

  # What to dispatch.
  workflow:
    description: "Workflow file to dispatch in the target repo"
    required: false
    default: "scenario-suite.yaml"
  ref:
    description: "Target ref to dispatch against (must be the target's default branch)"
    required: false
    default: "main"

  # Bounded polling used while recovering the id of the dispatched run.
  recover-attempts:
    description: "How many times to poll for the dispatched run before giving up"
    required: false
    default: "30"
  recover-interval:
    description: "Seconds between recovery polls"
    required: false
    default: "10"

runs:
  using: 'composite'
  steps:
    # Reconciliation: dispatch -> recover -> watch, with zero target-side change.
    #
    # Cross-repo workflow_dispatch returns 204 with no run id (CONFIRMED in the
    # pattern research), so we cannot await the run we just created directly. We
    # recover it by listing the target workflow's runs created at/after the
    # dispatch timestamp (event = workflow_dispatch) and taking the newest.
    # This is best-effort: another dispatch of the same workflow landing in the
    # same window can be picked up instead. A future refinement could echo a
    # distinct_id marker into the suite run-name for race-free recovery once
    # the suites carry one.
    #
    # NOTE(review): the gh manual states that `gh run watch` does not support
    # fine-grained PATs (no `checks:read` permission), while the `token` input
    # asks for a fine-grained PAT. Confirm the watch step works with the fleet
    # token on the gh version shipped by the runner image.
    - name: Dispatch and watch
      shell: bash
      env:
        # Inputs are passed through env rather than interpolated into the
        # script, so their values are never parsed as shell source.
        GH_TOKEN: ${{ inputs.token }}
        TARGET_REPO: ${{ inputs.repo }}
        TARGET_WORKFLOW: ${{ inputs.workflow }}
        TARGET_REF: ${{ inputs.ref }}
        RECOVER_ATTEMPTS: ${{ inputs.recover-attempts }}
        RECOVER_INTERVAL: ${{ inputs.recover-interval }}
      run: |
        set -euo pipefail

        # Capture a UTC timestamp BEFORE dispatching so the recovery filter only
        # matches runs created from this point on, not pre-existing ones.
        DISPATCH_TS=$(date -u +%Y-%m-%dT%H:%M:%SZ)
        echo "Dispatching $TARGET_WORKFLOW in $TARGET_REPO @ $TARGET_REF (since $DISPATCH_TS)"

        # NOTE: do NOT pass -f cascade_version=... here. The suites do not define
        # that input yet, so an extra input would error with "unexpected inputs".
        # The version under test is computed and logged by the orchestrator but
        # is inert until the suites accept the input.
        gh workflow run "$TARGET_WORKFLOW" \
          --repo "$TARGET_REPO" \
          --ref "$TARGET_REF"

        # Recover the run id. Cross-repo dispatch is async; the run may not be
        # listable immediately, so poll with a bounded retry.
        RUN_ID=""
        for attempt in $(seq 1 "$RECOVER_ATTEMPTS"); do
          # A failed listing (rate limit, 5xx, network blip) must not abort the
          # step: under `set -e` a bare assignment would exit on the first
          # error and defeat the retry loop. Treat it as "not visible yet".
          if ! RUN_ID=$(gh run list \
            --repo "$TARGET_REPO" \
            --workflow "$TARGET_WORKFLOW" \
            --event workflow_dispatch \
            --created ">=$DISPATCH_TS" \
            --limit 20 \
            --json databaseId,status,conclusion,createdAt \
            --jq 'sort_by(.createdAt) | reverse | .[0].databaseId // empty'); then
            echo "::warning::gh run list failed on attempt $attempt/$RECOVER_ATTEMPTS; will retry"
            RUN_ID=""
          fi
          if [ -n "$RUN_ID" ]; then
            echo "Recovered run id $RUN_ID on attempt $attempt"
            break
          fi
          # No point sleeping after the final attempt; fall through to the error.
          if [ "$attempt" -lt "$RECOVER_ATTEMPTS" ]; then
            echo "Run not visible yet (attempt $attempt/$RECOVER_ATTEMPTS); sleeping ${RECOVER_INTERVAL}s"
            sleep "$RECOVER_INTERVAL"
          fi
        done

        if [ -z "$RUN_ID" ]; then
          echo "::error::Could not recover a $TARGET_WORKFLOW run in $TARGET_REPO after dispatch"
          exit 1
        fi

        # Surface a clickable link to the downstream run in the job summary.
        RUN_URL="https://github.com/$TARGET_REPO/actions/runs/$RUN_ID"
        echo "Watching $RUN_URL"
        {
          echo "- **$TARGET_REPO**: [run $RUN_ID]($RUN_URL)"
        } >> "$GITHUB_STEP_SUMMARY"

        # Block on the recovered run's conclusion. --exit-status makes gh return
        # non-zero if the run concluded with a non-success result.
        gh run watch "$RUN_ID" --repo "$TARGET_REPO" --exit-status
18 changes: 9 additions & 9 deletions .github/workflows/e2e.yaml
Original file line number Diff line number Diff line change
@@ -1,21 +1,21 @@
# End-to-end test workflow
# Integration test workflow (act + gitea testcontainers).
# Triggers:
# push:tags every release tag (existing)
# workflow_dispatch manual run against any ref (existing)
# merge_group runs as a merge-queue gate before merging to main
# schedule nightly at 07:00 UTC (low-traffic window) against main
#
# E2E uses act + gitea testcontainers and is too slow + flaky to run per PR.
# Run locally (`go test -v ./e2e/...`) before pushing instead.
name: E2E
# This workflow uses act + gitea testcontainers and is too slow + flaky to run
# per PR. Run locally (`go test -v ./e2e/...`) before pushing instead.
#
# NOTE: the `name:` below is referenced by fleet-e2e.yaml's workflow_run trigger
# ("Integration (act + gitea)"). Keep the two in sync if this is ever renamed.
name: Integration (act + gitea)

on:
push:
tags:
- 'v*'
merge_group:
schedule:
- cron: '0 7 * * *'
workflow_dispatch:
inputs:
ref:
Expand All @@ -42,8 +42,8 @@ jobs:
- uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
with:
# workflow_dispatch: honour the explicit ref input.
# All other triggers (push:tags, merge_group, schedule): use the
# exact SHA that triggered the run so we test what GitHub resolved.
# All other triggers (push:tags, merge_group): use the exact SHA
# that triggered the run so we test what GitHub resolved.
ref: ${{ github.event.inputs.ref || github.sha }}

- uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0
Expand Down
224 changes: 224 additions & 0 deletions .github/workflows/fleet-e2e.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,224 @@
# Fleet E2E - revalidates the downstream cascade-example fleet on live GitHub.
#
# This is maintainer CI: hand-written tooling that lives in cascade's repo, not
# a product feature and not part of cascade's generated output. A green Fleet
# run means: this cascade version validated across all 8 example
# repos, each running its own scenario-suite.yaml in its OWN repo context (own
# token, own main, own manifest). It is the release-candidate fleet gate.
#
# Triggers:
# workflow_run of "Integration (act + gitea)" on completion - makes the E2E
# dependency NATIVE: Fleet only fans out once Integration is
# green for an rc tag. No runner held open polling for it.
# workflow_dispatch manual override (bypasses the rc-tag gate intentionally),
# with an optional cascade_version input.
#
# IMPORTANT: the workflow_run trigger references the source workflow by its
# `name:` ("Integration (act + gitea)"). Keep that name in sync with e2e.yaml.
name: Fleet E2E (live GitHub)

on:
  # Fires whenever the Integration workflow finishes; the rc-tag / success
  # filtering happens in the `resolve` job's `if`.
  workflow_run:
    workflows:
      - "Integration (act + gitea)"
    types:
      - completed
  workflow_dispatch:
    inputs:
      cascade_version:
        description: >-
          cascade version to validate (e.g. v1.2.0-rc.1). Default empty resolves
          to the rc tag on the workflow_run path. NOTE: passing this to the
          suites is wired but inert until the suites accept the input.
        required: false
        default: ""

permissions:
  contents: read

# One in-flight fleet run per key, where the key is the source run's ref name
# (the rc tag), else the manually supplied version, else the run id. Runs that
# share a key queue behind the in-flight one instead of cancelling it
# (cancel-in-progress is false). Different rc tags produce different keys, so
# they do not supersede one another.
concurrency:
  group: fleet-e2e-${{ github.event.workflow_run.head_branch || github.event.inputs.cascade_version || github.run_id }}
  cancel-in-progress: false

env:
  # Owner of the downstream example repos. The jobs below dispatch to eight of
  # them: primary must finish before its two dependents (they mutate primary's
  # shared external state); the rest are independent.
  FLEET_OWNER: stablekernel

jobs:
# Resolve the cascade version under test and expose it as a job output. Every
# fan-out job `needs` this job, so its `if` below is the single rc-tag gate for
# the whole workflow.
resolve:
  name: Resolve version under test
  runs-on: ubuntu-latest
  # Top-level guard: only fan out for a manual dispatch, or a green
  # Integration run that was a push of an rc tag. This filters out
  # merge_group / non-rc completions.
  #
  # workflow_run.head_branch carries the short ref name of whatever triggered
  # the source run. For a tag push that is the tag's short name (e.g.
  # v1.2.0-rc.1).
  #
  # NOTE(review): the compute step also has a head_sha -> tag fallback for an
  # empty head_branch, but this guard requires head_branch to start with 'v'
  # on the workflow_run path, and head_sha is empty on the dispatch path, so
  # that fallback looks unreachable as written. Relax this guard if the
  # fallback is meant to be live.
  if: >-
    github.event_name == 'workflow_dispatch' ||
    (github.event.workflow_run.conclusion == 'success' &&
    github.event.workflow_run.event == 'push' &&
    startsWith(github.event.workflow_run.head_branch, 'v') &&
    contains(github.event.workflow_run.head_branch, '-rc.'))
  permissions:
    contents: read
    actions: read
  outputs:
    cascade_version: ${{ steps.compute.outputs.cascade_version }}
  steps:
    - name: Compute cascade version under test
      id: compute
      env:
        # PAT is only needed for the head_sha -> tag fallback (a lookup
        # against this repo's tags). GITHUB_TOKEN would also work for
        # same-repo reads, but we standardise on the fleet PAT.
        GH_TOKEN: ${{ secrets.CASCADE_STATE_TOKEN }}
        # Event data is passed through env rather than interpolated into the
        # script so it is never parsed as shell source.
        EVENT_NAME: ${{ github.event_name }}
        INPUT_VERSION: ${{ github.event.inputs.cascade_version }}
        WR_HEAD_BRANCH: ${{ github.event.workflow_run.head_branch }}
        WR_HEAD_SHA: ${{ github.event.workflow_run.head_sha }}
      run: |
        set -euo pipefail
        if [ "$EVENT_NAME" = "workflow_dispatch" ] && [ -n "$INPUT_VERSION" ]; then
          # Manual override: trust the explicit input.
          VERSION="$INPUT_VERSION"
        elif [ -n "$WR_HEAD_BRANCH" ]; then
          # Primary path: the rc tag short-name from the source push run.
          VERSION="$WR_HEAD_BRANCH"
        elif [ -n "$WR_HEAD_SHA" ]; then
          # Fallback: head_branch was empty; resolve the rc tag pointing at the
          # source run's head_sha.
          # The tags endpoint is paginated (30 per page by default), so walk
          # every page; otherwise an rc tag outside the first page is missed.
          # A sha can carry more than one rc tag; pick the highest by version
          # sort so selection is deterministic regardless of API ordering.
          # `|| true` keeps an empty match (or a failed lookup) from aborting
          # the step; VERSION is simply left empty.
          VERSION=$(gh api --paginate "repos/${GITHUB_REPOSITORY}/tags?per_page=100" \
            --jq ".[] | select(.commit.sha == \"$WR_HEAD_SHA\") | .name" \
            | grep -- '-rc\.' | sort -V -r | head -n 1 || true)
        else
          # Manual dispatch with no input: nothing to resolve.
          VERSION=""
        fi

        echo "cascade_version=$VERSION" >> "$GITHUB_OUTPUT"
        {
          echo "## Fleet E2E"
          echo ""
          echo "Trigger: \`$EVENT_NAME\`"
          echo "cascade version under test: \`${VERSION:-<empty>}\`"
          echo ""
          echo "> Version passing to suites is computed and logged here but"
          echo "> currently INERT: the suites do not yet accept a"
          echo "> \`cascade_version\` input."
        } >> "$GITHUB_STEP_SUMMARY"

# Stage 1 - the primary example repo. It gates stage 2, so it runs on its own
# and must pass before the dependents start.
primary:
  name: primary
  needs:
    - resolve
  runs-on: ubuntu-latest
  permissions:
    contents: read
    actions: read
  steps:
    # Checkout is required so the local composite action path resolves.
    - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
    - name: Dispatch and watch primary
      uses: ./.github/actions/dispatch-suite
      with:
        repo: ${{ env.FLEET_OWNER }}/cascade-example-primary
        token: ${{ secrets.CASCADE_STATE_TOKEN }}

# Stage 2 - repos that mutate primary's shared external state. They are held
# back until primary is green; the two suites then run side by side and one
# failing does not cancel the other (fail-fast is off).
dependents:
  name: dependents (${{ matrix.repo }})
  needs:
    - primary
  runs-on: ubuntu-latest
  permissions:
    contents: read
    actions: read
  strategy:
    fail-fast: false
    matrix:
      repo:
        - artifact-a
        - artifact-b
  steps:
    # Checkout is required so the local composite action path resolves.
    - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
    - name: Dispatch and watch
      uses: ./.github/actions/dispatch-suite
      with:
        repo: ${{ env.FLEET_OWNER }}/cascade-example-${{ matrix.repo }}
        token: ${{ secrets.CASCADE_STATE_TOKEN }}

# Stage 3 - suites with no ordering constraint. They only wait for `resolve`,
# so they fan out in parallel with stage 1, and one failing suite does not
# cancel the others (fail-fast is off).
independents:
  name: independents (${{ matrix.repo }})
  needs:
    - resolve
  runs-on: ubuntu-latest
  permissions:
    contents: read
    actions: read
  strategy:
    fail-fast: false
    matrix:
      # Quoted so the digit-leading names are unambiguously strings.
      repo:
        - '4env'
        - '3env'
        - '2env'
        - 'single-env'
        - 'release-only'
  steps:
    # Checkout is required so the local composite action path resolves.
    - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
    - name: Dispatch and watch
      uses: ./.github/actions/dispatch-suite
      with:
        repo: ${{ env.FLEET_OWNER }}/cascade-example-${{ matrix.repo }}
        token: ${{ secrets.CASCADE_STATE_TOKEN }}

# Fan-in: this job's conclusion is the rc fleet gate. It writes a per-stage
# result table to the summary and fails unless every fan-out stage reported
# `success`.
aggregate:
  name: Fleet gate
  needs:
    - resolve
    - primary
    - dependents
    - independents
  # Only render a verdict when the fleet actually fanned out. On filtered-out
  # workflow_run completions (merge_group, non-rc tags, non-green source runs)
  # resolve is skipped, so this job is skipped too and the run is a clean no-op
  # rather than a false-red. A genuine fan-out failure still reds the run:
  # resolve succeeded, always() keeps this job alive, and the check below
  # treats any stage result other than `success` (including `skipped`) as a
  # failure.
  if: always() && needs.resolve.result == 'success'
  runs-on: ubuntu-latest
  permissions:
    contents: read
  steps:
    - name: Aggregate fleet result
      env:
        R_PRIMARY: ${{ needs.primary.result }}
        R_DEPENDENTS: ${{ needs.dependents.result }}
        R_INDEPENDENTS: ${{ needs.independents.result }}
        VERSION: ${{ needs.resolve.outputs.cascade_version }}
      run: |
        set -euo pipefail

        # Summary table first, so it is published even when the gate fails.
        # Backticks are escaped because the heredoc is unquoted (the variables
        # must expand).
        cat >> "$GITHUB_STEP_SUMMARY" <<EOF
        ## Fleet E2E result

        cascade version under test: \`${VERSION:-<empty>}\`

        | Stage | Result |
        |---|---|
        | primary | $R_PRIMARY |
        | dependents (artifact-a, artifact-b) | $R_DEPENDENTS |
        | independents (4env, 3env, 2env, single-env, release-only) | $R_INDEPENDENTS |

        > rc gate: this conclusion is the fleet validation signal for
        > the rc tag. rc -> release promotion should consume the latest
        > fleet-e2e conclusion for that tag before promoting.
        EOF

        # Fail on the first stage that is anything other than success.
        for stage_result in "$R_PRIMARY" "$R_DEPENDENTS" "$R_INDEPENDENTS"; do
          [ "$stage_result" = "success" ] && continue
          echo "::error::Fleet E2E failed: one or more suites did not pass"
          exit 1
        done
        echo "Fleet E2E passed across all suites"
17 changes: 14 additions & 3 deletions .github/workflows/validate.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
# Validation workflow - runs tests and lint
# Called by orchestrate workflow during CI/CD
name: Validate
# Tests & Lint - runs go test -race + coverage and golangci-lint.
#
# Triggers:
# workflow_call invoked by orchestrate.yaml on PRs (keep - do not remove).
# push: tags standalone run on every release/rc tag.
# workflow_dispatch manual standalone run against any ref.
#
# The standalone triggers give this workflow runs of its own so its status
# badge renders; a workflow_call-only workflow has no standalone runs to badge.
name: Tests & Lint

on:
workflow_call:
Expand All @@ -14,6 +21,10 @@ on:
result:
description: 'Validation result (success/failure)'
value: ${{ jobs.validate.outputs.result }}
push:
tags:
- 'v*'
workflow_dispatch:

permissions:
contents: read
Expand Down
Loading
Loading