diff --git a/docs/src/content/docs/workflows.md b/docs/src/content/docs/workflows.md
index f3ac631..3ef84be 100644
--- a/docs/src/content/docs/workflows.md
+++ b/docs/src/content/docs/workflows.md
@@ -259,7 +259,7 @@ flowchart TD
RF["Roll forward first (default)
fix merged to trunk; refused if not an ancestor of trunk tip"]
RF -- "env must run base + fix only" --> IB["env/<env> integration branch
created on demand at recorded state SHA"]
IB --> CP["cherry-pick onto hotfix/<env>/<short-sha>"]
- CP -- "clean" --> PRclean["resolution PR · cascade-hotfix
auto-merge, gated by env checks"]
+ CP -- "clean" --> PRclean["resolution PR · cascade-hotfix
state_token merge, gated by env checks"]
CP -- "conflict" --> PRconf["resolution PR · cascade-hotfix-conflict
markers committed; human force-pushes head"]
PRclean --> MERGE["on merge"]
PRconf --> MERGE
@@ -294,7 +294,7 @@ state:
### Cherry-pick and resolution pull request
-A clean cherry-pick opens a pull request labeled `cascade-hotfix` with auto-merge enabled. The required checks configured on `env/` gate the merge, and the pull request is the audit record even when no human touches it.
+A clean cherry-pick opens a pull request labeled `cascade-hotfix` and merges it as the configured `state_token`. The apply job polls the pull request until it is mergeable, so the required checks configured on `env/` still gate the merge, and the pull request is the audit record even when no human touches it. The merge runs as `state_token` rather than the default `GITHUB_TOKEN` on purpose: a merge authored by `GITHUB_TOKEN` does not emit the `pull_request` close event, so the build, deploy, and finalize stages would never run and the diverged state would never be recorded. Configure `state_token` with a trigger-capable token (the same one used for state writes) to get the post-merge stages after an automated hotfix.
On conflict, the conflicted tree is committed with its conflict markers intact, the branch is pushed, and the pull request is opened labeled `cascade-hotfix-conflict`. Committing the markers makes the resolution pull request a real, checkout-able branch: the diff shows exactly where the conflict is, and a human resolves it locally by force-pushing the head branch.
@@ -332,7 +332,7 @@ Its jobs:
| Job | Trigger | Role |
| --- | --- | --- |
| plan | dispatch | Fetch env branches and tags, run `cascade hotfix plan`, surface branch-protection suggestions as `::notice::` lines |
-| apply | dispatch (not dry-run) | Cherry-pick onto `hotfix//`, open the resolution pull request (clean auto-merges; conflict opens the labeled resolution pull request) |
+| apply | dispatch (not dry-run) | Cherry-pick onto `hotfix//`, open the resolution pull request (clean polls until mergeable then merges as `state_token`; conflict opens the labeled resolution pull request) |
| check | open pull request to `env/*` | Validate the manifest while the hotfix pull request is open |
| build | merged hotfix | Build the merge SHA, since a cherry-picked commit has no prebuilt artifact |
| deploy | merged hotfix | Deploy to the target environment, paired with a rollback job mirroring the promote workflow |
diff --git a/internal/generate/hotfix.go b/internal/generate/hotfix.go
index 5cc5b38..3f63b37 100644
--- a/internal/generate/hotfix.go
+++ b/internal/generate/hotfix.go
@@ -30,6 +30,9 @@ const hotfixConflictLabel = "cascade-hotfix-conflict"
// The workflow carries two triggers in one file: a workflow_dispatch entry that
// plans and applies the cherry-pick, and a pull_request (closed) entry that runs
// the build, deploy, and finalize stages when the resolution pull request merges.
+// The clean-path merge runs as the configured state token so the merge is
+// trigger capable and the pull_request (closed) chain actually fires; a merge
+// authored by the default GITHUB_TOKEN would not emit that event.
//
// This generator is gated on the configured environment count: it emits only
// when two or more environments are declared, because a single-environment
@@ -48,6 +51,18 @@ func NewHotfixGenerator(cfg *config.TrunkConfig, baseDir string) *HotfixGenerato
}
}
+// getStateTokenRef returns the token expression used to merge the clean-path
+// resolution PR. It mirrors the release and promote generators: users configure
+// the full expression via the state_token config option, and it defaults to the
+// GITHUB_TOKEN expression when unset. The clean-path merge must run as this
+// actor so the merge emits a pull_request(closed) event and reaches the build,
+// deploy, and finalize chain; the default GITHUB_TOKEN does not emit that event,
+// so a hotfix into a repo with no configured state token records no state until
+// the operator supplies a trigger-capable token here.
+func (g *HotfixGenerator) getStateTokenRef() string {
+ return g.config.GetStateToken()
+}
+
// Enabled reports whether the hotfix workflow should be emitted. The workflow is
// emitted only when the manifest declares two or more environments, since the
// first environment is the build target and at least one further environment is
@@ -238,9 +253,11 @@ func (g *HotfixGenerator) writePlanJob(sb *strings.Builder) {
}
// writeApplyJob emits the apply job, run on dispatch when not a dry-run. It
-// cherry-picks the commit onto a hotfix branch and opens a resolution PR. Clean
-// cherry-picks auto-merge; conflicting ones open a labeled PR for local
-// resolution and do not auto-merge.
+// cherry-picks the commit onto a hotfix branch and opens a resolution PR. A
+// clean cherry-pick is merged by the dedicated merge step as the configured
+// state token, which polls until the PR is mergeable so a protected env branch
+// with a required check still gates the merge. A conflicting cherry-pick opens a
+// labeled PR for local resolution and is merged by a human via the UI.
func (g *HotfixGenerator) writeApplyJob(sb *strings.Builder) {
sb.WriteString(" apply:\n")
sb.WriteString(" name: Apply Hotfix Cherry-Pick\n")
@@ -316,15 +333,15 @@ func (g *HotfixGenerator) writeApplyJob(sb *strings.Builder) {
fmt.Fprintf(sb, " --label %s \\\n", hotfixLabel)
sb.WriteString(" --title \"hotfix(${TARGET_ENV}): cherry-pick ${SHORT_SHA}\" \\\n")
sb.WriteString(" --body \"$BODY\"\n")
- // Prefer auto-merge so required checks gate the merge on protected
- // branches. GitHub rejects enablePullRequestAutoMerge when the target
- // branch has no protection rule, so fall back to an immediate squash
- // merge. The `if !` guard keeps the failing attempt from tripping
- // `set -e` and aborting the step.
- sb.WriteString(" if ! gh pr merge --auto --squash \"$BRANCH\"; then\n")
- sb.WriteString(" echo \"::notice::auto-merge unavailable (branch likely unprotected); merging directly\"\n")
- sb.WriteString(" gh pr merge --squash --delete-branch \"$BRANCH\"\n")
- sb.WriteString(" fi\n")
+ // Hand the resolution branch to the dedicated merge step. The merge runs
+ // as the configured state token (a trigger-capable actor), which the
+ // job-level GH_TOKEN is not, so it has to be a separate step with its own
+ // env. The clean path is the only one that auto-merges; the conflict path
+ // leaves the merge to a human via the UI.
+ sb.WriteString(" {\n")
+ sb.WriteString(" echo \"HOTFIX_BRANCH=$BRANCH\"\n")
+ sb.WriteString(" echo \"HOTFIX_CLEAN_MERGE=true\"\n")
+ sb.WriteString(" } >> \"$GITHUB_ENV\"\n")
sb.WriteString(" else\n")
sb.WriteString(" echo \"::warning::Cherry-pick conflicted; opening resolution PR for manual resolve\"\n")
sb.WriteString(" CONFLICTS=$(git diff --name-only --diff-filter=U)\n")
@@ -339,6 +356,57 @@ func (g *HotfixGenerator) writeApplyJob(sb *strings.Builder) {
sb.WriteString(" --title \"hotfix(${TARGET_ENV}): cherry-pick ${SHORT_SHA} (conflicts)\" \\\n")
sb.WriteString(" --body \"$CONFLICT_BODY\"\n")
sb.WriteString(" fi\n")
+
+ g.writeCleanMergeStep(sb)
+}
+
+// writeCleanMergeStep emits the clean-path merge step. It runs only after a
+// clean cherry-pick (signalled by HOTFIX_CLEAN_MERGE) and merges the resolution
+// PR as the configured state token. The merge must be authored by a
+// trigger-capable actor: a merge authored by the default GITHUB_TOKEN does not
+// emit a pull_request(closed) event, so the build, deploy, and finalize chain
+// would never run and the target environment's state would never be recorded.
+//
+// The step polls PR mergeability before merging, so a protected env branch with
+// a required status check still gates the merge until that check is green. An
+// unprotected branch reports mergeable on the first poll, so the same loop
+// merges immediately. On timeout it fails loudly so the operator sees the stuck
+// resolution PR rather than a silently skipped finalize.
+func (g *HotfixGenerator) writeCleanMergeStep(sb *strings.Builder) {
+ sb.WriteString(" - name: Merge clean resolution PR\n")
+ sb.WriteString(" if: env.HOTFIX_CLEAN_MERGE == 'true'\n")
+ sb.WriteString(" env:\n")
+ // Merge as the configured state token so the merge is trigger capable and
+ // reaches finalize. Defaults to GITHUB_TOKEN when unset; that default does
+ // not emit the pull_request event, so operators that need finalize after a
+ // hotfix must configure a trigger-capable state_token.
+ fmt.Fprintf(sb, " GH_TOKEN: %s\n", g.getStateTokenRef())
+ sb.WriteString(" BRANCH: ${{ env.HOTFIX_BRANCH }}\n")
+ sb.WriteString(" run: |\n")
+ // Poll mergeability for up to ~5 minutes (20 attempts, 15s apart) so a
+ // required status check on a protected env branch has time to report. The
+ // `if` guard keeps an individual non-fatal gh call from aborting the loop;
+ // only the timeout path exits non-zero.
+ sb.WriteString(" ATTEMPTS=20\n")
+ sb.WriteString(" SLEEP=15\n")
+ sb.WriteString(" MERGED=false\n")
+ sb.WriteString(" for i in $(seq 1 \"$ATTEMPTS\"); do\n")
+ sb.WriteString(" STATE=$(gh pr view \"$BRANCH\" --json mergeable,mergeStateStatus -q '.mergeable + \" \" + .mergeStateStatus' 2>/dev/null || echo \"UNKNOWN UNKNOWN\")\n")
+ sb.WriteString(" MERGEABLE=$(echo \"$STATE\" | cut -d' ' -f1)\n")
+ sb.WriteString(" STATUS=$(echo \"$STATE\" | cut -d' ' -f2)\n")
+ sb.WriteString(" echo \"::notice::resolution PR mergeable=$MERGEABLE state=$STATUS (attempt $i/$ATTEMPTS)\"\n")
+ sb.WriteString(" if [ \"$MERGEABLE\" = \"MERGEABLE\" ] && [ \"$STATUS\" != \"BLOCKED\" ]; then\n")
+ sb.WriteString(" if gh pr merge --squash --delete-branch \"$BRANCH\"; then\n")
+ sb.WriteString(" MERGED=true\n")
+ sb.WriteString(" break\n")
+ sb.WriteString(" fi\n")
+ sb.WriteString(" fi\n")
+ sb.WriteString(" sleep \"$SLEEP\"\n")
+ sb.WriteString(" done\n")
+ sb.WriteString(" if [ \"$MERGED\" != \"true\" ]; then\n")
+ sb.WriteString(" echo \"::error::Resolution PR for $BRANCH did not become mergeable within the timeout; merge it manually to run the hotfix finalize chain\"\n")
+ sb.WriteString(" exit 1\n")
+ sb.WriteString(" fi\n")
}
// writeCheckJob emits the parse-config validity gate that runs while a hotfix PR
diff --git a/internal/generate/hotfix_test.go b/internal/generate/hotfix_test.go
index 03352f3..d1fe6ae 100644
--- a/internal/generate/hotfix_test.go
+++ b/internal/generate/hotfix_test.go
@@ -150,27 +150,54 @@ func TestHotfixGenerator_CleanPath(t *testing.T) {
require.NoError(t, err)
assert.Contains(t, content, "--label cascade-hotfix")
- assert.Contains(t, content, "gh pr merge --auto")
-}
-
-// TestHotfixGenerator_CleanPathMergeFallback guards the regression where the
-// clean cherry-pick path merged with `gh pr merge --auto`, which calls GitHub's
-// enablePullRequestAutoMerge mutation. GitHub rejects that mutation on a branch
-// with no protection rule, so the hotfix step exited non-zero under `set -e` and
-// the PR was left open on unprotected env branches. The clean path must prefer
-// auto-merge but fall back to an immediate squash merge when auto-merge cannot
-// be enabled, and the `--auto` attempt must be guarded so its failure does not
-// abort the step.
-func TestHotfixGenerator_CleanPathMergeFallback(t *testing.T) {
+ assert.Contains(t, content, "gh pr merge --squash --delete-branch \"$BRANCH\"")
+}
+
+// TestHotfixGenerator_CleanPathPATMerge guards the structural fix where the
+// clean cherry-pick path merged with `gh pr merge --auto`. GitHub auto-merge
+// completes the merge as github-actions[bot] under GITHUB_TOKEN, and merges
+// authored by GITHUB_TOKEN do not emit pull_request events, so the
+// pull_request(closed) finalize chain never fired and state was never recorded
+// after a hotfix. The clean path must instead poll until the resolution PR is
+// mergeable (so a protected env branch with a required check still gates the
+// merge) and then merge with the configured state token, which is trigger
+// capable and reaches the finalize chain.
+func TestHotfixGenerator_CleanPathPATMerge(t *testing.T) {
+ cfg := threeEnvHotfixConfig()
+ cfg.StateToken = "${{ secrets.CASCADE_BOT_TOKEN }}"
+ gen := NewHotfixGenerator(cfg, "")
+ content, err := gen.Generate()
+ require.NoError(t, err)
+
+ // The clean path no longer relies on auto-merge, which suppresses the
+ // finalize trigger.
+ assert.NotContains(t, content, "gh pr merge --auto",
+ "clean path must not use auto-merge; it suppresses the pull_request finalize trigger")
+
+ // The merge step authenticates with the configured state token so the
+ // merge actor is trigger capable and finalize is reachable.
+ assert.Contains(t, content, "GH_TOKEN: ${{ secrets.CASCADE_BOT_TOKEN }}",
+ "the clean-path merge step must authenticate with the configured state token")
+
+ // A poll-until-mergeable construct gates the merge so a protected env
+ // branch with a required check still blocks until the check is green.
+ assert.Contains(t, content, "gh pr view \"$BRANCH\" --json mergeable",
+ "the clean path must poll PR mergeability before merging")
+ assert.Contains(t, content, "gh pr merge --squash --delete-branch \"$BRANCH\"",
+ "the clean path must squash-merge once the PR is mergeable")
+}
+
+// TestHotfixGenerator_CleanPathMergeDefaultsToGitHubToken confirms that when no
+// state token is configured the merge step degrades to the default
+// GITHUB_TOKEN expression, matching the state-write token plumbing used by the
+// release and promote generators.
+func TestHotfixGenerator_CleanPathMergeDefaultsToGitHubToken(t *testing.T) {
gen := NewHotfixGenerator(threeEnvHotfixConfig(), "")
content, err := gen.Generate()
require.NoError(t, err)
- // The auto-merge attempt is guarded by `if !` so a failure on an
- // unprotected branch does not trip `set -e` and abort the step.
- assert.Contains(t, content, "if ! gh pr merge --auto --squash \"$BRANCH\"; then")
- // The fallback merges immediately when auto-merge is unavailable.
- assert.Contains(t, content, "gh pr merge --squash --delete-branch \"$BRANCH\"")
+ assert.NotContains(t, content, "gh pr merge --auto")
+ assert.Contains(t, content, "GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}")
}
// TestHotfixGenerator_SeedsLabels guards the regression where the apply job ran