From 7d1fc5a3422805dde9bff51831df46f7143d9d2b Mon Sep 17 00:00:00 2001 From: Joshua Temple Date: Wed, 17 Jun 2026 09:40:07 -0400 Subject: [PATCH 1/2] fix: push external-update state with state token, not bot The generated external-update receiver checked out with a hardcoded ${{ secrets.GITHUB_TOKEN }}, so its manifest push ran as github-actions[bot]. On a primary repo whose trunk has required pull request reviews, that identity is blocked even with enforce_admins disabled, so external-update failed with a swallowed protected-branch rejection. The other state-writers (orchestrate, promote, hotfix) check out with the configured state token and bypass protection. - external.go: check out the receiver with the state token (falls back to GITHUB_TOKEN when state_token is unset) and fetch-depth: 0, for parity with the other state-writers. - command.go: include the captured push output in the failure error so a rejected push is diagnosable instead of opaque. - external.go: default the receiver concurrency group to the same ref-scoped key orchestrate uses, with cancel-in-progress false, so external and internal state writes serialize on one queue and an incoming notification never cancels a live pipeline. Signed-off-by: Joshua Temple --- internal/external/command.go | 6 +- internal/external/command_test.go | 67 +++++++++++++++ internal/generate/external.go | 44 ++++++---- internal/generate/external_test.go | 133 +++++++++++++++++++++++++++-- 4 files changed, 222 insertions(+), 28 deletions(-) diff --git a/internal/external/command.go b/internal/external/command.go index c9cfdb0..9ca1ae3 100644 --- a/internal/external/command.go +++ b/internal/external/command.go @@ -229,6 +229,7 @@ func runUpdate(sourceRepo, deployName, environment, sha, version, artifactsJSON // the freshly-fetched remote state. func commitWithApplicationRetry(filePath, commitMsg string, maxAttempts int, applyUpdate func() error) error { var lastPushErr error + var lastPushOut []byte for attempt := 1; attempt <= maxAttempts; attempt++ { if err := applyUpdate(); err != nil { @@ -248,11 +249,12 @@ func commitWithApplicationRetry(filePath, commitMsg string, maxAttempts int, app return fmt.Errorf("git commit failed: %s: %w", string(out), err) } - _, pushErr := exec.Command("git", "push").CombinedOutput() + pushOut, pushErr := exec.Command("git", "push").CombinedOutput() if pushErr == nil { return nil } lastPushErr = pushErr + lastPushOut = pushOut // The push did not succeed (typically a non-fast-forward rejection because // a competing update advanced the remote). Recover by resetting onto the @@ -275,7 +277,7 @@ func commitWithApplicationRetry(filePath, commitMsg string, maxAttempts int, app time.Sleep(time.Duration(attempt) * time.Second) } - return fmt.Errorf("git push failed after %d attempts: %w", maxAttempts, lastPushErr) + return fmt.Errorf("git push failed after %d attempts: %s: %w", maxAttempts, strings.TrimSpace(string(lastPushOut)), lastPushErr) } // writeManifest writes the CICD file back to the manifest under the specified key. diff --git a/internal/external/command_test.go b/internal/external/command_test.go index 3083ef7..a677777 100644 --- a/internal/external/command_test.go +++ b/internal/external/command_test.go @@ -2,6 +2,7 @@ package external import ( "os" + "os/exec" "path/filepath" "testing" @@ -260,3 +261,69 @@ func TestUpdateCommand_EnvironmentNotFound(t *testing.T) { assert.Error(t, err) assert.Contains(t, err.Error(), "environment 'nonexistent' not found") } + +// TestCommitWithApplicationRetry_PushFailureIncludesOutput asserts that when +// all push attempts fail the returned error includes the captured git push +// output so operators can diagnose push-rejection failures from logs alone +// without re-running the workflow. +func TestCommitWithApplicationRetry_PushFailureIncludesOutput(t *testing.T) { + // Use a local bare repo as the fetch remote (so fetch succeeds) but configure + // a non-existent path as the push URL (so push fails with a recognisable + // "does not appear to be a git repository" message every attempt). + remoteDir := t.TempDir() + workDir := t.TempDir() + + gitRemote := func(args ...string) { + t.Helper() + out, err := exec.Command("git", append([]string{"-C", remoteDir}, args...)...).CombinedOutput() + require.NoErrorf(t, err, "git -C remote %v failed: %s", args, out) + } + gitWork := func(args ...string) { + t.Helper() + out, err := exec.Command("git", append([]string{"-C", workDir}, args...)...).CombinedOutput() + require.NoErrorf(t, err, "git -C work %v failed: %s", args, out) + } + + // Seed the remote and working tree. + gitRemote("init", "--bare") + gitWork("init") + gitWork("config", "user.email", "test@example.com") + gitWork("config", "user.name", "Test") + gitWork("remote", "add", "origin", remoteDir) + + manifestPath := filepath.Join(workDir, "manifest.yaml") + require.NoError(t, os.WriteFile(manifestPath, []byte("initial: true\n"), 0644)) + gitWork("add", "manifest.yaml") + gitWork("commit", "-m", "init") + gitWork("push", "-u", "origin", "HEAD:main") + + // Redirect push to a non-existent path so every push fails with a message + // while fetch from the real remote continues to succeed. + gitWork("remote", "set-url", "--push", "origin", "/nonexistent-cascade-test-remote") + + // Change to the work dir so git commands in commitWithApplicationRetry run there. + origDir, err := os.Getwd() + require.NoError(t, err) + require.NoError(t, os.Chdir(workDir)) + t.Cleanup(func() { _ = os.Chdir(origDir) }) + + callCount := 0 + applyUpdate := func() error { + callCount++ + return os.WriteFile(manifestPath, []byte("value: "+string(rune('a'+callCount))+"\n"), 0644) + } + + // Two attempts so the retry loop exhausts maxAttempts and reaches the final + // "git push failed after N attempts" return path. + err = commitWithApplicationRetry(manifestPath, "chore: test [skip ci]", 2, applyUpdate) + require.Error(t, err, "must fail when all push attempts are rejected") + + // The error must carry the git push output so operators can diagnose the + // failure without re-running the workflow. + assert.Contains(t, err.Error(), "git push failed after 2 attempts", + "error must state how many attempts were made") + // git prints a diagnostic when the push URL is unreachable; verify it is + // captured and included in the error (not silently discarded). + assert.Contains(t, err.Error(), "does not appear to be a git repository", + "error must include the captured git push output so failures are diagnosable from logs") +} diff --git a/internal/generate/external.go b/internal/generate/external.go index f218592..d0dfedd 100644 --- a/internal/generate/external.go +++ b/internal/generate/external.go @@ -37,6 +37,13 @@ func (g *ExternalUpdateGenerator) getReleaseTokenRef() string { return g.config.GetReleaseToken() } +// getStateTokenRef returns the token expression used to write manifest state to +// the trunk branch. Users configure the full expression via the state_token +// config option; it defaults to "${{ secrets.GITHUB_TOKEN }}". +func (g *ExternalUpdateGenerator) getStateTokenRef() string { + return g.config.GetStateToken() +} + // getManifestFilePath returns the manifest file path for use in generated scripts. func (g *ExternalUpdateGenerator) getManifestFilePath() string { return g.config.GetManifestFile() @@ -111,10 +118,15 @@ func (g *ExternalUpdateGenerator) writeJob(sb *strings.Builder) { sb.WriteString(" name: Update External State\n") sb.WriteString(" runs-on: ubuntu-latest\n") sb.WriteString(" steps:\n") + // The receiver checks out with the state token (the push identity) so that + // the manifest push is not blocked by branch protection rules on the primary + // repo. fetch-depth: 0 is required for parity with orchestrate/promote/hotfix + // state-writers, which also need the full history. When state_token is unset + // this falls back to GITHUB_TOKEN for back-compat. writeActionStep(sb, g.config, " ", actionCheckout) sb.WriteString(" with:\n") - sb.WriteString(" fetch-depth: 1\n") - sb.WriteString(" token: ${{ secrets.GITHUB_TOKEN }}\n") + sb.WriteString(" fetch-depth: 0\n") + fmt.Fprintf(sb, " token: %s\n", g.getStateTokenRef()) sb.WriteString("\n") // Setup CLI @@ -165,24 +177,20 @@ func (g *ExternalUpdateGenerator) writeJob(sb *strings.Builder) { } // writeConcurrency emits a top-level concurrency: block on the external-update -// workflow. Every external update writes back the single shared manifest file -// (cascade external update writes --config under --manifest-key, then commits and -// pushes that same path) regardless of source_repo or environment, so ALL -// concurrent external-update runs contend for that one push. The group key is -// therefore the bare workflow name, which serializes every external-update run. -// Queueing (cancel-in-progress: false) is safer than cancelling because the update -// writes durable manifest state; dropping a mid-flight write leaves the manifest -// inconsistent. Serialization alone is not sufficient: a queued run still holds a -// stale-parent checkout, so cascade external update recovers a rejected push by -// resetting onto the fetched remote tip and re-applying its state mutation, which -// absorbs any change that landed while it waited. +// workflow. The default group is the same ref-scoped key orchestrate uses +// ("orchestrate-${{ github.ref }}"), so external and internal state writes +// serialize on a shared non-cancelling queue and never race on the manifest +// push. cancel-in-progress is always false by default: dropping a mid-flight +// write leaves the manifest inconsistent, and a live orchestrate pipeline must +// never be cancelled by an incoming external notification. +// +// When the manifest explicitly sets concurrency.group, that value is forwarded +// as-is (via GetConcurrencyGroup) so operators can override the group if needed. +// cancel-in-progress follows the explicit config value when set, and defaults to +// false otherwise. func (g *ExternalUpdateGenerator) writeConcurrency(sb *strings.Builder) { sb.WriteString("concurrency:\n") - if g.config.Concurrency != nil && g.config.Concurrency.Group != "" { - fmt.Fprintf(sb, " group: %s\n", g.config.Concurrency.Group) - } else { - sb.WriteString(" group: \"${{ github.workflow }}\"\n") - } + fmt.Fprintf(sb, " group: %s\n", g.config.GetConcurrencyGroup()) if g.config.Concurrency != nil { fmt.Fprintf(sb, " cancel-in-progress: %t\n", g.config.Concurrency.CancelInProgress) } else { diff --git a/internal/generate/external_test.go b/internal/generate/external_test.go index 2b1001b..f06de73 100644 --- a/internal/generate/external_test.go +++ b/internal/generate/external_test.go @@ -31,6 +31,35 @@ jobs: - run: echo "deploying" ` +// checkoutStepBlock returns the slice of the generated workflow content that +// covers the actions/checkout step and its "with:" block, stopping at the next +// "- name:" or "- uses:" step boundary. This lets tests assert on the checkout +// token and fetch-depth in isolation, without false matches from tokens used in +// other steps such as setup-cli. +func checkoutStepBlock(t *testing.T, content string) string { + t.Helper() + lines := strings.Split(content, "\n") + start := -1 + for i, line := range lines { + if strings.Contains(line, "uses: actions/checkout") { + start = i + break + } + } + require.GreaterOrEqual(t, start, 0, "no actions/checkout step found in generated content") + var sb strings.Builder + for i := start; i < len(lines); i++ { + // Stop at the next step boundary (a "- name:" or "- uses:" line that is + // not the checkout line itself). + if i > start && (strings.Contains(lines[i], "- name:") || strings.Contains(lines[i], "- uses:")) { + break + } + sb.WriteString(lines[i]) + sb.WriteString("\n") + } + return sb.String() +} + // createMockWorkflow creates a mock workflow file in a temp directory func createMockWorkflow(t *testing.T, baseDir, workflowPath string) { t.Helper() @@ -534,13 +563,11 @@ func TestExternalUpdateGenerator_NotPrimary(t *testing.T) { } // TestExternalUpdateGenerator_HasConcurrencyBlock asserts the generated -// external-update workflow declares a top-level concurrency: block keyed by the bare -// workflow name. Every external update writes back the single shared manifest file -// (cascade external update writes --config then git-pushes that same path) -// regardless of source_repo or environment, so ALL external-update runs race on that -// one non-fast-forward push (#31); the group must serialize every run. cancel-in -// -progress is false because a dropped mid-flight write leaves the manifest in an -// inconsistent state. +// external-update workflow declares a top-level concurrency: block that shares +// orchestrate's ref-scoped group ("orchestrate-${{ github.ref }}"). This +// serializes external and internal state writes on the same non-cancelling +// queue so they never race on the manifest push (#31). cancel-in-progress is +// false because a dropped mid-flight write leaves the manifest inconsistent. func TestExternalUpdateGenerator_HasConcurrencyBlock(t *testing.T) { cfg := &config.TrunkConfig{ TrunkBranch: "master", @@ -562,7 +589,8 @@ func TestExternalUpdateGenerator_HasConcurrencyBlock(t *testing.T) { assert.Contains(t, content, "\nconcurrency:\n", "external-update workflow must declare top-level concurrency:") group := concurrencyGroupLine(t, content) - assert.Equal(t, " group: \"${{ github.workflow }}\"", group, "external-update concurrency group must be the bare workflow name to serialize all runs") + assert.Equal(t, " group: orchestrate-${{ github.ref }}", group, + "external-update default concurrency group must share orchestrate's ref-scoped queue to serialize state writes") assert.NotContains(t, group, "inputs.source_repo", "external-update group must NOT scope by source_repo: all runs push the same manifest") assert.NotContains(t, group, "inputs.environment", "external-update group must NOT scope by environment: all runs push the same manifest") assert.Contains(t, content, "cancel-in-progress: false", "external-update default must queue, not cancel") @@ -658,6 +686,95 @@ func TestExternalUpdateGenerator_ConcurrencyOverride(t *testing.T) { assert.Contains(t, content, "cancel-in-progress: true", "custom cancel_in_progress must propagate to external-update") } +// TestExternalUpdateGenerator_CheckoutUsesStateToken asserts that the receiver +// checkout step uses the configured state token (not the hardcoded GITHUB_TOKEN) +// so that manifest pushes are not blocked by branch protection rules on the +// primary repo. When state_token is unset the checkout falls back to the +// default GITHUB_TOKEN expression for back-compat. +func TestExternalUpdateGenerator_CheckoutUsesStateToken(t *testing.T) { + externalRepos := []config.ExternalRepoConfig{ + { + Repo: "example/cdk-infra", + Ref: "main", + Deploys: []config.ExternalDeployConfig{ + {Name: "cdk", Workflow: "example/cdk-infra/.github/workflows/deploy.yaml"}, + }, + }, + } + + t.Run("state_token configured", func(t *testing.T) { + cfg := &config.TrunkConfig{ + TrunkBranch: "main", + Environments: []string{"dev", "prod"}, + External: externalRepos, + StateToken: "CASCADE_STATE_TOKEN", + } + gen := NewExternalUpdateGenerator(cfg, "/tmp") + content, err := gen.Generate() + require.NoError(t, err) + + // Extract the checkout step block (everything up to the next "- name:" step) + // so we assert on the checkout token in isolation, not on tokens used in + // other steps (e.g., the setup-cli step which uses the release token). + checkoutSection := checkoutStepBlock(t, content) + assert.Contains(t, checkoutSection, "token: ${{ secrets.CASCADE_STATE_TOKEN }}", + "checkout must use the configured state token so the manifest push is not blocked by branch protection") + assert.NotContains(t, checkoutSection, "token: ${{ secrets.GITHUB_TOKEN }}", + "when state_token is set, GITHUB_TOKEN must not appear as the checkout token") + assert.Contains(t, checkoutSection, "fetch-depth: 0", + "checkout must use fetch-depth: 0 for parity with other state-writers") + }) + + t.Run("state_token unset uses GITHUB_TOKEN back-compat", func(t *testing.T) { + cfg := &config.TrunkConfig{ + TrunkBranch: "main", + Environments: []string{"dev", "prod"}, + External: externalRepos, + } + gen := NewExternalUpdateGenerator(cfg, "/tmp") + content, err := gen.Generate() + require.NoError(t, err) + + checkoutSection := checkoutStepBlock(t, content) + assert.Contains(t, checkoutSection, "token: ${{ secrets.GITHUB_TOKEN }}", + "when state_token is unset, checkout must fall back to the default GITHUB_TOKEN for back-compat") + assert.Contains(t, checkoutSection, "fetch-depth: 0", + "checkout must use fetch-depth: 0 regardless of token configuration") + }) +} + +// TestExternalUpdateGenerator_DefaultConcurrencySharesOrchestrateGroup asserts +// that, with no explicit concurrency config, the external-update workflow uses +// the same ref-scoped group as orchestrate ("orchestrate-${{ github.ref }}") +// so external and internal state writes serialize on a shared non-cancelling +// queue. cancel-in-progress must remain false to never drop a live pipeline. +func TestExternalUpdateGenerator_DefaultConcurrencySharesOrchestrateGroup(t *testing.T) { + cfg := &config.TrunkConfig{ + TrunkBranch: "main", + Environments: []string{"dev", "prod"}, + External: []config.ExternalRepoConfig{ + { + Repo: "example/cdk-infra", + Ref: "main", + Deploys: []config.ExternalDeployConfig{ + {Name: "cdk", Workflow: "example/cdk-infra/.github/workflows/deploy.yaml"}, + }, + }, + }, + } + + gen := NewExternalUpdateGenerator(cfg, "/tmp") + content, err := gen.Generate() + require.NoError(t, err) + + assert.Contains(t, content, "\nconcurrency:\n", "external-update workflow must declare top-level concurrency:") + group := concurrencyGroupLine(t, content) + assert.Equal(t, " group: orchestrate-${{ github.ref }}", group, + "external-update default concurrency group must share orchestrate's ref-scoped group to serialize state writes") + assert.Contains(t, content, "cancel-in-progress: false", + "external-update must never cancel a live pipeline; cancel-in-progress must be false") +} + // TestNotifyPrimaryStep_BuildOnlySatellite_EmitsDeployName verifies that a // build-only satellite (builds + notify, zero deploys) still emits a non-empty // deploy_name in the dispatch to the primary's external-update workflow. The From e50561a0e8d00ef519fc8747745cc396941741b0 Mon Sep 17 00:00:00 2001 From: Joshua Temple Date: Wed, 17 Jun 2026 10:02:35 -0400 Subject: [PATCH 2/2] test: pin external-update retry test branch to main for hermetic CI Signed-off-by: Joshua Temple --- internal/external/command_test.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/internal/external/command_test.go b/internal/external/command_test.go index a677777..504938a 100644 --- a/internal/external/command_test.go +++ b/internal/external/command_test.go @@ -284,9 +284,14 @@ func TestCommitWithApplicationRetry_PushFailureIncludesOutput(t *testing.T) { require.NoErrorf(t, err, "git -C work %v failed: %s", args, out) } - // Seed the remote and working tree. + // Seed the remote and working tree. Pin the working-tree branch to "main" + // explicitly: `git init` honours the caller's init.defaultBranch config + // (CI runners default to "master"), and commitWithApplicationRetry fetches + // the current branch by name, so the local branch must match the remote ref + // pushed below for the test to be hermetic. gitRemote("init", "--bare") gitWork("init") + gitWork("checkout", "-B", "main") gitWork("config", "user.email", "test@example.com") gitWork("config", "user.name", "Test") gitWork("remote", "add", "origin", remoteDir)