Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions internal/external/command.go
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,7 @@ func runUpdate(sourceRepo, deployName, environment, sha, version, artifactsJSON
// the freshly-fetched remote state.
func commitWithApplicationRetry(filePath, commitMsg string, maxAttempts int, applyUpdate func() error) error {
var lastPushErr error
var lastPushOut []byte

for attempt := 1; attempt <= maxAttempts; attempt++ {
if err := applyUpdate(); err != nil {
Expand All @@ -248,11 +249,12 @@ func commitWithApplicationRetry(filePath, commitMsg string, maxAttempts int, app
return fmt.Errorf("git commit failed: %s: %w", string(out), err)
}

_, pushErr := exec.Command("git", "push").CombinedOutput()
pushOut, pushErr := exec.Command("git", "push").CombinedOutput()
if pushErr == nil {
return nil
}
lastPushErr = pushErr
lastPushOut = pushOut

// The push did not succeed (typically a non-fast-forward rejection because
// a competing update advanced the remote). Recover by resetting onto the
Expand All @@ -275,7 +277,7 @@ func commitWithApplicationRetry(filePath, commitMsg string, maxAttempts int, app
time.Sleep(time.Duration(attempt) * time.Second)
}

return fmt.Errorf("git push failed after %d attempts: %w", maxAttempts, lastPushErr)
return fmt.Errorf("git push failed after %d attempts: %s: %w", maxAttempts, strings.TrimSpace(string(lastPushOut)), lastPushErr)
}

// writeManifest writes the CICD file back to the manifest under the specified key.
Expand Down
72 changes: 72 additions & 0 deletions internal/external/command_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package external

import (
"os"
"os/exec"
"path/filepath"
"testing"

Expand Down Expand Up @@ -260,3 +261,74 @@ func TestUpdateCommand_EnvironmentNotFound(t *testing.T) {
assert.Error(t, err)
assert.Contains(t, err.Error(), "environment 'nonexistent' not found")
}

// TestCommitWithApplicationRetry_PushFailureIncludesOutput asserts that when
// all push attempts fail the returned error includes the captured git push
// output so operators can diagnose push-rejection failures from logs alone
// without re-running the workflow.
func TestCommitWithApplicationRetry_PushFailureIncludesOutput(t *testing.T) {
// Use a local bare repo as the fetch remote (so fetch succeeds) but configure
// a non-existent path as the push URL (so push fails with a recognisable
// "does not appear to be a git repository" message every attempt).
remoteDir := t.TempDir()
workDir := t.TempDir()

gitRemote := func(args ...string) {
t.Helper()
out, err := exec.Command("git", append([]string{"-C", remoteDir}, args...)...).CombinedOutput()
require.NoErrorf(t, err, "git -C remote %v failed: %s", args, out)
}
gitWork := func(args ...string) {
t.Helper()
out, err := exec.Command("git", append([]string{"-C", workDir}, args...)...).CombinedOutput()
require.NoErrorf(t, err, "git -C work %v failed: %s", args, out)
}

// Seed the remote and working tree. Pin the working-tree branch to "main"
// explicitly: `git init` honours the caller's init.defaultBranch config
// (CI runners default to "master"), and commitWithApplicationRetry fetches
// the current branch by name, so the local branch must match the remote ref
// pushed below for the test to be hermetic.
gitRemote("init", "--bare")
gitWork("init")
gitWork("checkout", "-B", "main")
gitWork("config", "user.email", "test@example.com")
gitWork("config", "user.name", "Test")
gitWork("remote", "add", "origin", remoteDir)

manifestPath := filepath.Join(workDir, "manifest.yaml")
require.NoError(t, os.WriteFile(manifestPath, []byte("initial: true\n"), 0644))
gitWork("add", "manifest.yaml")
gitWork("commit", "-m", "init")
gitWork("push", "-u", "origin", "HEAD:main")

// Redirect push to a non-existent path so every push fails with a message
// while fetch from the real remote continues to succeed.
gitWork("remote", "set-url", "--push", "origin", "/nonexistent-cascade-test-remote")

// Change to the work dir so git commands in commitWithApplicationRetry run there.
origDir, err := os.Getwd()
require.NoError(t, err)
require.NoError(t, os.Chdir(workDir))
t.Cleanup(func() { _ = os.Chdir(origDir) })

callCount := 0
applyUpdate := func() error {
callCount++
return os.WriteFile(manifestPath, []byte("value: "+string(rune('a'+callCount))+"\n"), 0644)
}

// Two attempts so the retry loop exhausts maxAttempts and reaches the final
// "git push failed after N attempts" return path.
err = commitWithApplicationRetry(manifestPath, "chore: test [skip ci]", 2, applyUpdate)
require.Error(t, err, "must fail when all push attempts are rejected")

// The error must carry the git push output so operators can diagnose the
// failure without re-running the workflow.
assert.Contains(t, err.Error(), "git push failed after 2 attempts",
"error must state how many attempts were made")
// git prints a diagnostic when the push URL is unreachable; verify it is
// captured and included in the error (not silently discarded).
assert.Contains(t, err.Error(), "does not appear to be a git repository",
"error must include the captured git push output so failures are diagnosable from logs")
}
44 changes: 26 additions & 18 deletions internal/generate/external.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,13 @@ func (g *ExternalUpdateGenerator) getReleaseTokenRef() string {
return g.config.GetReleaseToken()
}

// getStateTokenRef returns the token expression used to write manifest state to
// the trunk branch. Users configure the full expression via the state_token
// config option; it defaults to "${{ secrets.GITHUB_TOKEN }}".
func (g *ExternalUpdateGenerator) getStateTokenRef() string {
return g.config.GetStateToken()
}

// getManifestFilePath returns the manifest file path for use in generated scripts.
func (g *ExternalUpdateGenerator) getManifestFilePath() string {
return g.config.GetManifestFile()
Expand Down Expand Up @@ -111,10 +118,15 @@ func (g *ExternalUpdateGenerator) writeJob(sb *strings.Builder) {
sb.WriteString(" name: Update External State\n")
sb.WriteString(" runs-on: ubuntu-latest\n")
sb.WriteString(" steps:\n")
// The receiver checks out with the state token (the push identity) so that
// the manifest push is not blocked by branch protection rules on the primary
// repo. fetch-depth: 0 is required for parity with orchestrate/promote/hotfix
// state-writers, which also need the full history. When state_token is unset
// this falls back to GITHUB_TOKEN for back-compat.
writeActionStep(sb, g.config, " ", actionCheckout)
sb.WriteString(" with:\n")
sb.WriteString(" fetch-depth: 1\n")
sb.WriteString(" token: ${{ secrets.GITHUB_TOKEN }}\n")
sb.WriteString(" fetch-depth: 0\n")
fmt.Fprintf(sb, " token: %s\n", g.getStateTokenRef())
sb.WriteString("\n")

// Setup CLI
Expand Down Expand Up @@ -165,24 +177,20 @@ func (g *ExternalUpdateGenerator) writeJob(sb *strings.Builder) {
}

// writeConcurrency emits a top-level concurrency: block on the external-update
// workflow. Every external update writes back the single shared manifest file
// (cascade external update writes --config under --manifest-key, then commits and
// pushes that same path) regardless of source_repo or environment, so ALL
// concurrent external-update runs contend for that one push. The group key is
// therefore the bare workflow name, which serializes every external-update run.
// Queueing (cancel-in-progress: false) is safer than cancelling because the update
// writes durable manifest state; dropping a mid-flight write leaves the manifest
// inconsistent. Serialization alone is not sufficient: a queued run still holds a
// stale-parent checkout, so cascade external update recovers a rejected push by
// resetting onto the fetched remote tip and re-applying its state mutation, which
// absorbs any change that landed while it waited.
// workflow. The default group is the same ref-scoped key orchestrate uses
// ("orchestrate-${{ github.ref }}"), so external and internal state writes
// serialize on a shared non-cancelling queue and never race on the manifest
// push. cancel-in-progress is always false by default: dropping a mid-flight
// write leaves the manifest inconsistent, and a live orchestrate pipeline must
// never be cancelled by an incoming external notification.
//
// When the manifest explicitly sets concurrency.group, that value is forwarded
// as-is (via GetConcurrencyGroup) so operators can override the group if needed.
// cancel-in-progress follows the explicit config value when set, and defaults to
// false otherwise.
func (g *ExternalUpdateGenerator) writeConcurrency(sb *strings.Builder) {
sb.WriteString("concurrency:\n")
if g.config.Concurrency != nil && g.config.Concurrency.Group != "" {
fmt.Fprintf(sb, " group: %s\n", g.config.Concurrency.Group)
} else {
sb.WriteString(" group: \"${{ github.workflow }}\"\n")
}
fmt.Fprintf(sb, " group: %s\n", g.config.GetConcurrencyGroup())
if g.config.Concurrency != nil {
fmt.Fprintf(sb, " cancel-in-progress: %t\n", g.config.Concurrency.CancelInProgress)
} else {
Expand Down
133 changes: 125 additions & 8 deletions internal/generate/external_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,35 @@ jobs:
- run: echo "deploying"
`

// checkoutStepBlock returns the slice of the generated workflow content that
// covers the actions/checkout step and its "with:" block, stopping at the next
// "- name:" or "- uses:" step boundary. This lets tests assert on the checkout
// token and fetch-depth in isolation, without false matches from tokens used in
// other steps such as setup-cli.
func checkoutStepBlock(t *testing.T, content string) string {
t.Helper()
lines := strings.Split(content, "\n")
start := -1
for i, line := range lines {
if strings.Contains(line, "uses: actions/checkout") {
start = i
break
}
}
require.GreaterOrEqual(t, start, 0, "no actions/checkout step found in generated content")
var sb strings.Builder
for i := start; i < len(lines); i++ {
// Stop at the next step boundary (a "- name:" or "- uses:" line that is
// not the checkout line itself).
if i > start && (strings.Contains(lines[i], "- name:") || strings.Contains(lines[i], "- uses:")) {
break
}
sb.WriteString(lines[i])
sb.WriteString("\n")
}
return sb.String()
}

// createMockWorkflow creates a mock workflow file in a temp directory
func createMockWorkflow(t *testing.T, baseDir, workflowPath string) {
t.Helper()
Expand Down Expand Up @@ -534,13 +563,11 @@ func TestExternalUpdateGenerator_NotPrimary(t *testing.T) {
}

// TestExternalUpdateGenerator_HasConcurrencyBlock asserts the generated
// external-update workflow declares a top-level concurrency: block keyed by the bare
// workflow name. Every external update writes back the single shared manifest file
// (cascade external update writes --config then git-pushes that same path)
// regardless of source_repo or environment, so ALL external-update runs race on that
// one non-fast-forward push (#31); the group must serialize every run. cancel-in
// -progress is false because a dropped mid-flight write leaves the manifest in an
// inconsistent state.
// external-update workflow declares a top-level concurrency: block that shares
// orchestrate's ref-scoped group ("orchestrate-${{ github.ref }}"). This
// serializes external and internal state writes on the same non-cancelling
// queue so they never race on the manifest push (#31). cancel-in-progress is
// false because a dropped mid-flight write leaves the manifest inconsistent.
func TestExternalUpdateGenerator_HasConcurrencyBlock(t *testing.T) {
cfg := &config.TrunkConfig{
TrunkBranch: "master",
Expand All @@ -562,7 +589,8 @@ func TestExternalUpdateGenerator_HasConcurrencyBlock(t *testing.T) {

assert.Contains(t, content, "\nconcurrency:\n", "external-update workflow must declare top-level concurrency:")
group := concurrencyGroupLine(t, content)
assert.Equal(t, " group: \"${{ github.workflow }}\"", group, "external-update concurrency group must be the bare workflow name to serialize all runs")
assert.Equal(t, " group: orchestrate-${{ github.ref }}", group,
"external-update default concurrency group must share orchestrate's ref-scoped queue to serialize state writes")
assert.NotContains(t, group, "inputs.source_repo", "external-update group must NOT scope by source_repo: all runs push the same manifest")
assert.NotContains(t, group, "inputs.environment", "external-update group must NOT scope by environment: all runs push the same manifest")
assert.Contains(t, content, "cancel-in-progress: false", "external-update default must queue, not cancel")
Expand Down Expand Up @@ -658,6 +686,95 @@ func TestExternalUpdateGenerator_ConcurrencyOverride(t *testing.T) {
assert.Contains(t, content, "cancel-in-progress: true", "custom cancel_in_progress must propagate to external-update")
}

// TestExternalUpdateGenerator_CheckoutUsesStateToken asserts that the receiver
// checkout step uses the configured state token (not the hardcoded GITHUB_TOKEN)
// so that manifest pushes are not blocked by branch protection rules on the
// primary repo. When state_token is unset the checkout falls back to the
// default GITHUB_TOKEN expression for back-compat.
func TestExternalUpdateGenerator_CheckoutUsesStateToken(t *testing.T) {
externalRepos := []config.ExternalRepoConfig{
{
Repo: "example/cdk-infra",
Ref: "main",
Deploys: []config.ExternalDeployConfig{
{Name: "cdk", Workflow: "example/cdk-infra/.github/workflows/deploy.yaml"},
},
},
}

t.Run("state_token configured", func(t *testing.T) {
cfg := &config.TrunkConfig{
TrunkBranch: "main",
Environments: []string{"dev", "prod"},
External: externalRepos,
StateToken: "CASCADE_STATE_TOKEN",
}
gen := NewExternalUpdateGenerator(cfg, "/tmp")
content, err := gen.Generate()
require.NoError(t, err)

// Extract the checkout step block (everything up to the next "- name:" step)
// so we assert on the checkout token in isolation, not on tokens used in
// other steps (e.g., the setup-cli step which uses the release token).
checkoutSection := checkoutStepBlock(t, content)
assert.Contains(t, checkoutSection, "token: ${{ secrets.CASCADE_STATE_TOKEN }}",
"checkout must use the configured state token so the manifest push is not blocked by branch protection")
assert.NotContains(t, checkoutSection, "token: ${{ secrets.GITHUB_TOKEN }}",
"when state_token is set, GITHUB_TOKEN must not appear as the checkout token")
assert.Contains(t, checkoutSection, "fetch-depth: 0",
"checkout must use fetch-depth: 0 for parity with other state-writers")
})

t.Run("state_token unset uses GITHUB_TOKEN back-compat", func(t *testing.T) {
cfg := &config.TrunkConfig{
TrunkBranch: "main",
Environments: []string{"dev", "prod"},
External: externalRepos,
}
gen := NewExternalUpdateGenerator(cfg, "/tmp")
content, err := gen.Generate()
require.NoError(t, err)

checkoutSection := checkoutStepBlock(t, content)
assert.Contains(t, checkoutSection, "token: ${{ secrets.GITHUB_TOKEN }}",
"when state_token is unset, checkout must fall back to the default GITHUB_TOKEN for back-compat")
assert.Contains(t, checkoutSection, "fetch-depth: 0",
"checkout must use fetch-depth: 0 regardless of token configuration")
})
}

// TestExternalUpdateGenerator_DefaultConcurrencySharesOrchestrateGroup asserts
// that, with no explicit concurrency config, the external-update workflow uses
// the same ref-scoped group as orchestrate ("orchestrate-${{ github.ref }}")
// so external and internal state writes serialize on a shared non-cancelling
// queue. cancel-in-progress must remain false to never drop a live pipeline.
func TestExternalUpdateGenerator_DefaultConcurrencySharesOrchestrateGroup(t *testing.T) {
cfg := &config.TrunkConfig{
TrunkBranch: "main",
Environments: []string{"dev", "prod"},
External: []config.ExternalRepoConfig{
{
Repo: "example/cdk-infra",
Ref: "main",
Deploys: []config.ExternalDeployConfig{
{Name: "cdk", Workflow: "example/cdk-infra/.github/workflows/deploy.yaml"},
},
},
},
}

gen := NewExternalUpdateGenerator(cfg, "/tmp")
content, err := gen.Generate()
require.NoError(t, err)

assert.Contains(t, content, "\nconcurrency:\n", "external-update workflow must declare top-level concurrency:")
group := concurrencyGroupLine(t, content)
assert.Equal(t, " group: orchestrate-${{ github.ref }}", group,
"external-update default concurrency group must share orchestrate's ref-scoped group to serialize state writes")
assert.Contains(t, content, "cancel-in-progress: false",
"external-update must never cancel a live pipeline; cancel-in-progress must be false")
}

// TestNotifyPrimaryStep_BuildOnlySatellite_EmitsDeployName verifies that a
// build-only satellite (builds + notify, zero deploys) still emits a non-empty
// deploy_name in the dispatch to the primary's external-update workflow. The
Expand Down
Loading