Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 95 additions & 7 deletions e2e/harness/act.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,13 @@ import (
"fmt"
"io"
"strings"
"time"

"github.com/moby/moby/api/pkg/stdcopy"
"github.com/moby/moby/api/types/container"
"github.com/moby/moby/api/types/mount"
"github.com/testcontainers/testcontainers-go"
tcexec "github.com/testcontainers/testcontainers-go/exec"
"github.com/testcontainers/testcontainers-go/wait"
)

Expand Down Expand Up @@ -69,14 +71,12 @@ func NewActRunner(ctx context.Context, giteaURL, giteaToken, networkName string,
return nil, fmt.Errorf("failed to start act container: %w", err)
}

// Install act in the container
_, _, err = container.Exec(ctx, []string{
"bash", "-c",
"curl -s https://raw.githubusercontent.com/nektos/act/master/install.sh | bash -s -- -b /usr/local/bin",
})
if err != nil {
// Install act in the container, verifying the binary actually lands and
// retrying the transient network hiccups that the install script is prone
// to. The real *DockerContainer satisfies containerExecer directly.
if err := installActWithVerify(ctx, container, actInstallMaxAttempts); err != nil {
_ = container.Terminate(ctx) // Best-effort cleanup
return nil, fmt.Errorf("failed to install act: %w", err)
return nil, err
}

// Network override is passed on the CLI as `--network=<name>`. Act's
Expand Down Expand Up @@ -112,6 +112,94 @@ EOF`
}, nil
}

// containerExecer is the slice of the container surface the act install needs:
// running a command and getting back its exit code plus an output reader. The
// real *DockerContainer satisfies it, and a fake satisfies it in tests, so the
// install/verify/retry logic is exercisable without Docker or the network.
type containerExecer interface {
Exec(ctx context.Context, cmd []string, options ...tcexec.ProcessOption) (int, io.Reader, error)
}

// actInstallMaxAttempts bounds how many times we try the act install before
// giving up. The install failure mode is a transient network hiccup (registry
// rate-limit, a dropped connection while fetching the install script), which a
// clean retry reliably clears, so a small fixed budget is enough.
const actInstallMaxAttempts = 3

// actInstallBackoff returns the pause before the retry that follows a failed
// install attempt (attempt is 1-based). It grows 2s then 4s so a momentary
// network blip has time to clear without stalling the suite. It is a var so
// tests can zero the wait and stay fast.
var actInstallBackoff = func(attempt int) time.Duration {
return time.Duration(attempt*2) * time.Second
}

// installActWithVerify installs the act binary into the container and confirms
// it is actually runnable, retrying transient failures up to maxAttempts.
//
// This exists because container Exec returns a non-nil error ONLY for a Docker
// API/transport failure, NOT when the executed command exits non-zero. The
// original install discarded the exit code, so a failed `curl | bash` (a
// registry rate-limit or a dropped connection) looked like success: the runner
// came back "healthy" and the first workflow run died with
// `bash: act: command not found`. Here we check the exit code, then prove the
// binary resolves with `command -v act`, and only trust an attempt that clears
// both gates. A bounded retry with backoff absorbs the transient hiccup that is
// the usual cause; the final failure surfaces the failing command output so the
// real reason is not lost.
func installActWithVerify(ctx context.Context, exec containerExecer, maxAttempts int) error {
const installScript = "curl -sSf https://raw.githubusercontent.com/nektos/act/master/install.sh | bash -s -- -b /usr/local/bin"

var lastErr error
for attempt := 1; attempt <= maxAttempts; attempt++ {
if attempt > 1 {
select {
case <-ctx.Done():
return fmt.Errorf("act install cancelled after %d attempt(s): %w", attempt-1, ctx.Err())
case <-time.After(actInstallBackoff(attempt - 1)):
}
}

installCode, installReader, err := exec.Exec(ctx, []string{"bash", "-c", installScript})
if err != nil {
lastErr = fmt.Errorf("act install exec failed: %w", err)
continue
}
if installCode != 0 {
lastErr = fmt.Errorf("act install exited with code %d: %s", installCode, readExecOutput(installReader))
continue
}

verifyCode, verifyReader, err := exec.Exec(ctx, []string{"bash", "-c", "command -v act"})
if err != nil {
lastErr = fmt.Errorf("act verify exec failed: %w", err)
continue
}
if verifyCode != 0 {
lastErr = fmt.Errorf("act binary not resolvable after install (command -v act exited %d): %s", verifyCode, readExecOutput(verifyReader))
continue
}

return nil
}

return fmt.Errorf("failed to install act after %d attempt(s): %w", maxAttempts, lastErr)
}

// readExecOutput drains an exec output reader into a trimmed string for error
// messages, tolerating a nil reader. Output here is short (a curl error line or
// a `command -v` result), so reading it whole is fine.
func readExecOutput(reader io.Reader) string {
if reader == nil {
return ""
}
out, err := io.ReadAll(reader)
if err != nil {
return fmt.Sprintf("(failed to read command output: %v)", err)
}
return strings.TrimSpace(string(out))
}

// Container returns the underlying testcontainers.Container
func (a *ActRunner) Container() testcontainers.Container {
return a.container
Expand Down
117 changes: 117 additions & 0 deletions e2e/harness/act_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,15 @@ import (
"bytes"
"context"
"encoding/binary"
"io"
"strings"
"testing"
"time"

"github.com/moby/moby/api/pkg/stdcopy"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
tcexec "github.com/testcontainers/testcontainers-go/exec"
)

// frameDockerStream builds a single Docker hijacked-attach frame: an 8-byte
Expand Down Expand Up @@ -399,3 +401,118 @@ func TestNormalizeWorkflowResult(t *testing.T) {
})
}
}

// scriptedExec is a per-call response the fake execer hands back: an exit code
// and the text the returned reader yields. The install path keys its decisions
// off both, so the fake must control them independently.
type scriptedExec struct {
exitCode int
output string
}

// fakeExecer is a containerExecer that scripts responses without Docker. It
// classifies each call as an install (the curl|bash command) or a verify
// (`command -v act`) by inspecting the command string, and pops the next
// scripted response from the matching queue. This lets the tests drive the
// transient-failure and binary-missing paths deterministically.
type fakeExecer struct {
installResponses []scriptedExec
verifyResponses []scriptedExec
installCalls int
verifyCalls int
}

func (f *fakeExecer) Exec(_ context.Context, cmd []string, _ ...tcexec.ProcessOption) (int, io.Reader, error) {
joined := strings.Join(cmd, " ")
switch {
case strings.Contains(joined, "command -v act"):
resp := f.verifyResponses[f.verifyCalls]
f.verifyCalls++
return resp.exitCode, strings.NewReader(resp.output), nil
default:
resp := f.installResponses[f.installCalls]
f.installCalls++
return resp.exitCode, strings.NewReader(resp.output), nil
}
}

// noInstallBackoff zeroes the install retry pause for the duration of a test so
// the retry path runs instantly. These install tests therefore do not call
// t.Parallel: they share the package-level backoff var.
func noInstallBackoff(t *testing.T) {
t.Helper()
prev := actInstallBackoff
actInstallBackoff = func(int) time.Duration { return 0 }
t.Cleanup(func() { actInstallBackoff = prev })
}

// TestInstallActWithVerify_TransientInstallFailureThenSuccess proves the install
// path survives a transient non-zero curl|bash exit: the first attempt fails
// (the silent-non-zero-exit footgun this code closes), and a clean retry
// installs and verifies act. Before the fix the discarded exit code let a failed
// install masquerade as success, so the first real workflow run died with
// `bash: act: command not found`.
func TestInstallActWithVerify_TransientInstallFailureThenSuccess(t *testing.T) {
noInstallBackoff(t)

fake := &fakeExecer{
installResponses: []scriptedExec{
{exitCode: 1, output: "curl: (28) timed out"},
{exitCode: 0, output: ""},
},
verifyResponses: []scriptedExec{
{exitCode: 0, output: "/usr/local/bin/act"},
},
}

err := installActWithVerify(context.Background(), fake, 3)
require.NoError(t, err)
assert.Equal(t, 2, fake.installCalls, "install should be retried once after the transient failure")
assert.Equal(t, 1, fake.verifyCalls, "verify runs once, after the successful install")
}

// TestInstallActWithVerify_BinaryMissingThenRecovers proves the install path
// does not trust a zero-exit install alone. The first attempt's install exits 0
// but `command -v act` cannot resolve the binary (a partial or interrupted
// install), so the attempt is rejected and retried; the second attempt both
// installs and verifies cleanly.
func TestInstallActWithVerify_BinaryMissingThenRecovers(t *testing.T) {
noInstallBackoff(t)

fake := &fakeExecer{
installResponses: []scriptedExec{
{exitCode: 0, output: ""},
{exitCode: 0, output: ""},
},
verifyResponses: []scriptedExec{
{exitCode: 1, output: ""},
{exitCode: 0, output: "/usr/local/bin/act"},
},
}

err := installActWithVerify(context.Background(), fake, 3)
require.NoError(t, err)
assert.Equal(t, 2, fake.installCalls, "install should be retried after the binary check failed")
assert.Equal(t, 2, fake.verifyCalls, "verify runs once per attempt")
}

// TestInstallActWithVerify_AllAttemptsFail proves the bounded retry gives up and
// returns an error (rather than a falsely healthy runner) when every attempt
// fails, and that the failing command output is surfaced for debugging.
func TestInstallActWithVerify_AllAttemptsFail(t *testing.T) {
noInstallBackoff(t)

fake := &fakeExecer{
installResponses: []scriptedExec{
{exitCode: 1, output: "curl: (6) could not resolve host"},
{exitCode: 1, output: "curl: (6) could not resolve host"},
{exitCode: 1, output: "curl: (6) could not resolve host"},
},
verifyResponses: []scriptedExec{},
}

err := installActWithVerify(context.Background(), fake, 3)
require.Error(t, err)
assert.Equal(t, 3, fake.installCalls, "all attempts should be exhausted")
assert.Contains(t, err.Error(), "could not resolve host", "failing command output should be surfaced")
}
Loading