From 886dfa24dce5eb6e325c4036ab0e056ea1075ae9 Mon Sep 17 00:00:00 2001 From: Staffan Olsson Date: Fri, 1 May 2026 17:33:25 +0200 Subject: [PATCH 1/3] fix(provision/docker): probe host apiserver before declaring ready k3s writes /etc/rancher/k3s/k3s.yaml inside the container as soon as the apiserver socket is bound, but two host-side concerns lag behind: docker's userland port-forward to 127.0.0.1:HostAPIPort takes a moment to start accepting, and the apiserver itself needs a beat to advance from "listening" to "ready". The very next step in Provision -- envoygateway.Install via kubectl apply --server-side against the merged kubeconfig -- raced against both, deterministically failing with: dial tcp 127.0.0.1:6443: connect: connection refused Add waitForHostAPIServer between waitForKubeconfig and the "k3s ready" log. It polls `kubectl get --raw=/readyz` against the merged context (1s interval, 60s deadline). /readyz covers both failure modes -- a transport error from a not-yet-bound port and a 503 from a starting apiserver are both retried. The qemu provisioner is unaffected: its ssh-driven k3s install script returns only after the apiserver is fully up, so "k3s ready" there is already truthful. Verified locally: TestDocker_ProvisionTeardown sees ~10s between "starting docker" and "k3s ready" (was ~2s) and the subsequent envoy-gateway install runs without retry. Co-Authored-By: Claude Opus 4.7 (1M context) --- pkg/provision/docker/docker.go | 55 ++++++++++++++++++++++++++++++++-- 1 file changed, 53 insertions(+), 2 deletions(-) diff --git a/pkg/provision/docker/docker.go b/pkg/provision/docker/docker.go index 2cbe590..bcff322 100644 --- a/pkg/provision/docker/docker.go +++ b/pkg/provision/docker/docker.go @@ -205,6 +205,20 @@ func Provision(ctx context.Context, cfg config.DockerConfig, logger *zap.Logger) if err := kubecfg.Import(rawKubeconfig); err != nil { return nil, fmt.Errorf("merge kubeconfig: %w", err) } + + // k3s.yaml landing in the container doesn't mean the host can + // reach the apiserver yet -- the docker port-forward to the + // host-mapped 6443 is bound a moment later, and the very next + // step (envoygateway.Install -> kubectl apply --server-side) + // would otherwise race against it with "dial tcp 127.0.0.1: + // 6443: connect: connection refused". Probe the host endpoint + // through the merged kubeconfig until /readyz succeeds before + // declaring readiness. + if err := c.waitForHostAPIServer(ctx); err != nil { + dlogs, _ := dockerexec.Logs(ctx, cli, cfg.Name, "100") + return nil, fmt.Errorf("wait for host apiserver: %w\ncontainer logs:\n%s", err, dlogs) + } + logger.Info("k3s ready", zap.String("context", cfg.Context)) // Install the bundled Envoy Gateway (CRDs + controller + @@ -337,8 +351,11 @@ func (c *Cluster) NodeExec(ctx context.Context, command string, stdin io.Reader) } // waitForKubeconfig polls until the k3s-managed kubeconfig appears -// inside the container. k3s writes /etc/rancher/k3s/k3s.yaml after -// the apiserver is ready to accept connections. +// inside the container. k3s writes /etc/rancher/k3s/k3s.yaml when +// the in-container apiserver socket is bound; the host-side port +// forward and full apiserver readiness lag behind, so the host +// must additionally probe via waitForHostAPIServer before any +// kubectl call against the merged kubeconfig. func (c *Cluster) waitForKubeconfig(ctx context.Context) error { deadline := time.Now().Add(2 * time.Minute) for { @@ -357,6 +374,40 @@ func (c *Cluster) waitForKubeconfig(ctx context.Context) error { } } +// waitForHostAPIServer polls `kubectl get --raw=/readyz` against +// the merged context until the apiserver responds with a 200. Two +// host-side concerns lag behind kubeconfig-in-container readiness: +// docker's userland port forward to 127.0.0.1:HostAPIPort needs a +// moment to bind, and the apiserver itself takes a beat to advance +// from "listening" to "ready". /readyz covers both -- a connection +// refused, a 503 from a still-starting apiserver, or a transport +// error are all retried. +func (c *Cluster) waitForHostAPIServer(ctx context.Context) error { + deadline := time.Now().Add(60 * time.Second) + var lastErr error + for { + probe := exec.CommandContext(ctx, "kubectl", + "--context="+c.cfg.Context, + "get", "--raw=/readyz", + ) + // Discard noisy intermediate failures; surface only the + // final state via lastErr if we time out. + out, err := probe.CombinedOutput() + if err == nil { + return nil + } + lastErr = fmt.Errorf("%w: %s", err, strings.TrimSpace(string(out))) + if time.Now().After(deadline) { + return fmt.Errorf("apiserver /readyz never returned 200 within 60s on context %q: %v", c.cfg.Context, lastErr) + } + select { + case <-ctx.Done(): + return ctx.Err() + case <-time.After(time.Second): + } + } +} + // extractKubeconfig reads the container's kubeconfig and rewrites // the embedded server URL to the host-mapped API port so the host's // kubectl can reach it. From 29b01ef63fe533639fe43b137798cd3265164807 Mon Sep 17 00:00:00 2001 From: Staffan Olsson Date: Fri, 1 May 2026 19:06:04 +0200 Subject: [PATCH 2/3] review: log start-of-wait, document kubectl shell-out choice Two non-blocking review nits from #12: - The function was silent until success/timeout; a 30s wait looked like a 30s mystery. Log the start so successive log lines bracket the readiness gate. - Spell out *why* we shell out to kubectl instead of dialing the apiserver directly (consistency with envoygateway.Install, which is the very next caller and uses the same code path). Co-Authored-By: Claude Opus 4.7 (1M context) --- pkg/provision/docker/docker.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pkg/provision/docker/docker.go b/pkg/provision/docker/docker.go index bcff322..63bbf5d 100644 --- a/pkg/provision/docker/docker.go +++ b/pkg/provision/docker/docker.go @@ -382,7 +382,13 @@ func (c *Cluster) waitForKubeconfig(ctx context.Context) error { // from "listening" to "ready". /readyz covers both -- a connection // refused, a 503 from a still-starting apiserver, or a transport // error are all retried. +// +// We shell out to kubectl rather than dialing the apiserver +// directly because envoygateway.Install drives the host kubeconfig +// the same way -- using kubectl here keeps the readiness probe on +// the same code path that the very next caller will use. func (c *Cluster) waitForHostAPIServer(ctx context.Context) error { + c.logger.Info("waiting for host apiserver", zap.String("context", c.cfg.Context)) deadline := time.Now().Add(60 * time.Second) var lastErr error for { From 62a94d7622d7639ce08d5806835f8a975be5d3b3 Mon Sep 17 00:00:00 2001 From: Staffan Olsson Date: Fri, 1 May 2026 19:10:18 +0200 Subject: [PATCH 3/3] test(provision/docker): cover host-apiserver-ready poll loop Address review nit #5 from #12: lock in the deadline path and the ctx-cancel path with a fake-kubectl-on-PATH harness. To run the loop in milliseconds rather than the production 60s, the body of waitForHostAPIServer is split into pollHostAPIServerReadyz which takes the timeout and interval as arguments. Production keeps the same effective behaviour via const wrappers; the test drives the helper directly with sub-second knobs. Three cases: - success: kubectl exits 0, returns nil immediately. - deadline-honored: always-failing kubectl, expect the wrapped "/readyz never returned 200" error (not ctx.Err()) and the context name in the message. - ctx-cancelled: 50ms ctx vs 10s loop timeout, expect ctx.Err() (not the deadline message) -- guards against a refactor that drops the select { <-ctx.Done() } branch. Co-Authored-By: Claude Opus 4.7 (1M context) --- pkg/provision/docker/docker.go | 19 +++++- pkg/provision/docker/docker_test.go | 97 +++++++++++++++++++++++++++++ 2 files changed, 113 insertions(+), 3 deletions(-) create mode 100644 pkg/provision/docker/docker_test.go diff --git a/pkg/provision/docker/docker.go b/pkg/provision/docker/docker.go index 63bbf5d..80de02b 100644 --- a/pkg/provision/docker/docker.go +++ b/pkg/provision/docker/docker.go @@ -374,6 +374,11 @@ func (c *Cluster) waitForKubeconfig(ctx context.Context) error { } } +const ( + hostAPIServerReadyTimeout = 60 * time.Second + hostAPIServerReadyInterval = time.Second +) + // waitForHostAPIServer polls `kubectl get --raw=/readyz` against // the merged context until the apiserver responds with a 200. Two // host-side concerns lag behind kubeconfig-in-container readiness: @@ -388,8 +393,16 @@ func (c *Cluster) waitForKubeconfig(ctx context.Context) error { // the same way -- using kubectl here keeps the readiness probe on // the same code path that the very next caller will use. func (c *Cluster) waitForHostAPIServer(ctx context.Context) error { + return c.pollHostAPIServerReadyz(ctx, hostAPIServerReadyTimeout, hostAPIServerReadyInterval) +} + +// pollHostAPIServerReadyz is the parameterised body of +// waitForHostAPIServer; the timeout and interval are arguments so +// tests can drive the loop with a fake kubectl on $PATH at sub-second +// resolution. +func (c *Cluster) pollHostAPIServerReadyz(ctx context.Context, timeout, interval time.Duration) error { c.logger.Info("waiting for host apiserver", zap.String("context", c.cfg.Context)) - deadline := time.Now().Add(60 * time.Second) + deadline := time.Now().Add(timeout) var lastErr error for { probe := exec.CommandContext(ctx, "kubectl", @@ -404,12 +417,12 @@ func (c *Cluster) waitForHostAPIServer(ctx context.Context) error { } lastErr = fmt.Errorf("%w: %s", err, strings.TrimSpace(string(out))) if time.Now().After(deadline) { - return fmt.Errorf("apiserver /readyz never returned 200 within 60s on context %q: %v", c.cfg.Context, lastErr) + return fmt.Errorf("apiserver /readyz never returned 200 within %s on context %q: %v", timeout, c.cfg.Context, lastErr) } select { case <-ctx.Done(): return ctx.Err() - case <-time.After(time.Second): + case <-time.After(interval): } } } diff --git a/pkg/provision/docker/docker_test.go b/pkg/provision/docker/docker_test.go new file mode 100644 index 0000000..0c1fc00 --- /dev/null +++ b/pkg/provision/docker/docker_test.go @@ -0,0 +1,97 @@ +package docker + +import ( + "context" + "errors" + "os" + "path/filepath" + "runtime" + "strings" + "testing" + "time" + + "go.uber.org/zap" + + "github.com/Yolean/y-cluster/pkg/provision/config" +) + +// fakeKubectlOnPATH writes an executable shell script named `kubectl` +// to a fresh temp dir and prepends that dir to $PATH for the test. +// pollHostAPIServerReadyz exec's `kubectl` by name, so the resolved +// binary is the script rather than any real kubectl on the system. +// `body` is the shell body (no shebang); use `exit 0` for the +// success case and `exit 1` (with a stderr message) for failure. +func fakeKubectlOnPATH(t *testing.T, body string) { + t.Helper() + if runtime.GOOS == "windows" { + t.Skip("fake kubectl shim is /bin/sh-only") + } + dir := t.TempDir() + script := "#!/bin/sh\n" + body + "\n" + if err := os.WriteFile(filepath.Join(dir, "kubectl"), []byte(script), 0o755); err != nil { + t.Fatal(err) + } + t.Setenv("PATH", dir+string(os.PathListSeparator)+os.Getenv("PATH")) +} + +func newProbeTestCluster() *Cluster { + return &Cluster{ + cfg: config.DockerConfig{ + CommonConfig: config.CommonConfig{Context: "unit-test-ctx"}, + }, + logger: zap.NewNop(), + } +} + +// First-call success: a kubectl that exits 0 returns nil immediately. +func TestPollHostAPIServerReadyz_Success(t *testing.T) { + fakeKubectlOnPATH(t, "exit 0") + c := newProbeTestCluster() + if err := c.pollHostAPIServerReadyz(context.Background(), time.Second, 10*time.Millisecond); err != nil { + t.Fatalf("expected success, got: %v", err) + } +} + +// Always-failing kubectl: deadline trips, the wrapped "never returned +// 200" error is returned (not ctx.Err()) and carries the context name. +func TestPollHostAPIServerReadyz_DeadlineHonored(t *testing.T) { + fakeKubectlOnPATH(t, `echo 'connection refused' >&2; exit 1`) + c := newProbeTestCluster() + start := time.Now() + err := c.pollHostAPIServerReadyz(context.Background(), 100*time.Millisecond, 20*time.Millisecond) + elapsed := time.Since(start) + if err == nil { + t.Fatal("expected deadline error, got nil") + } + if errors.Is(err, context.DeadlineExceeded) || errors.Is(err, context.Canceled) { + t.Fatalf("expected wrapped readiness error, got ctx error: %v", err) + } + if !strings.Contains(err.Error(), "/readyz never returned 200") { + t.Fatalf("expected readiness deadline message, got: %v", err) + } + if !strings.Contains(err.Error(), "unit-test-ctx") { + t.Fatalf("expected context name in error, got: %v", err) + } + // Sanity: we shouldn't have run anywhere near the production 60s. + if elapsed > 5*time.Second { + t.Fatalf("loop ran far longer than the test timeout: %s", elapsed) + } +} + +// Caller-cancelled ctx: the loop returns ctx.Err() rather than the +// readiness deadline message. Guards against a refactor that drops +// the select { <-ctx.Done() } branch and silently makes the wait +// non-cancellable. +func TestPollHostAPIServerReadyz_ContextCanceled(t *testing.T) { + fakeKubectlOnPATH(t, `echo failing >&2; exit 1`) + c := newProbeTestCluster() + ctx, cancel := context.WithTimeout(context.Background(), 50*time.Millisecond) + defer cancel() + err := c.pollHostAPIServerReadyz(ctx, 10*time.Second, 5*time.Millisecond) + if err == nil { + t.Fatal("expected ctx error, got nil") + } + if !errors.Is(err, context.DeadlineExceeded) { + t.Fatalf("expected context.DeadlineExceeded, got: %v", err) + } +}