diff --git a/e2e/envoygateway_test.go b/e2e/envoygateway_test.go index 627e225..b579ea9 100644 --- a/e2e/envoygateway_test.go +++ b/e2e/envoygateway_test.go @@ -35,11 +35,21 @@ func sharedEnvoyGatewayCache(t *testing.T) string { } // TestEnvoyGateway_InstallAgainstKwok exercises the full Install -// path -- CRDs first, then the install manifest, then the default -// GatewayClass -- against the shared kwok cluster. The Deployment -// rollout wait is skipped (ReadyTimeout=-1) because kwok stages -// pods through its own controller, not the real one, and we only -// need to prove the apply path produces the right object graph. +// path -- CRDs first, then the install manifest, then the +// controller-resources strategic-merge patch, then the EnvoyProxy +// CR, then the default GatewayClass with parametersRef -- +// against the shared kwok cluster. The Deployment rollout wait +// is skipped (ReadyTimeout=-1) because kwok stages pods through +// its own controller, not the real one, and we only need to +// prove the apply path produces the right object graph. +// +// Resource requests are non-zero by design: they exercise the +// kubectl-patch branch (controller) and the EnvoyProxy CR +// branch (proxy) which production CommonConfig defaults flow +// through. Pre-fix the patch step failed with kubectl's +// client-side schema validation -- PR-CI didn't catch it +// because this test was calling Install without the resource +// fields set, short-circuiting both branches. // // Coverage assertions afterwards use kubectl directly so we can // verify what landed without making the test depend on every @@ -47,13 +57,23 @@ func sharedEnvoyGatewayCache(t *testing.T) string { func TestEnvoyGateway_InstallAgainstKwok(t *testing.T) { setupCluster(t) + const ( + ctrlCPU = "10m" + ctrlMem = "64Mi" + proxyCPU = "10m" + proxyMem = "128Mi" + ) if err := envoygateway.Install(context.Background(), envoygateway.Options{ - ContextName: contextName, - CacheOverride: sharedEnvoyGatewayCache(t), - Logger: logger(t), - ReadyTimeout: -1, // skip wait: kwok doesn't run the real controller - GatewayClassName: "y-cluster", // matches the production default - DNSHintIP: "127.0.0.1", // simulates qemu/docker host-loopback case + ContextName: contextName, + CacheOverride: sharedEnvoyGatewayCache(t), + Logger: logger(t), + ReadyTimeout: -1, // skip wait: kwok doesn't run the real controller + GatewayClassName: "y-cluster", // matches the production default + DNSHintIP: "127.0.0.1", // simulates qemu/docker host-loopback case + ControllerCPURequest: ctrlCPU, + ControllerMemRequest: ctrlMem, + ProxyCPURequest: proxyCPU, + ProxyMemRequest: proxyMem, }); err != nil { t.Fatalf("Install: %v", err) } @@ -114,6 +134,55 @@ func TestEnvoyGateway_InstallAgainstKwok(t *testing.T) { t.Errorf("GatewayClass y-cluster annotation %s = %q, want 127.0.0.1", envoygateway.DNSHintIPAnnotation, hintOut) } + + // Controller-resources strategic-merge patch landed. The + // kubectl-patch step would fail outright pre-fix (kubectl + // rejected the partial SSA-apply manifest), so reaching this + // assertion at all proves the patch verb works; reading the + // requests back proves the merge target is the right + // container and the values stuck. + cpuOut := kubectl(t, "get", "deployment", envoygateway.DeploymentName, + "-n", envoygateway.Namespace, + "-o", `jsonpath={.spec.template.spec.containers[?(@.name=="envoy-gateway")].resources.requests.cpu}`) + if cpuOut != ctrlCPU { + t.Errorf("controller resources.requests.cpu = %q, want %q (patch step landed?)", cpuOut, ctrlCPU) + } + memOut := kubectl(t, "get", "deployment", envoygateway.DeploymentName, + "-n", envoygateway.Namespace, + "-o", `jsonpath={.spec.template.spec.containers[?(@.name=="envoy-gateway")].resources.requests.memory}`) + if memOut != ctrlMem { + t.Errorf("controller resources.requests.memory = %q, want %q (patch step landed?)", memOut, ctrlMem) + } + + // EnvoyProxy CR was applied with the proxy resource values. + // Pin both axes so a future schema rename inside the CR + // surfaces as a failure rather than a silent no-op. + proxyCPUOut := kubectl(t, "get", "envoyproxy", envoygateway.EnvoyProxyName, + "-n", envoygateway.Namespace, + "-o", "jsonpath={.spec.provider.kubernetes.envoyDeployment.container.resources.requests.cpu}") + if proxyCPUOut != proxyCPU { + t.Errorf("EnvoyProxy proxy.cpu = %q, want %q", proxyCPUOut, proxyCPU) + } + proxyMemOut := kubectl(t, "get", "envoyproxy", envoygateway.EnvoyProxyName, + "-n", envoygateway.Namespace, + "-o", "jsonpath={.spec.provider.kubernetes.envoyDeployment.container.resources.requests.memory}") + if proxyMemOut != proxyMem { + t.Errorf("EnvoyProxy proxy.memory = %q, want %q", proxyMemOut, proxyMem) + } + + // GatewayClass parametersRef points at our EnvoyProxy CR so + // Gateways under the class inherit the tuned resources + // without any per-Gateway boilerplate. + refKind := kubectl(t, "get", "gatewayclass", "y-cluster", + "-o", "jsonpath={.spec.parametersRef.kind}") + if refKind != "EnvoyProxy" { + t.Errorf("GatewayClass parametersRef.kind = %q, want EnvoyProxy", refKind) + } + refName := kubectl(t, "get", "gatewayclass", "y-cluster", + "-o", "jsonpath={.spec.parametersRef.name}") + if refName != envoygateway.EnvoyProxyName { + t.Errorf("GatewayClass parametersRef.name = %q, want %q", refName, envoygateway.EnvoyProxyName) + } } // TestEnvoyGateway_InstallEmptyClassNameSkipsApply verifies that diff --git a/pkg/images/load.go b/pkg/images/load.go index 208ee1c..2d96f40 100644 --- a/pkg/images/load.go +++ b/pkg/images/load.go @@ -162,23 +162,31 @@ func Load(ctx context.Context, lr *cluster.LookupResult, archive io.Reader, logg } // aliasFor decides what alias (if any) to create for a new -// post-import ref. Two cases produce a useful alias; one (the +// post-import ref. Three cases produce a useful alias; one (the // bare config-digest row containerd writes alongside the // canonical ref) produces "" so the caller skips. // -// - Tag-form ref (":") -> "@". +// - Tag-only ref (":") -> "@". // Required by deployments pinning images in name:tag@sha256: // digest form; without it kubelet's digest lookup misses the // tag-only row in containerd's image store and falls back to // a registry pull. // -// - Digest-form ref ("@") -> ":latest@ +// - Digest-only ref ("@") -> ":latest@ // ". Required by kubelet's checkpoint-image check on // containerd v2: it resolves images by config-digest and // normalizes the bare config-digest row to "docker.io/library/ // sha256@..." (not found). A tag-form alias gives the lookup // a parseable repo to land on. // +// - Tag+digest ref (":@") -> "@ +// ". The import already stored under the full ref +// name (kubelet resolves either form), but legacy consumers +// expecting the digest-only form (checkit's appliance-init.sh +// post-load retag) need that row too. Without it, a partial +// `ctr image tag --force @ ` from the +// consumer side fails "image not found". +// // - Bare "sha256:" (the config-digest row ctr emits as a // side effect) -> "". Treating it as a tagged ref would // mangle it through stripTag into the literal "sha256" and @@ -188,10 +196,42 @@ func aliasFor(ref, digest string) string { if strings.HasPrefix(ref, "sha256:") { return "" } - if at := strings.Index(ref, "@"); at >= 0 { - return ref[:at] + ":latest" + ref[at:] + at := strings.Index(ref, "@") + if at < 0 { + // Tag-only input. Synthesize @ so + // deployments pinning by digest resolve from the + // tag-only row. + return stripTag(ref) + "@" + digest + } + repoPart := ref[:at] + if hasTag(repoPart) { + // Tag+digest input (":@"). The + // import already stored under the full ref name -- + // kubelet resolves either form. Also create a + // digest-only alias so legacy consumers expecting + // "@" (checkit's appliance-init.sh + // post-load retag for minio-deduplication, kubelet's + // checkpoint-image lookup on containerd v2) can + // still find it. + return stripTag(repoPart) + ref[at:] + } + // Digest-only input ("@"). Synthesize + // ":latest@" alias so crictl + kubelet's + // checkpoint-image lookup resolve it. + return repoPart + ":latest" + ref[at:] +} + +// hasTag reports whether ref (already trimmed of any "@digest" +// suffix) carries a "<...>:" tail. The tag colon must be +// after the last "/" so a "host:port/path" prefix doesn't +// false-positive. +func hasTag(ref string) bool { + slash := strings.LastIndex(ref, "/") + tail := ref + if slash >= 0 { + tail = ref[slash+1:] } - return stripTag(ref) + "@" + digest + return strings.Contains(tail, ":") } // TarOCIDir streams a USTAR archive of an OCI v1 layout rooted diff --git a/pkg/images/load_test.go b/pkg/images/load_test.go index 02c8cf1..9ef359e 100644 --- a/pkg/images/load_test.go +++ b/pkg/images/load_test.go @@ -261,24 +261,40 @@ func TestStripTag(t *testing.T) { } // TestAliasFor pins the post-import alias policy. Each case -// captures one of the three shapes ctr writes into the image -// store after `image import`: tag-form, digest-form, and the -// bare config-digest row that mustn't be aliased. +// captures one of the four shapes ctr writes into the image +// store after `image import`: tag-only, digest-only, tag+digest, +// and the bare config-digest row that mustn't be aliased. func TestAliasFor(t *testing.T) { const digest = "sha256:af91c49ce795f3b2c1a4e6d8b9c0e1f2a3b4c5d6e7f80112233445566778899aa" cases := []struct { name, ref, want string }{ { - name: "tag-form -> digest alias", + name: "tag-only -> digest alias", ref: "ghcr.io/yolean/echo:v1", want: "ghcr.io/yolean/echo@" + digest, }, { - name: "digest-form -> :latest alias (kubelet checkpoint-image lookup)", + name: "digest-only -> :latest alias (kubelet checkpoint-image lookup)", ref: "ghcr.io/yolean/minio-deduplication@" + digest, want: "ghcr.io/yolean/minio-deduplication:latest@" + digest, }, + { + // y-site-images-load emits this shape for every line + // in the pinned image list. Pre-fix this case generated + // "ghcr.io/yolean/busybox:1.37.0-glibc:latest@" + // which ctr rejects as invalid; checkit's post-load + // retag for minio-deduplication then errored out on the + // missing @ row. + name: "tag+digest -> digest-only alias", + ref: "ghcr.io/yolean/busybox:1.37.0-glibc@" + digest, + want: "ghcr.io/yolean/busybox@" + digest, + }, + { + name: ":latest+digest -> digest-only alias (checkit's minio-deduplication shape)", + ref: "ghcr.io/yolean/minio-deduplication:latest@" + digest, + want: "ghcr.io/yolean/minio-deduplication@" + digest, + }, { name: "bare config-digest row -> no alias (would mangle to sha256@sha256:...)", ref: "sha256:dc863b8391abb7c2d3e4f5a6b7c8d9e0f1a2b3c4d5e6f70819253647586978a9", @@ -289,6 +305,11 @@ func TestAliasFor(t *testing.T) { ref: "registry.example:5000/foo/bar:tag", want: "registry.example:5000/foo/bar@" + digest, }, + { + name: "hostport tag+digest stripped at correct colon", + ref: "registry.example:5000/foo/bar:tag@" + digest, + want: "registry.example:5000/foo/bar@" + digest, + }, } for _, c := range cases { t.Run(c.name, func(t *testing.T) { diff --git a/pkg/provision/envoygateway/embed.go b/pkg/provision/envoygateway/embed.go index c5d136e..7a4df94 100644 --- a/pkg/provision/envoygateway/embed.go +++ b/pkg/provision/envoygateway/embed.go @@ -101,22 +101,27 @@ spec: `, EnvoyProxyName, Namespace, cpuRequest, memRequest)) } -// ControllerResourcesYAML is a partial Deployment manifest -// declaring ownership over the envoy-gateway controller -// container's resources.requests under server-side apply. The -// apply uses field-manager y-cluster; existing fields (image, -// env, replicas, container args) stay with their original -// owners. Limits are not declared -- intentional, so the -// upstream limit (currently 1Gi memory, no CPU cap) stays in -// effect. -func ControllerResourcesYAML(cpuRequest, memRequest string) []byte { - return []byte(fmt.Sprintf(`--- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: %s - namespace: %s -spec: +// ControllerResourcesPatch is a strategic-merge patch body for +// the envoy-gateway controller container's resources.requests. +// Applied via `kubectl patch deployment envoy-gateway -n +// envoy-gateway-system --type=strategic --patch ''`. +// +// We use kubectl patch rather than `kubectl apply --server-side` +// with a partial Deployment because the Deployment kind has +// required fields (spec.selector, template.metadata.labels, +// containers[*].image) that kubectl validates client-side before +// sending -- a partial SSA manifest fails with "Required value" +// errors even when SSA semantics would otherwise have merged +// cleanly. Strategic merge patch only validates the merge +// RESULT, which keeps the existing required fields intact. +// +// Strategic merge handles the containers list by merge key +// (name): the existing container's image, env, ports, args stay +// intact; only resources.requests fields land. Limits aren't +// declared, so the upstream limit (currently 1Gi memory, no +// CPU cap) stays in effect. +func ControllerResourcesPatch(cpuRequest, memRequest string) []byte { + return []byte(fmt.Sprintf(`spec: template: spec: containers: @@ -125,6 +130,6 @@ spec: requests: cpu: %s memory: %s -`, DeploymentName, Namespace, cpuRequest, memRequest)) +`, cpuRequest, memRequest)) } diff --git a/pkg/provision/envoygateway/embed_test.go b/pkg/provision/envoygateway/embed_test.go index ed1c41f..d3816c2 100644 --- a/pkg/provision/envoygateway/embed_test.go +++ b/pkg/provision/envoygateway/embed_test.go @@ -103,16 +103,16 @@ func TestEnvoyProxyYAML_ShapesResources(t *testing.T) { } } -// TestControllerResourcesYAML_RequestsOnly pins the partial -// Deployment shape: requests-only, container matched by name so -// SSA targets the right container, no limits/image/env claimed -// (so upstream owners keep them). -func TestControllerResourcesYAML_RequestsOnly(t *testing.T) { - got := string(ControllerResourcesYAML("10m", "64Mi")) +// TestControllerResourcesPatch_RequestsOnly pins the +// strategic-merge patch shape: starts at `spec:` (no +// apiVersion/kind/metadata wrapper, since kubectl patch +// identifies the target via CLI args), container matched by +// name so the merge targets the right container, no +// limits/image/env claimed (so upstream owners keep them through +// the merge). +func TestControllerResourcesPatch_RequestsOnly(t *testing.T) { + got := string(ControllerResourcesPatch("10m", "64Mi")) for _, want := range []string{ - "kind: Deployment", - "name: " + DeploymentName, - "namespace: " + Namespace, "- name: envoy-gateway", "cpu: 10m", "memory: 64Mi", @@ -121,6 +121,14 @@ func TestControllerResourcesYAML_RequestsOnly(t *testing.T) { t.Errorf("missing %q:\n%s", want, got) } } + // Patch body must NOT carry an apiVersion/kind/metadata + // header -- kubectl patch identifies the target via CLI args + // and would fail to parse a manifest-shaped body. + for _, banned := range []string{"apiVersion:", "kind:", "metadata:"} { + if strings.Contains(got, banned) { + t.Errorf("patch body must not include %q:\n%s", banned, got) + } + } if strings.Contains(got, "limits:") { t.Errorf("patch should declare requests only:\n%s", got) } diff --git a/pkg/provision/envoygateway/install.go b/pkg/provision/envoygateway/install.go index 21a4a6f..4305cc6 100644 --- a/pkg/provision/envoygateway/install.go +++ b/pkg/provision/envoygateway/install.go @@ -147,10 +147,10 @@ func Install(ctx context.Context, opts Options) error { zap.String("cpu", opts.ControllerCPURequest), zap.String("memory", opts.ControllerMemRequest), ) - if err := kubectlApplyStdin(ctx, opts.ContextName, - ControllerResourcesYAML(opts.ControllerCPURequest, opts.ControllerMemRequest), + if err := kubectlPatch(ctx, opts.ContextName, "deployment", DeploymentName, Namespace, + ControllerResourcesPatch(opts.ControllerCPURequest, opts.ControllerMemRequest), ); err != nil { - return fmt.Errorf("apply controller resources: %w", err) + return fmt.Errorf("patch controller resources: %w", err) } } @@ -230,6 +230,33 @@ func kubectlApplyStdin(ctx context.Context, contextName string, manifest []byte) return nil } +// kubectlPatch applies a strategic-merge patch to a named +// resource via `kubectl patch -n --type=strategic +// --patch `. Used for the controller resources tweak +// where a full SSA-apply of a partial Deployment fails kubectl's +// client-side schema validation (selector / image required). +// +// The patch is passed as a flag value (not stdin) because +// `kubectl patch` doesn't read the body from stdin by default; +// `--patch-file=/dev/stdin` works but the inline form keeps the +// shellout symmetric with kubectlApplyStdin. +func kubectlPatch(ctx context.Context, contextName, kind, name, namespace string, patch []byte) error { + cmd := exec.CommandContext(ctx, "kubectl", + "--context="+contextName, + "patch", kind, name, + "-n", namespace, + "--type=strategic", + "--patch", string(patch), + "--field-manager=y-cluster", + ) + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + if err := cmd.Run(); err != nil { + return fmt.Errorf("kubectl patch: %w", err) + } + return nil +} + // kubectlRolloutStatus runs `kubectl rollout status deployment/ // -n --timeout=`. The timeout flag accepts Go duration // strings via .String().