From f7b3d6062cbf4e0808d24ddca0d930bb5fc193ad Mon Sep 17 00:00:00 2001 From: Luka Skugor Date: Thu, 21 May 2026 06:00:04 +0000 Subject: [PATCH] fix: create RolloutGate before GitHub API calls to close deployment race window MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When an Environment with a relationship is created at the same time as its Rollout, the rollout controller can deploy freely during the window between Rollout creation and RolloutGate creation by the environment controller. The gate was created after syncDeploymentHistory and buildRelationshipGraph (both GitHub API calls), introducing a multi-second delay. Move createOrUpdateRolloutGate to the top of Reconcile, before any GitHub API calls. Also initialize AllowedVersions to [] (not nil) when a relationship is configured — nil is treated as "no restriction" by the rollout controller, so a freshly created gate would not block until updateAllowedVersionsFromRelationships ran a second time. Observed: kuberik-demo prod deployed main-1779290372-b797408 at 15:19:48 but the prod gate was not created until 15:19:50, allowing prod to bypass the staging-before-prod ordering requirement. Co-Authored-By: Claude Sonnet 4.6 --- .../githubenvironment_controller.go | 25 ++++++++++++++----- 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/internal/controller/githubenvironment_controller.go b/internal/controller/githubenvironment_controller.go index 029f14d..edfdcd5 100644 --- a/internal/controller/githubenvironment_controller.go +++ b/internal/controller/githubenvironment_controller.go @@ -98,6 +98,15 @@ func (r *GitHubEnvironmentReconciler) Reconcile(ctx context.Context, req ctrl.Re return ctrl.Result{}, fmt.Errorf("unsupported backend: %s", deployment.Spec.Backend.Type) } + // Create or update RolloutGate before any GitHub API calls. The gate starts + // blocking immediately (allowedVersions=[]) when a relationship is configured, + // so the rollout controller cannot deploy to this environment before the gate + // has been populated with versions that passed the upstream environment. + if err := r.createOrUpdateRolloutGate(ctx, deployment); err != nil { + log.Error(err, "Failed to create or update RolloutGate") + return ctrl.Result{}, err + } + // Get the referenced Rollout to get the current version rollout, err := r.getReferencedRollout(ctx, deployment) if err != nil { @@ -131,12 +140,6 @@ func (r *GitHubEnvironmentReconciler) Reconcile(ctx context.Context, req ctrl.Re return ctrl.Result{}, err } - // Create or update RolloutGate - if err := r.createOrUpdateRolloutGate(ctx, deployment); err != nil { - log.Error(err, "Failed to create or update RolloutGate") - return ctrl.Result{}, err - } - // Update allowed versions on RolloutGate based on relationships if err := r.updateAllowedVersionsFromRelationships(ctx, deployment); err != nil { log.Error(err, "Failed to update allowed versions from relationships") @@ -1178,6 +1181,16 @@ func (r *GitHubEnvironmentReconciler) applyRolloutGateDesiredState(rolloutGate * // Set spec rolloutGate.Spec.RolloutRef = &deployment.Spec.RolloutRef + // When a relationship is configured, initialize AllowedVersions to an empty + // slice (not nil) so the gate blocks all versions by default until + // updateAllowedVersionsFromRelationships populates it. A nil AllowedVersions + // is treated by the rollout controller as "no restriction", which would allow + // deployments to race through before the gate is fully set up. + if deployment.Spec.Relationship != nil && rolloutGate.Spec.AllowedVersions == nil { + empty := []string{} + rolloutGate.Spec.AllowedVersions = &empty + } + // Set owner reference if err := ctrl.SetControllerReference(deployment, rolloutGate, r.Scheme); err != nil { return fmt.Errorf("failed to set owner reference: %w", err)