Skip to content

Commit b6ed0ce

Browse files
bdchathamclaude
andauthored
fix: remove unused RolloutNodeStatus and prevent templateHash race (#81)
* fix: remove unused RolloutNodeStatus and prevent templateHash race - Remove Nodes field and RolloutNodeStatus type from RolloutStatus. The field was required by CRD validation but never populated, causing status writes to fail with "nodes: Required value". Rollout lifecycle is fully managed by completePlan/failPlan. - Remove reconcileRolloutStatus (was operating on the removed Nodes field; rollout clearing is handled by completePlan). - Prevent templateHash from advancing when RolloutInProgress is true. Without this, updateStatus races with detectDeploymentNeeded: the hash converges before the plan can be created. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * fix: failPlan returns requeue to avoid double updateStatus, add nil guard - failPlan now returns ResultRequeueImmediate (matching completePlan) so the controller short-circuits before the final updateStatus call - Add defensive nil check on Rollout before accessing TargetHash in the supersession event message Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 22e6d72 commit b6ed0ce

9 files changed

Lines changed: 6 additions & 231 deletions

File tree

api/v1alpha1/seinodedeployment_types.go

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -292,11 +292,6 @@ type RolloutStatus struct {
292292
// StartedAt is when the rollout was first detected.
293293
StartedAt metav1.Time `json:"startedAt"`
294294

295-
// Nodes reports per-node rollout state.
296-
// +listType=map
297-
// +listMapKey=name
298-
Nodes []RolloutNodeStatus `json:"nodes"`
299-
300295
// IncumbentNodes lists the names of the currently active SeiNode
301296
// resources. Only populated for BlueGreen and HardFork strategies.
302297
// +optional
@@ -318,18 +313,6 @@ type RolloutStatus struct {
318313
EntrantRevision string `json:"entrantRevision,omitempty"`
319314
}
320315

321-
// RolloutNodeStatus tracks a single node's convergence during a rollout.
322-
type RolloutNodeStatus struct {
323-
// Name is the SeiNode resource name.
324-
Name string `json:"name"`
325-
326-
// Ready is true when the node is Running with a ready pod.
327-
Ready bool `json:"ready"`
328-
329-
// Phase is the SeiNode's current phase.
330-
Phase SeiNodePhase `json:"phase,omitempty"`
331-
}
332-
333316
// Status condition types for SeiNodeDeployment.
334317
const (
335318
ConditionNodesReady = "NodesReady"

api/v1alpha1/zz_generated.deepcopy.go

Lines changed: 0 additions & 20 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

config/crd/sei.io_seinodedeployments.yaml

Lines changed: 0 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -999,36 +999,6 @@ spec:
999999
IncumbentRevision identifies the generation of the currently live nodes.
10001000
Only populated for BlueGreen and HardFork strategies.
10011001
type: string
1002-
nodes:
1003-
description: Nodes reports per-node rollout state.
1004-
items:
1005-
description: RolloutNodeStatus tracks a single node's convergence
1006-
during a rollout.
1007-
properties:
1008-
name:
1009-
description: Name is the SeiNode resource name.
1010-
type: string
1011-
phase:
1012-
description: Phase is the SeiNode's current phase.
1013-
enum:
1014-
- Pending
1015-
- Initializing
1016-
- Running
1017-
- Failed
1018-
- Terminating
1019-
type: string
1020-
ready:
1021-
description: Ready is true when the node is Running with
1022-
a ready pod.
1023-
type: boolean
1024-
required:
1025-
- name
1026-
- ready
1027-
type: object
1028-
type: array
1029-
x-kubernetes-list-map-keys:
1030-
- name
1031-
x-kubernetes-list-type: map
10321002
startedAt:
10331003
description: StartedAt is when the rollout was first detected.
10341004
format: date-time
@@ -1044,7 +1014,6 @@ spec:
10441014
description: TargetHash is the templateHash being rolled out to.
10451015
type: string
10461016
required:
1047-
- nodes
10481017
- startedAt
10491018
- strategy
10501019
- targetHash

internal/controller/nodedeployment/nodes.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,9 +97,13 @@ func (r *SeiNodeDeploymentReconciler) detectDeploymentNeeded(group *seiv1alpha1.
9797
if group.Status.Rollout != nil && group.Status.Rollout.TargetHash == currentHash {
9898
return // rollout already targets the current spec
9999
}
100+
oldTarget := ""
101+
if group.Status.Rollout != nil {
102+
oldTarget = group.Status.Rollout.TargetHash
103+
}
100104
group.Status.Plan = nil
101105
r.Recorder.Eventf(group, corev1.EventTypeNormal, "RolloutSuperseded",
102-
"Spec changed during active rollout, replacing plan (old target: %s)", group.Status.Rollout.TargetHash)
106+
"Spec changed during active rollout, replacing plan (old target: %s)", oldTarget)
103107
}
104108

105109
if !hasConditionTrue(group, seiv1alpha1.ConditionRolloutInProgress) &&

internal/controller/nodedeployment/plan.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ func (r *SeiNodeDeploymentReconciler) failPlan(ctx context.Context, group *seiv1
121121
if err := r.updateStatus(ctx, group, statusBase); err != nil {
122122
return ctrl.Result{}, fmt.Errorf("updating status after plan failure: %w", err)
123123
}
124-
return ctrl.Result{}, nil
124+
return planner.ResultRequeueImmediate, nil
125125
}
126126

127127
func setPlanInProgress(group *seiv1alpha1.SeiNodeDeployment, reason, message string) {

internal/controller/nodedeployment/plan_test.go

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -54,9 +54,6 @@ func TestCompletePlan_ClearsRolloutInProgress(t *testing.T) {
5454
Strategy: seiv1alpha1.UpdateStrategyInPlace,
5555
TargetHash: "newhash1234",
5656
StartedAt: metav1.Now(),
57-
Nodes: []seiv1alpha1.RolloutNodeStatus{
58-
{Name: "archive-rpc-0", Ready: true, Phase: seiv1alpha1.PhaseRunning},
59-
},
6057
}
6158
group.Status.Plan = &seiv1alpha1.TaskPlan{Phase: seiv1alpha1.TaskPlanComplete}
6259
setPlanInProgress(group, "Deployment", "deploying")
@@ -111,11 +108,6 @@ func TestFailPlan_ClearsRolloutInProgress(t *testing.T) {
111108
Strategy: seiv1alpha1.UpdateStrategyInPlace,
112109
TargetHash: "newhash1234",
113110
StartedAt: metav1.Now(),
114-
Nodes: []seiv1alpha1.RolloutNodeStatus{
115-
{Name: "archive-rpc-0"},
116-
{Name: "archive-rpc-1"},
117-
{Name: "archive-rpc-2"},
118-
},
119111
}
120112
group.Status.Plan = &seiv1alpha1.TaskPlan{Phase: seiv1alpha1.TaskPlanFailed}
121113
setPlanInProgress(group, "Deployment", "deploying")

internal/controller/nodedeployment/status.go

Lines changed: 0 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,6 @@ func (r *SeiNodeDeploymentReconciler) updateStatus(ctx context.Context, group *s
4141
group.Status.ReadyReplicas = readyReplicas
4242
group.Status.Nodes = nodeStatuses
4343

44-
reconcileRolloutStatus(group, nodes)
45-
4644
group.Status.Phase = computeGroupPhase(group, readyReplicas, group.Spec.Replicas, nodes)
4745

4846
group.Status.NetworkingStatus = r.buildNetworkingStatus(group)
@@ -52,37 +50,6 @@ func (r *SeiNodeDeploymentReconciler) updateStatus(ctx context.Context, group *s
5250
return r.Status().Patch(ctx, group, statusBase)
5351
}
5452

55-
func reconcileRolloutStatus(group *seiv1alpha1.SeiNodeDeployment, nodes []seiv1alpha1.SeiNode) {
56-
if group.Status.Rollout == nil || group.Status.Rollout.Strategy != seiv1alpha1.UpdateStrategyInPlace {
57-
return
58-
}
59-
60-
nodePhaseMap := make(map[string]seiv1alpha1.SeiNodePhase, len(nodes))
61-
for i := range nodes {
62-
nodePhaseMap[nodes[i].Name] = nodes[i].Status.Phase
63-
}
64-
65-
allReady := true
66-
for i := range group.Status.Rollout.Nodes {
67-
rn := &group.Status.Rollout.Nodes[i]
68-
phase := nodePhaseMap[rn.Name]
69-
rn.Phase = phase
70-
rn.Ready = phase == seiv1alpha1.PhaseRunning
71-
if !rn.Ready {
72-
allReady = false
73-
}
74-
}
75-
76-
if allReady && !hasConditionTrue(group, seiv1alpha1.ConditionPlanInProgress) {
77-
group.Status.TemplateHash = group.Status.Rollout.TargetHash
78-
group.Status.ObservedGeneration = group.Generation
79-
group.Status.Rollout = nil
80-
setCondition(group, seiv1alpha1.ConditionRolloutInProgress, metav1.ConditionFalse,
81-
"RolloutComplete", "All nodes converged")
82-
return
83-
}
84-
}
85-
8653
func computeGroupPhase(group *seiv1alpha1.SeiNodeDeployment, ready, desired int32, nodes []seiv1alpha1.SeiNode) seiv1alpha1.SeiNodeDeploymentPhase {
8754
if hasConditionTrue(group, seiv1alpha1.ConditionRolloutInProgress) {
8855
return seiv1alpha1.GroupPhaseUpgrading

internal/controller/nodedeployment/status_test.go

Lines changed: 0 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@ import (
44
"testing"
55

66
. "github.com/onsi/gomega"
7-
apimeta "k8s.io/apimachinery/pkg/api/meta"
87
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
98

109
seiv1alpha1 "github.com/sei-protocol/sei-k8s-controller/api/v1alpha1"
@@ -104,94 +103,6 @@ func makeNodes(n int, phase seiv1alpha1.SeiNodePhase) []seiv1alpha1.SeiNode {
104103

105104
// --- NetworkingStatus ---
106105

107-
func TestReconcileRolloutStatus_InPlace_AllReady(t *testing.T) {
108-
g := NewWithT(t)
109-
group := emptyGroup()
110-
group.Generation = 2
111-
group.Status.Rollout = &seiv1alpha1.RolloutStatus{
112-
Strategy: seiv1alpha1.UpdateStrategyInPlace,
113-
TargetHash: "newhash1234",
114-
StartedAt: metav1.Now(),
115-
Nodes: []seiv1alpha1.RolloutNodeStatus{
116-
{Name: "node-0"},
117-
{Name: "node-1"},
118-
},
119-
}
120-
group.Status.TemplateHash = testOldHash
121-
122-
nodes := []seiv1alpha1.SeiNode{
123-
{ObjectMeta: metav1.ObjectMeta{Name: "node-0"}, Status: seiv1alpha1.SeiNodeStatus{Phase: seiv1alpha1.PhaseRunning}},
124-
{ObjectMeta: metav1.ObjectMeta{Name: "node-1"}, Status: seiv1alpha1.SeiNodeStatus{Phase: seiv1alpha1.PhaseRunning}},
125-
}
126-
127-
reconcileRolloutStatus(group, nodes)
128-
129-
g.Expect(group.Status.Rollout).To(BeNil())
130-
g.Expect(group.Status.TemplateHash).To(Equal("newhash1234"))
131-
g.Expect(group.Status.ObservedGeneration).To(Equal(int64(2)))
132-
133-
cond := apimeta.FindStatusCondition(group.Status.Conditions, seiv1alpha1.ConditionRolloutInProgress)
134-
g.Expect(cond).NotTo(BeNil())
135-
g.Expect(cond.Status).To(Equal(metav1.ConditionFalse))
136-
g.Expect(cond.Reason).To(Equal("RolloutComplete"))
137-
}
138-
139-
func TestReconcileRolloutStatus_InPlace_DoesNotClearWhilePlanActive(t *testing.T) {
140-
g := NewWithT(t)
141-
group := emptyGroup()
142-
group.Generation = 2
143-
group.Status.Rollout = &seiv1alpha1.RolloutStatus{
144-
Strategy: seiv1alpha1.UpdateStrategyInPlace,
145-
TargetHash: "newhash1234",
146-
StartedAt: metav1.Now(),
147-
Nodes: []seiv1alpha1.RolloutNodeStatus{
148-
{Name: "node-0"},
149-
{Name: "node-1"},
150-
},
151-
}
152-
group.Status.TemplateHash = testOldHash
153-
setPlanInProgress(group, "Deployment", "deploying")
154-
155-
nodes := []seiv1alpha1.SeiNode{
156-
{ObjectMeta: metav1.ObjectMeta{Name: "node-0"}, Status: seiv1alpha1.SeiNodeStatus{Phase: seiv1alpha1.PhaseRunning}},
157-
{ObjectMeta: metav1.ObjectMeta{Name: "node-1"}, Status: seiv1alpha1.SeiNodeStatus{Phase: seiv1alpha1.PhaseRunning}},
158-
}
159-
160-
reconcileRolloutStatus(group, nodes)
161-
162-
g.Expect(group.Status.Rollout).NotTo(BeNil(), "rollout should not be cleared while PlanInProgress is true")
163-
g.Expect(group.Status.TemplateHash).To(Equal(testOldHash), "templateHash should not change while plan is active")
164-
g.Expect(group.Status.Rollout.Nodes[0].Ready).To(BeTrue())
165-
g.Expect(group.Status.Rollout.Nodes[1].Ready).To(BeTrue())
166-
}
167-
168-
func TestReconcileRolloutStatus_InPlace_Partial(t *testing.T) {
169-
g := NewWithT(t)
170-
group := emptyGroup()
171-
group.Status.Rollout = &seiv1alpha1.RolloutStatus{
172-
Strategy: seiv1alpha1.UpdateStrategyInPlace,
173-
TargetHash: "newhash1234",
174-
StartedAt: metav1.Now(),
175-
Nodes: []seiv1alpha1.RolloutNodeStatus{
176-
{Name: "node-0"},
177-
{Name: "node-1"},
178-
},
179-
}
180-
group.Status.TemplateHash = testOldHash
181-
182-
nodes := []seiv1alpha1.SeiNode{
183-
{ObjectMeta: metav1.ObjectMeta{Name: "node-0"}, Status: seiv1alpha1.SeiNodeStatus{Phase: seiv1alpha1.PhaseRunning}},
184-
{ObjectMeta: metav1.ObjectMeta{Name: "node-1"}, Status: seiv1alpha1.SeiNodeStatus{Phase: seiv1alpha1.PhaseInitializing}},
185-
}
186-
187-
reconcileRolloutStatus(group, nodes)
188-
189-
g.Expect(group.Status.Rollout).NotTo(BeNil())
190-
g.Expect(group.Status.TemplateHash).To(Equal(testOldHash))
191-
g.Expect(group.Status.Rollout.Nodes[0].Ready).To(BeTrue())
192-
g.Expect(group.Status.Rollout.Nodes[1].Ready).To(BeFalse())
193-
}
194-
195106
func TestComputeGroupPhase_RolloutInProgress(t *testing.T) {
196107
g := NewWithT(t)
197108
group := emptyGroup()

manifests/sei.io_seinodedeployments.yaml

Lines changed: 0 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -999,36 +999,6 @@ spec:
999999
IncumbentRevision identifies the generation of the currently live nodes.
10001000
Only populated for BlueGreen and HardFork strategies.
10011001
type: string
1002-
nodes:
1003-
description: Nodes reports per-node rollout state.
1004-
items:
1005-
description: RolloutNodeStatus tracks a single node's convergence
1006-
during a rollout.
1007-
properties:
1008-
name:
1009-
description: Name is the SeiNode resource name.
1010-
type: string
1011-
phase:
1012-
description: Phase is the SeiNode's current phase.
1013-
enum:
1014-
- Pending
1015-
- Initializing
1016-
- Running
1017-
- Failed
1018-
- Terminating
1019-
type: string
1020-
ready:
1021-
description: Ready is true when the node is Running with
1022-
a ready pod.
1023-
type: boolean
1024-
required:
1025-
- name
1026-
- ready
1027-
type: object
1028-
type: array
1029-
x-kubernetes-list-map-keys:
1030-
- name
1031-
x-kubernetes-list-type: map
10321002
startedAt:
10331003
description: StartedAt is when the rollout was first detected.
10341004
format: date-time
@@ -1044,7 +1014,6 @@ spec:
10441014
description: TargetHash is the templateHash being rolled out to.
10451015
type: string
10461016
required:
1047-
- nodes
10481017
- startedAt
10491018
- strategy
10501019
- targetHash

0 commit comments

Comments
 (0)