Skip to content

Commit e93b40b

Browse files
committed
fix: advance pre-init plan when Job completes successfully
When the pre-init Job finishes, the sidecar container is already terminated and unreachable. Instead of polling a dead sidecar indefinitely, check the Job status directly: Complete means all tasks succeeded (the sidecar only kills seid after await-condition passes), Failed means something went wrong.
1 parent 9380a86 commit e93b40b

1 file changed

Lines changed: 25 additions & 0 deletions

File tree

internal/controller/node/pre_init.go

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,21 @@ func (r *SeiNodeReconciler) reconcilePreInitializing(ctx context.Context, node *
6767
return ctrl.Result{RequeueAfter: immediateRequeue}, nil
6868
}
6969

70+
if isJobComplete(job) {
71+
log.FromContext(ctx).Info("pre-init job completed, advancing plan", "job", job.Name)
72+
patch := client.MergeFrom(node.DeepCopy())
73+
for i := range plan.Tasks {
74+
if plan.Tasks[i].Status != seiv1alpha1.PlannedTaskComplete {
75+
plan.Tasks[i].Status = seiv1alpha1.PlannedTaskComplete
76+
}
77+
}
78+
plan.Phase = seiv1alpha1.TaskPlanComplete
79+
if err := r.Status().Patch(ctx, node, patch); err != nil {
80+
return ctrl.Result{}, fmt.Errorf("marking pre-init plan complete after job success: %w", err)
81+
}
82+
return ctrl.Result{RequeueAfter: immediateRequeue}, nil
83+
}
84+
7085
sc := r.buildJobSidecarClient(node)
7186
if sc == nil {
7287
log.FromContext(ctx).Info("pre-init job sidecar not reachable yet, will retry")
@@ -158,6 +173,16 @@ func isJobFailed(job *batchv1.Job) bool {
158173
return false
159174
}
160175

176+
// isJobComplete returns true if the Job has a Complete condition.
177+
func isJobComplete(job *batchv1.Job) bool {
178+
for _, c := range job.Status.Conditions {
179+
if c.Type == batchv1.JobComplete && c.Status == corev1.ConditionTrue {
180+
return true
181+
}
182+
}
183+
return false
184+
}
185+
161186
// jobFailureReason extracts a human-readable failure reason from the Job's conditions.
162187
func jobFailureReason(job *batchv1.Job) string {
163188
for _, c := range job.Status.Conditions {

0 commit comments

Comments
 (0)