From 6a2e8f255f28538556ed4c42e76ba645370ccde9 Mon Sep 17 00:00:00 2001 From: Christopher Maher Date: Tue, 19 May 2026 00:46:48 -0700 Subject: [PATCH] fix: report Stopped phase when InferenceService.spec.replicas=0 on Metal path With spec.replicas=0 the metal-agent correctly tears down the runtime process, but the operator's determinePhase function had no replicas==0 branch on the Metal path, so it always returned Creating/WaitingForMetalAgent regardless of whether the user had intentionally stopped the service. The fix adds an early return in determinePhase: when desiredReplicas==0 and readyReplicas==0 it now returns PhaseStopped instead of falling through to the Metal Creating path. PhaseStopped is defined alongside the other phase constants in model_controller.go. Two regression tests cover both the generic and Metal code paths. Fixes #489 Signed-off-by: Christopher Maher --- .../inferenceservice_reconcile_test.go | 18 ++++++++++++++++++ internal/controller/model_controller.go | 1 + internal/controller/scheduling.go | 3 +++ 3 files changed, 22 insertions(+) diff --git a/internal/controller/inferenceservice_reconcile_test.go b/internal/controller/inferenceservice_reconcile_test.go index 3e524251..5659fc45 100644 --- a/internal/controller/inferenceservice_reconcile_test.go +++ b/internal/controller/inferenceservice_reconcile_test.go @@ -296,6 +296,24 @@ var _ = Describe("determinePhase", func() { phase, _ := reconciler.determinePhase(context.Background(), isvc, 0, 1, true, nil) Expect(phase).To(Equal("Creating")) }) + + It("should return Stopped when replicas=0 on generic path", func() { + isvc := &inferencev1alpha1.InferenceService{ + ObjectMeta: metav1.ObjectMeta{Name: "test", Namespace: "default"}, + } + phase, info := reconciler.determinePhase(context.Background(), isvc, 0, 0, false, &appsv1.Deployment{}) + Expect(phase).To(Equal(PhaseStopped)) + Expect(info).To(BeNil()) + }) + + It("should return Stopped when replicas=0 on Metal path", func() { + isvc := &inferencev1alpha1.InferenceService{ + ObjectMeta: metav1.ObjectMeta{Name: "test", Namespace: "default"}, + } + phase, info := reconciler.determinePhase(context.Background(), isvc, 0, 0, true, nil) + Expect(phase).To(Equal(PhaseStopped)) + Expect(info).To(BeNil()) + }) }) var _ = Describe("findInferenceServiceForPod", func() { diff --git a/internal/controller/model_controller.go b/internal/controller/model_controller.go index 189731ad..8ca30db6 100644 --- a/internal/controller/model_controller.go +++ b/internal/controller/model_controller.go @@ -49,6 +49,7 @@ const ( PhaseFailed = "Failed" PhaseCached = "Cached" PhaseCreating = "Creating" + PhaseStopped = "Stopped" // acceleratorMetal is the Model.Spec.Hardware.Accelerator value for the // host metal-agent path. acceleratorMetal = "metal" diff --git a/internal/controller/scheduling.go b/internal/controller/scheduling.go index 4e2ebf54..61690ebe 100644 --- a/internal/controller/scheduling.go +++ b/internal/controller/scheduling.go @@ -72,6 +72,9 @@ func (r *InferenceServiceReconciler) determinePhase(ctx context.Context, isvc *i if readyReplicas > 0 { return "Progressing", nil } + if desiredReplicas == 0 && readyReplicas == 0 { + return PhaseStopped, nil + } if !isMetal && deployment != nil { schedulingInfo, err := r.getPodSchedulingInfo(ctx, isvc) if err != nil {