From 554d493b1a7e8b552ea88bed638bf785f2c95a10 Mon Sep 17 00:00:00 2001 From: Fabian Wiesel Date: Thu, 2 Apr 2026 10:30:51 +0200 Subject: [PATCH] HypervisorOffboarding: Decomission & Node are incorrect The reconciler was already reconciling on a hypervisor custom-resource, so reflect that in the name. "Decomission" is misspelled, so let's go with offboarding as it matches onboarding on the other side, and also we may offboard a hypervisor for various reasons, decommissioning being one of them. --- cmd/main.go | 4 +- ...ontroller.go => offboarding_controller.go} | 42 +++++------ ...test.go => offboarding_controller_test.go} | 72 +++++++++---------- 3 files changed, 59 insertions(+), 59 deletions(-) rename internal/controller/{decomission_controller.go => offboarding_controller.go} (80%) rename internal/controller/{decomission_controller_test.go => offboarding_controller_test.go} (87%) diff --git a/cmd/main.go b/cmd/main.go index 525d81b..a9fe3a3 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -260,11 +260,11 @@ func main() { os.Exit(1) } - if err = (&controller.NodeDecommissionReconciler{ + if err = (&controller.HypervisorOffboardingReconciler{ Client: mgr.GetClient(), Scheme: mgr.GetScheme(), }).SetupWithManager(mgr); err != nil { - setupLog.Error(err, "unable to create controller", "controller", "NodeDecommission") + setupLog.Error(err, "unable to create controller", "controller", "HypervisorOffboarding") os.Exit(1) } diff --git a/internal/controller/decomission_controller.go b/internal/controller/offboarding_controller.go similarity index 80% rename from internal/controller/decomission_controller.go rename to internal/controller/offboarding_controller.go index 1bbd18a..8790dfd 100644 --- a/internal/controller/decomission_controller.go +++ b/internal/controller/offboarding_controller.go @@ -1,5 +1,5 @@ /* -SPDX-FileCopyrightText: Copyright 2024 SAP SE or an SAP affiliate company and cobaltcore-dev contributors +SPDX-FileCopyrightText: Copyright 2026 SAP SE or an SAP 
affiliate company and cobaltcore-dev contributors SPDX-License-Identifier: Apache-2.0 Licensed under the Apache License, Version 2.0 (the "License"); @@ -38,10 +38,10 @@ import ( ) const ( - DecommissionControllerName = "offboarding" + OffboardingControllerName = "offboarding" ) -type NodeDecommissionReconciler struct { +type HypervisorOffboardingReconciler struct { k8sclient.Client Scheme *runtime.Scheme computeClient *gophercloud.ServiceClient @@ -53,7 +53,7 @@ type NodeDecommissionReconciler struct { // +kubebuilder:rbac:groups=kvm.cloud.sap,resources=hypervisors,verbs=get;list;watch // +kubebuilder:rbac:groups=kvm.cloud.sap,resources=hypervisors/status,verbs=get;list;watch;update;patch -func (r *NodeDecommissionReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { +func (r *HypervisorOffboardingReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { hostname := req.Name log := logger.FromContext(ctx).WithName(req.Name).WithValues("hostname", hostname) ctx = logger.IntoContext(ctx, log) @@ -69,7 +69,7 @@ func (r *NodeDecommissionReconciler) Reconcile(ctx context.Context, req ctrl.Req } if meta.IsStatusConditionTrue(hv.Status.Conditions, kvmv1.ConditionTypeReady) { - return r.setDecommissioningCondition(ctx, hv, "Node is being decommissioned, removing host from nova") + return r.setOffboardingCondition(ctx, hv, "Hypervisor is being offboarded, removing host from nova") } if meta.IsStatusConditionTrue(hv.Status.Conditions, kvmv1.ConditionTypeOffboarded) { @@ -95,22 +95,22 @@ func (r *NodeDecommissionReconciler) Reconcile(ctx context.Context, req ctrl.Req // We are (hopefully) done return ctrl.Result{}, r.markOffboarded(ctx, hv) } - return r.setDecommissioningCondition(ctx, hv, fmt.Sprintf("cannot get hypervisor by name %s due to %s", hostname, err)) + return r.setOffboardingCondition(ctx, hv, fmt.Sprintf("cannot get hypervisor by name %s due to %s", hostname, err)) } // TODO: remove since RunningVMs is only 
available until micro-version 2.87, and also is updated asynchronously // so it might be not accurate if hypervisor.RunningVMs > 0 { // Still running VMs, cannot delete the service - msg := fmt.Sprintf("Node is being decommissioned, but still has %d running VMs", hypervisor.RunningVMs) - return r.setDecommissioningCondition(ctx, hv, msg) + msg := fmt.Sprintf("Hypervisor is being offboarded, but still has %d running VMs", hypervisor.RunningVMs) + return r.setOffboardingCondition(ctx, hv, msg) } if hypervisor.Servers != nil && len(*hypervisor.Servers) > 0 { // Still VMs assigned to the host, cannot delete the service - msg := fmt.Sprintf("Node is being decommissioned, but still has %d assigned VMs, "+ + msg := fmt.Sprintf("Hypervisor is being offboarded, but still has %d assigned VMs, "+ "check with `openstack server list --all-projects --host %s`", len(*hypervisor.Servers), hostname) - return r.setDecommissioningCondition(ctx, hv, msg) + return r.setOffboardingCondition(ctx, hv, msg) } // Wait for aggregates controller to remove from all aggregates @@ -120,44 +120,44 @@ func (r *NodeDecommissionReconciler) Reconcile(ctx context.Context, req ctrl.Req aggregateNames[i] = agg.Name } msg := fmt.Sprintf("Waiting for aggregates to be removed, current: %v", aggregateNames) - return r.setDecommissioningCondition(ctx, hv, msg) + return r.setOffboardingCondition(ctx, hv, msg) } // Deleting and evicted, so better delete the service err = services.Delete(ctx, r.computeClient, hypervisor.Service.ID).ExtractErr() if err != nil && !gophercloud.ResponseCodeIs(err, http.StatusNotFound) { msg := fmt.Sprintf("cannot delete service %s due to %v", hypervisor.Service.ID, err) - return r.setDecommissioningCondition(ctx, hv, msg) + return r.setOffboardingCondition(ctx, hv, msg) } rp, err := resourceproviders.Get(ctx, r.placementClient, hypervisor.ID).Extract() if err != nil && !gophercloud.ResponseCodeIs(err, http.StatusNotFound) { - return r.setDecommissioningCondition(ctx, hv, 
fmt.Sprintf("cannot get resource provider: %v", err)) + return r.setOffboardingCondition(ctx, hv, fmt.Sprintf("cannot get resource provider: %v", err)) } if err = openstack.CleanupResourceProvider(ctx, r.placementClient, rp); err != nil { - return r.setDecommissioningCondition(ctx, hv, fmt.Sprintf("cannot clean up resource provider: %v", err)) + return r.setOffboardingCondition(ctx, hv, fmt.Sprintf("cannot clean up resource provider: %v", err)) } return ctrl.Result{}, r.markOffboarded(ctx, hv) } -func (r *NodeDecommissionReconciler) setDecommissioningCondition(ctx context.Context, hv *kvmv1.Hypervisor, message string) (ctrl.Result, error) { +func (r *HypervisorOffboardingReconciler) setOffboardingCondition(ctx context.Context, hv *kvmv1.Hypervisor, message string) (ctrl.Result, error) { base := hv.DeepCopy() meta.SetStatusCondition(&hv.Status.Conditions, metav1.Condition{ Type: kvmv1.ConditionTypeReady, Status: metav1.ConditionFalse, - Reason: "Decommissioning", + Reason: "Offboarding", Message: message, }) if err := r.Status().Patch(ctx, hv, k8sclient.MergeFromWithOptions(base, - k8sclient.MergeFromWithOptimisticLock{}), k8sclient.FieldOwner(DecommissionControllerName)); err != nil { + k8sclient.MergeFromWithOptimisticLock{}), k8sclient.FieldOwner(OffboardingControllerName)); err != nil { return ctrl.Result{}, fmt.Errorf("cannot update hypervisor status due to %w", err) } return ctrl.Result{}, nil } -func (r *NodeDecommissionReconciler) markOffboarded(ctx context.Context, hv *kvmv1.Hypervisor) error { +func (r *HypervisorOffboardingReconciler) markOffboarded(ctx context.Context, hv *kvmv1.Hypervisor) error { base := hv.DeepCopy() meta.SetStatusCondition(&hv.Status.Conditions, metav1.Condition{ Type: kvmv1.ConditionTypeOffboarded, @@ -172,14 +172,14 @@ func (r *NodeDecommissionReconciler) markOffboarded(ctx context.Context, hv *kvm Message: "Offboarding successful", }) if err := r.Status().Patch(ctx, hv, k8sclient.MergeFromWithOptions(base, - 
k8sclient.MergeFromWithOptimisticLock{}), k8sclient.FieldOwner(DecommissionControllerName)); err != nil { + k8sclient.MergeFromWithOptimisticLock{}), k8sclient.FieldOwner(OffboardingControllerName)); err != nil { return fmt.Errorf("cannot update hypervisor status due to %w", err) } return nil } // SetupWithManager sets up the controller with the Manager. -func (r *NodeDecommissionReconciler) SetupWithManager(mgr ctrl.Manager) error { +func (r *HypervisorOffboardingReconciler) SetupWithManager(mgr ctrl.Manager) error { ctx := context.Background() var err error @@ -196,7 +196,7 @@ func (r *NodeDecommissionReconciler) SetupWithManager(mgr ctrl.Manager) error { r.placementClient.Microversion = "1.39" // yoga, or later return ctrl.NewControllerManagedBy(mgr). - Named(DecommissionControllerName). + Named(OffboardingControllerName). For(&kvmv1.Hypervisor{}). Complete(r) } diff --git a/internal/controller/decomission_controller_test.go b/internal/controller/offboarding_controller_test.go similarity index 87% rename from internal/controller/decomission_controller_test.go rename to internal/controller/offboarding_controller_test.go index 8b6308d..49107e0 100644 --- a/internal/controller/decomission_controller_test.go +++ b/internal/controller/offboarding_controller_test.go @@ -1,5 +1,5 @@ /* -SPDX-FileCopyrightText: Copyright 2024 SAP SE or an SAP affiliate company and cobaltcore-dev contributors +SPDX-FileCopyrightText: Copyright 2026 SAP SE or an SAP affiliate company and cobaltcore-dev contributors SPDX-License-Identifier: Apache-2.0 Licensed under the Apache License, Version 2.0 (the "License"); @@ -36,7 +36,7 @@ import ( kvmv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1" ) -var _ = Describe("Decommission Controller", func() { +var _ = Describe("Offboarding Controller", func() { const ( EOF = "EOF" serviceId = "service-1234" @@ -45,10 +45,10 @@ var _ = Describe("Decommission Controller", func() { ) var ( - decommissionReconciler 
*NodeDecommissionReconciler - resourceName = types.NamespacedName{Name: hypervisorName} - reconcileReq = ctrl.Request{NamespacedName: resourceName} - fakeServer testhelper.FakeServer + offboardingReconciler *HypervisorOffboardingReconciler + resourceName = types.NamespacedName{Name: hypervisorName} + reconcileReq = ctrl.Request{NamespacedName: resourceName} + fakeServer testhelper.FakeServer ) BeforeEach(func(ctx SpecContext) { @@ -67,8 +67,8 @@ var _ = Describe("Decommission Controller", func() { os.Unsetenv("KVM_HA_SERVICE_URL") }) - By("Creating the NodeDecommissionReconciler") - decommissionReconciler = &NodeDecommissionReconciler{ + By("Creating the HypervisorOffboardingReconciler") + offboardingReconciler = &HypervisorOffboardingReconciler{ Client: k8sClient, Scheme: k8sClient.Scheme(), computeClient: client.ServiceClient(fakeServer), @@ -101,7 +101,7 @@ var _ = Describe("Decommission Controller", func() { Context("When marking the hypervisor terminating", func() { JustBeforeEach(func(ctx SpecContext) { By("Reconciling first to add the finalizer") - _, err := decommissionReconciler.Reconcile(ctx, reconcileReq) + _, err := offboardingReconciler.Reconcile(ctx, reconcileReq) Expect(err).NotTo(HaveOccurred()) By("Marking the hypervisor for termination") @@ -176,7 +176,7 @@ var _ = Describe("Decommission Controller", func() { }) It("should set the hypervisor ready condition", func(ctx SpecContext) { - _, err := decommissionReconciler.Reconcile(ctx, reconcileReq) + _, err := offboardingReconciler.Reconcile(ctx, reconcileReq) Expect(err).NotTo(HaveOccurred()) hypervisor := &kvmv1.Hypervisor{} @@ -185,14 +185,14 @@ var _ = Describe("Decommission Controller", func() { SatisfyAll( HaveField("Type", kvmv1.ConditionTypeReady), HaveField("Status", metav1.ConditionFalse), - HaveField("Reason", "Decommissioning"), + HaveField("Reason", "Offboarding"), ), )) }) It("should set the hypervisor offboarded condition", func(ctx SpecContext) { for range 3 { - _, err := 
decommissionReconciler.Reconcile(ctx, reconcileReq) + _, err := offboardingReconciler.Reconcile(ctx, reconcileReq) Expect(err).NotTo(HaveOccurred()) } Expect(getHypervisorsCalled).To(BeNumerically(">", 0)) @@ -227,8 +227,8 @@ var _ = Describe("Decommission Controller", func() { hypervisor.Status.HypervisorID = "c48f6247-abe4-4a24-824e-ea39e108874f" Expect(k8sClient.Status().Update(ctx, hypervisor)).To(Succeed()) - By("Reconciling - decommission controller will wait for aggregates to be cleared") - _, err := decommissionReconciler.Reconcile(ctx, reconcileReq) + By("Reconciling - offboarding controller will wait for aggregates to be cleared") + _, err := offboardingReconciler.Reconcile(ctx, reconcileReq) Expect(err).NotTo(HaveOccurred()) By("Simulating aggregates controller clearing aggregates") @@ -238,13 +238,13 @@ var _ = Describe("Decommission Controller", func() { By("Reconciling again after aggregates are cleared") for range 3 { - _, err := decommissionReconciler.Reconcile(ctx, reconcileReq) + _, err := offboardingReconciler.Reconcile(ctx, reconcileReq) Expect(err).NotTo(HaveOccurred()) } By("Verifying Status.Aggregates is empty") Expect(k8sClient.Get(ctx, resourceName, hypervisor)).To(Succeed()) - Expect(hypervisor.Status.Aggregates).To(BeEmpty(), "Status.Aggregates should be cleared after decommissioning") + Expect(hypervisor.Status.Aggregates).To(BeEmpty(), "Status.Aggregates should be cleared after offboarding") }) }) }) @@ -252,7 +252,7 @@ var _ = Describe("Decommission Controller", func() { Context("Guard Conditions", func() { JustBeforeEach(func(ctx SpecContext) { - result, err := decommissionReconciler.Reconcile(ctx, reconcileReq) + result, err := offboardingReconciler.Reconcile(ctx, reconcileReq) Expect(err).NotTo(HaveOccurred()) Expect(result).To(Equal(ctrl.Result{})) }) @@ -291,14 +291,14 @@ var _ = Describe("Decommission Controller", func() { }) Context("Failure Modes", func() { - var sharedDecommissioningErrorCheck = func(ctx SpecContext, 
expectedMessageSubstring string) { + var sharedOffboardingErrorCheck = func(ctx SpecContext, expectedMessageSubstring string) { hypervisor := &kvmv1.Hypervisor{} Expect(k8sClient.Get(ctx, resourceName, hypervisor)).To(Succeed()) Expect(hypervisor.Status.Conditions).To(ContainElement( SatisfyAll( HaveField("Type", kvmv1.ConditionTypeReady), HaveField("Status", metav1.ConditionFalse), - HaveField("Reason", "Decommissioning"), + HaveField("Reason", "Offboarding"), HaveField("Message", ContainSubstring(expectedMessageSubstring)), ), )) @@ -320,10 +320,10 @@ var _ = Describe("Decommission Controller", func() { }) }) - It("should set decommissioning condition with error", func(ctx SpecContext) { - _, err := decommissionReconciler.Reconcile(ctx, reconcileReq) + It("should set offboarding condition with error", func(ctx SpecContext) { + _, err := offboardingReconciler.Reconcile(ctx, reconcileReq) Expect(err).NotTo(HaveOccurred()) - sharedDecommissioningErrorCheck(ctx, "") + sharedOffboardingErrorCheck(ctx, "") }) }) @@ -352,10 +352,10 @@ var _ = Describe("Decommission Controller", func() { }) }) - It("should set decommissioning condition about running VMs", func(ctx SpecContext) { - _, err := decommissionReconciler.Reconcile(ctx, reconcileReq) + It("should set offboarding condition about running VMs", func(ctx SpecContext) { + _, err := offboardingReconciler.Reconcile(ctx, reconcileReq) Expect(err).NotTo(HaveOccurred()) - sharedDecommissioningErrorCheck(ctx, "still has 2 running VMs") + sharedOffboardingErrorCheck(ctx, "still has 2 running VMs") }) }) @@ -391,9 +391,9 @@ var _ = Describe("Decommission Controller", func() { }) It("should wait for aggregates to be cleared", func(ctx SpecContext) { - _, err := decommissionReconciler.Reconcile(ctx, reconcileReq) + _, err := offboardingReconciler.Reconcile(ctx, reconcileReq) Expect(err).NotTo(HaveOccurred()) - sharedDecommissioningErrorCheck(ctx, "Waiting for aggregates to be removed") + sharedOffboardingErrorCheck(ctx, 
"Waiting for aggregates to be removed") }) }) @@ -424,10 +424,10 @@ var _ = Describe("Decommission Controller", func() { }) }) - It("should set decommissioning condition with error", func(ctx SpecContext) { - _, err := decommissionReconciler.Reconcile(ctx, reconcileReq) + It("should set offboarding condition with error", func(ctx SpecContext) { + _, err := offboardingReconciler.Reconcile(ctx, reconcileReq) Expect(err).NotTo(HaveOccurred()) - sharedDecommissioningErrorCheck(ctx, "cannot delete service") + sharedOffboardingErrorCheck(ctx, "cannot delete service") }) }) @@ -462,10 +462,10 @@ var _ = Describe("Decommission Controller", func() { }) }) - It("should set decommissioning condition with error", func(ctx SpecContext) { - _, err := decommissionReconciler.Reconcile(ctx, reconcileReq) + It("should set offboarding condition with error", func(ctx SpecContext) { + _, err := offboardingReconciler.Reconcile(ctx, reconcileReq) Expect(err).NotTo(HaveOccurred()) - sharedDecommissioningErrorCheck(ctx, "cannot get resource provider") + sharedOffboardingErrorCheck(ctx, "cannot get resource provider") }) }) @@ -512,10 +512,10 @@ var _ = Describe("Decommission Controller", func() { }) }) - It("should set decommissioning condition with error", func(ctx SpecContext) { - _, err := decommissionReconciler.Reconcile(ctx, reconcileReq) + It("should set offboarding condition with error", func(ctx SpecContext) { + _, err := offboardingReconciler.Reconcile(ctx, reconcileReq) Expect(err).NotTo(HaveOccurred()) - sharedDecommissioningErrorCheck(ctx, "cannot clean up resource provider") + sharedOffboardingErrorCheck(ctx, "cannot clean up resource provider") }) }) })