Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -260,11 +260,11 @@ func main() {
os.Exit(1)
}

if err = (&controller.NodeDecommissionReconciler{
if err = (&controller.HypervisorOffboardingReconciler{
Client: mgr.GetClient(),
Scheme: mgr.GetScheme(),
}).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "NodeDecommission")
setupLog.Error(err, "unable to create controller", "controller", "HypervisorOffboarding")
os.Exit(1)
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
SPDX-FileCopyrightText: Copyright 2024 SAP SE or an SAP affiliate company and cobaltcore-dev contributors
SPDX-FileCopyrightText: Copyright 2026 SAP SE or an SAP affiliate company and cobaltcore-dev contributors
SPDX-License-Identifier: Apache-2.0

Licensed under the Apache License, Version 2.0 (the "License");
Expand Down Expand Up @@ -38,10 +38,10 @@ import (
)

const (
DecommissionControllerName = "offboarding"
OffboardingControllerName = "offboarding"
)

type NodeDecommissionReconciler struct {
type HypervisorOffboardingReconciler struct {
k8sclient.Client
Scheme *runtime.Scheme
computeClient *gophercloud.ServiceClient
Expand All @@ -53,7 +53,7 @@ type NodeDecommissionReconciler struct {

// +kubebuilder:rbac:groups=kvm.cloud.sap,resources=hypervisors,verbs=get;list;watch
// +kubebuilder:rbac:groups=kvm.cloud.sap,resources=hypervisors/status,verbs=get;list;watch;update;patch
func (r *NodeDecommissionReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
func (r *HypervisorOffboardingReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
hostname := req.Name
log := logger.FromContext(ctx).WithName(req.Name).WithValues("hostname", hostname)
ctx = logger.IntoContext(ctx, log)
Expand All @@ -69,7 +69,7 @@ func (r *NodeDecommissionReconciler) Reconcile(ctx context.Context, req ctrl.Req
}

if meta.IsStatusConditionTrue(hv.Status.Conditions, kvmv1.ConditionTypeReady) {
return r.setDecommissioningCondition(ctx, hv, "Node is being decommissioned, removing host from nova")
return r.setOffboardingCondition(ctx, hv, "Hypervisor is being offboarded, removing host from nova")
}

if meta.IsStatusConditionTrue(hv.Status.Conditions, kvmv1.ConditionTypeOffboarded) {
Expand All @@ -95,22 +95,22 @@ func (r *NodeDecommissionReconciler) Reconcile(ctx context.Context, req ctrl.Req
// We are (hopefully) done
return ctrl.Result{}, r.markOffboarded(ctx, hv)
}
return r.setDecommissioningCondition(ctx, hv, fmt.Sprintf("cannot get hypervisor by name %s due to %s", hostname, err))
return r.setOffboardingCondition(ctx, hv, fmt.Sprintf("cannot get hypervisor by name %s due to %s", hostname, err))
}

// TODO: remove since RunningVMs is only available until micro-version 2.87, and also is updated asynchronously
// so it might be not accurate
if hypervisor.RunningVMs > 0 {
// Still running VMs, cannot delete the service
msg := fmt.Sprintf("Node is being decommissioned, but still has %d running VMs", hypervisor.RunningVMs)
return r.setDecommissioningCondition(ctx, hv, msg)
msg := fmt.Sprintf("Hypervisor is being offboarded, but still has %d running VMs", hypervisor.RunningVMs)
return r.setOffboardingCondition(ctx, hv, msg)
}

if hypervisor.Servers != nil && len(*hypervisor.Servers) > 0 {
// Still VMs assigned to the host, cannot delete the service
msg := fmt.Sprintf("Node is being decommissioned, but still has %d assigned VMs, "+
msg := fmt.Sprintf("Hypervisor is being offboarded, but still has %d assigned VMs, "+
"check with `openstack server list --all-projects --host %s`", len(*hypervisor.Servers), hostname)
return r.setDecommissioningCondition(ctx, hv, msg)
return r.setOffboardingCondition(ctx, hv, msg)
}

// Wait for aggregates controller to remove from all aggregates
Expand All @@ -120,44 +120,44 @@ func (r *NodeDecommissionReconciler) Reconcile(ctx context.Context, req ctrl.Req
aggregateNames[i] = agg.Name
}
msg := fmt.Sprintf("Waiting for aggregates to be removed, current: %v", aggregateNames)
return r.setDecommissioningCondition(ctx, hv, msg)
return r.setOffboardingCondition(ctx, hv, msg)
}

// Deleting and evicted, so better delete the service
err = services.Delete(ctx, r.computeClient, hypervisor.Service.ID).ExtractErr()
if err != nil && !gophercloud.ResponseCodeIs(err, http.StatusNotFound) {
msg := fmt.Sprintf("cannot delete service %s due to %v", hypervisor.Service.ID, err)
return r.setDecommissioningCondition(ctx, hv, msg)
return r.setOffboardingCondition(ctx, hv, msg)
}

rp, err := resourceproviders.Get(ctx, r.placementClient, hypervisor.ID).Extract()
if err != nil && !gophercloud.ResponseCodeIs(err, http.StatusNotFound) {
return r.setDecommissioningCondition(ctx, hv, fmt.Sprintf("cannot get resource provider: %v", err))
return r.setOffboardingCondition(ctx, hv, fmt.Sprintf("cannot get resource provider: %v", err))
}

if err = openstack.CleanupResourceProvider(ctx, r.placementClient, rp); err != nil {
return r.setDecommissioningCondition(ctx, hv, fmt.Sprintf("cannot clean up resource provider: %v", err))
return r.setOffboardingCondition(ctx, hv, fmt.Sprintf("cannot clean up resource provider: %v", err))
}

return ctrl.Result{}, r.markOffboarded(ctx, hv)
}

func (r *NodeDecommissionReconciler) setDecommissioningCondition(ctx context.Context, hv *kvmv1.Hypervisor, message string) (ctrl.Result, error) {
func (r *HypervisorOffboardingReconciler) setOffboardingCondition(ctx context.Context, hv *kvmv1.Hypervisor, message string) (ctrl.Result, error) {
base := hv.DeepCopy()
meta.SetStatusCondition(&hv.Status.Conditions, metav1.Condition{
Type: kvmv1.ConditionTypeReady,
Status: metav1.ConditionFalse,
Reason: "Decommissioning",
Reason: "Offboarding",
Message: message,
})
if err := r.Status().Patch(ctx, hv, k8sclient.MergeFromWithOptions(base,
k8sclient.MergeFromWithOptimisticLock{}), k8sclient.FieldOwner(DecommissionControllerName)); err != nil {
k8sclient.MergeFromWithOptimisticLock{}), k8sclient.FieldOwner(OffboardingControllerName)); err != nil {
return ctrl.Result{}, fmt.Errorf("cannot update hypervisor status due to %w", err)
}
return ctrl.Result{}, nil
}

func (r *NodeDecommissionReconciler) markOffboarded(ctx context.Context, hv *kvmv1.Hypervisor) error {
func (r *HypervisorOffboardingReconciler) markOffboarded(ctx context.Context, hv *kvmv1.Hypervisor) error {
base := hv.DeepCopy()
meta.SetStatusCondition(&hv.Status.Conditions, metav1.Condition{
Type: kvmv1.ConditionTypeOffboarded,
Expand All @@ -172,14 +172,14 @@ func (r *NodeDecommissionReconciler) markOffboarded(ctx context.Context, hv *kvm
Message: "Offboarding successful",
})
if err := r.Status().Patch(ctx, hv, k8sclient.MergeFromWithOptions(base,
k8sclient.MergeFromWithOptimisticLock{}), k8sclient.FieldOwner(DecommissionControllerName)); err != nil {
k8sclient.MergeFromWithOptimisticLock{}), k8sclient.FieldOwner(OffboardingControllerName)); err != nil {
return fmt.Errorf("cannot update hypervisor status due to %w", err)
}
return nil
}

// SetupWithManager sets up the controller with the Manager.
func (r *NodeDecommissionReconciler) SetupWithManager(mgr ctrl.Manager) error {
func (r *HypervisorOffboardingReconciler) SetupWithManager(mgr ctrl.Manager) error {
ctx := context.Background()

var err error
Expand All @@ -196,7 +196,7 @@ func (r *NodeDecommissionReconciler) SetupWithManager(mgr ctrl.Manager) error {
r.placementClient.Microversion = "1.39" // yoga, or later

return ctrl.NewControllerManagedBy(mgr).
Named(DecommissionControllerName).
Named(OffboardingControllerName).
For(&kvmv1.Hypervisor{}).
Complete(r)
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
SPDX-FileCopyrightText: Copyright 2024 SAP SE or an SAP affiliate company and cobaltcore-dev contributors
SPDX-FileCopyrightText: Copyright 2026 SAP SE or an SAP affiliate company and cobaltcore-dev contributors
SPDX-License-Identifier: Apache-2.0

Licensed under the Apache License, Version 2.0 (the "License");
Expand Down Expand Up @@ -36,7 +36,7 @@ import (
kvmv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1"
)

var _ = Describe("Decommission Controller", func() {
var _ = Describe("Offboarding Controller", func() {
const (
EOF = "EOF"
serviceId = "service-1234"
Expand All @@ -45,10 +45,10 @@ var _ = Describe("Decommission Controller", func() {
)

var (
decommissionReconciler *NodeDecommissionReconciler
resourceName = types.NamespacedName{Name: hypervisorName}
reconcileReq = ctrl.Request{NamespacedName: resourceName}
fakeServer testhelper.FakeServer
offboardingReconciler *HypervisorOffboardingReconciler
resourceName = types.NamespacedName{Name: hypervisorName}
reconcileReq = ctrl.Request{NamespacedName: resourceName}
fakeServer testhelper.FakeServer
)

BeforeEach(func(ctx SpecContext) {
Expand All @@ -67,8 +67,8 @@ var _ = Describe("Decommission Controller", func() {
os.Unsetenv("KVM_HA_SERVICE_URL")
})

By("Creating the NodeDecommissionReconciler")
decommissionReconciler = &NodeDecommissionReconciler{
By("Creating the HypervisorOffboardingReconciler")
offboardingReconciler = &HypervisorOffboardingReconciler{
Client: k8sClient,
Scheme: k8sClient.Scheme(),
computeClient: client.ServiceClient(fakeServer),
Expand Down Expand Up @@ -101,7 +101,7 @@ var _ = Describe("Decommission Controller", func() {
Context("When marking the hypervisor terminating", func() {
JustBeforeEach(func(ctx SpecContext) {
By("Reconciling first to add the finalizer")
_, err := decommissionReconciler.Reconcile(ctx, reconcileReq)
_, err := offboardingReconciler.Reconcile(ctx, reconcileReq)
Expect(err).NotTo(HaveOccurred())

By("Marking the hypervisor for termination")
Expand Down Expand Up @@ -176,7 +176,7 @@ var _ = Describe("Decommission Controller", func() {
})

It("should set the hypervisor ready condition", func(ctx SpecContext) {
_, err := decommissionReconciler.Reconcile(ctx, reconcileReq)
_, err := offboardingReconciler.Reconcile(ctx, reconcileReq)
Expect(err).NotTo(HaveOccurred())

hypervisor := &kvmv1.Hypervisor{}
Expand All @@ -185,14 +185,14 @@ var _ = Describe("Decommission Controller", func() {
SatisfyAll(
HaveField("Type", kvmv1.ConditionTypeReady),
HaveField("Status", metav1.ConditionFalse),
HaveField("Reason", "Decommissioning"),
HaveField("Reason", "Offboarding"),
),
))
})

It("should set the hypervisor offboarded condition", func(ctx SpecContext) {
for range 3 {
_, err := decommissionReconciler.Reconcile(ctx, reconcileReq)
_, err := offboardingReconciler.Reconcile(ctx, reconcileReq)
Expect(err).NotTo(HaveOccurred())
}
Expect(getHypervisorsCalled).To(BeNumerically(">", 0))
Expand Down Expand Up @@ -227,8 +227,8 @@ var _ = Describe("Decommission Controller", func() {
hypervisor.Status.HypervisorID = "c48f6247-abe4-4a24-824e-ea39e108874f"
Expect(k8sClient.Status().Update(ctx, hypervisor)).To(Succeed())

By("Reconciling - decommission controller will wait for aggregates to be cleared")
_, err := decommissionReconciler.Reconcile(ctx, reconcileReq)
By("Reconciling - offboarding controller will wait for aggregates to be cleared")
_, err := offboardingReconciler.Reconcile(ctx, reconcileReq)
Expect(err).NotTo(HaveOccurred())

By("Simulating aggregates controller clearing aggregates")
Expand All @@ -238,21 +238,21 @@ var _ = Describe("Decommission Controller", func() {

By("Reconciling again after aggregates are cleared")
for range 3 {
_, err := decommissionReconciler.Reconcile(ctx, reconcileReq)
_, err := offboardingReconciler.Reconcile(ctx, reconcileReq)
Expect(err).NotTo(HaveOccurred())
}

By("Verifying Status.Aggregates is empty")
Expect(k8sClient.Get(ctx, resourceName, hypervisor)).To(Succeed())
Expect(hypervisor.Status.Aggregates).To(BeEmpty(), "Status.Aggregates should be cleared after decommissioning")
Expect(hypervisor.Status.Aggregates).To(BeEmpty(), "Status.Aggregates should be cleared after offboarding")
})
})
})
})

Context("Guard Conditions", func() {
JustBeforeEach(func(ctx SpecContext) {
result, err := decommissionReconciler.Reconcile(ctx, reconcileReq)
result, err := offboardingReconciler.Reconcile(ctx, reconcileReq)
Expect(err).NotTo(HaveOccurred())
Expect(result).To(Equal(ctrl.Result{}))
})
Expand Down Expand Up @@ -291,14 +291,14 @@ var _ = Describe("Decommission Controller", func() {
})

Context("Failure Modes", func() {
var sharedDecommissioningErrorCheck = func(ctx SpecContext, expectedMessageSubstring string) {
var sharedOffboardingErrorCheck = func(ctx SpecContext, expectedMessageSubstring string) {
hypervisor := &kvmv1.Hypervisor{}
Expect(k8sClient.Get(ctx, resourceName, hypervisor)).To(Succeed())
Expect(hypervisor.Status.Conditions).To(ContainElement(
SatisfyAll(
HaveField("Type", kvmv1.ConditionTypeReady),
HaveField("Status", metav1.ConditionFalse),
HaveField("Reason", "Decommissioning"),
HaveField("Reason", "Offboarding"),
HaveField("Message", ContainSubstring(expectedMessageSubstring)),
),
))
Expand All @@ -320,10 +320,10 @@ var _ = Describe("Decommission Controller", func() {
})
})

It("should set decommissioning condition with error", func(ctx SpecContext) {
_, err := decommissionReconciler.Reconcile(ctx, reconcileReq)
It("should set offboarding condition with error", func(ctx SpecContext) {
_, err := offboardingReconciler.Reconcile(ctx, reconcileReq)
Expect(err).NotTo(HaveOccurred())
sharedDecommissioningErrorCheck(ctx, "")
sharedOffboardingErrorCheck(ctx, "")
})
})

Expand Down Expand Up @@ -352,10 +352,10 @@ var _ = Describe("Decommission Controller", func() {
})
})

It("should set decommissioning condition about running VMs", func(ctx SpecContext) {
_, err := decommissionReconciler.Reconcile(ctx, reconcileReq)
It("should set offboarding condition about running VMs", func(ctx SpecContext) {
_, err := offboardingReconciler.Reconcile(ctx, reconcileReq)
Expect(err).NotTo(HaveOccurred())
sharedDecommissioningErrorCheck(ctx, "still has 2 running VMs")
sharedOffboardingErrorCheck(ctx, "still has 2 running VMs")
})
})

Expand Down Expand Up @@ -391,9 +391,9 @@ var _ = Describe("Decommission Controller", func() {
})

It("should wait for aggregates to be cleared", func(ctx SpecContext) {
_, err := decommissionReconciler.Reconcile(ctx, reconcileReq)
_, err := offboardingReconciler.Reconcile(ctx, reconcileReq)
Expect(err).NotTo(HaveOccurred())
sharedDecommissioningErrorCheck(ctx, "Waiting for aggregates to be removed")
sharedOffboardingErrorCheck(ctx, "Waiting for aggregates to be removed")
})
})

Expand Down Expand Up @@ -424,10 +424,10 @@ var _ = Describe("Decommission Controller", func() {
})
})

It("should set decommissioning condition with error", func(ctx SpecContext) {
_, err := decommissionReconciler.Reconcile(ctx, reconcileReq)
It("should set offboarding condition with error", func(ctx SpecContext) {
_, err := offboardingReconciler.Reconcile(ctx, reconcileReq)
Expect(err).NotTo(HaveOccurred())
sharedDecommissioningErrorCheck(ctx, "cannot delete service")
sharedOffboardingErrorCheck(ctx, "cannot delete service")
})
})

Expand Down Expand Up @@ -462,10 +462,10 @@ var _ = Describe("Decommission Controller", func() {
})
})

It("should set decommissioning condition with error", func(ctx SpecContext) {
_, err := decommissionReconciler.Reconcile(ctx, reconcileReq)
It("should set offboarding condition with error", func(ctx SpecContext) {
_, err := offboardingReconciler.Reconcile(ctx, reconcileReq)
Expect(err).NotTo(HaveOccurred())
sharedDecommissioningErrorCheck(ctx, "cannot get resource provider")
sharedOffboardingErrorCheck(ctx, "cannot get resource provider")
})
})

Expand Down Expand Up @@ -512,10 +512,10 @@ var _ = Describe("Decommission Controller", func() {
})
})

It("should set decommissioning condition with error", func(ctx SpecContext) {
_, err := decommissionReconciler.Reconcile(ctx, reconcileReq)
It("should set offboarding condition with error", func(ctx SpecContext) {
_, err := offboardingReconciler.Reconcile(ctx, reconcileReq)
Expect(err).NotTo(HaveOccurred())
sharedDecommissioningErrorCheck(ctx, "cannot clean up resource provider")
sharedOffboardingErrorCheck(ctx, "cannot clean up resource provider")
})
})
})
Expand Down
Loading