From cdba27c4fab67e61c007c909d2203b99acabbd13 Mon Sep 17 00:00:00 2001 From: Harshal Patil <12152047+harche@users.noreply.github.com> Date: Wed, 27 May 2026 23:42:54 -0400 Subject: [PATCH] pkg/readiness: Add readiness checks and wire into proposal controller Add pkg/readiness package with 9 cluster readiness checks that gather pre-upgrade health data: cluster conditions, operator health, API deprecations, node capacity, PDB drain blockers, etcd health, network config, CRD compatibility, and OLM operator lifecycle. Wire readiness.RunAll() into the proposal controller, replacing the hardcoded readinessJSON placeholder with real per-target readiness data that gets embedded in each proposal's request body. Plumb dynamic.Interface from pkg/start through cvo.New() to the proposal controller to support the readiness checks' cluster queries. Co-Authored-By: Claude Opus 4.6 (1M context) --- ...hift_payload_cluster-version-operator.json | 90 ++ pkg/cvo/availableupdates_test.go | 2 +- pkg/cvo/cvo.go | 5 + pkg/cvo/cvo_test.go | 2 +- pkg/proposal/controller.go | 33 +- pkg/proposal/controller_test.go | 305 ++++- pkg/readiness/api_deprecations.go | 75 ++ pkg/readiness/check.go | 169 +++ pkg/readiness/check_test.go | 264 ++++ pkg/readiness/checks_test.go | 1097 +++++++++++++++++ pkg/readiness/client.go | 175 +++ pkg/readiness/client_test.go | 169 +++ pkg/readiness/cluster_conditions.go | 76 ++ pkg/readiness/crd_compat.go | 68 + pkg/readiness/etcd_health.go | 67 + pkg/readiness/network.go | 72 ++ pkg/readiness/node_capacity.go | 48 + pkg/readiness/olm_lifecycle.go | 282 +++++ pkg/readiness/olm_lifecycle_test.go | 449 +++++++ pkg/readiness/operator_health.go | 125 ++ pkg/readiness/pdb_drain.go | 57 + pkg/start/start.go | 7 + test/cvo/readiness.go | 228 ++++ 23 files changed, 3849 insertions(+), 16 deletions(-) create mode 100644 pkg/readiness/api_deprecations.go create mode 100644 pkg/readiness/check.go create mode 100644 pkg/readiness/check_test.go create mode 100644 
pkg/readiness/checks_test.go create mode 100644 pkg/readiness/client.go create mode 100644 pkg/readiness/client_test.go create mode 100644 pkg/readiness/cluster_conditions.go create mode 100644 pkg/readiness/crd_compat.go create mode 100644 pkg/readiness/etcd_health.go create mode 100644 pkg/readiness/network.go create mode 100644 pkg/readiness/node_capacity.go create mode 100644 pkg/readiness/olm_lifecycle.go create mode 100644 pkg/readiness/olm_lifecycle_test.go create mode 100644 pkg/readiness/operator_health.go create mode 100644 pkg/readiness/pdb_drain.go create mode 100644 test/cvo/readiness.go diff --git a/.openshift-tests-extension/openshift_payload_cluster-version-operator.json b/.openshift-tests-extension/openshift_payload_cluster-version-operator.json index 928c2adf6..02637340a 100644 --- a/.openshift-tests-extension/openshift_payload_cluster-version-operator.json +++ b/.openshift-tests-extension/openshift_payload_cluster-version-operator.json @@ -110,5 +110,95 @@ "source": "openshift:payload:cluster-version-operator", "lifecycle": "informing", "environmentSelector": {} + }, + { + "name": "[Jira:\"Cluster Version Operator\"] cluster-version-operator readiness checks should run all checks without errors", + "labels": {}, + "resources": { + "isolation": {} + }, + "source": "openshift:payload:cluster-version-operator", + "lifecycle": "blocking", + "environmentSelector": {} + }, + { + "name": "[Jira:\"Cluster Version Operator\"] cluster-version-operator readiness checks should produce valid JSON that round-trips", + "labels": {}, + "resources": { + "isolation": {} + }, + "source": "openshift:payload:cluster-version-operator", + "lifecycle": "blocking", + "environmentSelector": {} + }, + { + "name": "[Jira:\"Cluster Version Operator\"] cluster-version-operator readiness checks should report node count matching the actual cluster", + "labels": {}, + "resources": { + "isolation": {} + }, + "source": "openshift:payload:cluster-version-operator", + "lifecycle": 
"blocking", + "environmentSelector": {} + }, + { + "name": "[Jira:\"Cluster Version Operator\"] cluster-version-operator readiness checks should report operator count matching actual ClusterOperators", + "labels": {}, + "resources": { + "isolation": {} + }, + "source": "openshift:payload:cluster-version-operator", + "lifecycle": "blocking", + "environmentSelector": {} + }, + { + "name": "[Jira:\"Cluster Version Operator\"] cluster-version-operator readiness checks should report etcd member count matching actual etcd pods", + "labels": {}, + "resources": { + "isolation": {} + }, + "source": "openshift:payload:cluster-version-operator", + "lifecycle": "blocking", + "environmentSelector": {} + }, + { + "name": "[Jira:\"Cluster Version Operator\"] cluster-version-operator readiness checks should report network type matching actual Network config", + "labels": {}, + "resources": { + "isolation": {} + }, + "source": "openshift:payload:cluster-version-operator", + "lifecycle": "blocking", + "environmentSelector": {} + }, + { + "name": "[Jira:\"Cluster Version Operator\"] cluster-version-operator readiness checks should report PDB count matching actual PodDisruptionBudgets", + "labels": {}, + "resources": { + "isolation": {} + }, + "source": "openshift:payload:cluster-version-operator", + "lifecycle": "blocking", + "environmentSelector": {} + }, + { + "name": "[Jira:\"Cluster Version Operator\"] cluster-version-operator readiness checks should report cluster conditions matching ClusterVersion status", + "labels": {}, + "resources": { + "isolation": {} + }, + "source": "openshift:payload:cluster-version-operator", + "lifecycle": "blocking", + "environmentSelector": {} + }, + { + "name": "[Jira:\"Cluster Version Operator\"] cluster-version-operator readiness checks should complete all checks within 60 seconds", + "labels": {}, + "resources": { + "isolation": {} + }, + "source": "openshift:payload:cluster-version-operator", + "lifecycle": "blocking", + "environmentSelector": 
{} } ] \ No newline at end of file diff --git a/pkg/cvo/availableupdates_test.go b/pkg/cvo/availableupdates_test.go index aec0c29ae..d53d2efa3 100644 --- a/pkg/cvo/availableupdates_test.go +++ b/pkg/cvo/availableupdates_test.go @@ -208,7 +208,7 @@ func newOperator(url string, cluster release, promqlMock clusterconditions.Condi func() ([]configv1.Release, []configv1.ConditionalUpdate, error) { return nil, nil, nil }, - fake.NewClientBuilder().Build(), func(_ string) (*configv1.ClusterVersion, error) { + fake.NewClientBuilder().Build(), nil, func(_ string) (*configv1.ClusterVersion, error) { return &configv1.ClusterVersion{}, nil }, func(_ context.Context, namespace, name string, _ metav1.GetOptions) (*corev1.ConfigMap, error) { diff --git a/pkg/cvo/cvo.go b/pkg/cvo/cvo.go index 3dbb6d661..c085de0b4 100644 --- a/pkg/cvo/cvo.go +++ b/pkg/cvo/cvo.go @@ -17,6 +17,7 @@ import ( utilruntime "k8s.io/apimachinery/pkg/util/runtime" "k8s.io/apimachinery/pkg/util/sets" "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/client-go/dynamic" informerscorev1 "k8s.io/client-go/informers/core/v1" "k8s.io/client-go/kubernetes" "k8s.io/client-go/kubernetes/scheme" @@ -109,6 +110,7 @@ type Operator struct { client clientset.Interface kubeClient kubernetes.Interface + dynamicClient dynamic.Interface operatorClient operatorclientset.Interface eventRecorder record.EventRecorder @@ -235,6 +237,7 @@ func New( featureGateInformer configinformersv1.FeatureGateInformer, client clientset.Interface, kubeClient kubernetes.Interface, + dynamicClient dynamic.Interface, operatorClient operatorclientset.Interface, exclude string, clusterProfile string, @@ -267,6 +270,7 @@ func New( client: client, kubeClient: kubeClient, + dynamicClient: dynamicClient, operatorClient: operatorClient, eventRecorder: eventBroadcaster.NewRecorder(scheme.Scheme, corev1.EventSource{Component: namespace}), queue: workqueue.NewTypedRateLimitingQueueWithConfig(workqueue.DefaultTypedControllerRateLimiter[any](), 
workqueue.TypedRateLimitingQueueConfig[any]{Name: "clusterversion"}), @@ -354,6 +358,7 @@ func New( return availableUpdates.Updates, availableUpdates.ConditionalUpdates, nil }, rtClient, + dynamicClient, cvInformer.Lister().Get, func(ctx context.Context, namespace, name string, opts metav1.GetOptions) (*corev1.ConfigMap, error) { return kubeClient.CoreV1().ConfigMaps(namespace).Get(ctx, name, opts) diff --git a/pkg/cvo/cvo_test.go b/pkg/cvo/cvo_test.go index 68de18927..b1587e99d 100644 --- a/pkg/cvo/cvo_test.go +++ b/pkg/cvo/cvo_test.go @@ -2756,7 +2756,7 @@ func TestOperator_availableUpdatesSync(t *testing.T) { ctx := context.Background() optr.proposalController = proposal.NewController(func() ([]configv1.Release, []configv1.ConditionalUpdate, error) { return nil, nil, nil - }, ctrlruntimefake.NewClientBuilder().Build(), func(_ string) (*configv1.ClusterVersion, error) { + }, ctrlruntimefake.NewClientBuilder().Build(), nil, func(_ string) (*configv1.ClusterVersion, error) { return &configv1.ClusterVersion{}, nil }, func(_ context.Context, namespace, name string, _ metav1.GetOptions) (*corev1.ConfigMap, error) { return &corev1.ConfigMap{}, nil diff --git a/pkg/proposal/controller.go b/pkg/proposal/controller.go index 9e13e9770..78f16d0fc 100644 --- a/pkg/proposal/controller.go +++ b/pkg/proposal/controller.go @@ -2,6 +2,7 @@ package proposal import ( "context" + "encoding/json" "fmt" "os" "regexp" @@ -17,6 +18,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" kutilerrors "k8s.io/apimachinery/pkg/util/errors" "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/client-go/dynamic" "k8s.io/client-go/util/workqueue" "k8s.io/klog/v2" @@ -24,6 +26,7 @@ import ( proposalv1alpha1 "github.com/openshift/lightspeed-agentic-operator/api/v1alpha1" i "github.com/openshift/cluster-version-operator/pkg/internal" + "github.com/openshift/cluster-version-operator/pkg/readiness" ) type Controller struct { @@ -31,6 +34,7 @@ type Controller struct { queue 
workqueue.TypedRateLimitingInterface[any] updatesGetterFunc updatesGetterFunc client ctrlruntimeclient.Client + dynamicClient dynamic.Interface cvGetterFunc cvGetterFunc configMapGetterFunc configMapGetterFunc getCurrentVersionFunc getCurrentVersionFunc @@ -57,6 +61,7 @@ type configMapGetterFunc func(ctx context.Context, namespace, name string, opts func NewController( updatesGetterFunc updatesGetterFunc, client ctrlruntimeclient.Client, + dynamicClient dynamic.Interface, cvGetterFunc cvGetterFunc, configMapGetterFunc configMapGetterFunc, getCurrentVersionFunc getCurrentVersionFunc, @@ -68,6 +73,7 @@ func NewController( workqueue.TypedRateLimitingQueueConfig[any]{Name: controllerName}), updatesGetterFunc: updatesGetterFunc, client: client, + dynamicClient: dynamicClient, cvGetterFunc: cvGetterFunc, configMapGetterFunc: configMapGetterFunc, getCurrentVersionFunc: getCurrentVersionFunc, @@ -152,9 +158,7 @@ func (c *Controller) Sync(ctx context.Context, key string) error { return kutilerrors.NewAggregate(errs) } - // TODO: Implement it - readinessJSON := "{}" - proposals, err := getProposals(updates, conditionalUpdates, c.config.Namespace, currentVersion, cv.Spec.Channel, prompt, readinessJSON) + proposals, err := getProposals(ctx, c.dynamicClient, updates, conditionalUpdates, c.config.Namespace, currentVersion, cv.Spec.Channel, prompt) if err != nil { klog.V(i.Normal).Infof("Getting proposals hit an error: %v", err) return kutilerrors.NewAggregate(append(errs, err)) @@ -277,17 +281,23 @@ func deleteProposal(ctx context.Context, client ctrlruntimeclient.Client, propos } func getProposals( + ctx context.Context, + dynamicClient dynamic.Interface, availableUpdates []configv1.Release, conditionalUpdates []configv1.ConditionalUpdate, namespace string, currentVersion, channel, systemPrompt string, - readinessJSON string, ) ([]*proposalv1alpha1.Proposal, error) { + // TODO: Only 2 of 9 readiness checks (api_deprecations, olm_lifecycle) use the target version. 
+ // The other 7 query cluster-wide state identical across targets. For clusters with many available + // updates, split into target-independent checks (run once) and target-dependent checks (run per + // target) to reduce redundant API calls. var errs []error var proposals []*proposalv1alpha1.Proposal for _, au := range availableUpdates { targetVersion := au.Version + readinessJSON := runReadinessJSON(ctx, dynamicClient, currentVersion, targetVersion) if proposal, err := getProposal(namespace, currentVersion, targetVersion, channel, updateKindRecommended, systemPrompt, readinessJSON, availableUpdates); err != nil { errs = append(errs, err) continue @@ -298,6 +308,7 @@ func getProposals( for _, cu := range conditionalUpdates { targetVersion := cu.Release.Version + readinessJSON := runReadinessJSON(ctx, dynamicClient, currentVersion, targetVersion) if proposal, err := getProposal(namespace, currentVersion, targetVersion, channel, updateKindConditional, systemPrompt, readinessJSON, availableUpdates); err != nil { errs = append(errs, err) continue @@ -437,6 +448,20 @@ func classifyUpdate(current, target string) string { return i.UpdateType(cv, tv) } +func runReadinessJSON(ctx context.Context, dynamicClient dynamic.Interface, currentVersion, targetVersion string) string { + if dynamicClient == nil { + klog.V(i.Normal).Infof("Dynamic client is nil; skipping readiness checks for %s -> %s", currentVersion, targetVersion) + return "{}" + } + output := readiness.RunAll(ctx, dynamicClient, currentVersion, targetVersion) + data, err := json.Marshal(output) + if err != nil { + klog.V(i.Normal).Infof("Failed to marshal readiness output for %s -> %s: %v", currentVersion, targetVersion, err) + return "{}" + } + return string(data) +} + // buildRequest constructs the proposal request with system prompt, metadata, and readiness data. 
func buildRequest(systemPrompt, current, target, channel, updateType, targetType string, updates []configv1.Release, readinessJSON string) string { diff --git a/pkg/proposal/controller_test.go b/pkg/proposal/controller_test.go index 9c9d58563..6126b69d4 100644 --- a/pkg/proposal/controller_test.go +++ b/pkg/proposal/controller_test.go @@ -2,6 +2,7 @@ package proposal import ( "context" + "encoding/json" "fmt" "strings" "testing" @@ -16,11 +17,17 @@ import ( apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" kerrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" kutilerrors "k8s.io/apimachinery/pkg/util/errors" + dynamicfake "k8s.io/client-go/dynamic/fake" "k8s.io/client-go/kubernetes/scheme" configv1 "github.com/openshift/api/config/v1" proposalv1alpha1 "github.com/openshift/lightspeed-agentic-operator/api/v1alpha1" + + "github.com/openshift/cluster-version-operator/pkg/readiness" ) func init() { @@ -137,7 +144,7 @@ Update path: Recommended } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - c := NewController(tt.updatesGetterFunc, tt.client, tt.cvGetterFunc, func(_ context.Context, namespace, name string, _ metav1.GetOptions) (*corev1.ConfigMap, error) { + c := NewController(tt.updatesGetterFunc, tt.client, nil, tt.cvGetterFunc, func(_ context.Context, namespace, name string, _ metav1.GetOptions) (*corev1.ConfigMap, error) { if namespace == "openshift-lightspeed" && name == "cluster-update-advisory-prompt" { return &corev1.ConfigMap{ Data: map[string]string{ @@ -743,7 +750,6 @@ func TestGetProposals(t *testing.T) { currentVersion string channel string systemPrompt string - readinessJSON string expected []*proposalv1alpha1.Proposal expectError error }{ @@ -757,7 +763,6 @@ func TestGetProposals(t *testing.T) { currentVersion: "4.15.3", channel: "stable-4.16", 
systemPrompt: "Test prompt", - readinessJSON: `{"test": "data"}`, expected: []*proposalv1alpha1.Proposal{ { ObjectMeta: metav1.ObjectMeta{ @@ -787,7 +792,7 @@ Other recommended versions available: ## Cluster Readiness Data ` + "```json\n" + - `{"test": "data"}` + "\n```\n", + `{}` + "\n```\n", Analysis: proposalv1alpha1.ProposalStep{ Agent: "smart", }, @@ -851,7 +856,7 @@ Other recommended versions available: ## Cluster Readiness Data ` + "```json\n" + - `{"test": "data"}` + "\n```\n", + `{}` + "\n```\n", Analysis: proposalv1alpha1.ProposalStep{ Agent: "smart", }, @@ -924,7 +929,7 @@ Update path: Recommended Other recommended versions available: - 4.16.1 -`, +` + "## Cluster Readiness Data\n\n```json\n{}\n```\n", Analysis: proposalv1alpha1.ProposalStep{ Agent: "smart", }, @@ -981,7 +986,7 @@ Update path: Recommended Other recommended versions available: - 4.16.0 -`, +` + "## Cluster Readiness Data\n\n```json\n{}\n```\n", Analysis: proposalv1alpha1.ProposalStep{ Agent: "smart", }, @@ -1043,7 +1048,7 @@ Other recommended versions available: - 4.16.0 - 4.16.1 -`, +` + "## Cluster Readiness Data\n\n```json\n{}\n```\n", Analysis: proposalv1alpha1.ProposalStep{ Agent: "smart", }, @@ -1105,7 +1110,7 @@ Other recommended versions available: - 4.16.0 - 4.16.1 -`, +` + "## Cluster Readiness Data\n\n```json\n{}\n```\n", Analysis: proposalv1alpha1.ProposalStep{ Agent: "smart", }, @@ -1174,13 +1179,14 @@ Other recommended versions available: for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { proposals, err := getProposals( + context.Background(), + nil, tt.availableUpdates, tt.conditionalUpdates, tt.namespace, tt.currentVersion, tt.channel, tt.systemPrompt, - tt.readinessJSON, ) if diff := cmp.Diff(err, tt.expectError, cmp.Transformer("Error", func(e error) string { @@ -1236,3 +1242,282 @@ func Test_expired(t *testing.T) { }) } } + +func newFakeDynamicClient(objects ...runtime.Object) *dynamicfake.FakeDynamicClient { + s := runtime.NewScheme() + gvrs := 
map[schema.GroupVersionResource]string{ + readiness.GVRClusterVersion: "ClusterVersionList", + readiness.GVRClusterOperator: "ClusterOperatorList", + readiness.GVRMachineConfigPool: "MachineConfigPoolList", + readiness.GVRNode: "NodeList", + readiness.GVRPod: "PodList", + readiness.GVRPDB: "PodDisruptionBudgetList", + readiness.GVRCRD: "CustomResourceDefinitionList", + readiness.GVRSubscription: "SubscriptionList", + readiness.GVRCSV: "ClusterServiceVersionList", + readiness.GVRInstallPlan: "InstallPlanList", + readiness.GVRPackageManifest: "PackageManifestList", + readiness.GVRAPIRequestCount: "APIRequestCountList", + readiness.GVRNetwork: "NetworkList", + readiness.GVRProxy: "ProxyList", + readiness.GVRAPIServer: "APIServerList", + } + for gvr, listKind := range gvrs { + gvk := schema.GroupVersionKind{Group: gvr.Group, Version: gvr.Version, Kind: listKind} + s.AddKnownTypeWithName(gvk, &unstructured.UnstructuredList{}) + } + return dynamicfake.NewSimpleDynamicClientWithCustomListKinds(s, gvrs, objects...) 
+} + +func TestGetProposals_WithReadinessData(t *testing.T) { + dc := newFakeDynamicClient( + // ClusterVersion + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "config.openshift.io/v1", "kind": "ClusterVersion", + "metadata": map[string]interface{}{"name": "version"}, + "spec": map[string]interface{}{"channel": "stable-4.21", "clusterID": "test-id"}, + "status": map[string]interface{}{ + "conditions": []interface{}{ + map[string]interface{}{"type": "Available", "status": "True"}, + map[string]interface{}{"type": "Progressing", "status": "False"}, + map[string]interface{}{"type": "Upgradeable", "status": "True"}, + }, + "history": []interface{}{ + map[string]interface{}{"version": "4.21.5", "state": "Completed"}, + }, + }, + }}, + // ClusterOperators + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "config.openshift.io/v1", "kind": "ClusterOperator", + "metadata": map[string]interface{}{"name": "etcd"}, + "status": map[string]interface{}{ + "conditions": []interface{}{ + map[string]interface{}{"type": "Available", "status": "True"}, + map[string]interface{}{"type": "Degraded", "status": "False"}, + map[string]interface{}{"type": "Upgradeable", "status": "True"}, + }, + }, + }}, + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "config.openshift.io/v1", "kind": "ClusterOperator", + "metadata": map[string]interface{}{"name": "dns"}, + "status": map[string]interface{}{ + "conditions": []interface{}{ + map[string]interface{}{"type": "Available", "status": "True"}, + map[string]interface{}{"type": "Degraded", "status": "False"}, + map[string]interface{}{"type": "Upgradeable", "status": "True"}, + }, + }, + }}, + // MachineConfigPool + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "machineconfiguration.openshift.io/v1", "kind": "MachineConfigPool", + "metadata": map[string]interface{}{"name": "master"}, + "spec": map[string]interface{}{"paused": false}, + 
"status": map[string]interface{}{ + "machineCount": int64(3), "readyMachineCount": int64(3), "updatedMachineCount": int64(3), + "conditions": []interface{}{ + map[string]interface{}{"type": "Degraded", "status": "False"}, + map[string]interface{}{"type": "Updating", "status": "False"}, + }, + }, + }}, + // Etcd pods + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "v1", "kind": "Pod", + "metadata": map[string]interface{}{"name": "etcd-master-0", "namespace": "openshift-etcd", "labels": map[string]interface{}{"app": "etcd"}}, + "spec": map[string]interface{}{"nodeName": "master-0"}, "status": map[string]interface{}{"phase": "Running", "conditions": []interface{}{map[string]interface{}{"type": "Ready", "status": "True"}}}, + }}, + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "v1", "kind": "Pod", + "metadata": map[string]interface{}{"name": "etcd-master-1", "namespace": "openshift-etcd", "labels": map[string]interface{}{"app": "etcd"}}, + "spec": map[string]interface{}{"nodeName": "master-1"}, "status": map[string]interface{}{"phase": "Running", "conditions": []interface{}{map[string]interface{}{"type": "Ready", "status": "True"}}}, + }}, + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "v1", "kind": "Pod", + "metadata": map[string]interface{}{"name": "etcd-master-2", "namespace": "openshift-etcd", "labels": map[string]interface{}{"app": "etcd"}}, + "spec": map[string]interface{}{"nodeName": "master-2"}, "status": map[string]interface{}{"phase": "Running", "conditions": []interface{}{map[string]interface{}{"type": "Ready", "status": "True"}}}, + }}, + // Nodes + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "v1", "kind": "Node", + "metadata": map[string]interface{}{"name": "master-0"}, + "status": map[string]interface{}{"conditions": []interface{}{map[string]interface{}{"type": "Ready", "status": "True"}}}, + }}, + &unstructured.Unstructured{Object: 
map[string]interface{}{ + "apiVersion": "v1", "kind": "Node", + "metadata": map[string]interface{}{"name": "worker-0"}, + "status": map[string]interface{}{"conditions": []interface{}{map[string]interface{}{"type": "Ready", "status": "True"}}}, + }}, + // PDB + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "policy/v1", "kind": "PodDisruptionBudget", + "metadata": map[string]interface{}{"name": "etcd-guard", "namespace": "openshift-etcd"}, + "spec": map[string]interface{}{"maxUnavailable": "1"}, + "status": map[string]interface{}{"currentHealthy": int64(3), "desiredHealthy": int64(2), "disruptionsAllowed": int64(1)}, + }}, + // APIRequestCount with blocker + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "apiserver.openshift.io/v1", "kind": "APIRequestCount", + "metadata": map[string]interface{}{"name": "flowschemas.v1beta3.flowcontrol.apiserver.k8s.io"}, + "status": map[string]interface{}{ + "removedInRelease": "4.21.8", "requestCount": int64(100), + "conditions": []interface{}{map[string]interface{}{"type": "Deprecated", "status": "True"}}, + }, + }}, + // CRD with version issue + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "apiextensions.k8s.io/v1", "kind": "CustomResourceDefinition", + "metadata": map[string]interface{}{"name": "widgets.example.com"}, + "spec": map[string]interface{}{ + "versions": []interface{}{ + map[string]interface{}{"name": "v2", "served": true}, + map[string]interface{}{"name": "v1", "served": false}, + }, + }, + "status": map[string]interface{}{"storedVersions": []interface{}{"v1"}}, + }}, + // Network, Proxy, APIServer + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "config.openshift.io/v1", "kind": "Network", + "metadata": map[string]interface{}{"name": "cluster"}, + "status": map[string]interface{}{"networkType": "OVNKubernetes"}, + }}, + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": 
"config.openshift.io/v1", "kind": "Proxy", + "metadata": map[string]interface{}{"name": "cluster"}, + "spec": map[string]interface{}{}, + }}, + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "config.openshift.io/v1", "kind": "APIServer", + "metadata": map[string]interface{}{"name": "cluster"}, + "spec": map[string]interface{}{}, + }}, + // OLM Subscription + CSV + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "operators.coreos.com/v1alpha1", "kind": "Subscription", + "metadata": map[string]interface{}{"name": "elasticsearch-operator", "namespace": "openshift-operators-redhat"}, + "spec": map[string]interface{}{"channel": "stable-5.8", "name": "elasticsearch-operator", "source": "redhat-operators", "sourceNamespace": "openshift-marketplace"}, + "status": map[string]interface{}{"state": "AtLatestKnown", "installedCSV": "elasticsearch-operator.v5.8.6", "currentCSV": "elasticsearch-operator.v5.8.6"}, + }}, + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "operators.coreos.com/v1alpha1", "kind": "ClusterServiceVersion", + "metadata": map[string]interface{}{"name": "elasticsearch-operator.v5.8.6", "namespace": "openshift-operators-redhat"}, + "spec": map[string]interface{}{"version": "5.8.6", "displayName": "OpenShift Elasticsearch Operator"}, + "status": map[string]interface{}{"phase": "Succeeded"}, + }}, + ) + + proposals, err := getProposals( + context.Background(), + dc, + []configv1.Release{{Version: "4.21.8"}}, + nil, + "openshift-lightspeed", + "4.21.5", + "stable-4.21", + "Test prompt", + ) + if err != nil { + t.Fatalf("getProposals returned error: %v", err) + } + if len(proposals) != 1 { + t.Fatalf("expected 1 proposal, got %d", len(proposals)) + } + + request := proposals[0].Spec.Request + + if !strings.Contains(request, "## Cluster Readiness Data") { + t.Fatal("proposal request missing readiness data section") + } + + // Extract JSON from the request + start := 
strings.Index(request, "```json\n") + if start < 0 { + t.Fatal("could not find readiness JSON fence in request") + } + jsonStart := start + len("```json\n") + jsonEnd := strings.Index(request[jsonStart:], "\n```") + if jsonEnd < 0 { + t.Fatal("could not find closing fence for readiness JSON") + } + readinessJSON := request[jsonStart : jsonStart+jsonEnd] + + // Unmarshal into raw map since CheckResult.Data is json:"-" (flattened during marshal) + var raw map[string]any + if err := json.Unmarshal([]byte(readinessJSON), &raw); err != nil { + t.Fatalf("readiness JSON is not valid: %v\nJSON: %s", err, readinessJSON) + } + + if raw["current_version"] != "4.21.5" { + t.Errorf("readiness current_version = %v, want 4.21.5", raw["current_version"]) + } + if raw["target_version"] != "4.21.8" { + t.Errorf("readiness target_version = %v, want 4.21.8", raw["target_version"]) + } + + meta, ok := raw["meta"].(map[string]any) + if !ok { + t.Fatal("readiness output missing 'meta'") + } + if meta["total_checks"] != float64(9) { + t.Errorf("readiness total_checks = %v, want 9", meta["total_checks"]) + } + if meta["checks_ok"] != float64(9) { + t.Errorf("readiness checks_ok = %v, want 9 (all checks should succeed)", meta["checks_ok"]) + } + + checks, ok := raw["checks"].(map[string]any) + if !ok { + t.Fatal("readiness output missing 'checks'") + } + + // Verify every check produced results with ok status + for _, name := range []string{ + "cluster_conditions", "operator_health", "api_deprecations", + "node_capacity", "pdb_drain", "etcd_health", "network", + "crd_compat", "olm_operator_lifecycle", + } { + check, ok := checks[name].(map[string]any) + if !ok { + t.Errorf("readiness output missing %s check", name) + continue + } + if check["_status"] != "ok" { + t.Errorf("check %s status = %v, error = %v", name, check["_status"], check["_error"]) + } + } + + // Spot-check: api_deprecations found the blocker + if ad, ok := checks["api_deprecations"].(map[string]any); !ok { + 
t.Fatal("api_deprecations check missing or wrong type") + } else if adSummary, ok := ad["summary"].(map[string]any); !ok { + t.Fatal("api_deprecations summary missing or wrong type") + } else if adSummary["blockers"] != float64(1) { + t.Errorf("api_deprecations blockers = %v, want 1", adSummary["blockers"]) + } + + // Spot-check: olm found the subscription + if olm, ok := checks["olm_operator_lifecycle"].(map[string]any); !ok { + t.Fatal("olm_operator_lifecycle check missing or wrong type") + } else if olmSummary, ok := olm["summary"].(map[string]any); !ok { + t.Fatal("olm_operator_lifecycle summary missing or wrong type") + } else if olmSummary["total_operators"] != float64(1) { + t.Errorf("olm total_operators = %v, want 1", olmSummary["total_operators"]) + } + + // Spot-check: etcd has 3 healthy members + if etcd, ok := checks["etcd_health"].(map[string]any); !ok { + t.Fatal("etcd_health check missing or wrong type") + } else if etcd["total_members"] != float64(3) { + t.Errorf("etcd total_members = %v, want 3", etcd["total_members"]) + } + + // Spot-check: node_capacity found 2 nodes + if nc, ok := checks["node_capacity"].(map[string]any); !ok { + t.Fatal("node_capacity check missing or wrong type") + } else if nc["total_nodes"] != float64(2) { + t.Errorf("node_capacity total_nodes = %v, want 2", nc["total_nodes"]) + } +} diff --git a/pkg/readiness/api_deprecations.go b/pkg/readiness/api_deprecations.go new file mode 100644 index 000000000..33cd31d1a --- /dev/null +++ b/pkg/readiness/api_deprecations.go @@ -0,0 +1,75 @@ +package readiness + +import ( + "context" + "fmt" + "strings" + + "k8s.io/client-go/dynamic" +) + +// APIDeprecationsCheck scans for deprecated or removed API usage. 
+type APIDeprecationsCheck struct{} + +func (c *APIDeprecationsCheck) Name() string { return "api_deprecations" } + +func (c *APIDeprecationsCheck) Run(ctx context.Context, dc dynamic.Interface, current, target string) (map[string]any, error) { + result := map[string]any{} + + // Fetch APIRequestCount resources + arcs, err := ListResources(ctx, dc, GVRAPIRequestCount, "") + if err != nil { + // APIRequestCount may not be available on all clusters + if strings.Contains(err.Error(), "not found") { + result["warning"] = "APIRequestCount resource not available" + result["blocker_apis"] = []any{} + result["warning_apis"] = []any{} + result["summary"] = map[string]any{"blockers": 0, "warnings": 0} + return result, nil + } + return nil, fmt.Errorf("failed to list APIRequestCounts: %w", err) + } + + blockers := make([]map[string]any, 0) + warnings := make([]map[string]any, 0) + + for _, arc := range arcs { + conditions := GetConditions(&arc) + + // Blocker: status.removedInRelease is at or before the target version and the API is still being requested + removedIn := NestedString(arc.Object, "status", "removedInRelease") + cmp, err := CompareVersions(removedIn, target) + if removedIn != "" && err == nil && cmp <= 0 { + requestCount := NestedInt64(arc.Object, "status", "requestCount") + if requestCount > 0 { + blockers = append(blockers, map[string]any{ + "resource": arc.GetName(), + "removed_in_release": removedIn, + "request_count": requestCount, + }) + } + } + + // Check for deprecation condition + if dep, ok := conditions["Deprecated"]; ok && dep.Status == ConditionTrue { + requestCount := NestedInt64(arc.Object, "status", "requestCount") + if requestCount > 0 { + warnings = append(warnings, map[string]any{ + "resource": arc.GetName(), + "request_count": requestCount, + "message": dep.Message, + }) + } + } + } + + result["blocker_apis"] = blockers + result["warning_apis"] = warnings + result["summary"] = map[string]any{ + "blockers": len(blockers), + "warnings": len(warnings), + "total": len(arcs), + } + + return result, nil +} diff --git
a/pkg/readiness/check.go b/pkg/readiness/check.go new file mode 100644 index 000000000..5e30d23ca --- /dev/null +++ b/pkg/readiness/check.go @@ -0,0 +1,169 @@ +package readiness + +import ( + "context" + "encoding/json" + "fmt" + "sync" + "time" + + "k8s.io/client-go/dynamic" +) + +// Check is the interface that each readiness check implements. +type Check interface { + Name() string + Run(ctx context.Context, c dynamic.Interface, current, target string) (map[string]any, error) +} + +// CheckResult wraps a check's output with metadata. +type CheckResult struct { + Status string `json:"_status"` + Error string `json:"_error,omitempty"` + Elapsed float64 `json:"_elapsed_seconds"` + Data map[string]any `json:"-"` +} + +func (r CheckResult) MarshalJSON() ([]byte, error) { + m := make(map[string]any, len(r.Data)+3) + for k, v := range r.Data { + m[k] = v + } + m["_status"] = r.Status + m["_elapsed_seconds"] = r.Elapsed + if r.Error != "" { + m["_error"] = r.Error + } + return json.Marshal(m) +} + +// Output is the top-level readiness report structure. +type Output struct { + CurrentVersion string `json:"current_version"` + TargetVersion string `json:"target_version"` + Checks map[string]CheckResult `json:"checks"` + Meta Meta `json:"meta"` +} + +// Meta contains summary information about the readiness check run. +type Meta struct { + TotalChecks int `json:"total_checks"` + ChecksOK int `json:"checks_ok"` + ChecksErrored int `json:"checks_errored"` + ElapsedSeconds float64 `json:"elapsed_seconds"` +} + +const ( + perCheckTimeout = 60 * time.Second + + StatusOK = "ok" + StatusError = "error" +) + +// AllChecks returns all registered readiness checks. 
+// Checks are split into two categories: +// - cluster_conditions: reads CVO's already-computed state (no re-querying) +// - everything else: gathers NEW data that CVO doesn't already track +var AllChecks = func() []Check { + return []Check{ + &ClusterConditionsCheck{}, // reads existing CVO conditions — no duplication + &OperatorHealthCheck{}, // per-CO detail + MCPs (CVO only aggregates) + &APIDeprecationsCheck{}, // new: deprecated API usage + &NodeCapacityCheck{}, // new: node readiness and headroom + &PDBDrainCheck{}, // new: PDB drain blockers + &EtcdHealthCheck{}, // new: deep etcd health (beyond CO condition) + &NetworkCheck{}, // new: SDN migration, TLS, proxy + &CRDCompatCheck{}, // new: CRD version mismatches + &OLMOperatorLifecycleCheck{}, // new: OLM operator lifecycle (OCPSTRAT-2618) + // Known issues (Jira/KB) are NOT checked here — the agent uses its + // redhat-support skill to query contextually based on readiness findings. + } +} + +// RunAll executes all readiness checks in parallel with per-check timeouts. 
+func RunAll(ctx context.Context, c dynamic.Interface, current, target string) *Output { + checks := AllChecks() + results := make(map[string]CheckResult, len(checks)) + + var mu sync.Mutex + var wg sync.WaitGroup + + totalStart := time.Now() + + for _, check := range checks { + wg.Add(1) + go func(ch Check) { + defer wg.Done() + + checkCtx, cancel := context.WithTimeout(ctx, perCheckTimeout) + defer cancel() + + start := time.Now() + result := CheckResult{Data: map[string]any{}} + + defer func() { + if r := recover(); r != nil { + result.Status = StatusError + result.Error = fmt.Sprintf("panic: %v", r) + } + result.Elapsed = time.Since(start).Seconds() + if result.Data == nil { + result.Data = map[string]any{} + } + + mu.Lock() + results[ch.Name()] = result + mu.Unlock() + }() + + data, err := ch.Run(checkCtx, c, current, target) + + if err != nil { + result.Status = StatusError + result.Error = err.Error() + if data != nil { + result.Data = data + } + } else { + result.Status = StatusOK + result.Data = data + } + }(check) + } + + wg.Wait() + totalElapsed := time.Since(totalStart).Seconds() + + ok := 0 + errored := 0 + for _, r := range results { + if r.Status == StatusOK { + ok++ + } else { + errored++ + } + } + + return &Output{ + CurrentVersion: current, + TargetVersion: target, + Checks: results, + Meta: Meta{ + TotalChecks: len(checks), + ChecksOK: ok, + ChecksErrored: errored, + ElapsedSeconds: totalElapsed, + }, + } +} + +// SectionError appends a section error entry to the errors slice. 
+func SectionError(errors *[]map[string]any, section string, err error) { + if err == nil { + return + } + *errors = append(*errors, map[string]any{ + "section": section, + "error": err.Error(), + }) +} diff --git a/pkg/readiness/check_test.go b/pkg/readiness/check_test.go new file mode 100644 index 000000000..73260a97b --- /dev/null +++ b/pkg/readiness/check_test.go @@ -0,0 +1,264 @@ +package readiness + +import ( + "context" + "encoding/json" + "errors" + "testing" + + "k8s.io/client-go/dynamic" +) + +type fakeCheck struct { + name string + data map[string]any + err error + panic bool +} + +func (f *fakeCheck) Name() string { return f.name } +func (f *fakeCheck) Run(_ context.Context, _ dynamic.Interface, _, _ string) (map[string]any, error) { + if f.panic { + panic("check exploded") + } + return f.data, f.err +} + +func TestCheckResultMarshalJSON(t *testing.T) { + t.Run("ok result merges data with metadata", func(t *testing.T) { + r := CheckResult{ + Status: "ok", + Elapsed: 1.5, + Data: map[string]any{"foo": "bar", "count": 42}, + } + b, err := json.Marshal(r) + if err != nil { + t.Fatal(err) + } + var m map[string]any + if err := json.Unmarshal(b, &m); err != nil { + t.Fatal(err) + } + if m["_status"] != "ok" { + t.Errorf("_status = %v, want ok", m["_status"]) + } + if m["foo"] != "bar" { + t.Errorf("foo = %v, want bar", m["foo"]) + } + if _, ok := m["_error"]; ok { + t.Error("_error should be omitted for ok results") + } + }) + + t.Run("error result includes error field", func(t *testing.T) { + r := CheckResult{ + Status: "error", + Error: "something failed", + Elapsed: 0.1, + Data: map[string]any{}, + } + b, err := json.Marshal(r) + if err != nil { + t.Fatal(err) + } + var m map[string]any + if err := json.Unmarshal(b, &m); err != nil { + t.Fatal(err) + } + if m["_error"] != "something failed" { + t.Errorf("_error = %v, want 'something failed'", m["_error"]) + } + }) +} + +func TestFakeCheckInterface(t *testing.T) { + ok := &fakeCheck{name: "ok_check", data: 
map[string]any{"healthy": true}} + fail := &fakeCheck{name: "err_check", err: errors.New("fail")} + + if ok.Name() != "ok_check" { + t.Errorf("Name() = %q", ok.Name()) + } + + data, err := ok.Run(context.Background(), nil, "4.21.5", "4.21.8") + if err != nil { + t.Errorf("ok check should not error: %v", err) + } + if data["healthy"] != true { + t.Errorf("data = %v", data) + } + + _, err = fail.Run(context.Background(), nil, "4.21.5", "4.21.8") + if err == nil { + t.Error("fail check should error") + } +} + +func TestOutputMarshalJSON(t *testing.T) { + output := &Output{ + CurrentVersion: "4.21.5", + TargetVersion: "4.21.8", + Checks: map[string]CheckResult{ + "test": {Status: "ok", Elapsed: 0.5, Data: map[string]any{"key": "val"}}, + }, + Meta: Meta{TotalChecks: 1, ChecksOK: 1, ChecksErrored: 0, ElapsedSeconds: 0.5}, + } + + b, err := json.Marshal(output) + if err != nil { + t.Fatal(err) + } + + var m map[string]any + if err := json.Unmarshal(b, &m); err != nil { + t.Fatal(err) + } + + if m["current_version"] != "4.21.5" { + t.Errorf("current_version = %v", m["current_version"]) + } + if m["target_version"] != "4.21.8" { + t.Errorf("target_version = %v", m["target_version"]) + } + + checks, ok := m["checks"].(map[string]any) + if !ok { + t.Fatal("checks not a map") + } + testCheck, ok := checks["test"].(map[string]any) + if !ok { + t.Fatal("test check not a map") + } + if testCheck["_status"] != "ok" { + t.Errorf("test._status = %v", testCheck["_status"]) + } + if testCheck["key"] != "val" { + t.Errorf("test.key = %v", testCheck["key"]) + } +} + +func TestSectionError(t *testing.T) { + var errs []map[string]any + SectionError(&errs, "test_section", errors.New("something broke")) + + if len(errs) != 1 { + t.Fatalf("len = %d, want 1", len(errs)) + } + if errs[0]["section"] != "test_section" { + t.Errorf("section = %v", errs[0]["section"]) + } + if errs[0]["error"] != "something broke" { + t.Errorf("error = %v", errs[0]["error"]) + } +} + +func 
TestRunAllMixedResults(t *testing.T) { + orig := AllChecks + defer func() { AllChecks = orig }() + + AllChecks = func() []Check { + return []Check{ + &fakeCheck{name: "passing", data: map[string]any{"healthy": true}}, + &fakeCheck{name: "failing", err: errors.New("something broke")}, + &fakeCheck{name: "partial", data: map[string]any{"partial": true}, err: errors.New("partial failure")}, + } + } + + output := RunAll(context.Background(), nil, "4.21.5", "4.21.8") + + if output.Meta.TotalChecks != 3 { + t.Errorf("TotalChecks = %d, want 3", output.Meta.TotalChecks) + } + if output.Meta.ChecksOK != 1 { + t.Errorf("ChecksOK = %d, want 1", output.Meta.ChecksOK) + } + if output.Meta.ChecksErrored != 2 { + t.Errorf("ChecksErrored = %d, want 2", output.Meta.ChecksErrored) + } + + passing := output.Checks["passing"] + if passing.Status != StatusOK { + t.Errorf("passing.Status = %q, want ok", passing.Status) + } + if passing.Data["healthy"] != true { + t.Errorf("passing.Data[healthy] = %v", passing.Data["healthy"]) + } + + failing := output.Checks["failing"] + if failing.Status != StatusError { + t.Errorf("failing.Status = %q, want error", failing.Status) + } + if failing.Error != "something broke" { + t.Errorf("failing.Error = %q", failing.Error) + } + + partial := output.Checks["partial"] + if partial.Status != StatusError { + t.Errorf("partial.Status = %q, want error", partial.Status) + } + if partial.Data["partial"] != true { + t.Errorf("partial.Data[partial] = %v, want true", partial.Data["partial"]) + } + if partial.Error != "partial failure" { + t.Errorf("partial.Error = %q", partial.Error) + } +} + +func TestRunAllRecoversPanic(t *testing.T) { + orig := AllChecks + defer func() { AllChecks = orig }() + + AllChecks = func() []Check { + return []Check{ + &fakeCheck{name: "ok_check", data: map[string]any{"healthy": true}}, + &fakeCheck{name: "panicking", panic: true}, + } + } + + output := RunAll(context.Background(), nil, "4.21.5", "4.21.8") + + if 
output.Meta.TotalChecks != 2 { + t.Errorf("TotalChecks = %d, want 2", output.Meta.TotalChecks) + } + if output.Meta.ChecksOK != 1 { + t.Errorf("ChecksOK = %d, want 1", output.Meta.ChecksOK) + } + if output.Meta.ChecksErrored != 1 { + t.Errorf("ChecksErrored = %d, want 1", output.Meta.ChecksErrored) + } + + ok := output.Checks["ok_check"] + if ok.Status != StatusOK { + t.Errorf("ok_check.Status = %q, want ok", ok.Status) + } + + panicked := output.Checks["panicking"] + if panicked.Status != StatusError { + t.Errorf("panicking.Status = %q, want error", panicked.Status) + } + if panicked.Error != "panic: check exploded" { + t.Errorf("panicking.Error = %q, want 'panic: check exploded'", panicked.Error) + } +} + +func TestAllChecksReturnsExpectedCount(t *testing.T) { + checks := AllChecks() + if len(checks) != 9 { + t.Errorf("AllChecks() returned %d checks, want 9", len(checks)) + } + + names := make(map[string]bool) + for _, c := range checks { + names[c.Name()] = true + } + + expected := []string{ + "cluster_conditions", "operator_health", "api_deprecations", + "node_capacity", "pdb_drain", "etcd_health", "network", "crd_compat", + "olm_operator_lifecycle", + } + for _, name := range expected { + if !names[name] { + t.Errorf("missing check: %s", name) + } + } +} diff --git a/pkg/readiness/checks_test.go b/pkg/readiness/checks_test.go new file mode 100644 index 000000000..f9fae9115 --- /dev/null +++ b/pkg/readiness/checks_test.go @@ -0,0 +1,1097 @@ +package readiness + +import ( + "context" + "encoding/json" + "testing" + + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" + dynamicfake "k8s.io/client-go/dynamic/fake" +) + +func newFakeDynamicClient(objects ...runtime.Object) *dynamicfake.FakeDynamicClient { + scheme := runtime.NewScheme() + gvrs := map[schema.GroupVersionResource]string{ + GVRClusterVersion: "ClusterVersionList", + GVRClusterOperator: "ClusterOperatorList", + 
GVRMachineConfigPool: "MachineConfigPoolList", + GVRNode: "NodeList", + GVRPod: "PodList", + GVRPDB: "PodDisruptionBudgetList", + GVRCRD: "CustomResourceDefinitionList", + GVRSubscription: "SubscriptionList", + GVRCSV: "ClusterServiceVersionList", + GVRInstallPlan: "InstallPlanList", + GVRPackageManifest: "PackageManifestList", + GVRAPIRequestCount: "APIRequestCountList", + GVRNetwork: "NetworkList", + GVRProxy: "ProxyList", + GVRAPIServer: "APIServerList", + } + for gvr, listKind := range gvrs { + gvk := schema.GroupVersionKind{Group: gvr.Group, Version: gvr.Version, Kind: listKind} + scheme.AddKnownTypeWithName(gvk, &unstructured.UnstructuredList{}) + } + return dynamicfake.NewSimpleDynamicClientWithCustomListKinds(scheme, gvrs, objects...) +} + +func TestNodeCapacityCheck(t *testing.T) { + nodes := []runtime.Object{ + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "v1", "kind": "Node", + "metadata": map[string]interface{}{"name": "master-0"}, + "status": map[string]interface{}{ + "conditions": []interface{}{ + map[string]interface{}{"type": "Ready", "status": "True"}, + }, + }, + }}, + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "v1", "kind": "Node", + "metadata": map[string]interface{}{"name": "worker-0"}, + "status": map[string]interface{}{ + "conditions": []interface{}{ + map[string]interface{}{"type": "Ready", "status": "True"}, + }, + }, + }}, + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "v1", "kind": "Node", + "metadata": map[string]interface{}{"name": "worker-1"}, + "spec": map[string]interface{}{"unschedulable": true}, + "status": map[string]interface{}{ + "conditions": []interface{}{ + map[string]interface{}{"type": "Ready", "status": "False"}, + }, + }, + }}, + } + + client := newFakeDynamicClient(nodes...) 
+ check := &NodeCapacityCheck{} + + result, err := check.Run(context.Background(), client, "4.21.5", "4.21.8") + if err != nil { + t.Fatal(err) + } + + if result["total_nodes"] != 3 { + t.Errorf("total_nodes = %v, want 3", result["total_nodes"]) + } + if result["ready_nodes"] != 2 { + t.Errorf("ready_nodes = %v, want 2", result["ready_nodes"]) + } + if result["unschedulable_nodes"] != 1 { + t.Errorf("unschedulable_nodes = %v, want 1", result["unschedulable_nodes"]) + } + + summary, ok := result["summary"].(map[string]any) + if !ok { + t.Fatal("summary not a map") + } + if summary["not_ready"] != 1 { + t.Errorf("summary.not_ready = %v, want 1", summary["not_ready"]) + } +} + +func TestPDBDrainCheck(t *testing.T) { + pdbs := []runtime.Object{ + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "policy/v1", "kind": "PodDisruptionBudget", + "metadata": map[string]interface{}{"name": "safe-pdb", "namespace": "default"}, + "spec": map[string]interface{}{"maxUnavailable": "1"}, + "status": map[string]interface{}{ + "currentHealthy": int64(3), + "desiredHealthy": int64(2), + "disruptionsAllowed": int64(1), + }, + }}, + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "policy/v1", "kind": "PodDisruptionBudget", + "metadata": map[string]interface{}{"name": "blocking-pdb", "namespace": "critical"}, + "spec": map[string]interface{}{"maxUnavailable": "0"}, + "status": map[string]interface{}{ + "currentHealthy": int64(2), + "desiredHealthy": int64(2), + "disruptionsAllowed": int64(0), + }, + }}, + } + + client := newFakeDynamicClient(pdbs...) 
+ check := &PDBDrainCheck{} + + result, err := check.Run(context.Background(), client, "4.21.5", "4.21.8") + if err != nil { + t.Fatal(err) + } + + if result["total_pdbs"] != 2 { + t.Errorf("total_pdbs = %v, want 2", result["total_pdbs"]) + } + + blocking, ok := result["blocking_pdbs"].([]map[string]any) + if !ok { + t.Fatal("blocking_pdbs not a slice") + } + if len(blocking) != 1 { + t.Fatalf("blocking_pdbs len = %d, want 1", len(blocking)) + } + if blocking[0]["name"] != "blocking-pdb" { + t.Errorf("blocking pdb name = %v, want blocking-pdb", blocking[0]["name"]) + } + if blocking[0]["namespace"] != "critical" { + t.Errorf("blocking pdb namespace = %v, want critical", blocking[0]["namespace"]) + } +} + +func TestEtcdHealthCheck(t *testing.T) { + objects := []runtime.Object{ + // etcd ClusterOperator + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "config.openshift.io/v1", "kind": "ClusterOperator", + "metadata": map[string]interface{}{"name": "etcd"}, + "status": map[string]interface{}{ + "conditions": []interface{}{ + map[string]interface{}{"type": "Available", "status": "True", "reason": "AsExpected"}, + map[string]interface{}{"type": "Degraded", "status": "False", "reason": "AsExpected"}, + map[string]interface{}{"type": "Upgradeable", "status": "True", "reason": "AsExpected"}, + }, + }, + }}, + // etcd pods + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "v1", "kind": "Pod", + "metadata": map[string]interface{}{"name": "etcd-master-0", "namespace": "openshift-etcd", + "labels": map[string]interface{}{"app": "etcd"}}, + "spec": map[string]interface{}{"nodeName": "master-0"}, + "status": map[string]interface{}{"phase": "Running", "conditions": []interface{}{ + map[string]interface{}{"type": "Ready", "status": "True"}, + }}, + }}, + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "v1", "kind": "Pod", + "metadata": map[string]interface{}{"name": "etcd-master-1", "namespace": 
"openshift-etcd", + "labels": map[string]interface{}{"app": "etcd"}}, + "spec": map[string]interface{}{"nodeName": "master-1"}, + "status": map[string]interface{}{"phase": "Running", "conditions": []interface{}{ + map[string]interface{}{"type": "Ready", "status": "True"}, + }}, + }}, + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "v1", "kind": "Pod", + "metadata": map[string]interface{}{"name": "etcd-master-2", "namespace": "openshift-etcd", + "labels": map[string]interface{}{"app": "etcd"}}, + "spec": map[string]interface{}{"nodeName": "master-2"}, + "status": map[string]interface{}{"phase": "Running", "conditions": []interface{}{ + map[string]interface{}{"type": "Ready", "status": "False"}, + }}, + }}, + } + + client := newFakeDynamicClient(objects...) + check := &EtcdHealthCheck{} + + result, err := check.Run(context.Background(), client, "4.21.5", "4.21.8") + if err != nil { + t.Fatal(err) + } + + if result["total_members"] != 3 { + t.Errorf("total_members = %v, want 3", result["total_members"]) + } + if result["healthy_members"] != 2 { + t.Errorf("healthy_members = %v, want 2", result["healthy_members"]) + } + + summary, ok := result["summary"].(map[string]any) + if !ok { + t.Fatal("summary not a map") + } + if summary["operator_available"] != true { + t.Errorf("operator_available = %v, want true", summary["operator_available"]) + } + if summary["operator_degraded"] != false { + t.Errorf("operator_degraded = %v, want false", summary["operator_degraded"]) + } +} + +func TestOperatorHealthCheck(t *testing.T) { + objects := []runtime.Object{ + // Healthy operator + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "config.openshift.io/v1", "kind": "ClusterOperator", + "metadata": map[string]interface{}{"name": "dns"}, + "status": map[string]interface{}{ + "conditions": []interface{}{ + map[string]interface{}{"type": "Available", "status": "True", "reason": "AsExpected"}, + map[string]interface{}{"type": 
"Degraded", "status": "False", "reason": "AsExpected"}, + map[string]interface{}{"type": "Upgradeable", "status": "True", "reason": "AsExpected"}, + }, + }, + }}, + // Degraded operator + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "config.openshift.io/v1", "kind": "ClusterOperator", + "metadata": map[string]interface{}{"name": "authentication"}, + "status": map[string]interface{}{ + "conditions": []interface{}{ + map[string]interface{}{"type": "Available", "status": "False", "reason": "OAuthDown", "message": "oauth pods crashlooping"}, + map[string]interface{}{"type": "Degraded", "status": "True", "reason": "OAuthDown", "message": "oauth pods crashlooping"}, + map[string]interface{}{"type": "Upgradeable", "status": "False", "reason": "OAuthDown", "message": "must fix before upgrade"}, + }, + }, + }}, + // MachineConfigPool: healthy master + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "machineconfiguration.openshift.io/v1", "kind": "MachineConfigPool", + "metadata": map[string]interface{}{"name": "master"}, + "spec": map[string]interface{}{"paused": false}, + "status": map[string]interface{}{ + "machineCount": int64(3), + "readyMachineCount": int64(3), + "updatedMachineCount": int64(3), + "conditions": []interface{}{ + map[string]interface{}{"type": "Degraded", "status": "False"}, + map[string]interface{}{"type": "Updating", "status": "False"}, + }, + }, + }}, + // MachineConfigPool: paused and degraded worker + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "machineconfiguration.openshift.io/v1", "kind": "MachineConfigPool", + "metadata": map[string]interface{}{"name": "worker"}, + "spec": map[string]interface{}{"paused": true}, + "status": map[string]interface{}{ + "machineCount": int64(5), + "readyMachineCount": int64(3), + "updatedMachineCount": int64(3), + "conditions": []interface{}{ + map[string]interface{}{"type": "Degraded", "status": "True", "reason": 
"RenderFailed"}, + map[string]interface{}{"type": "Updating", "status": "True", "reason": "InProgress"}, + }, + }, + }}, + } + + client := newFakeDynamicClient(objects...) + check := &OperatorHealthCheck{} + + result, err := check.Run(context.Background(), client, "4.21.5", "4.21.8") + if err != nil { + t.Fatal(err) + } + + // Operator conditions + notUpgradeable, ok := result["not_upgradeable"].([]map[string]any) + if !ok { + t.Fatal("not_upgradeable not a slice") + } + if len(notUpgradeable) != 1 { + t.Fatalf("not_upgradeable len = %d, want 1", len(notUpgradeable)) + } + if notUpgradeable[0]["name"] != "authentication" { + t.Errorf("not_upgradeable[0].name = %v, want authentication", notUpgradeable[0]["name"]) + } + + degraded, ok := result["degraded"].([]map[string]any) + if !ok { + t.Fatal("degraded not a slice") + } + if len(degraded) != 1 { + t.Fatalf("degraded len = %d, want 1", len(degraded)) + } + if degraded[0]["name"] != "authentication" { + t.Errorf("degraded[0].name = %v, want authentication", degraded[0]["name"]) + } + + notAvailable, ok := result["not_available"].([]map[string]any) + if !ok { + t.Fatal("not_available not a slice") + } + if len(notAvailable) != 1 { + t.Fatalf("not_available len = %d, want 1", len(notAvailable)) + } + + // MCP results + mcps, ok := result["machine_config_pools"].([]map[string]any) + if !ok { + t.Fatal("machine_config_pools not a slice") + } + if len(mcps) != 2 { + t.Fatalf("machine_config_pools len = %d, want 2", len(mcps)) + } + + mcpSummary, ok := result["mcp_summary"].(map[string]any) + if !ok { + t.Fatal("mcp_summary not a map") + } + if mcpSummary["paused"] != 1 { + t.Errorf("mcp_summary.paused = %v, want 1", mcpSummary["paused"]) + } + if mcpSummary["degraded"] != 1 { + t.Errorf("mcp_summary.degraded = %v, want 1", mcpSummary["degraded"]) + } + if mcpSummary["updating"] != 1 { + t.Errorf("mcp_summary.updating = %v, want 1", mcpSummary["updating"]) + } + + // Summary + summary, ok := 
result["summary"].(map[string]any) + if !ok { + t.Fatal("summary not a map") + } + if summary["total_operators"] != 2 { + t.Errorf("total_operators = %v, want 2", summary["total_operators"]) + } + if summary["not_upgradeable_count"] != 1 { + t.Errorf("not_upgradeable_count = %v, want 1", summary["not_upgradeable_count"]) + } + if summary["degraded_count"] != 1 { + t.Errorf("degraded_count = %v, want 1", summary["degraded_count"]) + } + if summary["not_available_count"] != 1 { + t.Errorf("not_available_count = %v, want 1", summary["not_available_count"]) + } +} + +func TestOperatorHealthCheck_AllHealthy(t *testing.T) { + objects := []runtime.Object{ + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "config.openshift.io/v1", "kind": "ClusterOperator", + "metadata": map[string]interface{}{"name": "dns"}, + "status": map[string]interface{}{ + "conditions": []interface{}{ + map[string]interface{}{"type": "Available", "status": "True"}, + map[string]interface{}{"type": "Degraded", "status": "False"}, + map[string]interface{}{"type": "Upgradeable", "status": "True"}, + }, + }, + }}, + } + + client := newFakeDynamicClient(objects...) 
+ check := &OperatorHealthCheck{} + + result, err := check.Run(context.Background(), client, "4.21.5", "4.21.8") + if err != nil { + t.Fatal(err) + } + + if len(result["not_upgradeable"].([]map[string]any)) != 0 { + t.Error("expected no not_upgradeable operators") + } + if len(result["degraded"].([]map[string]any)) != 0 { + t.Error("expected no degraded operators") + } + if len(result["not_available"].([]map[string]any)) != 0 { + t.Error("expected no not_available operators") + } +} + +func TestClusterConditionsCheck(t *testing.T) { + cv := &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "config.openshift.io/v1", "kind": "ClusterVersion", + "metadata": map[string]interface{}{"name": "version"}, + "spec": map[string]interface{}{ + "channel": "stable-4.21", + "clusterID": "test-cluster-id-123", + }, + "status": map[string]interface{}{ + "conditions": []interface{}{ + map[string]interface{}{"type": "Available", "status": "True", "reason": "AsExpected"}, + map[string]interface{}{"type": "Progressing", "status": "False", "reason": "AsExpected"}, + map[string]interface{}{"type": "Upgradeable", "status": "True", "reason": "AsExpected", "message": ""}, + map[string]interface{}{"type": "Failing", "status": "False", "reason": "AsExpected"}, + }, + "history": []interface{}{ + map[string]interface{}{ + "version": "4.21.5", + "state": "Completed", + "startedTime": "2026-04-10T10:00:00Z", + "completionTime": "2026-04-10T11:00:00Z", + }, + map[string]interface{}{ + "version": "4.21.4", + "state": "Completed", + "startedTime": "2026-04-01T10:00:00Z", + "completionTime": "2026-04-01T11:00:00Z", + }, + }, + }, + }} + + client := newFakeDynamicClient(cv) + check := &ClusterConditionsCheck{} + + result, err := check.Run(context.Background(), client, "4.21.5", "4.21.8") + if err != nil { + t.Fatal(err) + } + + if result["channel"] != "stable-4.21" { + t.Errorf("channel = %v, want stable-4.21", result["channel"]) + } + if result["cluster_id"] != 
"test-cluster-id-123" { + t.Errorf("cluster_id = %v, want test-cluster-id-123", result["cluster_id"]) + } + if result["update_in_progress"] != false { + t.Errorf("update_in_progress = %v, want false", result["update_in_progress"]) + } + + upgradeable, ok := result["upgradeable"].(map[string]any) + if !ok { + t.Fatal("upgradeable not a map") + } + if upgradeable["status"] != "True" { + t.Errorf("upgradeable.status = %v, want True", upgradeable["status"]) + } + + history, ok := result["recent_history"].([]map[string]any) + if !ok { + t.Fatal("recent_history not a slice") + } + if len(history) != 2 { + t.Fatalf("recent_history len = %d, want 2", len(history)) + } + if history[0]["version"] != "4.21.5" { + t.Errorf("history[0].version = %v, want 4.21.5", history[0]["version"]) + } + + summary, ok := result["summary"].(map[string]any) + if !ok { + t.Fatal("summary not a map") + } + if summary["upgradeable"] != true { + t.Errorf("summary.upgradeable = %v, want true", summary["upgradeable"]) + } + if summary["update_in_progress"] != false { + t.Errorf("summary.update_in_progress = %v, want false", summary["update_in_progress"]) + } +} + +func TestClusterConditionsCheck_ProgressingTrue(t *testing.T) { + cv := &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "config.openshift.io/v1", "kind": "ClusterVersion", + "metadata": map[string]interface{}{"name": "version"}, + "spec": map[string]interface{}{"channel": "stable-4.21", "clusterID": "abc"}, + "status": map[string]interface{}{ + "conditions": []interface{}{ + map[string]interface{}{"type": "Progressing", "status": "True", "reason": "Updating"}, + map[string]interface{}{"type": "Upgradeable", "status": "False", "reason": "Updating", "message": "update in progress"}, + }, + "history": []interface{}{}, + }, + }} + + client := newFakeDynamicClient(cv) + check := &ClusterConditionsCheck{} + + result, err := check.Run(context.Background(), client, "4.21.5", "4.21.8") + if err != nil { + t.Fatal(err) + 
} + + if result["update_in_progress"] != true { + t.Errorf("update_in_progress = %v, want true", result["update_in_progress"]) + } + + summary := result["summary"].(map[string]any) + if summary["upgradeable"] != false { + t.Errorf("summary.upgradeable = %v, want false", summary["upgradeable"]) + } +} + +func TestAPIDeprecationsCheck(t *testing.T) { + objects := []runtime.Object{ + // API removed in target version with active usage — blocker + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "apiserver.openshift.io/v1", "kind": "APIRequestCount", + "metadata": map[string]interface{}{"name": "flowschemas.v1beta3.flowcontrol.apiserver.k8s.io"}, + "status": map[string]interface{}{ + "removedInRelease": "4.21.8", + "requestCount": int64(150), + "conditions": []interface{}{ + map[string]interface{}{"type": "Deprecated", "status": "True", "message": "deprecated since 4.20"}, + }, + }, + }}, + // Deprecated but not removed — warning + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "apiserver.openshift.io/v1", "kind": "APIRequestCount", + "metadata": map[string]interface{}{"name": "cronjobs.v1beta1.batch"}, + "status": map[string]interface{}{ + "removedInRelease": "4.25.0", + "requestCount": int64(42), + "conditions": []interface{}{ + map[string]interface{}{"type": "Deprecated", "status": "True", "message": "use v1 instead"}, + }, + }, + }}, + // No usage — should not appear + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "apiserver.openshift.io/v1", "kind": "APIRequestCount", + "metadata": map[string]interface{}{"name": "unused.v1beta1.example"}, + "status": map[string]interface{}{ + "removedInRelease": "4.21.0", + "requestCount": int64(0), + }, + }}, + } + + client := newFakeDynamicClient(objects...) 
+ check := &APIDeprecationsCheck{} + + result, err := check.Run(context.Background(), client, "4.21.5", "4.21.8") + if err != nil { + t.Fatal(err) + } + + blockers, ok := result["blocker_apis"].([]map[string]any) + if !ok { + t.Fatal("blocker_apis not a slice") + } + if len(blockers) != 1 { + t.Fatalf("blocker_apis len = %d, want 1", len(blockers)) + } + if blockers[0]["resource"] != "flowschemas.v1beta3.flowcontrol.apiserver.k8s.io" { + t.Errorf("blocker resource = %v", blockers[0]["resource"]) + } + if blockers[0]["request_count"] != int64(150) { + t.Errorf("blocker request_count = %v, want 150", blockers[0]["request_count"]) + } + + warnings, ok := result["warning_apis"].([]map[string]any) + if !ok { + t.Fatal("warning_apis not a slice") + } + if len(warnings) != 2 { + t.Fatalf("warning_apis len = %d, want 2", len(warnings)) + } + + summary, ok := result["summary"].(map[string]any) + if !ok { + t.Fatal("summary not a map") + } + if summary["blockers"] != 1 { + t.Errorf("summary.blockers = %v, want 1", summary["blockers"]) + } + if summary["warnings"] != 2 { + t.Errorf("summary.warnings = %v, want 2", summary["warnings"]) + } + if summary["total"] != 3 { + t.Errorf("summary.total = %v, want 3", summary["total"]) + } +} + +func TestAPIDeprecationsCheck_NoBlockers(t *testing.T) { + objects := []runtime.Object{ + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "apiserver.openshift.io/v1", "kind": "APIRequestCount", + "metadata": map[string]interface{}{"name": "pods.v1."}, + "status": map[string]interface{}{ + "requestCount": int64(500), + }, + }}, + } + + client := newFakeDynamicClient(objects...) 
+ check := &APIDeprecationsCheck{} + + result, err := check.Run(context.Background(), client, "4.21.5", "4.21.8") + if err != nil { + t.Fatal(err) + } + + blockers := result["blocker_apis"].([]map[string]any) + if len(blockers) != 0 { + t.Errorf("expected no blockers, got %d", len(blockers)) + } + + warnings := result["warning_apis"].([]map[string]any) + if len(warnings) != 0 { + t.Errorf("expected no warnings, got %d", len(warnings)) + } +} + +func TestCRDCompatCheck(t *testing.T) { + objects := []runtime.Object{ + // CRD with stored version that is still served — ok + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "apiextensions.k8s.io/v1", "kind": "CustomResourceDefinition", + "metadata": map[string]interface{}{"name": "widgets.example.com"}, + "spec": map[string]interface{}{ + "versions": []interface{}{ + map[string]interface{}{"name": "v1", "served": true}, + map[string]interface{}{"name": "v1beta1", "served": true}, + }, + }, + "status": map[string]interface{}{ + "storedVersions": []interface{}{"v1", "v1beta1"}, + }, + }}, + // CRD with stored version that is NO LONGER served — issue + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "apiextensions.k8s.io/v1", "kind": "CustomResourceDefinition", + "metadata": map[string]interface{}{"name": "gadgets.example.com"}, + "spec": map[string]interface{}{ + "versions": []interface{}{ + map[string]interface{}{"name": "v2", "served": true}, + map[string]interface{}{"name": "v1", "served": false}, + }, + }, + "status": map[string]interface{}{ + "storedVersions": []interface{}{"v1"}, + }, + }}, + } + + client := newFakeDynamicClient(objects...) 
+ check := &CRDCompatCheck{} + + result, err := check.Run(context.Background(), client, "4.21.5", "4.21.8") + if err != nil { + t.Fatal(err) + } + + if result["total_crds"] != 2 { + t.Errorf("total_crds = %v, want 2", result["total_crds"]) + } + + issues, ok := result["version_issues"].([]map[string]any) + if !ok { + t.Fatal("version_issues not a slice") + } + if len(issues) != 1 { + t.Fatalf("version_issues len = %d, want 1", len(issues)) + } + if issues[0]["crd"] != "gadgets.example.com" { + t.Errorf("crd = %v, want gadgets.example.com", issues[0]["crd"]) + } + if issues[0]["stored_version"] != "v1" { + t.Errorf("stored_version = %v, want v1", issues[0]["stored_version"]) + } + + summary, ok := result["summary"].(map[string]any) + if !ok { + t.Fatal("summary not a map") + } + if summary["version_issues"] != 1 { + t.Errorf("summary.version_issues = %v, want 1", summary["version_issues"]) + } +} + +func TestCRDCompatCheck_NoIssues(t *testing.T) { + objects := []runtime.Object{ + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "apiextensions.k8s.io/v1", "kind": "CustomResourceDefinition", + "metadata": map[string]interface{}{"name": "things.example.com"}, + "spec": map[string]interface{}{ + "versions": []interface{}{ + map[string]interface{}{"name": "v1", "served": true}, + }, + }, + "status": map[string]interface{}{ + "storedVersions": []interface{}{"v1"}, + }, + }}, + } + + client := newFakeDynamicClient(objects...) 
+ check := &CRDCompatCheck{} + + result, err := check.Run(context.Background(), client, "4.21.5", "4.21.8") + if err != nil { + t.Fatal(err) + } + + issues := result["version_issues"].([]map[string]any) + if len(issues) != 0 { + t.Errorf("expected no version issues, got %d", len(issues)) + } +} + +func TestNetworkCheck(t *testing.T) { + objects := []runtime.Object{ + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "config.openshift.io/v1", "kind": "Network", + "metadata": map[string]interface{}{"name": "cluster"}, + "status": map[string]interface{}{ + "networkType": "OpenShiftSDN", + }, + }}, + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "config.openshift.io/v1", "kind": "Proxy", + "metadata": map[string]interface{}{"name": "cluster"}, + "spec": map[string]interface{}{ + "httpProxy": "http://proxy.example.com:8080", + }, + }}, + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "config.openshift.io/v1", "kind": "APIServer", + "metadata": map[string]interface{}{"name": "cluster"}, + "spec": map[string]interface{}{ + "tlsSecurityProfile": map[string]interface{}{ + "type": "Old", + }, + }, + }}, + } + + client := newFakeDynamicClient(objects...) 
+ check := &NetworkCheck{} + + result, err := check.Run(context.Background(), client, "4.21.5", "4.21.8") + if err != nil { + t.Fatal(err) + } + + if result["network_type"] != "OpenShiftSDN" { + t.Errorf("network_type = %v, want OpenShiftSDN", result["network_type"]) + } + if result["sdn_warning"] == nil { + t.Error("should have sdn_warning for OpenShiftSDN") + } + if result["tls_profile"] != "Old" { + t.Errorf("tls_profile = %v, want Old", result["tls_profile"]) + } + + summary, ok := result["summary"].(map[string]any) + if !ok { + t.Fatal("summary not a map") + } + if summary["is_sdn"] != true { + t.Errorf("is_sdn = %v, want true", summary["is_sdn"]) + } +} + +// fakeClusterObjects returns a representative set of cluster objects that exercises +// every readiness check with non-trivial data. +func fakeClusterObjects() []runtime.Object { + return []runtime.Object{ + // --- ClusterVersion (cluster_conditions) --- + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "config.openshift.io/v1", "kind": "ClusterVersion", + "metadata": map[string]interface{}{"name": "version"}, + "spec": map[string]interface{}{"channel": "stable-4.21", "clusterID": "test-id"}, + "status": map[string]interface{}{ + "conditions": []interface{}{ + map[string]interface{}{"type": "Available", "status": "True", "reason": "AsExpected"}, + map[string]interface{}{"type": "Progressing", "status": "False", "reason": "AsExpected"}, + map[string]interface{}{"type": "Upgradeable", "status": "True", "reason": "AsExpected"}, + }, + "history": []interface{}{ + map[string]interface{}{"version": "4.21.5", "state": "Completed", "startedTime": "2026-04-10T10:00:00Z", "completionTime": "2026-04-10T11:00:00Z"}, + }, + }, + }}, + + // --- ClusterOperators (operator_health + etcd_health) --- + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "config.openshift.io/v1", "kind": "ClusterOperator", + "metadata": map[string]interface{}{"name": "etcd"}, + "status": 
map[string]interface{}{ + "conditions": []interface{}{ + map[string]interface{}{"type": "Available", "status": "True"}, + map[string]interface{}{"type": "Degraded", "status": "False"}, + map[string]interface{}{"type": "Upgradeable", "status": "True"}, + }, + }, + }}, + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "config.openshift.io/v1", "kind": "ClusterOperator", + "metadata": map[string]interface{}{"name": "authentication"}, + "status": map[string]interface{}{ + "conditions": []interface{}{ + map[string]interface{}{"type": "Available", "status": "True"}, + map[string]interface{}{"type": "Degraded", "status": "True", "reason": "OAuthFlaky", "message": "intermittent failures"}, + map[string]interface{}{"type": "Upgradeable", "status": "True"}, + }, + }, + }}, + + // --- MachineConfigPools (operator_health) --- + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "machineconfiguration.openshift.io/v1", "kind": "MachineConfigPool", + "metadata": map[string]interface{}{"name": "master"}, + "spec": map[string]interface{}{"paused": false}, + "status": map[string]interface{}{ + "machineCount": int64(3), "readyMachineCount": int64(3), "updatedMachineCount": int64(3), + "conditions": []interface{}{ + map[string]interface{}{"type": "Degraded", "status": "False"}, + map[string]interface{}{"type": "Updating", "status": "False"}, + }, + }, + }}, + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "machineconfiguration.openshift.io/v1", "kind": "MachineConfigPool", + "metadata": map[string]interface{}{"name": "worker"}, + "spec": map[string]interface{}{"paused": false}, + "status": map[string]interface{}{ + "machineCount": int64(3), "readyMachineCount": int64(3), "updatedMachineCount": int64(3), + "conditions": []interface{}{ + map[string]interface{}{"type": "Degraded", "status": "False"}, + map[string]interface{}{"type": "Updating", "status": "False"}, + }, + }, + }}, + + // --- Etcd pods 
(etcd_health) --- + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "v1", "kind": "Pod", + "metadata": map[string]interface{}{"name": "etcd-master-0", "namespace": "openshift-etcd", "labels": map[string]interface{}{"app": "etcd"}}, + "spec": map[string]interface{}{"nodeName": "master-0"}, "status": map[string]interface{}{"phase": "Running", "conditions": []interface{}{map[string]interface{}{"type": "Ready", "status": "True"}}}, + }}, + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "v1", "kind": "Pod", + "metadata": map[string]interface{}{"name": "etcd-master-1", "namespace": "openshift-etcd", "labels": map[string]interface{}{"app": "etcd"}}, + "spec": map[string]interface{}{"nodeName": "master-1"}, "status": map[string]interface{}{"phase": "Running", "conditions": []interface{}{map[string]interface{}{"type": "Ready", "status": "True"}}}, + }}, + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "v1", "kind": "Pod", + "metadata": map[string]interface{}{"name": "etcd-master-2", "namespace": "openshift-etcd", "labels": map[string]interface{}{"app": "etcd"}}, + "spec": map[string]interface{}{"nodeName": "master-2"}, "status": map[string]interface{}{"phase": "Running", "conditions": []interface{}{map[string]interface{}{"type": "Ready", "status": "True"}}}, + }}, + + // --- Nodes (node_capacity) --- + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "v1", "kind": "Node", + "metadata": map[string]interface{}{"name": "master-0"}, + "status": map[string]interface{}{"conditions": []interface{}{map[string]interface{}{"type": "Ready", "status": "True"}}}, + }}, + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "v1", "kind": "Node", + "metadata": map[string]interface{}{"name": "master-1"}, + "status": map[string]interface{}{"conditions": []interface{}{map[string]interface{}{"type": "Ready", "status": "True"}}}, + }}, + 
&unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "v1", "kind": "Node", + "metadata": map[string]interface{}{"name": "master-2"}, + "status": map[string]interface{}{"conditions": []interface{}{map[string]interface{}{"type": "Ready", "status": "True"}}}, + }}, + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "v1", "kind": "Node", + "metadata": map[string]interface{}{"name": "worker-0"}, + "status": map[string]interface{}{"conditions": []interface{}{map[string]interface{}{"type": "Ready", "status": "True"}}}, + }}, + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "v1", "kind": "Node", + "metadata": map[string]interface{}{"name": "worker-1"}, + "spec": map[string]interface{}{"unschedulable": true}, + "status": map[string]interface{}{"conditions": []interface{}{map[string]interface{}{"type": "Ready", "status": "False"}}}, + }}, + + // --- PDBs (pdb_drain) --- + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "policy/v1", "kind": "PodDisruptionBudget", + "metadata": map[string]interface{}{"name": "etcd-guard", "namespace": "openshift-etcd"}, + "spec": map[string]interface{}{"maxUnavailable": "1"}, + "status": map[string]interface{}{"currentHealthy": int64(3), "desiredHealthy": int64(2), "disruptionsAllowed": int64(1)}, + }}, + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "policy/v1", "kind": "PodDisruptionBudget", + "metadata": map[string]interface{}{"name": "zero-budget", "namespace": "app-ns"}, + "spec": map[string]interface{}{"maxUnavailable": "0"}, + "status": map[string]interface{}{"currentHealthy": int64(2), "desiredHealthy": int64(2), "disruptionsAllowed": int64(0)}, + }}, + + // --- APIRequestCounts (api_deprecations) --- + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "apiserver.openshift.io/v1", "kind": "APIRequestCount", + "metadata": map[string]interface{}{"name": 
"flowschemas.v1beta3.flowcontrol.apiserver.k8s.io"}, + "status": map[string]interface{}{ + "removedInRelease": "4.21.8", "requestCount": int64(100), + "conditions": []interface{}{map[string]interface{}{"type": "Deprecated", "status": "True"}}, + }, + }}, + + // --- CRDs (crd_compat) --- + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "apiextensions.k8s.io/v1", "kind": "CustomResourceDefinition", + "metadata": map[string]interface{}{"name": "widgets.example.com"}, + "spec": map[string]interface{}{ + "versions": []interface{}{ + map[string]interface{}{"name": "v1", "served": true}, + map[string]interface{}{"name": "v1beta1", "served": false}, + }, + }, + "status": map[string]interface{}{"storedVersions": []interface{}{"v1beta1"}}, + }}, + + // --- Network, Proxy, APIServer (network) --- + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "config.openshift.io/v1", "kind": "Network", + "metadata": map[string]interface{}{"name": "cluster"}, + "status": map[string]interface{}{"networkType": "OVNKubernetes"}, + }}, + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "config.openshift.io/v1", "kind": "Proxy", + "metadata": map[string]interface{}{"name": "cluster"}, + "spec": map[string]interface{}{"httpProxy": "http://proxy.corp:8080"}, + }}, + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "config.openshift.io/v1", "kind": "APIServer", + "metadata": map[string]interface{}{"name": "cluster"}, + "spec": map[string]interface{}{}, + }}, + + // --- OLM: Subscription + CSV (olm_operator_lifecycle) --- + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "operators.coreos.com/v1alpha1", "kind": "Subscription", + "metadata": map[string]interface{}{"name": "elasticsearch-operator", "namespace": "openshift-operators-redhat"}, + "spec": map[string]interface{}{ + "channel": "stable-5.8", "name": "elasticsearch-operator", + "source": "redhat-operators", 
"sourceNamespace": "openshift-marketplace", + "installPlanApproval": "Automatic", + }, + "status": map[string]interface{}{ + "state": "AtLatestKnown", + "installedCSV": "elasticsearch-operator.v5.8.6", + "currentCSV": "elasticsearch-operator.v5.8.6", + }, + }}, + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "operators.coreos.com/v1alpha1", "kind": "ClusterServiceVersion", + "metadata": map[string]interface{}{"name": "elasticsearch-operator.v5.8.6", "namespace": "openshift-operators-redhat"}, + "spec": map[string]interface{}{ + "version": "5.8.6", + "displayName": "OpenShift Elasticsearch Operator", + }, + "status": map[string]interface{}{"phase": "Succeeded"}, + }}, + } +} + +func TestRunAllWithFakeCluster(t *testing.T) { + client := newFakeDynamicClient(fakeClusterObjects()...) + output := RunAll(context.Background(), client, "4.21.5", "4.21.8") + + if output.CurrentVersion != "4.21.5" { + t.Errorf("CurrentVersion = %q, want 4.21.5", output.CurrentVersion) + } + if output.TargetVersion != "4.21.8" { + t.Errorf("TargetVersion = %q, want 4.21.8", output.TargetVersion) + } + if output.Meta.TotalChecks != 9 { + t.Errorf("TotalChecks = %d, want 9", output.Meta.TotalChecks) + } + + for _, name := range []string{ + "cluster_conditions", "operator_health", "api_deprecations", + "node_capacity", "pdb_drain", "etcd_health", "network", + "crd_compat", "olm_operator_lifecycle", + } { + r, ok := output.Checks[name] + if !ok { + t.Errorf("missing check result: %s", name) + continue + } + if r.Status != StatusOK { + t.Errorf("check %s status = %q, error = %q", name, r.Status, r.Error) + } + } + + // cluster_conditions: verify CV data flows through + cc := output.Checks["cluster_conditions"] + if cc.Data["channel"] != "stable-4.21" { + t.Errorf("cluster_conditions.channel = %v, want stable-4.21", cc.Data["channel"]) + } + + // operator_health: 2 COs, 1 degraded; 2 MCPs + oh := output.Checks["operator_health"] + summary := 
oh.Data["summary"].(map[string]any) + if summary["total_operators"] != 2 { + t.Errorf("operator_health total_operators = %v, want 2", summary["total_operators"]) + } + if summary["degraded_count"] != 1 { + t.Errorf("operator_health degraded_count = %v, want 1", summary["degraded_count"]) + } + mcps := oh.Data["machine_config_pools"].([]map[string]any) + if len(mcps) != 2 { + t.Errorf("operator_health MCPs = %d, want 2", len(mcps)) + } + + // etcd_health: 3 running pods + eh := output.Checks["etcd_health"] + if eh.Data["total_members"] != 3 { + t.Errorf("etcd_health total_members = %v, want 3", eh.Data["total_members"]) + } + if eh.Data["healthy_members"] != 3 { + t.Errorf("etcd_health healthy_members = %v, want 3", eh.Data["healthy_members"]) + } + + // node_capacity: 5 nodes, 4 ready, 1 unschedulable + nc := output.Checks["node_capacity"] + if nc.Data["total_nodes"] != 5 { + t.Errorf("node_capacity total_nodes = %v, want 5", nc.Data["total_nodes"]) + } + if nc.Data["ready_nodes"] != 4 { + t.Errorf("node_capacity ready_nodes = %v, want 4", nc.Data["ready_nodes"]) + } + if nc.Data["unschedulable_nodes"] != 1 { + t.Errorf("node_capacity unschedulable_nodes = %v, want 1", nc.Data["unschedulable_nodes"]) + } + + // pdb_drain: 2 PDBs, 1 blocking + pd := output.Checks["pdb_drain"] + if pd.Data["total_pdbs"] != 2 { + t.Errorf("pdb_drain total_pdbs = %v, want 2", pd.Data["total_pdbs"]) + } + blocking := pd.Data["blocking_pdbs"].([]map[string]any) + if len(blocking) != 1 { + t.Errorf("pdb_drain blocking_pdbs = %d, want 1", len(blocking)) + } + + // api_deprecations: 1 blocker API + ad := output.Checks["api_deprecations"] + adSummary := ad.Data["summary"].(map[string]any) + if adSummary["blockers"] != 1 { + t.Errorf("api_deprecations blockers = %v, want 1", adSummary["blockers"]) + } + + // crd_compat: 1 CRD with stored version no longer served + crd := output.Checks["crd_compat"] + crdSummary := crd.Data["summary"].(map[string]any) + if crdSummary["version_issues"] != 1 { + 
t.Errorf("crd_compat version_issues = %v, want 1", crdSummary["version_issues"]) + } + + // network: OVN, proxy configured + nw := output.Checks["network"] + if nw.Data["network_type"] != "OVNKubernetes" { + t.Errorf("network type = %v, want OVNKubernetes", nw.Data["network_type"]) + } + proxy := nw.Data["proxy"].(map[string]any) + if proxy["http_proxy"] != "http://proxy.corp:8080" { + t.Errorf("network proxy = %v, want http://proxy.corp:8080", proxy["http_proxy"]) + } + + // olm_operator_lifecycle: 1 subscription + olm := output.Checks["olm_operator_lifecycle"] + olmSummary := olm.Data["summary"].(map[string]any) + if olmSummary["total_operators"] != 1 { + t.Errorf("olm total_operators = %v, want 1", olmSummary["total_operators"]) + } + operators := olm.Data["operators"].([]map[string]any) + if operators[0]["installed_version"] != "5.8.6" { + t.Errorf("olm installed_version = %v, want 5.8.6", operators[0]["installed_version"]) + } + + // Verify the full output marshals to valid JSON + b, err := json.Marshal(output) + if err != nil { + t.Fatalf("failed to marshal output: %v", err) + } + var m map[string]any + if err := json.Unmarshal(b, &m); err != nil { + t.Fatalf("failed to unmarshal output: %v", err) + } + if _, ok := m["checks"]; !ok { + t.Error("marshaled output missing 'checks' key") + } + if _, ok := m["meta"]; !ok { + t.Error("marshaled output missing 'meta' key") + } +} diff --git a/pkg/readiness/client.go b/pkg/readiness/client.go new file mode 100644 index 000000000..121de91fd --- /dev/null +++ b/pkg/readiness/client.go @@ -0,0 +1,175 @@ +package readiness + +import ( + "context" + "fmt" + "strings" + + semver "github.com/blang/semver/v4" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/client-go/dynamic" +) + +var ( + GVRClusterVersion = schema.GroupVersionResource{Group: "config.openshift.io", Version: "v1", Resource: "clusterversions"} + 
GVRClusterOperator = schema.GroupVersionResource{Group: "config.openshift.io", Version: "v1", Resource: "clusteroperators"} + GVRMachineConfigPool = schema.GroupVersionResource{Group: "machineconfiguration.openshift.io", Version: "v1", Resource: "machineconfigpools"} + GVRNode = schema.GroupVersionResource{Group: "", Version: "v1", Resource: "nodes"} + GVRPod = schema.GroupVersionResource{Group: "", Version: "v1", Resource: "pods"} + GVRPDB = schema.GroupVersionResource{Group: "policy", Version: "v1", Resource: "poddisruptionbudgets"} + GVRPV = schema.GroupVersionResource{Group: "", Version: "v1", Resource: "persistentvolumes"} + GVRSecret = schema.GroupVersionResource{Group: "", Version: "v1", Resource: "secrets"} + GVRCRD = schema.GroupVersionResource{Group: "apiextensions.k8s.io", Version: "v1", Resource: "customresourcedefinitions"} + GVRCSV = schema.GroupVersionResource{Group: "operators.coreos.com", Version: "v1alpha1", Resource: "clusterserviceversions"} + GVRSubscription = schema.GroupVersionResource{Group: "operators.coreos.com", Version: "v1alpha1", Resource: "subscriptions"} + GVRInstallPlan = schema.GroupVersionResource{Group: "operators.coreos.com", Version: "v1alpha1", Resource: "installplans"} + GVRPackageManifest = schema.GroupVersionResource{Group: "packages.operators.coreos.com", Version: "v1", Resource: "packagemanifests"} + GVRAPIRequestCount = schema.GroupVersionResource{Group: "apiserver.openshift.io", Version: "v1", Resource: "apirequestcounts"} + GVRInfrastructure = schema.GroupVersionResource{Group: "config.openshift.io", Version: "v1", Resource: "infrastructures"} + GVRNetwork = schema.GroupVersionResource{Group: "config.openshift.io", Version: "v1", Resource: "networks"} + GVRAPIServer = schema.GroupVersionResource{Group: "config.openshift.io", Version: "v1", Resource: "apiservers"} + GVRProxy = schema.GroupVersionResource{Group: "config.openshift.io", Version: "v1", Resource: "proxies"} + GVRNodeMetrics = 
schema.GroupVersionResource{Group: "metrics.k8s.io", Version: "v1beta1", Resource: "nodes"} + GVRValidatingWebhook = schema.GroupVersionResource{Group: "admissionregistration.k8s.io", Version: "v1", Resource: "validatingwebhookconfigurations"} + GVRMutatingWebhook = schema.GroupVersionResource{Group: "admissionregistration.k8s.io", Version: "v1", Resource: "mutatingwebhookconfigurations"} +) + +// GetResource fetches a single cluster-scoped resource by name. +func GetResource(ctx context.Context, c dynamic.Interface, gvr schema.GroupVersionResource, name string) (*unstructured.Unstructured, error) { + return c.Resource(gvr).Get(ctx, name, metav1.GetOptions{}) +} + +// GetNamespacedResource fetches a single namespaced resource. +func GetNamespacedResource(ctx context.Context, c dynamic.Interface, gvr schema.GroupVersionResource, namespace, name string) (*unstructured.Unstructured, error) { + return c.Resource(gvr).Namespace(namespace).Get(ctx, name, metav1.GetOptions{}) +} + +// ListResources lists cluster-scoped resources, optionally filtered by label selector. +func ListResources(ctx context.Context, c dynamic.Interface, gvr schema.GroupVersionResource, labelSelector string) ([]unstructured.Unstructured, error) { + opts := metav1.ListOptions{} + if labelSelector != "" { + opts.LabelSelector = labelSelector + } + list, err := c.Resource(gvr).List(ctx, opts) + if err != nil { + return nil, err + } + return list.Items, nil +} + +// ListNamespacedResources lists resources in a specific namespace. 
+func ListNamespacedResources(ctx context.Context, c dynamic.Interface, gvr schema.GroupVersionResource, namespace, labelSelector string) ([]unstructured.Unstructured, error) { + opts := metav1.ListOptions{} + if labelSelector != "" { + opts.LabelSelector = labelSelector + } + list, err := c.Resource(gvr).Namespace(namespace).List(ctx, opts) + if err != nil { + return nil, err + } + return list.Items, nil +} + +// ListAllNamespacedResources lists resources across all namespaces. +func ListAllNamespacedResources(ctx context.Context, c dynamic.Interface, gvr schema.GroupVersionResource, labelSelector string) ([]unstructured.Unstructured, error) { + return ListResources(ctx, c, gvr, labelSelector) +} + +// Condition represents a parsed Kubernetes status condition. +type Condition struct { + Status string `json:"status"` + Reason string `json:"reason"` + Message string `json:"message"` + LastTransition string `json:"last_transition"` +} + +// GetConditions extracts status.conditions from an unstructured object into a map keyed by type. +func GetConditions(obj *unstructured.Unstructured) map[string]Condition { + conditions, _, _ := unstructured.NestedSlice(obj.Object, "status", "conditions") + result := make(map[string]Condition, len(conditions)) + for _, raw := range conditions { + c, ok := raw.(map[string]interface{}) + if !ok { + continue + } + t, _ := c["type"].(string) + result[t] = Condition{ + Status: strVal(c, "status"), + Reason: strVal(c, "reason"), + Message: strVal(c, "message"), + LastTransition: strVal(c, "lastTransitionTime"), + } + } + return result +} + +// Convenience wrappers for nested field access. + +func NestedString(obj map[string]interface{}, fields ...string) string { + val, _, _ := unstructured.NestedString(obj, fields...) + return val +} + +func NestedInt64(obj map[string]interface{}, fields ...string) int64 { + val, _, _ := unstructured.NestedInt64(obj, fields...) 
+ return val +} + +func NestedBool(obj map[string]interface{}, fields ...string) bool { + val, _, _ := unstructured.NestedBool(obj, fields...) + return val +} + +func NestedSlice(obj map[string]interface{}, fields ...string) []interface{} { + val, _, _ := unstructured.NestedSlice(obj, fields...) + return val +} + +func NestedMap(obj map[string]interface{}, fields ...string) map[string]interface{} { + val, _, _ := unstructured.NestedMap(obj, fields...) + return val +} + +func strVal(m map[string]interface{}, key string) string { + v, _ := m[key].(string) + return v +} + +const ( + ConditionTrue = "True" + ConditionFalse = "False" +) + +const ( + ConditionAvailable = "Available" + ConditionDegraded = "Degraded" + ConditionProgressing = "Progressing" + ConditionUpgradeable = "Upgradeable" + ConditionUpdating = "Updating" + ConditionRecommended = "Recommended" +) + +// CompareVersions compares two semver strings. Returns -1, 0, or 1. +func CompareVersions(a, b string) (int, error) { + va, err := semver.ParseTolerant(a) + if err != nil { + return 0, fmt.Errorf("invalid version %q: %w", a, err) + } + vb, err := semver.ParseTolerant(b) + if err != nil { + return 0, fmt.Errorf("invalid version %q: %w", b, err) + } + return va.Compare(vb), nil +} + +// FormatLabelSelector converts a map of labels to a label selector string. 
+func FormatLabelSelector(labels map[string]string) string { + parts := make([]string, 0, len(labels)) + for k, v := range labels { + parts = append(parts, k+"="+v) + } + return strings.Join(parts, ",") +} diff --git a/pkg/readiness/client_test.go b/pkg/readiness/client_test.go new file mode 100644 index 000000000..759660b79 --- /dev/null +++ b/pkg/readiness/client_test.go @@ -0,0 +1,169 @@ +package readiness + +import ( + "testing" + + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" +) + +func TestGetConditions(t *testing.T) { + obj := &unstructured.Unstructured{ + Object: map[string]interface{}{ + "status": map[string]interface{}{ + "conditions": []interface{}{ + map[string]interface{}{ + "type": "Available", + "status": "True", + "reason": "AsExpected", + "message": "All is well", + "lastTransitionTime": "2026-04-14T10:00:00Z", + }, + map[string]interface{}{ + "type": "Degraded", + "status": "False", + "reason": "AsExpected", + "message": "", + "lastTransitionTime": "2026-04-14T10:00:00Z", + }, + }, + }, + }, + } + + conditions := GetConditions(obj) + + if len(conditions) != 2 { + t.Fatalf("got %d conditions, want 2", len(conditions)) + } + + avail := conditions["Available"] + if avail.Status != "True" { + t.Errorf("Available.Status = %q, want True", avail.Status) + } + if avail.Reason != "AsExpected" { + t.Errorf("Available.Reason = %q, want AsExpected", avail.Reason) + } + if avail.Message != "All is well" { + t.Errorf("Available.Message = %q", avail.Message) + } + + degraded := conditions["Degraded"] + if degraded.Status != "False" { + t.Errorf("Degraded.Status = %q, want False", degraded.Status) + } +} + +func TestGetConditions_NoConditions(t *testing.T) { + obj := &unstructured.Unstructured{ + Object: map[string]interface{}{ + "status": map[string]interface{}{}, + }, + } + conditions := GetConditions(obj) + if len(conditions) != 0 { + t.Errorf("got %d conditions, want 0", len(conditions)) + } +} + +func TestCompareVersions(t *testing.T) { + tests := 
[]struct { + a, b string + expected int + expectErr bool + }{ + {"4.21.5", "4.21.8", -1, false}, + {"4.21.8", "4.21.5", 1, false}, + {"4.21.5", "4.21.5", 0, false}, + {"4.22.0", "4.21.5", 1, false}, + {"bad", "4.21.5", 0, true}, + {"4.21.5", "bad", 0, true}, + } + + for _, tt := range tests { + t.Run(tt.a+"_vs_"+tt.b, func(t *testing.T) { + got, err := CompareVersions(tt.a, tt.b) + if tt.expectErr && err == nil { + t.Errorf("CompareVersions(%q, %q) expected error, got nil", tt.a, tt.b) + } + if !tt.expectErr && err != nil { + t.Errorf("CompareVersions(%q, %q) unexpected error: %v", tt.a, tt.b, err) + } + if got != tt.expected { + t.Errorf("CompareVersions(%q, %q) = %d, want %d", tt.a, tt.b, got, tt.expected) + } + }) + } +} + +func TestFormatLabelSelector(t *testing.T) { + tests := []struct { + name string + labels map[string]string + contains []string + }{ + { + name: "single label", + labels: map[string]string{"app": "etcd"}, + contains: []string{"app=etcd"}, + }, + { + name: "empty", + labels: map[string]string{}, + contains: []string{}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := FormatLabelSelector(tt.labels) + for _, s := range tt.contains { + found := false + for i := 0; i <= len(got)-len(s); i++ { + if got[i:i+len(s)] == s { + found = true + break + } + } + if !found { + t.Errorf("FormatLabelSelector(%v) = %q, want to contain %q", tt.labels, got, s) + } + } + }) + } +} + +func TestNestedHelpers(t *testing.T) { + obj := map[string]interface{}{ + "spec": map[string]interface{}{ + "name": "test", + "count": int64(42), + "enabled": true, + "items": []interface{}{"a", "b"}, + "metadata": map[string]interface{}{"key": "val"}, + }, + } + + if got := NestedString(obj, "spec", "name"); got != "test" { + t.Errorf("NestedString = %q, want test", got) + } + if got := NestedInt64(obj, "spec", "count"); got != 42 { + t.Errorf("NestedInt64 = %d, want 42", got) + } + if got := NestedBool(obj, "spec", "enabled"); got != true { + 
t.Errorf("NestedBool = %v, want true", got) + } + if got := NestedSlice(obj, "spec", "items"); len(got) != 2 { + t.Errorf("NestedSlice len = %d, want 2", len(got)) + } + if got := NestedMap(obj, "spec", "metadata"); got["key"] != "val" { + t.Errorf("NestedMap[key] = %v, want val", got["key"]) + } + + // Missing fields return zero values + if got := NestedString(obj, "spec", "missing"); got != "" { + t.Errorf("missing string = %q, want empty", got) + } + if got := NestedInt64(obj, "spec", "missing"); got != 0 { + t.Errorf("missing int64 = %d, want 0", got) + } +} diff --git a/pkg/readiness/cluster_conditions.go b/pkg/readiness/cluster_conditions.go new file mode 100644 index 000000000..b14bf9668 --- /dev/null +++ b/pkg/readiness/cluster_conditions.go @@ -0,0 +1,76 @@ +package readiness + +import ( + "context" + "fmt" + + "k8s.io/client-go/dynamic" +) + +// ClusterConditionsCheck reads existing CVO-computed conditions from ClusterVersion status. +// This does NOT re-evaluate anything — it reports what CVO has already determined, +// including Upgradeable sub-conditions, RetrievedUpdates, and precondition state. 
+type ClusterConditionsCheck struct{} + +func (c *ClusterConditionsCheck) Name() string { return "cluster_conditions" } + +func (c *ClusterConditionsCheck) Run(ctx context.Context, dc dynamic.Interface, current, target string) (map[string]any, error) { + result := map[string]any{} + + cv, err := GetResource(ctx, dc, GVRClusterVersion, "version") + if err != nil { + return nil, fmt.Errorf("failed to get ClusterVersion: %w", err) + } + + // Read all conditions CVO has already set + conditions := GetConditions(cv) + condMap := map[string]any{} + for k, v := range conditions { + condMap[k] = v + } + result["conditions"] = condMap + + // Extract key signals for the agent + upgradeable := conditions[ConditionUpgradeable] + result["upgradeable"] = map[string]any{ + "status": upgradeable.Status, + "reason": upgradeable.Reason, + "message": upgradeable.Message, + } + + progressing := conditions[ConditionProgressing] + result["update_in_progress"] = progressing.Status == ConditionTrue + + // Read update history for context + history := NestedSlice(cv.Object, "status", "history") + historyEntries := make([]map[string]any, 0) + for i, h := range history { + if i >= 5 { + break + } + entry, ok := h.(map[string]interface{}) + if !ok { + continue + } + historyEntries = append(historyEntries, map[string]any{ + "version": NestedString(entry, "version"), + "state": NestedString(entry, "state"), + "startedTime": NestedString(entry, "startedTime"), + "completionTime": NestedString(entry, "completionTime"), + }) + } + result["recent_history"] = historyEntries + + // Channel and cluster identity + result["channel"] = NestedString(cv.Object, "spec", "channel") + result["cluster_id"] = NestedString(cv.Object, "spec", "clusterID") + + // Summary for quick agent parsing + result["summary"] = map[string]any{ + "upgradeable": upgradeable.Status == ConditionTrue, + "update_in_progress": progressing.Status == ConditionTrue, + "current_version": current, + } + + return result, nil +} diff --git 
a/pkg/readiness/crd_compat.go b/pkg/readiness/crd_compat.go new file mode 100644 index 000000000..d76c330b6 --- /dev/null +++ b/pkg/readiness/crd_compat.go @@ -0,0 +1,68 @@ +package readiness + +import ( + "context" + "fmt" + + "k8s.io/client-go/dynamic" +) + +// CRDCompatCheck verifies CRD stored/served version compatibility and operator constraints. +type CRDCompatCheck struct{} + +func (c *CRDCompatCheck) Name() string { return "crd_compat" } + +func (c *CRDCompatCheck) Run(ctx context.Context, dc dynamic.Interface, current, target string) (map[string]any, error) { + result := map[string]any{} + var sectionErrors []map[string]any + + // Check CRDs for version mismatches + crds, err := ListResources(ctx, dc, GVRCRD, "") + if err != nil { + return nil, fmt.Errorf("failed to list CRDs: %w", err) + } + + versionIssues := make([]map[string]any, 0) + for _, crd := range crds { + storedVersions := NestedSlice(crd.Object, "status", "storedVersions") + servedVersions := NestedSlice(crd.Object, "spec", "versions") + + served := make(map[string]bool) + for _, v := range servedVersions { + vm, ok := v.(map[string]interface{}) + if !ok { + continue + } + name := NestedString(vm, "name") + isServed := NestedBool(vm, "served") + if isServed { + served[name] = true + } + } + + for _, sv := range storedVersions { + stored, _ := sv.(string) + if stored != "" && !served[stored] { + versionIssues = append(versionIssues, map[string]any{ + "crd": crd.GetName(), + "stored_version": stored, + "issue": "stored version no longer served", + }) + } + } + } + + result["total_crds"] = len(crds) + result["version_issues"] = versionIssues + + result["summary"] = map[string]any{ + "total_crds": len(crds), + "version_issues": len(versionIssues), + } + + if len(sectionErrors) > 0 { + result["errors"] = sectionErrors + } + + return result, nil +} diff --git a/pkg/readiness/etcd_health.go b/pkg/readiness/etcd_health.go new file mode 100644 index 000000000..6ddc9547b --- /dev/null +++ 
b/pkg/readiness/etcd_health.go @@ -0,0 +1,67 @@ +package readiness + +import ( + "context" + "fmt" + + "k8s.io/client-go/dynamic" +) + +// EtcdHealthCheck verifies etcd member health, backup status, and certificates. +type EtcdHealthCheck struct{} + +func (c *EtcdHealthCheck) Name() string { return "etcd_health" } + +func (c *EtcdHealthCheck) Run(ctx context.Context, dc dynamic.Interface, current, target string) (map[string]any, error) { + result := map[string]any{} + var sectionErrors []map[string]any + + // Check etcd ClusterOperator + etcdCO, err := GetResource(ctx, dc, GVRClusterOperator, "etcd") + if err != nil { + return nil, fmt.Errorf("failed to get etcd ClusterOperator: %w", err) + } + + conditions := GetConditions(etcdCO) + result["operator_conditions"] = conditions + + // Check etcd pods + etcdPods, err := ListNamespacedResources(ctx, dc, GVRPod, "openshift-etcd", "app=etcd") + if err != nil { + SectionError(§ionErrors, "etcd_pods", err) + } else { + podStatuses := make([]map[string]any, 0, len(etcdPods)) + healthyMembers := 0 + for _, pod := range etcdPods { + phase := NestedString(pod.Object, "status", "phase") + podConds := GetConditions(&pod) + ready := false + if cond, ok := podConds["Ready"]; ok { + ready = cond.Status == ConditionTrue + } + if ready { + healthyMembers++ + } + podStatuses = append(podStatuses, map[string]any{ + "name": pod.GetName(), + "node": NestedString(pod.Object, "spec", "nodeName"), + "phase": phase, + "ready": ready, + }) + } + result["members"] = podStatuses + result["healthy_members"] = healthyMembers + result["total_members"] = len(etcdPods) + } + + result["summary"] = map[string]any{ + "operator_available": conditions[ConditionAvailable].Status == ConditionTrue, + "operator_degraded": conditions[ConditionDegraded].Status == ConditionTrue, + } + + if len(sectionErrors) > 0 { + result["errors"] = sectionErrors + } + + return result, nil +} diff --git a/pkg/readiness/network.go b/pkg/readiness/network.go new file mode 
100644 index 000000000..0342b06f0 --- /dev/null +++ b/pkg/readiness/network.go @@ -0,0 +1,72 @@ +package readiness + +import ( + "context" + "fmt" + + "k8s.io/client-go/dynamic" +) + +// NetworkCheck verifies network plugin type, TLS profile, and proxy configuration. +type NetworkCheck struct{} + +func (c *NetworkCheck) Name() string { return "network" } + +func (c *NetworkCheck) Run(ctx context.Context, dc dynamic.Interface, current, target string) (map[string]any, error) { + result := map[string]any{} + var sectionErrors []map[string]any + + // Check Network configuration + network, err := GetResource(ctx, dc, GVRNetwork, "cluster") + if err != nil { + return nil, fmt.Errorf("failed to get Network config: %w", err) + } + + networkType := NestedString(network.Object, "status", "networkType") + result["network_type"] = networkType + + // SDN deprecation warning + if networkType == "OpenShiftSDN" { + cmp, err := CompareVersions(target, "4.17.0") + if target != "" && err == nil && cmp >= 0 { + result["sdn_warning"] = "OpenShiftSDN blocks upgrades to 4.17+; migrate to OVN-Kubernetes first." + } else { + result["sdn_warning"] = "OpenShiftSDN detected. Migration to OVN-Kubernetes is required for future upgrades to 4.17+." 
+ } + } + + // Check proxy + proxy, err := GetResource(ctx, dc, GVRProxy, "cluster") + if err != nil { + SectionError(§ionErrors, "proxy", err) + } else { + result["proxy"] = map[string]any{ + "http_proxy": NestedString(proxy.Object, "spec", "httpProxy"), + "https_proxy": NestedString(proxy.Object, "spec", "httpsProxy"), + "no_proxy": NestedString(proxy.Object, "spec", "noProxy"), + } + } + + // Check TLS profile from APIServer + apiServer, err := GetResource(ctx, dc, GVRAPIServer, "cluster") + if err != nil { + SectionError(§ionErrors, "apiserver_tls", err) + } else { + tlsProfile := NestedString(apiServer.Object, "spec", "tlsSecurityProfile", "type") + if tlsProfile == "" { + tlsProfile = "Intermediate" + } + result["tls_profile"] = tlsProfile + } + + result["summary"] = map[string]any{ + "network_type": networkType, + "is_sdn": networkType == "OpenShiftSDN", + } + + if len(sectionErrors) > 0 { + result["errors"] = sectionErrors + } + + return result, nil +} diff --git a/pkg/readiness/node_capacity.go b/pkg/readiness/node_capacity.go new file mode 100644 index 000000000..ee58c5ac0 --- /dev/null +++ b/pkg/readiness/node_capacity.go @@ -0,0 +1,48 @@ +package readiness + +import ( + "context" + "fmt" + + "k8s.io/client-go/dynamic" +) + +// NodeCapacityCheck assesses node readiness and resource headroom. 
+type NodeCapacityCheck struct{} + +func (c *NodeCapacityCheck) Name() string { return "node_capacity" } + +func (c *NodeCapacityCheck) Run(ctx context.Context, dc dynamic.Interface, current, target string) (map[string]any, error) { + result := map[string]any{} + + nodes, err := ListResources(ctx, dc, GVRNode, "") + if err != nil { + return nil, fmt.Errorf("failed to list nodes: %w", err) + } + + totalNodes := len(nodes) + readyNodes := 0 + unschedulableNodes := 0 + + for _, node := range nodes { + conditions := GetConditions(&node) + if cond, ok := conditions["Ready"]; ok && cond.Status == ConditionTrue { + readyNodes++ + } + if NestedBool(node.Object, "spec", "unschedulable") { + unschedulableNodes++ + } + } + + result["total_nodes"] = totalNodes + result["ready_nodes"] = readyNodes + result["unschedulable_nodes"] = unschedulableNodes + result["summary"] = map[string]any{ + "total": totalNodes, + "ready": readyNodes, + "not_ready": totalNodes - readyNodes, + "unschedulable": unschedulableNodes, + } + + return result, nil +} diff --git a/pkg/readiness/olm_lifecycle.go b/pkg/readiness/olm_lifecycle.go new file mode 100644 index 000000000..1f20a5d2a --- /dev/null +++ b/pkg/readiness/olm_lifecycle.go @@ -0,0 +1,282 @@ +package readiness + +import ( + "context" + "encoding/json" + "fmt" + "sync" + + semver "github.com/blang/semver/v4" + + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/client-go/dynamic" +) + +const ( + ApprovalAutomatic = "Automatic" + ApprovalManual = "Manual" + PhaseRequiresApproval = "RequiresApproval" +) + +// OLMOperatorLifecycleCheck collects lifecycle information for OLM-installed operators +// by correlating Subscriptions, ClusterServiceVersions, InstallPlans, and PackageManifests. +// This data supports the Operator Update Planner (OCPSTRAT-2618) by providing per-operator +// installed version, OCP compatibility, update policy, pending upgrades, and channel info. 
+type OLMOperatorLifecycleCheck struct{} + +func (c *OLMOperatorLifecycleCheck) Name() string { return "olm_operator_lifecycle" } + +func (c *OLMOperatorLifecycleCheck) Run(ctx context.Context, dc dynamic.Interface, current, target string) (map[string]any, error) { + result := map[string]any{} + var sectionErrors []map[string]any + + // Subscriptions are the anchor — fail hard if unavailable. + subs, err := ListResources(ctx, dc, GVRSubscription, "") + if err != nil { + return nil, fmt.Errorf("failed to list subscriptions: %w", err) + } + + // Fetch CSVs and PackageManifests concurrently; both are independent. + var ( + csvs []unstructured.Unstructured + pkgManifests []unstructured.Unstructured + csvErr error + pkgErr error + fetchWG sync.WaitGroup + ) + fetchWG.Add(2) + go func() { + defer fetchWG.Done() + csvs, csvErr = ListResources(ctx, dc, GVRCSV, "") + }() + go func() { + defer fetchWG.Done() + pkgManifests, pkgErr = ListResources(ctx, dc, GVRPackageManifest, "") + }() + fetchWG.Wait() + + if csvErr != nil { + SectionError(§ionErrors, "clusterserviceversions", csvErr) + } + if pkgErr != nil { + SectionError(§ionErrors, "packagemanifests", pkgErr) + } + + csvIndex := indexByNamespacedName(csvs) + pkgIndex := indexByNamespacedName(pkgManifests) + + // Parse current/target once to avoid repeated semver parsing per operator. 
+ parsedTarget, errTarget := semver.ParseTolerant(target) + parsedCurrent, errCurrent := semver.ParseTolerant(current) + hasTarget := errTarget == nil && target != "" + hasCurrent := errCurrent == nil && current != "" + + operators := make([]map[string]any, 0, len(subs)) + incompatibleWithTarget := 0 + pendingUpgradeCount := 0 + manualApprovalCount := 0 + + for _, sub := range subs { + entry := map[string]any{ + "name": sub.GetName(), + "namespace": sub.GetNamespace(), + } + + entry["channel"] = NestedString(sub.Object, "spec", "channel") + entry["source"] = NestedString(sub.Object, "spec", "source") + entry["source_namespace"] = NestedString(sub.Object, "spec", "sourceNamespace") + entry["package"] = NestedString(sub.Object, "spec", "name") + + approval := NestedString(sub.Object, "spec", "installPlanApproval") + if approval == "" { + approval = ApprovalAutomatic + } + entry["install_plan_approval"] = approval + if approval == ApprovalManual { + manualApprovalCount++ + } + + entry["state"] = NestedString(sub.Object, "status", "state") + installedCSVName := NestedString(sub.Object, "status", "installedCSV") + entry["installed_csv"] = installedCSVName + currentCSVName := NestedString(sub.Object, "status", "currentCSV") + + if installedCSVName != "" { + csvKey := sub.GetNamespace() + "/" + installedCSVName + if csvObj, ok := csvIndex[csvKey]; ok { + entry["installed_version"] = NestedString(csvObj, "spec", "version") + entry["csv_phase"] = NestedString(csvObj, "status", "phase") + entry["csv_display_name"] = NestedString(csvObj, "spec", "displayName") + + minKube := NestedString(csvObj, "spec", "minKubeVersion") + if minKube != "" { + entry["min_kube_version"] = minKube + } + } + } + + pendingUpgrade := false + if currentCSVName != "" && installedCSVName != "" && currentCSVName != installedCSVName { + pendingUpgrade = true + pendingUpgradeCount++ + entry["pending_csv"] = currentCSVName + csvKey := sub.GetNamespace() + "/" + currentCSVName + if csvObj, ok := 
csvIndex[csvKey]; ok { + entry["pending_version"] = NestedString(csvObj, "spec", "version") + } + } + entry["pending_upgrade"] = pendingUpgrade + + // Fetch the referenced InstallPlan directly instead of listing all. + ipRef := NestedString(sub.Object, "status", "installPlanRef", "name") + if ipRef != "" { + ipObj, ipErr := GetNamespacedResource(ctx, dc, GVRInstallPlan, sub.GetNamespace(), ipRef) + if ipErr == nil { + ipApproved := NestedBool(ipObj.Object, "spec", "approved") + ipPhase := NestedString(ipObj.Object, "status", "phase") + if !ipApproved && ipPhase == PhaseRequiresApproval { + entry["install_plan_awaiting_approval"] = true + } + } + } + + pkgName := NestedString(sub.Object, "spec", "name") + subChannel := NestedString(sub.Object, "spec", "channel") + pkgNS := NestedString(sub.Object, "spec", "sourceNamespace") + if pkgNS == "" { + pkgNS = sub.GetNamespace() + } + if pm, ok := pkgIndex[pkgNS+"/"+pkgName]; ok { + compat := extractOCPCompat(pm, subChannel) + if compat != nil { + entry["ocp_compat"] = compat + + maxOCP, _ := compat["max"].(string) + if maxOCP != "" && hasTarget { + parsedMax, err := semver.ParseTolerant(maxOCP) + if err == nil { + if parsedTarget.Compare(parsedMax) > 0 { + entry["compatible_with_target"] = false + incompatibleWithTarget++ + } else { + entry["compatible_with_target"] = true + } + } + } + minOCP, _ := compat["min"].(string) + if minOCP != "" && hasCurrent { + parsedMin, err := semver.ParseTolerant(minOCP) + if err == nil { + entry["compatible_with_current"] = parsedCurrent.Compare(parsedMin) >= 0 + } + } + } + + channels := extractChannels(pm) + if len(channels) > 0 { + entry["available_channels"] = channels + } + } + + operators = append(operators, entry) + } + + result["operators"] = operators + result["summary"] = map[string]any{ + "total_operators": len(subs), + "pending_upgrades": pendingUpgradeCount, + "manual_approval": manualApprovalCount, + "incompatible_with_target": incompatibleWithTarget, + } + + if 
len(sectionErrors) > 0 { + result["errors"] = sectionErrors + } + + return result, nil +} + +// indexByNamespacedName builds a lookup map keyed by "namespace/name". +func indexByNamespacedName(items []unstructured.Unstructured) map[string]map[string]interface{} { + idx := make(map[string]map[string]interface{}, len(items)) + for _, item := range items { + key := item.GetNamespace() + "/" + item.GetName() + idx[key] = item.Object + } + return idx +} + +// extractOCPCompat reads olm.maxOpenShiftVersion and olm.properties from a +// PackageManifest's channel entry to determine OCP version compatibility. +func extractOCPCompat(pm map[string]interface{}, channelName string) map[string]any { + channels := NestedSlice(pm, "status", "channels") + for _, ch := range channels { + chMap, ok := ch.(map[string]interface{}) + if !ok { + continue + } + if NestedString(chMap, "name") != channelName { + continue + } + + compat := map[string]any{} + + maxOCP := NestedString(chMap, "currentCSVDesc", "annotations", "olm.maxOpenShiftVersion") + if maxOCP != "" { + compat["max"] = maxOCP + } + + props := NestedString(chMap, "currentCSVDesc", "annotations", "olm.properties") + if props != "" { + minOCP := parseMinOCPFromProperties(props) + if minOCP != "" { + compat["min"] = minOCP + } + } + + if len(compat) > 0 { + return compat + } + } + return nil +} + +// olmProperty represents a single entry in the olm.properties JSON annotation. +type olmProperty struct { + Type string `json:"type"` + Value json.RawMessage `json:"value"` +} + +func parseMinOCPFromProperties(props string) string { + var properties []olmProperty + if err := json.Unmarshal([]byte(props), &properties); err != nil { + return "" + } + for _, p := range properties { + if p.Type == "olm.minOpenShiftVersion" { + var v string + if err := json.Unmarshal(p.Value, &v); err == nil { + return v + } + return "" + } + } + return "" +} + +// extractChannels returns the list of channel names from a PackageManifest. 
+func extractChannels(pm map[string]interface{}) []string { + channels := NestedSlice(pm, "status", "channels") + names := make([]string, 0, len(channels)) + for _, ch := range channels { + chMap, ok := ch.(map[string]interface{}) + if !ok { + continue + } + name := NestedString(chMap, "name") + if name != "" { + names = append(names, name) + } + } + return names +} diff --git a/pkg/readiness/olm_lifecycle_test.go b/pkg/readiness/olm_lifecycle_test.go new file mode 100644 index 000000000..a0216a68f --- /dev/null +++ b/pkg/readiness/olm_lifecycle_test.go @@ -0,0 +1,449 @@ +package readiness + +import ( + "context" + "testing" + + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime" +) + +func TestOLMOperatorLifecycleCheck_Basic(t *testing.T) { + objects := []runtime.Object{ + // Subscription for elasticsearch-operator + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "operators.coreos.com/v1alpha1", "kind": "Subscription", + "metadata": map[string]interface{}{"name": "elasticsearch-operator", "namespace": "openshift-operators-redhat"}, + "spec": map[string]interface{}{ + "name": "elasticsearch-operator", + "channel": "stable-5.8", + "source": "redhat-operators", + "sourceNamespace": "openshift-marketplace", + "installPlanApproval": "Manual", + }, + "status": map[string]interface{}{ + "state": "AtLatestKnown", + "installedCSV": "elasticsearch-operator.v5.8.5", + "currentCSV": "elasticsearch-operator.v5.8.5", + }, + }}, + // CSV for elasticsearch-operator + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "operators.coreos.com/v1alpha1", "kind": "ClusterServiceVersion", + "metadata": map[string]interface{}{"name": "elasticsearch-operator.v5.8.5", "namespace": "openshift-operators-redhat"}, + "spec": map[string]interface{}{ + "version": "5.8.5", + "displayName": "OpenShift Elasticsearch Operator", + }, + "status": map[string]interface{}{ + "phase": "Succeeded", + }, + }}, + // 
PackageManifest for elasticsearch-operator + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "packages.operators.coreos.com/v1", "kind": "PackageManifest", + "metadata": map[string]interface{}{"name": "elasticsearch-operator", "namespace": "openshift-marketplace"}, + "status": map[string]interface{}{ + "channels": []interface{}{ + map[string]interface{}{ + "name": "stable-5.8", + "currentCSVDesc": map[string]interface{}{ + "annotations": map[string]interface{}{ + "olm.maxOpenShiftVersion": "4.17", + }, + }, + }, + map[string]interface{}{ + "name": "stable-6.0", + }, + }, + }, + }}, + } + + client := newFakeDynamicClient(objects...) + check := &OLMOperatorLifecycleCheck{} + + result, err := check.Run(context.Background(), client, "4.16.0", "4.17.0") + if err != nil { + t.Fatal(err) + } + + operators, ok := result["operators"].([]map[string]any) + if !ok { + t.Fatal("operators not a slice") + } + if len(operators) != 1 { + t.Fatalf("operators len = %d, want 1", len(operators)) + } + + op := operators[0] + if op["name"] != "elasticsearch-operator" { + t.Errorf("name = %v, want elasticsearch-operator", op["name"]) + } + if op["installed_version"] != "5.8.5" { + t.Errorf("installed_version = %v, want 5.8.5", op["installed_version"]) + } + if op["csv_phase"] != "Succeeded" { + t.Errorf("csv_phase = %v, want Succeeded", op["csv_phase"]) + } + if op["csv_display_name"] != "OpenShift Elasticsearch Operator" { + t.Errorf("csv_display_name = %v, want OpenShift Elasticsearch Operator", op["csv_display_name"]) + } + if op["install_plan_approval"] != "Manual" { + t.Errorf("install_plan_approval = %v, want Manual", op["install_plan_approval"]) + } + if op["channel"] != "stable-5.8" { + t.Errorf("channel = %v, want stable-5.8", op["channel"]) + } + if op["pending_upgrade"] != false { + t.Errorf("pending_upgrade = %v, want false", op["pending_upgrade"]) + } + + // OCP compat — max is 4.17, target is 4.17, so compatible + compat, ok := 
op["ocp_compat"].(map[string]any) + if !ok { + t.Fatal("ocp_compat not a map") + } + if compat["max"] != "4.17" { + t.Errorf("ocp_compat.max = %v, want 4.17", compat["max"]) + } + if op["compatible_with_target"] != true { + t.Errorf("compatible_with_target = %v, want true", op["compatible_with_target"]) + } + + // Available channels + channels, ok := op["available_channels"].([]string) + if !ok { + t.Fatal("available_channels not a string slice") + } + if len(channels) != 2 { + t.Errorf("available_channels len = %d, want 2", len(channels)) + } + + // Summary + summary, ok := result["summary"].(map[string]any) + if !ok { + t.Fatal("summary not a map") + } + if summary["total_operators"] != 1 { + t.Errorf("total_operators = %v, want 1", summary["total_operators"]) + } + if summary["manual_approval"] != 1 { + t.Errorf("manual_approval = %v, want 1", summary["manual_approval"]) + } + if summary["incompatible_with_target"] != 0 { + t.Errorf("incompatible_with_target = %v, want 0", summary["incompatible_with_target"]) + } +} + +func TestOLMOperatorLifecycleCheck_PendingUpgrade(t *testing.T) { + objects := []runtime.Object{ + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "operators.coreos.com/v1alpha1", "kind": "Subscription", + "metadata": map[string]interface{}{"name": "kiali-ossm", "namespace": "openshift-operators"}, + "spec": map[string]interface{}{ + "name": "kiali-ossm", + "channel": "stable", + "source": "redhat-operators", + }, + "status": map[string]interface{}{ + "state": "UpgradePending", + "installedCSV": "kiali-operator.v1.72.0", + "currentCSV": "kiali-operator.v1.73.0", + }, + }}, + // Installed CSV + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "operators.coreos.com/v1alpha1", "kind": "ClusterServiceVersion", + "metadata": map[string]interface{}{"name": "kiali-operator.v1.72.0", "namespace": "openshift-operators"}, + "spec": map[string]interface{}{ + "version": "1.72.0", + "displayName": "Kiali 
Operator", + }, + "status": map[string]interface{}{"phase": "Replacing"}, + }}, + // Pending CSV + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "operators.coreos.com/v1alpha1", "kind": "ClusterServiceVersion", + "metadata": map[string]interface{}{"name": "kiali-operator.v1.73.0", "namespace": "openshift-operators"}, + "spec": map[string]interface{}{ + "version": "1.73.0", + "displayName": "Kiali Operator", + }, + "status": map[string]interface{}{"phase": "InstallReady"}, + }}, + } + + client := newFakeDynamicClient(objects...) + check := &OLMOperatorLifecycleCheck{} + + result, err := check.Run(context.Background(), client, "4.16.0", "4.17.0") + if err != nil { + t.Fatal(err) + } + + operators := result["operators"].([]map[string]any) + if len(operators) != 1 { + t.Fatalf("operators len = %d, want 1", len(operators)) + } + + op := operators[0] + if op["pending_upgrade"] != true { + t.Errorf("pending_upgrade = %v, want true", op["pending_upgrade"]) + } + if op["installed_version"] != "1.72.0" { + t.Errorf("installed_version = %v, want 1.72.0", op["installed_version"]) + } + if op["pending_version"] != "1.73.0" { + t.Errorf("pending_version = %v, want 1.73.0", op["pending_version"]) + } + if op["pending_csv"] != "kiali-operator.v1.73.0" { + t.Errorf("pending_csv = %v, want kiali-operator.v1.73.0", op["pending_csv"]) + } + + summary := result["summary"].(map[string]any) + if summary["pending_upgrades"] != 1 { + t.Errorf("pending_upgrades = %v, want 1", summary["pending_upgrades"]) + } +} + +func TestOLMOperatorLifecycleCheck_IncompatibleWithTarget(t *testing.T) { + objects := []runtime.Object{ + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "operators.coreos.com/v1alpha1", "kind": "Subscription", + "metadata": map[string]interface{}{"name": "jaeger-product", "namespace": "openshift-operators"}, + "spec": map[string]interface{}{ + "name": "jaeger-product", + "channel": "stable", + "source": "redhat-operators", 
+ }, + "status": map[string]interface{}{ + "state": "AtLatestKnown", + "installedCSV": "jaeger-operator.v1.51.0", + "currentCSV": "jaeger-operator.v1.51.0", + }, + }}, + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "operators.coreos.com/v1alpha1", "kind": "ClusterServiceVersion", + "metadata": map[string]interface{}{"name": "jaeger-operator.v1.51.0", "namespace": "openshift-operators"}, + "spec": map[string]interface{}{ + "version": "1.51.0", + "displayName": "Red Hat OpenShift distributed tracing platform", + }, + "status": map[string]interface{}{"phase": "Succeeded"}, + }}, + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "packages.operators.coreos.com/v1", "kind": "PackageManifest", + "metadata": map[string]interface{}{"name": "jaeger-product", "namespace": "openshift-operators"}, + "status": map[string]interface{}{ + "channels": []interface{}{ + map[string]interface{}{ + "name": "stable", + "currentCSVDesc": map[string]interface{}{ + "annotations": map[string]interface{}{ + "olm.maxOpenShiftVersion": "4.16", + }, + }, + }, + }, + }, + }}, + } + + client := newFakeDynamicClient(objects...) 
+ check := &OLMOperatorLifecycleCheck{} + + // Target is 4.17 but max is 4.16 — incompatible + result, err := check.Run(context.Background(), client, "4.16.0", "4.17.0") + if err != nil { + t.Fatal(err) + } + + operators := result["operators"].([]map[string]any) + op := operators[0] + if op["compatible_with_target"] != false { + t.Errorf("compatible_with_target = %v, want false", op["compatible_with_target"]) + } + + summary := result["summary"].(map[string]any) + if summary["incompatible_with_target"] != 1 { + t.Errorf("incompatible_with_target = %v, want 1", summary["incompatible_with_target"]) + } +} + +func TestOLMOperatorLifecycleCheck_NoSubscriptions(t *testing.T) { + client := newFakeDynamicClient() + check := &OLMOperatorLifecycleCheck{} + + result, err := check.Run(context.Background(), client, "4.16.0", "4.17.0") + if err != nil { + t.Fatal(err) + } + + operators := result["operators"].([]map[string]any) + if len(operators) != 0 { + t.Errorf("operators len = %d, want 0", len(operators)) + } + + summary := result["summary"].(map[string]any) + if summary["total_operators"] != 0 { + t.Errorf("total_operators = %v, want 0", summary["total_operators"]) + } +} + +func TestOLMOperatorLifecycleCheck_DefaultApproval(t *testing.T) { + objects := []runtime.Object{ + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "operators.coreos.com/v1alpha1", "kind": "Subscription", + "metadata": map[string]interface{}{"name": "test-op", "namespace": "openshift-operators"}, + "spec": map[string]interface{}{ + "name": "test-op", + "channel": "stable", + "source": "redhat-operators", + // no installPlanApproval — defaults to Automatic + }, + "status": map[string]interface{}{ + "state": "AtLatestKnown", + "installedCSV": "test-op.v1.0.0", + "currentCSV": "test-op.v1.0.0", + }, + }}, + } + + client := newFakeDynamicClient(objects...) 
+ check := &OLMOperatorLifecycleCheck{} + + result, err := check.Run(context.Background(), client, "4.16.0", "4.17.0") + if err != nil { + t.Fatal(err) + } + + operators := result["operators"].([]map[string]any) + op := operators[0] + if op["install_plan_approval"] != "Automatic" { + t.Errorf("install_plan_approval = %v, want Automatic", op["install_plan_approval"]) + } + + summary := result["summary"].(map[string]any) + if summary["manual_approval"] != 0 { + t.Errorf("manual_approval = %v, want 0", summary["manual_approval"]) + } +} + +func TestExtractChannels(t *testing.T) { + pm := map[string]interface{}{ + "status": map[string]interface{}{ + "channels": []interface{}{ + map[string]interface{}{"name": "stable-5.8"}, + map[string]interface{}{"name": "stable-6.0"}, + map[string]interface{}{"name": "preview"}, + }, + }, + } + + channels := extractChannels(pm) + if len(channels) != 3 { + t.Fatalf("channels len = %d, want 3", len(channels)) + } + expected := []string{"stable-5.8", "stable-6.0", "preview"} + for i, want := range expected { + if channels[i] != want { + t.Errorf("channels[%d] = %v, want %v", i, channels[i], want) + } + } +} + +func TestExtractOCPCompat(t *testing.T) { + t.Run("with maxOpenShiftVersion", func(t *testing.T) { + pm := map[string]interface{}{ + "status": map[string]interface{}{ + "channels": []interface{}{ + map[string]interface{}{ + "name": "stable", + "currentCSVDesc": map[string]interface{}{ + "annotations": map[string]interface{}{ + "olm.maxOpenShiftVersion": "4.16", + }, + }, + }, + }, + }, + } + + compat := extractOCPCompat(pm, "stable") + if compat == nil { + t.Fatal("expected non-nil compat") + } + if compat["max"] != "4.16" { + t.Errorf("max = %v, want 4.16", compat["max"]) + } + }) + + t.Run("channel not found", func(t *testing.T) { + pm := map[string]interface{}{ + "status": map[string]interface{}{ + "channels": []interface{}{ + map[string]interface{}{ + "name": "stable", + }, + }, + }, + } + + compat := extractOCPCompat(pm, 
"preview") + if compat != nil { + t.Errorf("expected nil compat for missing channel, got %v", compat) + } + }) + + t.Run("no annotations", func(t *testing.T) { + pm := map[string]interface{}{ + "status": map[string]interface{}{ + "channels": []interface{}{ + map[string]interface{}{ + "name": "stable", + "currentCSVDesc": map[string]interface{}{}, + }, + }, + }, + } + + compat := extractOCPCompat(pm, "stable") + if compat != nil { + t.Errorf("expected nil compat for no annotations, got %v", compat) + } + }) +} + +func TestParseMinOCPFromProperties(t *testing.T) { + t.Run("valid olm.minOpenShiftVersion", func(t *testing.T) { + props := `[{"type":"olm.minOpenShiftVersion","value":"4.14"},{"type":"olm.maxOpenShiftVersion","value":"4.17"}]` + got := parseMinOCPFromProperties(props) + if got != "4.14" { + t.Errorf("got %q, want 4.14", got) + } + }) + + t.Run("no minOpenShiftVersion", func(t *testing.T) { + props := `[{"type":"olm.maxOpenShiftVersion","value":"4.17"}]` + got := parseMinOCPFromProperties(props) + if got != "" { + t.Errorf("got %q, want empty", got) + } + }) + + t.Run("invalid JSON", func(t *testing.T) { + got := parseMinOCPFromProperties("not json") + if got != "" { + t.Errorf("got %q, want empty", got) + } + }) + + t.Run("empty array", func(t *testing.T) { + got := parseMinOCPFromProperties("[]") + if got != "" { + t.Errorf("got %q, want empty", got) + } + }) +} diff --git a/pkg/readiness/operator_health.go b/pkg/readiness/operator_health.go new file mode 100644 index 000000000..f9232701a --- /dev/null +++ b/pkg/readiness/operator_health.go @@ -0,0 +1,125 @@ +package readiness + +import ( + "context" + "fmt" + + "k8s.io/client-go/dynamic" +) + +// OperatorHealthCheck provides per-operator detail and MCP state. +// CVO already aggregates operator health into the ClusterVersion Upgradeable condition +// (reported in cluster_conditions check). This check adds per-operator breakdown +// and MachineConfigPool status, which CVO does not expose in conditions. 
+type OperatorHealthCheck struct{}
+
+// Name returns the identifier of this check, "operator_health".
+func (c *OperatorHealthCheck) Name() string { return "operator_health" }
+
+// Run lists every ClusterOperator and MachineConfigPool through dc and
+// summarizes their health.
+//
+// The returned map contains:
+//   - "not_upgradeable", "degraded", "not_available": one entry (name, reason,
+//     message) per ClusterOperator whose Upgradeable condition is False,
+//     Degraded condition is True, or Available condition is False.
+//   - "machine_config_pools" and "mcp_summary": per-pool detail and counts of
+//     paused, degraded and updating pools. Both keys are absent when the pools
+//     could not be listed.
+//   - "summary": operator totals.
+//   - "errors": present only when a section failed.
+//
+// A failure to list ClusterOperators fails the whole check. A failure to list
+// MachineConfigPools is handed to SectionError and the rest of the result is
+// still returned. current and target are not used by this check.
+func (c *OperatorHealthCheck) Run(ctx context.Context, dc dynamic.Interface, current, target string) (map[string]any, error) {
+	result := map[string]any{}
+	var sectionErrors []map[string]any
+
+	// Per-operator breakdown — CVO aggregates this but doesn't expose per-CO detail
+	operators, err := ListResources(ctx, dc, GVRClusterOperator, "")
+	if err != nil {
+		return nil, fmt.Errorf("failed to list ClusterOperators: %w", err)
+	}
+
+	// Start from empty, non-nil slices (presumably so they serialize as []
+	// rather than null when nothing is wrong).
+	notUpgradeable := make([]map[string]any, 0)
+	degraded := make([]map[string]any, 0)
+	notAvailable := make([]map[string]any, 0)
+
+	for _, co := range operators {
+		conditions := GetConditions(&co)
+		name := co.GetName()
+
+		// An operator with no such condition at all is not reported.
+		if cond, ok := conditions[ConditionUpgradeable]; ok && cond.Status == ConditionFalse {
+			notUpgradeable = append(notUpgradeable, map[string]any{
+				"name":    name,
+				"reason":  cond.Reason,
+				"message": cond.Message,
+			})
+		}
+		if cond, ok := conditions[ConditionDegraded]; ok && cond.Status == ConditionTrue {
+			degraded = append(degraded, map[string]any{
+				"name":    name,
+				"reason":  cond.Reason,
+				"message": cond.Message,
+			})
+		}
+		if cond, ok := conditions[ConditionAvailable]; ok && cond.Status == ConditionFalse {
+			notAvailable = append(notAvailable, map[string]any{
+				"name":    name,
+				"reason":  cond.Reason,
+				"message": cond.Message,
+			})
+		}
+	}
+
+	result["not_upgradeable"] = notUpgradeable
+	result["degraded"] = degraded
+	result["not_available"] = notAvailable
+
+	// MachineConfigPool status — CVO does NOT track this
+	mcps, err := ListResources(ctx, dc, GVRMachineConfigPool, "")
+	if err != nil {
+		// Best effort: record the failure and keep the operator data.
+		// SectionError takes a pointer to the slice (presumably so it can
+		// append to it; its definition is outside this file).
+		SectionError(&sectionErrors, "machine_config_pools", err)
+	} else {
+		mcpResults := make([]map[string]any, 0, len(mcps))
+		pausedMCPs := 0
+		degradedMCPs := 0
+		updatingMCPs := 0
+
+		for _, mcp := range mcps {
+			paused := NestedBool(mcp.Object, "spec", "paused")
+			machineCount := NestedInt64(mcp.Object, "status", "machineCount")
+			readyCount := NestedInt64(mcp.Object, "status", "readyMachineCount")
+			updatedCount := NestedInt64(mcp.Object, "status", "updatedMachineCount")
+
+			conditions := GetConditions(&mcp)
+			isDegraded := false
+			isUpdating := false
+			if cond, ok := conditions[ConditionDegraded]; ok && cond.Status == ConditionTrue {
+				isDegraded = true
+				degradedMCPs++
+			}
+			if cond, ok := conditions[ConditionUpdating]; ok && cond.Status == ConditionTrue {
+				isUpdating = true
+				updatingMCPs++
+			}
+			if paused {
+				pausedMCPs++
+			}
+
+			mcpResults = append(mcpResults, map[string]any{
+				"name":          mcp.GetName(),
+				"paused":        paused,
+				"machine_count": machineCount,
+				"ready_count":   readyCount,
+				"updated_count": updatedCount,
+				"degraded":      isDegraded,
+				"updating":      isUpdating,
+			})
+		}
+		result["machine_config_pools"] = mcpResults
+		result["mcp_summary"] = map[string]any{
+			"paused":   pausedMCPs,
+			"degraded": degradedMCPs,
+			"updating": updatingMCPs,
+		}
+	}
+
+	result["summary"] = map[string]any{
+		"total_operators":       len(operators),
+		"not_upgradeable_count": len(notUpgradeable),
+		"degraded_count":        len(degraded),
+		"not_available_count":   len(notAvailable),
+		"note":                  "CVO's aggregated Upgradeable condition is in the cluster_conditions check",
+	}
+
+	// Only expose "errors" when a section actually failed.
+	if len(sectionErrors) > 0 {
+		result["errors"] = sectionErrors
+	}
+
+	return result, nil
+}
diff --git a/pkg/readiness/pdb_drain.go b/pkg/readiness/pdb_drain.go
new file mode 100644
index 000000000..55a51714b
--- /dev/null
+++ b/pkg/readiness/pdb_drain.go
@@ -0,0 +1,57 @@
+package readiness
+
+import (
+	"context"
+	"fmt"
+
+	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
+	"k8s.io/client-go/dynamic"
+)
+
+// PDBDrainCheck assesses PodDisruptionBudgets that could block node drains.
+type PDBDrainCheck struct{}
+
+// Name returns the identifier of this check, "pdb_drain".
+func (c *PDBDrainCheck) Name() string { return "pdb_drain" }
+
+// Run lists PodDisruptionBudgets through dc (namespace argument "") and
+// reports the ones that currently allow no disruption.
+//
+// A PDB is reported as blocking when status.disruptionsAllowed is 0 while
+// status.currentHealthy is greater than 0. The returned map contains
+// "total_pdbs", "blocking_pdbs" (one entry per blocking PDB) and "summary"
+// (total and blocking counts). An error is returned only when the list call
+// fails. current and target are not used by this check.
+func (c *PDBDrainCheck) Run(ctx context.Context, dc dynamic.Interface, current, target string) (map[string]any, error) {
+	result := map[string]any{}
+
+	pdbs, err := ListResources(ctx, dc, GVRPDB, "")
+	if err != nil {
+		return nil, fmt.Errorf("failed to list PodDisruptionBudgets: %w", err)
+	}
+
+	issues := make([]map[string]any, 0)
+	for _, pdb := range pdbs {
+		currentHealthy := NestedInt64(pdb.Object, "status", "currentHealthy")
+		disruptionsAllowed := NestedInt64(pdb.Object, "status", "disruptionsAllowed")
+
+		// Check for zero-disruption PDBs; everything else is skipped before
+		// any spec field is read or formatted.
+		if disruptionsAllowed != 0 || currentHealthy <= 0 {
+			continue
+		}
+
+		issues = append(issues, map[string]any{
+			"name":                pdb.GetName(),
+			"namespace":           pdb.GetNamespace(),
+			"max_unavailable":     pdbSpecValue(pdb.Object, "maxUnavailable"),
+			"min_available":       pdbSpecValue(pdb.Object, "minAvailable"),
+			"current_healthy":     currentHealthy,
+			"desired_healthy":     NestedInt64(pdb.Object, "status", "desiredHealthy"),
+			"disruptions_allowed": disruptionsAllowed,
+		})
+	}
+
+	result["total_pdbs"] = len(pdbs)
+	result["blocking_pdbs"] = issues
+	result["summary"] = map[string]any{
+		"total":    len(pdbs),
+		"blocking": len(issues),
+	}
+
+	return result, nil
+}
+
+// pdbSpecValue renders the optional spec.<field> of a PDB object as a string.
+// It returns "" when the field is missing, null, or cannot be read, so that an
+// unset field never shows up as the literal text "<nil>" in the report.
+func pdbSpecValue(obj map[string]any, field string) string {
+	raw, found, err := unstructured.NestedFieldNoCopy(obj, "spec", field)
+	if err != nil || !found || raw == nil {
+		return ""
+	}
+	return fmt.Sprintf("%v", raw)
+}
diff --git a/pkg/start/start.go b/pkg/start/start.go
index 0470d70cf..59df7d885 100644
--- a/pkg/start/start.go
+++ b/pkg/start/start.go
@@ -21,6 +21,7 @@ import (
 	"k8s.io/apimachinery/pkg/fields"
 	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
 	"k8s.io/apimachinery/pkg/util/sets"
+	"k8s.io/client-go/dynamic"
 	coreinformers "k8s.io/client-go/informers"
 	"k8s.io/client-go/kubernetes"
 	"k8s.io/client-go/kubernetes/scheme"
@@
-507,6 +508,10 @@ func (cb *ClientBuilder) OperatorClientOrDie(name string, configFns ...func(*res
 	return operatorclientset.NewForConfigOrDie(rest.AddUserAgent(cb.RestConfig(configFns...), name))
 }
 
+// DynamicClientOrDie returns a dynamic client built from the builder's REST
+// config (after applying configFns) with name added as the user agent.
+// Construction goes through dynamic.NewForConfigOrDie.
+func (cb *ClientBuilder) DynamicClientOrDie(name string, configFns ...func(*rest.Config)) dynamic.Interface {
+	return dynamic.NewForConfigOrDie(rest.AddUserAgent(cb.RestConfig(configFns...), name))
+}
+
 func (cb *ClientBuilder) RuntimeControllerClientOrDie(name string, configFns ...func(*rest.Config)) runtimeclient.Client {
 	c, err := runtimeclient.New(rest.AddUserAgent(cb.RestConfig(configFns...), name), runtimeclient.Options{})
 	if err != nil {
@@ -623,6 +628,7 @@ func (o *Options) NewControllerContext(
 		return nil, err
 	}
 	rtClient := cb.RuntimeControllerClientOrDie("runtime-controller-client")
+	dynamicClient := cb.DynamicClientOrDie("dynamic-client")
 
 	cvo, err := cvo.New(
 		o.NodeName,
@@ -639,6 +645,7 @@ func (o *Options) NewControllerContext(
 		configInformerFactory.Config().V1().FeatureGates(),
 		cb.ClientOrDie(o.Namespace),
 		cvoKubeClient,
+		dynamicClient,
 		operatorClient,
 		o.Exclude,
 		o.ClusterProfile,
diff --git a/test/cvo/readiness.go b/test/cvo/readiness.go
new file mode 100644
index 000000000..19da87b8c
--- /dev/null
+++ b/test/cvo/readiness.go
@@ -0,0 +1,228 @@
+package cvo
+
+import (
+	"context"
+	"encoding/json"
+	"time"
+
+	g "github.com/onsi/ginkgo/v2"
+	o "github.com/onsi/gomega"
+
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/client-go/dynamic"
+	"k8s.io/client-go/kubernetes"
+
+	configv1client "github.com/openshift/client-go/config/clientset/versioned/typed/config/v1"
+
+	"github.com/openshift/cluster-version-operator/pkg/readiness"
+	"github.com/openshift/cluster-version-operator/test/util"
+)
+
+// This suite runs readiness.RunAll against a live cluster and compares selected
+// fields of its output with values read independently through typed clients.
+//
+// NOTE(review): in every comparison spec the "ground truth" read and the
+// RunAll call are two separate API round trips, so the values can drift apart
+// on a cluster that is changing between them.
+var _ = g.Describe(`[Jira:"Cluster Version Operator"] cluster-version-operator readiness checks`, func() {
+	// Shared per-spec state, (re)initialized by BeforeEach.
+	var (
+		dynamicClient  dynamic.Interface
+		kubeClient     kubernetes.Interface
+		configClient   *configv1client.ConfigV1Client
+		ctx            context.Context
+		currentVersion string
+		targetVersion  string
+	)
+
+	g.BeforeEach(func() {
+		// Each spec gets its own context with a 2-minute deadline; cancel is
+		// registered with DeferCleanup so it runs when the spec finishes.
+		var cancel context.CancelFunc
+		ctx, cancel = context.WithTimeout(context.Background(), 2*time.Minute)
+		g.DeferCleanup(cancel)
+
+		restCfg, err := util.GetRestConfig()
+		o.Expect(err).NotTo(o.HaveOccurred())
+
+		dynamicClient, err = dynamic.NewForConfig(restCfg)
+		o.Expect(err).NotTo(o.HaveOccurred())
+
+		kubeClient, err = kubernetes.NewForConfig(restCfg)
+		o.Expect(err).NotTo(o.HaveOccurred())
+
+		configClient, err = configv1client.NewForConfig(restCfg)
+		o.Expect(err).NotTo(o.HaveOccurred())
+
+		// Read actual versions from the cluster
+		cv, err := configClient.ClusterVersions().Get(ctx, "version", metav1.GetOptions{})
+		o.Expect(err).NotTo(o.HaveOccurred())
+		currentVersion = cv.Status.Desired.Version
+		o.Expect(currentVersion).NotTo(o.BeEmpty(), "cluster must have a current version")
+
+		// Pick the first available update as target, or use current if none
+		targetVersion = currentVersion
+		if len(cv.Status.AvailableUpdates) > 0 {
+			targetVersion = cv.Status.AvailableUpdates[0].Version
+		}
+	})
+
+	// Expects exactly 9 checks in the run and none of them reported as errored.
+	g.It("should run all checks without errors", func() {
+		output := readiness.RunAll(ctx, dynamicClient, currentVersion, targetVersion)
+
+		o.Expect(output.Meta.TotalChecks).To(o.Equal(9))
+		o.Expect(output.Meta.ChecksErrored).To(o.Equal(0),
+			"no check should error on a healthy cluster")
+	})
+
+	// Marshals the output and decodes it again into a generic map, then checks
+	// that the top-level "checks" and "meta" keys are present.
+	g.It("should produce valid JSON that round-trips", func() {
+		output := readiness.RunAll(ctx, dynamicClient, currentVersion, targetVersion)
+
+		data, err := json.Marshal(output)
+		o.Expect(err).NotTo(o.HaveOccurred())
+
+		var parsed map[string]interface{}
+		o.Expect(json.Unmarshal(data, &parsed)).To(o.Succeed())
+		o.Expect(parsed).To(o.HaveKey("checks"))
+		o.Expect(parsed).To(o.HaveKey("meta"))
+	})
+
+	g.It("should report node count matching the actual cluster", func() {
+		// Ground truth: list nodes via typed client
+		nodeList, err := kubeClient.CoreV1().Nodes().List(ctx, metav1.ListOptions{})
+		o.Expect(err).NotTo(o.HaveOccurred())
+
+		expectedTotal := len(nodeList.Items)
+		expectedReady := 0
+		// A node counts as ready when it carries a Ready condition with status True.
+		for _, node := range nodeList.Items {
+			for _, cond := range node.Status.Conditions {
+				if cond.Type == "Ready" && cond.Status == "True" {
+					expectedReady++
+				}
+			}
+		}
+
+		// Our check
+		output := readiness.RunAll(ctx, dynamicClient, currentVersion, targetVersion)
+		result := output.Checks["node_capacity"]
+		o.Expect(result.Status).To(o.Equal("ok"))
+		o.Expect(result.Data["total_nodes"]).To(o.Equal(expectedTotal),
+			"node count should match actual nodes in cluster")
+		o.Expect(result.Data["ready_nodes"]).To(o.Equal(expectedReady),
+			"ready node count should match actual ready nodes")
+	})
+
+	g.It("should report operator count matching actual ClusterOperators", func() {
+		// Ground truth: list ClusterOperators via typed client
+		coList, err := configClient.ClusterOperators().List(ctx, metav1.ListOptions{})
+		o.Expect(err).NotTo(o.HaveOccurred())
+
+		expectedTotal := len(coList.Items)
+		expectedDegraded := 0
+		expectedNotUpgradeable := 0
+		// Degraded=True and Upgradeable=False are counted independently, so one
+		// operator can contribute to both totals.
+		for _, co := range coList.Items {
+			for _, cond := range co.Status.Conditions {
+				if cond.Type == "Degraded" && cond.Status == "True" {
+					expectedDegraded++
+				}
+				if cond.Type == "Upgradeable" && cond.Status == "False" {
+					expectedNotUpgradeable++
+				}
+			}
+		}
+
+		// Our check
+		output := readiness.RunAll(ctx, dynamicClient, currentVersion, targetVersion)
+		result := output.Checks["operator_health"]
+		o.Expect(result.Status).To(o.Equal("ok"))
+
+		summary, ok := result.Data["summary"].(map[string]any)
+		o.Expect(ok).To(o.BeTrue(), "operator_health summary should be a map")
+		o.Expect(summary["total_operators"]).To(o.Equal(expectedTotal),
+			"operator count should match actual ClusterOperators")
+		o.Expect(summary["degraded_count"]).To(o.Equal(expectedDegraded),
+			"degraded count should match actual degraded operators")
+		o.Expect(summary["not_upgradeable_count"]).To(o.Equal(expectedNotUpgradeable),
+			"not-upgradeable count should match actual operators")
+	})
+
+	g.It("should report etcd member count matching actual etcd pods", func() {
+		// Ground truth: list etcd pods via typed client
+		podList, err := kubeClient.CoreV1().Pods("openshift-etcd").List(ctx, metav1.ListOptions{
+			LabelSelector: "app=etcd",
+		})
+		o.Expect(err).NotTo(o.HaveOccurred())
+
+		expectedTotal := len(podList.Items)
+		expectedHealthy := 0
+		// "Healthy" here means the pod phase is Running.
+		for _, pod := range podList.Items {
+			if pod.Status.Phase == "Running" {
+				expectedHealthy++
+			}
+		}
+
+		// Our check
+		output := readiness.RunAll(ctx, dynamicClient, currentVersion, targetVersion)
+		result := output.Checks["etcd_health"]
+		o.Expect(result.Status).To(o.Equal("ok"))
+		o.Expect(result.Data["total_members"]).To(o.Equal(expectedTotal),
+			"etcd member count should match actual etcd pods")
+		o.Expect(result.Data["healthy_members"]).To(o.Equal(expectedHealthy),
+			"healthy member count should match actual running etcd pods")
+	})
+
+	g.It("should report network type matching actual Network config", func() {
+		// Ground truth: get Network config via typed client
+		network, err := configClient.Networks().Get(ctx, "cluster", metav1.GetOptions{})
+		o.Expect(err).NotTo(o.HaveOccurred())
+
+		// Our check
+		output := readiness.RunAll(ctx, dynamicClient, currentVersion, targetVersion)
+		result := output.Checks["network"]
+		o.Expect(result.Status).To(o.Equal("ok"))
+		o.Expect(result.Data["network_type"]).To(o.Equal(network.Status.NetworkType),
+			"network type should match actual Network config")
+	})
+
+	g.It("should report PDB count matching actual PodDisruptionBudgets", func() {
+		// Ground truth: list PDBs across all namespaces
+		pdbList, err := kubeClient.PolicyV1().PodDisruptionBudgets("").List(ctx, metav1.ListOptions{})
+		o.Expect(err).NotTo(o.HaveOccurred())
+
+		expectedTotal := len(pdbList.Items)
+		expectedBlocking := 0
+		// Blocking rule used for the expectation: no disruptions allowed while
+		// at least one pod is currently healthy.
+		for _, pdb := range pdbList.Items {
+			if pdb.Status.DisruptionsAllowed == 0 && pdb.Status.CurrentHealthy > 0 {
+				expectedBlocking++
+			}
+		}
+
+		// Our check
+		output := readiness.RunAll(ctx, dynamicClient, currentVersion, targetVersion)
+		result := output.Checks["pdb_drain"]
+		o.Expect(result.Status).To(o.Equal("ok"))
+		o.Expect(result.Data["total_pdbs"]).To(o.Equal(expectedTotal),
+			"PDB count should match actual PDBs in cluster")
+
+		blockingRaw, ok := result.Data["blocking_pdbs"].([]map[string]any)
+		o.Expect(ok).To(o.BeTrue(), "blocking_pdbs should be a []map[string]any")
+		o.Expect(len(blockingRaw)).To(o.Equal(expectedBlocking),
+			"blocking PDB count should match actual blocking PDBs")
+	})
+
+	g.It("should report cluster conditions matching ClusterVersion status", func() {
+		// Ground truth: get ClusterVersion via typed client
+		cv, err := configClient.ClusterVersions().Get(ctx, "version", metav1.GetOptions{})
+		o.Expect(err).NotTo(o.HaveOccurred())
+
+		// Our check
+		output := readiness.RunAll(ctx, dynamicClient, currentVersion, targetVersion)
+		result := output.Checks["cluster_conditions"]
+		o.Expect(result.Status).To(o.Equal("ok"))
+		o.Expect(result.Data["channel"]).To(o.Equal(cv.Spec.Channel),
+			"channel should match ClusterVersion spec")
+		o.Expect(result.Data["cluster_id"]).To(o.Equal(string(cv.Spec.ClusterID)),
+			"cluster ID should match ClusterVersion spec")
+	})
+
+	// Bounds both the overall run and each individual check by the literal 60.
+	// NOTE(review): the unit of Meta.ElapsedSeconds / result.Elapsed is not
+	// visible here; the spec name implies seconds — confirm.
+	g.It("should complete all checks within 60 seconds", func() {
+		output := readiness.RunAll(ctx, dynamicClient, currentVersion, targetVersion)
+
+		o.Expect(output.Meta.ElapsedSeconds).To(o.BeNumerically("<", 60))
+		for name, result := range output.Checks {
+			o.Expect(result.Elapsed).To(o.BeNumerically("<", 60),
+				"check %s exceeded timeout", name)
+		}
+	})
+})