diff --git a/.openshift-tests-extension/openshift_payload_cluster-version-operator.json b/.openshift-tests-extension/openshift_payload_cluster-version-operator.json index 928c2adf6..02637340a 100644 --- a/.openshift-tests-extension/openshift_payload_cluster-version-operator.json +++ b/.openshift-tests-extension/openshift_payload_cluster-version-operator.json @@ -110,5 +110,95 @@ "source": "openshift:payload:cluster-version-operator", "lifecycle": "informing", "environmentSelector": {} + }, + { + "name": "[Jira:\"Cluster Version Operator\"] cluster-version-operator readiness checks should run all checks without errors", + "labels": {}, + "resources": { + "isolation": {} + }, + "source": "openshift:payload:cluster-version-operator", + "lifecycle": "blocking", + "environmentSelector": {} + }, + { + "name": "[Jira:\"Cluster Version Operator\"] cluster-version-operator readiness checks should produce valid JSON that round-trips", + "labels": {}, + "resources": { + "isolation": {} + }, + "source": "openshift:payload:cluster-version-operator", + "lifecycle": "blocking", + "environmentSelector": {} + }, + { + "name": "[Jira:\"Cluster Version Operator\"] cluster-version-operator readiness checks should report node count matching the actual cluster", + "labels": {}, + "resources": { + "isolation": {} + }, + "source": "openshift:payload:cluster-version-operator", + "lifecycle": "blocking", + "environmentSelector": {} + }, + { + "name": "[Jira:\"Cluster Version Operator\"] cluster-version-operator readiness checks should report operator count matching actual ClusterOperators", + "labels": {}, + "resources": { + "isolation": {} + }, + "source": "openshift:payload:cluster-version-operator", + "lifecycle": "blocking", + "environmentSelector": {} + }, + { + "name": "[Jira:\"Cluster Version Operator\"] cluster-version-operator readiness checks should report etcd member count matching actual etcd pods", + "labels": {}, + "resources": { + "isolation": {} + }, + "source": 
"openshift:payload:cluster-version-operator", + "lifecycle": "blocking", + "environmentSelector": {} + }, + { + "name": "[Jira:\"Cluster Version Operator\"] cluster-version-operator readiness checks should report network type matching actual Network config", + "labels": {}, + "resources": { + "isolation": {} + }, + "source": "openshift:payload:cluster-version-operator", + "lifecycle": "blocking", + "environmentSelector": {} + }, + { + "name": "[Jira:\"Cluster Version Operator\"] cluster-version-operator readiness checks should report PDB count matching actual PodDisruptionBudgets", + "labels": {}, + "resources": { + "isolation": {} + }, + "source": "openshift:payload:cluster-version-operator", + "lifecycle": "blocking", + "environmentSelector": {} + }, + { + "name": "[Jira:\"Cluster Version Operator\"] cluster-version-operator readiness checks should report cluster conditions matching ClusterVersion status", + "labels": {}, + "resources": { + "isolation": {} + }, + "source": "openshift:payload:cluster-version-operator", + "lifecycle": "blocking", + "environmentSelector": {} + }, + { + "name": "[Jira:\"Cluster Version Operator\"] cluster-version-operator readiness checks should complete all checks within 60 seconds", + "labels": {}, + "resources": { + "isolation": {} + }, + "source": "openshift:payload:cluster-version-operator", + "lifecycle": "blocking", + "environmentSelector": {} } ] \ No newline at end of file diff --git a/pkg/cvo/availableupdates_test.go b/pkg/cvo/availableupdates_test.go index aec0c29ae..d53d2efa3 100644 --- a/pkg/cvo/availableupdates_test.go +++ b/pkg/cvo/availableupdates_test.go @@ -208,7 +208,7 @@ func newOperator(url string, cluster release, promqlMock clusterconditions.Condi func() ([]configv1.Release, []configv1.ConditionalUpdate, error) { return nil, nil, nil }, - fake.NewClientBuilder().Build(), func(_ string) (*configv1.ClusterVersion, error) { + fake.NewClientBuilder().Build(), nil, func(_ string) (*configv1.ClusterVersion, error) { 
return &configv1.ClusterVersion{}, nil }, func(_ context.Context, namespace, name string, _ metav1.GetOptions) (*corev1.ConfigMap, error) { diff --git a/pkg/cvo/cvo.go b/pkg/cvo/cvo.go index 3dbb6d661..c085de0b4 100644 --- a/pkg/cvo/cvo.go +++ b/pkg/cvo/cvo.go @@ -17,6 +17,7 @@ import ( utilruntime "k8s.io/apimachinery/pkg/util/runtime" "k8s.io/apimachinery/pkg/util/sets" "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/client-go/dynamic" informerscorev1 "k8s.io/client-go/informers/core/v1" "k8s.io/client-go/kubernetes" "k8s.io/client-go/kubernetes/scheme" @@ -109,6 +110,7 @@ type Operator struct { client clientset.Interface kubeClient kubernetes.Interface + dynamicClient dynamic.Interface operatorClient operatorclientset.Interface eventRecorder record.EventRecorder @@ -235,6 +237,7 @@ func New( featureGateInformer configinformersv1.FeatureGateInformer, client clientset.Interface, kubeClient kubernetes.Interface, + dynamicClient dynamic.Interface, operatorClient operatorclientset.Interface, exclude string, clusterProfile string, @@ -267,6 +270,7 @@ func New( client: client, kubeClient: kubeClient, + dynamicClient: dynamicClient, operatorClient: operatorClient, eventRecorder: eventBroadcaster.NewRecorder(scheme.Scheme, corev1.EventSource{Component: namespace}), queue: workqueue.NewTypedRateLimitingQueueWithConfig(workqueue.DefaultTypedControllerRateLimiter[any](), workqueue.TypedRateLimitingQueueConfig[any]{Name: "clusterversion"}), @@ -354,6 +358,7 @@ func New( return availableUpdates.Updates, availableUpdates.ConditionalUpdates, nil }, rtClient, + dynamicClient, cvInformer.Lister().Get, func(ctx context.Context, namespace, name string, opts metav1.GetOptions) (*corev1.ConfigMap, error) { return kubeClient.CoreV1().ConfigMaps(namespace).Get(ctx, name, opts) diff --git a/pkg/cvo/cvo_test.go b/pkg/cvo/cvo_test.go index 68de18927..b1587e99d 100644 --- a/pkg/cvo/cvo_test.go +++ b/pkg/cvo/cvo_test.go @@ -2756,7 +2756,7 @@ func TestOperator_availableUpdatesSync(t 
*testing.T) { ctx := context.Background() optr.proposalController = proposal.NewController(func() ([]configv1.Release, []configv1.ConditionalUpdate, error) { return nil, nil, nil - }, ctrlruntimefake.NewClientBuilder().Build(), func(_ string) (*configv1.ClusterVersion, error) { + }, ctrlruntimefake.NewClientBuilder().Build(), nil, func(_ string) (*configv1.ClusterVersion, error) { return &configv1.ClusterVersion{}, nil }, func(_ context.Context, namespace, name string, _ metav1.GetOptions) (*corev1.ConfigMap, error) { return &corev1.ConfigMap{}, nil diff --git a/pkg/proposal/controller.go b/pkg/proposal/controller.go index 9e13e9770..78f16d0fc 100644 --- a/pkg/proposal/controller.go +++ b/pkg/proposal/controller.go @@ -2,6 +2,7 @@ package proposal import ( "context" + "encoding/json" "fmt" "os" "regexp" @@ -17,6 +18,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" kutilerrors "k8s.io/apimachinery/pkg/util/errors" "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/client-go/dynamic" "k8s.io/client-go/util/workqueue" "k8s.io/klog/v2" @@ -24,6 +26,7 @@ import ( proposalv1alpha1 "github.com/openshift/lightspeed-agentic-operator/api/v1alpha1" i "github.com/openshift/cluster-version-operator/pkg/internal" + "github.com/openshift/cluster-version-operator/pkg/readiness" ) type Controller struct { @@ -31,6 +34,7 @@ type Controller struct { queue workqueue.TypedRateLimitingInterface[any] updatesGetterFunc updatesGetterFunc client ctrlruntimeclient.Client + dynamicClient dynamic.Interface cvGetterFunc cvGetterFunc configMapGetterFunc configMapGetterFunc getCurrentVersionFunc getCurrentVersionFunc @@ -57,6 +61,7 @@ type configMapGetterFunc func(ctx context.Context, namespace, name string, opts func NewController( updatesGetterFunc updatesGetterFunc, client ctrlruntimeclient.Client, + dynamicClient dynamic.Interface, cvGetterFunc cvGetterFunc, configMapGetterFunc configMapGetterFunc, getCurrentVersionFunc getCurrentVersionFunc, @@ -68,6 +73,7 @@ func NewController( 
workqueue.TypedRateLimitingQueueConfig[any]{Name: controllerName}), updatesGetterFunc: updatesGetterFunc, client: client, + dynamicClient: dynamicClient, cvGetterFunc: cvGetterFunc, configMapGetterFunc: configMapGetterFunc, getCurrentVersionFunc: getCurrentVersionFunc, @@ -152,9 +158,7 @@ func (c *Controller) Sync(ctx context.Context, key string) error { return kutilerrors.NewAggregate(errs) } - // TODO: Implement it - readinessJSON := "{}" - proposals, err := getProposals(updates, conditionalUpdates, c.config.Namespace, currentVersion, cv.Spec.Channel, prompt, readinessJSON) + proposals, err := getProposals(ctx, c.dynamicClient, updates, conditionalUpdates, c.config.Namespace, currentVersion, cv.Spec.Channel, prompt) if err != nil { klog.V(i.Normal).Infof("Getting proposals hit an error: %v", err) return kutilerrors.NewAggregate(append(errs, err)) @@ -277,17 +281,23 @@ func deleteProposal(ctx context.Context, client ctrlruntimeclient.Client, propos } func getProposals( + ctx context.Context, + dynamicClient dynamic.Interface, availableUpdates []configv1.Release, conditionalUpdates []configv1.ConditionalUpdate, namespace string, currentVersion, channel, systemPrompt string, - readinessJSON string, ) ([]*proposalv1alpha1.Proposal, error) { + // TODO: Only 2 of 9 readiness checks (api_deprecations, olm_lifecycle) use the target version. + // The other 7 query cluster-wide state identical across targets. For clusters with many available + // updates, split into target-independent checks (run once) and target-dependent checks (run per + // target) to reduce redundant API calls. 
var errs []error var proposals []*proposalv1alpha1.Proposal for _, au := range availableUpdates { targetVersion := au.Version + readinessJSON := runReadinessJSON(ctx, dynamicClient, currentVersion, targetVersion) if proposal, err := getProposal(namespace, currentVersion, targetVersion, channel, updateKindRecommended, systemPrompt, readinessJSON, availableUpdates); err != nil { errs = append(errs, err) continue @@ -298,6 +308,7 @@ func getProposals( for _, cu := range conditionalUpdates { targetVersion := cu.Release.Version + readinessJSON := runReadinessJSON(ctx, dynamicClient, currentVersion, targetVersion) if proposal, err := getProposal(namespace, currentVersion, targetVersion, channel, updateKindConditional, systemPrompt, readinessJSON, availableUpdates); err != nil { errs = append(errs, err) continue @@ -437,6 +448,20 @@ func classifyUpdate(current, target string) string { return i.UpdateType(cv, tv) } +func runReadinessJSON(ctx context.Context, dynamicClient dynamic.Interface, currentVersion, targetVersion string) string { + if dynamicClient == nil { + klog.V(i.Normal).Infof("Dynamic client is nil; skipping readiness checks for %s -> %s", currentVersion, targetVersion) + return "{}" + } + output := readiness.RunAll(ctx, dynamicClient, currentVersion, targetVersion) + data, err := json.Marshal(output) + if err != nil { + klog.V(i.Normal).Infof("Failed to marshal readiness output for %s -> %s: %v", currentVersion, targetVersion, err) + return "{}" + } + return string(data) +} + // buildRequest constructs the proposal request with system prompt, metadata, and readiness data. 
func buildRequest(systemPrompt, current, target, channel, updateType, targetType string, updates []configv1.Release, readinessJSON string) string { diff --git a/pkg/proposal/controller_test.go b/pkg/proposal/controller_test.go index 9c9d58563..6126b69d4 100644 --- a/pkg/proposal/controller_test.go +++ b/pkg/proposal/controller_test.go @@ -2,6 +2,7 @@ package proposal import ( "context" + "encoding/json" "fmt" "strings" "testing" @@ -16,11 +17,17 @@ import ( apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" kerrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" kutilerrors "k8s.io/apimachinery/pkg/util/errors" + dynamicfake "k8s.io/client-go/dynamic/fake" "k8s.io/client-go/kubernetes/scheme" configv1 "github.com/openshift/api/config/v1" proposalv1alpha1 "github.com/openshift/lightspeed-agentic-operator/api/v1alpha1" + + "github.com/openshift/cluster-version-operator/pkg/readiness" ) func init() { @@ -137,7 +144,7 @@ Update path: Recommended } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - c := NewController(tt.updatesGetterFunc, tt.client, tt.cvGetterFunc, func(_ context.Context, namespace, name string, _ metav1.GetOptions) (*corev1.ConfigMap, error) { + c := NewController(tt.updatesGetterFunc, tt.client, nil, tt.cvGetterFunc, func(_ context.Context, namespace, name string, _ metav1.GetOptions) (*corev1.ConfigMap, error) { if namespace == "openshift-lightspeed" && name == "cluster-update-advisory-prompt" { return &corev1.ConfigMap{ Data: map[string]string{ @@ -743,7 +750,6 @@ func TestGetProposals(t *testing.T) { currentVersion string channel string systemPrompt string - readinessJSON string expected []*proposalv1alpha1.Proposal expectError error }{ @@ -757,7 +763,6 @@ func TestGetProposals(t *testing.T) { currentVersion: "4.15.3", channel: "stable-4.16", 
systemPrompt: "Test prompt", - readinessJSON: `{"test": "data"}`, expected: []*proposalv1alpha1.Proposal{ { ObjectMeta: metav1.ObjectMeta{ @@ -787,7 +792,7 @@ Other recommended versions available: ## Cluster Readiness Data ` + "```json\n" + - `{"test": "data"}` + "\n```\n", + `{}` + "\n```\n", Analysis: proposalv1alpha1.ProposalStep{ Agent: "smart", }, @@ -851,7 +856,7 @@ Other recommended versions available: ## Cluster Readiness Data ` + "```json\n" + - `{"test": "data"}` + "\n```\n", + `{}` + "\n```\n", Analysis: proposalv1alpha1.ProposalStep{ Agent: "smart", }, @@ -924,7 +929,7 @@ Update path: Recommended Other recommended versions available: - 4.16.1 -`, +` + "## Cluster Readiness Data\n\n```json\n{}\n```\n", Analysis: proposalv1alpha1.ProposalStep{ Agent: "smart", }, @@ -981,7 +986,7 @@ Update path: Recommended Other recommended versions available: - 4.16.0 -`, +` + "## Cluster Readiness Data\n\n```json\n{}\n```\n", Analysis: proposalv1alpha1.ProposalStep{ Agent: "smart", }, @@ -1043,7 +1048,7 @@ Other recommended versions available: - 4.16.0 - 4.16.1 -`, +` + "## Cluster Readiness Data\n\n```json\n{}\n```\n", Analysis: proposalv1alpha1.ProposalStep{ Agent: "smart", }, @@ -1105,7 +1110,7 @@ Other recommended versions available: - 4.16.0 - 4.16.1 -`, +` + "## Cluster Readiness Data\n\n```json\n{}\n```\n", Analysis: proposalv1alpha1.ProposalStep{ Agent: "smart", }, @@ -1174,13 +1179,14 @@ Other recommended versions available: for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { proposals, err := getProposals( + context.Background(), + nil, tt.availableUpdates, tt.conditionalUpdates, tt.namespace, tt.currentVersion, tt.channel, tt.systemPrompt, - tt.readinessJSON, ) if diff := cmp.Diff(err, tt.expectError, cmp.Transformer("Error", func(e error) string { @@ -1236,3 +1242,282 @@ func Test_expired(t *testing.T) { }) } } + +func newFakeDynamicClient(objects ...runtime.Object) *dynamicfake.FakeDynamicClient { + s := runtime.NewScheme() + gvrs := 
map[schema.GroupVersionResource]string{ + readiness.GVRClusterVersion: "ClusterVersionList", + readiness.GVRClusterOperator: "ClusterOperatorList", + readiness.GVRMachineConfigPool: "MachineConfigPoolList", + readiness.GVRNode: "NodeList", + readiness.GVRPod: "PodList", + readiness.GVRPDB: "PodDisruptionBudgetList", + readiness.GVRCRD: "CustomResourceDefinitionList", + readiness.GVRSubscription: "SubscriptionList", + readiness.GVRCSV: "ClusterServiceVersionList", + readiness.GVRInstallPlan: "InstallPlanList", + readiness.GVRPackageManifest: "PackageManifestList", + readiness.GVRAPIRequestCount: "APIRequestCountList", + readiness.GVRNetwork: "NetworkList", + readiness.GVRProxy: "ProxyList", + readiness.GVRAPIServer: "APIServerList", + } + for gvr, listKind := range gvrs { + gvk := schema.GroupVersionKind{Group: gvr.Group, Version: gvr.Version, Kind: listKind} + s.AddKnownTypeWithName(gvk, &unstructured.UnstructuredList{}) + } + return dynamicfake.NewSimpleDynamicClientWithCustomListKinds(s, gvrs, objects...) 
+} + +func TestGetProposals_WithReadinessData(t *testing.T) { + dc := newFakeDynamicClient( + // ClusterVersion + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "config.openshift.io/v1", "kind": "ClusterVersion", + "metadata": map[string]interface{}{"name": "version"}, + "spec": map[string]interface{}{"channel": "stable-4.21", "clusterID": "test-id"}, + "status": map[string]interface{}{ + "conditions": []interface{}{ + map[string]interface{}{"type": "Available", "status": "True"}, + map[string]interface{}{"type": "Progressing", "status": "False"}, + map[string]interface{}{"type": "Upgradeable", "status": "True"}, + }, + "history": []interface{}{ + map[string]interface{}{"version": "4.21.5", "state": "Completed"}, + }, + }, + }}, + // ClusterOperators + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "config.openshift.io/v1", "kind": "ClusterOperator", + "metadata": map[string]interface{}{"name": "etcd"}, + "status": map[string]interface{}{ + "conditions": []interface{}{ + map[string]interface{}{"type": "Available", "status": "True"}, + map[string]interface{}{"type": "Degraded", "status": "False"}, + map[string]interface{}{"type": "Upgradeable", "status": "True"}, + }, + }, + }}, + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "config.openshift.io/v1", "kind": "ClusterOperator", + "metadata": map[string]interface{}{"name": "dns"}, + "status": map[string]interface{}{ + "conditions": []interface{}{ + map[string]interface{}{"type": "Available", "status": "True"}, + map[string]interface{}{"type": "Degraded", "status": "False"}, + map[string]interface{}{"type": "Upgradeable", "status": "True"}, + }, + }, + }}, + // MachineConfigPool + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "machineconfiguration.openshift.io/v1", "kind": "MachineConfigPool", + "metadata": map[string]interface{}{"name": "master"}, + "spec": map[string]interface{}{"paused": false}, + 
"status": map[string]interface{}{ + "machineCount": int64(3), "readyMachineCount": int64(3), "updatedMachineCount": int64(3), + "conditions": []interface{}{ + map[string]interface{}{"type": "Degraded", "status": "False"}, + map[string]interface{}{"type": "Updating", "status": "False"}, + }, + }, + }}, + // Etcd pods + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "v1", "kind": "Pod", + "metadata": map[string]interface{}{"name": "etcd-master-0", "namespace": "openshift-etcd", "labels": map[string]interface{}{"app": "etcd"}}, + "spec": map[string]interface{}{"nodeName": "master-0"}, "status": map[string]interface{}{"phase": "Running", "conditions": []interface{}{map[string]interface{}{"type": "Ready", "status": "True"}}}, + }}, + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "v1", "kind": "Pod", + "metadata": map[string]interface{}{"name": "etcd-master-1", "namespace": "openshift-etcd", "labels": map[string]interface{}{"app": "etcd"}}, + "spec": map[string]interface{}{"nodeName": "master-1"}, "status": map[string]interface{}{"phase": "Running", "conditions": []interface{}{map[string]interface{}{"type": "Ready", "status": "True"}}}, + }}, + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "v1", "kind": "Pod", + "metadata": map[string]interface{}{"name": "etcd-master-2", "namespace": "openshift-etcd", "labels": map[string]interface{}{"app": "etcd"}}, + "spec": map[string]interface{}{"nodeName": "master-2"}, "status": map[string]interface{}{"phase": "Running", "conditions": []interface{}{map[string]interface{}{"type": "Ready", "status": "True"}}}, + }}, + // Nodes + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "v1", "kind": "Node", + "metadata": map[string]interface{}{"name": "master-0"}, + "status": map[string]interface{}{"conditions": []interface{}{map[string]interface{}{"type": "Ready", "status": "True"}}}, + }}, + &unstructured.Unstructured{Object: 
map[string]interface{}{ + "apiVersion": "v1", "kind": "Node", + "metadata": map[string]interface{}{"name": "worker-0"}, + "status": map[string]interface{}{"conditions": []interface{}{map[string]interface{}{"type": "Ready", "status": "True"}}}, + }}, + // PDB + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "policy/v1", "kind": "PodDisruptionBudget", + "metadata": map[string]interface{}{"name": "etcd-guard", "namespace": "openshift-etcd"}, + "spec": map[string]interface{}{"maxUnavailable": "1"}, + "status": map[string]interface{}{"currentHealthy": int64(3), "desiredHealthy": int64(2), "disruptionsAllowed": int64(1)}, + }}, + // APIRequestCount with blocker + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "apiserver.openshift.io/v1", "kind": "APIRequestCount", + "metadata": map[string]interface{}{"name": "flowschemas.v1beta3.flowcontrol.apiserver.k8s.io"}, + "status": map[string]interface{}{ + "removedInRelease": "4.21.8", "requestCount": int64(100), + "conditions": []interface{}{map[string]interface{}{"type": "Deprecated", "status": "True"}}, + }, + }}, + // CRD with version issue + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "apiextensions.k8s.io/v1", "kind": "CustomResourceDefinition", + "metadata": map[string]interface{}{"name": "widgets.example.com"}, + "spec": map[string]interface{}{ + "versions": []interface{}{ + map[string]interface{}{"name": "v2", "served": true}, + map[string]interface{}{"name": "v1", "served": false}, + }, + }, + "status": map[string]interface{}{"storedVersions": []interface{}{"v1"}}, + }}, + // Network, Proxy, APIServer + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "config.openshift.io/v1", "kind": "Network", + "metadata": map[string]interface{}{"name": "cluster"}, + "status": map[string]interface{}{"networkType": "OVNKubernetes"}, + }}, + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": 
"config.openshift.io/v1", "kind": "Proxy", + "metadata": map[string]interface{}{"name": "cluster"}, + "spec": map[string]interface{}{}, + }}, + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "config.openshift.io/v1", "kind": "APIServer", + "metadata": map[string]interface{}{"name": "cluster"}, + "spec": map[string]interface{}{}, + }}, + // OLM Subscription + CSV + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "operators.coreos.com/v1alpha1", "kind": "Subscription", + "metadata": map[string]interface{}{"name": "elasticsearch-operator", "namespace": "openshift-operators-redhat"}, + "spec": map[string]interface{}{"channel": "stable-5.8", "name": "elasticsearch-operator", "source": "redhat-operators", "sourceNamespace": "openshift-marketplace"}, + "status": map[string]interface{}{"state": "AtLatestKnown", "installedCSV": "elasticsearch-operator.v5.8.6", "currentCSV": "elasticsearch-operator.v5.8.6"}, + }}, + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "operators.coreos.com/v1alpha1", "kind": "ClusterServiceVersion", + "metadata": map[string]interface{}{"name": "elasticsearch-operator.v5.8.6", "namespace": "openshift-operators-redhat"}, + "spec": map[string]interface{}{"version": "5.8.6", "displayName": "OpenShift Elasticsearch Operator"}, + "status": map[string]interface{}{"phase": "Succeeded"}, + }}, + ) + + proposals, err := getProposals( + context.Background(), + dc, + []configv1.Release{{Version: "4.21.8"}}, + nil, + "openshift-lightspeed", + "4.21.5", + "stable-4.21", + "Test prompt", + ) + if err != nil { + t.Fatalf("getProposals returned error: %v", err) + } + if len(proposals) != 1 { + t.Fatalf("expected 1 proposal, got %d", len(proposals)) + } + + request := proposals[0].Spec.Request + + if !strings.Contains(request, "## Cluster Readiness Data") { + t.Fatal("proposal request missing readiness data section") + } + + // Extract JSON from the request + start := 
strings.Index(request, "```json\n") + if start < 0 { + t.Fatal("could not find readiness JSON fence in request") + } + jsonStart := start + len("```json\n") + jsonEnd := strings.Index(request[jsonStart:], "\n```") + if jsonEnd < 0 { + t.Fatal("could not find closing fence for readiness JSON") + } + readinessJSON := request[jsonStart : jsonStart+jsonEnd] + + // Unmarshal into raw map since CheckResult.Data is json:"-" (flattened during marshal) + var raw map[string]any + if err := json.Unmarshal([]byte(readinessJSON), &raw); err != nil { + t.Fatalf("readiness JSON is not valid: %v\nJSON: %s", err, readinessJSON) + } + + if raw["current_version"] != "4.21.5" { + t.Errorf("readiness current_version = %v, want 4.21.5", raw["current_version"]) + } + if raw["target_version"] != "4.21.8" { + t.Errorf("readiness target_version = %v, want 4.21.8", raw["target_version"]) + } + + meta, ok := raw["meta"].(map[string]any) + if !ok { + t.Fatal("readiness output missing 'meta'") + } + if meta["total_checks"] != float64(9) { + t.Errorf("readiness total_checks = %v, want 9", meta["total_checks"]) + } + if meta["checks_ok"] != float64(9) { + t.Errorf("readiness checks_ok = %v, want 9 (all checks should succeed)", meta["checks_ok"]) + } + + checks, ok := raw["checks"].(map[string]any) + if !ok { + t.Fatal("readiness output missing 'checks'") + } + + // Verify every check produced results with ok status + for _, name := range []string{ + "cluster_conditions", "operator_health", "api_deprecations", + "node_capacity", "pdb_drain", "etcd_health", "network", + "crd_compat", "olm_operator_lifecycle", + } { + check, ok := checks[name].(map[string]any) + if !ok { + t.Errorf("readiness output missing %s check", name) + continue + } + if check["_status"] != "ok" { + t.Errorf("check %s status = %v, error = %v", name, check["_status"], check["_error"]) + } + } + + // Spot-check: api_deprecations found the blocker + if ad, ok := checks["api_deprecations"].(map[string]any); !ok { + 
t.Fatal("api_deprecations check missing or wrong type") + } else if adSummary, ok := ad["summary"].(map[string]any); !ok { + t.Fatal("api_deprecations summary missing or wrong type") + } else if adSummary["blockers"] != float64(1) { + t.Errorf("api_deprecations blockers = %v, want 1", adSummary["blockers"]) + } + + // Spot-check: olm found the subscription + if olm, ok := checks["olm_operator_lifecycle"].(map[string]any); !ok { + t.Fatal("olm_operator_lifecycle check missing or wrong type") + } else if olmSummary, ok := olm["summary"].(map[string]any); !ok { + t.Fatal("olm_operator_lifecycle summary missing or wrong type") + } else if olmSummary["total_operators"] != float64(1) { + t.Errorf("olm total_operators = %v, want 1", olmSummary["total_operators"]) + } + + // Spot-check: etcd has 3 healthy members + if etcd, ok := checks["etcd_health"].(map[string]any); !ok { + t.Fatal("etcd_health check missing or wrong type") + } else if etcd["total_members"] != float64(3) { + t.Errorf("etcd total_members = %v, want 3", etcd["total_members"]) + } + + // Spot-check: node_capacity found 2 nodes + if nc, ok := checks["node_capacity"].(map[string]any); !ok { + t.Fatal("node_capacity check missing or wrong type") + } else if nc["total_nodes"] != float64(2) { + t.Errorf("node_capacity total_nodes = %v, want 2", nc["total_nodes"]) + } +} diff --git a/pkg/readiness/api_deprecations.go b/pkg/readiness/api_deprecations.go new file mode 100644 index 000000000..33cd31d1a --- /dev/null +++ b/pkg/readiness/api_deprecations.go @@ -0,0 +1,75 @@ +package readiness + +import ( + "context" + "fmt" + "strings" + + "k8s.io/client-go/dynamic" +) + +// APIDeprecationsCheck scans for deprecated or removed API usage. 
+type APIDeprecationsCheck struct{} + +func (c *APIDeprecationsCheck) Name() string { return "api_deprecations" } + +func (c *APIDeprecationsCheck) Run(ctx context.Context, dc dynamic.Interface, current, target string) (map[string]any, error) { + result := map[string]any{} + + // Fetch APIRequestCount resources + arcs, err := ListResources(ctx, dc, GVRAPIRequestCount, "") + if err != nil { + // APIRequestCount may not be available on all clusters + if strings.Contains(err.Error(), "not found") { + result["warning"] = "APIRequestCount resource not available" + result["blocker_apis"] = []any{} + result["warning_apis"] = []any{} + result["summary"] = map[string]any{"blockers": 0, "warnings": 0} + return result, nil + } + return nil, fmt.Errorf("failed to list APIRequestCounts: %w", err) + } + + blockers := make([]map[string]any, 0) + warnings := make([]map[string]any, 0) + + for _, arc := range arcs { + conditions := GetConditions(&arc) + + // Check RemovedInRelease annotation + removedIn := NestedString(arc.Object, "status", "removedInRelease") + cmp, err := CompareVersions(removedIn, target) + if removedIn != "" && err == nil && cmp <= 0 { + requestCount := NestedInt64(arc.Object, "status", "requestCount") + if requestCount > 0 { + blockers = append(blockers, map[string]any{ + "resource": arc.GetName(), + "removed_in_release": removedIn, + "request_count": requestCount, + }) + } + } + + // Check for deprecation condition + if dep, ok := conditions["Deprecated"]; ok && dep.Status == ConditionTrue { + requestCount := NestedInt64(arc.Object, "status", "requestCount") + if requestCount > 0 { + warnings = append(warnings, map[string]any{ + "resource": arc.GetName(), + "request_count": requestCount, + "message": dep.Message, + }) + } + } + } + + result["blocker_apis"] = blockers + result["warning_apis"] = warnings + result["summary"] = map[string]any{ + "blockers": len(blockers), + "warnings": len(warnings), + "total": len(arcs), + } + + return result, nil +} diff --git 
a/pkg/readiness/check.go b/pkg/readiness/check.go new file mode 100644 index 000000000..5e30d23ca --- /dev/null +++ b/pkg/readiness/check.go @@ -0,0 +1,169 @@ +package readiness + +import ( + "context" + "encoding/json" + "fmt" + "sync" + "time" + + "k8s.io/client-go/dynamic" +) + +// Check is the interface that each readiness check implements. +type Check interface { + Name() string + Run(ctx context.Context, c dynamic.Interface, current, target string) (map[string]any, error) +} + +// CheckResult wraps a check's output with metadata. +type CheckResult struct { + Status string `json:"_status"` + Error string `json:"_error,omitempty"` + Elapsed float64 `json:"_elapsed_seconds"` + Data map[string]any `json:"-"` +} + +func (r CheckResult) MarshalJSON() ([]byte, error) { + m := make(map[string]any, len(r.Data)+3) + for k, v := range r.Data { + m[k] = v + } + m["_status"] = r.Status + m["_elapsed_seconds"] = r.Elapsed + if r.Error != "" { + m["_error"] = r.Error + } + return json.Marshal(m) +} + +// Output is the top-level readiness report structure. +type Output struct { + CurrentVersion string `json:"current_version"` + TargetVersion string `json:"target_version"` + Checks map[string]CheckResult `json:"checks"` + Meta Meta `json:"meta"` +} + +// Meta contains summary information about the readiness check run. +type Meta struct { + TotalChecks int `json:"total_checks"` + ChecksOK int `json:"checks_ok"` + ChecksErrored int `json:"checks_errored"` + ElapsedSeconds float64 `json:"elapsed_seconds"` +} + +const ( + perCheckTimeout = 60 * time.Second + + StatusOK = "ok" + StatusError = "error" +) + +// AllChecks returns all registered readiness checks. 
+// Checks are split into two categories: +// - cluster_conditions: reads CVO's already-computed state (no re-querying) +// - everything else: gathers NEW data that CVO doesn't already track +var AllChecks = func() []Check { + return []Check{ + &ClusterConditionsCheck{}, // reads existing CVO conditions — no duplication + &OperatorHealthCheck{}, // per-CO detail + MCPs (CVO only aggregates) + &APIDeprecationsCheck{}, // new: deprecated API usage + &NodeCapacityCheck{}, // new: node readiness and headroom + &PDBDrainCheck{}, // new: PDB drain blockers + &EtcdHealthCheck{}, // new: deep etcd health (beyond CO condition) + &NetworkCheck{}, // new: SDN migration, TLS, proxy + &CRDCompatCheck{}, // new: CRD version mismatches + &OLMOperatorLifecycleCheck{}, // new: OLM operator lifecycle (OCPSTRAT-2618) + // Known issues (Jira/KB) are NOT checked here — the agent uses its + // redhat-support skill to query contextually based on readiness findings. + } +} + +// RunAll executes all readiness checks in parallel with per-check timeouts. 
+func RunAll(ctx context.Context, c dynamic.Interface, current, target string) *Output { + checks := AllChecks() + results := make(map[string]CheckResult, len(checks)) + + var mu sync.Mutex + var wg sync.WaitGroup + + totalStart := time.Now() + + for _, check := range checks { + wg.Add(1) + go func(ch Check) { + defer wg.Done() + + checkCtx, cancel := context.WithTimeout(ctx, perCheckTimeout) + defer cancel() + + start := time.Now() + result := CheckResult{Data: map[string]any{}} + + defer func() { + if r := recover(); r != nil { + result.Status = StatusError + result.Error = fmt.Sprintf("panic: %v", r) + } + result.Elapsed = time.Since(start).Seconds() + if result.Data == nil { + result.Data = map[string]any{} + } + + mu.Lock() + results[ch.Name()] = result + mu.Unlock() + }() + + data, err := ch.Run(checkCtx, c, current, target) + + if err != nil { + result.Status = StatusError + result.Error = err.Error() + if data != nil { + result.Data = data + } + } else { + result.Status = StatusOK + result.Data = data + } + }(check) + } + + wg.Wait() + totalElapsed := time.Since(totalStart).Seconds() + + ok := 0 + errored := 0 + for _, r := range results { + if r.Status == StatusOK { + ok++ + } else { + errored++ + } + } + + return &Output{ + CurrentVersion: current, + TargetVersion: target, + Checks: results, + Meta: Meta{ + TotalChecks: len(checks), + ChecksOK: ok, + ChecksErrored: errored, + ElapsedSeconds: totalElapsed, + }, + } +} + +// SectionError appends a section error entry to the errors slice. 
+func SectionError(errors *[]map[string]any, section string, err error) { + if err == nil { + return + } + *errors = append(*errors, map[string]any{ + "section": section, + "error": err.Error(), + }) +} diff --git a/pkg/readiness/check_test.go b/pkg/readiness/check_test.go new file mode 100644 index 000000000..73260a97b --- /dev/null +++ b/pkg/readiness/check_test.go @@ -0,0 +1,264 @@ +package readiness + +import ( + "context" + "encoding/json" + "errors" + "testing" + + "k8s.io/client-go/dynamic" +) + +type fakeCheck struct { + name string + data map[string]any + err error + panic bool +} + +func (f *fakeCheck) Name() string { return f.name } +func (f *fakeCheck) Run(_ context.Context, _ dynamic.Interface, _, _ string) (map[string]any, error) { + if f.panic { + panic("check exploded") + } + return f.data, f.err +} + +func TestCheckResultMarshalJSON(t *testing.T) { + t.Run("ok result merges data with metadata", func(t *testing.T) { + r := CheckResult{ + Status: "ok", + Elapsed: 1.5, + Data: map[string]any{"foo": "bar", "count": 42}, + } + b, err := json.Marshal(r) + if err != nil { + t.Fatal(err) + } + var m map[string]any + if err := json.Unmarshal(b, &m); err != nil { + t.Fatal(err) + } + if m["_status"] != "ok" { + t.Errorf("_status = %v, want ok", m["_status"]) + } + if m["foo"] != "bar" { + t.Errorf("foo = %v, want bar", m["foo"]) + } + if _, ok := m["_error"]; ok { + t.Error("_error should be omitted for ok results") + } + }) + + t.Run("error result includes error field", func(t *testing.T) { + r := CheckResult{ + Status: "error", + Error: "something failed", + Elapsed: 0.1, + Data: map[string]any{}, + } + b, err := json.Marshal(r) + if err != nil { + t.Fatal(err) + } + var m map[string]any + if err := json.Unmarshal(b, &m); err != nil { + t.Fatal(err) + } + if m["_error"] != "something failed" { + t.Errorf("_error = %v, want 'something failed'", m["_error"]) + } + }) +} + +func TestFakeCheckInterface(t *testing.T) { + ok := &fakeCheck{name: "ok_check", data: 
map[string]any{"healthy": true}} + fail := &fakeCheck{name: "err_check", err: errors.New("fail")} + + if ok.Name() != "ok_check" { + t.Errorf("Name() = %q", ok.Name()) + } + + data, err := ok.Run(context.Background(), nil, "4.21.5", "4.21.8") + if err != nil { + t.Errorf("ok check should not error: %v", err) + } + if data["healthy"] != true { + t.Errorf("data = %v", data) + } + + _, err = fail.Run(context.Background(), nil, "4.21.5", "4.21.8") + if err == nil { + t.Error("fail check should error") + } +} + +func TestOutputMarshalJSON(t *testing.T) { + output := &Output{ + CurrentVersion: "4.21.5", + TargetVersion: "4.21.8", + Checks: map[string]CheckResult{ + "test": {Status: "ok", Elapsed: 0.5, Data: map[string]any{"key": "val"}}, + }, + Meta: Meta{TotalChecks: 1, ChecksOK: 1, ChecksErrored: 0, ElapsedSeconds: 0.5}, + } + + b, err := json.Marshal(output) + if err != nil { + t.Fatal(err) + } + + var m map[string]any + if err := json.Unmarshal(b, &m); err != nil { + t.Fatal(err) + } + + if m["current_version"] != "4.21.5" { + t.Errorf("current_version = %v", m["current_version"]) + } + if m["target_version"] != "4.21.8" { + t.Errorf("target_version = %v", m["target_version"]) + } + + checks, ok := m["checks"].(map[string]any) + if !ok { + t.Fatal("checks not a map") + } + testCheck, ok := checks["test"].(map[string]any) + if !ok { + t.Fatal("test check not a map") + } + if testCheck["_status"] != "ok" { + t.Errorf("test._status = %v", testCheck["_status"]) + } + if testCheck["key"] != "val" { + t.Errorf("test.key = %v", testCheck["key"]) + } +} + +func TestSectionError(t *testing.T) { + var errs []map[string]any + SectionError(&errs, "test_section", errors.New("something broke")) + + if len(errs) != 1 { + t.Fatalf("len = %d, want 1", len(errs)) + } + if errs[0]["section"] != "test_section" { + t.Errorf("section = %v", errs[0]["section"]) + } + if errs[0]["error"] != "something broke" { + t.Errorf("error = %v", errs[0]["error"]) + } +} + +func 
TestRunAllMixedResults(t *testing.T) { + orig := AllChecks + defer func() { AllChecks = orig }() + + AllChecks = func() []Check { + return []Check{ + &fakeCheck{name: "passing", data: map[string]any{"healthy": true}}, + &fakeCheck{name: "failing", err: errors.New("something broke")}, + &fakeCheck{name: "partial", data: map[string]any{"partial": true}, err: errors.New("partial failure")}, + } + } + + output := RunAll(context.Background(), nil, "4.21.5", "4.21.8") + + if output.Meta.TotalChecks != 3 { + t.Errorf("TotalChecks = %d, want 3", output.Meta.TotalChecks) + } + if output.Meta.ChecksOK != 1 { + t.Errorf("ChecksOK = %d, want 1", output.Meta.ChecksOK) + } + if output.Meta.ChecksErrored != 2 { + t.Errorf("ChecksErrored = %d, want 2", output.Meta.ChecksErrored) + } + + passing := output.Checks["passing"] + if passing.Status != StatusOK { + t.Errorf("passing.Status = %q, want ok", passing.Status) + } + if passing.Data["healthy"] != true { + t.Errorf("passing.Data[healthy] = %v", passing.Data["healthy"]) + } + + failing := output.Checks["failing"] + if failing.Status != StatusError { + t.Errorf("failing.Status = %q, want error", failing.Status) + } + if failing.Error != "something broke" { + t.Errorf("failing.Error = %q", failing.Error) + } + + partial := output.Checks["partial"] + if partial.Status != StatusError { + t.Errorf("partial.Status = %q, want error", partial.Status) + } + if partial.Data["partial"] != true { + t.Errorf("partial.Data[partial] = %v, want true", partial.Data["partial"]) + } + if partial.Error != "partial failure" { + t.Errorf("partial.Error = %q", partial.Error) + } +} + +func TestRunAllRecoversPanic(t *testing.T) { + orig := AllChecks + defer func() { AllChecks = orig }() + + AllChecks = func() []Check { + return []Check{ + &fakeCheck{name: "ok_check", data: map[string]any{"healthy": true}}, + &fakeCheck{name: "panicking", panic: true}, + } + } + + output := RunAll(context.Background(), nil, "4.21.5", "4.21.8") + + if 
output.Meta.TotalChecks != 2 { + t.Errorf("TotalChecks = %d, want 2", output.Meta.TotalChecks) + } + if output.Meta.ChecksOK != 1 { + t.Errorf("ChecksOK = %d, want 1", output.Meta.ChecksOK) + } + if output.Meta.ChecksErrored != 1 { + t.Errorf("ChecksErrored = %d, want 1", output.Meta.ChecksErrored) + } + + ok := output.Checks["ok_check"] + if ok.Status != StatusOK { + t.Errorf("ok_check.Status = %q, want ok", ok.Status) + } + + panicked := output.Checks["panicking"] + if panicked.Status != StatusError { + t.Errorf("panicking.Status = %q, want error", panicked.Status) + } + if panicked.Error != "panic: check exploded" { + t.Errorf("panicking.Error = %q, want 'panic: check exploded'", panicked.Error) + } +} + +func TestAllChecksReturnsExpectedCount(t *testing.T) { + checks := AllChecks() + if len(checks) != 9 { + t.Errorf("AllChecks() returned %d checks, want 9", len(checks)) + } + + names := make(map[string]bool) + for _, c := range checks { + names[c.Name()] = true + } + + expected := []string{ + "cluster_conditions", "operator_health", "api_deprecations", + "node_capacity", "pdb_drain", "etcd_health", "network", "crd_compat", + "olm_operator_lifecycle", + } + for _, name := range expected { + if !names[name] { + t.Errorf("missing check: %s", name) + } + } +} diff --git a/pkg/readiness/checks_test.go b/pkg/readiness/checks_test.go new file mode 100644 index 000000000..f9fae9115 --- /dev/null +++ b/pkg/readiness/checks_test.go @@ -0,0 +1,1097 @@ +package readiness + +import ( + "context" + "encoding/json" + "testing" + + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" + dynamicfake "k8s.io/client-go/dynamic/fake" +) + +func newFakeDynamicClient(objects ...runtime.Object) *dynamicfake.FakeDynamicClient { + scheme := runtime.NewScheme() + gvrs := map[schema.GroupVersionResource]string{ + GVRClusterVersion: "ClusterVersionList", + GVRClusterOperator: "ClusterOperatorList", + 
GVRMachineConfigPool: "MachineConfigPoolList", + GVRNode: "NodeList", + GVRPod: "PodList", + GVRPDB: "PodDisruptionBudgetList", + GVRCRD: "CustomResourceDefinitionList", + GVRSubscription: "SubscriptionList", + GVRCSV: "ClusterServiceVersionList", + GVRInstallPlan: "InstallPlanList", + GVRPackageManifest: "PackageManifestList", + GVRAPIRequestCount: "APIRequestCountList", + GVRNetwork: "NetworkList", + GVRProxy: "ProxyList", + GVRAPIServer: "APIServerList", + } + for gvr, listKind := range gvrs { + gvk := schema.GroupVersionKind{Group: gvr.Group, Version: gvr.Version, Kind: listKind} + scheme.AddKnownTypeWithName(gvk, &unstructured.UnstructuredList{}) + } + return dynamicfake.NewSimpleDynamicClientWithCustomListKinds(scheme, gvrs, objects...) +} + +func TestNodeCapacityCheck(t *testing.T) { + nodes := []runtime.Object{ + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "v1", "kind": "Node", + "metadata": map[string]interface{}{"name": "master-0"}, + "status": map[string]interface{}{ + "conditions": []interface{}{ + map[string]interface{}{"type": "Ready", "status": "True"}, + }, + }, + }}, + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "v1", "kind": "Node", + "metadata": map[string]interface{}{"name": "worker-0"}, + "status": map[string]interface{}{ + "conditions": []interface{}{ + map[string]interface{}{"type": "Ready", "status": "True"}, + }, + }, + }}, + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "v1", "kind": "Node", + "metadata": map[string]interface{}{"name": "worker-1"}, + "spec": map[string]interface{}{"unschedulable": true}, + "status": map[string]interface{}{ + "conditions": []interface{}{ + map[string]interface{}{"type": "Ready", "status": "False"}, + }, + }, + }}, + } + + client := newFakeDynamicClient(nodes...) 
+ check := &NodeCapacityCheck{} + + result, err := check.Run(context.Background(), client, "4.21.5", "4.21.8") + if err != nil { + t.Fatal(err) + } + + if result["total_nodes"] != 3 { + t.Errorf("total_nodes = %v, want 3", result["total_nodes"]) + } + if result["ready_nodes"] != 2 { + t.Errorf("ready_nodes = %v, want 2", result["ready_nodes"]) + } + if result["unschedulable_nodes"] != 1 { + t.Errorf("unschedulable_nodes = %v, want 1", result["unschedulable_nodes"]) + } + + summary, ok := result["summary"].(map[string]any) + if !ok { + t.Fatal("summary not a map") + } + if summary["not_ready"] != 1 { + t.Errorf("summary.not_ready = %v, want 1", summary["not_ready"]) + } +} + +func TestPDBDrainCheck(t *testing.T) { + pdbs := []runtime.Object{ + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "policy/v1", "kind": "PodDisruptionBudget", + "metadata": map[string]interface{}{"name": "safe-pdb", "namespace": "default"}, + "spec": map[string]interface{}{"maxUnavailable": "1"}, + "status": map[string]interface{}{ + "currentHealthy": int64(3), + "desiredHealthy": int64(2), + "disruptionsAllowed": int64(1), + }, + }}, + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "policy/v1", "kind": "PodDisruptionBudget", + "metadata": map[string]interface{}{"name": "blocking-pdb", "namespace": "critical"}, + "spec": map[string]interface{}{"maxUnavailable": "0"}, + "status": map[string]interface{}{ + "currentHealthy": int64(2), + "desiredHealthy": int64(2), + "disruptionsAllowed": int64(0), + }, + }}, + } + + client := newFakeDynamicClient(pdbs...) 
+ check := &PDBDrainCheck{} + + result, err := check.Run(context.Background(), client, "4.21.5", "4.21.8") + if err != nil { + t.Fatal(err) + } + + if result["total_pdbs"] != 2 { + t.Errorf("total_pdbs = %v, want 2", result["total_pdbs"]) + } + + blocking, ok := result["blocking_pdbs"].([]map[string]any) + if !ok { + t.Fatal("blocking_pdbs not a slice") + } + if len(blocking) != 1 { + t.Fatalf("blocking_pdbs len = %d, want 1", len(blocking)) + } + if blocking[0]["name"] != "blocking-pdb" { + t.Errorf("blocking pdb name = %v, want blocking-pdb", blocking[0]["name"]) + } + if blocking[0]["namespace"] != "critical" { + t.Errorf("blocking pdb namespace = %v, want critical", blocking[0]["namespace"]) + } +} + +func TestEtcdHealthCheck(t *testing.T) { + objects := []runtime.Object{ + // etcd ClusterOperator + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "config.openshift.io/v1", "kind": "ClusterOperator", + "metadata": map[string]interface{}{"name": "etcd"}, + "status": map[string]interface{}{ + "conditions": []interface{}{ + map[string]interface{}{"type": "Available", "status": "True", "reason": "AsExpected"}, + map[string]interface{}{"type": "Degraded", "status": "False", "reason": "AsExpected"}, + map[string]interface{}{"type": "Upgradeable", "status": "True", "reason": "AsExpected"}, + }, + }, + }}, + // etcd pods + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "v1", "kind": "Pod", + "metadata": map[string]interface{}{"name": "etcd-master-0", "namespace": "openshift-etcd", + "labels": map[string]interface{}{"app": "etcd"}}, + "spec": map[string]interface{}{"nodeName": "master-0"}, + "status": map[string]interface{}{"phase": "Running", "conditions": []interface{}{ + map[string]interface{}{"type": "Ready", "status": "True"}, + }}, + }}, + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "v1", "kind": "Pod", + "metadata": map[string]interface{}{"name": "etcd-master-1", "namespace": 
"openshift-etcd", + "labels": map[string]interface{}{"app": "etcd"}}, + "spec": map[string]interface{}{"nodeName": "master-1"}, + "status": map[string]interface{}{"phase": "Running", "conditions": []interface{}{ + map[string]interface{}{"type": "Ready", "status": "True"}, + }}, + }}, + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "v1", "kind": "Pod", + "metadata": map[string]interface{}{"name": "etcd-master-2", "namespace": "openshift-etcd", + "labels": map[string]interface{}{"app": "etcd"}}, + "spec": map[string]interface{}{"nodeName": "master-2"}, + "status": map[string]interface{}{"phase": "Running", "conditions": []interface{}{ + map[string]interface{}{"type": "Ready", "status": "False"}, + }}, + }}, + } + + client := newFakeDynamicClient(objects...) + check := &EtcdHealthCheck{} + + result, err := check.Run(context.Background(), client, "4.21.5", "4.21.8") + if err != nil { + t.Fatal(err) + } + + if result["total_members"] != 3 { + t.Errorf("total_members = %v, want 3", result["total_members"]) + } + if result["healthy_members"] != 2 { + t.Errorf("healthy_members = %v, want 2", result["healthy_members"]) + } + + summary, ok := result["summary"].(map[string]any) + if !ok { + t.Fatal("summary not a map") + } + if summary["operator_available"] != true { + t.Errorf("operator_available = %v, want true", summary["operator_available"]) + } + if summary["operator_degraded"] != false { + t.Errorf("operator_degraded = %v, want false", summary["operator_degraded"]) + } +} + +func TestOperatorHealthCheck(t *testing.T) { + objects := []runtime.Object{ + // Healthy operator + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "config.openshift.io/v1", "kind": "ClusterOperator", + "metadata": map[string]interface{}{"name": "dns"}, + "status": map[string]interface{}{ + "conditions": []interface{}{ + map[string]interface{}{"type": "Available", "status": "True", "reason": "AsExpected"}, + map[string]interface{}{"type": 
"Degraded", "status": "False", "reason": "AsExpected"}, + map[string]interface{}{"type": "Upgradeable", "status": "True", "reason": "AsExpected"}, + }, + }, + }}, + // Degraded operator + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "config.openshift.io/v1", "kind": "ClusterOperator", + "metadata": map[string]interface{}{"name": "authentication"}, + "status": map[string]interface{}{ + "conditions": []interface{}{ + map[string]interface{}{"type": "Available", "status": "False", "reason": "OAuthDown", "message": "oauth pods crashlooping"}, + map[string]interface{}{"type": "Degraded", "status": "True", "reason": "OAuthDown", "message": "oauth pods crashlooping"}, + map[string]interface{}{"type": "Upgradeable", "status": "False", "reason": "OAuthDown", "message": "must fix before upgrade"}, + }, + }, + }}, + // MachineConfigPool: healthy master + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "machineconfiguration.openshift.io/v1", "kind": "MachineConfigPool", + "metadata": map[string]interface{}{"name": "master"}, + "spec": map[string]interface{}{"paused": false}, + "status": map[string]interface{}{ + "machineCount": int64(3), + "readyMachineCount": int64(3), + "updatedMachineCount": int64(3), + "conditions": []interface{}{ + map[string]interface{}{"type": "Degraded", "status": "False"}, + map[string]interface{}{"type": "Updating", "status": "False"}, + }, + }, + }}, + // MachineConfigPool: paused and degraded worker + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "machineconfiguration.openshift.io/v1", "kind": "MachineConfigPool", + "metadata": map[string]interface{}{"name": "worker"}, + "spec": map[string]interface{}{"paused": true}, + "status": map[string]interface{}{ + "machineCount": int64(5), + "readyMachineCount": int64(3), + "updatedMachineCount": int64(3), + "conditions": []interface{}{ + map[string]interface{}{"type": "Degraded", "status": "True", "reason": 
"RenderFailed"}, + map[string]interface{}{"type": "Updating", "status": "True", "reason": "InProgress"}, + }, + }, + }}, + } + + client := newFakeDynamicClient(objects...) + check := &OperatorHealthCheck{} + + result, err := check.Run(context.Background(), client, "4.21.5", "4.21.8") + if err != nil { + t.Fatal(err) + } + + // Operator conditions + notUpgradeable, ok := result["not_upgradeable"].([]map[string]any) + if !ok { + t.Fatal("not_upgradeable not a slice") + } + if len(notUpgradeable) != 1 { + t.Fatalf("not_upgradeable len = %d, want 1", len(notUpgradeable)) + } + if notUpgradeable[0]["name"] != "authentication" { + t.Errorf("not_upgradeable[0].name = %v, want authentication", notUpgradeable[0]["name"]) + } + + degraded, ok := result["degraded"].([]map[string]any) + if !ok { + t.Fatal("degraded not a slice") + } + if len(degraded) != 1 { + t.Fatalf("degraded len = %d, want 1", len(degraded)) + } + if degraded[0]["name"] != "authentication" { + t.Errorf("degraded[0].name = %v, want authentication", degraded[0]["name"]) + } + + notAvailable, ok := result["not_available"].([]map[string]any) + if !ok { + t.Fatal("not_available not a slice") + } + if len(notAvailable) != 1 { + t.Fatalf("not_available len = %d, want 1", len(notAvailable)) + } + + // MCP results + mcps, ok := result["machine_config_pools"].([]map[string]any) + if !ok { + t.Fatal("machine_config_pools not a slice") + } + if len(mcps) != 2 { + t.Fatalf("machine_config_pools len = %d, want 2", len(mcps)) + } + + mcpSummary, ok := result["mcp_summary"].(map[string]any) + if !ok { + t.Fatal("mcp_summary not a map") + } + if mcpSummary["paused"] != 1 { + t.Errorf("mcp_summary.paused = %v, want 1", mcpSummary["paused"]) + } + if mcpSummary["degraded"] != 1 { + t.Errorf("mcp_summary.degraded = %v, want 1", mcpSummary["degraded"]) + } + if mcpSummary["updating"] != 1 { + t.Errorf("mcp_summary.updating = %v, want 1", mcpSummary["updating"]) + } + + // Summary + summary, ok := 
result["summary"].(map[string]any) + if !ok { + t.Fatal("summary not a map") + } + if summary["total_operators"] != 2 { + t.Errorf("total_operators = %v, want 2", summary["total_operators"]) + } + if summary["not_upgradeable_count"] != 1 { + t.Errorf("not_upgradeable_count = %v, want 1", summary["not_upgradeable_count"]) + } + if summary["degraded_count"] != 1 { + t.Errorf("degraded_count = %v, want 1", summary["degraded_count"]) + } + if summary["not_available_count"] != 1 { + t.Errorf("not_available_count = %v, want 1", summary["not_available_count"]) + } +} + +func TestOperatorHealthCheck_AllHealthy(t *testing.T) { + objects := []runtime.Object{ + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "config.openshift.io/v1", "kind": "ClusterOperator", + "metadata": map[string]interface{}{"name": "dns"}, + "status": map[string]interface{}{ + "conditions": []interface{}{ + map[string]interface{}{"type": "Available", "status": "True"}, + map[string]interface{}{"type": "Degraded", "status": "False"}, + map[string]interface{}{"type": "Upgradeable", "status": "True"}, + }, + }, + }}, + } + + client := newFakeDynamicClient(objects...) 
+ check := &OperatorHealthCheck{} + + result, err := check.Run(context.Background(), client, "4.21.5", "4.21.8") + if err != nil { + t.Fatal(err) + } + + if len(result["not_upgradeable"].([]map[string]any)) != 0 { + t.Error("expected no not_upgradeable operators") + } + if len(result["degraded"].([]map[string]any)) != 0 { + t.Error("expected no degraded operators") + } + if len(result["not_available"].([]map[string]any)) != 0 { + t.Error("expected no not_available operators") + } +} + +func TestClusterConditionsCheck(t *testing.T) { + cv := &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "config.openshift.io/v1", "kind": "ClusterVersion", + "metadata": map[string]interface{}{"name": "version"}, + "spec": map[string]interface{}{ + "channel": "stable-4.21", + "clusterID": "test-cluster-id-123", + }, + "status": map[string]interface{}{ + "conditions": []interface{}{ + map[string]interface{}{"type": "Available", "status": "True", "reason": "AsExpected"}, + map[string]interface{}{"type": "Progressing", "status": "False", "reason": "AsExpected"}, + map[string]interface{}{"type": "Upgradeable", "status": "True", "reason": "AsExpected", "message": ""}, + map[string]interface{}{"type": "Failing", "status": "False", "reason": "AsExpected"}, + }, + "history": []interface{}{ + map[string]interface{}{ + "version": "4.21.5", + "state": "Completed", + "startedTime": "2026-04-10T10:00:00Z", + "completionTime": "2026-04-10T11:00:00Z", + }, + map[string]interface{}{ + "version": "4.21.4", + "state": "Completed", + "startedTime": "2026-04-01T10:00:00Z", + "completionTime": "2026-04-01T11:00:00Z", + }, + }, + }, + }} + + client := newFakeDynamicClient(cv) + check := &ClusterConditionsCheck{} + + result, err := check.Run(context.Background(), client, "4.21.5", "4.21.8") + if err != nil { + t.Fatal(err) + } + + if result["channel"] != "stable-4.21" { + t.Errorf("channel = %v, want stable-4.21", result["channel"]) + } + if result["cluster_id"] != 
"test-cluster-id-123" { + t.Errorf("cluster_id = %v, want test-cluster-id-123", result["cluster_id"]) + } + if result["update_in_progress"] != false { + t.Errorf("update_in_progress = %v, want false", result["update_in_progress"]) + } + + upgradeable, ok := result["upgradeable"].(map[string]any) + if !ok { + t.Fatal("upgradeable not a map") + } + if upgradeable["status"] != "True" { + t.Errorf("upgradeable.status = %v, want True", upgradeable["status"]) + } + + history, ok := result["recent_history"].([]map[string]any) + if !ok { + t.Fatal("recent_history not a slice") + } + if len(history) != 2 { + t.Fatalf("recent_history len = %d, want 2", len(history)) + } + if history[0]["version"] != "4.21.5" { + t.Errorf("history[0].version = %v, want 4.21.5", history[0]["version"]) + } + + summary, ok := result["summary"].(map[string]any) + if !ok { + t.Fatal("summary not a map") + } + if summary["upgradeable"] != true { + t.Errorf("summary.upgradeable = %v, want true", summary["upgradeable"]) + } + if summary["update_in_progress"] != false { + t.Errorf("summary.update_in_progress = %v, want false", summary["update_in_progress"]) + } +} + +func TestClusterConditionsCheck_ProgressingTrue(t *testing.T) { + cv := &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "config.openshift.io/v1", "kind": "ClusterVersion", + "metadata": map[string]interface{}{"name": "version"}, + "spec": map[string]interface{}{"channel": "stable-4.21", "clusterID": "abc"}, + "status": map[string]interface{}{ + "conditions": []interface{}{ + map[string]interface{}{"type": "Progressing", "status": "True", "reason": "Updating"}, + map[string]interface{}{"type": "Upgradeable", "status": "False", "reason": "Updating", "message": "update in progress"}, + }, + "history": []interface{}{}, + }, + }} + + client := newFakeDynamicClient(cv) + check := &ClusterConditionsCheck{} + + result, err := check.Run(context.Background(), client, "4.21.5", "4.21.8") + if err != nil { + t.Fatal(err) + 
} + + if result["update_in_progress"] != true { + t.Errorf("update_in_progress = %v, want true", result["update_in_progress"]) + } + + summary := result["summary"].(map[string]any) + if summary["upgradeable"] != false { + t.Errorf("summary.upgradeable = %v, want false", summary["upgradeable"]) + } +} + +func TestAPIDeprecationsCheck(t *testing.T) { + objects := []runtime.Object{ + // API removed in target version with active usage — blocker + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "apiserver.openshift.io/v1", "kind": "APIRequestCount", + "metadata": map[string]interface{}{"name": "flowschemas.v1beta3.flowcontrol.apiserver.k8s.io"}, + "status": map[string]interface{}{ + "removedInRelease": "4.21.8", + "requestCount": int64(150), + "conditions": []interface{}{ + map[string]interface{}{"type": "Deprecated", "status": "True", "message": "deprecated since 4.20"}, + }, + }, + }}, + // Deprecated but not removed — warning + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "apiserver.openshift.io/v1", "kind": "APIRequestCount", + "metadata": map[string]interface{}{"name": "cronjobs.v1beta1.batch"}, + "status": map[string]interface{}{ + "removedInRelease": "4.25.0", + "requestCount": int64(42), + "conditions": []interface{}{ + map[string]interface{}{"type": "Deprecated", "status": "True", "message": "use v1 instead"}, + }, + }, + }}, + // No usage — should not appear + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "apiserver.openshift.io/v1", "kind": "APIRequestCount", + "metadata": map[string]interface{}{"name": "unused.v1beta1.example"}, + "status": map[string]interface{}{ + "removedInRelease": "4.21.0", + "requestCount": int64(0), + }, + }}, + } + + client := newFakeDynamicClient(objects...) 
+ check := &APIDeprecationsCheck{} + + result, err := check.Run(context.Background(), client, "4.21.5", "4.21.8") + if err != nil { + t.Fatal(err) + } + + blockers, ok := result["blocker_apis"].([]map[string]any) + if !ok { + t.Fatal("blocker_apis not a slice") + } + if len(blockers) != 1 { + t.Fatalf("blocker_apis len = %d, want 1", len(blockers)) + } + if blockers[0]["resource"] != "flowschemas.v1beta3.flowcontrol.apiserver.k8s.io" { + t.Errorf("blocker resource = %v", blockers[0]["resource"]) + } + if blockers[0]["request_count"] != int64(150) { + t.Errorf("blocker request_count = %v, want 150", blockers[0]["request_count"]) + } + + warnings, ok := result["warning_apis"].([]map[string]any) + if !ok { + t.Fatal("warning_apis not a slice") + } + if len(warnings) != 2 { + t.Fatalf("warning_apis len = %d, want 2", len(warnings)) + } + + summary, ok := result["summary"].(map[string]any) + if !ok { + t.Fatal("summary not a map") + } + if summary["blockers"] != 1 { + t.Errorf("summary.blockers = %v, want 1", summary["blockers"]) + } + if summary["warnings"] != 2 { + t.Errorf("summary.warnings = %v, want 2", summary["warnings"]) + } + if summary["total"] != 3 { + t.Errorf("summary.total = %v, want 3", summary["total"]) + } +} + +func TestAPIDeprecationsCheck_NoBlockers(t *testing.T) { + objects := []runtime.Object{ + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "apiserver.openshift.io/v1", "kind": "APIRequestCount", + "metadata": map[string]interface{}{"name": "pods.v1."}, + "status": map[string]interface{}{ + "requestCount": int64(500), + }, + }}, + } + + client := newFakeDynamicClient(objects...) 
+ check := &APIDeprecationsCheck{} + + result, err := check.Run(context.Background(), client, "4.21.5", "4.21.8") + if err != nil { + t.Fatal(err) + } + + blockers := result["blocker_apis"].([]map[string]any) + if len(blockers) != 0 { + t.Errorf("expected no blockers, got %d", len(blockers)) + } + + warnings := result["warning_apis"].([]map[string]any) + if len(warnings) != 0 { + t.Errorf("expected no warnings, got %d", len(warnings)) + } +} + +func TestCRDCompatCheck(t *testing.T) { + objects := []runtime.Object{ + // CRD with stored version that is still served — ok + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "apiextensions.k8s.io/v1", "kind": "CustomResourceDefinition", + "metadata": map[string]interface{}{"name": "widgets.example.com"}, + "spec": map[string]interface{}{ + "versions": []interface{}{ + map[string]interface{}{"name": "v1", "served": true}, + map[string]interface{}{"name": "v1beta1", "served": true}, + }, + }, + "status": map[string]interface{}{ + "storedVersions": []interface{}{"v1", "v1beta1"}, + }, + }}, + // CRD with stored version that is NO LONGER served — issue + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "apiextensions.k8s.io/v1", "kind": "CustomResourceDefinition", + "metadata": map[string]interface{}{"name": "gadgets.example.com"}, + "spec": map[string]interface{}{ + "versions": []interface{}{ + map[string]interface{}{"name": "v2", "served": true}, + map[string]interface{}{"name": "v1", "served": false}, + }, + }, + "status": map[string]interface{}{ + "storedVersions": []interface{}{"v1"}, + }, + }}, + } + + client := newFakeDynamicClient(objects...) 
+ check := &CRDCompatCheck{} + + result, err := check.Run(context.Background(), client, "4.21.5", "4.21.8") + if err != nil { + t.Fatal(err) + } + + if result["total_crds"] != 2 { + t.Errorf("total_crds = %v, want 2", result["total_crds"]) + } + + issues, ok := result["version_issues"].([]map[string]any) + if !ok { + t.Fatal("version_issues not a slice") + } + if len(issues) != 1 { + t.Fatalf("version_issues len = %d, want 1", len(issues)) + } + if issues[0]["crd"] != "gadgets.example.com" { + t.Errorf("crd = %v, want gadgets.example.com", issues[0]["crd"]) + } + if issues[0]["stored_version"] != "v1" { + t.Errorf("stored_version = %v, want v1", issues[0]["stored_version"]) + } + + summary, ok := result["summary"].(map[string]any) + if !ok { + t.Fatal("summary not a map") + } + if summary["version_issues"] != 1 { + t.Errorf("summary.version_issues = %v, want 1", summary["version_issues"]) + } +} + +func TestCRDCompatCheck_NoIssues(t *testing.T) { + objects := []runtime.Object{ + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "apiextensions.k8s.io/v1", "kind": "CustomResourceDefinition", + "metadata": map[string]interface{}{"name": "things.example.com"}, + "spec": map[string]interface{}{ + "versions": []interface{}{ + map[string]interface{}{"name": "v1", "served": true}, + }, + }, + "status": map[string]interface{}{ + "storedVersions": []interface{}{"v1"}, + }, + }}, + } + + client := newFakeDynamicClient(objects...) 
+ check := &CRDCompatCheck{} + + result, err := check.Run(context.Background(), client, "4.21.5", "4.21.8") + if err != nil { + t.Fatal(err) + } + + issues := result["version_issues"].([]map[string]any) + if len(issues) != 0 { + t.Errorf("expected no version issues, got %d", len(issues)) + } +} + +func TestNetworkCheck(t *testing.T) { + objects := []runtime.Object{ + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "config.openshift.io/v1", "kind": "Network", + "metadata": map[string]interface{}{"name": "cluster"}, + "status": map[string]interface{}{ + "networkType": "OpenShiftSDN", + }, + }}, + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "config.openshift.io/v1", "kind": "Proxy", + "metadata": map[string]interface{}{"name": "cluster"}, + "spec": map[string]interface{}{ + "httpProxy": "http://proxy.example.com:8080", + }, + }}, + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "config.openshift.io/v1", "kind": "APIServer", + "metadata": map[string]interface{}{"name": "cluster"}, + "spec": map[string]interface{}{ + "tlsSecurityProfile": map[string]interface{}{ + "type": "Old", + }, + }, + }}, + } + + client := newFakeDynamicClient(objects...) 
+ check := &NetworkCheck{} + + result, err := check.Run(context.Background(), client, "4.21.5", "4.21.8") + if err != nil { + t.Fatal(err) + } + + if result["network_type"] != "OpenShiftSDN" { + t.Errorf("network_type = %v, want OpenShiftSDN", result["network_type"]) + } + if result["sdn_warning"] == nil { + t.Error("should have sdn_warning for OpenShiftSDN") + } + if result["tls_profile"] != "Old" { + t.Errorf("tls_profile = %v, want Old", result["tls_profile"]) + } + + summary, ok := result["summary"].(map[string]any) + if !ok { + t.Fatal("summary not a map") + } + if summary["is_sdn"] != true { + t.Errorf("is_sdn = %v, want true", summary["is_sdn"]) + } +} + +// fakeClusterObjects returns a representative set of cluster objects that exercises +// every readiness check with non-trivial data. +func fakeClusterObjects() []runtime.Object { + return []runtime.Object{ + // --- ClusterVersion (cluster_conditions) --- + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "config.openshift.io/v1", "kind": "ClusterVersion", + "metadata": map[string]interface{}{"name": "version"}, + "spec": map[string]interface{}{"channel": "stable-4.21", "clusterID": "test-id"}, + "status": map[string]interface{}{ + "conditions": []interface{}{ + map[string]interface{}{"type": "Available", "status": "True", "reason": "AsExpected"}, + map[string]interface{}{"type": "Progressing", "status": "False", "reason": "AsExpected"}, + map[string]interface{}{"type": "Upgradeable", "status": "True", "reason": "AsExpected"}, + }, + "history": []interface{}{ + map[string]interface{}{"version": "4.21.5", "state": "Completed", "startedTime": "2026-04-10T10:00:00Z", "completionTime": "2026-04-10T11:00:00Z"}, + }, + }, + }}, + + // --- ClusterOperators (operator_health + etcd_health) --- + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "config.openshift.io/v1", "kind": "ClusterOperator", + "metadata": map[string]interface{}{"name": "etcd"}, + "status": 
map[string]interface{}{ + "conditions": []interface{}{ + map[string]interface{}{"type": "Available", "status": "True"}, + map[string]interface{}{"type": "Degraded", "status": "False"}, + map[string]interface{}{"type": "Upgradeable", "status": "True"}, + }, + }, + }}, + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "config.openshift.io/v1", "kind": "ClusterOperator", + "metadata": map[string]interface{}{"name": "authentication"}, + "status": map[string]interface{}{ + "conditions": []interface{}{ + map[string]interface{}{"type": "Available", "status": "True"}, + map[string]interface{}{"type": "Degraded", "status": "True", "reason": "OAuthFlaky", "message": "intermittent failures"}, + map[string]interface{}{"type": "Upgradeable", "status": "True"}, + }, + }, + }}, + + // --- MachineConfigPools (operator_health) --- + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "machineconfiguration.openshift.io/v1", "kind": "MachineConfigPool", + "metadata": map[string]interface{}{"name": "master"}, + "spec": map[string]interface{}{"paused": false}, + "status": map[string]interface{}{ + "machineCount": int64(3), "readyMachineCount": int64(3), "updatedMachineCount": int64(3), + "conditions": []interface{}{ + map[string]interface{}{"type": "Degraded", "status": "False"}, + map[string]interface{}{"type": "Updating", "status": "False"}, + }, + }, + }}, + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "machineconfiguration.openshift.io/v1", "kind": "MachineConfigPool", + "metadata": map[string]interface{}{"name": "worker"}, + "spec": map[string]interface{}{"paused": false}, + "status": map[string]interface{}{ + "machineCount": int64(3), "readyMachineCount": int64(3), "updatedMachineCount": int64(3), + "conditions": []interface{}{ + map[string]interface{}{"type": "Degraded", "status": "False"}, + map[string]interface{}{"type": "Updating", "status": "False"}, + }, + }, + }}, + + // --- Etcd pods 
(etcd_health) --- + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "v1", "kind": "Pod", + "metadata": map[string]interface{}{"name": "etcd-master-0", "namespace": "openshift-etcd", "labels": map[string]interface{}{"app": "etcd"}}, + "spec": map[string]interface{}{"nodeName": "master-0"}, "status": map[string]interface{}{"phase": "Running", "conditions": []interface{}{map[string]interface{}{"type": "Ready", "status": "True"}}}, + }}, + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "v1", "kind": "Pod", + "metadata": map[string]interface{}{"name": "etcd-master-1", "namespace": "openshift-etcd", "labels": map[string]interface{}{"app": "etcd"}}, + "spec": map[string]interface{}{"nodeName": "master-1"}, "status": map[string]interface{}{"phase": "Running", "conditions": []interface{}{map[string]interface{}{"type": "Ready", "status": "True"}}}, + }}, + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "v1", "kind": "Pod", + "metadata": map[string]interface{}{"name": "etcd-master-2", "namespace": "openshift-etcd", "labels": map[string]interface{}{"app": "etcd"}}, + "spec": map[string]interface{}{"nodeName": "master-2"}, "status": map[string]interface{}{"phase": "Running", "conditions": []interface{}{map[string]interface{}{"type": "Ready", "status": "True"}}}, + }}, + + // --- Nodes (node_capacity) --- + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "v1", "kind": "Node", + "metadata": map[string]interface{}{"name": "master-0"}, + "status": map[string]interface{}{"conditions": []interface{}{map[string]interface{}{"type": "Ready", "status": "True"}}}, + }}, + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "v1", "kind": "Node", + "metadata": map[string]interface{}{"name": "master-1"}, + "status": map[string]interface{}{"conditions": []interface{}{map[string]interface{}{"type": "Ready", "status": "True"}}}, + }}, + 
&unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "v1", "kind": "Node", + "metadata": map[string]interface{}{"name": "master-2"}, + "status": map[string]interface{}{"conditions": []interface{}{map[string]interface{}{"type": "Ready", "status": "True"}}}, + }}, + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "v1", "kind": "Node", + "metadata": map[string]interface{}{"name": "worker-0"}, + "status": map[string]interface{}{"conditions": []interface{}{map[string]interface{}{"type": "Ready", "status": "True"}}}, + }}, + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "v1", "kind": "Node", + "metadata": map[string]interface{}{"name": "worker-1"}, + "spec": map[string]interface{}{"unschedulable": true}, + "status": map[string]interface{}{"conditions": []interface{}{map[string]interface{}{"type": "Ready", "status": "False"}}}, + }}, + + // --- PDBs (pdb_drain) --- + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "policy/v1", "kind": "PodDisruptionBudget", + "metadata": map[string]interface{}{"name": "etcd-guard", "namespace": "openshift-etcd"}, + "spec": map[string]interface{}{"maxUnavailable": "1"}, + "status": map[string]interface{}{"currentHealthy": int64(3), "desiredHealthy": int64(2), "disruptionsAllowed": int64(1)}, + }}, + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "policy/v1", "kind": "PodDisruptionBudget", + "metadata": map[string]interface{}{"name": "zero-budget", "namespace": "app-ns"}, + "spec": map[string]interface{}{"maxUnavailable": "0"}, + "status": map[string]interface{}{"currentHealthy": int64(2), "desiredHealthy": int64(2), "disruptionsAllowed": int64(0)}, + }}, + + // --- APIRequestCounts (api_deprecations) --- + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "apiserver.openshift.io/v1", "kind": "APIRequestCount", + "metadata": map[string]interface{}{"name": 
"flowschemas.v1beta3.flowcontrol.apiserver.k8s.io"}, + "status": map[string]interface{}{ + "removedInRelease": "4.21.8", "requestCount": int64(100), + "conditions": []interface{}{map[string]interface{}{"type": "Deprecated", "status": "True"}}, + }, + }}, + + // --- CRDs (crd_compat) --- + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "apiextensions.k8s.io/v1", "kind": "CustomResourceDefinition", + "metadata": map[string]interface{}{"name": "widgets.example.com"}, + "spec": map[string]interface{}{ + "versions": []interface{}{ + map[string]interface{}{"name": "v1", "served": true}, + map[string]interface{}{"name": "v1beta1", "served": false}, + }, + }, + "status": map[string]interface{}{"storedVersions": []interface{}{"v1beta1"}}, + }}, + + // --- Network, Proxy, APIServer (network) --- + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "config.openshift.io/v1", "kind": "Network", + "metadata": map[string]interface{}{"name": "cluster"}, + "status": map[string]interface{}{"networkType": "OVNKubernetes"}, + }}, + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "config.openshift.io/v1", "kind": "Proxy", + "metadata": map[string]interface{}{"name": "cluster"}, + "spec": map[string]interface{}{"httpProxy": "http://proxy.corp:8080"}, + }}, + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "config.openshift.io/v1", "kind": "APIServer", + "metadata": map[string]interface{}{"name": "cluster"}, + "spec": map[string]interface{}{}, + }}, + + // --- OLM: Subscription + CSV (olm_operator_lifecycle) --- + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "operators.coreos.com/v1alpha1", "kind": "Subscription", + "metadata": map[string]interface{}{"name": "elasticsearch-operator", "namespace": "openshift-operators-redhat"}, + "spec": map[string]interface{}{ + "channel": "stable-5.8", "name": "elasticsearch-operator", + "source": "redhat-operators", 
"sourceNamespace": "openshift-marketplace", + "installPlanApproval": "Automatic", + }, + "status": map[string]interface{}{ + "state": "AtLatestKnown", + "installedCSV": "elasticsearch-operator.v5.8.6", + "currentCSV": "elasticsearch-operator.v5.8.6", + }, + }}, + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "operators.coreos.com/v1alpha1", "kind": "ClusterServiceVersion", + "metadata": map[string]interface{}{"name": "elasticsearch-operator.v5.8.6", "namespace": "openshift-operators-redhat"}, + "spec": map[string]interface{}{ + "version": "5.8.6", + "displayName": "OpenShift Elasticsearch Operator", + }, + "status": map[string]interface{}{"phase": "Succeeded"}, + }}, + } +} + +func TestRunAllWithFakeCluster(t *testing.T) { + client := newFakeDynamicClient(fakeClusterObjects()...) + output := RunAll(context.Background(), client, "4.21.5", "4.21.8") + + if output.CurrentVersion != "4.21.5" { + t.Errorf("CurrentVersion = %q, want 4.21.5", output.CurrentVersion) + } + if output.TargetVersion != "4.21.8" { + t.Errorf("TargetVersion = %q, want 4.21.8", output.TargetVersion) + } + if output.Meta.TotalChecks != 9 { + t.Errorf("TotalChecks = %d, want 9", output.Meta.TotalChecks) + } + + for _, name := range []string{ + "cluster_conditions", "operator_health", "api_deprecations", + "node_capacity", "pdb_drain", "etcd_health", "network", + "crd_compat", "olm_operator_lifecycle", + } { + r, ok := output.Checks[name] + if !ok { + t.Errorf("missing check result: %s", name) + continue + } + if r.Status != StatusOK { + t.Errorf("check %s status = %q, error = %q", name, r.Status, r.Error) + } + } + + // cluster_conditions: verify CV data flows through + cc := output.Checks["cluster_conditions"] + if cc.Data["channel"] != "stable-4.21" { + t.Errorf("cluster_conditions.channel = %v, want stable-4.21", cc.Data["channel"]) + } + + // operator_health: 2 COs, 1 degraded; 2 MCPs + oh := output.Checks["operator_health"] + summary := 
oh.Data["summary"].(map[string]any) + if summary["total_operators"] != 2 { + t.Errorf("operator_health total_operators = %v, want 2", summary["total_operators"]) + } + if summary["degraded_count"] != 1 { + t.Errorf("operator_health degraded_count = %v, want 1", summary["degraded_count"]) + } + mcps := oh.Data["machine_config_pools"].([]map[string]any) + if len(mcps) != 2 { + t.Errorf("operator_health MCPs = %d, want 2", len(mcps)) + } + + // etcd_health: 3 running pods + eh := output.Checks["etcd_health"] + if eh.Data["total_members"] != 3 { + t.Errorf("etcd_health total_members = %v, want 3", eh.Data["total_members"]) + } + if eh.Data["healthy_members"] != 3 { + t.Errorf("etcd_health healthy_members = %v, want 3", eh.Data["healthy_members"]) + } + + // node_capacity: 5 nodes, 4 ready, 1 unschedulable + nc := output.Checks["node_capacity"] + if nc.Data["total_nodes"] != 5 { + t.Errorf("node_capacity total_nodes = %v, want 5", nc.Data["total_nodes"]) + } + if nc.Data["ready_nodes"] != 4 { + t.Errorf("node_capacity ready_nodes = %v, want 4", nc.Data["ready_nodes"]) + } + if nc.Data["unschedulable_nodes"] != 1 { + t.Errorf("node_capacity unschedulable_nodes = %v, want 1", nc.Data["unschedulable_nodes"]) + } + + // pdb_drain: 2 PDBs, 1 blocking + pd := output.Checks["pdb_drain"] + if pd.Data["total_pdbs"] != 2 { + t.Errorf("pdb_drain total_pdbs = %v, want 2", pd.Data["total_pdbs"]) + } + blocking := pd.Data["blocking_pdbs"].([]map[string]any) + if len(blocking) != 1 { + t.Errorf("pdb_drain blocking_pdbs = %d, want 1", len(blocking)) + } + + // api_deprecations: 1 blocker API + ad := output.Checks["api_deprecations"] + adSummary := ad.Data["summary"].(map[string]any) + if adSummary["blockers"] != 1 { + t.Errorf("api_deprecations blockers = %v, want 1", adSummary["blockers"]) + } + + // crd_compat: 1 CRD with stored version no longer served + crd := output.Checks["crd_compat"] + crdSummary := crd.Data["summary"].(map[string]any) + if crdSummary["version_issues"] != 1 { + 
t.Errorf("crd_compat version_issues = %v, want 1", crdSummary["version_issues"]) + } + + // network: OVN, proxy configured + nw := output.Checks["network"] + if nw.Data["network_type"] != "OVNKubernetes" { + t.Errorf("network type = %v, want OVNKubernetes", nw.Data["network_type"]) + } + proxy := nw.Data["proxy"].(map[string]any) + if proxy["http_proxy"] != "http://proxy.corp:8080" { + t.Errorf("network proxy = %v, want http://proxy.corp:8080", proxy["http_proxy"]) + } + + // olm_operator_lifecycle: 1 subscription + olm := output.Checks["olm_operator_lifecycle"] + olmSummary := olm.Data["summary"].(map[string]any) + if olmSummary["total_operators"] != 1 { + t.Errorf("olm total_operators = %v, want 1", olmSummary["total_operators"]) + } + operators := olm.Data["operators"].([]map[string]any) + if operators[0]["installed_version"] != "5.8.6" { + t.Errorf("olm installed_version = %v, want 5.8.6", operators[0]["installed_version"]) + } + + // Verify the full output marshals to valid JSON + b, err := json.Marshal(output) + if err != nil { + t.Fatalf("failed to marshal output: %v", err) + } + var m map[string]any + if err := json.Unmarshal(b, &m); err != nil { + t.Fatalf("failed to unmarshal output: %v", err) + } + if _, ok := m["checks"]; !ok { + t.Error("marshaled output missing 'checks' key") + } + if _, ok := m["meta"]; !ok { + t.Error("marshaled output missing 'meta' key") + } +} diff --git a/pkg/readiness/client.go b/pkg/readiness/client.go new file mode 100644 index 000000000..121de91fd --- /dev/null +++ b/pkg/readiness/client.go @@ -0,0 +1,175 @@ +package readiness + +import ( + "context" + "fmt" + "strings" + + semver "github.com/blang/semver/v4" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/client-go/dynamic" +) + +var ( + GVRClusterVersion = schema.GroupVersionResource{Group: "config.openshift.io", Version: "v1", Resource: "clusterversions"} + 
GVRClusterOperator = schema.GroupVersionResource{Group: "config.openshift.io", Version: "v1", Resource: "clusteroperators"} + GVRMachineConfigPool = schema.GroupVersionResource{Group: "machineconfiguration.openshift.io", Version: "v1", Resource: "machineconfigpools"} + GVRNode = schema.GroupVersionResource{Group: "", Version: "v1", Resource: "nodes"} + GVRPod = schema.GroupVersionResource{Group: "", Version: "v1", Resource: "pods"} + GVRPDB = schema.GroupVersionResource{Group: "policy", Version: "v1", Resource: "poddisruptionbudgets"} + GVRPV = schema.GroupVersionResource{Group: "", Version: "v1", Resource: "persistentvolumes"} + GVRSecret = schema.GroupVersionResource{Group: "", Version: "v1", Resource: "secrets"} + GVRCRD = schema.GroupVersionResource{Group: "apiextensions.k8s.io", Version: "v1", Resource: "customresourcedefinitions"} + GVRCSV = schema.GroupVersionResource{Group: "operators.coreos.com", Version: "v1alpha1", Resource: "clusterserviceversions"} + GVRSubscription = schema.GroupVersionResource{Group: "operators.coreos.com", Version: "v1alpha1", Resource: "subscriptions"} + GVRInstallPlan = schema.GroupVersionResource{Group: "operators.coreos.com", Version: "v1alpha1", Resource: "installplans"} + GVRPackageManifest = schema.GroupVersionResource{Group: "packages.operators.coreos.com", Version: "v1", Resource: "packagemanifests"} + GVRAPIRequestCount = schema.GroupVersionResource{Group: "apiserver.openshift.io", Version: "v1", Resource: "apirequestcounts"} + GVRInfrastructure = schema.GroupVersionResource{Group: "config.openshift.io", Version: "v1", Resource: "infrastructures"} + GVRNetwork = schema.GroupVersionResource{Group: "config.openshift.io", Version: "v1", Resource: "networks"} + GVRAPIServer = schema.GroupVersionResource{Group: "config.openshift.io", Version: "v1", Resource: "apiservers"} + GVRProxy = schema.GroupVersionResource{Group: "config.openshift.io", Version: "v1", Resource: "proxies"} + GVRNodeMetrics = 
schema.GroupVersionResource{Group: "metrics.k8s.io", Version: "v1beta1", Resource: "nodes"} + GVRValidatingWebhook = schema.GroupVersionResource{Group: "admissionregistration.k8s.io", Version: "v1", Resource: "validatingwebhookconfigurations"} + GVRMutatingWebhook = schema.GroupVersionResource{Group: "admissionregistration.k8s.io", Version: "v1", Resource: "mutatingwebhookconfigurations"} +) + +// GetResource fetches a single cluster-scoped resource by name. +func GetResource(ctx context.Context, c dynamic.Interface, gvr schema.GroupVersionResource, name string) (*unstructured.Unstructured, error) { + return c.Resource(gvr).Get(ctx, name, metav1.GetOptions{}) +} + +// GetNamespacedResource fetches a single namespaced resource. +func GetNamespacedResource(ctx context.Context, c dynamic.Interface, gvr schema.GroupVersionResource, namespace, name string) (*unstructured.Unstructured, error) { + return c.Resource(gvr).Namespace(namespace).Get(ctx, name, metav1.GetOptions{}) +} + +// ListResources lists cluster-scoped resources, optionally filtered by label selector. +func ListResources(ctx context.Context, c dynamic.Interface, gvr schema.GroupVersionResource, labelSelector string) ([]unstructured.Unstructured, error) { + opts := metav1.ListOptions{} + if labelSelector != "" { + opts.LabelSelector = labelSelector + } + list, err := c.Resource(gvr).List(ctx, opts) + if err != nil { + return nil, err + } + return list.Items, nil +} + +// ListNamespacedResources lists resources in a specific namespace. 
+func ListNamespacedResources(ctx context.Context, c dynamic.Interface, gvr schema.GroupVersionResource, namespace, labelSelector string) ([]unstructured.Unstructured, error) { + opts := metav1.ListOptions{} + if labelSelector != "" { + opts.LabelSelector = labelSelector + } + list, err := c.Resource(gvr).Namespace(namespace).List(ctx, opts) + if err != nil { + return nil, err + } + return list.Items, nil +} + +// ListAllNamespacedResources lists resources across all namespaces. +func ListAllNamespacedResources(ctx context.Context, c dynamic.Interface, gvr schema.GroupVersionResource, labelSelector string) ([]unstructured.Unstructured, error) { + return ListResources(ctx, c, gvr, labelSelector) +} + +// Condition represents a parsed Kubernetes status condition. +type Condition struct { + Status string `json:"status"` + Reason string `json:"reason"` + Message string `json:"message"` + LastTransition string `json:"last_transition"` +} + +// GetConditions extracts status.conditions from an unstructured object into a map keyed by type. +func GetConditions(obj *unstructured.Unstructured) map[string]Condition { + conditions, _, _ := unstructured.NestedSlice(obj.Object, "status", "conditions") + result := make(map[string]Condition, len(conditions)) + for _, raw := range conditions { + c, ok := raw.(map[string]interface{}) + if !ok { + continue + } + t, _ := c["type"].(string) + result[t] = Condition{ + Status: strVal(c, "status"), + Reason: strVal(c, "reason"), + Message: strVal(c, "message"), + LastTransition: strVal(c, "lastTransitionTime"), + } + } + return result +} + +// Convenience wrappers for nested field access. + +func NestedString(obj map[string]interface{}, fields ...string) string { + val, _, _ := unstructured.NestedString(obj, fields...) + return val +} + +func NestedInt64(obj map[string]interface{}, fields ...string) int64 { + val, _, _ := unstructured.NestedInt64(obj, fields...) 
+ return val +} + +func NestedBool(obj map[string]interface{}, fields ...string) bool { + val, _, _ := unstructured.NestedBool(obj, fields...) + return val +} + +func NestedSlice(obj map[string]interface{}, fields ...string) []interface{} { + val, _, _ := unstructured.NestedSlice(obj, fields...) + return val +} + +func NestedMap(obj map[string]interface{}, fields ...string) map[string]interface{} { + val, _, _ := unstructured.NestedMap(obj, fields...) + return val +} + +func strVal(m map[string]interface{}, key string) string { + v, _ := m[key].(string) + return v +} + +const ( + ConditionTrue = "True" + ConditionFalse = "False" +) + +const ( + ConditionAvailable = "Available" + ConditionDegraded = "Degraded" + ConditionProgressing = "Progressing" + ConditionUpgradeable = "Upgradeable" + ConditionUpdating = "Updating" + ConditionRecommended = "Recommended" +) + +// CompareVersions compares two semver strings. Returns -1, 0, or 1. +func CompareVersions(a, b string) (int, error) { + va, err := semver.ParseTolerant(a) + if err != nil { + return 0, fmt.Errorf("invalid version %q: %w", a, err) + } + vb, err := semver.ParseTolerant(b) + if err != nil { + return 0, fmt.Errorf("invalid version %q: %w", b, err) + } + return va.Compare(vb), nil +} + +// FormatLabelSelector converts a map of labels to a label selector string. 
+func FormatLabelSelector(labels map[string]string) string { + parts := make([]string, 0, len(labels)) + for k, v := range labels { + parts = append(parts, k+"="+v) + } + return strings.Join(parts, ",") +} diff --git a/pkg/readiness/client_test.go b/pkg/readiness/client_test.go new file mode 100644 index 000000000..759660b79 --- /dev/null +++ b/pkg/readiness/client_test.go @@ -0,0 +1,169 @@ +package readiness + +import ( + "testing" + + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" +) + +func TestGetConditions(t *testing.T) { + obj := &unstructured.Unstructured{ + Object: map[string]interface{}{ + "status": map[string]interface{}{ + "conditions": []interface{}{ + map[string]interface{}{ + "type": "Available", + "status": "True", + "reason": "AsExpected", + "message": "All is well", + "lastTransitionTime": "2026-04-14T10:00:00Z", + }, + map[string]interface{}{ + "type": "Degraded", + "status": "False", + "reason": "AsExpected", + "message": "", + "lastTransitionTime": "2026-04-14T10:00:00Z", + }, + }, + }, + }, + } + + conditions := GetConditions(obj) + + if len(conditions) != 2 { + t.Fatalf("got %d conditions, want 2", len(conditions)) + } + + avail := conditions["Available"] + if avail.Status != "True" { + t.Errorf("Available.Status = %q, want True", avail.Status) + } + if avail.Reason != "AsExpected" { + t.Errorf("Available.Reason = %q, want AsExpected", avail.Reason) + } + if avail.Message != "All is well" { + t.Errorf("Available.Message = %q", avail.Message) + } + + degraded := conditions["Degraded"] + if degraded.Status != "False" { + t.Errorf("Degraded.Status = %q, want False", degraded.Status) + } +} + +func TestGetConditions_NoConditions(t *testing.T) { + obj := &unstructured.Unstructured{ + Object: map[string]interface{}{ + "status": map[string]interface{}{}, + }, + } + conditions := GetConditions(obj) + if len(conditions) != 0 { + t.Errorf("got %d conditions, want 0", len(conditions)) + } +} + +func TestCompareVersions(t *testing.T) { + tests := 
[]struct { + a, b string + expected int + expectErr bool + }{ + {"4.21.5", "4.21.8", -1, false}, + {"4.21.8", "4.21.5", 1, false}, + {"4.21.5", "4.21.5", 0, false}, + {"4.22.0", "4.21.5", 1, false}, + {"bad", "4.21.5", 0, true}, + {"4.21.5", "bad", 0, true}, + } + + for _, tt := range tests { + t.Run(tt.a+"_vs_"+tt.b, func(t *testing.T) { + got, err := CompareVersions(tt.a, tt.b) + if tt.expectErr && err == nil { + t.Errorf("CompareVersions(%q, %q) expected error, got nil", tt.a, tt.b) + } + if !tt.expectErr && err != nil { + t.Errorf("CompareVersions(%q, %q) unexpected error: %v", tt.a, tt.b, err) + } + if got != tt.expected { + t.Errorf("CompareVersions(%q, %q) = %d, want %d", tt.a, tt.b, got, tt.expected) + } + }) + } +} + +func TestFormatLabelSelector(t *testing.T) { + tests := []struct { + name string + labels map[string]string + contains []string + }{ + { + name: "single label", + labels: map[string]string{"app": "etcd"}, + contains: []string{"app=etcd"}, + }, + { + name: "empty", + labels: map[string]string{}, + contains: []string{}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := FormatLabelSelector(tt.labels) + for _, s := range tt.contains { + found := false + for i := 0; i <= len(got)-len(s); i++ { + if got[i:i+len(s)] == s { + found = true + break + } + } + if !found { + t.Errorf("FormatLabelSelector(%v) = %q, want to contain %q", tt.labels, got, s) + } + } + }) + } +} + +func TestNestedHelpers(t *testing.T) { + obj := map[string]interface{}{ + "spec": map[string]interface{}{ + "name": "test", + "count": int64(42), + "enabled": true, + "items": []interface{}{"a", "b"}, + "metadata": map[string]interface{}{"key": "val"}, + }, + } + + if got := NestedString(obj, "spec", "name"); got != "test" { + t.Errorf("NestedString = %q, want test", got) + } + if got := NestedInt64(obj, "spec", "count"); got != 42 { + t.Errorf("NestedInt64 = %d, want 42", got) + } + if got := NestedBool(obj, "spec", "enabled"); got != true { + 
t.Errorf("NestedBool = %v, want true", got) + } + if got := NestedSlice(obj, "spec", "items"); len(got) != 2 { + t.Errorf("NestedSlice len = %d, want 2", len(got)) + } + if got := NestedMap(obj, "spec", "metadata"); got["key"] != "val" { + t.Errorf("NestedMap[key] = %v, want val", got["key"]) + } + + // Missing fields return zero values + if got := NestedString(obj, "spec", "missing"); got != "" { + t.Errorf("missing string = %q, want empty", got) + } + if got := NestedInt64(obj, "spec", "missing"); got != 0 { + t.Errorf("missing int64 = %d, want 0", got) + } +} diff --git a/pkg/readiness/cluster_conditions.go b/pkg/readiness/cluster_conditions.go new file mode 100644 index 000000000..b14bf9668 --- /dev/null +++ b/pkg/readiness/cluster_conditions.go @@ -0,0 +1,76 @@ +package readiness + +import ( + "context" + "fmt" + + "k8s.io/client-go/dynamic" +) + +// ClusterConditionsCheck reads existing CVO-computed conditions from ClusterVersion status. +// This does NOT re-evaluate anything — it reports what CVO has already determined, +// including Upgradeable sub-conditions, RetrievedUpdates, and precondition state. 
+type ClusterConditionsCheck struct{} + +func (c *ClusterConditionsCheck) Name() string { return "cluster_conditions" } + +func (c *ClusterConditionsCheck) Run(ctx context.Context, dc dynamic.Interface, current, target string) (map[string]any, error) { + result := map[string]any{} + + cv, err := GetResource(ctx, dc, GVRClusterVersion, "version") + if err != nil { + return nil, fmt.Errorf("failed to get ClusterVersion: %w", err) + } + + // Read all conditions CVO has already set + conditions := GetConditions(cv) + condMap := map[string]any{} + for k, v := range conditions { + condMap[k] = v + } + result["conditions"] = condMap + + // Extract key signals for the agent + upgradeable := conditions[ConditionUpgradeable] + result["upgradeable"] = map[string]any{ + "status": upgradeable.Status, + "reason": upgradeable.Reason, + "message": upgradeable.Message, + } + + progressing := conditions[ConditionProgressing] + result["update_in_progress"] = progressing.Status == ConditionTrue + + // Read update history for context + history := NestedSlice(cv.Object, "status", "history") + historyEntries := make([]map[string]any, 0) + for i, h := range history { + if i >= 5 { + break + } + entry, ok := h.(map[string]interface{}) + if !ok { + continue + } + historyEntries = append(historyEntries, map[string]any{ + "version": NestedString(entry, "version"), + "state": NestedString(entry, "state"), + "startedTime": NestedString(entry, "startedTime"), + "completionTime": NestedString(entry, "completionTime"), + }) + } + result["recent_history"] = historyEntries + + // Channel and cluster identity + result["channel"] = NestedString(cv.Object, "spec", "channel") + result["cluster_id"] = NestedString(cv.Object, "spec", "clusterID") + + // Summary for quick agent parsing + result["summary"] = map[string]any{ + "upgradeable": upgradeable.Status == ConditionTrue, + "update_in_progress": progressing.Status == ConditionTrue, + "current_version": current, + } + + return result, nil +} diff --git 
a/pkg/readiness/crd_compat.go b/pkg/readiness/crd_compat.go new file mode 100644 index 000000000..d76c330b6 --- /dev/null +++ b/pkg/readiness/crd_compat.go @@ -0,0 +1,68 @@ +package readiness + +import ( + "context" + "fmt" + + "k8s.io/client-go/dynamic" +) + +// CRDCompatCheck verifies CRD stored/served version compatibility and operator constraints. +type CRDCompatCheck struct{} + +func (c *CRDCompatCheck) Name() string { return "crd_compat" } + +func (c *CRDCompatCheck) Run(ctx context.Context, dc dynamic.Interface, current, target string) (map[string]any, error) { + result := map[string]any{} + var sectionErrors []map[string]any + + // Check CRDs for version mismatches + crds, err := ListResources(ctx, dc, GVRCRD, "") + if err != nil { + return nil, fmt.Errorf("failed to list CRDs: %w", err) + } + + versionIssues := make([]map[string]any, 0) + for _, crd := range crds { + storedVersions := NestedSlice(crd.Object, "status", "storedVersions") + servedVersions := NestedSlice(crd.Object, "spec", "versions") + + served := make(map[string]bool) + for _, v := range servedVersions { + vm, ok := v.(map[string]interface{}) + if !ok { + continue + } + name := NestedString(vm, "name") + isServed := NestedBool(vm, "served") + if isServed { + served[name] = true + } + } + + for _, sv := range storedVersions { + stored, _ := sv.(string) + if stored != "" && !served[stored] { + versionIssues = append(versionIssues, map[string]any{ + "crd": crd.GetName(), + "stored_version": stored, + "issue": "stored version no longer served", + }) + } + } + } + + result["total_crds"] = len(crds) + result["version_issues"] = versionIssues + + result["summary"] = map[string]any{ + "total_crds": len(crds), + "version_issues": len(versionIssues), + } + + if len(sectionErrors) > 0 { + result["errors"] = sectionErrors + } + + return result, nil +} diff --git a/pkg/readiness/etcd_health.go b/pkg/readiness/etcd_health.go new file mode 100644 index 000000000..6ddc9547b --- /dev/null +++ 
b/pkg/readiness/etcd_health.go
@@ -0,0 +1,74 @@
+package readiness
+
+import (
+	"context"
+	"fmt"
+
+	"k8s.io/client-go/dynamic"
+)
+
+// EtcdHealthCheck reports the conditions of the etcd ClusterOperator and the
+// readiness of the etcd pods found in the openshift-etcd namespace.
+type EtcdHealthCheck struct{}
+
+// Name returns the identifier under which this check's output is reported.
+func (c *EtcdHealthCheck) Name() string { return "etcd_health" }
+
+// Run returns the etcd ClusterOperator conditions together with a per-pod
+// readiness breakdown. A failure to fetch the ClusterOperator is returned as
+// an error; a failure to list the pods is recorded under "errors" and the
+// member fields are omitted. The current and target arguments are not used.
+func (c *EtcdHealthCheck) Run(ctx context.Context, dc dynamic.Interface, current, target string) (map[string]any, error) {
+	result := map[string]any{}
+	var sectionErrors []map[string]any
+
+	// Check etcd ClusterOperator
+	etcdCO, err := GetResource(ctx, dc, GVRClusterOperator, "etcd")
+	if err != nil {
+		return nil, fmt.Errorf("failed to get etcd ClusterOperator: %w", err)
+	}
+
+	conditions := GetConditions(etcdCO)
+	result["operator_conditions"] = conditions
+
+	// Check etcd pods
+	etcdPods, err := ListNamespacedResources(ctx, dc, GVRPod, "openshift-etcd", "app=etcd")
+	if err != nil {
+		SectionError(&sectionErrors, "etcd_pods", err)
+	} else {
+		podStatuses := make([]map[string]any, 0, len(etcdPods))
+		healthyMembers := 0
+		for _, pod := range etcdPods {
+			phase := NestedString(pod.Object, "status", "phase")
+			podConds := GetConditions(&pod)
+			// A member counts as healthy only when its Ready condition is True.
+			ready := false
+			if cond, ok := podConds["Ready"]; ok {
+				ready = cond.Status == ConditionTrue
+			}
+			if ready {
+				healthyMembers++
+			}
+			podStatuses = append(podStatuses, map[string]any{
+				"name":  pod.GetName(),
+				"node":  NestedString(pod.Object, "spec", "nodeName"),
+				"phase": phase,
+				"ready": ready,
+			})
+		}
+		result["members"] = podStatuses
+		result["healthy_members"] = healthyMembers
+		result["total_members"] = len(etcdPods)
+	}
+
+	result["summary"] = map[string]any{
+		"operator_available": conditions[ConditionAvailable].Status == ConditionTrue,
+		"operator_degraded":  conditions[ConditionDegraded].Status == ConditionTrue,
+	}
+
+	if len(sectionErrors) > 0 {
+		result["errors"] = sectionErrors
+	}
+
+	return result, nil
+}
diff --git a/pkg/readiness/network.go b/pkg/readiness/network.go
new file mode
100644
index 000000000..0342b06f0
--- /dev/null
+++ b/pkg/readiness/network.go
@@ -0,0 +1,84 @@
+package readiness
+
+import (
+	"context"
+	"fmt"
+
+	"k8s.io/client-go/dynamic"
+)
+
+// NetworkCheck verifies network plugin type, TLS profile, and proxy configuration.
+type NetworkCheck struct{}
+
+// Name returns the identifier under which this check's output is reported.
+func (c *NetworkCheck) Name() string { return "network" }
+
+// Run reports the cluster network type, an OpenShiftSDN migration warning when
+// applicable, the cluster proxy settings and the API server TLS profile type.
+// A failure to fetch the Network config is returned as an error; proxy and
+// APIServer failures are recorded under "errors". The current argument is not used.
+func (c *NetworkCheck) Run(ctx context.Context, dc dynamic.Interface, current, target string) (map[string]any, error) {
+	result := map[string]any{}
+	var sectionErrors []map[string]any
+
+	// Check Network configuration
+	network, err := GetResource(ctx, dc, GVRNetwork, "cluster")
+	if err != nil {
+		return nil, fmt.Errorf("failed to get Network config: %w", err)
+	}
+
+	networkType := NestedString(network.Object, "status", "networkType")
+	result["network_type"] = networkType
+
+	// SDN deprecation warning
+	if networkType == "OpenShiftSDN" {
+		// Compare only when a target was supplied; an empty or unparsable
+		// target falls back to the generic warning.
+		blocksTarget := false
+		if target != "" {
+			cmp, cmpErr := CompareVersions(target, "4.17.0")
+			blocksTarget = cmpErr == nil && cmp >= 0
+		}
+		if blocksTarget {
+			result["sdn_warning"] = "OpenShiftSDN blocks upgrades to 4.17+; migrate to OVN-Kubernetes first."
+		} else {
+			result["sdn_warning"] = "OpenShiftSDN detected. Migration to OVN-Kubernetes is required for future upgrades to 4.17+."
+		}
+	}
+
+	// Check proxy
+	proxy, err := GetResource(ctx, dc, GVRProxy, "cluster")
+	if err != nil {
+		SectionError(&sectionErrors, "proxy", err)
+	} else {
+		result["proxy"] = map[string]any{
+			"http_proxy":  NestedString(proxy.Object, "spec", "httpProxy"),
+			"https_proxy": NestedString(proxy.Object, "spec", "httpsProxy"),
+			"no_proxy":    NestedString(proxy.Object, "spec", "noProxy"),
+		}
+	}
+
+	// Check TLS profile from APIServer
+	apiServer, err := GetResource(ctx, dc, GVRAPIServer, "cluster")
+	if err != nil {
+		SectionError(&sectionErrors, "apiserver_tls", err)
+	} else {
+		tlsProfile := NestedString(apiServer.Object, "spec", "tlsSecurityProfile", "type")
+		if tlsProfile == "" {
+			// No explicit profile type is set; report "Intermediate".
+			tlsProfile = "Intermediate"
+		}
+		result["tls_profile"] = tlsProfile
+	}
+
+	result["summary"] = map[string]any{
+		"network_type": networkType,
+		"is_sdn":       networkType == "OpenShiftSDN",
+	}
+
+	if len(sectionErrors) > 0 {
+		result["errors"] = sectionErrors
+	}
+
+	return result, nil
+}
diff --git a/pkg/readiness/node_capacity.go b/pkg/readiness/node_capacity.go
new file mode 100644
index 000000000..ee58c5ac0
--- /dev/null
+++ b/pkg/readiness/node_capacity.go
@@ -0,0 +1,48 @@
+package readiness
+
+import (
+	"context"
+	"fmt"
+
+	"k8s.io/client-go/dynamic"
+)
+
+// NodeCapacityCheck assesses node readiness and resource headroom.
+type NodeCapacityCheck struct{}
+
+// Name returns the identifier under which this check's output is reported.
+func (c *NodeCapacityCheck) Name() string { return "node_capacity" }
+
+// Run lists all nodes and counts how many report a Ready condition of True
+// and how many have spec.unschedulable set. The current and target arguments
+// are not used. An error is returned when the node list cannot be fetched.
+func (c *NodeCapacityCheck) Run(ctx context.Context, dc dynamic.Interface, current, target string) (map[string]any, error) {
+	nodes, err := ListResources(ctx, dc, GVRNode, "")
+	if err != nil {
+		return nil, fmt.Errorf("failed to list nodes: %w", err)
+	}
+
+	// Tally readiness and cordon state in a single pass.
+	var ready, cordoned int
+	for _, n := range nodes {
+		if st, found := GetConditions(&n)["Ready"]; found && st.Status == ConditionTrue {
+			ready++
+		}
+		if NestedBool(n.Object, "spec", "unschedulable") {
+			cordoned++
+		}
+	}
+	total := len(nodes)
+
+	return map[string]any{
+		"total_nodes":         total,
+		"ready_nodes":         ready,
+		"unschedulable_nodes": cordoned,
+		"summary": map[string]any{
+			"total":         total,
+			"ready":         ready,
+			"not_ready":     total - ready,
+			"unschedulable": cordoned,
+		},
+	}, nil
+}
diff --git a/pkg/readiness/olm_lifecycle.go b/pkg/readiness/olm_lifecycle.go
new file mode 100644
index 000000000..1f20a5d2a
--- /dev/null
+++ b/pkg/readiness/olm_lifecycle.go
@@ -0,0 +1,282 @@
+package readiness
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"sync"
+
+	semver "github.com/blang/semver/v4"
+
+	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
+	"k8s.io/client-go/dynamic"
+)
+
+const (
+	ApprovalAutomatic     = "Automatic"
+	ApprovalManual        = "Manual"
+	PhaseRequiresApproval = "RequiresApproval"
+)
+
+// OLMOperatorLifecycleCheck collects lifecycle information for OLM-installed operators
+// by correlating Subscriptions, ClusterServiceVersions, InstallPlans, and PackageManifests.
+// This data supports the Operator Update Planner (OCPSTRAT-2618) by providing per-operator
+// installed version, OCP compatibility, update policy, pending upgrades, and channel info.
+type OLMOperatorLifecycleCheck struct{}
+
+// Name returns the identifier under which this check's output is reported.
+func (c *OLMOperatorLifecycleCheck) Name() string { return "olm_operator_lifecycle" }
+
+// Run builds one entry per Subscription, enriched with data from the installed
+// and pending ClusterServiceVersions, the referenced InstallPlan and the
+// matching PackageManifest channel. A failure to list Subscriptions is returned
+// as an error; CSV and PackageManifest list failures are recorded under
+// "errors" and the fields derived from them are omitted.
+//
+// compatible_with_target is decided at <major>.<minor> granularity: a target
+// patch release inside the declared maximum minor version is compatible.
+func (c *OLMOperatorLifecycleCheck) Run(ctx context.Context, dc dynamic.Interface, current, target string) (map[string]any, error) {
+	result := map[string]any{}
+	var sectionErrors []map[string]any
+
+	// Subscriptions are the anchor — fail hard if unavailable.
+	subs, err := ListResources(ctx, dc, GVRSubscription, "")
+	if err != nil {
+		return nil, fmt.Errorf("failed to list subscriptions: %w", err)
+	}
+
+	// Fetch CSVs and PackageManifests concurrently; both are independent.
+	// Each goroutine writes only its own pair of variables, and the results
+	// are read after Wait returns.
+	var (
+		csvs         []unstructured.Unstructured
+		pkgManifests []unstructured.Unstructured
+		csvErr       error
+		pkgErr       error
+		fetchWG      sync.WaitGroup
+	)
+	fetchWG.Add(2)
+	go func() {
+		defer fetchWG.Done()
+		csvs, csvErr = ListResources(ctx, dc, GVRCSV, "")
+	}()
+	go func() {
+		defer fetchWG.Done()
+		pkgManifests, pkgErr = ListResources(ctx, dc, GVRPackageManifest, "")
+	}()
+	fetchWG.Wait()
+
+	if csvErr != nil {
+		SectionError(&sectionErrors, "clusterserviceversions", csvErr)
+	}
+	if pkgErr != nil {
+		SectionError(&sectionErrors, "packagemanifests", pkgErr)
+	}
+
+	csvIndex := indexByNamespacedName(csvs)
+	pkgIndex := indexByNamespacedName(pkgManifests)
+
+	// Parse current/target once to avoid repeated semver parsing per operator.
+	parsedTarget, errTarget := semver.ParseTolerant(target)
+	parsedCurrent, errCurrent := semver.ParseTolerant(current)
+	hasTarget := errTarget == nil && target != ""
+	hasCurrent := errCurrent == nil && current != ""
+
+	operators := make([]map[string]any, 0, len(subs))
+	incompatibleWithTarget := 0
+	pendingUpgradeCount := 0
+	manualApprovalCount := 0
+
+	for _, sub := range subs {
+		// Read once; these values feed both the entry and the PackageManifest lookup.
+		pkgName := NestedString(sub.Object, "spec", "name")
+		subChannel := NestedString(sub.Object, "spec", "channel")
+		sourceNS := NestedString(sub.Object, "spec", "sourceNamespace")
+
+		entry := map[string]any{
+			"name":             sub.GetName(),
+			"namespace":        sub.GetNamespace(),
+			"channel":          subChannel,
+			"source":           NestedString(sub.Object, "spec", "source"),
+			"source_namespace": sourceNS,
+			"package":          pkgName,
+		}
+
+		// An empty installPlanApproval is reported as Automatic.
+		approval := NestedString(sub.Object, "spec", "installPlanApproval")
+		if approval == "" {
+			approval = ApprovalAutomatic
+		}
+		entry["install_plan_approval"] = approval
+		if approval == ApprovalManual {
+			manualApprovalCount++
+		}
+
+		entry["state"] = NestedString(sub.Object, "status", "state")
+		installedCSVName := NestedString(sub.Object, "status", "installedCSV")
+		entry["installed_csv"] = installedCSVName
+		currentCSVName := NestedString(sub.Object, "status", "currentCSV")
+
+		if installedCSVName != "" {
+			csvKey := sub.GetNamespace() + "/" + installedCSVName
+			if csvObj, ok := csvIndex[csvKey]; ok {
+				entry["installed_version"] = NestedString(csvObj, "spec", "version")
+				entry["csv_phase"] = NestedString(csvObj, "status", "phase")
+				entry["csv_display_name"] = NestedString(csvObj, "spec", "displayName")
+
+				minKube := NestedString(csvObj, "spec", "minKubeVersion")
+				if minKube != "" {
+					entry["min_kube_version"] = minKube
+				}
+			}
+		}
+
+		// An upgrade is pending when currentCSV differs from installedCSV.
+		pendingUpgrade := false
+		if currentCSVName != "" && installedCSVName != "" && currentCSVName != installedCSVName {
+			pendingUpgrade = true
+			pendingUpgradeCount++
+			entry["pending_csv"] = currentCSVName
+			csvKey := sub.GetNamespace() + "/" + currentCSVName
+			if csvObj, ok := csvIndex[csvKey]; ok {
+				entry["pending_version"] = NestedString(csvObj, "spec", "version")
+			}
+		}
+		entry["pending_upgrade"] = pendingUpgrade
+
+		// Fetch the referenced InstallPlan directly instead of listing all.
+		// The lookup is best-effort: a fetch error leaves the flag unset.
+		ipRef := NestedString(sub.Object, "status", "installPlanRef", "name")
+		if ipRef != "" {
+			ipObj, ipErr := GetNamespacedResource(ctx, dc, GVRInstallPlan, sub.GetNamespace(), ipRef)
+			if ipErr == nil {
+				ipApproved := NestedBool(ipObj.Object, "spec", "approved")
+				ipPhase := NestedString(ipObj.Object, "status", "phase")
+				if !ipApproved && ipPhase == PhaseRequiresApproval {
+					entry["install_plan_awaiting_approval"] = true
+				}
+			}
+		}
+
+		// PackageManifests are looked up in the catalog source namespace,
+		// falling back to the Subscription's own namespace.
+		pkgNS := sourceNS
+		if pkgNS == "" {
+			pkgNS = sub.GetNamespace()
+		}
+		if pm, ok := pkgIndex[pkgNS+"/"+pkgName]; ok {
+			compat := extractOCPCompat(pm, subChannel)
+			if compat != nil {
+				entry["ocp_compat"] = compat
+
+				maxOCP, _ := compat["max"].(string)
+				if maxOCP != "" && hasTarget {
+					if parsedMax, err := semver.ParseTolerant(maxOCP); err == nil {
+						// The maximum names a minor release, so patch and
+						// pre-release parts are ignored on both sides.
+						compatible := olmMajorMinor(parsedTarget).Compare(olmMajorMinor(parsedMax)) <= 0
+						entry["compatible_with_target"] = compatible
+						if !compatible {
+							incompatibleWithTarget++
+						}
+					}
+				}
+				minOCP, _ := compat["min"].(string)
+				if minOCP != "" && hasCurrent {
+					parsedMin, err := semver.ParseTolerant(minOCP)
+					if err == nil {
+						entry["compatible_with_current"] = parsedCurrent.Compare(parsedMin) >= 0
+					}
+				}
+			}
+
+			channels := extractChannels(pm)
+			if len(channels) > 0 {
+				entry["available_channels"] = channels
+			}
+		}
+
+		operators = append(operators, entry)
+	}
+
+	result["operators"] = operators
+	result["summary"] = map[string]any{
+		"total_operators":          len(subs),
+		"pending_upgrades":         pendingUpgradeCount,
+		"manual_approval":          manualApprovalCount,
+		"incompatible_with_target": incompatibleWithTarget,
+	}
+
+	if len(sectionErrors) > 0 {
+		result["errors"] = sectionErrors
+	}
+
+	return result, nil
+}
+
+// olmMajorMinor returns v reduced to its major and minor components.
+func olmMajorMinor(v semver.Version) semver.Version {
+	return semver.Version{Major: v.Major, Minor: v.Minor}
+}
+
+// indexByNamespacedName builds a lookup map keyed by "namespace/name".
+func indexByNamespacedName(items []unstructured.Unstructured) map[string]map[string]interface{} {
+	idx := make(map[string]map[string]interface{}, len(items))
+	for _, item := range items {
+		key := item.GetNamespace() + "/" + item.GetName()
+		idx[key] = item.Object
+	}
+	return idx
+}
+
+// extractOCPCompat reads the OCP version bounds declared on the named channel
+// of a PackageManifest. The maximum is taken from the olm.maxOpenShiftVersion
+// annotation when present, otherwise from the olm.properties annotation; the
+// minimum is taken from olm.properties. It returns nil when the channel is not
+// found or declares neither bound.
+func extractOCPCompat(pm map[string]interface{}, channelName string) map[string]any {
+	for _, ch := range NestedSlice(pm, "status", "channels") {
+		chMap, ok := ch.(map[string]interface{})
+		if !ok || NestedString(chMap, "name") != channelName {
+			continue
+		}
+
+		props := NestedString(chMap, "currentCSVDesc", "annotations", "olm.properties")
+		compat := map[string]any{}
+
+		maxOCP := NestedString(chMap, "currentCSVDesc", "annotations", "olm.maxOpenShiftVersion")
+		if maxOCP == "" && props != "" {
+			maxOCP = olmVersionProperty(props, "olm.maxOpenShiftVersion")
+		}
+		if maxOCP != "" {
+			compat["max"] = maxOCP
+		}
+
+		if props != "" {
+			if minOCP := parseMinOCPFromProperties(props); minOCP != "" {
+				compat["min"] = minOCP
+			}
+		}
+
+		if len(compat) > 0 {
+			return compat
+		}
+	}
+	return nil
+}
+
+// olmProperty represents a single entry in the olm.properties JSON annotation.
+type olmProperty struct {
+	Type  string          `json:"type"`
+	Value json.RawMessage `json:"value"`
+}
+
+// parseMinOCPFromProperties returns the olm.minOpenShiftVersion value from an
+// olm.properties JSON array, or "" when it is absent or cannot be decoded.
+func parseMinOCPFromProperties(props string) string {
+	return olmVersionProperty(props, "olm.minOpenShiftVersion")
+}
+
+// olmVersionProperty returns the string value of the first property of type
+// propType in an olm.properties JSON array. It returns "" when props is not
+// valid JSON, the property is absent, or its value is not a JSON string.
+func olmVersionProperty(props, propType string) string {
+	var properties []olmProperty
+	if err := json.Unmarshal([]byte(props), &properties); err != nil {
+		return ""
+	}
+	for _, p := range properties {
+		if p.Type != propType {
+			continue
+		}
+		var v string
+		if err := json.Unmarshal(p.Value, &v); err != nil {
+			return ""
+		}
+		return v
+	}
+	return ""
+}
+
+// extractChannels returns the list of channel names from a PackageManifest.
+func extractChannels(pm map[string]interface{}) []string {
+	channels := NestedSlice(pm, "status", "channels")
+	names := make([]string, 0, len(channels))
+	for _, ch := range channels {
+		// Entries that are not maps, or that have an empty name, are skipped.
+		chMap, ok := ch.(map[string]interface{})
+		if !ok {
+			continue
+		}
+		name := NestedString(chMap, "name")
+		if name != "" {
+			names = append(names, name)
+		}
+	}
+	return names
+}
diff --git a/pkg/readiness/olm_lifecycle_test.go b/pkg/readiness/olm_lifecycle_test.go
new file mode 100644
index 000000000..a0216a68f
--- /dev/null
+++ b/pkg/readiness/olm_lifecycle_test.go
@@ -0,0 +1,457 @@
+package readiness
+
+import (
+	"context"
+	"testing"
+
+	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
+	"k8s.io/apimachinery/pkg/runtime"
+)
+
+// TestOLMOperatorLifecycleCheck_Basic covers one Subscription with a matching CSV and PackageManifest.
+func TestOLMOperatorLifecycleCheck_Basic(t *testing.T) {
+	objects := []runtime.Object{
+		// Subscription for elasticsearch-operator
+		&unstructured.Unstructured{Object: map[string]interface{}{
+			"apiVersion": "operators.coreos.com/v1alpha1", "kind": "Subscription",
+			"metadata": map[string]interface{}{"name": "elasticsearch-operator", "namespace": "openshift-operators-redhat"},
+			"spec": map[string]interface{}{
+				"name":                "elasticsearch-operator",
+				"channel":             "stable-5.8",
+				"source":              "redhat-operators",
+				"sourceNamespace":     "openshift-marketplace",
+				"installPlanApproval": "Manual",
+			},
+			"status": map[string]interface{}{
+				"state":        "AtLatestKnown",
+				"installedCSV": "elasticsearch-operator.v5.8.5",
+				"currentCSV":   "elasticsearch-operator.v5.8.5",
+			},
+		}},
+		// CSV for elasticsearch-operator
+		&unstructured.Unstructured{Object: map[string]interface{}{
+			"apiVersion": "operators.coreos.com/v1alpha1", "kind": "ClusterServiceVersion",
+			"metadata": map[string]interface{}{"name": "elasticsearch-operator.v5.8.5", "namespace": "openshift-operators-redhat"},
+			"spec": map[string]interface{}{
+				"version":     "5.8.5",
+				"displayName": "OpenShift Elasticsearch Operator",
+			},
+			"status": map[string]interface{}{
+				"phase": "Succeeded",
+			},
+		}},
+		// PackageManifest for elasticsearch-operator
+		&unstructured.Unstructured{Object: map[string]interface{}{
+			"apiVersion": "packages.operators.coreos.com/v1", "kind": "PackageManifest",
+			"metadata": map[string]interface{}{"name": "elasticsearch-operator", "namespace": "openshift-marketplace"},
+			"status": map[string]interface{}{
+				"channels": []interface{}{
+					map[string]interface{}{
+						"name": "stable-5.8",
+						"currentCSVDesc": map[string]interface{}{
+							"annotations": map[string]interface{}{
+								"olm.maxOpenShiftVersion": "4.17",
+							},
+						},
+					},
+					map[string]interface{}{
+						"name": "stable-6.0",
+					},
+				},
+			},
+		}},
+	}
+
+	client := newFakeDynamicClient(objects...)
+	check := &OLMOperatorLifecycleCheck{}
+
+	result, err := check.Run(context.Background(), client, "4.16.0", "4.17.0")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	operators, ok := result["operators"].([]map[string]any)
+	if !ok {
+		t.Fatal("operators not a slice")
+	}
+	if len(operators) != 1 {
+		t.Fatalf("operators len = %d, want 1", len(operators))
+	}
+
+	op := operators[0]
+	if op["name"] != "elasticsearch-operator" {
+		t.Errorf("name = %v, want elasticsearch-operator", op["name"])
+	}
+	if op["installed_version"] != "5.8.5" {
+		t.Errorf("installed_version = %v, want 5.8.5", op["installed_version"])
+	}
+	if op["csv_phase"] != "Succeeded" {
+		t.Errorf("csv_phase = %v, want Succeeded", op["csv_phase"])
+	}
+	if op["csv_display_name"] != "OpenShift Elasticsearch Operator" {
+		t.Errorf("csv_display_name = %v, want OpenShift Elasticsearch Operator", op["csv_display_name"])
+	}
+	if op["install_plan_approval"] != "Manual" {
+		t.Errorf("install_plan_approval = %v, want Manual", op["install_plan_approval"])
+	}
+	if op["channel"] != "stable-5.8" {
+		t.Errorf("channel = %v, want stable-5.8", op["channel"])
+	}
+	if op["pending_upgrade"] != false {
+		t.Errorf("pending_upgrade = %v, want false", op["pending_upgrade"])
+	}
+
+	// OCP compat — max is 4.17, target is 4.17, so compatible
+	compat, ok := op["ocp_compat"].(map[string]any)
+	if !ok {
+		t.Fatal("ocp_compat not a map")
+	}
+	if compat["max"] != "4.17" {
+		t.Errorf("ocp_compat.max = %v, want 4.17", compat["max"])
+	}
+	if op["compatible_with_target"] != true {
+		t.Errorf("compatible_with_target = %v, want true", op["compatible_with_target"])
+	}
+
+	// Available channels
+	channels, ok := op["available_channels"].([]string)
+	if !ok {
+		t.Fatal("available_channels not a string slice")
+	}
+	if len(channels) != 2 {
+		t.Errorf("available_channels len = %d, want 2", len(channels))
+	}
+
+	// Summary
+	summary, ok := result["summary"].(map[string]any)
+	if !ok {
+		t.Fatal("summary not a map")
+	}
+	if summary["total_operators"] != 1 {
+		t.Errorf("total_operators = %v, want 1", summary["total_operators"])
+	}
+	if summary["manual_approval"] != 1 {
+		t.Errorf("manual_approval = %v, want 1", summary["manual_approval"])
+	}
+	if summary["incompatible_with_target"] != 0 {
+		t.Errorf("incompatible_with_target = %v, want 0", summary["incompatible_with_target"])
+	}
+}
+
+// TestOLMOperatorLifecycleCheck_PendingUpgrade covers a Subscription whose currentCSV differs from installedCSV.
+func TestOLMOperatorLifecycleCheck_PendingUpgrade(t *testing.T) {
+	objects := []runtime.Object{
+		&unstructured.Unstructured{Object: map[string]interface{}{
+			"apiVersion": "operators.coreos.com/v1alpha1", "kind": "Subscription",
+			"metadata": map[string]interface{}{"name": "kiali-ossm", "namespace": "openshift-operators"},
+			"spec": map[string]interface{}{
+				"name":    "kiali-ossm",
+				"channel": "stable",
+				"source":  "redhat-operators",
+			},
+			"status": map[string]interface{}{
+				"state":        "UpgradePending",
+				"installedCSV": "kiali-operator.v1.72.0",
+				"currentCSV":   "kiali-operator.v1.73.0",
+			},
+		}},
+		// Installed CSV
+		&unstructured.Unstructured{Object: map[string]interface{}{
+			"apiVersion": "operators.coreos.com/v1alpha1", "kind": "ClusterServiceVersion",
+			"metadata": map[string]interface{}{"name": "kiali-operator.v1.72.0", "namespace": "openshift-operators"},
+			"spec": map[string]interface{}{
+				"version":     "1.72.0",
+				"displayName": "Kiali Operator",
+			},
+			"status": map[string]interface{}{"phase": "Replacing"},
+		}},
+		// Pending CSV
+		&unstructured.Unstructured{Object: map[string]interface{}{
+			"apiVersion": "operators.coreos.com/v1alpha1", "kind": "ClusterServiceVersion",
+			"metadata": map[string]interface{}{"name": "kiali-operator.v1.73.0", "namespace": "openshift-operators"},
+			"spec": map[string]interface{}{
+				"version":     "1.73.0",
+				"displayName": "Kiali Operator",
+			},
+			"status": map[string]interface{}{"phase": "InstallReady"},
+		}},
+	}
+
+	client := newFakeDynamicClient(objects...)
+	check := &OLMOperatorLifecycleCheck{}
+
+	result, err := check.Run(context.Background(), client, "4.16.0", "4.17.0")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	operators := result["operators"].([]map[string]any)
+	if len(operators) != 1 {
+		t.Fatalf("operators len = %d, want 1", len(operators))
+	}
+
+	op := operators[0]
+	if op["pending_upgrade"] != true {
+		t.Errorf("pending_upgrade = %v, want true", op["pending_upgrade"])
+	}
+	if op["installed_version"] != "1.72.0" {
+		t.Errorf("installed_version = %v, want 1.72.0", op["installed_version"])
+	}
+	if op["pending_version"] != "1.73.0" {
+		t.Errorf("pending_version = %v, want 1.73.0", op["pending_version"])
+	}
+	if op["pending_csv"] != "kiali-operator.v1.73.0" {
+		t.Errorf("pending_csv = %v, want kiali-operator.v1.73.0", op["pending_csv"])
+	}
+
+	summary := result["summary"].(map[string]any)
+	if summary["pending_upgrades"] != 1 {
+		t.Errorf("pending_upgrades = %v, want 1", summary["pending_upgrades"])
+	}
+}
+
+// TestOLMOperatorLifecycleCheck_IncompatibleWithTarget covers a channel whose max OCP version is below the target.
+func TestOLMOperatorLifecycleCheck_IncompatibleWithTarget(t *testing.T) {
+	objects := []runtime.Object{
+		&unstructured.Unstructured{Object: map[string]interface{}{
+			"apiVersion": "operators.coreos.com/v1alpha1", "kind": "Subscription",
+			"metadata": map[string]interface{}{"name": "jaeger-product", "namespace": "openshift-operators"},
+			"spec": map[string]interface{}{
+				"name":    "jaeger-product",
+				"channel": "stable",
+				"source":  "redhat-operators",
+			},
+			"status": map[string]interface{}{
+				"state":        "AtLatestKnown",
+				"installedCSV": "jaeger-operator.v1.51.0",
+				"currentCSV":   "jaeger-operator.v1.51.0",
+			},
+		}},
+		&unstructured.Unstructured{Object: map[string]interface{}{
+			"apiVersion": "operators.coreos.com/v1alpha1", "kind": "ClusterServiceVersion",
+			"metadata": map[string]interface{}{"name": "jaeger-operator.v1.51.0", "namespace": "openshift-operators"},
+			"spec": map[string]interface{}{
+				"version":     "1.51.0",
+				"displayName": "Red Hat OpenShift distributed tracing platform",
+			},
+			"status": map[string]interface{}{"phase": "Succeeded"},
+		}},
+		&unstructured.Unstructured{Object: map[string]interface{}{
+			"apiVersion": "packages.operators.coreos.com/v1", "kind": "PackageManifest",
+			"metadata": map[string]interface{}{"name": "jaeger-product", "namespace": "openshift-operators"},
+			"status": map[string]interface{}{
+				"channels": []interface{}{
+					map[string]interface{}{
+						"name": "stable",
+						"currentCSVDesc": map[string]interface{}{
+							"annotations": map[string]interface{}{
+								"olm.maxOpenShiftVersion": "4.16",
+							},
+						},
+					},
+				},
+			},
+		}},
+	}
+
+	client := newFakeDynamicClient(objects...)
+	check := &OLMOperatorLifecycleCheck{}
+
+	// Target is 4.17 but max is 4.16 — incompatible
+	result, err := check.Run(context.Background(), client, "4.16.0", "4.17.0")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	operators := result["operators"].([]map[string]any)
+	op := operators[0]
+	if op["compatible_with_target"] != false {
+		t.Errorf("compatible_with_target = %v, want false", op["compatible_with_target"])
+	}
+
+	summary := result["summary"].(map[string]any)
+	if summary["incompatible_with_target"] != 1 {
+		t.Errorf("incompatible_with_target = %v, want 1", summary["incompatible_with_target"])
+	}
+}
+
+// TestOLMOperatorLifecycleCheck_NoSubscriptions covers a cluster with no OLM objects at all.
+func TestOLMOperatorLifecycleCheck_NoSubscriptions(t *testing.T) {
+	client := newFakeDynamicClient()
+	check := &OLMOperatorLifecycleCheck{}
+
+	result, err := check.Run(context.Background(), client, "4.16.0", "4.17.0")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	operators := result["operators"].([]map[string]any)
+	if len(operators) != 0 {
+		t.Errorf("operators len = %d, want 0", len(operators))
+	}
+
+	summary := result["summary"].(map[string]any)
+	if summary["total_operators"] != 0 {
+		t.Errorf("total_operators = %v, want 0", summary["total_operators"])
+	}
+}
+
+// TestOLMOperatorLifecycleCheck_DefaultApproval covers a Subscription that omits installPlanApproval.
+func TestOLMOperatorLifecycleCheck_DefaultApproval(t *testing.T) {
+	objects := []runtime.Object{
+		&unstructured.Unstructured{Object: map[string]interface{}{
+			"apiVersion": "operators.coreos.com/v1alpha1", "kind": "Subscription",
+			"metadata": map[string]interface{}{"name": "test-op", "namespace": "openshift-operators"},
+			"spec": map[string]interface{}{
+				"name":    "test-op",
+				"channel": "stable",
+				"source":  "redhat-operators",
+				// no installPlanApproval — defaults to Automatic
+			},
+			"status": map[string]interface{}{
+				"state":        "AtLatestKnown",
+				"installedCSV": "test-op.v1.0.0",
+				"currentCSV":   "test-op.v1.0.0",
+			},
+		}},
+	}
+
+	client := newFakeDynamicClient(objects...)
+	check := &OLMOperatorLifecycleCheck{}
+
+	result, err := check.Run(context.Background(), client, "4.16.0", "4.17.0")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	operators := result["operators"].([]map[string]any)
+	op := operators[0]
+	if op["install_plan_approval"] != "Automatic" {
+		t.Errorf("install_plan_approval = %v, want Automatic", op["install_plan_approval"])
+	}
+
+	summary := result["summary"].(map[string]any)
+	if summary["manual_approval"] != 0 {
+		t.Errorf("manual_approval = %v, want 0", summary["manual_approval"])
+	}
+}
+
+// TestExtractChannels checks that channel names are returned in their listed order.
+func TestExtractChannels(t *testing.T) {
+	pm := map[string]interface{}{
+		"status": map[string]interface{}{
+			"channels": []interface{}{
+				map[string]interface{}{"name": "stable-5.8"},
+				map[string]interface{}{"name": "stable-6.0"},
+				map[string]interface{}{"name": "preview"},
+			},
+		},
+	}
+
+	channels := extractChannels(pm)
+	if len(channels) != 3 {
+		t.Fatalf("channels len = %d, want 3", len(channels))
+	}
+	expected := []string{"stable-5.8", "stable-6.0", "preview"}
+	for i, want := range expected {
+		if channels[i] != want {
+			t.Errorf("channels[%d] = %v, want %v", i, channels[i], want)
+		}
+	}
+}
+
+// TestExtractOCPCompat covers a declared maximum, a missing channel and a channel without annotations.
+func TestExtractOCPCompat(t *testing.T) {
+	t.Run("with maxOpenShiftVersion", func(t *testing.T) {
+		pm := map[string]interface{}{
+			"status": map[string]interface{}{
+				"channels": []interface{}{
+					map[string]interface{}{
+						"name": "stable",
+						"currentCSVDesc": map[string]interface{}{
+							"annotations": map[string]interface{}{
+								"olm.maxOpenShiftVersion": "4.16",
+							},
+						},
+					},
+				},
+			},
+		}
+
+		compat := extractOCPCompat(pm, "stable")
+		if compat == nil {
+			t.Fatal("expected non-nil compat")
+		}
+		if compat["max"] != "4.16" {
+			t.Errorf("max = %v, want 4.16", compat["max"])
+		}
+	})
+
+	t.Run("channel not found", func(t *testing.T) {
+		pm := map[string]interface{}{
+			"status": map[string]interface{}{
+				"channels": []interface{}{
+					map[string]interface{}{
+						"name": "stable",
+					},
+				},
+			},
+		}
+
+		compat := extractOCPCompat(pm, "preview")
+		if compat != nil {
+			t.Errorf("expected nil compat for missing channel, got %v", compat)
+		}
+	})
+
+	t.Run("no annotations", func(t *testing.T) {
+		pm := map[string]interface{}{
+			"status": map[string]interface{}{
+				"channels": []interface{}{
+					map[string]interface{}{
+						"name":           "stable",
+						"currentCSVDesc": map[string]interface{}{},
+					},
+				},
+			},
+		}
+
+		compat := extractOCPCompat(pm, "stable")
+		if compat != nil {
+			t.Errorf("expected nil compat for no annotations, got %v", compat)
+		}
+	})
+}
+
+// TestParseMinOCPFromProperties covers present, absent, malformed and empty olm.properties input.
+func TestParseMinOCPFromProperties(t *testing.T) {
+	t.Run("valid olm.minOpenShiftVersion", func(t *testing.T) {
+		props := `[{"type":"olm.minOpenShiftVersion","value":"4.14"},{"type":"olm.maxOpenShiftVersion","value":"4.17"}]`
+		got := parseMinOCPFromProperties(props)
+		if got != "4.14" {
+			t.Errorf("got %q, want 4.14", got)
+		}
+	})
+
+	t.Run("no minOpenShiftVersion", func(t *testing.T) {
+		props := `[{"type":"olm.maxOpenShiftVersion","value":"4.17"}]`
+		got := parseMinOCPFromProperties(props)
+		if got != "" {
+			t.Errorf("got %q, want empty", got)
+		}
+	})
+
+	t.Run("invalid JSON", func(t *testing.T) {
+		got := parseMinOCPFromProperties("not json")
+		if got != "" {
+			t.Errorf("got %q, want empty", got)
+		}
+	})
+
+	t.Run("empty array", func(t *testing.T) {
+		got := parseMinOCPFromProperties("[]")
+		if got != "" {
+			t.Errorf("got %q, want empty", got)
+		}
+	})
+}
diff --git a/pkg/readiness/operator_health.go b/pkg/readiness/operator_health.go
new file mode 100644
index 000000000..f9232701a
--- /dev/null
+++ b/pkg/readiness/operator_health.go
@@ -0,0 +1,125 @@
+package readiness
+
+import (
+	"context"
+	"fmt"
+
+	"k8s.io/client-go/dynamic"
+)
+
+// OperatorHealthCheck provides per-operator detail and MCP state.
+// CVO already aggregates operator health into the ClusterVersion Upgradeable condition
+// (reported in cluster_conditions check). This check adds per-operator breakdown
+// and MachineConfigPool status, which CVO does not expose in conditions.
+type OperatorHealthCheck struct{}
+
+// Name returns the identifier under which this check's output is reported.
+func (c *OperatorHealthCheck) Name() string { return "operator_health" }
+
+// Run lists ClusterOperators and groups those that report Upgradeable=False,
+// Degraded=True or Available=False, then summarizes MachineConfigPool state.
+// A failure to list ClusterOperators is returned as an error; a failure to
+// list MachineConfigPools is recorded under "errors" and the pool fields are
+// omitted. The current and target arguments are not used.
+func (c *OperatorHealthCheck) Run(ctx context.Context, dc dynamic.Interface, current, target string) (map[string]any, error) {
+	result := map[string]any{}
+	var sectionErrors []map[string]any
+
+	// Per-operator breakdown — CVO aggregates this but doesn't expose per-CO detail
+	operators, err := ListResources(ctx, dc, GVRClusterOperator, "")
+	if err != nil {
+		return nil, fmt.Errorf("failed to list ClusterOperators: %w", err)
+	}
+
+	notUpgradeable := make([]map[string]any, 0)
+	degraded := make([]map[string]any, 0)
+	notAvailable := make([]map[string]any, 0)
+
+	for _, co := range operators {
+		conditions := GetConditions(&co)
+		name := co.GetName()
+
+		// A condition that is absent is not reported in any of the groups.
+		if cond, ok := conditions[ConditionUpgradeable]; ok && cond.Status == ConditionFalse {
+			notUpgradeable = append(notUpgradeable, map[string]any{
+				"name":    name,
+				"reason":  cond.Reason,
+				"message": cond.Message,
+			})
+		}
+		if cond, ok := conditions[ConditionDegraded]; ok && cond.Status == ConditionTrue {
+			degraded = append(degraded, map[string]any{
+				"name":    name,
+				"reason":  cond.Reason,
+				"message": cond.Message,
+			})
+		}
+		if cond, ok := conditions[ConditionAvailable]; ok && cond.Status == ConditionFalse {
+			notAvailable = append(notAvailable, map[string]any{
+				"name":    name,
+				"reason":  cond.Reason,
+				"message": cond.Message,
+			})
+		}
+	}
+
+	result["not_upgradeable"] = notUpgradeable
+	result["degraded"] = degraded
+	result["not_available"] = notAvailable
+
+	// MachineConfigPool status — CVO does NOT track this
+	mcps, err := ListResources(ctx, dc, GVRMachineConfigPool, "")
+	if err != nil {
+		SectionError(&sectionErrors, "machine_config_pools", err)
+	} else {
+		mcpResults := make([]map[string]any, 0, len(mcps))
+		pausedMCPs := 0
+		degradedMCPs := 0
+		updatingMCPs := 0
+
+		for _, mcp := range mcps {
+			paused := NestedBool(mcp.Object, "spec", "paused")
+			machineCount := NestedInt64(mcp.Object, "status", "machineCount")
+			readyCount := NestedInt64(mcp.Object, "status", "readyMachineCount")
+			updatedCount := NestedInt64(mcp.Object, "status", "updatedMachineCount")
+
+			conditions := GetConditions(&mcp)
+			isDegraded := false
+			isUpdating := false
+			if cond, ok := conditions[ConditionDegraded]; ok && cond.Status == ConditionTrue {
+				isDegraded = true
+				degradedMCPs++
+			}
+			if cond, ok := conditions[ConditionUpdating]; ok && cond.Status == ConditionTrue {
+				isUpdating = true
+				updatingMCPs++
+			}
+			if paused {
+				pausedMCPs++
+			}
+
+			mcpResults = append(mcpResults, map[string]any{
+				"name":          mcp.GetName(),
+				"paused":        paused,
+				"machine_count": machineCount,
+				"ready_count":   readyCount,
+				"updated_count": updatedCount,
+				"degraded":      isDegraded,
+				"updating":      isUpdating,
+			})
+		}
+		result["machine_config_pools"] = mcpResults
+		result["mcp_summary"] = map[string]any{
+			"paused":   pausedMCPs,
+			"degraded": degradedMCPs,
+			"updating": updatingMCPs,
+		}
+	}
+
+	result["summary"] = map[string]any{
+		"total_operators":       len(operators),
+		"not_upgradeable_count": len(notUpgradeable),
+		"degraded_count":        len(degraded),
+		"not_available_count":   len(notAvailable),
+		"note":                  "CVO's aggregated Upgradeable condition is in the cluster_conditions check",
+	}
+
+	if len(sectionErrors) > 0 {
+		result["errors"] = sectionErrors
+	}
+
+	return result, nil
+}
diff --git a/pkg/readiness/pdb_drain.go b/pkg/readiness/pdb_drain.go
new file mode 100644
index 000000000..55a51714b
--- /dev/null
+++ b/pkg/readiness/pdb_drain.go
@@ -0,0 +1,57 @@
+package readiness
+
+import (
+	"context"
+	"fmt"
+
+	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
+	"k8s.io/client-go/dynamic"
+)
+
+// PDBDrainCheck assesses PodDisruptionBudgets that could block node drains.
+type PDBDrainCheck struct{}
+
+// Name returns the identifier of this check, "pdb_drain".
+func (c *PDBDrainCheck) Name() string { return "pdb_drain" }
+
+// Run lists PodDisruptionBudgets (passing an empty namespace to ListResources)
+// and reports every PDB whose status allows zero disruptions while it still
+// counts at least one healthy pod. The current and target arguments are unused.
+func (c *PDBDrainCheck) Run(ctx context.Context, dc dynamic.Interface, current, target string) (map[string]any, error) {
+	pdbs, err := ListResources(ctx, dc, GVRPDB, "")
+	if err != nil {
+		return nil, fmt.Errorf("failed to list PodDisruptionBudgets: %w", err)
+	}
+
+	blocking := make([]map[string]any, 0)
+	for _, pdb := range pdbs {
+		healthy := NestedInt64(pdb.Object, "status", "currentHealthy")
+		desired := NestedInt64(pdb.Object, "status", "desiredHealthy")
+		allowed := NestedInt64(pdb.Object, "status", "disruptionsAllowed")
+		if allowed != 0 || healthy <= 0 {
+			continue
+		}
+
+		// NOTE(review): an unset spec field is nil here and renders as "<nil>" through %v.
+		maxUnavailable, _, _ := unstructured.NestedFieldNoCopy(pdb.Object, "spec", "maxUnavailable")
+		minAvailable, _, _ := unstructured.NestedFieldNoCopy(pdb.Object, "spec", "minAvailable")
+		blocking = append(blocking, map[string]any{
+			"name":                pdb.GetName(),
+			"namespace":           pdb.GetNamespace(),
+			"max_unavailable":     fmt.Sprintf("%v", maxUnavailable),
+			"min_available":       fmt.Sprintf("%v", minAvailable),
+			"current_healthy":     healthy,
+			"desired_healthy":     desired,
+			"disruptions_allowed": allowed,
+		})
+	}
+
+	return map[string]any{
+		"total_pdbs":    len(pdbs),
+		"blocking_pdbs": blocking,
+		"summary": map[string]any{
+			"total":    len(pdbs),
+			"blocking": len(blocking),
+		},
+	}, nil
+}
diff --git a/pkg/start/start.go b/pkg/start/start.go
index 0470d70cf..59df7d885 100644
--- a/pkg/start/start.go
+++ b/pkg/start/start.go
@@ -21,6 +21,7 @@ import (
 	"k8s.io/apimachinery/pkg/fields"
 	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
 	"k8s.io/apimachinery/pkg/util/sets"
+	"k8s.io/client-go/dynamic"
 	coreinformers "k8s.io/client-go/informers"
 	"k8s.io/client-go/kubernetes"
 	"k8s.io/client-go/kubernetes/scheme"
@@ 
-507,6 +508,10 @@ func (cb *ClientBuilder) OperatorClientOrDie(name string, configFns ...func(*res
 	return operatorclientset.NewForConfigOrDie(rest.AddUserAgent(cb.RestConfig(configFns...), name))
 }
 
+func (cb *ClientBuilder) DynamicClientOrDie(name string, configFns ...func(*rest.Config)) dynamic.Interface {
+	return dynamic.NewForConfigOrDie(rest.AddUserAgent(cb.RestConfig(configFns...), name))
+}
+
 func (cb *ClientBuilder) RuntimeControllerClientOrDie(name string, configFns ...func(*rest.Config)) runtimeclient.Client {
 	c, err := runtimeclient.New(rest.AddUserAgent(cb.RestConfig(configFns...), name), runtimeclient.Options{})
 	if err != nil {
@@ -623,6 +628,7 @@ func (o *Options) NewControllerContext(
 		return nil, err
 	}
 	rtClient := cb.RuntimeControllerClientOrDie("runtime-controller-client")
+	dynamicClient := cb.DynamicClientOrDie("dynamic-client")
 
 	cvo, err := cvo.New(
 		o.NodeName,
@@ -639,6 +645,7 @@ func (o *Options) NewControllerContext(
 		configInformerFactory.Config().V1().FeatureGates(),
 		cb.ClientOrDie(o.Namespace),
 		cvoKubeClient,
+		dynamicClient,
 		operatorClient,
 		o.Exclude,
 		o.ClusterProfile,
diff --git a/test/cvo/readiness.go b/test/cvo/readiness.go
new file mode 100644
index 000000000..19da87b8c
--- /dev/null
+++ b/test/cvo/readiness.go
@@ -0,0 +1,231 @@
+package cvo
+
+import (
+	"context"
+	"encoding/json"
+	"time"
+
+	g "github.com/onsi/ginkgo/v2"
+	o "github.com/onsi/gomega"
+
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/client-go/dynamic"
+	"k8s.io/client-go/kubernetes"
+
+	configv1client "github.com/openshift/client-go/config/clientset/versioned/typed/config/v1"
+
+	"github.com/openshift/cluster-version-operator/pkg/readiness"
+	"github.com/openshift/cluster-version-operator/test/util"
+)
+
+// This suite runs readiness.RunAll with a dynamic client and compares selected
+// fields of its output with values read independently through typed clients.
+var _ = g.Describe(`[Jira:"Cluster Version Operator"] cluster-version-operator readiness checks`, func() {
+	var (
+		dynamicClient  dynamic.Interface
+		kubeClient     kubernetes.Interface
+		configClient   *configv1client.ConfigV1Client
+		ctx            context.Context
+		currentVersion string
+		targetVersion  string
+	)
+
+	// Every spec gets fresh clients, a 2-minute context and freshly resolved versions.
+	g.BeforeEach(func() {
+		var cancel context.CancelFunc
+		ctx, cancel = context.WithTimeout(context.Background(), 2*time.Minute)
+		g.DeferCleanup(cancel)
+
+		restCfg, err := util.GetRestConfig()
+		o.Expect(err).NotTo(o.HaveOccurred())
+
+		dynamicClient, err = dynamic.NewForConfig(restCfg)
+		o.Expect(err).NotTo(o.HaveOccurred())
+
+		kubeClient, err = kubernetes.NewForConfig(restCfg)
+		o.Expect(err).NotTo(o.HaveOccurred())
+
+		configClient, err = configv1client.NewForConfig(restCfg)
+		o.Expect(err).NotTo(o.HaveOccurred())
+
+		// Read actual versions from the cluster
+		cv, err := configClient.ClusterVersions().Get(ctx, "version", metav1.GetOptions{})
+		o.Expect(err).NotTo(o.HaveOccurred())
+		currentVersion = cv.Status.Desired.Version
+		o.Expect(currentVersion).NotTo(o.BeEmpty(), "cluster must have a current version")
+
+		// Pick the first available update as target, or use current if none
+		targetVersion = currentVersion
+		if len(cv.Status.AvailableUpdates) > 0 {
+			targetVersion = cv.Status.AvailableUpdates[0].Version
+		}
+	})
+
+	g.It("should run all checks without errors", func() {
+		output := readiness.RunAll(ctx, dynamicClient, currentVersion, targetVersion)
+
+		o.Expect(output.Meta.TotalChecks).To(o.Equal(9))
+		o.Expect(output.Meta.ChecksErrored).To(o.Equal(0),
+			"no check should error on a healthy cluster")
+	})
+
+	g.It("should produce valid JSON that round-trips", func() {
+		output := readiness.RunAll(ctx, dynamicClient, currentVersion, targetVersion)
+
+		data, err := json.Marshal(output)
+		o.Expect(err).NotTo(o.HaveOccurred())
+
+		var parsed map[string]any
+		o.Expect(json.Unmarshal(data, &parsed)).To(o.Succeed())
+		o.Expect(parsed).To(o.HaveKey("checks"))
+		o.Expect(parsed).To(o.HaveKey("meta"))
+	})
+
+	g.It("should report node count matching the actual cluster", func() {
+		// Ground truth: list nodes via typed client
+		nodeList, err := kubeClient.CoreV1().Nodes().List(ctx, metav1.ListOptions{})
+		o.Expect(err).NotTo(o.HaveOccurred())
+
+		expectedTotal := len(nodeList.Items)
+		expectedReady := 0
+		for _, node := range nodeList.Items {
+			for _, cond := range node.Status.Conditions {
+				if cond.Type == "Ready" && cond.Status == "True" {
+					expectedReady++
+				}
+			}
+		}
+
+		// Our check
+		output := readiness.RunAll(ctx, dynamicClient, currentVersion, targetVersion)
+		result := output.Checks["node_capacity"]
+		o.Expect(result.Status).To(o.Equal("ok"))
+		o.Expect(result.Data["total_nodes"]).To(o.Equal(expectedTotal),
+			"node count should match actual nodes in cluster")
+		o.Expect(result.Data["ready_nodes"]).To(o.Equal(expectedReady),
+			"ready node count should match actual ready nodes")
+	})
+
+	g.It("should report operator count matching actual ClusterOperators", func() {
+		// Ground truth: list ClusterOperators via typed client
+		coList, err := configClient.ClusterOperators().List(ctx, metav1.ListOptions{})
+		o.Expect(err).NotTo(o.HaveOccurred())
+
+		expectedTotal := len(coList.Items)
+		expectedDegraded := 0
+		expectedNotUpgradeable := 0
+		for _, co := range coList.Items {
+			for _, cond := range co.Status.Conditions {
+				if cond.Type == "Degraded" && cond.Status == "True" {
+					expectedDegraded++
+				}
+				if cond.Type == "Upgradeable" && cond.Status == "False" {
+					expectedNotUpgradeable++
+				}
+			}
+		}
+
+		// Our check
+		output := readiness.RunAll(ctx, dynamicClient, currentVersion, targetVersion)
+		result := output.Checks["operator_health"]
+		o.Expect(result.Status).To(o.Equal("ok"))
+
+		summary, ok := result.Data["summary"].(map[string]any)
+		o.Expect(ok).To(o.BeTrue(), "operator_health summary should be a map")
+		o.Expect(summary["total_operators"]).To(o.Equal(expectedTotal),
+			"operator count should match actual ClusterOperators")
+		o.Expect(summary["degraded_count"]).To(o.Equal(expectedDegraded),
+			"degraded count should match actual degraded operators")
+		o.Expect(summary["not_upgradeable_count"]).To(o.Equal(expectedNotUpgradeable),
+			"not-upgradeable count should match actual operators")
+	})
+
+	g.It("should report etcd member count matching actual etcd pods", func() {
+		// Ground truth: list etcd pods via typed client
+		podList, err := kubeClient.CoreV1().Pods("openshift-etcd").List(ctx, metav1.ListOptions{
+			LabelSelector: "app=etcd",
+		})
+		o.Expect(err).NotTo(o.HaveOccurred())
+
+		expectedTotal := len(podList.Items)
+		expectedHealthy := 0
+		for _, pod := range podList.Items {
+			if pod.Status.Phase == "Running" {
+				expectedHealthy++
+			}
+		}
+
+		// Our check
+		output := readiness.RunAll(ctx, dynamicClient, currentVersion, targetVersion)
+		result := output.Checks["etcd_health"]
+		o.Expect(result.Status).To(o.Equal("ok"))
+		o.Expect(result.Data["total_members"]).To(o.Equal(expectedTotal),
+			"etcd member count should match actual etcd pods")
+		o.Expect(result.Data["healthy_members"]).To(o.Equal(expectedHealthy),
+			"healthy member count should match actual running etcd pods")
+	})
+
+	g.It("should report network type matching actual Network config", func() {
+		// Ground truth: get Network config via typed client
+		network, err := configClient.Networks().Get(ctx, "cluster", metav1.GetOptions{})
+		o.Expect(err).NotTo(o.HaveOccurred())
+
+		// Our check
+		output := readiness.RunAll(ctx, dynamicClient, currentVersion, targetVersion)
+		result := output.Checks["network"]
+		o.Expect(result.Status).To(o.Equal("ok"))
+		o.Expect(result.Data["network_type"]).To(o.Equal(network.Status.NetworkType),
+			"network type should match actual Network config")
+	})
+
+	g.It("should report PDB count matching actual PodDisruptionBudgets", func() {
+		// Ground truth: list PDBs across all namespaces
+		pdbList, err := kubeClient.PolicyV1().PodDisruptionBudgets("").List(ctx, metav1.ListOptions{})
+		o.Expect(err).NotTo(o.HaveOccurred())
+
+		expectedTotal := len(pdbList.Items)
+		expectedBlocking := 0
+		for _, pdb := range pdbList.Items {
+			if pdb.Status.DisruptionsAllowed == 0 && pdb.Status.CurrentHealthy > 0 {
+				expectedBlocking++
+			}
+		}
+
+		// Our check
+		output := readiness.RunAll(ctx, dynamicClient, currentVersion, targetVersion)
+		result := output.Checks["pdb_drain"]
+		o.Expect(result.Status).To(o.Equal("ok"))
+		o.Expect(result.Data["total_pdbs"]).To(o.Equal(expectedTotal),
+			"PDB count should match actual PDBs in cluster")
+
+		blockingRaw, ok := result.Data["blocking_pdbs"].([]map[string]any)
+		o.Expect(ok).To(o.BeTrue(), "blocking_pdbs should be a []map[string]any")
+		o.Expect(blockingRaw).To(o.HaveLen(expectedBlocking),
+			"blocking PDB count should match actual blocking PDBs")
+	})
+
+	g.It("should report cluster conditions matching ClusterVersion status", func() {
+		// Ground truth: get ClusterVersion via typed client
+		cv, err := configClient.ClusterVersions().Get(ctx, "version", metav1.GetOptions{})
+		o.Expect(err).NotTo(o.HaveOccurred())
+
+		// Our check
+		output := readiness.RunAll(ctx, dynamicClient, currentVersion, targetVersion)
+		result := output.Checks["cluster_conditions"]
+		o.Expect(result.Status).To(o.Equal("ok"))
+		o.Expect(result.Data["channel"]).To(o.Equal(cv.Spec.Channel),
+			"channel should match ClusterVersion spec")
+		o.Expect(result.Data["cluster_id"]).To(o.Equal(string(cv.Spec.ClusterID)),
+			"cluster ID should match ClusterVersion spec")
+	})
+
+	g.It("should complete all checks within 60 seconds", func() {
+		output := readiness.RunAll(ctx, dynamicClient, currentVersion, targetVersion)
+
+		o.Expect(output.Meta.ElapsedSeconds).To(o.BeNumerically("<", 60))
+		for name, result := range output.Checks {
+			o.Expect(result.Elapsed).To(o.BeNumerically("<", 60),
+				"check %s exceeded timeout", name)
+		}
+	})
+})