From 0544f37cfe9d1e330c658c46cda1d8bacd16ea3b Mon Sep 17 00:00:00 2001 From: Harshal Patil <12152047+harche@users.noreply.github.com> Date: Fri, 29 May 2026 11:41:33 -0400 Subject: [PATCH] product-lifecycle: Add plc_lookup.py CLI, evals, and OWNERS updates MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add standalone Python CLI (`plc_lookup.py`) wrapping the Red Hat Product Life Cycle API v2 for querying product support status, EOL dates, and OCP version compatibility. Replaces inline curl/python one-liners. Commands: - `products` — query by product name with optional OCP compat check - `olm-check` — batch check OLM operators against a target OCP version Includes 46 tests (unit with mocked API + integration against live API), 3 agent eval test cases (all passing on Claude via Vertex AI), and OWNERS updates. Co-Authored-By: Claude Opus 4.6 (1M context) --- cluster-update/README.md | 2 +- cluster-update/product-lifecycle/SKILL.md | 187 ++----- .../references/api-details.md | 44 +- .../product-lifecycle/scripts/plc_lookup.py | 190 +++++++ .../scripts/tests/__init__.py | 0 .../scripts/tests/test_plc_lookup.py | 526 ++++++++++++++++++ cluster-update/update-advisor/SKILL.md | 167 ++++-- evals/skills/product-lifecycle/README.md | 132 +++++ .../skills/product-lifecycle/system_prompt.md | 19 + .../skills/product-lifecycle/test_cases.yaml | 203 +++++++ evals/skills/update-advisor/README.md | 64 +++ evals/skills/update-advisor/system_prompt.md | 19 + evals/skills/update-advisor/test_cases.yaml | 176 ++++++ evals/workspace/skills/product-lifecycle | 1 + evals/workspace/skills/update-advisor | 1 + 15 files changed, 1511 insertions(+), 220 deletions(-) create mode 100755 cluster-update/product-lifecycle/scripts/plc_lookup.py create mode 100644 cluster-update/product-lifecycle/scripts/tests/__init__.py create mode 100644 cluster-update/product-lifecycle/scripts/tests/test_plc_lookup.py create mode 100644 
evals/skills/product-lifecycle/README.md create mode 100644 evals/skills/product-lifecycle/system_prompt.md create mode 100644 evals/skills/product-lifecycle/test_cases.yaml create mode 100644 evals/skills/update-advisor/README.md create mode 100644 evals/skills/update-advisor/system_prompt.md create mode 100644 evals/skills/update-advisor/test_cases.yaml create mode 120000 evals/workspace/skills/product-lifecycle create mode 120000 evals/workspace/skills/update-advisor diff --git a/cluster-update/README.md b/cluster-update/README.md index e5e36dc..2d8c09a 100644 --- a/cluster-update/README.md +++ b/cluster-update/README.md @@ -1,3 +1,3 @@ # Cluster Update skills -This directory contains skills which are designed to help agents with ClusterVersion activities such as preparing for cluster updates. +This directory contains skills which are designed to help agents with ClusterVersion activities such as preparing for cluster updates. \ No newline at end of file diff --git a/cluster-update/product-lifecycle/SKILL.md b/cluster-update/product-lifecycle/SKILL.md index e41e6f2..4c67287 100644 --- a/cluster-update/product-lifecycle/SKILL.md +++ b/cluster-update/product-lifecycle/SKILL.md @@ -1,165 +1,90 @@ --- name: product-lifecycle description: Query Red Hat Product Life Cycle data for support phases, end-of-life dates, and OpenShift version compatibility. Use when evaluating whether installed operators or layered products are supported on a given OCP version, approaching end of life, or need upgrading before a cluster upgrade. Also use when the user asks about product support status, EOL dates, or lifecycle phases for any Red Hat product. +allowed-tools: Bash(python3:*) --- # Red Hat Product Life Cycle -Query the Red Hat Product Life Cycle API to check support status, EOL dates, and OpenShift compatibility for Red Hat products and layered operators. 
+Query the Red Hat Product Life Cycle API (v2) to check support status, EOL +dates, and OpenShift compatibility for Red Hat products and layered operators. -## API Overview +## CLI Tool -- **Base URL**: `https://access.redhat.com/product-life-cycles/api/v1/products` -- **Authentication**: None required — the API is public. -- **Query parameter**: `?name=` — case-insensitive substring match on product name. -- **Response**: `{ "data": [ { product }, ... ] }` — array of matching products. - -## Quick Start +All queries go through `cluster-update/product-lifecycle/scripts/plc_lookup.py` — a standalone Python 3 script +with no dependencies beyond stdlib. Run with `-h` for full usage: ```bash -# Search for a product by name (substring match) -curl -s "https://access.redhat.com/product-life-cycles/api/v1/products?name=logging+for+Red+Hat+OpenShift" | jq . - -# List all products with "OpenShift" in the name -curl -s "https://access.redhat.com/product-life-cycles/api/v1/products?name=OpenShift" | jq -r '.data[].name' -``` - -## Response Structure - -Each product in `data[]` has: - -```json -{ - "name": "logging for Red Hat OpenShift", - "former_names": ["Red Hat OpenShift Logging"], - "all_phases": [{"name": "General availability", ...}, ...], - "versions": [ - { - "name": "6.5", - "type": "Full Support", - "openshift_compatibility": "4.19, 4.20, 4.21", - "phases": [ - { - "name": "General availability", - "end_date": "2026-04-01T00:00:00.000Z", - "date_format": "date" - }, - { - "name": "Full support", - "end_date": "Release of Logging 6.6 + 1 month", - "date_format": "string" - }, - { - "name": "Maintenance support", - "end_date": "Release of Logging 6.7", - "date_format": "string" - } - ] - } - ] -} +python3 cluster-update/product-lifecycle/scripts/plc_lookup.py -h ``` -For full field descriptions, type enumerations, and phase name details, see `references/api-details.md`. 
+### Commands -## Common Queries +#### `products` — Query products by name -### Check support status for a specific product version +Maps directly to `GET /v2/products?name=`. ```bash -curl -s "https://access.redhat.com/product-life-cycles/api/v1/products?name=logging+for+Red+Hat+OpenShift" \ - | jq -r '.data[] | "\(.name)", (.versions[] | " \(.name) - \(.type) (OCP: \(.openshift_compatibility // "N/A"))")' -``` +# Look up a product +python3 cluster-update/product-lifecycle/scripts/plc_lookup.py products "logging for Red Hat OpenShift" -### Check if a product version is compatible with a target OCP version +# With OCP compatibility check +python3 cluster-update/product-lifecycle/scripts/plc_lookup.py products "logging for Red Hat OpenShift" --ocp 4.21 -```bash -TARGET_OCP="4.21" -PRODUCT="logging+for+Red+Hat+OpenShift" - -curl -s "https://access.redhat.com/product-life-cycles/api/v1/products?name=$PRODUCT" \ - | jq -r --arg target "$TARGET_OCP" ' - .data[] | .name as $prod | - .versions[] | - .name as $ver | .type as $type | - (.openshift_compatibility // "" | split(", ")) as $compat | - (if ($compat | index($target)) then "COMPATIBLE" else "NOT COMPATIBLE" end) as $status | - "\($prod) \($ver) (\($type)) - \($status) with OCP \($target)"' +# Paginate broad queries +python3 cluster-update/product-lifecycle/scripts/plc_lookup.py products "OpenShift" --limit 5 +python3 cluster-update/product-lifecycle/scripts/plc_lookup.py products "OpenShift" --limit 5 --offset 5 ``` -### Get EOL dates for OCP itself +Returns matching product versions with normalized support status, OCP +compatibility, and lifecycle phase dates. When `--ocp` is provided, adds +`ocp_target` and `ocp_compatible` (true/false/null) to each version entry. 
-```bash -curl -s "https://access.redhat.com/product-life-cycles/api/v1/products?name=OpenShift+Container+Platform" \ - | jq -r '.data[0].versions[] | - "OCP \(.name) - \(.type) (maintenance ends: \( - [.phases[] | select(.name == "Maintenance support") | .end_date] | first // "N/A" - ))"' -``` +Use `--limit` and `--offset` for broad queries that return many results. +The response includes `total`, `returned`, and `next_offset` (when more +results are available) so you can paginate through the full result set. -### Cross-reference OLM operators with Product Life Cycle data - -Products that are OLM operators have a `package` field that maps directly to the -OLM Subscription's `spec.name`. This is an **exact match key** — more reliable than name -matching. The `is_operator` field confirms the product is OLM-managed. - -When the upgrade advisor readiness JSON includes `olm_operator_lifecycle` data: - -1. Extract the `package` name from each operator in readiness data -2. Search the Product Life Cycle API using that package name -3. Match by comparing `product.package` == operator's `package` -4. Check if the installed version's `openshift_compatibility` includes the target OCP version -5. 
Check the `type` field for support status +#### `olm-check` — Batch check OLM operators ```bash -# Look up Product Life Cycle data for an OLM operator by its package name -OLM_PACKAGE="cluster-logging" -TARGET_OCP="4.21" - -curl -s "https://access.redhat.com/product-life-cycles/api/v1/products?name=logging" \ - | jq -r --arg pkg "$OLM_PACKAGE" --arg target "$TARGET_OCP" ' - [.data[] | select(.package == $pkg)] | - if length == 0 then "No Product Life Cycle entry with package=\($pkg)" - else .[0] | - "\(.name) (package: \(.package))", - (.versions[] | - .name as $ver | .type as $type | - (.openshift_compatibility // "" | split(", ")) as $compat | - (if ($compat | index($target)) then "YES" else "NO" end) as $ok | - " \($ver) - \($type) - OCP \($target) compatible: \($ok)") - end' +python3 cluster-update/product-lifecycle/scripts/plc_lookup.py olm-check --ocp 4.21 \ + --operators '[{"package":"cluster-logging"},{"package":"elasticsearch-operator"}]' ``` -If the `?name=` search doesn't return the operator, try searching by `csv_display_name` -from the readiness data as a fallback. +Looks up each operator by its OLM `package` name. First searches the bulk +"OpenShift" product set, then falls back to individual queries. Reports +`lifecycle_unavailable` for operators not tracked in the API. -**Not all operators have Product Life Cycle entries.** If a search returns no results, that's expected — -it means the product isn't tracked in the Product Life Cycle API. Report this as "lifecycle data unavailable" -rather than an error. +### Output Format -### Batch lookup for multiple OLM operators +All commands output JSON. Each product version entry includes: -When cross-referencing several operators, avoid N+1 API calls. Fetch `?name=OpenShift` -once (~14 products covering most Red Hat layered operators), then make individual calls -only for operators not found in that initial batch. 
+| Field | Description | +|---|---| +| `status` | Normalized: `supported`, `maintenance`, `extended`, `end-of-maintenance`, `eol`, or `unknown` | +| `status_raw` | Original API value (e.g. `"Full Support"`, `"End of life"`) | +| `ocp_versions` | List of compatible OCP versions (empty for non-layered products) | +| `ocp_compatible` | `true`/`false`/`null` — only present when `--ocp` is used | +| `ga_date` | General availability date | +| `full_support_end` | End of full support phase | +| `maintenance_end` | End of maintenance support phase | -```bash -TARGET_OCP="4.21" - -# Single call covers most Red Hat operator products -curl -s "https://access.redhat.com/product-life-cycles/api/v1/products?name=OpenShift" \ - | jq -r --arg target "$TARGET_OCP" ' - .data[] | select(.is_operator) | - (.package // "") as $pkg | .name as $prod | - .versions[] | - .name as $ver | .type as $type | - (.openshift_compatibility // "" | split(", ")) as $compat | - (if ($compat | index($target)) then "YES" else "NO" end) as $ok | - "\($pkg): \($prod) \($ver) (\($type)) - OCP \($target): \($ok)"' -``` +Date fields are objects with `date` (ISO 8601 or descriptive string) and +`format` (`"date"` or `"string"`). + +## When to Use + +- **Upgrade readiness**: check if installed operators are compatible with the + target OCP version before upgrading +- **EOL planning**: identify products approaching or past end of life +- **Support status**: determine current support phase for any Red Hat product +- **Cross-reference with update-advisor**: when `olm_operator_lifecycle` data + is present in readiness JSON, use `olm-check` to verify lifecycle status ## Important -- **Always use `?name=`** to filter — never fetch the unfiltered `/products` endpoint. -- `openshift_compatibility` is only present on **layered product** versions, not on OCP itself. -- When cross-referencing with OLM data, a missing Product Life Cycle entry is normal — report "lifecycle data unavailable" and move on. 
+- `ocp_versions` is only populated for **layered product** versions; for OCP itself it is an empty list. +- Not all operators have lifecycle entries — report "lifecycle data unavailable" + rather than treating missing data as an error. +- The `package` field in API responses maps to the OLM Subscription's + `spec.name` — use this for exact matching, not product name. diff --git a/cluster-update/product-lifecycle/references/api-details.md b/cluster-update/product-lifecycle/references/api-details.md index eb98f8c..6d81e9e 100644 --- a/cluster-update/product-lifecycle/references/api-details.md +++ b/cluster-update/product-lifecycle/references/api-details.md @@ -1,9 +1,9 @@ -# Product Life Cycle API Reference +# Product Life Cycle API Reference (v2) ## Endpoint ``` -GET https://access.redhat.com/product-life-cycles/api/v1/products?name= +GET https://access.redhat.com/product-life-cycles/api/v2/products?name= ``` No authentication required. The `name` parameter is a case-insensitive substring match. @@ -23,7 +23,7 @@ No authentication required. The `name` parameter is a case-insensitive substring ### The `package` field The `package` field is the OLM package name and provides an **exact match key** to correlate -Product Life Cycle products with OLM Subscriptions. This is more reliable than name matching. +products with OLM Subscriptions. This is more reliable than name matching. 
Mapping: `product.package` == `subscription.spec.name` @@ -32,8 +32,8 @@ Mapping: `product.package` == `subscription.spec.name` | Field | Type | Description | |---|---|---| | `name` | string | Version number (e.g., `"6.5"`, `"4.21"`) | -| `type` | string | **Current support status** — see table below | -| `openshift_compatibility` | string\|null | Comma-separated OCP versions (e.g., `"4.19, 4.20, 4.21"`) — only on layered products | +| `type` | string | Current support status (see below) | +| `openshift_compatibility` | string\|null | Comma-separated OCP versions — only on layered products | | `phases` | object[] | Lifecycle phase details with dates | ### Support status (`type`) @@ -42,41 +42,31 @@ Mapping: `product.package` == `subscription.spec.name` |---|---| | `"Full Support"` | Active development, bug fixes, security patches | | `"Maintenance Support"` | Critical/security fixes only, no new features | -| `"End of Maintenance"` | Maintenance support has ended; no EUS/ELS applies to this version | -| `"Extended Support"` | Past maintenance, currently in a paid Extended Life Cycle Support (ELS) phase | +| `"Extended Support"` | Similar to Maintenance, may require add-on purchases | +| `"End of Maintenance"` | Maintenance phase ended, transitioning to EOL | | `"End of life"` | No fixes, no support — must upgrade | -| `""` (empty) | Status not yet determined (e.g., version has incomplete lifecycle data) | ## Phase Object | Field | Type | Description | |---|---|---| -| `name` | string | Phase name (e.g., `"General availability"`, `"Full support"`, `"Maintenance support"`) | +| `name` | string | Phase name | | `start_date` | string | Phase start — ISO 8601 date or descriptive string | | `end_date` | string | Phase end — ISO 8601 date or descriptive string | -| `date_format` | string | `"date"` (ISO 8601) or `"string"` (relative/TBD) | +| `start_date_format` | string | `"date"` (ISO 8601) or `"string"` (relative/TBD) | +| `end_date_format` | string | `"date"` (ISO 
8601) or `"string"` (relative/TBD) | -Phase names vary by product. Common categories: - -| Category | Phase names | Meaning | -|---|---|---| -| Release | `General availability` | When the version was first released | -| Active support | `Full support` | Active development, bug fixes, security patches | -| Reduced support | `Maintenance support`, `Maintenance Support 1`, `Maintenance support 2` | Critical/security fixes only | -| Extended support | `Extended update support`, `Extended update support Term 2`, `Extended update support Term 3` | EUS — available for select versions, may require add-on purchase | -| Extended lifecycle | `Extended life phase`, `Extended life cycle support (ELS) 1`/`2`, `Extended life cycle support (ELS) add-on`/`Term 2 add-on`/`Term 3 add-on` | Paid extended support beyond normal EOL | -| End | `End of Life`, `Retired` | No further updates or support | -| Other | `Migration support`, `Third-party certification period` | Product-specific transitional phases | - -Phase names are not standardized across products. Use the `start_date` and `end_date` fields -to determine whether a phase is current, rather than relying on the phase name alone. - -For detailed lifecycle policy definitions, see the [Red Hat product lifecycle policies](https://access.redhat.com/support/policy/updates/openshift#dates). +Common phases: +- **General availability** — when the version was released +- **Full support** — active development period +- **Maintenance support** — critical fixes only +- **Extended update support** — EUS terms (1, 2, 3) +- **Extended life cycle support (ELS)** — add-on extended support ## Search Tips 1. **Be specific with `?name=`** — `"logging+for+Red+Hat+OpenShift"` is better than `"logging"` -2. **Check `former_names`** — products may appear under a previous name in the `former_names` field +2. **Try former names** — if a search returns nothing, the product may have been renamed 3. 
#!/usr/bin/env python3
"""Query Red Hat Product Life Cycle API for support status, EOL dates, and OCP compatibility."""

import argparse
import json
import sys
import urllib.error
import urllib.parse
import urllib.request

API_BASE = "https://access.redhat.com/product-life-cycles/api/v2/products"

# Maps the API's per-version ``type`` string to the normalized value emitted in
# the ``status`` output field. Anything not listed here becomes "unknown".
STATUS_MAP = {
    "Full Support": "supported",
    "Maintenance Support": "maintenance",
    "Extended Support": "extended",
    "End of Maintenance": "end-of-maintenance",
    "End of life": "eol",
}


def _fail(error, **details):
    """Abort by raising SystemExit whose payload is a JSON error document.

    The payload always starts with an ``error`` key followed by *details* in
    the order given, so callers can ``json.loads(str(exc))`` it.
    """
    raise SystemExit(json.dumps({"error": error, **details}, indent=2))


def api_search(name):
    """Return the ``data`` list of products whose name matches *name*.

    Issues ``GET {API_BASE}?name=<name>`` with a 30 second timeout.

    Raises:
        SystemExit: with a JSON payload whose ``error`` is
            ``api_request_failed`` (network/HTTP/timeout failure),
            ``invalid_response`` (body is not valid JSON), or
            ``unexpected_response`` (JSON without a top-level ``data`` key).
    """
    url = f"{API_BASE}?{urllib.parse.urlencode({'name': name})}"
    req = urllib.request.Request(url, headers={"User-Agent": "plc-lookup/1.0"})
    try:
        with urllib.request.urlopen(req, timeout=30) as resp:
            body = json.loads(resp.read())
    except OSError as e:
        # URLError/HTTPError are OSError subclasses; catching OSError also
        # covers socket timeouts raised while reading the response body.
        _fail("api_request_failed", detail=str(e))
    except (json.JSONDecodeError, ValueError) as e:
        _fail("invalid_response", detail=str(e))
    if not isinstance(body, dict):
        # A JSON array/scalar has no .keys(); report its type instead of crashing.
        _fail("unexpected_response", keys=[], type=type(body).__name__)
    if "data" not in body:
        _fail("unexpected_response", keys=list(body.keys()))
    return body["data"] or []


def normalize_status(raw_type):
    """Map the API ``type`` value to a normalized status, or ``"unknown"``."""
    return STATUS_MAP.get(raw_type, "unknown")


def parse_ocp_versions(compat_string):
    """Split a comma-separated compatibility string into a list of versions.

    ``None`` or an empty string yields an empty list; blank items are dropped.
    """
    if not compat_string:
        return []
    return [v.strip() for v in compat_string.split(",") if v.strip()]


def extract_phase_date(version, phase_name):
    """Return the end date of the phase named *phase_name* (case-insensitive).

    Returns:
        ``{"date": <end_date>, "format": <end_date_format or "string">}`` for
        the first matching phase, or ``None`` when no phase matches.
    """
    wanted = phase_name.lower()
    # ``phases`` may be absent or null; entries may lack ``name``/``end_date``.
    for ph in version.get("phases") or []:
        if not isinstance(ph, dict):
            continue
        if str(ph.get("name", "")).lower() == wanted:
            return {
                "date": ph.get("end_date"),
                "format": ph.get("end_date_format", "string"),
            }
    return None


def format_product_version(product, version, target_ocp=None):
    """Build the flat output record for one version of one product.

    When *target_ocp* is given, ``ocp_target`` and ``ocp_compatible`` are
    added; ``ocp_compatible`` is ``None`` when the version lists no OCP
    versions at all, otherwise a bool.
    """
    ocp_versions = parse_ocp_versions(version.get("openshift_compatibility"))
    raw_status = version.get("type") or ""
    result = {
        "product": product["name"],
        "package": product.get("package"),
        "version": version["name"],
        "status": normalize_status(raw_status),
        "status_raw": raw_status,
        "ocp_versions": ocp_versions,
        "ga_date": extract_phase_date(version, "General availability"),
        "full_support_end": extract_phase_date(version, "Full support"),
        "maintenance_end": extract_phase_date(version, "Maintenance support"),
    }
    if target_ocp:
        result["ocp_target"] = target_ocp
        result["ocp_compatible"] = target_ocp in ocp_versions if ocp_versions else None
    return result


def paginate(results, limit, offset):
    """Slice *results* and describe the slice.

    A *limit* of 0 (or a negative value, which is treated as 0) means "no
    limit". A negative *offset* is clamped to 0 so that it can never select
    items from the end of the list or produce a negative ``next_offset``.

    Returns:
        ``(page, meta)`` where *meta* has ``total``, ``offset``, ``limit``,
        ``returned`` and, when more items remain, ``next_offset``.
    """
    total = len(results)
    if limit is not None and limit < 0:
        limit = 0
    start = min(max(offset, 0), total)
    end = min(start + limit, total) if limit else total
    page = results[start:end]
    meta = {"total": total, "offset": start, "limit": limit, "returned": len(page)}
    if end < total:
        meta["next_offset"] = end
    return page, meta


def cmd_products(args):
    """Handle the ``products`` sub-command; return the process exit code.

    Writes a JSON document to stdout. Returns 1 when the search matched no
    products, 0 otherwise.
    """
    products = api_search(args.name)
    if not products:
        json.dump({"error": "no products found", "query": args.name}, sys.stdout, indent=2)
        sys.stdout.write("\n")
        return 1

    target_ocp = getattr(args, "ocp", None)
    results = [
        format_product_version(p, v, target_ocp=target_ocp)
        for p in products
        for v in p.get("versions") or []
    ]

    page, meta = paginate(results, args.limit, args.offset)
    output = {"results": page, **meta}
    if target_ocp:
        output["ocp_target"] = target_ocp
    json.dump(output, sys.stdout, indent=2)
    sys.stdout.write("\n")
    return 0


def _operator_packages(raw):
    """Parse the ``--operators`` JSON and return one package name per entry.

    Each entry may be an object with a ``package`` key or a bare string.
    Entries without a usable package name yield ``""``.

    Raises:
        SystemExit: with ``error: invalid_operators`` when *raw* is not valid
            JSON or is not a JSON array.
    """
    try:
        parsed = json.loads(raw)
    except (TypeError, ValueError) as e:
        _fail("invalid_operators", detail=str(e))
    if not isinstance(parsed, list):
        _fail("invalid_operators", detail="expected a JSON array of operators")
    packages = []
    for entry in parsed:
        pkg = entry.get("package") if isinstance(entry, dict) else entry
        packages.append(pkg.strip() if isinstance(pkg, str) else "")
    return packages


def cmd_olm_check(args):
    """Handle the ``olm-check`` sub-command; return the process exit code.

    Looks each operator up by exact ``package`` match, first in the bulk
    "OpenShift" search and then via an individual search by package name.
    Operators with no match are listed under ``lifecycle_unavailable``.
    """
    packages = _operator_packages(args.operators)
    target = args.ocp

    by_package = {p["package"]: p for p in api_search("OpenShift") if p.get("package")}

    results = []
    missed_packages = []
    fallback = {}  # package -> product or None, so duplicates cost one query

    for pkg in packages:
        product = None
        # An empty package name must never be searched: ``?name=`` is the
        # unfiltered endpoint and "" could match a product with an empty package.
        if pkg:
            product = by_package.get(pkg)
            if not product:
                if pkg not in fallback:
                    extra = api_search(pkg)
                    fallback[pkg] = next((p for p in extra if p.get("package") == pkg), None)
                product = fallback[pkg]

        if not product:
            results.append({
                "package": pkg,
                "status": "unavailable",
                "reason": "no lifecycle data found",
            })
            missed_packages.append(pkg)
            continue

        for v in product.get("versions") or []:
            results.append(format_product_version(product, v, target_ocp=target))

    json.dump({
        "ocp_target": target,
        "operators_checked": len(packages),
        "lifecycle_unavailable": missed_packages,
        "results": results,
    }, sys.stdout, indent=2)
    sys.stdout.write("\n")
    return 0


def main():
    """Parse the command line and exit with the selected handler's status."""
    parser = argparse.ArgumentParser(
        description="Query Red Hat Product Life Cycle API (v2) for support status, EOL dates, and OCP compatibility.",
        epilog="Examples:\n"
        ' %(prog)s products "logging for Red Hat OpenShift"\n'
        ' %(prog)s products "logging for Red Hat OpenShift" --ocp 4.21\n'
        ' %(prog)s products "OpenShift" --limit 5\n'
        ' %(prog)s products "OpenShift" --limit 5 --offset 5\n'
        ' %(prog)s olm-check --ocp 4.21 --operators \'[{"package":"cluster-logging"}]\'\n',
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    subparsers = parser.add_subparsers(dest="command", required=True)

    p_products = subparsers.add_parser(
        "products",
        help="Query products by name. Maps to GET /v2/products?name=",
    )
    p_products.add_argument("name", help="Product name (substring match, e.g. 'logging for Red Hat OpenShift')")
    p_products.add_argument("--ocp", help="Check compatibility against this OCP version (e.g. 4.21)")
    p_products.add_argument("--limit", type=int, default=0, help="Max results to return (0 = all, default: all)")
    p_products.add_argument("--offset", type=int, default=0, help="Skip this many results (for pagination, default: 0)")

    p_olm = subparsers.add_parser(
        "olm-check",
        help="Batch check OLM operators against a target OCP version",
    )
    p_olm.add_argument("--ocp", required=True, help="Target OCP version (e.g. 4.21)")
    p_olm.add_argument(
        "--operators",
        required=True,
        help='JSON array of operators, e.g. \'[{"package":"cluster-logging"}]\'',
    )

    args = parser.parse_args()
    handlers = {"products": cmd_products, "olm-check": cmd_olm_check}
    sys.exit(handlers[args.command](args))


if __name__ == "__main__":
    main()
"2026-04-01T00:00:00.000Z", + "start_date_format": "string", + "end_date_format": "date", + }, + { + "name": "Full support", + "start_date": "2026-04-01T00:00:00.000Z", + "end_date": "Release of Logging 6.6 + 1 month", + "start_date_format": "date", + "end_date_format": "string", + }, + { + "name": "Maintenance support", + "start_date": "Release of Logging 6.6 + 1 month", + "end_date": "Release of Logging 6.7", + "start_date_format": "string", + "end_date_format": "string", + }, + ], + }, + { + "name": "5.9", + "type": "End of life", + "openshift_compatibility": "4.13, 4.14, 4.15, 4.16", + "phases": [ + { + "name": "General availability", + "start_date": "N/A", + "end_date": "2024-04-04T00:00:00.000Z", + "start_date_format": "string", + "end_date_format": "date", + }, + { + "name": "Full support", + "start_date": "2024-04-04T00:00:00.000Z", + "end_date": "2024-10-24T00:00:00.000Z", + "start_date_format": "date", + "end_date_format": "date", + }, + { + "name": "Maintenance support", + "start_date": "2024-10-24T00:00:00.000Z", + "end_date": "2025-11-03T00:00:00.000Z", + "start_date_format": "date", + "end_date_format": "date", + }, + ], + }, + ], +} + +SAMPLE_OCP_PRODUCT = { + "name": "Red Hat OpenShift Container Platform", + "package": None, + "is_operator": False, + "is_layered_product": False, + "is_retired": False, + "former_names": [], + "versions": [ + { + "name": "4.21", + "type": "Full Support", + "openshift_compatibility": None, + "phases": [ + { + "name": "General availability", + "start_date": "N/A", + "end_date": "2026-02-03T00:00:00.000Z", + "start_date_format": "string", + "end_date_format": "date", + }, + ], + }, + ], +} + +SCRIPT_PATH = os.path.join(os.path.dirname(__file__), "..", "plc_lookup.py") + + +# --------------------------------------------------------------------------- +# Unit tests (mocked API) +# --------------------------------------------------------------------------- + + +class TestApiSearchErrors(unittest.TestCase): + 
@patch("plc_lookup.urllib.request.urlopen") + def test_network_error(self, mock_urlopen): + mock_urlopen.side_effect = urllib.error.URLError("connection refused") + with self.assertRaises(SystemExit) as ctx: + plc_lookup.api_search("anything") + output = json.loads(str(ctx.exception)) + self.assertEqual(output["error"], "api_request_failed") + + @patch("plc_lookup.urllib.request.urlopen") + def test_http_error(self, mock_urlopen): + mock_urlopen.side_effect = urllib.error.HTTPError( + "http://example.com", 500, "Internal Server Error", {}, None + ) + with self.assertRaises(SystemExit) as ctx: + plc_lookup.api_search("anything") + output = json.loads(str(ctx.exception)) + self.assertEqual(output["error"], "api_request_failed") + + @patch("plc_lookup.urllib.request.urlopen") + def test_invalid_json(self, mock_urlopen): + mock_resp = MagicMock() + mock_resp.read.return_value = b"not json" + mock_resp.__enter__ = lambda s: s + mock_resp.__exit__ = MagicMock(return_value=False) + mock_urlopen.return_value = mock_resp + with self.assertRaises(SystemExit) as ctx: + plc_lookup.api_search("anything") + output = json.loads(str(ctx.exception)) + self.assertEqual(output["error"], "invalid_response") + + @patch("plc_lookup.urllib.request.urlopen") + def test_missing_data_key(self, mock_urlopen): + mock_resp = MagicMock() + mock_resp.read.return_value = json.dumps({"unexpected": "schema"}).encode() + mock_resp.__enter__ = lambda s: s + mock_resp.__exit__ = MagicMock(return_value=False) + mock_urlopen.return_value = mock_resp + with self.assertRaises(SystemExit) as ctx: + plc_lookup.api_search("anything") + output = json.loads(str(ctx.exception)) + self.assertEqual(output["error"], "unexpected_response") + + +class TestNormalizeStatus(unittest.TestCase): + def test_known_statuses(self): + self.assertEqual(plc_lookup.normalize_status("Full Support"), "supported") + self.assertEqual(plc_lookup.normalize_status("Maintenance Support"), "maintenance") + 
self.assertEqual(plc_lookup.normalize_status("Extended Support"), "extended") + self.assertEqual(plc_lookup.normalize_status("End of Maintenance"), "end-of-maintenance") + self.assertEqual(plc_lookup.normalize_status("End of life"), "eol") + + def test_unknown_status(self): + self.assertEqual(plc_lookup.normalize_status("Something New"), "unknown") + self.assertEqual(plc_lookup.normalize_status(""), "unknown") + + +class TestParseOcpVersions(unittest.TestCase): + def test_normal(self): + self.assertEqual(plc_lookup.parse_ocp_versions("4.19, 4.20, 4.21"), ["4.19", "4.20", "4.21"]) + + def test_none(self): + self.assertEqual(plc_lookup.parse_ocp_versions(None), []) + + def test_empty(self): + self.assertEqual(plc_lookup.parse_ocp_versions(""), []) + + def test_single(self): + self.assertEqual(plc_lookup.parse_ocp_versions("4.21"), ["4.21"]) + + +class TestExtractPhaseDate(unittest.TestCase): + def test_found(self): + version = SAMPLE_PRODUCT["versions"][0] + result = plc_lookup.extract_phase_date(version, "General availability") + self.assertEqual(result["date"], "2026-04-01T00:00:00.000Z") + self.assertEqual(result["format"], "date") + + def test_string_format(self): + version = SAMPLE_PRODUCT["versions"][0] + result = plc_lookup.extract_phase_date(version, "Full support") + self.assertEqual(result["date"], "Release of Logging 6.6 + 1 month") + self.assertEqual(result["format"], "string") + + def test_not_found(self): + version = SAMPLE_PRODUCT["versions"][0] + result = plc_lookup.extract_phase_date(version, "Nonexistent phase") + self.assertIsNone(result) + + def test_case_insensitive(self): + version = SAMPLE_PRODUCT["versions"][0] + result = plc_lookup.extract_phase_date(version, "general availability") + self.assertIsNotNone(result) + + def test_empty_phases(self): + result = plc_lookup.extract_phase_date({"phases": []}, "Full support") + self.assertIsNone(result) + + +class TestFormatProductVersion(unittest.TestCase): + def test_without_target(self): + result = 
plc_lookup.format_product_version(SAMPLE_PRODUCT, SAMPLE_PRODUCT["versions"][0]) + self.assertEqual(result["product"], "logging for Red Hat OpenShift") + self.assertEqual(result["package"], "cluster-logging") + self.assertEqual(result["version"], "6.5") + self.assertEqual(result["status"], "supported") + self.assertEqual(result["ocp_versions"], ["4.19", "4.20", "4.21"]) + self.assertNotIn("ocp_target", result) + self.assertNotIn("ocp_compatible", result) + + def test_with_compatible_target(self): + result = plc_lookup.format_product_version(SAMPLE_PRODUCT, SAMPLE_PRODUCT["versions"][0], target_ocp="4.21") + self.assertEqual(result["ocp_target"], "4.21") + self.assertTrue(result["ocp_compatible"]) + + def test_with_incompatible_target(self): + result = plc_lookup.format_product_version(SAMPLE_PRODUCT, SAMPLE_PRODUCT["versions"][0], target_ocp="4.16") + self.assertFalse(result["ocp_compatible"]) + + def test_ocp_product_no_compatibility(self): + result = plc_lookup.format_product_version(SAMPLE_OCP_PRODUCT, SAMPLE_OCP_PRODUCT["versions"][0], target_ocp="4.21") + self.assertIsNone(result["ocp_compatible"]) + + def test_eol_version(self): + result = plc_lookup.format_product_version(SAMPLE_PRODUCT, SAMPLE_PRODUCT["versions"][1]) + self.assertEqual(result["status"], "eol") + + +class TestPaginate(unittest.TestCase): + def test_no_limit(self): + items = list(range(10)) + page, meta = plc_lookup.paginate(items, limit=0, offset=0) + self.assertEqual(page, items) + self.assertEqual(meta["total"], 10) + self.assertEqual(meta["returned"], 10) + self.assertNotIn("next_offset", meta) + + def test_limit(self): + items = list(range(10)) + page, meta = plc_lookup.paginate(items, limit=3, offset=0) + self.assertEqual(page, [0, 1, 2]) + self.assertEqual(meta["total"], 10) + self.assertEqual(meta["returned"], 3) + self.assertEqual(meta["next_offset"], 3) + + def test_offset(self): + items = list(range(10)) + page, meta = plc_lookup.paginate(items, limit=3, offset=3) + 
self.assertEqual(page, [3, 4, 5]) + self.assertEqual(meta["next_offset"], 6) + + def test_last_page(self): + items = list(range(10)) + page, meta = plc_lookup.paginate(items, limit=3, offset=9) + self.assertEqual(page, [9]) + self.assertEqual(meta["returned"], 1) + self.assertNotIn("next_offset", meta) + + def test_offset_beyond_total(self): + items = list(range(5)) + page, meta = plc_lookup.paginate(items, limit=3, offset=10) + self.assertEqual(page, []) + self.assertEqual(meta["returned"], 0) + + def test_exact_boundary(self): + items = list(range(6)) + page, meta = plc_lookup.paginate(items, limit=3, offset=3) + self.assertEqual(page, [3, 4, 5]) + self.assertEqual(meta["returned"], 3) + self.assertNotIn("next_offset", meta) + + +class TestCmdProducts(unittest.TestCase): + @patch("plc_lookup.api_search") + def test_found(self, mock_search): + mock_search.return_value = [SAMPLE_PRODUCT] + args = MagicMock() + args.name = "logging" + args.ocp = None + args.limit = 0 + args.offset = 0 + from io import StringIO + with patch("sys.stdout", new_callable=StringIO) as mock_out: + ret = plc_lookup.cmd_products(args) + output = json.loads(mock_out.getvalue()) + self.assertEqual(ret, 0) + self.assertEqual(output["total"], 2) + self.assertEqual(output["returned"], 2) + self.assertEqual(output["results"][0]["version"], "6.5") + self.assertNotIn("ocp_target", output) + + @patch("plc_lookup.api_search") + def test_not_found(self, mock_search): + mock_search.return_value = [] + args = MagicMock() + args.name = "nonexistent" + args.ocp = None + args.limit = 0 + args.offset = 0 + from io import StringIO + with patch("sys.stdout", new_callable=StringIO) as mock_out: + ret = plc_lookup.cmd_products(args) + output = json.loads(mock_out.getvalue()) + self.assertEqual(ret, 1) + self.assertIn("error", output) + + @patch("plc_lookup.api_search") + def test_with_ocp_flag(self, mock_search): + mock_search.return_value = [SAMPLE_PRODUCT] + args = MagicMock() + args.name = "logging" + 
args.ocp = "4.21" + args.limit = 0 + args.offset = 0 + from io import StringIO + with patch("sys.stdout", new_callable=StringIO) as mock_out: + ret = plc_lookup.cmd_products(args) + output = json.loads(mock_out.getvalue()) + self.assertEqual(ret, 0) + self.assertEqual(output["ocp_target"], "4.21") + compatible = [r for r in output["results"] if r["ocp_compatible"]] + incompatible = [r for r in output["results"] if r["ocp_compatible"] is False] + self.assertGreater(len(compatible), 0) + self.assertGreater(len(incompatible), 0) + + @patch("plc_lookup.api_search") + def test_with_pagination(self, mock_search): + mock_search.return_value = [SAMPLE_PRODUCT] + args = MagicMock() + args.name = "logging" + args.ocp = None + args.limit = 1 + args.offset = 0 + from io import StringIO + with patch("sys.stdout", new_callable=StringIO) as mock_out: + ret = plc_lookup.cmd_products(args) + output = json.loads(mock_out.getvalue()) + self.assertEqual(ret, 0) + self.assertEqual(output["total"], 2) + self.assertEqual(output["returned"], 1) + self.assertEqual(output["next_offset"], 1) + self.assertEqual(output["results"][0]["version"], "6.5") + + @patch("plc_lookup.api_search") + def test_with_pagination_second_page(self, mock_search): + mock_search.return_value = [SAMPLE_PRODUCT] + args = MagicMock() + args.name = "logging" + args.ocp = None + args.limit = 1 + args.offset = 1 + from io import StringIO + with patch("sys.stdout", new_callable=StringIO) as mock_out: + ret = plc_lookup.cmd_products(args) + output = json.loads(mock_out.getvalue()) + self.assertEqual(ret, 0) + self.assertEqual(output["total"], 2) + self.assertEqual(output["returned"], 1) + self.assertNotIn("next_offset", output) + self.assertEqual(output["results"][0]["version"], "5.9") + + +class TestCmdOlmCheck(unittest.TestCase): + @patch("plc_lookup.api_search") + def test_found_in_batch(self, mock_search): + mock_search.return_value = [SAMPLE_PRODUCT, SAMPLE_OCP_PRODUCT] + args = MagicMock() + args.ocp = "4.21" + 
args.operators = '[{"package":"cluster-logging"}]' + from io import StringIO + with patch("sys.stdout", new_callable=StringIO) as mock_out: + ret = plc_lookup.cmd_olm_check(args) + output = json.loads(mock_out.getvalue()) + self.assertEqual(ret, 0) + self.assertEqual(output["operators_checked"], 1) + self.assertEqual(output["lifecycle_unavailable"], []) + self.assertGreater(len(output["results"]), 0) + + @patch("plc_lookup.api_search") + def test_not_found(self, mock_search): + mock_search.return_value = [] + args = MagicMock() + args.ocp = "4.21" + args.operators = '[{"package":"no-such-operator"}]' + from io import StringIO + with patch("sys.stdout", new_callable=StringIO) as mock_out: + ret = plc_lookup.cmd_olm_check(args) + output = json.loads(mock_out.getvalue()) + self.assertEqual(ret, 0) + self.assertEqual(output["lifecycle_unavailable"], ["no-such-operator"]) + self.assertEqual(output["results"][0]["status"], "unavailable") + + +# --------------------------------------------------------------------------- +# Integration tests (live API — opt-in; skipped unless PLC_INTEGRATION_TESTS=1) +# --------------------------------------------------------------------------- + + +@unittest.skipUnless( + os.environ.get("PLC_INTEGRATION_TESTS", "0") == "1", + "Set PLC_INTEGRATION_TESTS=1 to run live API tests", +) +class TestLiveAPI(unittest.TestCase): + """Tests against the real Red Hat Product Life Cycle API.""" + + def _run_cli(self, *args): + result = subprocess.run( + [sys.executable, SCRIPT_PATH, *args], + capture_output=True, text=True, timeout=30, + ) + return result + + def test_products_logging(self): + r = self._run_cli("products", "logging for Red Hat OpenShift") + self.assertEqual(r.returncode, 0, r.stderr) + output = json.loads(r.stdout) + self.assertGreater(output["total"], 0) + self.assertEqual(output["results"][0]["package"], "cluster-logging") + self.assertNotIn("ocp_target", output) + + def test_products_ocp(self): + r = self._run_cli("products", "OpenShift Container
Platform") + self.assertEqual(r.returncode, 0, r.stderr) + output = json.loads(r.stdout) + names = {entry["product"] for entry in output["results"]} + self.assertTrue(any("OpenShift" in n for n in names)) + + def test_products_not_found(self): + r = self._run_cli("products", "xyzzy_nonexistent_product_12345") + self.assertEqual(r.returncode, 1) + output = json.loads(r.stdout) + self.assertIn("error", output) + + def test_products_with_ocp_compatible(self): + r = self._run_cli("products", "logging for Red Hat OpenShift", "--ocp", "4.21") + self.assertEqual(r.returncode, 0, r.stderr) + output = json.loads(r.stdout) + self.assertEqual(output["ocp_target"], "4.21") + compatible = [e for e in output["results"] if e.get("ocp_compatible")] + self.assertGreater(len(compatible), 0, "Expected at least one compatible version") + + def test_products_pagination(self): + r = self._run_cli("products", "logging for Red Hat OpenShift", "--limit", "3") + self.assertEqual(r.returncode, 0, r.stderr) + output = json.loads(r.stdout) + self.assertEqual(output["returned"], 3) + self.assertGreater(output["total"], 3) + self.assertEqual(output["next_offset"], 3) + + def test_products_pagination_second_page(self): + r = self._run_cli("products", "logging for Red Hat OpenShift", "--limit", "3", "--offset", "3") + self.assertEqual(r.returncode, 0, r.stderr) + output = json.loads(r.stdout) + self.assertEqual(output["offset"], 3) + self.assertLessEqual(output["returned"], 3) + + def test_products_with_ocp_incompatible(self): + r = self._run_cli("products", "logging for Red Hat OpenShift", "--ocp", "3.11") + self.assertEqual(r.returncode, 0, r.stderr) + output = json.loads(r.stdout) + compatible = [e for e in output["results"] if e.get("ocp_compatible")] + self.assertEqual(len(compatible), 0, "No logging version should be compatible with OCP 3.11") + + def test_olm_check_known_operator(self): + r = self._run_cli("olm-check", "--ocp", "4.21", "--operators", '[{"package":"cluster-logging"}]') + 
self.assertEqual(r.returncode, 0, r.stderr) + output = json.loads(r.stdout) + self.assertEqual(output["operators_checked"], 1) + self.assertEqual(output["lifecycle_unavailable"], []) + self.assertGreater(len(output["results"]), 0) + + def test_olm_check_unknown_operator(self): + r = self._run_cli("olm-check", "--ocp", "4.21", "--operators", '[{"package":"no-such-operator-xyz"}]') + self.assertEqual(r.returncode, 0, r.stderr) + output = json.loads(r.stdout) + self.assertIn("no-such-operator-xyz", output["lifecycle_unavailable"]) + + def test_olm_check_mixed(self): + operators = json.dumps([ + {"package": "cluster-logging"}, + {"package": "no-such-operator-xyz"}, + ]) + r = self._run_cli("olm-check", "--ocp", "4.21", "--operators", operators) + self.assertEqual(r.returncode, 0, r.stderr) + output = json.loads(r.stdout) + self.assertEqual(output["operators_checked"], 2) + self.assertIn("no-such-operator-xyz", output["lifecycle_unavailable"]) + found = [e for e in output["results"] if e.get("package") == "cluster-logging"] + self.assertGreater(len(found), 0) + + def test_status_values_are_normalized(self): + r = self._run_cli("products", "logging for Red Hat OpenShift") + self.assertEqual(r.returncode, 0, r.stderr) + output = json.loads(r.stdout) + valid_statuses = {"supported", "maintenance", "extended", "end-of-maintenance", "eol", "unknown"} + for entry in output["results"]: + self.assertIn(entry["status"], valid_statuses, f"Unexpected status: {entry['status']}") + + def test_help_flag(self): + r = self._run_cli("-h") + self.assertEqual(r.returncode, 0) + self.assertIn("products", r.stdout) + self.assertIn("olm-check", r.stdout) + + def test_subcommand_help(self): + for cmd in ["products", "olm-check"]: + r = self._run_cli(cmd, "-h") + self.assertEqual(r.returncode, 0, f"{cmd} -h failed") + + +if __name__ == "__main__": + unittest.main() diff --git a/cluster-update/update-advisor/SKILL.md b/cluster-update/update-advisor/SKILL.md index 4e8302c..e75834c 100644 --- 
a/cluster-update/update-advisor/SKILL.md +++ b/cluster-update/update-advisor/SKILL.md @@ -5,7 +5,7 @@ description: Assess OpenShift cluster update (upgrade) readiness and risk. Use w # Cluster Update Advisor -## Purpose +## 1. Purpose Assess cluster update readiness and produce a structured risk report with actionable prerequisites, blockers, and recommendations. @@ -15,12 +15,12 @@ gathered by the Cluster Version Operator. Analyze this data, classify findings, and produce a decision with evidence. Do not re-collect cluster data — it is already in the request. -## Inputs +## 2. Inputs The proposal request contains: - Current and target version metadata - Channel and update path information -- **Cluster readiness JSON** — cluster health checks with context relevant to preparing for the update +- **Cluster readiness JSON** — pre-collected by CVO with results from 9 parallel checks The readiness JSON is embedded in the request between ` ```json ` markers under the "Cluster Readiness Data" heading. Parse it to begin analysis. @@ -41,39 +41,96 @@ the "Cluster Readiness Data" heading. Parse it to begin analysis. "network": { "_status": "ok", "summary": {...}, ... }, "crd_compat": { "_status": "ok", "summary": {...}, ... }, "olm_operator_lifecycle": { "_status": "ok", "summary": {...}, ... } + }, + "meta": { + "total_checks": 9, + "checks_ok": 9, + "checks_errored": 0, + "elapsed_seconds": 0.65 } } ``` -Each check contains `_status` (`ok` or `error`) and check-specific data -with a `summary` section for quick parsing. +Each check contains `_status` (`ok` or `error`), `_elapsed_seconds`, and +check-specific data with a `summary` section for quick parsing. 
-## Evaluation +### What the checks cover -### Parse readiness data +| Check key | What it assesses | +|---|---| +| `cluster_conditions` | CVO's existing conditions (Upgradeable, Progressing, RetrievedUpdates), update history, channel | +| `operator_health` | Per-ClusterOperator breakdown (Upgradeable, Degraded, Available), MachineConfigPool status | +| `api_deprecations` | Deprecated/removed API usage by workloads via APIRequestCount | +| `node_capacity` | Node readiness, schedulability | +| `pdb_drain` | PodDisruptionBudgets that could block node drains | +| `etcd_health` | etcd pod health, member status, operator conditions | +| `network` | Network plugin type (SDN vs OVN), TLS profile, proxy config | +| `crd_compat` | CRD stored/served version mismatches, operator subscriptions | +| `olm_operator_lifecycle` | Per-operator installed version, OCP compatibility, update policy, pending upgrades, channel info | -Extract the JSON from the proposal request. -Count checks with `_status` `ok` vs `error` for completeness. +## 3. When to Investigate Further -### Verify data completeness +After analyzing the readiness JSON, use other skills to dig deeper into +specific findings: -Any check with `_status` `error` represents a gap in visibility. -Note incomplete areas — they reduce confidence. +- **`prometheus`** — if `etcd_health` shows degraded conditions, query + `etcd_disk_backend_commit_duration_seconds` for trends +- **`openshift-docs`** — if `network` check shows SDN, read the migration + guide for the target version +- **`jira`** — search Jira for bugs against the target version and known upgrade issues +- **`product-lifecycle`** — if `olm_operator_lifecycle` data is present, + cross-reference installed operators with Red Hat Product Life Cycle data + to check support status, EOL dates, and OCP version compatibility. + Use `cluster-update/product-lifecycle/scripts/plc_lookup.py olm-check --ocp <target-version> --operators '<operators-json>'` for batch lookups -### Evaluate findings in detail +## 4.
Decision Policy -If the system prompt includes organization-specific policy (thresholds, scheduling preferences, risk tolerance), apply those constraints. -Otherwise use sensible defaults. -Walk through each check's summary and detail data: +### 4.1 Workflow -- Compare numeric thresholds (node headroom, etcd backup age) -- Evaluate conditional update risks against cluster state -- Identify compounding risks (e.g., paused MCP + cert expiry) -- Estimate update duration (~10 min/node) +``` +Step 1: Parse readiness data + Extract the JSON from the proposal request. Review + meta.checks_ok vs meta.total_checks for completeness. + │ +Step 2: Verify data completeness + Any check with _status "error" represents a gap in + visibility. Note incomplete areas — they reduce confidence. + │ +Step 3: Evaluate findings + If the system prompt includes organization-specific policy + (thresholds, scheduling preferences, risk tolerance), apply + those constraints. Otherwise use sensible defaults. + Walk through each check's summary and detail data: + - Compare numeric thresholds (node headroom, etcd backup age) + - Evaluate conditional update risks against cluster state + - Identify compounding risks (e.g., paused MCP + cert expiry) + - Estimate upgrade duration (~10 min/node) + │ +Step 4: Classify and decide + Assign each finding a severity per the classification table + in section 4.2. Then determine the overall assessment: + recommend — all checks pass within acceptable thresholds + caution — findings exist but manageable with prerequisites + block — findings must be resolved before upgrade + escalate — insufficient data for confident assessment + │ +Step 5: Investigate (as needed) + Use prometheus, openshift-docs, jira, or + product-lifecycle skills for deeper analysis. + │ + ▼ + Produce structured risk report +``` -### Classify findings +### 4.2 Blocker Classification -Assign each finding a severity per the classification table.
+| Severity | Criteria | Action | +|---|---|---| +| **Blocker** | Upgrade will fail or cause data loss | `decision: block` | +| **Warning** | Upgrade may cause disruption | `decision: caution` | +| **Info** | Noteworthy but non-blocking | Include for awareness | + +Classification rules: | Check | Blocker if... | Warning if... | |---|---|---| @@ -86,67 +143,55 @@ Assign each finding a severity per the classification table. | etcd health | Any member unhealthy | No recent backup (within 24h) | | Network plugin | SDN in use and target requires OVN (4.17+) | Using deprecated SDN (< 4.17) | | CRD compatibility | Stored version not served; operator maxOpenShiftVersion < target | Deprecated versions still served | -| OLM operator lifecycle | Installed operator incompatible with target OCP; operator product EOL | Operator has pending update; operator product in Maintenance Support | - -For other checks, treat an issue as a blocker if would cause data loss, a performance regression, or a failed update. -Treat the issue as a warning if would cause temporary disruption or slow updates. - -#### Investigate with other skills - -If additional information or context is needed to classify a finding, these skills may be useful: - -- **`openshift-docs`** — Read official OpenShift update docs for version-specific - procedures and breaking changes. - -- **`prometheus`** — Query cluster metrics for trend analysis (etcd latency, - CPU headroom, firing alerts). - -- **`jira`** — Search Red Hat Jira for bugs and known issues affecting the target version. - -- **`product-lifecycle`** — Query Red Hat Product Life Cycle API to check - support status and OCP compatibility for installed operators. Use the operator's - `package` name from OLM readiness data to look up entries via the `package` - field (exact match). Flag operators whose product version is End of life or whose - `openshift_compatibility` does not include the target OCP version. 
+| OLM operator lifecycle | Installed operator incompatible with target OCP; operator product EOL (via Product Life Cycle API) | Operator has pending upgrade; operator product in Maintenance Support (via Product Life Cycle API) | -### Classify overall recommendation - -Aggregate finding classification, and and make a decision on the overall assessment: - -* escalate — insufficient data for confident assessment. -* block — findings must be resolved before update. -* warn — findings exist but manageable with prerequisites. -* recommend — all checks pass within acceptable thresholds. +### 4.3 Decision Matrix | Blockers | Warnings | Decision | |---|---|---| -| Unable to assess | any | `escalate` | -| 1+ | any | `block` | -| 0 | 1+ | `warn` | | 0 | 0 | `recommend` | +| 0 | 1+ | `caution` | +| 1+ | any | `block` | +| Unable to assess | any | `escalate` | -### Produce a structured risk report +### 4.4 Output The output schema is enforced by the OlsAgent CR's `outputSchema` field — the operator handles structured output compliance via the LLM API. -## Failure Modes — What NOT to Do +## 5. Failure Modes — What NOT to Do -1. **Never recommend updating without analyzing the readiness data.** The JSON +1. **Never recommend upgrading without analyzing the readiness data.** The JSON in the request is the source of truth. 2. **Never dismiss conditional update risks.** If the update path is conditional, evaluate each risk against the cluster. 3. **Never skip the API deprecation check.** Workloads using removed APIs will - break after the update. + break after upgrade. 4. **Never assume etcd is healthy.** Always check member health in the readiness data. 5. **Never fabricate Jira issue keys, KB article IDs, or CVE numbers.** Use the `redhat-support` skill to get real data. -6. **Never recommend skipping an update version** unless the readiness data shows +6. **Never recommend skipping an upgrade version** unless the readiness data shows that path exists. -7. 
**Never recommend force-updating.** If the standard path is blocked, report it. +7. **Never recommend force-upgrading.** If the standard path is blocked, report it. + +## 6. Using Other Skills + +- **`openshift-docs`** — Read official OpenShift upgrade docs for version-specific + procedures and breaking changes. + +- **`prometheus`** — Query cluster metrics for trend analysis (etcd latency, + CPU headroom, firing alerts). + +- **`jira`** — Search Red Hat Jira for bugs and known issues affecting the target version. + +- **`product-lifecycle`** — Query Red Hat Product Life Cycle API to check + support status and OCP compatibility for installed operators. Use + `cluster-update/product-lifecycle/scripts/plc_lookup.py olm-check --ocp <target-version> --operators '<operators-json>'` for batch + lookups. Flag operators whose product version is End of life or whose + `openshift_compatibility` does not include the target OCP version. diff --git a/evals/skills/product-lifecycle/README.md b/evals/skills/product-lifecycle/README.md new file mode 100644 index 0000000..9c34e7a --- /dev/null +++ b/evals/skills/product-lifecycle/README.md @@ -0,0 +1,132 @@ +# product-lifecycle eval + +Tests that the agent can use `plc_lookup.py` to query the Red Hat Product Life Cycle API for product support status, EOL dates, and OCP version compatibility. + +## Prerequisites + +### Cluster + +A live OpenShift cluster is required. The test cases embed operator metadata in the query (so the lightspeed operator does not need to be installed), but the agent still probes the cluster for deeper investigation and calls `plc_lookup.py` which queries the live [Red Hat Product Life Cycle API](https://access.redhat.com/support/policy/update_policies).
+ +The following OLM operators must be installed on the cluster: + +| Operator | Package | Version | Channel | +|---|---|---|---| +| Red Hat OpenShift Logging | `cluster-logging` | 6.5.1 | stable-6.5 | +| Compliance Operator | `compliance-operator` | 1.9.0 | stable | +| Red Hat OpenShift Pipelines | `openshift-pipelines-operator-rh` | 1.22.0 | latest | +| Web Terminal | `web-terminal` | 1.16.0 | fast | +| DevWorkspace Operator | `devworkspace-operator` | 0.41.0 | fast | + +Cluster version: OCP 4.21.5 (GCP, 6 nodes: 3 master + 3 worker) + +### Install operators (if reproducing on a fresh cluster) + +```bash +# cluster-logging +oc create ns openshift-logging +oc apply -f - < + A cluster upgrade proposal from OCP 4.21.5 to 4.21.16 shows + cluster-logging v6.5.1 is installed (package: cluster-logging). + + Use the product-lifecycle skill to check: is cluster-logging compatible + with OCP 4.21? What is its current lifecycle status? + schema: + type: object + properties: + product_found: + type: boolean + description: "Whether cluster-logging was found in the PLC API. Use the 'product-lifecycle' skill." + status: + type: string + enum: ["supported", "maintenance", "extended", "end-of-maintenance", "eol", "unknown"] + description: "Normalized lifecycle status of the version compatible with OCP 4.21" + ocp_compatible: + type: boolean + description: "Whether the product is compatible with OCP 4.21" + required: ["product_found", "status", "ocp_compatible"] + expected: + product_found: true + status: "supported" + ocp_compatible: true + +# Test 3: Verify web-terminal has no OCP 4.21-compatible version. +# web-terminal v1.15 supports OCP 4.20, v1.16 maps to a newer OCP. +# The PLC API does NOT list 4.21 in any web-terminal version's ocp_versions. +- name: plc_web_terminal_compat_check + query: > + A cluster running OCP 4.21 has web-terminal v1.16.0 installed + (package: web-terminal). 
+ + Use the product-lifecycle skill to check if web-terminal has a + version that is compatible with OCP 4.21 and currently in full support. + schema: + type: object + properties: + product_found: + type: boolean + description: "Whether web-terminal was found in the PLC API. Use the 'product-lifecycle' skill." + has_supported_version_for_421: + type: boolean + description: "Whether any web-terminal version is both 'supported' and compatible with OCP 4.21" + required: ["product_found", "has_supported_version_for_421"] + expected: + product_found: true + has_supported_version_for_421: false + +# Test 4: compliance-operator — found by product name search, v1.9 is supported. +# Note: olm-check returns "unavailable" for this package, but products search +# finds "compliance operator" as a product name. The agent may use either command. +- name: plc_compliance_operator_status + query: > + During an upgrade readiness check, the compliance-operator v1.9.0 was + found installed (package: compliance-operator). + + Use the product-lifecycle skill to check the lifecycle status of + compliance-operator v1.9. Is it currently supported? + schema: + type: object + properties: + product_found: + type: boolean + description: "Whether the PLC API has lifecycle data for this operator. Use the 'product-lifecycle' skill." + status: + type: string + enum: ["supported", "maintenance", "extended", "end-of-maintenance", "eol", "unknown", "unavailable"] + description: "Normalized lifecycle status of compliance-operator v1.9" + required: ["product_found", "status"] + expected: + product_found: true + status: "supported" + +# Test 5: OCP platform lifecycle check — verify OCP 4.21 is supported. +- name: plc_ocp_platform_status + query: > + Before upgrading from OCP 4.21.5 to 4.21.16, verify that the + target OCP version 4.21 is still in full support. + + Use the product-lifecycle skill to look up "Red Hat OpenShift + Container Platform" and check version 4.21's lifecycle status. 
+ schema: + type: object + properties: + product_found: + type: boolean + description: "Whether OCP was found in the PLC API. Use the 'product-lifecycle' skill." + status: + type: string + enum: ["supported", "maintenance", "extended", "end-of-maintenance", "eol", "unknown"] + description: "Normalized lifecycle status of OCP 4.21" + required: ["product_found", "status"] + expected: + product_found: true + status: "supported" + +# Test 6: Older OCP version lifecycle — OCP 4.14 is in Extended Support. +- name: plc_ocp_old_version_extended + query: > + Use the product-lifecycle skill to check the lifecycle status + of Red Hat OpenShift Container Platform version 4.14. + Is it still in full support, or has it moved to a later phase? + schema: + type: object + properties: + product_found: + type: boolean + description: "Whether OCP was found in the PLC API. Use the 'product-lifecycle' skill." + status: + type: string + enum: ["supported", "maintenance", "extended", "end-of-maintenance", "eol", "unknown"] + description: "Normalized lifecycle status of OCP 4.14" + required: ["product_found", "status"] + expected: + product_found: true + status: "extended" + +# Test 7: Multiple operators batch check from proposal context. +# Only test the 2 operators that HAVE lifecycle data (cluster-logging, web-terminal). +- name: plc_batch_known_operators_only + query: > + From a proposal's readiness data, two operators need lifecycle verification: + + 1. cluster-logging (package: cluster-logging) + 2. web-terminal (package: web-terminal) + + Use the product-lifecycle skill to run olm-check for these 2 operators + against OCP 4.21. Are both products found? Is either one end-of-life? + schema: + type: object + properties: + check_completed: + type: boolean + description: "Whether plc_lookup.py olm-check ran successfully. Use the 'product-lifecycle' skill." 
+ both_found: + type: boolean + description: "Whether lifecycle data was found for both operators" + any_eol: + type: boolean + description: "Whether any version of either operator that is compatible with OCP 4.21 is end-of-life" + required: ["check_completed", "both_found", "any_eol"] + expected: + check_completed: true + both_found: true + any_eol: false diff --git a/evals/skills/update-advisor/README.md b/evals/skills/update-advisor/README.md new file mode 100644 index 0000000..5d384b6 --- /dev/null +++ b/evals/skills/update-advisor/README.md @@ -0,0 +1,64 @@ +# update-advisor eval + +Tests that the agent correctly analyzes CVO readiness data and produces the right upgrade decision (`recommend`, `caution`, `block`, or `escalate`) following the classification rules in `cluster-update/update-advisor/SKILL.md`. + +## Prerequisites + +### Cluster + +A live OpenShift cluster is required. The test cases embed readiness JSON in the query (so the lightspeed operator does not need to be installed), but the agent still probes the cluster for deeper investigation — querying prometheus for metrics, checking operator status, etc. + +The readiness JSON in each test case matches the exact format CVO proposals produce. It replaces the need for the lightspeed operator and CVO proposal controller, but does not replace the cluster itself. 
+ +The readiness JSON format was validated against a real OCP 4.21.5 cluster on GCP (6 nodes: 3 master + 3 worker) with these OLM operators installed: + +| Operator | Package | Version | Channel | +|---|---|---|---| +| Red Hat OpenShift Logging | `cluster-logging` | 6.5.1 | stable-6.5 | +| Compliance Operator | `compliance-operator` | 1.9.0 | stable | +| Red Hat OpenShift Pipelines | `openshift-pipelines-operator-rh` | 1.22.0 | latest | +| Web Terminal | `web-terminal` | 1.16.0 | fast | +| DevWorkspace Operator | `devworkspace-operator` | 0.41.0 | fast | + +### CVO readiness data source + +The readiness JSON was generated by `pkg/readiness` from [openshift/cluster-version-operator#1395](https://github.com/openshift/cluster-version-operator/pull/1395), which adds 9 parallel cluster health checks: + +| Check | What it assesses | +|---|---| +| `cluster_conditions` | CVO conditions, update history, channel | +| `operator_health` | ClusterOperator status, MachineConfigPool state | +| `api_deprecations` | Deprecated/removed API usage | +| `node_capacity` | Node readiness, schedulability | +| `pdb_drain` | PDB drain blockers | +| `etcd_health` | etcd member health | +| `network` | Network plugin, TLS, proxy | +| `crd_compat` | CRD version mismatches | +| `olm_operator_lifecycle` | OLM operator versions, OCP compatibility | + +## Test scenarios + +| Test | Scenario | Expected decision | +|---|---|---| +| `advisor_healthy_cluster_recommend` | All 9 checks pass, 0 issues | `recommend` | +| `advisor_degraded_operator_caution` | 1 degraded operator (authentication) | `caution` | +| `advisor_api_deprecation_block` | Removed API with 1250 active requests | `block` | +| `advisor_etcd_unhealthy_block` | 1 of 3 etcd members not ready | `block` | +| `advisor_errored_checks_escalate` | 7 of 9 checks errored (API unreachable) | `escalate` | + +## Running + +```bash +bash evals/run.sh -k "update-advisor" +``` + +## Decision matrix reference + +From 
`cluster-update/update-advisor/SKILL.md`: + +| Blockers | Warnings | Decision | +|---|---|---| +| Unable to assess | any | `escalate` | +| 1+ | any | `block` | +| 0 | 1+ | `caution` | +| 0 | 0 | `recommend` | diff --git a/evals/skills/update-advisor/system_prompt.md b/evals/skills/update-advisor/system_prompt.md new file mode 100644 index 0000000..28d1c97 --- /dev/null +++ b/evals/skills/update-advisor/system_prompt.md @@ -0,0 +1,19 @@ +You are an OpenShift upgrade advisor. Analyze the cluster readiness +data in the proposal request and produce an upgrade risk assessment. + +The request contains a "Cluster Readiness Data" section with a JSON +block. This was collected by the Cluster Version Operator — do not +re-collect it. Parse the JSON, evaluate each check's results, and +classify findings as blockers, warnings, or informational. + +Use the cluster-update-advisor skill for the decision framework and +blocker classification rules. When findings need deeper investigation, +use prometheus, platform-docs, redhat-support, or product-lifecycle +skills. + +When the readiness data includes olm_operator_lifecycle results, use +the product-lifecycle skill to cross-reference each operator's package +name against the Red Hat Product Life Cycle API. Report support phase, +EOL dates, and OCP compatibility from PLCC alongside the OLM data. + +Do not guess or assume cluster state. Do not execute upgrade commands. diff --git a/evals/skills/update-advisor/test_cases.yaml b/evals/skills/update-advisor/test_cases.yaml new file mode 100644 index 0000000..58209ef --- /dev/null +++ b/evals/skills/update-advisor/test_cases.yaml @@ -0,0 +1,176 @@ +# update-advisor skill eval test cases +# +# These test cases send real CVO proposal requests to the agent and verify +# the upgrade decision (recommend/caution/block/escalate) based on the +# readiness data. Each case embeds a complete readiness JSON matching the +# exact format CVO produces. 
+# +# Decision matrix (from SKILL.md): +# 0 blockers, 0 warnings → recommend +# 0 blockers, 1+ warnings → caution +# 1+ blockers, any → block +# Unable to assess → escalate + +# Test 1: Healthy cluster — all checks pass, no issues. +# Expected decision: recommend +- name: advisor_healthy_cluster_recommend + query: | + Current version: OCP 4.21.5 + Target version: OCP 4.21.16 + Channel: stable-4.21 + Update type: Patch + Update path: Recommended + + ## Cluster Readiness Data + + ```json + {"current_version":"4.21.5","target_version":"4.21.16","checks":{"cluster_conditions":{"_status":"ok","_elapsed_seconds":0.01,"channel":"stable-4.21","cluster_id":"test-id","conditions":{"Available":{"status":"True","reason":"","message":"Done applying 4.21.5","last_transition":"2026-05-28T12:36:04Z"},"Progressing":{"status":"False","reason":"","message":"Cluster version is 4.21.5","last_transition":"2026-05-28T14:42:57Z"},"Upgradeable":{"status":"True","reason":"AsExpected","message":"","last_transition":"2026-05-28T12:06:46Z"}},"recent_history":[{"version":"4.21.5","state":"Completed","startedTime":"2026-05-28T12:06:46Z","completionTime":"2026-05-28T12:36:04Z"}],"summary":{"current_version":"4.21.5","update_in_progress":false,"upgradeable":true},"update_in_progress":false,"upgradeable":{"status":"True","reason":"AsExpected","message":""}},"operator_health":{"_status":"ok","_elapsed_seconds":0.6,"degraded":[],"not_available":[],"not_upgradeable":[],"machine_config_pools":[{"name":"master","paused":false,"degraded":false,"updating":false,"machine_count":3,"ready_count":3,"updated_count":3},{"name":"worker","paused":false,"degraded":false,"updating":false,"machine_count":3,"ready_count":3,"updated_count":3}],"mcp_summary":{"degraded":0,"paused":0,"updating":0},"summary":{"total_operators":34,"degraded_count":0,"not_available_count":0,"not_upgradeable_count":0}},"api_deprecations":{"_status":"ok","_elapsed_seconds":0.01,"blocker_apis":[],"warning_apis":[],"summary":{"blockers":0,"warn
ings":0,"total":0}},"node_capacity":{"_status":"ok","_elapsed_seconds":0.02,"total_nodes":6,"ready_nodes":6,"unschedulable_nodes":0,"summary":{"total":6,"ready":6,"not_ready":0,"unschedulable":0}},"pdb_drain":{"_status":"ok","_elapsed_seconds":0.01,"total_pdbs":21,"blocking_pdbs":[],"summary":{"total":21,"blocking":0}},"etcd_health":{"_status":"ok","_elapsed_seconds":0.2,"total_members":3,"healthy_members":3,"members":[{"name":"etcd-master-0","node":"master-0","phase":"Running","ready":true},{"name":"etcd-master-1","node":"master-1","phase":"Running","ready":true},{"name":"etcd-master-2","node":"master-2","phase":"Running","ready":true}],"operator_conditions":{"Available":{"status":"True","reason":"AsExpected","message":"3 members available"},"Degraded":{"status":"False","reason":"AsExpected","message":"No unhealthy members"},"Upgradeable":{"status":"True","reason":"AsExpected","message":"All is well"}},"summary":{"operator_available":true,"operator_degraded":false}},"network":{"_status":"ok","_elapsed_seconds":0.4,"network_type":"OVNKubernetes","proxy":{"http_proxy":"","https_proxy":"","no_proxy":""},"tls_profile":"Intermediate","summary":{"network_type":"OVNKubernetes","is_sdn":false}},"crd_compat":{"_status":"ok","_elapsed_seconds":0.5,"total_crds":160,"version_issues":[],"summary":{"total_crds":160,"version_issues":0}},"olm_operator_lifecycle":{"_status":"ok","_elapsed_seconds":2.0,"operators":[],"summary":{"total_operators":0,"pending_upgrades":0,"manual_approval":0,"incompatible_with_target":0}}},"meta":{"total_checks":9,"checks_ok":9,"checks_errored":0,"elapsed_seconds":2.5}} + ``` + schema: + type: object + properties: + decision: + type: string + enum: ["recommend", "caution", "block", "escalate"] + description: "The upgrade decision based on the readiness data. Use the 'cluster-update-advisor' skill's decision policy." 
+ blockers_found: + type: integer + description: "Number of blocking issues found" + warnings_found: + type: integer + description: "Number of warning issues found" + required: ["decision", "blockers_found"] + expected: + decision: "recommend" + blockers_found: 0 + +# Test 2: Cluster with degraded operator — warning. +# Expected decision: caution +- name: advisor_degraded_operator_caution + query: | + Current version: OCP 4.21.5 + Target version: OCP 4.21.16 + Channel: stable-4.21 + Update type: Patch + Update path: Recommended + + ## Cluster Readiness Data + + ```json + {"current_version":"4.21.5","target_version":"4.21.16","checks":{"cluster_conditions":{"_status":"ok","_elapsed_seconds":0.01,"channel":"stable-4.21","cluster_id":"test-id","conditions":{"Available":{"status":"True","reason":"","message":"Done applying 4.21.5","last_transition":"2026-05-28T12:36:04Z"},"Progressing":{"status":"False","reason":"","message":"Cluster version is 4.21.5","last_transition":"2026-05-28T14:42:57Z"},"Upgradeable":{"status":"True","reason":"AsExpected","message":"","last_transition":"2026-05-28T12:06:46Z"}},"recent_history":[{"version":"4.21.5","state":"Completed"}],"summary":{"current_version":"4.21.5","update_in_progress":false,"upgradeable":true},"update_in_progress":false,"upgradeable":{"status":"True","reason":"AsExpected","message":""}},"operator_health":{"_status":"ok","_elapsed_seconds":0.6,"degraded":[{"name":"authentication","conditions":{"Degraded":{"status":"True","reason":"OAuthFlaky","message":"oauth pods intermittently 
failing"}}}],"not_available":[],"not_upgradeable":[],"machine_config_pools":[{"name":"master","paused":false,"degraded":false,"updating":false,"machine_count":3,"ready_count":3,"updated_count":3},{"name":"worker","paused":false,"degraded":false,"updating":false,"machine_count":3,"ready_count":3,"updated_count":3}],"mcp_summary":{"degraded":0,"paused":0,"updating":0},"summary":{"total_operators":34,"degraded_count":1,"not_available_count":0,"not_upgradeable_count":0}},"api_deprecations":{"_status":"ok","_elapsed_seconds":0.01,"blocker_apis":[],"warning_apis":[],"summary":{"blockers":0,"warnings":0,"total":0}},"node_capacity":{"_status":"ok","_elapsed_seconds":0.02,"total_nodes":6,"ready_nodes":6,"unschedulable_nodes":0,"summary":{"total":6,"ready":6,"not_ready":0,"unschedulable":0}},"pdb_drain":{"_status":"ok","_elapsed_seconds":0.01,"total_pdbs":21,"blocking_pdbs":[],"summary":{"total":21,"blocking":0}},"etcd_health":{"_status":"ok","_elapsed_seconds":0.2,"total_members":3,"healthy_members":3,"members":[{"name":"etcd-master-0","node":"master-0","phase":"Running","ready":true},{"name":"etcd-master-1","node":"master-1","phase":"Running","ready":true},{"name":"etcd-master-2","node":"master-2","phase":"Running","ready":true}],"summary":{"operator_available":true,"operator_degraded":false}},"network":{"_status":"ok","_elapsed_seconds":0.4,"network_type":"OVNKubernetes","summary":{"network_type":"OVNKubernetes","is_sdn":false}},"crd_compat":{"_status":"ok","_elapsed_seconds":0.5,"total_crds":160,"version_issues":[],"summary":{"total_crds":160,"version_issues":0}},"olm_operator_lifecycle":{"_status":"ok","_elapsed_seconds":2.0,"operators":[],"summary":{"total_operators":0,"pending_upgrades":0,"manual_approval":0,"incompatible_with_target":0}}},"meta":{"total_checks":9,"checks_ok":9,"checks_errored":0,"elapsed_seconds":2.5}} + ``` + schema: + type: object + properties: + decision: + type: string + enum: ["recommend", "caution", "block", "escalate"] + description: "The 
upgrade decision. Use the 'cluster-update-advisor' skill. A degraded operator is a warning per the classification rules." + blockers_found: + type: integer + description: "Number of blocking issues" + degraded_operator_detected: + type: boolean + description: "Whether the agent identified a degraded operator in the readiness data" + required: ["decision", "blockers_found", "degraded_operator_detected"] + expected: + decision: "caution" + blockers_found: 0 + degraded_operator_detected: true + +# Test 3: Cluster with API deprecation blocker — removed API with active usage. +# Expected decision: block +- name: advisor_api_deprecation_block + query: | + Current version: OCP 4.21.5 + Target version: OCP 4.21.16 + Channel: stable-4.21 + Update type: Patch + Update path: Recommended + + ## Cluster Readiness Data + + ```json + {"current_version":"4.21.5","target_version":"4.21.16","checks":{"cluster_conditions":{"_status":"ok","_elapsed_seconds":0.01,"channel":"stable-4.21","cluster_id":"test-id","conditions":{"Available":{"status":"True","reason":"","message":"Done applying 4.21.5","last_transition":"2026-05-28T12:36:04Z"},"Progressing":{"status":"False","reason":"","message":"Cluster version is 
4.21.5","last_transition":"2026-05-28T14:42:57Z"},"Upgradeable":{"status":"True","reason":"AsExpected","message":""}},"recent_history":[{"version":"4.21.5","state":"Completed"}],"summary":{"current_version":"4.21.5","update_in_progress":false,"upgradeable":true},"update_in_progress":false,"upgradeable":{"status":"True","reason":"AsExpected","message":""}},"operator_health":{"_status":"ok","_elapsed_seconds":0.6,"degraded":[],"not_available":[],"not_upgradeable":[],"machine_config_pools":[{"name":"master","paused":false,"degraded":false,"updating":false,"machine_count":3,"ready_count":3,"updated_count":3},{"name":"worker","paused":false,"degraded":false,"updating":false,"machine_count":3,"ready_count":3,"updated_count":3}],"mcp_summary":{"degraded":0,"paused":0,"updating":0},"summary":{"total_operators":34,"degraded_count":0,"not_available_count":0,"not_upgradeable_count":0}},"api_deprecations":{"_status":"ok","_elapsed_seconds":0.03,"blocker_apis":[{"resource":"flowschemas.v1beta3.flowcontrol.apiserver.k8s.io","removed_in_release":"4.21.16","request_count":1250}],"warning_apis":[{"resource":"cronjobs.v1beta1.batch","deprecated_in":"4.20","request_count":42}],"summary":{"blockers":1,"warnings":1,"total":2}},"node_capacity":{"_status":"ok","_elapsed_seconds":0.02,"total_nodes":6,"ready_nodes":6,"unschedulable_nodes":0,"summary":{"total":6,"ready":6,"not_ready":0,"unschedulable":0}},"pdb_drain":{"_status":"ok","_elapsed_seconds":0.01,"total_pdbs":21,"blocking_pdbs":[],"summary":{"total":21,"blocking":0}},"etcd_health":{"_status":"ok","_elapsed_seconds":0.2,"total_members":3,"healthy_members":3,"members":[{"name":"etcd-master-0","node":"master-0","phase":"Running","ready":true},{"name":"etcd-master-1","node":"master-1","phase":"Running","ready":true},{"name":"etcd-master-2","node":"master-2","phase":"Running","ready":true}],"summary":{"operator_available":true,"operator_degraded":false}},"network":{"_status":"ok","_elapsed_seconds":0.4,"network_type":"OVNKubernetes","su
mmary":{"network_type":"OVNKubernetes","is_sdn":false}},"crd_compat":{"_status":"ok","_elapsed_seconds":0.5,"total_crds":160,"version_issues":[],"summary":{"total_crds":160,"version_issues":0}},"olm_operator_lifecycle":{"_status":"ok","_elapsed_seconds":2.0,"operators":[],"summary":{"total_operators":0,"pending_upgrades":0,"manual_approval":0,"incompatible_with_target":0}}},"meta":{"total_checks":9,"checks_ok":9,"checks_errored":0,"elapsed_seconds":2.5}} + ``` + schema: + type: object + properties: + decision: + type: string + enum: ["recommend", "caution", "block", "escalate"] + description: "The upgrade decision. Use the 'cluster-update-advisor' skill. A removed API with active usage is a blocker." + blockers_found: + type: integer + description: "Number of blocking issues" + api_blocker_detected: + type: boolean + description: "Whether the agent identified the removed API as a blocker" + required: ["decision", "blockers_found", "api_blocker_detected"] + expected: + decision: "block" + api_blocker_detected: true + +# Test 4: Cluster with etcd member down — blocker. 
+# Expected decision: block +- name: advisor_etcd_unhealthy_block + query: | + Current version: OCP 4.21.5 + Target version: OCP 4.21.16 + Channel: stable-4.21 + Update type: Patch + Update path: Recommended + + ## Cluster Readiness Data + + ```json + {"current_version":"4.21.5","target_version":"4.21.16","checks":{"cluster_conditions":{"_status":"ok","_elapsed_seconds":0.01,"channel":"stable-4.21","cluster_id":"test-id","conditions":{"Available":{"status":"True","reason":"","message":"Done applying 4.21.5","last_transition":"2026-05-28T12:36:04Z"},"Progressing":{"status":"False","reason":"","message":"Cluster version is 4.21.5","last_transition":"2026-05-28T14:42:57Z"},"Upgradeable":{"status":"True","reason":"AsExpected","message":""}},"recent_history":[{"version":"4.21.5","state":"Completed"}],"summary":{"current_version":"4.21.5","update_in_progress":false,"upgradeable":true},"update_in_progress":false,"upgradeable":{"status":"True","reason":"AsExpected","message":""}},"operator_health":{"_status":"ok","_elapsed_seconds":0.6,"degraded":[],"not_available":[],"not_upgradeable":[],"machine_config_pools":[{"name":"master","paused":false,"degraded":false,"updating":false,"machine_count":3,"ready_count":3,"updated_count":3},{"name":"worker","paused":false,"degraded":false,"updating":false,"machine_count":3,"ready_count":3,"updated_count":3}],"mcp_summary":{"degraded":0,"paused":0,"updating":0},"summary":{"total_operators":34,"degraded_count":0,"not_available_count":0,"not_upgradeable_count":0}},"api_deprecations":{"_status":"ok","_elapsed_seconds":0.01,"blocker_apis":[],"warning_apis":[],"summary":{"blockers":0,"warnings":0,"total":0}},"node_capacity":{"_status":"ok","_elapsed_seconds":0.02,"total_nodes":6,"ready_nodes":6,"unschedulable_nodes":0,"summary":{"total":6,"ready":6,"not_ready":0,"unschedulable":0}},"pdb_drain":{"_status":"ok","_elapsed_seconds":0.01,"total_pdbs":21,"blocking_pdbs":[],"summary":{"total":21,"blocking":0}},"etcd_health":{"_status":"ok","_elapse
d_seconds":0.2,"total_members":3,"healthy_members":2,"members":[{"name":"etcd-master-0","node":"master-0","phase":"Running","ready":true},{"name":"etcd-master-1","node":"master-1","phase":"Running","ready":true},{"name":"etcd-master-2","node":"master-2","phase":"Running","ready":false}],"operator_conditions":{"Available":{"status":"True","reason":"AsExpected","message":"2 of 3 members available"},"Degraded":{"status":"True","reason":"MemberDown","message":"etcd-master-2 is not ready"}},"summary":{"operator_available":true,"operator_degraded":true}},"network":{"_status":"ok","_elapsed_seconds":0.4,"network_type":"OVNKubernetes","summary":{"network_type":"OVNKubernetes","is_sdn":false}},"crd_compat":{"_status":"ok","_elapsed_seconds":0.5,"total_crds":160,"version_issues":[],"summary":{"total_crds":160,"version_issues":0}},"olm_operator_lifecycle":{"_status":"ok","_elapsed_seconds":2.0,"operators":[],"summary":{"total_operators":0,"pending_upgrades":0,"manual_approval":0,"incompatible_with_target":0}}},"meta":{"total_checks":9,"checks_ok":9,"checks_errored":0,"elapsed_seconds":2.5}} + ``` + schema: + type: object + properties: + decision: + type: string + enum: ["recommend", "caution", "block", "escalate"] + description: "The upgrade decision. Use the 'cluster-update-advisor' skill. An unhealthy etcd member is a blocker." + etcd_issue_detected: + type: boolean + description: "Whether the agent identified the unhealthy etcd member" + unhealthy_member: + type: string + description: "Name of the unhealthy etcd member" + required: ["decision", "etcd_issue_detected"] + expected: + decision: "block" + etcd_issue_detected: true + unhealthy_member: "etcd-master-2" + +# Test 5: Cluster with errored readiness checks — incomplete data. 
+# Expected decision: escalate +- name: advisor_errored_checks_escalate + query: | + Current version: OCP 4.21.5 + Target version: OCP 4.21.16 + Channel: stable-4.21 + Update type: Patch + Update path: Recommended + + ## Cluster Readiness Data + + ```json + {"current_version":"4.21.5","target_version":"4.21.16","checks":{"cluster_conditions":{"_status":"ok","_elapsed_seconds":0.01,"channel":"stable-4.21","cluster_id":"test-id","conditions":{"Available":{"status":"True","reason":"","message":"Done applying 4.21.5","last_transition":"2026-05-28T12:36:04Z"},"Progressing":{"status":"False","reason":"","message":"Cluster version is 4.21.5","last_transition":"2026-05-28T14:42:57Z"},"Upgradeable":{"status":"True","reason":"AsExpected","message":""}},"summary":{"current_version":"4.21.5","update_in_progress":false,"upgradeable":true},"update_in_progress":false,"upgradeable":{"status":"True","reason":"AsExpected","message":""}},"operator_health":{"_status":"error","_error":"failed to list ClusterOperators: connection refused","_elapsed_seconds":60.0},"api_deprecations":{"_status":"error","_error":"failed to list APIRequestCounts: connection refused","_elapsed_seconds":60.0},"node_capacity":{"_status":"error","_error":"failed to list Nodes: connection refused","_elapsed_seconds":60.0},"pdb_drain":{"_status":"error","_error":"failed to list PodDisruptionBudgets: connection refused","_elapsed_seconds":60.0},"etcd_health":{"_status":"error","_error":"failed to get ClusterOperator etcd: connection refused","_elapsed_seconds":60.0},"network":{"_status":"ok","_elapsed_seconds":0.4,"network_type":"OVNKubernetes","summary":{"network_type":"OVNKubernetes","is_sdn":false}},"crd_compat":{"_status":"error","_error":"failed to list CRDs: connection refused","_elapsed_seconds":60.0},"olm_operator_lifecycle":{"_status":"error","_error":"failed to list subscriptions: connection 
refused","_elapsed_seconds":60.0}},"meta":{"total_checks":9,"checks_ok":2,"checks_errored":7,"elapsed_seconds":60.0}} + ``` + schema: + type: object + properties: + decision: + type: string + enum: ["recommend", "caution", "block", "escalate"] + description: "The upgrade decision. Use the 'cluster-update-advisor' skill. When most checks errored, there is insufficient data to assess." + checks_errored: + type: integer + description: "Number of readiness checks that errored" + required: ["decision", "checks_errored"] + expected: + decision: "escalate" + checks_errored: 7 diff --git a/evals/workspace/skills/product-lifecycle b/evals/workspace/skills/product-lifecycle new file mode 120000 index 0000000..6a238ba --- /dev/null +++ b/evals/workspace/skills/product-lifecycle @@ -0,0 +1 @@ +../../../cluster-update/product-lifecycle \ No newline at end of file diff --git a/evals/workspace/skills/update-advisor b/evals/workspace/skills/update-advisor new file mode 120000 index 0000000..dc29e18 --- /dev/null +++ b/evals/workspace/skills/update-advisor @@ -0,0 +1 @@ +../../../cluster-update/update-advisor \ No newline at end of file