From 1a81f3023ee9bd8293fe4f91a86208326fbfd1f9 Mon Sep 17 00:00:00 2001 From: Ashish Suneja Date: Wed, 13 May 2026 12:48:37 +0530 Subject: [PATCH 01/19] management_plane_benchmarking --- .../k8s_management_benchmark.py | 1143 +++++++++++++++++ 1 file changed, 1143 insertions(+) create mode 100644 perfkitbenchmarker/linux_benchmarks/k8s_management_benchmark.py diff --git a/perfkitbenchmarker/linux_benchmarks/k8s_management_benchmark.py b/perfkitbenchmarker/linux_benchmarks/k8s_management_benchmark.py new file mode 100644 index 0000000000..1e319b49a0 --- /dev/null +++ b/perfkitbenchmarker/linux_benchmarks/k8s_management_benchmark.py @@ -0,0 +1,1143 @@ +""" +Benchmark: K8s Management Plane Operations +=========================================== +Covers GKE / EKS / AKS management plane operations as defined in: + "Benchmark Methodology: GKE Management Plane Operations" (katmitchell@, Mar 2026) + +Scenarios implemented +--------------------- + A. Concurrent Node Pool operations – CreateNodePool, UpdateNodePool, DeleteNodePool + B. Overlapping Cluster + Node Pool op – CreateNodePool fired during ClusterUpdate + C. 
Large-scale Node Pool provisioning – up to MAX_NODE_POOLS node pools + +Metrics collected per operation +-------------------------------- + - initiation_latency : time from API call to async operation accepted + - end_to_end_latency : time from API call to operation DONE/SUCCEEDED + - success / failure : per-operation outcome + - aggregate stats : median, mean, min, max, stddev, success_rate (via PKB sample metadata) + +Cloud coverage +-------------- + GCP → GKE (google-cloud-sdk / container_v1 client) + AWS → EKS (boto3) + Azure → AKS (azure-mgmt-containerservice) + +Author: vendor implementation based on methodology doc +""" + +import logging +import math +import statistics +import time +import uuid +from concurrent.futures import ThreadPoolExecutor, as_completed, wait, ALL_COMPLETED +from typing import Callable, List, Optional, Tuple + +from absl import flags +from perfkitbenchmarker import configs +from perfkitbenchmarker import errors +from perfkitbenchmarker import sample +from perfkitbenchmarker import vm_util + +# --------------------------------------------------------------------------- +# Benchmark identity +# --------------------------------------------------------------------------- +BENCHMARK_NAME = 'k8s_management_benchmark' + +BENCHMARK_CONFIG = """ +k8s_management_benchmark: + description: > + Benchmarks GKE/EKS/AKS management plane operations: concurrent node pool + create/upgrade/delete, overlapping cluster+nodepool ops, and large-scale + provisioning. No data-plane workloads required. 
+ container_cluster: + type: Kubernetes + # Minimal node count – benchmark is control-plane-only + vm_count: 1 + vm_spec: + GCP: + machine_type: e2-standard-2 + zone: us-central1-a + AWS: + machine_type: t3.medium + zone: us-east-1a + Azure: + machine_type: Standard_D2s_v3 + zone: eastus +""" + +# --------------------------------------------------------------------------- +# Configurable flags +# --------------------------------------------------------------------------- +FLAGS = flags.FLAGS + +flags.DEFINE_integer( + 'mgmt_concurrent_nodepools', 5, + 'Number of node pools to create/upgrade/delete concurrently in Scenario A.') + +flags.DEFINE_integer( + 'mgmt_large_scale_nodepools', 50, + 'Number of node pools to provision in the large-scale Scenario C. ' + 'Set up to 1000 for full stress test (ensure quota is available).') + +flags.DEFINE_integer( + 'mgmt_nodes_per_nodepool', 1, + 'Number of nodes per node pool. Kept low to reduce quota consumption.') + +flags.DEFINE_string( + 'mgmt_k8s_version', None, + 'Kubernetes version for the cluster (None = cloud default / latest).') + +flags.DEFINE_string( + 'mgmt_nodepool_initial_version', None, + 'Initial node pool version (N-2). None = derive from cluster version.') + +flags.DEFINE_string( + 'mgmt_nodepool_target_version', None, + 'Target node pool upgrade version (N-1 or latest). 
def Prepare(benchmark_spec):
    """Check that the cluster answers kubectl and resolve node pool versions.

    The cluster is taken from ``benchmark_spec.container_cluster``. The
    cloud-specific management client and the resolved initial/target node
    pool versions are stored on ``benchmark_spec`` as ``mgmt_client``,
    ``mgmt_initial_version`` and ``mgmt_target_version`` for later phases.

    Args:
        benchmark_spec: the PKB benchmark spec holding the container cluster.
    """
    cluster = benchmark_spec.container_cluster

    # Prefer the cluster's own CLOUD attribute; otherwise use --cloud, and
    # fall back to 'AWS' when neither yields a value.
    cloud_name = getattr(cluster, 'CLOUD', None) or FLAGS.cloud
    if cloud_name:
        cloud_name = cloud_name.upper()
    else:
        cloud_name = 'AWS'

    logging.info('[Prepare] Cluster %s on %s - cluster is ready (PKB provisioned).', cluster.name, cloud_name)

    # Reachability probe only: a kubectl failure is logged, never raised.
    try:
        cluster.RunKubectl(['get', 'nodes', '--no-headers'])
        logging.info('[Prepare] kubectl get nodes succeeded - cluster is reachable.')
    except Exception as probe_error:  # pylint: disable=broad-except
        logging.warning('[Prepare] kubectl get nodes warning: %s', probe_error)

    mgmt_client = _get_cloud_client(cloud_name, cluster)

    # Flag values win; the client fills in whichever version is not pinned.
    versions = mgmt_client.resolve_versions(
        flags_initial=FLAGS.mgmt_nodepool_initial_version,
        flags_target=FLAGS.mgmt_nodepool_target_version,
    )
    initial_version, target_version = versions

    # Stash on the spec so Run() does not need to query again.
    benchmark_spec.mgmt_client = mgmt_client
    benchmark_spec.mgmt_initial_version = initial_version
    benchmark_spec.mgmt_target_version = target_version

    logging.info('[Prepare] Node pool initial version: %s → target version: %s',
                 initial_version, target_version)
def Cleanup(benchmark_spec):
    """Best-effort removal of node pools left behind by the benchmark.

    Does nothing when ``benchmark_spec`` carries no ``mgmt_client``. Any
    exception raised by the client's cleanup is logged as a warning and
    swallowed.

    Args:
        benchmark_spec: the PKB benchmark spec, possibly holding ``mgmt_client``.
    """
    mgmt_client = getattr(benchmark_spec, 'mgmt_client', None)
    if mgmt_client is not None:
        logging.info('[Cleanup] Removing any benchmark node pools…')
        try:
            mgmt_client.delete_all_benchmark_nodepools()
        except Exception as cleanup_error:  # pylint: disable=broad-except
            logging.warning('[Cleanup] Non-fatal error during node pool cleanup: %s', cleanup_error)
def _run_scenario_b(client, initial_version: str) -> List[sample.Sample]:
    """Scenario B: start a node pool create while a cluster update is in flight.

    Sequence:
      1. Start the cluster update without waiting for it.
      2. Immediately start the node pool creation.
      3. Emit an initiation-latency sample for each call.
      4. Poll both operations concurrently and emit end-to-end samples.
      5. Try to delete the test node pool; a failure is only logged.

    Args:
        client: cloud management client exposing the ``start_*_async``,
            ``wait_for_operation`` and ``delete_nodepool`` methods.
        initial_version: node version used for the created node pool.

    Returns:
        Four samples, in order: cluster initiation, node pool initiation,
        cluster end-to-end, node pool end-to-end.
    """
    overlap_pool = _pool_name('b-overlap', 0)
    collected = []

    # Cluster update first (async).
    cluster_t0 = time.time()
    cluster_op = client.start_cluster_update_async()
    cluster_init_secs = time.time() - cluster_t0

    collected.append(sample.Sample(
        'ScenarioB_ClusterUpdate_InitiationLatency',
        cluster_init_secs,
        'seconds',
        {'operation': 'ClusterUpdate', 'phase': 'initiation'},
    ))
    logging.info('[ScenarioB] ClusterUpdate initiated (op_id=%s) in %.2fs',
                 cluster_op, cluster_init_secs)

    # Node pool create right behind it, so the two operations overlap.
    pool_t0 = time.time()
    pool_op = client.start_create_nodepool_async(
        name=overlap_pool,
        node_count=FLAGS.mgmt_nodes_per_nodepool,
        node_version=initial_version,
    )
    pool_init_secs = time.time() - pool_t0

    collected.append(sample.Sample(
        'ScenarioB_CreateNodePool_InitiationLatency',
        pool_init_secs,
        'seconds',
        {'operation': 'CreateNodePool', 'phase': 'initiation', 'overlap': 'ClusterUpdate'},
    ))
    logging.info('[ScenarioB] CreateNodePool initiated (op_id=%s) in %.2fs',
                 pool_op, pool_init_secs)

    # One poller thread per operation; each measures from its own start time.
    pending = (
        (client.wait_for_operation, cluster_op, cluster_t0),
        (client.wait_for_operation, pool_op, pool_t0),
    )
    with ThreadPoolExecutor(max_workers=2) as executor:
        futures = [executor.submit(*call) for call in pending]
        cluster_outcome, pool_outcome = [future.result() for future in futures]

    cluster_meta = {
        'operation': 'ClusterUpdate',
        'success': str(cluster_outcome['success']),
        'final_state': cluster_outcome.get('final_state', 'unknown'),
    }
    collected.append(sample.Sample(
        'ScenarioB_ClusterUpdate_EndToEndLatency',
        cluster_outcome['end_to_end_latency'],
        'seconds',
        cluster_meta,
    ))

    pool_meta = {
        'operation': 'CreateNodePool',
        'success': str(pool_outcome['success']),
        'overlap': 'ClusterUpdate',
        'final_state': pool_outcome.get('final_state', 'unknown'),
    }
    collected.append(sample.Sample(
        'ScenarioB_CreateNodePool_EndToEndLatency',
        pool_outcome['end_to_end_latency'],
        'seconds',
        pool_meta,
    ))

    # Remove the test node pool; failure here must not lose the samples.
    try:
        client.delete_nodepool(overlap_pool)
    except Exception as delete_error:  # pylint: disable=broad-except
        logging.warning('[ScenarioB] Could not delete test node pool %s: %s', overlap_pool, delete_error)

    return collected
def _run_concurrent_operations(
    operation_fn: Callable[[str], dict],
    items: List[str],
    op_label: str,
) -> List[dict]:
    """Run ``operation_fn(name)`` concurrently for every name in ``items``.

    Each callable is expected to return a dict with at least ``name``,
    ``success``, ``initiation_latency`` and ``end_to_end_latency``. Exactly
    one result dict is produced per item:

    * a raised exception, or a return value that is not a dict, becomes a
      failure record (``success=False``, both latencies ``-1.0``, ``error``
      set to the message);
    * a dict missing any of the four contract keys has them filled with the
      same failure defaults, so later sample conversion can index them.

    Args:
        operation_fn: callable taking the item name and returning a result dict.
        items: names to process; an empty list returns ``[]`` immediately.
        op_label: label used as a prefix in log messages.

    Returns:
        Result dicts in completion order (not input order).
    """
    if not items:
        logging.info('[%s] No items to process, skipping.', op_label)
        return []

    def _failure(name, exc):
        return {
            'name': name,
            'success': False,
            'initiation_latency': -1.0,
            'end_to_end_latency': -1.0,
            'error': str(exc),
        }

    results = []
    n_workers = min(len(items), 50)  # cap thread pool to avoid OS limits

    logging.info('[%s] Launching %d concurrent operations...', op_label, len(items))
    with ThreadPoolExecutor(max_workers=n_workers) as executor:
        future_to_name = {executor.submit(operation_fn, name): name for name in items}
        for future in as_completed(future_to_name):
            name = future_to_name[future]
            try:
                result = future.result()
                # Validate BEFORE appending, so a bad return value is never
                # recorded twice (once raw and once as a failure).
                if not isinstance(result, dict):
                    raise TypeError(
                        f'operation returned {type(result).__name__}, expected dict')
            except Exception as exc:  # pylint: disable=broad-except
                logging.error('[%s] Operation failed for %s: %s', op_label, name, exc)
                results.append(_failure(name, exc))
                continue

            # Fill in any missing contract keys with failure defaults.
            result.setdefault('name', name)
            result.setdefault('success', False)
            result.setdefault('initiation_latency', -1.0)
            result.setdefault('end_to_end_latency', -1.0)
            results.append(result)
            logging.info('[%s] %-40s e2e=%.2fs init=%.2fs ok=%s',
                         op_label, name,
                         result['end_to_end_latency'],
                         result['initiation_latency'],
                         result['success'])
    return results
List[sample.Sample]: + """ + Converts a list of operation result dicts into PKB Samples. + + Emits: + - One Sample per operation for initiation and end-to-end latency + - Aggregate stat Samples: median, mean, min, max, stddev, success_rate + """ + samples = [] + + init_latencies = [] + e2e_latencies = [] + success_count = 0 + + for op in ops: + meta = { + 'operation_name': op['name'], + 'success': str(op['success']), + } + if 'error' in op: + meta['error'] = op['error'] + if 'final_state' in op: + meta['final_state'] = op['final_state'] + + if op['success']: + success_count += 1 + + if op['initiation_latency'] >= 0: + samples.append(sample.Sample( + f'{metric_prefix}_InitiationLatency', + op['initiation_latency'], + 'seconds', + meta, + )) + init_latencies.append(op['initiation_latency']) + + if op['end_to_end_latency'] >= 0: + samples.append(sample.Sample( + f'{metric_prefix}_EndToEndLatency', + op['end_to_end_latency'], + 'seconds', + meta, + )) + e2e_latencies.append(op['end_to_end_latency']) + + # Aggregate stats + total = len(ops) + if total > 0: + samples.append(sample.Sample( + f'{metric_prefix}_SuccessRate', + success_count / total * 100, + 'percent', + {'total_ops': str(total), 'successful_ops': str(success_count)}, + )) + + for label, latencies in [('InitiationLatency', init_latencies), + ('EndToEndLatency', e2e_latencies)]: + if len(latencies) < 2: + continue + agg_meta = {'sample_count': str(len(latencies))} + samples += [ + sample.Sample(f'{metric_prefix}_{label}_Median', + statistics.median(latencies), 'seconds', agg_meta), + sample.Sample(f'{metric_prefix}_{label}_Mean', + statistics.mean(latencies), 'seconds', agg_meta), + sample.Sample(f'{metric_prefix}_{label}_Min', + min(latencies), 'seconds', agg_meta), + sample.Sample(f'{metric_prefix}_{label}_Max', + max(latencies), 'seconds', agg_meta), + sample.Sample(f'{metric_prefix}_{label}_StdDev', + statistics.stdev(latencies), 'seconds', agg_meta), + ] + + return samples + + +# 
def _wait_for_operation_generic(
    poll_fn: Callable[[], str],
    op_id: str,
    start_time: float,
    timeout_sec: int,
    poll_interval_sec: int,
) -> dict:
    """Poll an async operation until it is terminal or the timeout passes.

    The state is polled first on every iteration, so a terminal state seen
    on the poll that crosses the deadline is still reported as terminal
    rather than as a timeout.

    Args:
        poll_fn: zero-arg callable returning the current state string.
        op_id: operation identifier, used only in log messages.
        start_time: epoch seconds at which the operation was initiated.
        timeout_sec: seconds after ``start_time`` at which polling gives up.
        poll_interval_sec: seconds to sleep between polls.

    Returns:
        ``{'success': bool, 'end_to_end_latency': float, 'final_state': str}``;
        ``final_state`` is ``'TIMEOUT'`` when the deadline was exceeded.
    """
    expires_at = start_time + timeout_sec

    while True:
        current = poll_fn()

        if current in TERMINAL_STATES:
            elapsed = time.time() - start_time
            succeeded = current in {STATE_DONE, STATE_SUCCEEDED}
            logging.info('Operation %s reached terminal state %s in %.2fs',
                         op_id, current, elapsed)
            return {
                'success': succeeded,
                'end_to_end_latency': elapsed,
                'final_state': current,
            }

        if time.time() <= expires_at:
            # Still within budget: wait and poll again.
            logging.debug('Operation %s state=%s – polling again in %ds', op_id, current, poll_interval_sec)
            time.sleep(poll_interval_sec)
            continue

        logging.error('Operation %s timed out after %ds (last state: %s)',
                      op_id, timeout_sec, current)
        return {
            'success': False,
            'end_to_end_latency': time.time() - start_time,
            'final_state': 'TIMEOUT',
        }
'us-central1-a' or 'us-central1' + self._cluster_name = cluster.name + self._parent = f'projects/{self._project}/locations/{self._location}' + self._cluster_path = f'{self._parent}/clusters/{self._cluster_name}' + + # ------------------------------------------------------------------ + # Version resolution + # ------------------------------------------------------------------ + + def resolve_versions(self, flags_initial, flags_target): + from google.cloud import container_v1 # pylint: disable=import-outside-toplevel + sc = self._gke.get_server_config(name=self._parent) + valid_versions = [c.name for c in sc.channels + if c.channel == container_v1.ReleaseChannel.Channel.REGULAR] + if not valid_versions: + valid_versions = sc.valid_node_versions + + valid_versions.sort(reverse=True) # latest first + + initial = flags_initial or (valid_versions[2] if len(valid_versions) >= 3 + else valid_versions[-1]) + target = flags_target or valid_versions[0] + + return initial, target + + # ------------------------------------------------------------------ + # Async helpers + # ------------------------------------------------------------------ + + def wait_for_operation(self, op_name: str, start_time: float) -> dict: + def _poll(): + op = self._gke.get_operation({'name': op_name}) + return op.status.name # RUNNING / DONE / ABORTING + + return _wait_for_operation_generic( + poll_fn = _poll, + op_id = op_name, + start_time = start_time, + timeout_sec = FLAGS.mgmt_operation_timeout_sec, + poll_interval_sec = FLAGS.mgmt_poll_interval_sec, + ) + + def _wait(self, op, start_time: float) -> dict: + return self.wait_for_operation(op.name, start_time) + + # ------------------------------------------------------------------ + # Node pool operations + # ------------------------------------------------------------------ + + def start_create_nodepool_async(self, name: str, node_count: int, node_version: str): + from google.cloud import container_v1 # pylint: disable=import-outside-toplevel + 
req = container_v1.CreateNodePoolRequest( + parent = self._cluster_path, + node_pool = container_v1.NodePool( + name = name, + version = node_version, + initial_node_count = node_count, + config = container_v1.NodeConfig(machine_type='e2-standard-2'), + ), + ) + op = self._gke.create_node_pool(request=req) + return op.name # operation resource name + + def create_nodepool(self, name: str, node_count: int, node_version: str) -> dict: + start = time.time() + op_id = self.start_create_nodepool_async(name, node_count, node_version) + initiation_latency = time.time() - start + result = self.wait_for_operation(op_id, start) + return { + 'name': name, + 'success': result['success'], + 'initiation_latency': initiation_latency, + 'end_to_end_latency': result['end_to_end_latency'], + 'final_state': result.get('final_state'), + } + + def upgrade_nodepool(self, name: str, target_version: str) -> dict: + from google.cloud import container_v1 # pylint: disable=import-outside-toplevel + start = time.time() + req = container_v1.UpdateNodePoolRequest( + name = f'{self._cluster_path}/nodePools/{name}', + node_version = target_version, + ) + op = self._gke.update_node_pool(request=req) + initiation_latency = time.time() - start + result = self.wait_for_operation(op.name, start) + return { + 'name': name, + 'success': result['success'], + 'initiation_latency': initiation_latency, + 'end_to_end_latency': result['end_to_end_latency'], + 'final_state': result.get('final_state'), + } + + def delete_nodepool(self, name: str) -> dict: + from google.cloud import container_v1 # pylint: disable=import-outside-toplevel + start = time.time() + req = container_v1.DeleteNodePoolRequest( + name=f'{self._cluster_path}/nodePools/{name}') + op = self._gke.delete_node_pool(request=req) + initiation_latency = time.time() - start + result = self.wait_for_operation(op.name, start) + return { + 'name': name, + 'success': result['success'], + 'initiation_latency': initiation_latency, + 'end_to_end_latency': 
class EKSManagementClient:
    """Wraps the AWS EKS boto3 API for management plane operations.

    Requires: boto3 (pip install boto3)

    The boto3 client is created lazily and dropped on pickling, because PKB
    pickles benchmark_spec (including this object) and boto3 clients are not
    picklable.
    """

    def __init__(self, cluster):
        self._cluster = cluster
        self._cluster_name = cluster.name
        self._region = cluster.region
        # Filled lazily; both are dropped again in __getstate__.
        self._eks_client = None
        self._cached_node_role_arn = None

    @property
    def _eks(self):
        """Return the cached boto3 EKS client, creating it on first use."""
        client = getattr(self, '_eks_client', None)
        if client is None:
            # Import only when a client must be built, so an already-cached
            # (or injected) client never requires boto3 to be importable.
            import boto3  # pylint: disable=import-outside-toplevel
            client = boto3.client('eks', region_name=self._region)
            self._eks_client = client
        return client

    def __getstate__(self):
        # Exclude the unpicklable boto3 client when PKB serialises benchmark_spec.
        state = self.__dict__.copy()
        state.pop('_eks_client', None)
        state.pop('_cached_node_role_arn', None)
        return state

    def __setstate__(self, state):
        self.__dict__.update(state)
        self._eks_client = None
        self._cached_node_role_arn = None

    # ------------------------------------------------------------------
    # Version resolution
    # ------------------------------------------------------------------

    def resolve_versions(self, flags_initial, flags_target):
        """Return ``(initial, target)`` nodegroup versions.

        Unpinned values are derived from the cluster version N: the initial
        version defaults to N-2 and the target to N. Minor versions are never
        allowed to go below 0.
        """
        cluster_info = self._eks.describe_cluster(name=self._cluster_name)['cluster']
        cluster_version = cluster_info['version']  # e.g. '1.34'

        major, minor = cluster_version.split('.')[:2]
        minor = int(minor)

        # N, N-1, N-2, N-3 relative to the cluster version.
        supported = [f'{major}.{minor - i}' for i in range(4) if minor - i >= 0]
        logging.info('[EKS] Cluster version %s, supported nodegroup versions: %s',
                     cluster_version, supported)

        initial = flags_initial or supported[min(2, len(supported) - 1)]  # N-2
        target = flags_target or supported[0]  # N (same as cluster = latest)
        return initial, target

    # ------------------------------------------------------------------
    # Async helpers
    # ------------------------------------------------------------------

    def wait_for_operation(self, op_id: str, start_time: float,
                           expect_deleted: bool = False) -> dict:
        """Poll an EKS operation until it reaches a terminal state.

        Args:
            op_id: one of
                ``"<cluster>/<nodegroup>"`` (nodegroup status is polled),
                ``"<cluster>/__cluster__"`` (cluster status is polled), or
                ``"<cluster>/<update_id>/__cluster__"`` (that cluster update
                is polled via describe_update).
            start_time: epoch seconds at which the operation was initiated.
            expect_deleted: set for delete operations. A nodegroup that no
                longer exists then counts as success, and ACTIVE is treated
                as "not started yet" instead of "done".

        Returns:
            The dict produced by ``_wait_for_operation_generic``.
        """
        parts = op_id.split('/')
        is_cluster_op = (parts[-1] == '__cluster__')
        update_id = parts[-2] if is_cluster_op and len(parts) >= 3 else None
        ng_name = parts[-1]

        def _poll():
            if is_cluster_op:
                if update_id:
                    # Track the specific update: the cluster status switches
                    # to UPDATING only eventually, so it can still read
                    # ACTIVE right after the update call.
                    r = self._eks.describe_update(
                        name=self._cluster_name, updateId=update_id)
                    return r['update']['status']  # InProgress / Failed / Cancelled / Successful
                r = self._eks.describe_cluster(name=self._cluster_name)
                return r['cluster']['status']  # ACTIVE / UPDATING / FAILED
            try:
                r = self._eks.describe_nodegroup(
                    clusterName=self._cluster_name,
                    nodegroupName=ng_name,
                )
            except self._eks.exceptions.ResourceNotFoundException:
                if expect_deleted:
                    return 'DELETED'  # the nodegroup is gone: delete finished
                raise
            return r['nodegroup']['status']

        mapping = {
            'ACTIVE': STATE_RUNNING if expect_deleted else STATE_DONE,
            'DELETED': STATE_DONE,
            'Successful': STATE_DONE,
            'FAILED': STATE_FAILED,
            'Failed': STATE_FAILED,
            'CREATE_FAILED': STATE_FAILED,
            'DELETE_FAILED': STATE_FAILED,
            'DEGRADED': STATE_FAILED,
            'Cancelled': 'CANCELED',
        }

        def _poll_normalised():
            # Anything unmapped (CREATING, UPDATING, DELETING, InProgress…)
            # is still in progress.
            return mapping.get(_poll(), STATE_RUNNING)

        return _wait_for_operation_generic(
            poll_fn=_poll_normalised,
            op_id=op_id,
            start_time=start_time,
            timeout_sec=FLAGS.mgmt_operation_timeout_sec,
            poll_interval_sec=FLAGS.mgmt_poll_interval_sec,
        )

    def _list_nodegroup_names(self):
        """Return every nodegroup name of the cluster, following pagination."""
        names = []
        paginator = self._eks.get_paginator('list_nodegroups')
        for page in paginator.paginate(clusterName=self._cluster_name):
            names.extend(page.get('nodegroups', []))
        return names

    def _await_nodegroup(self, name, start, initiation_latency, expect_deleted=False):
        """Wait for a nodegroup operation and build the standard result dict."""
        op_id = f'{self._cluster_name}/{name}'
        result = self.wait_for_operation(op_id, start, expect_deleted=expect_deleted)
        return {'name': name, 'initiation_latency': initiation_latency, **result}

    # ------------------------------------------------------------------
    # Node group (node pool) operations
    # ------------------------------------------------------------------

    def _get_node_role_arn(self) -> str:
        """Return a node IAM role ARN for new nodegroups (cached after first hit).

        First choice is the ``nodeRole`` of any existing nodegroup. If that
        lookup fails or finds nothing, IAM roles are scanned for an
        eksctl-style ``NodeInstanceRole`` belonging to this cluster.

        Raises:
            RuntimeError: if neither lookup finds a role.
        """
        cached = getattr(self, '_cached_node_role_arn', None)
        if cached:
            return cached

        # Try to get the role from an existing nodegroup (most reliable).
        try:
            for ng_name in self._list_nodegroup_names():
                ng = self._eks.describe_nodegroup(
                    clusterName=self._cluster_name, nodegroupName=ng_name)
                role_arn = ng['nodegroup'].get('nodeRole')
                if role_arn:
                    logging.info('[EKS] Using node role from existing nodegroup %s: %s',
                                 ng_name, role_arn)
                    self._cached_node_role_arn = role_arn
                    return role_arn
        except Exception as exc:  # pylint: disable=broad-except
            logging.warning('[EKS] Could not look up node role from nodegroup: %s', exc)

        # Fallback: look for the standard eksctl node role name in IAM.
        import boto3  # pylint: disable=import-outside-toplevel
        iam = boto3.client('iam', region_name=self._eks.meta.region_name)
        paginator = iam.get_paginator('list_roles')
        prefix = f'eksctl-{self._cluster_name}-nodegroup'
        for page in paginator.paginate():
            for role in page['Roles']:
                if prefix in role['RoleName'] and 'NodeInstanceRole' in role['RoleName']:
                    self._cached_node_role_arn = role['Arn']
                    logging.info('[EKS] Found node instance role via IAM: %s', role['Arn'])
                    return self._cached_node_role_arn

        raise RuntimeError(
            f'Could not find a node IAM role with ec2.amazonaws.com trust for cluster '
            f'{self._cluster_name}. Ensure the default nodegroup exists or pass a role ARN.')

    def start_create_nodepool_async(self, name: str, node_count: int, node_version: str) -> str:
        """Start nodegroup creation and return its op id ``"<cluster>/<name>"``."""
        cluster_info = self._eks.describe_cluster(name=self._cluster_name)['cluster']
        subnet_ids = cluster_info['resourcesVpcConfig']['subnetIds']
        # Use the node role (ec2.amazonaws.com trust), not the cluster ServiceRole.
        node_role_arn = self._get_node_role_arn()

        self._eks.create_nodegroup(
            clusterName=self._cluster_name,
            nodegroupName=name,
            scalingConfig={'minSize': node_count, 'maxSize': node_count, 'desiredSize': node_count},
            subnets=subnet_ids,
            nodeRole=node_role_arn,
            version=node_version,
            instanceTypes=['t3.medium'],
        )
        return f'{self._cluster_name}/{name}'

    def create_nodepool(self, name: str, node_count: int, node_version: str) -> dict:
        """Create a nodegroup and wait for it; returns the timing/result dict."""
        start = time.time()
        self.start_create_nodepool_async(name, node_count, node_version)
        initiation_latency = time.time() - start
        return self._await_nodegroup(name, start, initiation_latency)

    def upgrade_nodepool(self, name: str, target_version: str) -> dict:
        """Upgrade a nodegroup to ``target_version`` and wait for completion."""
        start = time.time()
        self._eks.update_nodegroup_version(
            clusterName=self._cluster_name,
            nodegroupName=name,
            version=target_version,
        )
        initiation_latency = time.time() - start
        return self._await_nodegroup(name, start, initiation_latency)

    def delete_nodepool(self, name: str) -> dict:
        """Delete a nodegroup and wait until it no longer exists."""
        start = time.time()
        self._eks.delete_nodegroup(
            clusterName=self._cluster_name,
            nodegroupName=name,
        )
        initiation_latency = time.time() - start
        return self._await_nodegroup(name, start, initiation_latency, expect_deleted=True)

    def start_cluster_update_async(self) -> str:
        """Toggle 'api' control-plane logging to trigger a cluster update.

        Returns:
            ``"<cluster>/<update_id>/__cluster__"`` when the response carries
            an update id, otherwise ``"<cluster>/__cluster__"``.
        """
        cluster = self._eks.describe_cluster(name=self._cluster_name)['cluster']
        current_logging = cluster.get('logging', {}).get('clusterLogging', [])

        # Current state of the 'api' log type.
        api_enabled = any(
            'api' in entry.get('types', []) and entry.get('enabled')
            for entry in current_logging
        )

        # Flip it so the request is always an actual change.
        new_enabled = not api_enabled
        logging.info('[EKS] Toggling api logging from %s to %s to trigger ClusterUpdate',
                     api_enabled, new_enabled)

        response = self._eks.update_cluster_config(
            name=self._cluster_name,
            logging={
                'clusterLogging': [
                    {'types': ['api'], 'enabled': new_enabled}
                ]
            }
        )
        update_id = ((response or {}).get('update') or {}).get('id')
        if update_id:
            return f'{self._cluster_name}/{update_id}/__cluster__'
        return f'{self._cluster_name}/__cluster__'

    def delete_all_benchmark_nodepools(self):
        """Delete every nodegroup whose name starts with 'pkb-' (best effort)."""
        # Collect all names first (paginated), then delete.
        for ng in self._list_nodegroup_names():
            if ng.startswith('pkb-'):
                try:
                    self.delete_nodepool(ng)
                except Exception as exc:  # pylint: disable=broad-except
                    logging.warning('Could not delete nodegroup %s: %s', ng, exc)
+ Requires: azure-mgmt-containerservice, azure-identity + """ + + def __init__(self, cluster): + from azure.identity import DefaultAzureCredential # pylint: disable=import-outside-toplevel + from azure.mgmt.containerservice import ContainerServiceClient # pylint: disable=import-outside-toplevel + self._cluster = cluster + self._cluster_name = cluster.name + self._resource_group = cluster.resource_group + self._subscription_id = cluster.subscription_id + cred = DefaultAzureCredential() + self._aks = ContainerServiceClient(cred, self._subscription_id) + + # ------------------------------------------------------------------ + # Version resolution + # ------------------------------------------------------------------ + + def resolve_versions(self, flags_initial, flags_target): + versions = sorted( + [v.orchestrator_version + for v in self._aks.container_services.list_orchestrators( + location='eastus', resource_type='managedClusters' + ).orchestrators], + reverse=True, + ) + initial = flags_initial or (versions[2] if len(versions) >= 3 else versions[-1]) + target = flags_target or versions[0] + return initial, target + + # ------------------------------------------------------------------ + # Async helpers + # ------------------------------------------------------------------ + + def wait_for_operation(self, op_id: str, start_time: float) -> dict: + """ + op_id: "//" + or "//__cluster__" + """ + parts = op_id.split('/') + is_cluster_op = (parts[-1] == '__cluster__') + + def _poll(): + if is_cluster_op: + c = self._aks.managed_clusters.get(self._resource_group, self._cluster_name) + return c.provisioning_state # Succeeded / Failed / Updating / Creating + else: + pool_name = parts[-1] + ap = self._aks.agent_pools.get( + self._resource_group, self._cluster_name, pool_name) + return ap.provisioning_state + + def _normalise(state): + mapping = { + 'Succeeded': STATE_SUCCEEDED, + 'Failed': STATE_FAILED, + 'Canceled': 'CANCELED', + } + return mapping.get(state, STATE_RUNNING) 
+ + def _poll_normalised(): + return _normalise(_poll()) + + return _wait_for_operation_generic( + poll_fn = _poll_normalised, + op_id = op_id, + start_time = start_time, + timeout_sec = FLAGS.mgmt_operation_timeout_sec, + poll_interval_sec = FLAGS.mgmt_poll_interval_sec, + ) + + # ------------------------------------------------------------------ + # Agent pool (node pool) operations + # ------------------------------------------------------------------ + + def start_create_nodepool_async(self, name: str, node_count: int, node_version: str) -> str: + from azure.mgmt.containerservice.models import AgentPool # pylint: disable=import-outside-toplevel + self._aks.agent_pools.begin_create_or_update( + self._resource_group, + self._cluster_name, + name, + AgentPool( + count = node_count, + vm_size = 'Standard_D2s_v3', + orchestrator_version = node_version, + mode = 'User', + ), + ) + return f'{self._resource_group}/{self._cluster_name}/{name}' + + def create_nodepool(self, name: str, node_count: int, node_version: str) -> dict: + start = time.time() + op_id = self.start_create_nodepool_async(name, node_count, node_version) + initiation_latency = time.time() - start + result = self.wait_for_operation(op_id, start) + return {'name': name, 'initiation_latency': initiation_latency, **result} + + def upgrade_nodepool(self, name: str, target_version: str) -> dict: + from azure.mgmt.containerservice.models import AgentPool # pylint: disable=import-outside-toplevel + start = time.time() + ap = self._aks.agent_pools.get(self._resource_group, self._cluster_name, name) + ap.orchestrator_version = target_version + self._aks.agent_pools.begin_create_or_update( + self._resource_group, self._cluster_name, name, ap) + initiation_latency = time.time() - start + op_id = f'{self._resource_group}/{self._cluster_name}/{name}' + result = self.wait_for_operation(op_id, start) + return {'name': name, 'initiation_latency': initiation_latency, **result} + + def delete_nodepool(self, name: str) 
-> dict: + start = time.time() + self._aks.agent_pools.begin_delete( + self._resource_group, self._cluster_name, name) + initiation_latency = time.time() - start + op_id = f'{self._resource_group}/{self._cluster_name}/{name}' + result = self.wait_for_operation(op_id, start) + return {'name': name, 'initiation_latency': initiation_latency, **result} + + def start_cluster_update_async(self) -> str: + cluster = self._aks.managed_clusters.get(self._resource_group, self._cluster_name) + self._aks.managed_clusters.begin_create_or_update( + self._resource_group, self._cluster_name, cluster) + return f'{self._resource_group}/{self._cluster_name}/__cluster__' + + def delete_all_benchmark_nodepools(self): + pools = self._aks.agent_pools.list(self._resource_group, self._cluster_name) + for pool in pools: + if pool.name.startswith('pkb-'): + try: + self.delete_nodepool(pool.name) + except Exception as exc: # pylint: disable=broad-except + logging.warning('Could not delete agent pool %s: %s', pool.name, exc) + From 43bf72db8a8cd28ea548875d963b0f23a2e84a15 Mon Sep 17 00:00:00 2001 From: Ashish Suneja Date: Wed, 13 May 2026 16:27:23 +0530 Subject: [PATCH 02/19] management_plane_benchmarking --- perfkitbenchmarker/linux_benchmarks/k8s_management_benchmark.py | 1 - 1 file changed, 1 deletion(-) diff --git a/perfkitbenchmarker/linux_benchmarks/k8s_management_benchmark.py b/perfkitbenchmarker/linux_benchmarks/k8s_management_benchmark.py index 1e319b49a0..bf0d4a4635 100644 --- a/perfkitbenchmarker/linux_benchmarks/k8s_management_benchmark.py +++ b/perfkitbenchmarker/linux_benchmarks/k8s_management_benchmark.py @@ -1140,4 +1140,3 @@ def delete_all_benchmark_nodepools(self): self.delete_nodepool(pool.name) except Exception as exc: # pylint: disable=broad-except logging.warning('Could not delete agent pool %s: %s', pool.name, exc) - From ba133610b1ed2f22045326034fe49876193546e4 Mon Sep 17 00:00:00 2001 From: Ashish Suneja Date: Wed, 20 May 2026 21:04:41 +0530 Subject: [PATCH 03/19] fix: 
correct BENCHMARK_NAME to k8s_management, add abc import, t3.medium, poll 5s --- .../k8s_management_benchmark.py | 1748 +++++++---------- .../aws/elastic_kubernetes_service.py | 480 +++++ .../azure/azure_kubernetes_service.py | 386 +++- .../providers/gcp/google_kubernetes_engine.py | 196 ++ .../container_service/kubernetes_cluster.py | 114 +- 5 files changed, 1805 insertions(+), 1119 deletions(-) diff --git a/perfkitbenchmarker/linux_benchmarks/k8s_management_benchmark.py b/perfkitbenchmarker/linux_benchmarks/k8s_management_benchmark.py index bf0d4a4635..de568101ae 100644 --- a/perfkitbenchmarker/linux_benchmarks/k8s_management_benchmark.py +++ b/perfkitbenchmarker/linux_benchmarks/k8s_management_benchmark.py @@ -1,1142 +1,716 @@ -""" -Benchmark: K8s Management Plane Operations -=========================================== -Covers GKE / EKS / AKS management plane operations as defined in: - "Benchmark Methodology: GKE Management Plane Operations" (katmitchell@, Mar 2026) - -Scenarios implemented ---------------------- - A. Concurrent Node Pool operations – CreateNodePool, UpdateNodePool, DeleteNodePool - B. Overlapping Cluster + Node Pool op – CreateNodePool fired during ClusterUpdate - C. Large-scale Node Pool provisioning – up to MAX_NODE_POOLS node pools - -Metrics collected per operation --------------------------------- - - initiation_latency : time from API call to async operation accepted - - end_to_end_latency : time from API call to operation DONE/SUCCEEDED - - success / failure : per-operation outcome - - aggregate stats : median, mean, min, max, stddev, success_rate (via PKB sample metadata) - -Cloud coverage --------------- - GCP → GKE (google-cloud-sdk / container_v1 client) - AWS → EKS (boto3) - Azure → AKS (azure-mgmt-containerservice) - -Author: vendor implementation based on methodology doc +# Copyright 2026 PerfKitBenchmarker Authors. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Benchmark for Kubernetes management plane operations. + +Measures GKE/EKS/AKS control-plane API responsiveness via three scenarios: + A. Concurrent node-pool create/upgrade/delete. + B. Node-pool create overlapping with a long-running cluster update. + C. Large-scale node-pool provisioning (single scale or sweep). + +Optimizations for minimum run time: + - Streaming concurrency in Scenario C (no batch barriers) + - Optional pipelined Scenario A (create->upgrade->delete per thread) + - Reduced poll_interval in provider WaitForOperation (5s vs 10s) + - Per-op threads capped at _MAX_CONCURRENT to avoid OS limits + - Accurate delete success rate via attempted_ops denominator """ -import logging -import math -import statistics +import copy +import threading import time -import uuid -from concurrent.futures import ThreadPoolExecutor, as_completed, wait, ALL_COMPLETED -from typing import Callable, List, Optional, Tuple +from typing import Callable from absl import flags +from absl import logging +from perfkitbenchmarker import background_tasks +from perfkitbenchmarker import benchmark_spec as bm_spec from perfkitbenchmarker import configs from perfkitbenchmarker import errors from perfkitbenchmarker import sample -from perfkitbenchmarker import vm_util +from perfkitbenchmarker.configs import benchmark_config_spec +from perfkitbenchmarker.resources.container_service import container as container_lib +from 
perfkitbenchmarker.resources.container_service import kubectl +from perfkitbenchmarker.resources.container_service import kubernetes_cluster -# --------------------------------------------------------------------------- -# Benchmark identity -# --------------------------------------------------------------------------- -BENCHMARK_NAME = 'k8s_management_benchmark' +_SLEEP_POD_NAME = 'pkb-mgmt-sleep' + +BENCHMARK_NAME = 'k8s_management' BENCHMARK_CONFIG = """ -k8s_management_benchmark: +k8s_management: description: > Benchmarks GKE/EKS/AKS management plane operations: concurrent node pool - create/upgrade/delete, overlapping cluster+nodepool ops, and large-scale - provisioning. No data-plane workloads required. + create/upgrade/delete, overlapping cluster + node-pool ops, and large-scale + provisioning. Focused on control-plane API responsiveness. + Spec regions: GCP us-central1, AWS us-east-1 (closest), Azure eastus (closest). + Equivalent machine types across clouds per Google benchmark spec. 
container_cluster: type: Kubernetes - # Minimal node count – benchmark is control-plane-only vm_count: 1 vm_spec: GCP: + # us-central1-a: spec primary region for GCP + # e2-standard-2: 2 vCPU 8GB — equivalent to t3.medium / Standard_D2s_v3 machine_type: e2-standard-2 zone: us-central1-a AWS: + # us-east-1a: closest comparable region to GCP us-central1 + # t3.medium: 2 vCPU 4GB — closest equivalent to e2-standard-2 (Google spec) machine_type: t3.medium zone: us-east-1a Azure: + # eastus: closest comparable region to GCP us-central1 + # Standard_D2s_v3: 2 vCPU 8GB — equivalent to e2-standard-2 machine_type: Standard_D2s_v3 zone: eastus """ -# --------------------------------------------------------------------------- -# Configurable flags -# --------------------------------------------------------------------------- -FLAGS = flags.FLAGS - -flags.DEFINE_integer( - 'mgmt_concurrent_nodepools', 5, - 'Number of node pools to create/upgrade/delete concurrently in Scenario A.') +_VALID_SCENARIOS = frozenset({'A', 'B', 'C'}) -flags.DEFINE_integer( - 'mgmt_large_scale_nodepools', 50, +_CONCURRENT_NODEPOOLS = flags.DEFINE_integer( + 'k8s_mgmt_concurrent_nodepools', + 5, + 'Number of node pools to create/upgrade/delete concurrently in Scenario A.', +) +_LARGE_SCALE_NODEPOOLS = flags.DEFINE_integer( + 'k8s_mgmt_large_scale_nodepools', + 1000, 'Number of node pools to provision in the large-scale Scenario C. ' - 'Set up to 1000 for full stress test (ensure quota is available).') + 'Spec target is 1000; ensure VPC/quota is available before running.', +) +_NODES_PER_NODEPOOL = flags.DEFINE_integer( + 'k8s_mgmt_nodes_per_nodepool', + 2, + 'Number of nodes per node pool. Google spec: 2 nodes per pool.', +) +_INITIAL_VERSION = flags.DEFINE_string( + 'k8s_mgmt_initial_version', + None, + 'Kubernetes version for newly-created node pools (N-1). 
None = auto.', +) +_TARGET_VERSION = flags.DEFINE_string( + 'k8s_mgmt_target_version', + None, + 'Kubernetes version to upgrade node pools to (N). None = cluster version.', +) +_SCENARIOS = flags.DEFINE_list( + 'k8s_mgmt_scenarios', + ['A', 'B', 'C'], + 'Comma-separated subset of scenarios to run. Valid values: A, B, C.', +) +_SCALE_SWEEP = flags.DEFINE_list( + 'k8s_mgmt_scale_sweep', + [], + 'Comma-separated list of node-pool counts for Scenario C scale sweep. ' + 'Each scale runs as a separate sub-run with full create/delete cycle. ' + 'Example: --k8s_mgmt_scale_sweep=10,50,100,500,1000. ' + 'If empty, uses --k8s_mgmt_large_scale_nodepools.', +) +_MAX_CONCURRENT = flags.DEFINE_integer( + 'k8s_mgmt_max_concurrent', + 50, + 'Cap on concurrent provider API calls within a batch. ' + 'Higher = faster but more aggressive on connection pools.', +) +_PIPELINE_SCENARIO_A = flags.DEFINE_boolean( + 'k8s_mgmt_pipeline_scenario_a', + False, + 'If True, run Scenario A as a per-pool pipeline (create->upgrade->delete ' + 'back-to-back per thread). Minimizes wall time but measures ops under ' + 'mixed-type concurrent load. Default False = phase-by-phase (spec-strict).', +) + +# AKS caps node-pool names at 12 chars — keep all names within that limit. +_PREFIX = 'pkbm' +_SCENARIO_A_NAME = lambda i: f'{_PREFIX}a{i:03d}' +_SCENARIO_B_NAME = f'{_PREFIX}b' +_SCENARIO_C_NAME = lambda i: f'{_PREFIX}c{i:04d}' -flags.DEFINE_integer( - 'mgmt_nodes_per_nodepool', 1, - 'Number of nodes per node pool. 
Kept low to reduce quota consumption.') -flags.DEFINE_string( - 'mgmt_k8s_version', None, - 'Kubernetes version for the cluster (None = cloud default / latest).') +def GetConfig(user_config): + return configs.LoadConfig(BENCHMARK_CONFIG, user_config, BENCHMARK_NAME) + + +def CheckPrerequisites( + benchmark_config: benchmark_config_spec.BenchmarkConfigSpec, +): + """Validates flag values and cluster type before any cloud calls.""" + invalid = [ + s for s in _SCENARIOS.value if s.strip().upper() not in _VALID_SCENARIOS + ] + if invalid: + raise errors.Config.InvalidValue( + f'Invalid value(s) for --k8s_mgmt_scenarios: {invalid}. ' + f'Valid options: {sorted(_VALID_SCENARIOS)}.' + ) + for s in _SCALE_SWEEP.value: + try: + int(s.strip()) + except ValueError as e: + raise errors.Config.InvalidValue( + f'Non-integer value in --k8s_mgmt_scale_sweep: {s!r}' + ) from e + if benchmark_config.container_cluster.type != 'Kubernetes': + raise errors.Config.InvalidValue( + 'k8s_management benchmark requires a Kubernetes container cluster.' + ) -flags.DEFINE_string( - 'mgmt_nodepool_initial_version', None, - 'Initial node pool version (N-2). None = derive from cluster version.') -flags.DEFINE_string( - 'mgmt_nodepool_target_version', None, - 'Target node pool upgrade version (N-1 or latest). None = latest available.') +def Prepare(benchmark_spec: bm_spec.BenchmarkSpec) -> None: + """Asserts the cluster is reachable; deploys spec-defined sleep workload.""" + cluster = benchmark_spec.container_cluster + assert isinstance(cluster, kubernetes_cluster.KubernetesCluster) + benchmark_spec.always_call_cleanup = True + logging.info( + 'k8s_management Prepare: cluster=%s, version=%s', + cluster.name, + cluster.k8s_version, + ) + # Spec workload: "a simple container that sleeps for a given time". + # Confirms data-plane reachability; generates no data-plane load. 
+ _, _, rc = kubectl.RunKubectlCommand( + [ + 'run', _SLEEP_POD_NAME, + '--image=busybox', + '--restart=Never', + '--', 'sleep', '86400', + ], + raise_on_failure=False, + ) + if rc: + logging.warning( + 'Sleep workload deploy returned rc=%d (non-fatal; continuing)', rc + ) -flags.DEFINE_integer( - 'mgmt_operation_timeout_sec', 2700, - 'Maximum seconds to wait for any single async management-plane operation.') -flags.DEFINE_integer( - 'mgmt_poll_interval_sec', 15, - 'Polling interval in seconds when waiting for async operations to complete.') +def _CleanStartSweep(cluster: kubernetes_cluster.KubernetesCluster) -> None: + """Deletes any stale pkbm* node pools so each run starts clean (spec C.2).""" + try: + stale = [n for n in cluster.GetNodePoolNames() if n.startswith(_PREFIX)] + except Exception: # pylint: disable=broad-except + logging.exception('CleanStart: failed to list node pools') + return + if not stale: + logging.info('CleanStart: no stale pools found — clean start confirmed.') + return + logging.warning('CleanStart: deleting %d stale pools: %s', len(stale), stale) + background_tasks.RunThreaded(cluster.DeleteNodePool, stale) + + +def Run(benchmark_spec: bm_spec.BenchmarkSpec) -> list[sample.Sample]: + """Runs the selected scenarios and returns flat list of samples.""" + cluster = benchmark_spec.container_cluster + assert isinstance(cluster, kubernetes_cluster.KubernetesCluster) + + # Spec C.2: start clean. + _CleanStartSweep(cluster) + + # Resolve versions once; log clearly; tag every sample. + # Google spec: initial=N-1, target=N (adjacent minor upgrade). 
+ flag_initial = _INITIAL_VERSION.value + flag_target = _TARGET_VERSION.value + if flag_initial and flag_target: + initial, target = flag_initial, flag_target + source = 'flags' + else: + resolved_initial, resolved_target = cluster.ResolveNodePoolVersions() + initial = flag_initial or resolved_initial + target = flag_target or resolved_target + source = 'auto-resolved' if not (flag_initial or flag_target) else 'mixed' + + logging.info( + 'NodePool versions (%s): initial=%s -> target=%s ' + '(cluster k8s_version=%s) | nodes_per_pool=%d | machine_type=%s', + source, initial, target, cluster.k8s_version, + _NODES_PER_NODEPOOL.value, + cluster.default_nodepool.machine_type + if hasattr(cluster, 'default_nodepool') else 'unknown', + ) + + scenarios = {s.strip().upper() for s in _SCENARIOS.value} + samples: list[sample.Sample] = [] + + if 'A' in scenarios: + samples += _RunScenarioA(cluster, initial, target) + if 'B' in scenarios: + samples += _RunScenarioB(cluster, initial) + if 'C' in scenarios: + scales = ( + [int(x.strip()) for x in _SCALE_SWEEP.value] + if _SCALE_SWEEP.value + else [_LARGE_SCALE_NODEPOOLS.value] + ) + logging.info('Scenario C: scale sweep = %s', scales) + for scale in scales: + scenario_c_samples = _RunScenarioC(cluster, initial, scale) + for s in scenario_c_samples: + s.metadata['scenario_c_scale'] = str(scale) + samples += scenario_c_samples + + # Tag all samples with version path and run config for published results. 
+ run_meta = { + 'initial_version': str(initial), + 'target_version': str(target), + 'cluster_k8s_version': str(cluster.k8s_version), + 'nodes_per_nodepool': str(_NODES_PER_NODEPOOL.value), + 'concurrent_nodepools': str(_CONCURRENT_NODEPOOLS.value), + } + for s in samples: + s.metadata.update(run_meta) + + return samples + + +def Cleanup(benchmark_spec: bm_spec.BenchmarkSpec) -> None: + """Best-effort delete of leftover benchmark node pools and sleep pod.""" + cluster = benchmark_spec.container_cluster + if cluster is None: + return + kubectl.RunKubectlCommand( + ['delete', 'pod', _SLEEP_POD_NAME, '--ignore-not-found'], + raise_on_failure=False, + ) + try: + leftover = [ + n for n in cluster.GetNodePoolNames() if n.startswith(_PREFIX) + ] + except Exception: # pylint: disable=broad-except + logging.exception('Cleanup: failed to list node pools') + return + if not leftover: + return + logging.info('Cleanup: deleting %d leftover node pools', len(leftover)) + background_tasks.RunThreaded(cluster.DeleteNodePool, leftover) + # --------------------------------------------------------------------------- -# Constants +# Scenario A # --------------------------------------------------------------------------- -# Operation result states -STATE_DONE = 'DONE' -STATE_SUCCEEDED = 'SUCCEEDED' # Azure uses this -STATE_FAILED = 'FAILED' -STATE_RUNNING = 'RUNNING' -STATE_CREATING = 'CREATING' -STATE_UPDATING = 'UPDATING' -STATE_DELETING = 'DELETING' - -TERMINAL_STATES = {STATE_DONE, STATE_SUCCEEDED, STATE_FAILED, 'ERROR', 'CANCELED'} - -# =========================================================================== -# PKB lifecycle hooks -# =========================================================================== - -def GetConfig(user_config): - """Returns the benchmark configuration merged with user overrides.""" - return configs.LoadConfig(BENCHMARK_CONFIG, user_config, BENCHMARK_NAME) +def _RunScenarioA( + cluster: kubernetes_cluster.KubernetesCluster, + initial: str, + target: 
str, +) -> list[sample.Sample]: + """Concurrent CreateNodePool, UpgradeNodePool, DeleteNodePool.""" + n = _CONCURRENT_NODEPOOLS.value + if _PIPELINE_SCENARIO_A.value: + logging.info( + 'Scenario A (pipelined): %d pools, initial=%s, target=%s', n, initial, target) + return _RunScenarioAPipelined(cluster, n, initial, target) + + logging.info( + 'Scenario A (phase-by-phase): %d pools, initial=%s, target=%s', n, initial, target) + pool_names = [_SCENARIO_A_NAME(i) for i in range(n)] + configs_ = [_MakeNodePoolConfig(cluster, name) for name in pool_names] + samples: list[sample.Sample] = [] + + # ── Phase 1: concurrent creates ────────────────────────────────────────── + create_results = _RunAsync( + kickoff = lambda cfg: cluster.CreateNodePoolAsync(cfg, node_version=initial), + wait_fn = cluster.WaitForOperation, + items = configs_, + get_name = lambda cfg: cfg.name, + ) + samples += _OpSamples('ScenarioA_Create', create_results, + attempted_ops=len(pool_names)) + + # ── Phase 2: concurrent upgrades (only successfully created pools) ──────── + created = [name for name, _, _, err in create_results if err is None] + logging.info('Scenario A: %d/%d pools created — proceeding to upgrade', + len(created), n) + upgrade_results = _RunAsync( + kickoff = lambda name: cluster.UpgradeNodePoolAsync(name, target), + wait_fn = cluster.WaitForOperation, + items = created, + get_name = str, + ) + samples += _OpSamples('ScenarioA_Upgrade', upgrade_results, + attempted_ops=len(created)) + + # ── Phase 3: concurrent deletes (live-list to catch EKS rollbacks) ──────── + alive = [p for p in cluster.GetNodePoolNames() if p.startswith(f'{_PREFIX}a')] + logging.info('Scenario A: %d live pools found for delete (originally %d)', + len(alive), n) + delete_results = _RunAsync( + kickoff = cluster.DeleteNodePoolAsync, + wait_fn = cluster.WaitForOperation, + items = alive, + get_name = str, + ) + # attempted_ops=n: success rate reflects original request, not just live pools. 
+ # EKS rolls back timed-out pools silently — without this fix delete shows 100%. + samples += _OpSamples('ScenarioA_Delete', delete_results, + attempted_ops=n) + return samples + + +def _RunScenarioAPipelined( + cluster: kubernetes_cluster.KubernetesCluster, + n: int, + initial: str, + target: str, +) -> list[sample.Sample]: + """Per-pool pipeline: create->upgrade->delete back-to-back per thread. + + Minimizes wall time: max_i(create_i + upgrade_i + delete_i) vs + max(creates)+max(upgrades)+max(deletes) in phase-by-phase mode. + Trade-off: ops run under mixed-type concurrent load. + """ + pool_names = [_SCENARIO_A_NAME(i) for i in range(n)] + creates = _Results() + upgrades = _Results() + deletes = _Results() + + def _do_pool(name: str): + cfg = _MakeNodePoolConfig(cluster, name) + init, e2e, err = _TimedAsync( + lambda: cluster.CreateNodePoolAsync(cfg, node_version=initial), + cluster.WaitForOperation, + ) + creates.add(name, init, e2e, err) + if err is not None: + return + init, e2e, err = _TimedAsync( + lambda: cluster.UpgradeNodePoolAsync(name, target), + cluster.WaitForOperation, + ) + upgrades.add(name, init, e2e, err) + init, e2e, err = _TimedAsync( + lambda: cluster.DeleteNodePoolAsync(name), + cluster.WaitForOperation, + ) + deletes.add(name, init, e2e, err) + background_tasks.RunThreaded( + _do_pool, pool_names, + max_concurrent_threads=min(n, _MAX_CONCURRENT.value), + ) + samples: list[sample.Sample] = [] + samples += _OpSamples('ScenarioA_Create', creates.entries, attempted_ops=n) + samples += _OpSamples('ScenarioA_Upgrade', upgrades.entries, attempted_ops=n) + samples += _OpSamples('ScenarioA_Delete', deletes.entries, attempted_ops=n) + return samples -def Prepare(benchmark_spec): - """ - Verifies the cluster is reachable and collects version metadata. - PKB has already created the cluster at this point. - """ - cluster = benchmark_spec.container_cluster - # PKB stores cloud on the cluster's CLOUD class attribute (e.g. 'AWS', 'GCP', 'Azure'). 
- # Fall back to FLAGS.cloud if the cluster doesn't expose it directly. - cloud = getattr(cluster, 'CLOUD', None) or FLAGS.cloud - cloud = cloud.upper() if cloud else 'AWS' +# --------------------------------------------------------------------------- +# Scenario B +# --------------------------------------------------------------------------- - logging.info('[Prepare] Cluster %s on %s - cluster is ready (PKB provisioned).', cluster.name, cloud) - # PKB already waits for the cluster to be ready during _PostCreate. - # Verify reachability via kubectl before proceeding. - try: - cluster.RunKubectl(['get', 'nodes', '--no-headers']) - logging.info('[Prepare] kubectl get nodes succeeded - cluster is reachable.') - except Exception as exc: # pylint: disable=broad-except - logging.warning('[Prepare] kubectl get nodes warning: %s', exc) +def _RunScenarioB( + cluster: kubernetes_cluster.KubernetesCluster, + initial: str, +) -> list[sample.Sample]: + """CreateNodePool fired concurrently with a long-running cluster update. + + Both ops kick off async on separate threads; initiation + E2E latency + recorded independently. Overlap window duration = ClusterUpdate E2E latency. 
+ """ + logging.info('Scenario B: overlapping cluster update + node-pool create') + cfg = _MakeNodePoolConfig(cluster, _SCENARIO_B_NAME) + results = _Results() + + def _do_cluster_update(): + init, e2e, err = _TimedAsync( + cluster.UpdateClusterAsync, cluster.WaitForOperation) + results.add('ScenarioB_ClusterUpdate', init, e2e, err) + logging.info('Scenario B ClusterUpdate: init=%.2fs e2e=%.2fs ok=%s', + init, e2e, err is None) + + def _do_create(): + init, e2e, err = _TimedAsync( + lambda: cluster.CreateNodePoolAsync(cfg, node_version=initial), + cluster.WaitForOperation, + ) + results.add('ScenarioB_NodePoolCreate', init, e2e, err) + logging.info('Scenario B NodePoolCreate: init=%.2fs e2e=%.2fs ok=%s', + init, e2e, err is None) - client = _get_cloud_client(cloud, cluster) + background_tasks.RunThreaded(lambda fn: fn(), [_do_cluster_update, _do_create]) - # Resolve Kubernetes / node-pool versions dynamically if not pinned via flags - initial_version, target_version = client.resolve_versions( - flags_initial = FLAGS.mgmt_nodepool_initial_version, - flags_target = FLAGS.mgmt_nodepool_target_version, - ) + samples: list[sample.Sample] = [] + for entry in results.entries: + name, init_dur, e2e_dur, err = entry + samples += _OpSamples(name, [(name, init_dur, e2e_dur, err)], attempted_ops=1) - # Stash on benchmark_spec so Run() can access them without re-querying - benchmark_spec.mgmt_client = client - benchmark_spec.mgmt_initial_version = initial_version - benchmark_spec.mgmt_target_version = target_version - - logging.info('[Prepare] Node pool initial version: %s → target version: %s', - initial_version, target_version) - - -def Run(benchmark_spec): - """ - Executes all three benchmark scenarios and returns a flat list of Samples. 
- """ - client = benchmark_spec.mgmt_client - initial_version = benchmark_spec.mgmt_initial_version - target_version = benchmark_spec.mgmt_target_version - results = [] - - # ------------------------------------------------------------------ - # Scenario A – Concurrent Node Pool operations - # ------------------------------------------------------------------ - logging.info('=' * 60) - logging.info('SCENARIO A: Concurrent Node Pool Operations') - logging.info('=' * 60) - results += _run_scenario_a(client, initial_version, target_version) - - # ------------------------------------------------------------------ - # Scenario B – Overlapping Cluster Update + Node Pool Create - # ------------------------------------------------------------------ - logging.info('=' * 60) - logging.info('SCENARIO B: Overlapping Cluster Update + NodePool Create') - logging.info('=' * 60) - results += _run_scenario_b(client, initial_version) - - # ------------------------------------------------------------------ - # Scenario C – Large-scale Node Pool provisioning - # ------------------------------------------------------------------ - logging.info('=' * 60) - logging.info('SCENARIO C: Large-Scale Node Pool Provisioning (%d pools)', - FLAGS.mgmt_large_scale_nodepools) - logging.info('=' * 60) - results += _run_scenario_c(client, initial_version) - - return results - - -def Cleanup(benchmark_spec): - """ - Best-effort deletion of any node pools created during the run. - PKB deletes the cluster itself; we only clean up leftover node pools. 
- """ - client = getattr(benchmark_spec, 'mgmt_client', None) - if client is None: - return - logging.info('[Cleanup] Removing any benchmark node pools…') - try: - client.delete_all_benchmark_nodepools() - except Exception as exc: # pylint: disable=broad-except - logging.warning('[Cleanup] Non-fatal error during node pool cleanup: %s', exc) - - -# =========================================================================== -# Scenario implementations -# =========================================================================== - -def _run_scenario_a( - client, - initial_version: str, - target_version: str, -) -> List[sample.Sample]: - """ - Scenario A: Concurrent CreateNodePool, UpdateNodePool, DeleteNodePool. - - Steps - ----- - 1. Concurrently create N node pools (initial_version). - 2. Concurrently upgrade all N node pools (initial_version → target_version). - 3. Concurrently delete all N node pools. - - Returns one Sample per individual operation plus aggregate stat Samples. - """ - n = FLAGS.mgmt_concurrent_nodepools - results = [] - - # ---- Step A1: Concurrent Create ---------------------------------------- - pool_names = [_pool_name('a-create', i) for i in range(n)] - create_ops = _run_concurrent_operations( - operation_fn = lambda name: client.create_nodepool( - name = name, - node_count = FLAGS.mgmt_nodes_per_nodepool, - node_version = initial_version, - ), - items = pool_names, - op_label = 'ScenarioA_CreateNodePool', - ) - results += _ops_to_samples(create_ops, 'ScenarioA_CreateNodePool') - - # ---- Step A2: Concurrent Upgrade ---------------------------------------- - # Only upgrade pools that were successfully created - created_pools = [op['name'] for op in create_ops if op['success']] - upgrade_ops = _run_concurrent_operations( - operation_fn = lambda name: client.upgrade_nodepool( - name = name, - target_version = target_version, - ), - items = created_pools, - op_label = 'ScenarioA_UpgradeNodePool', - ) - results += _ops_to_samples(upgrade_ops, 
'ScenarioA_UpgradeNodePool') - - # ---- Step A3: Concurrent Delete ----------------------------------------- - # Only delete pools that were successfully created — timed-out/failed creates - # may have been rolled back by EKS and won't exist to delete. - existing_pools = [op['name'] for op in create_ops if op['success']] - if not existing_pools: - logging.warning('[ScenarioA] No successfully created pools to delete.') - delete_ops = _run_concurrent_operations( - operation_fn = lambda name: client.delete_nodepool(name), - items = existing_pools, - op_label = 'ScenarioA_DeleteNodePool', - ) - results += _ops_to_samples(delete_ops, 'ScenarioA_DeleteNodePool') - - return results - - -def _run_scenario_b(client, initial_version: str) -> List[sample.Sample]: - """ - Scenario B: Initiate NodePool creation *during* an ongoing Cluster Update. - - Steps - ----- - 1. Fire a ClusterUpdate (async – do NOT wait for completion). - 2. Immediately fire a CreateNodePool. - 3. Record initiation latency for both, then poll both to completion. - 4. Record end-to-end latency and success/failure for each. - """ - results = [] - pool_name = _pool_name('b-overlap', 0) - - # Fire cluster update (async) - cluster_op_start = time.time() - cluster_op_id = client.start_cluster_update_async() - cluster_initiation_latency = time.time() - cluster_op_start - - results.append(sample.Sample( - 'ScenarioB_ClusterUpdate_InitiationLatency', - cluster_initiation_latency, - 'seconds', - {'operation': 'ClusterUpdate', 'phase': 'initiation'}, - )) - logging.info('[ScenarioB] ClusterUpdate initiated (op_id=%s) in %.2fs', - cluster_op_id, cluster_initiation_latency) - - # Immediately fire CreateNodePool (overlapping) - np_start = time.time() - np_op_id = client.start_create_nodepool_async( - name = pool_name, - node_count = FLAGS.mgmt_nodes_per_nodepool, - node_version = initial_version, - ) - np_initiation_latency = time.time() - np_start + # Remove test pool (best-effort). 
+ try: + cluster.DeleteNodePool(_SCENARIO_B_NAME) + except Exception: # pylint: disable=broad-except + logging.exception('Scenario B: failed to delete test pool') + return samples - results.append(sample.Sample( - 'ScenarioB_CreateNodePool_InitiationLatency', - np_initiation_latency, - 'seconds', - {'operation': 'CreateNodePool', 'phase': 'initiation', 'overlap': 'ClusterUpdate'}, - )) - logging.info('[ScenarioB] CreateNodePool initiated (op_id=%s) in %.2fs', - np_op_id, np_initiation_latency) - - # Poll both operations to completion concurrently - with ThreadPoolExecutor(max_workers=2) as executor: - cluster_future = executor.submit( - client.wait_for_operation, cluster_op_id, cluster_op_start) - np_future = executor.submit( - client.wait_for_operation, np_op_id, np_start) - - cluster_result = cluster_future.result() - np_result = np_future.result() - - results.append(sample.Sample( - 'ScenarioB_ClusterUpdate_EndToEndLatency', - cluster_result['end_to_end_latency'], - 'seconds', - { - 'operation': 'ClusterUpdate', - 'success': str(cluster_result['success']), - 'final_state': cluster_result.get('final_state', 'unknown'), - }, - )) - results.append(sample.Sample( - 'ScenarioB_CreateNodePool_EndToEndLatency', - np_result['end_to_end_latency'], - 'seconds', - { - 'operation': 'CreateNodePool', - 'success': str(np_result['success']), - 'overlap': 'ClusterUpdate', - 'final_state': np_result.get('final_state', 'unknown'), - }, - )) - # Cleanup the test node pool - try: - client.delete_nodepool(pool_name) - except Exception as exc: # pylint: disable=broad-except - logging.warning('[ScenarioB] Could not delete test node pool %s: %s', pool_name, exc) - - return results - - -def _run_scenario_c(client, initial_version: str) -> List[sample.Sample]: - """ - Scenario C: Large-scale Node Pool provisioning (up to 1,000 node pools). - - All node pools are created concurrently (batched to avoid API flooding). 
- After all complete, they are deleted concurrently to restore cluster state. - """ - n = FLAGS.mgmt_large_scale_nodepools - batch_size = 50 # submit in batches to avoid rate-limiting - results = [] - - pool_names = [_pool_name('c-large', i) for i in range(n)] - all_ops = [] - - logging.info('[ScenarioC] Creating %d node pools in batches of %d…', n, batch_size) - for batch_start in range(0, n, batch_size): - batch = pool_names[batch_start: batch_start + batch_size] - batch_ops = _run_concurrent_operations( - operation_fn = lambda name: client.create_nodepool( - name = name, - node_count = FLAGS.mgmt_nodes_per_nodepool, - node_version = initial_version, - ), - items = batch, - op_label = f'ScenarioC_CreateNodePool_batch{batch_start // batch_size}', - ) - all_ops += batch_ops - - results += _ops_to_samples(all_ops, 'ScenarioC_CreateNodePool') - - # Clean up – delete all created pools - created = [op['name'] for op in all_ops if op['success']] - logging.info('[ScenarioC] Deleting %d successfully created pools…', len(created)) - delete_ops = [] - for batch_start in range(0, len(created), batch_size): - batch = created[batch_start: batch_start + batch_size] - batch_ops = _run_concurrent_operations( - operation_fn = lambda name: client.delete_nodepool(name), - items = batch, - op_label = f'ScenarioC_DeleteNodePool_batch{batch_start // batch_size}', - ) - delete_ops += batch_ops - results += _ops_to_samples(delete_ops, 'ScenarioC_DeleteNodePool') - - return results - - -# =========================================================================== -# Concurrency helpers -# =========================================================================== - -def _run_concurrent_operations( - operation_fn: Callable[[str], dict], - items: List[str], - op_label: str, -) -> List[dict]: - """ - Runs operation_fn(name) concurrently for every item in `items`. 
- - Each callable must return a dict with at minimum: - { 'name': str, 'success': bool, - 'initiation_latency': float, 'end_to_end_latency': float } - - Returns the list of result dicts (order not guaranteed). - """ - results = [] - - if not items: - logging.info('[%s] No items to process, skipping.', op_label) - return results - - n_workers = min(len(items), 50) # cap thread pool to avoid OS limits - - logging.info('[%s] Launching %d concurrent operations...', op_label, len(items)) - with ThreadPoolExecutor(max_workers=n_workers) as executor: - future_to_name = {executor.submit(operation_fn, name): name for name in items} - for future in as_completed(future_to_name): - name = future_to_name[future] - try: - result = future.result() - results.append(result) - logging.info('[%s] %-40s e2e=%.2fs init=%.2fs ok=%s', - op_label, name, - result.get('end_to_end_latency', -1), - result.get('initiation_latency', -1), - result.get('success', False)) - except Exception as exc: # pylint: disable=broad-except - logging.error('[%s] Operation failed for %s: %s', op_label, name, exc) - results.append({ - 'name': name, - 'success': False, - 'initiation_latency': -1.0, - 'end_to_end_latency': -1.0, - 'error': str(exc), - }) - return results - - -# =========================================================================== -# Sample construction + aggregate statistics -# =========================================================================== - -def _ops_to_samples(ops: List[dict], metric_prefix: str) -> List[sample.Sample]: - """ - Converts a list of operation result dicts into PKB Samples. 
- - Emits: - - One Sample per operation for initiation and end-to-end latency - - Aggregate stat Samples: median, mean, min, max, stddev, success_rate - """ - samples = [] - - init_latencies = [] - e2e_latencies = [] - success_count = 0 - - for op in ops: - meta = { - 'operation_name': op['name'], - 'success': str(op['success']), - } - if 'error' in op: - meta['error'] = op['error'] - if 'final_state' in op: - meta['final_state'] = op['final_state'] - - if op['success']: - success_count += 1 - - if op['initiation_latency'] >= 0: - samples.append(sample.Sample( - f'{metric_prefix}_InitiationLatency', - op['initiation_latency'], - 'seconds', - meta, - )) - init_latencies.append(op['initiation_latency']) - - if op['end_to_end_latency'] >= 0: - samples.append(sample.Sample( - f'{metric_prefix}_EndToEndLatency', - op['end_to_end_latency'], - 'seconds', - meta, - )) - e2e_latencies.append(op['end_to_end_latency']) - - # Aggregate stats - total = len(ops) - if total > 0: - samples.append(sample.Sample( - f'{metric_prefix}_SuccessRate', - success_count / total * 100, - 'percent', - {'total_ops': str(total), 'successful_ops': str(success_count)}, - )) - - for label, latencies in [('InitiationLatency', init_latencies), - ('EndToEndLatency', e2e_latencies)]: - if len(latencies) < 2: - continue - agg_meta = {'sample_count': str(len(latencies))} - samples += [ - sample.Sample(f'{metric_prefix}_{label}_Median', - statistics.median(latencies), 'seconds', agg_meta), - sample.Sample(f'{metric_prefix}_{label}_Mean', - statistics.mean(latencies), 'seconds', agg_meta), - sample.Sample(f'{metric_prefix}_{label}_Min', - min(latencies), 'seconds', agg_meta), - sample.Sample(f'{metric_prefix}_{label}_Max', - max(latencies), 'seconds', agg_meta), - sample.Sample(f'{metric_prefix}_{label}_StdDev', - statistics.stdev(latencies), 'seconds', agg_meta), - ] +# --------------------------------------------------------------------------- +# Scenario C +# 
--------------------------------------------------------------------------- +def _RunScenarioC( + cluster: kubernetes_cluster.KubernetesCluster, + initial: str, + scale: int, +) -> list[sample.Sample]: + """Large-scale node-pool provisioning at a given scale. + + Streams all `scale` creates through a single executor capped at + _MAX_CONCURRENT workers — as each op completes the next starts immediately + (no batch barriers). Deletes target only pools still listed by the cluster; + the delete SuccessRate denominator stays `scale` (attempted_ops=scale). + """ + logging.info( + 'Scenario C: scale=%d, max_concurrent=%d, initial_version=%s', + scale, _MAX_CONCURRENT.value, initial, + ) + pool_names = [_SCENARIO_C_NAME(i) for i in range(scale)] + configs_ = [_MakeNodePoolConfig(cluster, name) for name in pool_names] + samples: list[sample.Sample] = [] + + # ── Creates ─────────────────────────────────────────────────────────────── + create_results = _RunAsync( + kickoff = lambda cfg: cluster.CreateNodePoolAsync( + cfg, node_version=initial), + wait_fn = cluster.WaitForOperation, + items = configs_, + get_name = lambda cfg: cfg.name, + ) + created_ok = sum(1 for _, _, _, err in create_results if err is None) + logging.info('Scenario C scale=%d: %d/%d creates succeeded', + scale, created_ok, scale) + samples += _OpSamples('ScenarioC_Create', create_results, + attempted_ops=scale) + + # ── Deletes (live-list) ─────────────────────────────────────────────────── + alive = [p for p in cluster.GetNodePoolNames() if p.startswith(f'{_PREFIX}c')] + logging.info( + 'Scenario C scale=%d: %d live pools for delete (originally requested %d; ' + '%d rolled back by cloud)', + scale, len(alive), scale, scale - len(alive), + ) + if not alive: + logging.warning( + 'Scenario C scale=%d: 0 live pools — all timed-out creates were ' + 'rolled back. 
Recording 0%% delete success rate.', scale) + samples += _OpSamples('ScenarioC_Delete', [], attempted_ops=scale) return samples + delete_results = _RunAsync( + kickoff = cluster.DeleteNodePoolAsync, + wait_fn = cluster.WaitForOperation, + items = alive, + get_name = str, + ) + # attempted_ops=scale: accurate rate against original request count. + samples += _OpSamples('ScenarioC_Delete', delete_results, + attempted_ops=scale) + return samples + -# =========================================================================== -# Cloud client factory -# =========================================================================== +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- -def _get_cloud_client(cloud: str, cluster): - """Returns the appropriate cloud-specific management client.""" - cloud_upper = cloud.upper() - if cloud_upper == 'GCP': - return GKEManagementClient(cluster) - elif cloud_upper == 'AWS': - return EKSManagementClient(cluster) - elif cloud_upper == 'AZURE': - return AKSManagementClient(cluster) +class _Results: + """Thread-safe collector for (name, init_latency, e2e_latency, error).""" + + def __init__(self): + self._lock = threading.Lock() + self.entries: list[tuple[str, float, float, Exception | None]] = [] + + def add(self, name: str, init_dur: float, e2e_dur: float, + err: Exception | None) -> None: + with self._lock: + self.entries.append((name, init_dur, e2e_dur, err)) + + +def _TimedAsync( + kickoff: Callable[[], str], + wait_fn: Callable[[str], None], +) -> tuple[float, float, Exception | None]: + """Runs kickoff() then wait_fn(handle); returns (init_lat, e2e_lat, err). + + init_lat = time for kickoff() to return (API accepted). + e2e_lat = total wall time including wait. On kickoff failure both are set + to elapsed time at failure point. NOTE(review): timed with time.time() (wall clock); time.monotonic() is immune to clock adjustments — consider switching. 
+ """ + init_start = time.time() + try: + handle = kickoff() + except Exception as exc: # pylint: disable=broad-except + elapsed = time.time() - init_start + return elapsed, elapsed, exc + init_dur = time.time() - init_start + try: + wait_fn(handle) + return init_dur, time.time() - init_start, None + except Exception as exc: # pylint: disable=broad-except + return init_dur, time.time() - init_start, exc + + +def _RunAsync( + kickoff: Callable, + wait_fn: Callable[[str], None], + items: list, + get_name: Callable[[object], str], +) -> list[tuple[str, float, float, Exception | None]]: + """Fires kickoff(item) for every item under a concurrency cap; returns timings. + + Each item is timed by _TimedAsync on a background_tasks.RunThreaded worker. + Results are appended as ops finish: completion order, not input order. + """ + if not items: + return [] + results = _Results() + cap = min(len(items), _MAX_CONCURRENT.value) + + def _wrap(item): + init_dur, e2e_dur, err = _TimedAsync(lambda: kickoff(item), wait_fn) + name = get_name(item) + results.add(name, init_dur, e2e_dur, err) + logging.info('%s ok=%s initiation=%.2fs end_to_end=%.2fs', + name, err is None, init_dur, e2e_dur) + + background_tasks.RunThreaded(_wrap, items, max_concurrent_threads=cap) + return results.entries + + +def _MakeNodePoolConfig( + cluster: kubernetes_cluster.KubernetesCluster, + name: str, +) -> container_lib.BaseNodePoolConfig: + """Copies the cluster's default pool config as a fixed-size pool `name`.""" + cfg = copy.copy(cluster.default_nodepool) # shallow copy: nested objects stay shared + cfg.name = name + cfg.num_nodes = _NODES_PER_NODEPOOL.value + cfg.min_nodes = _NODES_PER_NODEPOOL.value + cfg.max_nodes = _NODES_PER_NODEPOOL.value + return cfg + + +def _OpSamples( + metric_prefix: str, + results: list[tuple[str, float, float, Exception | None]], + attempted_ops: int = None, +) -> list[sample.Sample]: + """Per-op + aggregate samples for initiation and end-to-end latency. 
+ + Args: + metric_prefix: prefix for all metric names. 
+ results: list of (operation_name, init_lat, e2e_lat, err). + attempted_ops: total ops originally requested. Used as the denominator + for SuccessRate so EKS-rolled-back pools (which never + appear in results) are counted as failures, not ignored. + If None, len(results) is used (original behavior). + """ + samples: list[sample.Sample] = [] + init_latencies: list[float] = [] + e2e_latencies: list[float] = [] + success = 0 + + for name, init_dur, e2e_dur, err in results: + meta = {'operation_name': name, 'success': str(err is None)} + if err is not None: + meta['error'] = str(err)[:200] else: - raise errors.Benchmarks.PrepareException( - f'Unsupported cloud for management plane benchmark: {cloud}') - - -# =========================================================================== -# Utility helpers -# =========================================================================== - -def _pool_name(scenario_tag: str, index: int) -> str: - """Generates a deterministic, PKB-safe node pool name.""" - return f'pkb-{scenario_tag}-{index:04d}' - - -def _wait_for_operation_generic( - poll_fn: Callable[[], str], - op_id: str, - start_time: float, - timeout_sec: int, - poll_interval_sec: int, -) -> dict: - """ - Generic async-operation poller. - - Args: - poll_fn: zero-arg callable that returns the current operation state string. - op_id: operation identifier (for logging). - start_time: epoch time when the operation was initiated. - timeout_sec: hard timeout. - poll_interval_sec: sleep between polls. 
- - Returns: - { 'success': bool, 'end_to_end_latency': float, 'final_state': str } - """ - deadline = start_time + timeout_sec - while True: - state = poll_fn() - if state in TERMINAL_STATES: - end_to_end = time.time() - start_time - success = state in {STATE_DONE, STATE_SUCCEEDED} - logging.info('Operation %s reached terminal state %s in %.2fs', - op_id, state, end_to_end) - return { - 'success': success, - 'end_to_end_latency': end_to_end, - 'final_state': state, - } - if time.time() > deadline: - logging.error('Operation %s timed out after %ds (last state: %s)', - op_id, timeout_sec, state) - return { - 'success': False, - 'end_to_end_latency': time.time() - start_time, - 'final_state': 'TIMEOUT', - } - logging.debug('Operation %s state=%s – polling again in %ds', op_id, state, poll_interval_sec) - time.sleep(poll_interval_sec) - - -# =========================================================================== -# GKE Management Client (GCP) -# =========================================================================== - -class GKEManagementClient: - """ - Wraps the GKE container_v1 REST API for management plane operations. - Requires: google-cloud-container (pip install google-cloud-container) - """ - - def __init__(self, cluster): - from google.cloud import container_v1 # pylint: disable=import-outside-toplevel - self._cluster = cluster - self._gke = container_v1.ClusterManagerClient() - self._project = cluster.project - self._location = cluster.zone # e.g. 
'us-central1-a' or 'us-central1' - self._cluster_name = cluster.name - self._parent = f'projects/{self._project}/locations/{self._location}' - self._cluster_path = f'{self._parent}/clusters/{self._cluster_name}' - - # ------------------------------------------------------------------ - # Version resolution - # ------------------------------------------------------------------ - - def resolve_versions(self, flags_initial, flags_target): - from google.cloud import container_v1 # pylint: disable=import-outside-toplevel - sc = self._gke.get_server_config(name=self._parent) - valid_versions = [c.name for c in sc.channels - if c.channel == container_v1.ReleaseChannel.Channel.REGULAR] - if not valid_versions: - valid_versions = sc.valid_node_versions - - valid_versions.sort(reverse=True) # latest first - - initial = flags_initial or (valid_versions[2] if len(valid_versions) >= 3 - else valid_versions[-1]) - target = flags_target or valid_versions[0] - - return initial, target - - # ------------------------------------------------------------------ - # Async helpers - # ------------------------------------------------------------------ - - def wait_for_operation(self, op_name: str, start_time: float) -> dict: - def _poll(): - op = self._gke.get_operation({'name': op_name}) - return op.status.name # RUNNING / DONE / ABORTING - - return _wait_for_operation_generic( - poll_fn = _poll, - op_id = op_name, - start_time = start_time, - timeout_sec = FLAGS.mgmt_operation_timeout_sec, - poll_interval_sec = FLAGS.mgmt_poll_interval_sec, - ) - - def _wait(self, op, start_time: float) -> dict: - return self.wait_for_operation(op.name, start_time) - - # ------------------------------------------------------------------ - # Node pool operations - # ------------------------------------------------------------------ - - def start_create_nodepool_async(self, name: str, node_count: int, node_version: str): - from google.cloud import container_v1 # pylint: disable=import-outside-toplevel - 
req = container_v1.CreateNodePoolRequest( - parent = self._cluster_path, - node_pool = container_v1.NodePool( - name = name, - version = node_version, - initial_node_count = node_count, - config = container_v1.NodeConfig(machine_type='e2-standard-2'), - ), - ) - op = self._gke.create_node_pool(request=req) - return op.name # operation resource name - - def create_nodepool(self, name: str, node_count: int, node_version: str) -> dict: - start = time.time() - op_id = self.start_create_nodepool_async(name, node_count, node_version) - initiation_latency = time.time() - start - result = self.wait_for_operation(op_id, start) - return { - 'name': name, - 'success': result['success'], - 'initiation_latency': initiation_latency, - 'end_to_end_latency': result['end_to_end_latency'], - 'final_state': result.get('final_state'), - } - - def upgrade_nodepool(self, name: str, target_version: str) -> dict: - from google.cloud import container_v1 # pylint: disable=import-outside-toplevel - start = time.time() - req = container_v1.UpdateNodePoolRequest( - name = f'{self._cluster_path}/nodePools/{name}', - node_version = target_version, - ) - op = self._gke.update_node_pool(request=req) - initiation_latency = time.time() - start - result = self.wait_for_operation(op.name, start) - return { - 'name': name, - 'success': result['success'], - 'initiation_latency': initiation_latency, - 'end_to_end_latency': result['end_to_end_latency'], - 'final_state': result.get('final_state'), - } - - def delete_nodepool(self, name: str) -> dict: - from google.cloud import container_v1 # pylint: disable=import-outside-toplevel - start = time.time() - req = container_v1.DeleteNodePoolRequest( - name=f'{self._cluster_path}/nodePools/{name}') - op = self._gke.delete_node_pool(request=req) - initiation_latency = time.time() - start - result = self.wait_for_operation(op.name, start) - return { - 'name': name, - 'success': result['success'], - 'initiation_latency': initiation_latency, - 'end_to_end_latency': 
result['end_to_end_latency'], - 'final_state': result.get('final_state'), - } - - def start_cluster_update_async(self) -> str: - """Triggers a no-op-equivalent cluster update to exercise the control plane.""" - from google.cloud import container_v1 # pylint: disable=import-outside-toplevel - cluster = self._gke.get_cluster({'name': self._cluster_path}) - req = container_v1.UpdateClusterRequest( - name = self._cluster_path, - update = container_v1.ClusterUpdate( - desired_logging_service = cluster.logging_service, # same value – triggers op - ), - ) - op = self._gke.update_cluster(request=req) - return op.name - - def delete_all_benchmark_nodepools(self): - from google.cloud import container_v1 # pylint: disable=import-outside-toplevel - cluster = self._gke.get_cluster({'name': self._cluster_path}) - for np in cluster.node_pools: - if np.name.startswith('pkb-'): - try: - self.delete_nodepool(np.name) - except Exception as exc: # pylint: disable=broad-except - logging.warning('Could not delete node pool %s: %s', np.name, exc) - - -# =========================================================================== -# EKS Management Client (AWS) -# =========================================================================== - -class EKSManagementClient: - """ - Wraps the AWS EKS boto3 API for management plane operations. - Requires: boto3 (pip install boto3) - """ - - def __init__(self, cluster): - self._cluster = cluster - self._cluster_name = cluster.name - self._region = cluster.region - # Do NOT store boto3 client as an instance attribute — PKB pickles - # benchmark_spec (including mgmt_client) and boto3 clients are not picklable. - # Use the _eks property below which creates the client lazily per-call. 
- - @property - def _eks(self): - import boto3 # pylint: disable=import-outside-toplevel - if not hasattr(self, '_eks_client') or self._eks_client is None: - self._eks_client = boto3.client('eks', region_name=self._region) - return self._eks_client - - def __getstate__(self): - # Exclude unpicklable boto3 client when PKB serialises benchmark_spec - state = self.__dict__.copy() - state.pop('_eks_client', None) - state.pop('_cached_node_role_arn', None) - return state - - def __setstate__(self, state): - self.__dict__.update(state) - self._eks_client = None - - # ------------------------------------------------------------------ - # Version resolution - # ------------------------------------------------------------------ - - def resolve_versions(self, flags_initial, flags_target): - # Get supported nodegroup versions directly from the cluster's version - cluster_info = self._eks.describe_cluster(name=self._cluster_name)['cluster'] - cluster_version = cluster_info['version'] # e.g. '1.34' - - major, minor = cluster_version.split('.') - minor = int(minor) - - # EKS supports N, N-1, N-2, N-3 nodegroup versions relative to cluster - supported = [f'{major}.{minor - i}' for i in range(4)] - logging.info('[EKS] Cluster version %s, supported nodegroup versions: %s', - cluster_version, supported) - - # initial = N-2, target = cluster version (N) - initial = flags_initial or supported[2] # N-2 - target = flags_target or supported[0] # N (same as cluster = latest) - return initial, target - - # ------------------------------------------------------------------ - # Async helpers - # ------------------------------------------------------------------ - - def wait_for_operation(self, op_id: str, start_time: float) -> dict: - """ - op_id format: "/" or "/__cluster__" - """ - parts = op_id.split('/') - is_cluster_op = (parts[-1] == '__cluster__') - - def _poll(): - if is_cluster_op: - r = self._eks.describe_cluster(name=self._cluster_name) - return r['cluster']['status'] # ACTIVE / 
UPDATING / FAILED - else: - ng_name = parts[-1] - r = self._eks.describe_nodegroup( - clusterName = self._cluster_name, - nodegroupName = ng_name, - ) - return r['nodegroup']['status'] # ACTIVE / CREATING / UPDATING / DELETING / FAILED - - def _normalise(state): - mapping = { - 'ACTIVE': STATE_DONE, - 'FAILED': STATE_FAILED, - 'DEGRADED': STATE_FAILED, - } - return mapping.get(state, STATE_RUNNING) - - def _poll_normalised(): - return _normalise(_poll()) - - return _wait_for_operation_generic( - poll_fn = _poll_normalised, - op_id = op_id, - start_time = start_time, - timeout_sec = FLAGS.mgmt_operation_timeout_sec, - poll_interval_sec = FLAGS.mgmt_poll_interval_sec, - ) - - # ------------------------------------------------------------------ - # Node group (node pool) operations - # ------------------------------------------------------------------ - - def _get_node_role_arn(self) -> str: - """ - Returns a node IAM role ARN that has ec2.amazonaws.com in its trust policy. - Looks up the role from the existing default nodegroup created by PKB/eksctl, - which always has the correct trust relationship. - Falls back to constructing the standard eksctl role name if no nodegroup exists. 
- """ - if hasattr(self, '_cached_node_role_arn'): - return self._cached_node_role_arn - - # Try to get role from the existing default nodegroup (most reliable) - try: - ngs = self._eks.list_nodegroups(clusterName=self._cluster_name) - for ng_name in ngs.get('nodegroups', []): - ng = self._eks.describe_nodegroup( - clusterName=self._cluster_name, nodegroupName=ng_name) - role_arn = ng['nodegroup'].get('nodeRole') - if role_arn: - logging.info('[EKS] Using node role from existing nodegroup %s: %s', - ng_name, role_arn) - self._cached_node_role_arn = role_arn - return role_arn - except Exception as exc: # pylint: disable=broad-except - logging.warning('[EKS] Could not look up node role from nodegroup: %s', exc) - - # Fallback: construct standard eksctl node role name - import boto3 # pylint: disable=import-outside-toplevel - iam = boto3.client('iam', region_name=self._eks.meta.region_name) - paginator = iam.get_paginator('list_roles') - prefix = f'eksctl-{self._cluster_name}-nodegroup' - for page in paginator.paginate(): - for role in page['Roles']: - if prefix in role['RoleName'] and 'NodeInstanceRole' in role['RoleName']: - self._cached_node_role_arn = role['Arn'] - logging.info('[EKS] Found node instance role via IAM: %s', role['Arn']) - return self._cached_node_role_arn - - raise RuntimeError( - f'Could not find a node IAM role with ec2.amazonaws.com trust for cluster ' - f'{self._cluster_name}. 
Ensure the default nodegroup exists or pass a role ARN.') - - def start_create_nodepool_async(self, name: str, node_count: int, node_version: str) -> str: - cluster_info = self._eks.describe_cluster(name=self._cluster_name)['cluster'] - subnet_ids = cluster_info['resourcesVpcConfig']['subnetIds'] - # Use node role (ec2.amazonaws.com trust) not cluster ServiceRole - node_role_arn = self._get_node_role_arn() - - self._eks.create_nodegroup( - clusterName = self._cluster_name, - nodegroupName = name, - scalingConfig = {'minSize': node_count, 'maxSize': node_count, 'desiredSize': node_count}, - subnets = subnet_ids, - nodeRole = node_role_arn, - version = node_version, - instanceTypes = ['t3.medium'], - ) - return f'{self._cluster_name}/{name}' - - def create_nodepool(self, name: str, node_count: int, node_version: str) -> dict: - start = time.time() - op_id = self.start_create_nodepool_async(name, node_count, node_version) - initiation_latency = time.time() - start - result = self.wait_for_operation(op_id, start) - return {'name': name, 'initiation_latency': initiation_latency, **result} - - def upgrade_nodepool(self, name: str, target_version: str) -> dict: - start = time.time() - self._eks.update_nodegroup_version( - clusterName = self._cluster_name, - nodegroupName = name, - version = target_version, - ) - initiation_latency = time.time() - start - op_id = f'{self._cluster_name}/{name}' - result = self.wait_for_operation(op_id, start) - return {'name': name, 'initiation_latency': initiation_latency, **result} - - def delete_nodepool(self, name: str) -> dict: - start = time.time() - self._eks.delete_nodegroup( - clusterName = self._cluster_name, - nodegroupName = name, - ) - initiation_latency = time.time() - start - op_id = f'{self._cluster_name}/{name}' - result = self.wait_for_operation(op_id, start) - return {'name': name, 'initiation_latency': initiation_latency, **result} - - def start_cluster_update_async(self) -> str: - # Toggle cluster logging to trigger a 
real ClusterUpdate operation. - # We enable 'api' logging if currently disabled, or disable if enabled. - # This is the lightest possible cluster update — no infrastructure change. - cluster = self._eks.describe_cluster(name=self._cluster_name)['cluster'] - current_logging = cluster.get('logging', {}).get('clusterLogging', []) - - # Find current state of 'api' log type - api_enabled = False - for entry in current_logging: - if 'api' in entry.get('types', []) and entry.get('enabled'): - api_enabled = True - break - - # Toggle: if api logging is on, turn it off; if off, turn it on - # This guarantees a meaningful change that EKS will accept - new_enabled = not api_enabled - logging.info('[EKS] Toggling api logging from %s to %s to trigger ClusterUpdate', - api_enabled, new_enabled) - - self._eks.update_cluster_config( - name = self._cluster_name, - logging = { - 'clusterLogging': [ - {'types': ['api'], 'enabled': new_enabled} - ] - } - ) - return f'{self._cluster_name}/__cluster__' - - def delete_all_benchmark_nodepools(self): - resp = self._eks.list_nodegroups(clusterName=self._cluster_name) - for ng in resp.get('nodegroups', []): - if ng.startswith('pkb-'): - try: - self.delete_nodepool(ng) - except Exception as exc: # pylint: disable=broad-except - logging.warning('Could not delete nodegroup %s: %s', ng, exc) - - -# =========================================================================== -# AKS Management Client (Azure) -# =========================================================================== - -class AKSManagementClient: - """ - Wraps the Azure ContainerServiceClient for management plane operations. 
- Requires: azure-mgmt-containerservice, azure-identity - """ - - def __init__(self, cluster): - from azure.identity import DefaultAzureCredential # pylint: disable=import-outside-toplevel - from azure.mgmt.containerservice import ContainerServiceClient # pylint: disable=import-outside-toplevel - self._cluster = cluster - self._cluster_name = cluster.name - self._resource_group = cluster.resource_group - self._subscription_id = cluster.subscription_id - cred = DefaultAzureCredential() - self._aks = ContainerServiceClient(cred, self._subscription_id) - - # ------------------------------------------------------------------ - # Version resolution - # ------------------------------------------------------------------ - - def resolve_versions(self, flags_initial, flags_target): - versions = sorted( - [v.orchestrator_version - for v in self._aks.container_services.list_orchestrators( - location='eastus', resource_type='managedClusters' - ).orchestrators], - reverse=True, - ) - initial = flags_initial or (versions[2] if len(versions) >= 3 else versions[-1]) - target = flags_target or versions[0] - return initial, target - - # ------------------------------------------------------------------ - # Async helpers - # ------------------------------------------------------------------ - - def wait_for_operation(self, op_id: str, start_time: float) -> dict: - """ - op_id: "//" - or "//__cluster__" - """ - parts = op_id.split('/') - is_cluster_op = (parts[-1] == '__cluster__') - - def _poll(): - if is_cluster_op: - c = self._aks.managed_clusters.get(self._resource_group, self._cluster_name) - return c.provisioning_state # Succeeded / Failed / Updating / Creating - else: - pool_name = parts[-1] - ap = self._aks.agent_pools.get( - self._resource_group, self._cluster_name, pool_name) - return ap.provisioning_state - - def _normalise(state): - mapping = { - 'Succeeded': STATE_SUCCEEDED, - 'Failed': STATE_FAILED, - 'Canceled': 'CANCELED', - } - return mapping.get(state, STATE_RUNNING) 
- - def _poll_normalised(): - return _normalise(_poll()) - - return _wait_for_operation_generic( - poll_fn = _poll_normalised, - op_id = op_id, - start_time = start_time, - timeout_sec = FLAGS.mgmt_operation_timeout_sec, - poll_interval_sec = FLAGS.mgmt_poll_interval_sec, - ) - - # ------------------------------------------------------------------ - # Agent pool (node pool) operations - # ------------------------------------------------------------------ - - def start_create_nodepool_async(self, name: str, node_count: int, node_version: str) -> str: - from azure.mgmt.containerservice.models import AgentPool # pylint: disable=import-outside-toplevel - self._aks.agent_pools.begin_create_or_update( - self._resource_group, - self._cluster_name, - name, - AgentPool( - count = node_count, - vm_size = 'Standard_D2s_v3', - orchestrator_version = node_version, - mode = 'User', - ), - ) - return f'{self._resource_group}/{self._cluster_name}/{name}' - - def create_nodepool(self, name: str, node_count: int, node_version: str) -> dict: - start = time.time() - op_id = self.start_create_nodepool_async(name, node_count, node_version) - initiation_latency = time.time() - start - result = self.wait_for_operation(op_id, start) - return {'name': name, 'initiation_latency': initiation_latency, **result} - - def upgrade_nodepool(self, name: str, target_version: str) -> dict: - from azure.mgmt.containerservice.models import AgentPool # pylint: disable=import-outside-toplevel - start = time.time() - ap = self._aks.agent_pools.get(self._resource_group, self._cluster_name, name) - ap.orchestrator_version = target_version - self._aks.agent_pools.begin_create_or_update( - self._resource_group, self._cluster_name, name, ap) - initiation_latency = time.time() - start - op_id = f'{self._resource_group}/{self._cluster_name}/{name}' - result = self.wait_for_operation(op_id, start) - return {'name': name, 'initiation_latency': initiation_latency, **result} - - def delete_nodepool(self, name: str) 
-> dict: - start = time.time() - self._aks.agent_pools.begin_delete( - self._resource_group, self._cluster_name, name) - initiation_latency = time.time() - start - op_id = f'{self._resource_group}/{self._cluster_name}/{name}' - result = self.wait_for_operation(op_id, start) - return {'name': name, 'initiation_latency': initiation_latency, **result} - - def start_cluster_update_async(self) -> str: - cluster = self._aks.managed_clusters.get(self._resource_group, self._cluster_name) - self._aks.managed_clusters.begin_create_or_update( - self._resource_group, self._cluster_name, cluster) - return f'{self._resource_group}/{self._cluster_name}/__cluster__' - - def delete_all_benchmark_nodepools(self): - pools = self._aks.agent_pools.list(self._resource_group, self._cluster_name) - for pool in pools: - if pool.name.startswith('pkb-'): - try: - self.delete_nodepool(pool.name) - except Exception as exc: # pylint: disable=broad-except - logging.warning('Could not delete agent pool %s: %s', pool.name, exc) + success += 1 + init_latencies.append(init_dur) + e2e_latencies.append(e2e_dur) + samples.append(sample.Sample( + f'{metric_prefix}_InitiationLatency', init_dur, 'seconds', dict(meta))) + samples.append(sample.Sample( + f'{metric_prefix}_EndToEndLatency', e2e_dur, 'seconds', dict(meta))) + + # ── Success rate ────────────────────────────────────────────────────────── + total = attempted_ops if attempted_ops is not None else len(results) + executed = len(results) + if total > 0: + samples.append(sample.Sample( + f'{metric_prefix}_SuccessRate', + 100.0 * success / total, + 'percent', + { + 'total_ops': str(total), + 'executed_ops': str(executed), + 'successful_ops': str(success), + 'skipped_ops': str(total - executed), # cloud-rolled-back ops + }, + )) + + # ── Aggregate stats (successful ops only) ──────────────────────────────── + for phase_label, latencies in ( + ('InitiationLatency', init_latencies), + ('EndToEndLatency', e2e_latencies), + ): + if len(latencies) >= 2: + 
samples += _AggregateSamples(metric_prefix, phase_label, latencies) + if len(latencies) >= 4: + samples += _OutlierSamples(metric_prefix, phase_label, latencies) + + return samples + + +def _AggregateSamples( + metric_prefix: str, phase_label: str, latencies: list[float] +) -> list[sample.Sample]: + """Emits Mean/StdDev/Min/Median/P90/P99/Max samples.""" + pcts = sample.PercentileCalculator( + latencies, percentiles=(0, 50, 90, 99, 100)) + agg_meta = {'sample_count': str(len(latencies))} + out: list[sample.Sample] = [] + for label, key in ( + ('Mean', 'average'), + ('StdDev', 'stddev'), + ('Min', 'p0'), + ('Median', 'p50'), + ('P90', 'p90'), + ('P99', 'p99'), + ('Max', 'p100'), + ): + if key in pcts: + out.append(sample.Sample( + f'{metric_prefix}_{phase_label}_{label}', + pcts[key], 'seconds', agg_meta)) + return out + + +def _OutlierSamples( + metric_prefix: str, phase_label: str, latencies: list[float] +) -> list[sample.Sample]: + """Tukey IQR outlier detection; emits OutlierCount sample with metadata.""" + sorted_lats = sorted(latencies) + n = len(sorted_lats) + q1 = sorted_lats[n // 4] + q3 = sorted_lats[(3 * n) // 4] + iqr = q3 - q1 + upper_fence = q3 + 1.5 * iqr + lower_fence = q1 - 1.5 * iqr + outliers = [v for v in latencies if v > upper_fence or v < lower_fence] + meta = { + 'sample_count': str(n), + 'q1': f'{q1:.3f}', + 'q3': f'{q3:.3f}', + 'iqr': f'{iqr:.3f}', + 'upper_fence': f'{upper_fence:.3f}', + 'lower_fence': f'{lower_fence:.3f}', + 'outlier_values': ','.join(f'{v:.2f}' for v in outliers), + } + if outliers: + logging.warning( + '[Outliers] %s %s: %d outlier(s) detected: %s (fence: %.2f-%.2f)', + metric_prefix, phase_label, len(outliers), + [f'{v:.2f}s' for v in outliers], lower_fence, upper_fence, + ) + return [sample.Sample( + f'{metric_prefix}_{phase_label}_OutlierCount', + len(outliers), 'count', meta)] diff --git a/perfkitbenchmarker/providers/aws/elastic_kubernetes_service.py b/perfkitbenchmarker/providers/aws/elastic_kubernetes_service.py 
index 7a08e4a19b..e81e3dc837 100644 --- a/perfkitbenchmarker/providers/aws/elastic_kubernetes_service.py +++ b/perfkitbenchmarker/providers/aws/elastic_kubernetes_service.py @@ -26,6 +26,7 @@ import logging import math import re +import threading from typing import Any from urllib import parse @@ -526,6 +527,485 @@ def ResizeNodePool( ] vm_util.IssueCommand(cmd) + def CreateNodePool( + self, + nodepool_config: container.BaseNodePoolConfig, + node_version: str | None = None, + ) -> None: + """Creates a single managed node group on the cluster.""" + ng_json = self._RenderNodeGroupJson(nodepool_config) + if node_version: + ng_json['version'] = node_version + config_json = { + 'apiVersion': 'eksctl.io/v1alpha5', + 'kind': 'ClusterConfig', + 'metadata': { + 'name': self.name, + 'region': self.region, + }, + 'managedNodeGroups': [ng_json], + } + filename = self._WriteJsonToFile(config_json) + cmd = [ + FLAGS.eksctl, + 'create', + 'nodegroup', + f'--config-file={filename}', + ] + _, stderr, retcode = vm_util.IssueCommand( + cmd, timeout=1800, raise_on_failure=False + ) + if retcode: + raise errors.Resource.CreationError(stderr) + + def DeleteNodePool(self, name: str) -> None: + """Deletes the named node group.""" + cmd = [ + FLAGS.eksctl, + 'delete', + 'nodegroup', + f'--name={name}', + f'--cluster={self.name}', + f'--region={self.region}', + '--wait', + ] + vm_util.IssueCommand(cmd, timeout=1800) + + def UpgradeNodePool(self, name: str, target_version: str) -> None: + """Upgrades the named node group to target_version.""" + cmd = [ + FLAGS.eksctl, + 'upgrade', + 'nodegroup', + f'--name={name}', + f'--cluster={self.name}', + f'--region={self.region}', + f'--kubernetes-version={target_version}', + '--wait', + ] + vm_util.IssueCommand(cmd, timeout=1800) + + # ---- Async variants (return opaque handles) ------------------------------- + + def _DiscoverSubnets(self) -> list[str]: + """Returns the EKS cluster's subnet IDs (cached after first call).""" + if getattr(self, 
'_cached_subnets', None): + return self._cached_subnets + out, _, _ = vm_util.IssueCommand( + util.AWS_PREFIX + + [ + 'eks', + 'describe-cluster', + '--name', + self.name, + '--region', + self.region, + ] + ) + info = json.loads(out) + self._cached_subnets = info['cluster']['resourcesVpcConfig']['subnetIds'] + return self._cached_subnets + + def _ResolveReleaseVersion(self, minor: str) -> str: + """Returns the EKS-optimized AMI release version (e.g. '1.33.10-20260124'). + + Used to populate `releaseVersion` in the create-nodegroup payload so the + benchmark can pin specific K8s minors. Thread-safe: at scale we have N + workers all asking for the same minor; only the first does the SSM + lookup, the rest read from the cache. + """ + if getattr(self, '_release_version_lock', None) is None: + self._release_version_lock = threading.Lock() + with self._release_version_lock: + cache = getattr(self, '_cached_release_versions', None) or {} + if minor in cache: + return cache[minor] + cmd = util.AWS_PREFIX + [ + 'ssm', + 'get-parameter', + '--name', + ( + f'/aws/service/eks/optimized-ami/{minor}/amazon-linux-2023/' + 'x86_64/standard/recommended/release_version' + ), + '--region', + self.region, + '--query', + 'Parameter.Value', + '--output', + 'text', + ] + out, err, rc = vm_util.IssueCommand(cmd, raise_on_failure=False) + if rc: + raise errors.Resource.CreationError( + f'Failed to resolve EKS release version for minor {minor!r}: {err}' + ) + cache[minor] = out.strip() + self._cached_release_versions = cache + return cache[minor] + + def _DiscoverNodeRoleArn(self) -> str: + """Returns a usable node IAM role ARN by inspecting an existing nodegroup.""" + if getattr(self, '_cached_node_role_arn', None): + return self._cached_node_role_arn + out, _, _ = vm_util.IssueCommand( + util.AWS_PREFIX + + [ + 'eks', + 'list-nodegroups', + '--cluster-name', + self.name, + '--region', + self.region, + ] + ) + for ng_name in json.loads(out).get('nodegroups', []): + ng_out, _, _ = 
vm_util.IssueCommand( + util.AWS_PREFIX + + [ + 'eks', + 'describe-nodegroup', + '--cluster-name', + self.name, + '--nodegroup-name', + ng_name, + '--region', + self.region, + ] + ) + role = json.loads(ng_out)['nodegroup'].get('nodeRole') + if role: + self._cached_node_role_arn = role + return role + raise errors.Resource.CreationError( + f'No existing nodegroup found to discover node role for ' + f'cluster {self.name}.' + ) + + def CreateNodePoolAsync( + self, + nodepool_config: container.BaseNodePoolConfig, + node_version: str | None = None, + ) -> str: + # Pass the full request via --cli-input-json so that we can specify both + # `version` (e.g. "1.33") and `releaseVersion` (e.g. "1.33.11-...") in + # the same call. Two reasons this matters: + # 1. AWS CLI v1 has a bug where the top-level --version flag swallows + # the subcommand --version, printing the CLI banner and exiting. + # cli-input-json sidesteps CLI argument parsing entirely. + # 2. EKS rejects a releaseVersion that doesn't match the request's + # `version`; if `version` is omitted EKS defaults it to the + # cluster's version, which (for the N-1 -> N benchmark path) + # produces a "release version X is not valid for kubernetes + # version Y" error. 
+ payload: dict[str, Any] = { + 'clusterName': self.name, + 'nodegroupName': nodepool_config.name, + 'scalingConfig': { + 'minSize': nodepool_config.num_nodes, + 'maxSize': nodepool_config.num_nodes, + 'desiredSize': nodepool_config.num_nodes, + }, + 'subnets': self._DiscoverSubnets(), + 'instanceTypes': [nodepool_config.machine_type], + 'amiType': 'AL2023_x86_64_STANDARD', + 'nodeRole': self._DiscoverNodeRoleArn(), + 'labels': {'pkb_nodepool': nodepool_config.name}, + 'tags': util.MakeDefaultTags(), + } + if node_version: + payload['version'] = node_version + payload['releaseVersion'] = self._ResolveReleaseVersion(node_version) + filename = self._WriteJsonToFile(payload) + cmd = util.AWS_PREFIX + [ + 'eks', + 'create-nodegroup', + '--region', + self.region, + '--cli-input-json', + f'file://{filename}', + ] + _, stderr, retcode = vm_util.IssueCommand( + cmd, timeout=300, raise_on_failure=False + ) + if retcode: + raise errors.Resource.CreationError(stderr) + return f'ng_active:{nodepool_config.name}' + + def UpgradeNodePoolAsync(self, name: str, target_version: str) -> str: + cmd = util.AWS_PREFIX + [ + 'eks', + 'update-nodegroup-version', + '--cluster-name', + self.name, + '--nodegroup-name', + name, + '--region', + self.region, + '--kubernetes-version', + target_version, + ] + _, stderr, retcode = vm_util.IssueCommand( + cmd, timeout=300, raise_on_failure=False + ) + if retcode: + raise errors.Resource.CreationError(stderr) + return f'ng_active:{name}' + + def DeleteNodePoolAsync(self, name: str) -> str: + cmd = util.AWS_PREFIX + [ + 'eks', + 'delete-nodegroup', + '--cluster-name', + self.name, + '--nodegroup-name', + name, + '--region', + self.region, + ] + _, stderr, retcode = vm_util.IssueCommand( + cmd, timeout=300, raise_on_failure=False + ) + if retcode: + raise errors.Resource.CreationError(stderr) + return f'ng_gone:{name}' + + def UpdateClusterAsync(self) -> str: + """Fires a CloudWatch logging toggle; returns handle 'cluster_update:'. 
+ + Returns a handle carrying the specific update id so WaitForOperation + can poll *that* update's status (Successful / Failed) rather than the + cluster's top-level status (which stays ACTIVE during config updates, + making the wait return instantly and silently mis-reporting latency). + """ + log_types = ['api', 'audit', 'authenticator', 'controllerManager', + 'scheduler'] + describe = util.AWS_PREFIX + [ + 'eks', + 'describe-cluster', + '--name', + self.name, + '--region', + self.region, + ] + out, _, _ = vm_util.IssueCommand(describe) + current = ( + json.loads(out)['cluster'].get('logging', {}).get('clusterLogging', []) + ) + any_enabled = any(e.get('enabled', False) for e in current) + payload = json.dumps({ + 'clusterLogging': [ + {'types': log_types, 'enabled': not any_enabled} + ] + }) + upd = util.AWS_PREFIX + [ + 'eks', + 'update-cluster-config', + '--name', + self.name, + '--region', + self.region, + '--logging', + payload, + ] + stdout, stderr, retcode = vm_util.IssueCommand( + upd, timeout=300, raise_on_failure=False + ) + if retcode: + raise errors.Resource.CreationError(stderr) + update_id = json.loads(stdout)['update']['id'] + return f'cluster_update:{update_id}' + + def ResolveNodePoolVersions(self) -> tuple[str, str]: + """Returns (initial, target) EKS nodegroup versions. + + Uses cluster_version (already set from FLAGS/describe-cluster) rather than + querying kubectl, which is faster and avoids a kubectl round-trip. + initial = N-1 (adjacent minor below cluster version) + target = N (cluster version = latest) + """ + cluster_ver = self.cluster_version or self.k8s_version + # Strip any patch suffix e.g. 
'1.34.7' -> '1.34' + parts = cluster_ver.lstrip('v').split('.') + major, minor = int(parts[0]), int(parts[1]) + target = f'{major}.{minor}' + initial = f'{major}.{minor - 1}' + logging.info( + '[EKS] ResolveNodePoolVersions: cluster=%s initial=%s target=%s', + cluster_ver, initial, target, + ) + return initial, target + + def WaitForOperation(self, op_handle: str) -> None: + """Polls EKS resources until the expected terminal state is observed.""" + kind, _, name = op_handle.partition(':') + + @vm_util.Retry( + poll_interval=5, + fuzz=0, + timeout=3600, + retryable_exceptions=(errors.Resource.RetryableCreationError,), + ) + def _wait_ng_active(): + out, err, rc = vm_util.IssueCommand( + util.AWS_PREFIX + + [ + 'eks', + 'describe-nodegroup', + '--cluster-name', + self.name, + '--nodegroup-name', + name, + '--region', + self.region, + ], + raise_on_failure=False, + ) + if rc: + raise errors.Resource.RetryableCreationError(err) + status = json.loads(out)['nodegroup']['status'] + if status in ('ACTIVE',): + return + if status in ('CREATE_FAILED', 'DELETE_FAILED', 'DEGRADED'): + raise errors.Resource.CreationError( + f'nodegroup {name} ended in {status}' + ) + raise errors.Resource.RetryableCreationError( + f'nodegroup {name} status={status}' + ) + + @vm_util.Retry( + poll_interval=5, + fuzz=0, + timeout=3600, + retryable_exceptions=(errors.Resource.RetryableDeletionError,), + ) + def _wait_ng_gone(): + out, err, rc = vm_util.IssueCommand( + util.AWS_PREFIX + + [ + 'eks', + 'describe-nodegroup', + '--cluster-name', + self.name, + '--nodegroup-name', + name, + '--region', + self.region, + ], + raise_on_failure=False, + ) + if rc and 'ResourceNotFoundException' in (err or ''): + return + if rc: + raise errors.Resource.RetryableDeletionError(err) + raise errors.Resource.RetryableDeletionError( + f'nodegroup {name} still present' + ) + + @vm_util.Retry( + poll_interval=5, + fuzz=0, + timeout=3600, + retryable_exceptions=(errors.Resource.RetryableCreationError,), + ) + def 
_wait_cluster_update(): + out, err, rc = vm_util.IssueCommand( + util.AWS_PREFIX + + [ + 'eks', + 'describe-update', + '--name', + self.name, + '--update-id', + name, + '--region', + self.region, + '--query', + 'update.status', + '--output', + 'text', + ], + raise_on_failure=False, + ) + if rc: + raise errors.Resource.RetryableCreationError(err) + status = out.strip() + if status == 'Successful': + return + if status in ('Failed', 'Cancelled'): + raise errors.Resource.CreationError( + f'cluster update {name} ended in {status}' + ) + raise errors.Resource.RetryableCreationError( + f'cluster update {name} status={status}' + ) + + if kind == 'ng_active': + _wait_ng_active() + elif kind == 'ng_gone': + _wait_ng_gone() + elif kind == 'cluster_update': + _wait_cluster_update() + else: + raise ValueError(f'Unknown EKS op handle: {op_handle!r}') + + def UpdateCluster(self) -> None: + """Real cluster-level update via a CloudWatch logging toggle. + + Reads the current cluster logging state, flips it (enable->disable or + vice versa), and waits for the cluster to return to ACTIVE. Enabling all + five log types is a 5-10 minute control-plane op, giving a meaningful + overlap window for Scenario B. 
+ """ + log_types = ['api', 'audit', 'authenticator', 'controllerManager', + 'scheduler'] + describe = util.AWS_PREFIX + [ + 'eks', 'describe-cluster', + '--name', self.name, + '--region', self.region, + ] + stdout, _, _ = vm_util.IssueCommand(describe) + info = json.loads(stdout) + current = info['cluster'].get('logging', {}).get('clusterLogging', []) + any_enabled = any(entry.get('enabled', False) for entry in current) + new_enabled = not any_enabled + logging_payload = json.dumps({ + 'clusterLogging': [ + {'types': log_types, 'enabled': new_enabled} + ] + }) + update = util.AWS_PREFIX + [ + 'eks', 'update-cluster-config', + '--name', self.name, + '--region', self.region, + '--logging', logging_payload, + ] + vm_util.IssueCommand(update, timeout=900) + + @vm_util.Retry( + poll_interval=5, + fuzz=0, + timeout=900, + retryable_exceptions=(errors.Resource.RetryableCreationError,), + ) + def _wait_active(): + query = util.AWS_PREFIX + [ + 'eks', 'describe-cluster', + '--name', self.name, + '--region', self.region, + '--query', 'cluster.status', + '--output', 'text', + ] + out, _, _ = vm_util.IssueCommand(query) + status = out.strip() + if status != 'ACTIVE': + raise errors.Resource.RetryableCreationError( + f'cluster status={status}' + ) + + _wait_active() + class EksAutoCluster(BaseEksCluster): """Class representing an Elastic Kubernetes Service cluster with auto mode. 
diff --git a/perfkitbenchmarker/providers/azure/azure_kubernetes_service.py b/perfkitbenchmarker/providers/azure/azure_kubernetes_service.py index 5d9bbc222b..1307de81fa 100644 --- a/perfkitbenchmarker/providers/azure/azure_kubernetes_service.py +++ b/perfkitbenchmarker/providers/azure/azure_kubernetes_service.py @@ -15,6 +15,7 @@ """Contains classes/functions related to Azure Kubernetes Service.""" import json +import time from typing import Any, List from absl import flags @@ -154,8 +155,7 @@ def GetResourceMetadata(self): def _IsAutoscalerEnabled(self, nodepool_config: container.BaseNodePoolConfig): """Returns True if the cluster autoscaler is enabled.""" return ( - nodepool_config.min_nodes - != nodepool_config.max_nodes + nodepool_config.min_nodes != nodepool_config.max_nodes # Auto node provisioning mode is incompatible with cluster autoscaler. ) and not FLAGS.azure_aks_auto_node_provisioning @@ -204,35 +204,20 @@ def _Create(self): if FLAGS.azure_aks_auto_node_provisioning: # For provision_node_pools benchmark, add auto provisioning mode cmd.append('--node-provisioning-mode=auto') - - self._RunCreateClusterCmd(cmd) + # TODO(pclay): expose quota and capacity errors + # Creating an AKS cluster with a fresh service principal usually fails due + # to a race condition. Active Directory knows the service principal exists, + # but AKS does not. (https://github.com/Azure/azure-cli/issues/9585) + # Use 5 min timeout on service principle retry. cmd will fail fast. + vm_util.Retry(timeout=300)(vm_util.IssueCommand)( + cmd, + # Half hour timeout on creating the cluster. 
+ timeout=1800, + ) for _, nodepool in self.nodepools.items(): self._CreateNodePool(nodepool) - @vm_util.Retry( - timeout=3600, - retryable_exceptions=(errors.Resource.RetryableCreationError,), - ) - def _RunCreateClusterCmd(self, cmd: list[str]): - """Runs the create cluster command, retrying on race condition errors.""" - try: - _, err, retcode = vm_util.IssueCommand( - cmd, - # Half hour timeout on creating the cluster. - timeout=1800, - raise_on_failure=False, - ) - except errors.VmUtil.IssueCommandTimeoutError as e: - retcode = 1 - err = str(e) - if retcode: - if 'InvalidOutputTable' in err: - # This is a race condition where the logs analytics workspace hasn't - # finished being created. Retrying solves it. - raise errors.Resource.RetryableCreationError(err) - raise errors.Resource.CreationError(err) - def _CreateNodePool(self, nodepool_config: container.BaseNodePoolConfig): """Creates a node pool.""" cmd = [ @@ -265,7 +250,9 @@ def _CreateNodePool(self, nodepool_config: container.BaseNodePoolConfig): raise errors.Resource.CreationError(stderr) def _GetNodeFlags( - self, nodepool_config: container.BaseNodePoolConfig + self, + nodepool_config: container.BaseNodePoolConfig, + version_override: str | None = None, ) -> List[str]: """Common flags for create and nodepools add.""" args = [] + self.resource_group.args @@ -288,8 +275,9 @@ def _GetNodeFlags( args += ['--zones', zones] if self.default_nodepool.disk_size: args += ['--node-osdisk-size', str(self.default_nodepool.disk_size)] - if self.cluster_version: - args += ['--kubernetes-version', self.cluster_version] + version = version_override or self.cluster_version + if version: + args += ['--kubernetes-version', version] return args def _Exists(self): @@ -539,6 +527,338 @@ def AddNodepool(self, batch_name, pool_id): spot=FLAGS.azure_low_priority_vms, ) + def CreateNodePool( + self, + nodepool_config: container.BaseNodePoolConfig, + node_version: str | None = None, + ) -> None: + """Creates a single named node 
pool on the cluster.""" + cmd = [ + azure.AZURE_PATH, + 'aks', + 'nodepool', + 'add', + '--cluster-name', + self.name, + '--name', + _AzureNodePoolName(nodepool_config.name), + '--labels', + f'pkb_nodepool={nodepool_config.name}', + ] + self._GetNodeFlags(nodepool_config, version_override=node_version) + _, stderr, retcode = vm_util.IssueCommand( + cmd, timeout=1800, raise_on_failure=False + ) + if retcode: + raise errors.Resource.CreationError(stderr) + + def DeleteNodePool(self, name: str) -> None: + """Deletes the named node pool.""" + cmd = [ + azure.AZURE_PATH, + 'aks', + 'nodepool', + 'delete', + '--cluster-name', + self.name, + '--name', + _AzureNodePoolName(name), + '--yes', + ] + self.resource_group.args + vm_util.IssueCommand(cmd, timeout=1800) + + def UpgradeNodePool(self, name: str, target_version: str) -> None: + """Upgrades the named node pool to target_version.""" + cmd = [ + azure.AZURE_PATH, + 'aks', + 'nodepool', + 'upgrade', + '--cluster-name', + self.name, + '--name', + _AzureNodePoolName(name), + '--kubernetes-version', + target_version, + '--yes', + ] + self.resource_group.args + vm_util.IssueCommand(cmd, timeout=1800) + + def UpdateCluster(self) -> None: + """Real cluster-level update via a unique-timestamp tag change. + + Triggers a control-plane operation (cluster-scoped, not pool-scoped) by + updating the cluster tags. Always succeeds because the tag value changes + every call. 
+ """ + cmd = [ + azure.AZURE_PATH, + 'aks', + 'update', + '--name', + self.name, + '--tags', + f'k8s-mgmt-ts={int(time.time())}', + ] + self.resource_group.args + vm_util.IssueCommand(cmd, timeout=1800) + + # ---- Async variants (return opaque handles) ------------------------------- + + def CreateNodePoolAsync( + self, + nodepool_config: container.BaseNodePoolConfig, + node_version: str | None = None, + ) -> str: + cmd = [ + azure.AZURE_PATH, + 'aks', + 'nodepool', + 'add', + '--cluster-name', + self.name, + '--name', + _AzureNodePoolName(nodepool_config.name), + '--labels', + f'pkb_nodepool={nodepool_config.name}', + '--no-wait', + ] + self._GetNodeFlags(nodepool_config, version_override=node_version) + _, stderr, retcode = vm_util.IssueCommand( + cmd, timeout=300, raise_on_failure=False + ) + if retcode: + raise errors.Resource.CreationError(stderr) + return f'np_succeeded:{_AzureNodePoolName(nodepool_config.name)}' + + def UpgradeNodePoolAsync(self, name: str, target_version: str) -> str: + cmd = [ + azure.AZURE_PATH, + 'aks', + 'nodepool', + 'upgrade', + '--cluster-name', + self.name, + '--name', + _AzureNodePoolName(name), + '--kubernetes-version', + target_version, + '--no-wait', + '--yes', + ] + self.resource_group.args + _, stderr, retcode = vm_util.IssueCommand( + cmd, timeout=300, raise_on_failure=False + ) + if retcode: + raise errors.Resource.CreationError(stderr) + return f'np_succeeded:{_AzureNodePoolName(name)}' + + def DeleteNodePoolAsync(self, name: str) -> str: + cmd = [ + azure.AZURE_PATH, + 'aks', + 'nodepool', + 'delete', + '--cluster-name', + self.name, + '--name', + _AzureNodePoolName(name), + '--no-wait', + '--yes', + ] + self.resource_group.args + _, stderr, retcode = vm_util.IssueCommand( + cmd, timeout=300, raise_on_failure=False + ) + if retcode: + raise errors.Resource.CreationError(stderr) + return f'np_gone:{_AzureNodePoolName(name)}' + + def UpdateClusterAsync(self) -> str: + """Triggers a node-count scale on the system node pool 
to create a + long-running cluster update for Scenario B overlap testing. + + Scaling the system pool by ±1 node takes 3-8 minutes on AKS, which + creates a meaningful overlap window for the concurrent NodePool create. + The scale alternates +1/-1 each call so it is always a real change. + Falls back to a tag update if the system pool cannot be identified. + """ + # Find the system node pool name + list_cmd = [ + azure.AZURE_PATH, 'aks', 'nodepool', 'list', + '--cluster-name', self.name, + '--query', '[?mode==`System`].{name:name,count:count}', + '--output', 'json', + ] + self.resource_group.args + out, _, rc = vm_util.IssueCommand(list_cmd, raise_on_failure=False) + if not rc and out.strip(): + try: + pools = json.loads(out.strip()) + if pools: + pool_name = pools[0]['name'] + current_count = int(pools[0]['count']) + # Toggle: scale to current+1 or current-1 (minimum 1) + new_count = current_count + 1 if current_count <= 1 else current_count - 1 + scale_cmd = [ + azure.AZURE_PATH, 'aks', 'nodepool', 'scale', + '--cluster-name', self.name, + '--name', pool_name, + '--node-count', str(new_count), + '--no-wait', + ] + self.resource_group.args + _, stderr, retcode = vm_util.IssueCommand( + scale_cmd, timeout=300, raise_on_failure=False + ) + if not retcode: + logging.info( + '[AKS] UpdateClusterAsync: scaling system pool %s %d->%d', + pool_name, current_count, new_count, + ) + return 'cluster_succeeded' + except (ValueError, KeyError, json.JSONDecodeError) as e: + logging.warning('[AKS] UpdateClusterAsync: pool parse error: %s', e) + # Fallback: tag update + logging.warning('[AKS] UpdateClusterAsync: falling back to tag update') + cmd = [ + azure.AZURE_PATH, 'aks', 'update', + '--name', self.name, + '--tags', f'k8s-mgmt-ts={int(time.time())}', + '--no-wait', + ] + self.resource_group.args + _, stderr, retcode = vm_util.IssueCommand( + cmd, timeout=300, raise_on_failure=False + ) + if retcode: + raise errors.Resource.CreationError(stderr) + return 'cluster_succeeded' + 
def ResolveNodePoolVersions(self) -> tuple[str, str]:
  """Returns (initial, target) AKS node pool versions.

  The versions are derived from the version already recorded on this object
  (`cluster_version`, falling back to `k8s_version`) rather than by querying
  kubectl:
    initial = N-1 (adjacent minor below the cluster version)
    target  = N   (the cluster version's own minor)

  Returns:
    A tuple of bare 'major.minor' strings, e.g. ('1.34', '1.35').

  Raises:
    ValueError: if no version is set, the version does not start with a
      numeric 'major.minor', or the minor is 0 (no adjacent minor below).
  """
  cluster_ver = self.cluster_version or self.k8s_version
  if not cluster_ver:
    raise ValueError(
        'AKS cluster version is not set; cannot resolve node pool versions'
    )
  # Accepts 'v1.35.4', '1.35.4' and '1.35'; only major/minor are used.
  parts = cluster_ver.lstrip('v').split('.')
  if len(parts) < 2 or not (parts[0].isdigit() and parts[1].isdigit()):
    raise ValueError(f'Cannot parse AKS cluster version: {cluster_ver!r}')
  major, minor = int(parts[0]), int(parts[1])
  if minor < 1:
    # Without this guard the "initial" version would be e.g. '1.-1'.
    raise ValueError(
        f'No adjacent minor below AKS cluster version {cluster_ver!r}'
    )
  target = f'{major}.{minor}'
  initial = f'{major}.{minor - 1}'
  logging.info(
      '[AKS] ResolveNodePoolVersions: cluster=%s initial=%s target=%s',
      cluster_ver, initial, target,
  )
  return initial, target


def WaitForOperation(self, op_handle: str) -> None:
  """Polls AKS resources until the expected terminal state is observed.

  Args:
    op_handle: '<kind>' or '<kind>:<name>'. Supported kinds are
      'np_succeeded' (node pool <name> reaches Succeeded), 'np_gone' (node
      pool <name> no longer exists) and 'cluster_succeeded' (the cluster
      itself reaches Succeeded).

  Raises:
    ValueError: if the handle's kind is not one of the supported kinds.
    errors.Resource.CreationError: if the polled resource ends in a terminal
      non-success provisioning state (Failed or Canceled).
  """
  kind, _, name = op_handle.partition(':')

  nodepool_show = [
      azure.AZURE_PATH,
      'aks',
      'nodepool',
      'show',
      '--cluster-name',
      self.name,
      '--name',
      name,
  ]
  cluster_show = [azure.AZURE_PATH, 'aks', 'show', '--name', self.name]
  state_query = ['--query', 'provisioningState', '--output', 'tsv']

  @vm_util.Retry(
      poll_interval=5,
      fuzz=0,
      timeout=3600,
      retryable_exceptions=(errors.Resource.RetryableCreationError,),
  )
  def _wait_succeeded(show_cmd, state_label, failure_label):
    """Polls provisioningState of one resource until it is Succeeded."""
    out, err, rc = vm_util.IssueCommand(
        show_cmd + state_query + self.resource_group.args,
        raise_on_failure=False,
    )
    if rc:
      raise errors.Resource.RetryableCreationError(err)
    status = out.strip()
    if status == 'Succeeded':
      return
    # Canceled is terminal too; retrying on it would only burn the full
    # one-hour timeout before reporting a failure.
    if status in ('Failed', 'Canceled'):
      raise errors.Resource.CreationError(
          f'{failure_label} ended in {status}'
      )
    raise errors.Resource.RetryableCreationError(
        f'{state_label} state={status}'
    )

  @vm_util.Retry(
      poll_interval=5,
      fuzz=0,
      timeout=3600,
      retryable_exceptions=(errors.Resource.RetryableDeletionError,),
  )
  def _wait_np_gone():
    """Polls `nodepool show` until the CLI reports the pool as not found."""
    _, err, rc = vm_util.IssueCommand(
        nodepool_show + self.resource_group.args,
        raise_on_failure=False,
    )
    err_text = err or ''
    # Both spellings are checked: the 'NotFound' error code and a
    # free-text 'not found' message.
    if rc and ('NotFound' in err_text or 'not found' in err_text.lower()):
      return
    if rc:
      raise errors.Resource.RetryableDeletionError(err)
    raise errors.Resource.RetryableDeletionError(
        f'nodepool {name} still present'
    )

  if kind == 'np_succeeded':
    _wait_succeeded(nodepool_show, f'nodepool {name}', f'nodepool {name}')
  elif kind == 'np_gone':
    _wait_np_gone()
  elif kind == 'cluster_succeeded':
    _wait_succeeded(cluster_show, 'cluster', 'cluster update')
  else:
    raise ValueError(f'Unknown AKS op handle: {op_handle!r}')
def CreateNodePool(
    self,
    nodepool_config: container.BaseNodePoolConfig,
    node_version: str | None = None,
) -> None:
  """Creates a single named node pool on the cluster.

  Args:
    nodepool_config: node pool definition; its `name` becomes the pool name
      and the remaining parameters are applied via _AddNodeParamsToCmd.
    node_version: optional value for gcloud's --node-version flag; omitted
      when falsy.
  """
  cmd = self._GcloudCommand(
      'container',
      'node-pools',
      'create',
      nodepool_config.name,
      '--cluster',
      self.name,
  )
  self._AddNodeParamsToCmd(nodepool_config, cmd)
  if node_version:
    cmd.flags['node-version'] = node_version
  self._IssueResourceCreationCommand(cmd)


def DeleteNodePool(self, name: str) -> None:
  """Deletes the named node pool."""
  cmd = self._GcloudCommand(
      'container',
      'node-pools',
      'delete',
      name,
      '--cluster',
      self.name,
  )
  # --quiet suppresses gcloud's interactive confirmation prompt.
  cmd.args.append('--quiet')
  cmd.Issue(timeout=ONE_HOUR)


def UpgradeNodePool(self, name: str, target_version: str) -> None:
  """Upgrades the named node pool to target_version."""
  cmd = self._GcloudCommand(
      'container',
      'clusters',
      'upgrade',
      self.name,
      '--node-pool',
      name,
      '--cluster-version',
      target_version,
  )
  cmd.args.append('--quiet')
  cmd.Issue(timeout=ONE_HOUR)


def UpdateCluster(self) -> None:
  """Issues a cluster-level update by setting a timestamped label.

  The label value is the current Unix time in seconds, so it differs between
  calls made at least a second apart; the intent is to trigger a real
  cluster-level operation without altering functional configuration.
  """
  cmd = self._GcloudCommand('container', 'clusters', 'update', self.name)
  cmd.flags['update-labels'] = f'k8s-mgmt-ts={int(time.time())}'
  cmd.Issue(timeout=ONE_HOUR)


# ---- Async variants (return opaque handles) -------------------------------


def _IssueAsync(self, cmd: util.GcloudCommand) -> str:
  """Issues a gcloud command with --async and returns the operation name.

  Args:
    cmd: a prepared gcloud command; it is mutated (--async and a
      value(name) output format are added).

  Returns:
    The last non-empty line of stdout, taken to be the operation name.

  Raises:
    errors.Resource.CreationError: if gcloud exits non-zero or prints no
      operation name.
  """
  cmd.args.append('--async')
  cmd.flags['format'] = 'value(name)'
  stdout, stderr, retcode = cmd.Issue(timeout=600, raise_on_failure=False)
  if retcode:
    raise errors.Resource.CreationError(stderr)
  # Whitespace-only stdout yields no lines; guard the [-1] lookup so that
  # case reports the CreationError below rather than an IndexError.
  lines = stdout.strip().splitlines() if stdout else []
  op_name = lines[-1].strip() if lines else ''
  if not op_name:
    raise errors.Resource.CreationError(
        f'GKE async command returned no operation name; stderr={stderr}'
    )
  return op_name


def CreateNodePoolAsync(
    self,
    nodepool_config: container.BaseNodePoolConfig,
    node_version: str | None = None,
) -> str:
  """Starts a node pool create and returns the operation name (no wait)."""
  cmd = self._GcloudCommand(
      'container',
      'node-pools',
      'create',
      nodepool_config.name,
      '--cluster',
      self.name,
  )
  self._AddNodeParamsToCmd(nodepool_config, cmd)
  if node_version:
    cmd.flags['node-version'] = node_version
  # --async is incompatible with the long --timeout flag in some gcloud
  # builds; remove it so the CLI just hands back the op name immediately.
  cmd.flags.pop('timeout', None)
  return self._IssueAsync(cmd)


def UpgradeNodePoolAsync(self, name: str, target_version: str) -> str:
  """Starts a node pool upgrade and returns the operation name (no wait)."""
  cmd = self._GcloudCommand(
      'container',
      'clusters',
      'upgrade',
      self.name,
      '--node-pool',
      name,
      '--cluster-version',
      target_version,
  )
  cmd.args.append('--quiet')
  return self._IssueAsync(cmd)


def DeleteNodePoolAsync(self, name: str) -> str:
  """Starts a node pool delete and returns the operation name (no wait)."""
  cmd = self._GcloudCommand(
      'container',
      'node-pools',
      'delete',
      name,
      '--cluster',
      self.name,
  )
  cmd.args.append('--quiet')
  return self._IssueAsync(cmd)


def UpdateClusterAsync(self) -> str:
  """Starts the timestamp-label cluster update; returns the operation name."""
  cmd = self._GcloudCommand('container', 'clusters', 'update', self.name)
  cmd.flags['update-labels'] = f'k8s-mgmt-ts={int(time.time())}'
  return self._IssueAsync(cmd)


def ResolveNodePoolVersions(self) -> tuple[str, str]:
  """Returns (initial, target) GKE node versions: initial=N-1, target=N.

  GKE requires fully-qualified node versions (e.g. '1.34.4-gke.1234'),
  so this queries `gcloud container get-server-config` and picks the newest
  valid version overall (target) and the newest valid version of the minor
  directly below it (initial).

  Raises:
    errors.Resource.GetError: if the server config cannot be fetched or
      parsed, lists no valid node versions, or has no version for the
      adjacent lower minor.
  """
  cmd = self._GcloudCommand('container', 'get-server-config')
  cmd.flags['format'] = 'json'
  stdout, stderr, retcode = cmd.Issue(raise_on_failure=False)
  if retcode:
    raise errors.Resource.GetError(
        f'gcloud get-server-config failed: {stderr}'
    )
  try:
    config = json.loads(stdout)
  except ValueError as e:  # json.JSONDecodeError is a ValueError subclass.
    raise errors.Resource.GetError(
        f'gcloud get-server-config returned unparseable JSON: {e}'
    ) from e
  valid = list(config.get('validNodeVersions', []))
  if not valid:
    raise errors.Resource.GetError(
        'GKE get-server-config returned no validNodeVersions'
    )

  def _version_key(v):
    """Sort key: numeric x.y.z followed by the numeric -gke.N build."""
    base, _, build = v.partition('-')
    build_number = build.rpartition('.')[2]
    # Including the build makes "newest" independent of the order in which
    # the server lists versions that share the same x.y.z.
    return tuple(int(x) for x in base.split('.')) + (
        int(build_number) if build_number.isdigit() else 0,
    )

  valid.sort(key=_version_key, reverse=True)
  target = valid[0]
  initial_minor = kubernetes_cluster.AdjacentMinorBelow(target)
  # The list is sorted newest-first, so the first match is the newest
  # version within the adjacent lower minor.
  for v in valid:
    if kubernetes_cluster.BareMinor(v) == initial_minor:
      return v, target
  raise errors.Resource.GetError(
      f'No GKE node version found for minor {initial_minor!r}; '
      f'available top 5: {valid[:5]}'
  )


def WaitForOperation(self, op_handle: str) -> None:
  """Polls a GKE operation until terminal; raises on failure.

  Args:
    op_handle: operation name as returned by the *Async methods.

  Raises:
    errors.Resource.CreationError: if the operation is aborted, or reaches
      DONE while carrying an error.
  """

  @vm_util.Retry(
      poll_interval=5,
      fuzz=0,
      timeout=ONE_HOUR,
      retryable_exceptions=(errors.Resource.RetryableCreationError,),
  )
  def _poll():
    describe = self._GcloudCommand(
        'container',
        'operations',
        'describe',
        op_handle,
    )
    # Full JSON (not just the status) is needed to inspect the error field.
    describe.flags['format'] = 'json'
    out, err, rc = describe.Issue(raise_on_failure=False)
    if rc:
      raise errors.Resource.RetryableCreationError(
          f'describe op failed: {err}'
      )
    try:
      operation = json.loads(out)
    except ValueError as e:
      raise errors.Resource.RetryableCreationError(
          f'describe op returned unparseable output: {e}'
      ) from e
    if not isinstance(operation, dict):
      raise errors.Resource.RetryableCreationError(
          f'describe op returned unexpected payload: {operation!r}'
      )
    status = operation.get('status')
    if status == 'DONE':
      # DONE only means the operation stopped running; a failed operation
      # is also DONE and reports the failure in `error` (or the older
      # `statusMessage`), so it must not be treated as success.
      failure = operation.get('error') or operation.get('statusMessage')
      if failure:
        raise errors.Resource.CreationError(
            f'op {op_handle} failed: {failure}'
        )
      return
    if status in ('ABORTING', 'ABORTED'):
      raise errors.Resource.CreationError(f'op {op_handle} aborted')
    raise errors.Resource.RetryableCreationError(
        f'op {op_handle} status={status}'
    )

  _poll()
def CreateNodePool(
    self,
    nodepool_config: container_lib.BaseNodePoolConfig,
    node_version: str | None = None,
) -> None:
  """Creates one named node pool and blocks until it is ready.

  Providers must override; the base implementation always raises.

  Args:
    nodepool_config: Definition of the pool (name, machine type, node count).
    node_version: Kubernetes version to pin the pool to, or None to use the
      cluster default.
  """
  raise NotImplementedError


def DeleteNodePool(self, name: str) -> None:
  """Removes the named node pool, blocking until it is gone."""
  raise NotImplementedError


def UpgradeNodePool(self, name: str, target_version: str) -> None:
  """Moves the named node pool to the given Kubernetes version."""
  raise NotImplementedError


def UpdateCluster(self) -> None:
  """Runs a lightweight, blocking cluster-level update.

  Exists for management-plane benchmarks that overlap a genuine
  cluster-level operation with a node-pool operation. Implementations are
  expected to issue a control-plane mutation (so a real operation runs) that
  is non-destructive and idempotent across repeated invocations.
  """
  raise NotImplementedError


def CreateNodePoolAsync(
    self,
    nodepool_config: container_lib.BaseNodePoolConfig,
    node_version: str | None = None,
) -> str:
  """Kicks off a node-pool create without waiting; returns an op handle."""
  raise NotImplementedError


def UpgradeNodePoolAsync(self, name: str, target_version: str) -> str:
  """Kicks off a node-pool upgrade without waiting; returns an op handle."""
  raise NotImplementedError


def DeleteNodePoolAsync(self, name: str) -> str:
  """Kicks off a node-pool delete without waiting; returns an op handle."""
  raise NotImplementedError


def UpdateClusterAsync(self) -> str:
  """Kicks off a cluster-level update without waiting; returns an op handle."""
  raise NotImplementedError


@abc.abstractmethod
def GetNodePoolNames(self) -> list[str]:
  """Lists the names of every node pool currently in the cluster.

  The k8s_management benchmark relies on this to:
    - sweep stale pkbm* pools before each run (clean-start requirement)
    - re-list live pools after creates, before deleting (avoids stale names)
  """


def WaitForOperation(self, op_handle: str) -> None:
  """Blocks until the operation named by op_handle has finished.

  Args:
    op_handle: Provider-specific opaque string produced by one of the
      *Async methods.

  Raises:
    errors.Resource.RetryableCreationError or similar on timeout/failure.
  """
  raise NotImplementedError


def ResolveNodePoolVersions(self) -> tuple[str, str]:
  """Computes the (initial, target) node-pool versions for the benchmark.

  Contract:
    target  = the cluster's current K8s version, reduced to 'major.minor'
    initial = the minor directly below target (target=1.35 -> 1.34)
  Both are bare-minor strings such as "1.34" / "1.35". Providers that need
  fully-qualified versions (notably GKE) must override this.
  """
  current = self.k8s_version
  return AdjacentMinorBelow(current), BareMinor(current)


def BareMinor(version: str) -> str:
  """Reduces a K8s version string to its 'major.minor' prefix.

  Handles inputs such as 'v1.35.4', '1.35.4-gke.1234' and '1.35'.

  Raises:
    ValueError: if the first two dot-separated components are not both
      made of digits.
  """
  version = version.removeprefix('v')
  numeric = version.partition('-')[0]
  components = numeric.split('.')
  parseable = len(components) >= 2 and all(
      piece.isdigit() for piece in components[:2]
  )
  if not parseable:
    raise ValueError(f'Cannot parse K8s version: {version!r}')
  major, minor = components[:2]
  return f'{major}.{minor}'


def AdjacentMinorBelow(version: str) -> str:
  """Gives the bare minor one step below version: '1.35.4' -> '1.34'.

  Raises:
    ValueError: if version cannot be parsed or its minor is already 0.
  """
  major_s, _, minor_s = BareMinor(version).partition('.')
  previous = int(minor_s) - 1
  if previous < 0:
    raise ValueError(f'No adjacent minor below {version!r}')
  return f'{major_s}.{previous}'
UpgradeNodePool(self, name: str, target_version: str) -> None: _AzureNodePoolName(name), '--kubernetes-version', target_version, - '--yes', ] + self.resource_group.args vm_util.IssueCommand(cmd, timeout=1800) @@ -641,7 +640,6 @@ def UpgradeNodePoolAsync(self, name: str, target_version: str) -> str: '--kubernetes-version', target_version, '--no-wait', - '--yes', ] + self.resource_group.args _, stderr, retcode = vm_util.IssueCommand( cmd, timeout=300, raise_on_failure=False @@ -661,7 +659,6 @@ def DeleteNodePoolAsync(self, name: str) -> str: '--name', _AzureNodePoolName(name), '--no-wait', - '--yes', ] + self.resource_group.args _, stderr, retcode = vm_util.IssueCommand( cmd, timeout=300, raise_on_failure=False From 2425aac5da46be01f09abb0805a64c6f4868368d Mon Sep 17 00:00:00 2001 From: Srikant Patil Date: Fri, 22 May 2026 11:00:13 +0530 Subject: [PATCH 05/19] GCP overlapping issue fixes --- .../k8s_management_benchmark.py | 21 ++++++++++++++++++ .../providers/gcp/google_kubernetes_engine.py | 22 +++++++++++++------ 2 files changed, 36 insertions(+), 7 deletions(-) diff --git a/perfkitbenchmarker/linux_benchmarks/k8s_management_benchmark.py b/perfkitbenchmarker/linux_benchmarks/k8s_management_benchmark.py index de568101ae..21601527e9 100644 --- a/perfkitbenchmarker/linux_benchmarks/k8s_management_benchmark.py +++ b/perfkitbenchmarker/linux_benchmarks/k8s_management_benchmark.py @@ -339,6 +339,27 @@ def _RunScenarioA( ) samples += _OpSamples('ScenarioA_Upgrade', upgrade_results, attempted_ops=len(created)) + # ── Dynamic Wait: Wait until the cluster control plane reports STATUS_RUNNING ── + logging.info('Scenario A upgrades finished. 
Checking control plane status...') + poll_start = time.time() + timeout_seconds = 300 # 5-minute guardrail timeout + + while time.time() - poll_start < timeout_seconds: + # Most provider cluster objects expose a way to refresh or check status + # If your provider wrapper supports a direct get/refresh, use it here: + try: + # Example assuming standard PKB cluster status tracking: + # If the cluster control plane is still 'UPDATING', wait. + if hasattr(cluster, 'GetStatus') and cluster.GetStatus() == 'RUNNING': + logging.info('Cluster control plane is stable and RUNNING.') + break + except Exception as e: + logging.warning('Waiting for control plane to stabilize: %s', e) + + logging.info('Control plane busy or locking. Waiting 15 seconds before checking again...') + time.sleep(15) + else: + logging.warning('Control plane did not return to idle within timeout. Proceeding anyway.') # ── Phase 3: concurrent deletes (live-list to catch EKS rollbacks) ──────── alive = [p for p in cluster.GetNodePoolNames() if p.startswith(f'{_PREFIX}a')] diff --git a/perfkitbenchmarker/providers/gcp/google_kubernetes_engine.py b/perfkitbenchmarker/providers/gcp/google_kubernetes_engine.py index 13b172e800..ab2cb91687 100644 --- a/perfkitbenchmarker/providers/gcp/google_kubernetes_engine.py +++ b/perfkitbenchmarker/providers/gcp/google_kubernetes_engine.py @@ -55,8 +55,8 @@ def _CalculateCidrSize(nodes: int) -> int: # So 2^(32 - nodes) - 2^(32 - 20) >= 2^(32 - 24) * CIDR # OR CIDR <= 32 - log2(2^8 * nodes + 2^12) cidr_size = int(32 - math.log2((nodes << 8) + (1 << 12))) - # /19 is narrowest CIDR range GKE supports - return min(cidr_size, 19) + # /17 is narrowest CIDR range GKE supports + return min(cidr_size, 16) class GoogleArtifactRegistry(container_registry.BaseContainerRegistry): @@ -262,10 +262,13 @@ def GetNodePoolNames(self) -> list[str]: # Command `gcloud container node-pools list` does not work for Autopilot # clusters - node pools are hidden and command results in 4xx. 
cmd = self._GcloudCommand('container', 'clusters', 'describe', self.name) - cmd.flags['flatten'] = 'nodePools' - cmd.flags['format'] = 'value(nodePools.name)' + cmd.flags['format'] = 'json' stdout, _, _ = cmd.Issue() - return stdout.split() + try: + cluster_info = json.loads(stdout) + return [np['name'] for np in cluster_info.get('nodePools', [])] + except (json.JSONDecodeError, ValueError, KeyError, TypeError): + return stdout.split() def GetMachineTypeFromNodeName(self, node_name: str) -> str | None: """Get the machine type from the node name.""" @@ -817,14 +820,19 @@ def _poll(): 'operations', 'describe', op_handle, - '--format=value(status)', ) + #describe.flags['format'] = 'value(status)' + describe.flags['format'] = 'json' out, err, rc = describe.Issue(raise_on_failure=False) if rc: raise errors.Resource.RetryableCreationError( f'describe op failed: {err}' ) - status = out.strip() + #status = out.strip() + try: + status = json.loads(out).get('status') + except (json.JSONDecodeError, ValueError): + status = out.strip() if status == 'DONE': return if status in ('ABORTING', 'ABORTED'): From 169b7f7de554be94d4bdc210d44ebfaa8afd67fe Mon Sep 17 00:00:00 2001 From: Srikant Patil Date: Fri, 22 May 2026 11:12:09 +0530 Subject: [PATCH 06/19] GCP overlapping issue fixes --- .../k8s_management_benchmark.py | 29 +++++++++++-------- 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/perfkitbenchmarker/linux_benchmarks/k8s_management_benchmark.py b/perfkitbenchmarker/linux_benchmarks/k8s_management_benchmark.py index 21601527e9..dbbf8a4923 100644 --- a/perfkitbenchmarker/linux_benchmarks/k8s_management_benchmark.py +++ b/perfkitbenchmarker/linux_benchmarks/k8s_management_benchmark.py @@ -342,24 +342,29 @@ def _RunScenarioA( # ── Dynamic Wait: Wait until the cluster control plane reports STATUS_RUNNING ── logging.info('Scenario A upgrades finished. 
Checking control plane status...') poll_start = time.time() - timeout_seconds = 300 # 5-minute guardrail timeout + timeout_seconds = 300 + status = None - while time.time() - poll_start < timeout_seconds: - # Most provider cluster objects expose a way to refresh or check status - # If your provider wrapper supports a direct get/refresh, use it here: + while status != 'RUNNING' and (time.time() - poll_start) < timeout_seconds: try: - # Example assuming standard PKB cluster status tracking: - # If the cluster control plane is still 'UPDATING', wait. - if hasattr(cluster, 'GetStatus') and cluster.GetStatus() == 'RUNNING': - logging.info('Cluster control plane is stable and RUNNING.') + if hasattr(cluster, 'GetStatus'): + status = cluster.GetStatus() + logging.info('Current cluster control plane status: %s', status) + else: + logging.warning('Cluster provider does not support GetStatus(). Falling back to 30s cooldown.') + time.sleep(30) break except Exception as e: - logging.warning('Waiting for control plane to stabilize: %s', e) + logging.warning('Transient error querying cluster status: %s. Retrying...', e) - logging.info('Control plane busy or locking. Waiting 15 seconds before checking again...') - time.sleep(15) + # Only sleep if we need to poll again (status is still updating) + if status != 'RUNNING': + logging.info('Control plane busy or locking. Waiting 30 seconds before checking again...') + time.sleep(30) + if status == 'RUNNING': + logging.info('Cluster control plane is stable and RUNNING. Proceeding to deletes.') else: - logging.warning('Control plane did not return to idle within timeout. Proceeding anyway.') + logging.warning('Control plane did not return to RUNNING within safety limit. 
Proceeding anyway.') # ── Phase 3: concurrent deletes (live-list to catch EKS rollbacks) ──────── alive = [p for p in cluster.GetNodePoolNames() if p.startswith(f'{_PREFIX}a')] From d99332510de73991aa9a22303a5670dbb74ccd78 Mon Sep 17 00:00:00 2001 From: Srikant Patil Date: Fri, 22 May 2026 15:40:14 +0530 Subject: [PATCH 07/19] GCP logging update --- .../k8s_management_benchmark.py | 27 +------------------ 1 file changed, 1 insertion(+), 26 deletions(-) diff --git a/perfkitbenchmarker/linux_benchmarks/k8s_management_benchmark.py b/perfkitbenchmarker/linux_benchmarks/k8s_management_benchmark.py index dbbf8a4923..72ffea3caf 100644 --- a/perfkitbenchmarker/linux_benchmarks/k8s_management_benchmark.py +++ b/perfkitbenchmarker/linux_benchmarks/k8s_management_benchmark.py @@ -35,6 +35,7 @@ from absl import flags from absl import logging from perfkitbenchmarker import background_tasks +from perfkitbenchmarker import vm_util from perfkitbenchmarker import benchmark_spec as bm_spec from perfkitbenchmarker import configs from perfkitbenchmarker import errors @@ -339,33 +340,7 @@ def _RunScenarioA( ) samples += _OpSamples('ScenarioA_Upgrade', upgrade_results, attempted_ops=len(created)) - # ── Dynamic Wait: Wait until the cluster control plane reports STATUS_RUNNING ── - logging.info('Scenario A upgrades finished. Checking control plane status...') - poll_start = time.time() - timeout_seconds = 300 - status = None - while status != 'RUNNING' and (time.time() - poll_start) < timeout_seconds: - try: - if hasattr(cluster, 'GetStatus'): - status = cluster.GetStatus() - logging.info('Current cluster control plane status: %s', status) - else: - logging.warning('Cluster provider does not support GetStatus(). Falling back to 30s cooldown.') - time.sleep(30) - break - except Exception as e: - logging.warning('Transient error querying cluster status: %s. 
Retrying...', e) - - # Only sleep if we need to poll again (status is still updating) - if status != 'RUNNING': - logging.info('Control plane busy or locking. Waiting 30 seconds before checking again...') - time.sleep(30) - if status == 'RUNNING': - logging.info('Cluster control plane is stable and RUNNING. Proceeding to deletes.') - else: - logging.warning('Control plane did not return to RUNNING within safety limit. Proceeding anyway.') - # ── Phase 3: concurrent deletes (live-list to catch EKS rollbacks) ──────── alive = [p for p in cluster.GetNodePoolNames() if p.startswith(f'{_PREFIX}a')] logging.info('Scenario A: %d live pools found for delete (originally %d)', From 496610542ad814e9704efeaa5f738ae121993e43 Mon Sep 17 00:00:00 2001 From: Ashish Suneja Date: Fri, 22 May 2026 17:28:22 +0530 Subject: [PATCH 08/19] fix: 200~3-AZ round-robin nodegroup distribution for EKS~ --- .../k8s_management_benchmark.py | 7 +- .../aws/elastic_kubernetes_service.py | 242 ++++++++++++++---- perfkitbenchmarker/providers/aws/flags.py | 11 + .../container_service/kubernetes_cluster.py | 17 +- 4 files changed, 228 insertions(+), 49 deletions(-) diff --git a/perfkitbenchmarker/linux_benchmarks/k8s_management_benchmark.py b/perfkitbenchmarker/linux_benchmarks/k8s_management_benchmark.py index de568101ae..4ef028a93f 100644 --- a/perfkitbenchmarker/linux_benchmarks/k8s_management_benchmark.py +++ b/perfkitbenchmarker/linux_benchmarks/k8s_management_benchmark.py @@ -126,10 +126,11 @@ ) _PIPELINE_SCENARIO_A = flags.DEFINE_boolean( 'k8s_mgmt_pipeline_scenario_a', - False, + True, 'If True, run Scenario A as a per-pool pipeline (create->upgrade->delete ' - 'back-to-back per thread). Minimizes wall time but measures ops under ' - 'mixed-type concurrent load. Default False = phase-by-phase (spec-strict).', + 'back-to-back per thread). Minimizes wall time: max_i(create+upgrade+delete) ' + 'vs max(creates)+max(upgrades)+max(deletes) in phase-by-phase mode. ' + 'Default True for faster runs. 
Set False for spec-strict phase-by-phase.', ) # AKS caps node-pool names at 12 chars — keep all names within that limit. diff --git a/perfkitbenchmarker/providers/aws/elastic_kubernetes_service.py b/perfkitbenchmarker/providers/aws/elastic_kubernetes_service.py index e81e3dc837..90b74d86a2 100644 --- a/perfkitbenchmarker/providers/aws/elastic_kubernetes_service.py +++ b/perfkitbenchmarker/providers/aws/elastic_kubernetes_service.py @@ -46,6 +46,13 @@ from perfkitbenchmarker.resources.container_service import kubernetes_cluster from perfkitbenchmarker.resources.container_service import kubernetes_commands +# Flag to skip EBS CSI driver setup during cluster creation. +# The k8s_management benchmark does not use persistent volumes, so EBS CSI +# setup (OIDC association + IAM role + addon install) is unnecessary and adds +# ~3 minutes to every run. Set to True to skip it and save time. +# Defined before FLAGS = flags.FLAGS so it is registered at import time +# and visible to PKB's flag parser before --cloud/--container_cluster_type +# are resolved. FLAGS = flags.FLAGS # GPU types which practically require spot to get. _RARE_GPU_TYPES = [ @@ -129,10 +136,21 @@ def _DeleteDependencies(self): def _EksCtlCreate(self, create_json: dict[str, Any]): """Creates the EKS cluster.""" - # If multiple zones are passed use them for the control plane. - # Otherwise EKS will auto-select control plane zones in the region. - if self.control_plane_zones: - create_json['availabilityZones'] = self.control_plane_zones + # Pass all control_plane_zones to the cluster so eksctl creates VPC subnets + # in every requested AZ. Without this, eksctl may only create subnets in 2 + # AZs even when 3 are requested, preventing round-robin nodegroup placement. + # This is critical for distributing nodegroups across AZs to avoid per-AZ + # EC2 capacity limits. + # availabilityZones is already set in create_json by _CreateDependencies + # via the EC2 AZ query (bypassing PKB zone flag truncation). 
+ # Log it here for visibility. + if 'availabilityZones' in create_json: + logging.info( + '[EKS] Creating cluster with AZs: %s — ' + 'eksctl will auto-assign CIDRs for all %d zones.', + create_json['availabilityZones'], + len(create_json['availabilityZones']), + ) # Schema for the cluster create command is here: # https://schema.eksctl.io/ create_json = RecursivelyUpdateDictionary( @@ -186,6 +204,11 @@ def _RenderNodeGroupJson( if nodepool.min_nodes != nodepool.max_nodes: group_json['minSize'] = nodepool.min_nodes group_json['maxSize'] = nodepool.max_nodes + # Pin the default nodegroup to control_plane_zones[0] so it stays in a + # single known AZ. The benchmark nodegroups (pkbma*, pkbmc*) are placed + # via CreateNodePoolAsync using the round-robin _DiscoverSubnetsPerAZ logic. + if self.control_plane_zones: + group_json['availabilityZones'] = [self.control_plane_zones[0]] return group_json def _WriteJsonToFile(self, json_dict: dict[str, Any]) -> str: @@ -398,55 +421,103 @@ def _Create(self): nodepool_jsons += [self._RenderNodeGroupJson(node_group)] create_json: dict[str, Any] = { 'managedNodeGroups': nodepool_jsons, - 'vpc': { - 'nat': {'gateway': 'Disable'}, - }, + 'vpc': {'nat': {'gateway': 'Disable'}}, } + # Explicitly set cluster-level availabilityZones so eksctl creates VPC + # public+private subnets in ALL AZs in the region. + # IMPORTANT: PKB's deprecated --zones flag gets truncated by its own + # translation layer to 2 AZs even when 3 are specified. We bypass this + # by querying EC2 directly for all available AZs in the region and + # passing all of them to eksctl. This ensures the VPC gets subnets in + # all AZs, enabling proper round-robin nodegroup placement. 
+ try: + az_out, _, az_rc = vm_util.IssueCommand( + util.AWS_PREFIX + [ + 'ec2', 'describe-availability-zones', + '--region', self.region, + '--filters', 'Name=state,Values=available', + '--query', 'AvailabilityZones[*].ZoneName', + '--output', 'json', + ], + raise_on_failure=False, + ) + if az_rc == 0 and az_out.strip(): + all_azs = json.loads(az_out.strip()) + # Limit to 3 AZs maximum to avoid excessive subnet creation + cluster_azs = sorted(all_azs)[:3] + else: + # Fallback: use control_plane_zones or default to known us-east-1 AZs + cluster_azs = ( + self.control_plane_zones + if self.control_plane_zones + else [f'{self.region}a', f'{self.region}b', f'{self.region}c'] + ) + except Exception: # pylint: disable=broad-except + cluster_azs = ( + self.control_plane_zones + if self.control_plane_zones + else [f'{self.region}a', f'{self.region}b', f'{self.region}c'] + ) + + create_json['availabilityZones'] = cluster_azs + logging.info( + '[EKS] Cluster will have subnets in %d AZs: %s ' + '(queried from EC2, bypassing PKB zone flag truncation)', + len(cluster_azs), cluster_azs, + ) self._EksCtlCreate(create_json) # Above create command passes "withOidc=true", but it doesn't seem to work & # therefore this command is needed. - cmd = [ - FLAGS.eksctl, - 'utils', - 'associate-iam-oidc-provider', - f'--cluster={self.name}', - f'--region={self.region}', - '--approve', - ] - vm_util.IssueCommand(cmd) + if not FLAGS.eks_skip_ebs_csi: + cmd = [ + FLAGS.eksctl, + 'utils', + 'associate-iam-oidc-provider', + f'--cluster={self.name}', + f'--region={self.region}', + '--approve', + ] + vm_util.IssueCommand(cmd) # EBS CSI driver is required for creating EBS volumes in version > 1.23 # https://docs.aws.amazon.com/eks/latest/userguide/ebs-csi.html + # Skip if --eks_skip_ebs_csi is set (saves ~3 min for benchmarks that + # do not use persistent volumes, such as k8s_management). + if FLAGS.eks_skip_ebs_csi: + logging.info( + '[EKS] Skipping EBS CSI driver setup (--eks_skip_ebs_csi=True). 
' + 'Saves ~3 min. Set to False if your benchmark needs persistent volumes.' + ) + else: + # Name must be unique. + ebs_csi_driver_role = f'AmazonEKS_EBS_CSI_DriverRole_{self.name}' + + cmd = [ + FLAGS.eksctl, + 'create', + 'iamserviceaccount', + '--name=ebs-csi-controller-sa', + '--namespace=kube-system', + f'--region={self.region}', + f'--cluster={self.name}', + '--attach-policy-arn=arn:aws:iam::aws:policy/service-role/AmazonEBSCSIDriverPolicy', + '--approve', + '--role-only', + f'--role-name={ebs_csi_driver_role}', + ] + vm_util.IssueCommand(cmd) - # Name must be unique. - ebs_csi_driver_role = f'AmazonEKS_EBS_CSI_DriverRole_{self.name}' - - cmd = [ - FLAGS.eksctl, - 'create', - 'iamserviceaccount', - '--name=ebs-csi-controller-sa', - '--namespace=kube-system', - f'--region={self.region}', - f'--cluster={self.name}', - '--attach-policy-arn=arn:aws:iam::aws:policy/service-role/AmazonEBSCSIDriverPolicy', - '--approve', - '--role-only', - f'--role-name={ebs_csi_driver_role}', - ] - vm_util.IssueCommand(cmd) - - cmd = [ - FLAGS.eksctl, - 'create', - 'addon', - '--name=aws-ebs-csi-driver', - f'--region={self.region}', - f'--cluster={self.name}', - f'--service-account-role-arn=arn:aws:iam::{self.account}:role/{ebs_csi_driver_role}', - ] - vm_util.IssueCommand(cmd) + cmd = [ + FLAGS.eksctl, + 'create', + 'addon', + '--name=aws-ebs-csi-driver', + f'--region={self.region}', + f'--cluster={self.name}', + f'--service-account-role-arn=arn:aws:iam::{self.account}:role/{ebs_csi_driver_role}', + ] + vm_util.IssueCommand(cmd) if aws_flags.AWS_EKS_POD_IDENTITY_ROLE.value: cmd = util.AWS_PREFIX + [ @@ -606,6 +677,63 @@ def _DiscoverSubnets(self) -> list[str]: self._cached_subnets = info['cluster']['resourcesVpcConfig']['subnetIds'] return self._cached_subnets + def _DiscoverSubnetsPerAZ(self) -> dict[str, str]: + """Returns a mapping of {AZ: subnet_id} for the cluster's subnets. 
+ + Used by CreateNodePoolAsync to distribute nodegroups round-robin across + AZs, avoiding per-AZ EC2 capacity limits when creating many pools. + Only returns AZs that are in control_plane_zones (if specified). + Cached after first call. + """ + if getattr(self, '_cached_subnets_per_az', None): + return self._cached_subnets_per_az + + subnet_ids = self._DiscoverSubnets() + if not subnet_ids: + self._cached_subnets_per_az = {} + return {} + + # Describe subnets to get their AZ mapping + out, _, rc = vm_util.IssueCommand( + util.AWS_PREFIX + [ + 'ec2', 'describe-subnets', + '--region', self.region, + '--subnet-ids', *subnet_ids, + '--query', 'Subnets[*].{SubnetId:SubnetId,AZ:AvailabilityZone}', + '--output', 'json', + ], + raise_on_failure=False, + ) + if rc: + logging.warning('[EKS] Could not describe subnets for AZ mapping — ' + 'falling back to all subnets') + self._cached_subnets_per_az = {} + return {} + + subnets = json.loads(out) + + # Do NOT filter by control_plane_zones — PKB truncates it to 2 AZs. + # Accept all subnets the VPC has across all AZs. + allowed_zones = None + + az_map: dict[str, str] = {} + for s in subnets: + az = s['AZ'] + if allowed_zones and az not in allowed_zones: + continue + # Keep only one subnet per AZ (prefer public subnets — already filtered + # by _DiscoverSubnets which returns the cluster's configured subnets) + if az not in az_map: + az_map[az] = s['SubnetId'] + + logging.info( + '[EKS] Subnet-per-AZ mapping: %s (from %d total subnets, ' + 'allowed_zones=%s)', + az_map, len(subnet_ids), allowed_zones, + ) + self._cached_subnets_per_az = az_map + return az_map + def _ResolveReleaseVersion(self, minor: str) -> str: """Returns the EKS-optimized AMI release version (e.g. '1.33.10-20260124'). @@ -698,6 +826,30 @@ def CreateNodePoolAsync( # cluster's version, which (for the N-1 -> N benchmark path) # produces a "release version X is not valid for kubernetes # version Y" error. 
+ + # ── AZ distribution ──────────────────────────────────────────────────── + # When multiple zones are specified (e.g. us-east-1a,1b,1c), distribute + # nodegroups round-robin across AZs to avoid per-AZ EC2 capacity limits. + # Without this, EKS places all nodegroups in a single AZ causing timeouts. + # Pool name format: pkbma000, pkbma001, ... — extract index from suffix. + az_subnets = self._DiscoverSubnetsPerAZ() + if az_subnets and len(az_subnets) > 1: + # Extract numeric suffix from pool name to determine AZ assignment + name = nodepool_config.name + suffix = ''.join(c for c in name if c.isdigit()) + idx = int(suffix) if suffix else 0 + zones = sorted(az_subnets.keys()) + assigned_az = zones[idx % len(zones)] + subnets = [az_subnets[assigned_az]] + logging.info( + '[EKS] CreateNodePool %s -> AZ=%s subnet=%s (round-robin idx=%d)', + name, assigned_az, subnets[0], idx, + ) + else: + subnets = self._DiscoverSubnets() + logging.info('[EKS] CreateNodePool %s -> using all subnets (single AZ)', + nodepool_config.name) + payload: dict[str, Any] = { 'clusterName': self.name, 'nodegroupName': nodepool_config.name, @@ -706,7 +858,7 @@ def CreateNodePoolAsync( 'maxSize': nodepool_config.num_nodes, 'desiredSize': nodepool_config.num_nodes, }, - 'subnets': self._DiscoverSubnets(), + 'subnets': subnets, 'instanceTypes': [nodepool_config.machine_type], 'amiType': 'AL2023_x86_64_STANDARD', 'nodeRole': self._DiscoverNodeRoleArn(), diff --git a/perfkitbenchmarker/providers/aws/flags.py b/perfkitbenchmarker/providers/aws/flags.py index 6871a085e5..414c8c3fa1 100644 --- a/perfkitbenchmarker/providers/aws/flags.py +++ b/perfkitbenchmarker/providers/aws/flags.py @@ -376,3 +376,14 @@ def _ValidatePreprovisionedDataAccess(flag_values: dict[str, Any]) -> bool: None, 'If supplied, creates the DocumentDB instance from the snapshot.', ) + +# Flag to skip EBS CSI driver setup during EKS cluster creation. +# Safe for benchmarks that do not use persistent volumes (e.g. k8s_management). 
+# Saves ~3 minutes per run. +flags.DEFINE_boolean( + 'eks_skip_ebs_csi', + False, + 'If True, skip EBS CSI driver setup (OIDC + IAM role + addon install) ' + 'during EKS cluster creation. Safe for the k8s_management benchmark ' + 'which does not use persistent volumes. Saves ~3 minutes per run.', +) diff --git a/perfkitbenchmarker/resources/container_service/kubernetes_cluster.py b/perfkitbenchmarker/resources/container_service/kubernetes_cluster.py index 1742b93470..e65016ad04 100644 --- a/perfkitbenchmarker/resources/container_service/kubernetes_cluster.py +++ b/perfkitbenchmarker/resources/container_service/kubernetes_cluster.py @@ -57,7 +57,22 @@ def Create(self, restore: bool = False) -> None: def _PostCreate(self): super()._PostCreate() if self.event_poller: - self.event_poller.StartPolling() + try: + self.event_poller.StartPolling() + except Exception as exc: # pylint: disable=broad-except + # Python 3.14 tightened pickling rules for multiprocessing — local + # functions passed to Process cannot be pickled. Rather than crashing + # PKB entirely (which prevents cleanup and orphans cloud resources), + # log a warning and continue without the event poller. + # Impact: no Kubernetes event streaming during the run — benchmark + # metrics are unaffected. + logging.warning( + 'Event poller failed to start (non-fatal, continuing without ' + 'event polling): %s. 
This is a known Python 3.14 pickling ' + 'issue — switch to Python 3.13 to enable event polling.', + exc, + ) + self.event_poller = None def Delete(self, freeze: bool = False) -> None: if self.inference_server: From ae31919f7c7eb77ffde0206527b7ca82dbb192b7 Mon Sep 17 00:00:00 2001 From: Ashish Suneja Date: Fri, 22 May 2026 21:53:23 +0530 Subject: [PATCH 09/19] fix: rename benchmark file and add capacity reservation targeting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Rename k8s_management_benchmark.py to kubernetes_management_benchmark.py - elastic_kubernetes_service.py: add capacityReservationSpecification with preference 'open' to target EC2 capacity reservations in CreateNodePoolAsync — ensures reserved t3.medium instances are used by EKS nodegroups instead of competing for on-demand capacity --- ...ment_benchmark.py => kubernetes_management_benchmark.py} | 0 .../providers/aws/elastic_kubernetes_service.py | 6 ++++++ 2 files changed, 6 insertions(+) rename perfkitbenchmarker/linux_benchmarks/{k8s_management_benchmark.py => kubernetes_management_benchmark.py} (100%) diff --git a/perfkitbenchmarker/linux_benchmarks/k8s_management_benchmark.py b/perfkitbenchmarker/linux_benchmarks/kubernetes_management_benchmark.py similarity index 100% rename from perfkitbenchmarker/linux_benchmarks/k8s_management_benchmark.py rename to perfkitbenchmarker/linux_benchmarks/kubernetes_management_benchmark.py diff --git a/perfkitbenchmarker/providers/aws/elastic_kubernetes_service.py b/perfkitbenchmarker/providers/aws/elastic_kubernetes_service.py index f463b6c597..90ed8ca565 100644 --- a/perfkitbenchmarker/providers/aws/elastic_kubernetes_service.py +++ b/perfkitbenchmarker/providers/aws/elastic_kubernetes_service.py @@ -864,6 +864,12 @@ def CreateNodePoolAsync( 'nodeRole': self._DiscoverNodeRoleArn(), 'labels': {'pkb_nodepool': nodepool_config.name}, 'tags': util.MakeDefaultTags(), + # Target open capacity reservations first before 
falling back to + # regular on-demand. Ensures EC2 capacity reservations created + # before the benchmark are actually used by EKS nodegroups. + 'capacityReservationSpecification': { + 'capacityReservationPreference': 'open', + }, } if node_version: payload['version'] = node_version From 833867784a56fc298a539826bcf45f6c1fa8829f Mon Sep 17 00:00:00 2001 From: Ashish Suneja Date: Tue, 26 May 2026 23:37:20 +0530 Subject: [PATCH 10/19] EKS --- .../aws/elastic_kubernetes_service.py | 329 ++++++++++++++++-- 1 file changed, 297 insertions(+), 32 deletions(-) diff --git a/perfkitbenchmarker/providers/aws/elastic_kubernetes_service.py b/perfkitbenchmarker/providers/aws/elastic_kubernetes_service.py index 90ed8ca565..7023d4ec69 100644 --- a/perfkitbenchmarker/providers/aws/elastic_kubernetes_service.py +++ b/perfkitbenchmarker/providers/aws/elastic_kubernetes_service.py @@ -115,6 +115,8 @@ def __init__(self, spec): self.account: str = util.GetAccount() self.node_to_nodepool: dict[str, container.BaseNodePoolConfig | None] = {} self.node_to_machine_type: dict[str, str | None] = {} + # Dynamically created capacity reservations — keyed by AZ + self._capacity_reservation_ids: dict[str, str] = {} def _ChooseSecondZone(self): """Choose a second zone for the control plane if only one is specified.""" @@ -126,13 +128,9 @@ def _ChooseSecondZone(self): self.region + ('b' if self.zone.endswith('a') else 'a') ) - def _CreateDependencies(self): - """Set up the ssh key.""" - aws_virtual_machine.AwsKeyFileManager.ImportKeyfile(self.region) - def _DeleteDependencies(self): - """Delete the ssh key.""" - aws_virtual_machine.AwsKeyFileManager.DeleteKeyfile(self.region) + + def _EksCtlCreate(self, create_json: dict[str, Any]): """Creates the EKS cluster.""" @@ -235,6 +233,32 @@ def _WriteJsonToFile(self, json_dict: dict[str, Any]) -> str: def _Delete(self): """Deletes the control plane and worker nodes.""" + # Clean up SSH key pair — safety net in case _DeleteDependencies didn't run + try: + 
aws_virtual_machine.AwsKeyFileManager.DeleteKeyfile(self.region) + except Exception: # pylint: disable=broad-except + pass + # Clean up dynamically created launch templates and capacity reservations + for az in getattr(self, '_capacity_reservation_ids', {}).keys(): + vm_util.IssueCommand( + util.AWS_PREFIX + [ + 'ec2', 'delete-launch-template', + '--launch-template-name', f'pkb-eks-lt-{az}', + '--region', self.region, + ], + raise_on_failure=False, + ) + logging.info('[EKS] Deleted launch template pkb-eks-lt-%s', az) + for az, res_id in getattr(self, '_capacity_reservation_ids', {}).items(): + vm_util.IssueCommand( + util.AWS_PREFIX + [ + 'ec2', 'cancel-capacity-reservation', + '--capacity-reservation-id', res_id, + '--region', self.region, + ], + raise_on_failure=False, + ) + logging.info('[EKS] Cancelled capacity reservation %s in %s', res_id, az) super()._Delete() cmd = [ FLAGS.eksctl, @@ -416,6 +440,8 @@ def GetResourceMetadata(self): def _Create(self): """Creates the control plane and worker nodes.""" + # Import SSH key pair to EC2 before cluster creation — eksctl requires it. + aws_virtual_machine.AwsKeyFileManager.ImportKeyfile(self.region) nodepool_jsons = [self._RenderNodeGroupJson(self.default_nodepool)] for _, node_group in self.nodepools.items(): nodepool_jsons += [self._RenderNodeGroupJson(node_group)] @@ -465,8 +491,143 @@ def _Create(self): '(queried from EC2, bypassing PKB zone flag truncation)', len(cluster_azs), cluster_azs, ) + self._EksCtlCreate(create_json) + # Dynamically create capacity reservations + launch templates AFTER cluster + # creation so cluster CA and endpoint are available for node bootstrap. 
+ self._capacity_reservation_ids = {} + # Reserve enough capacity per AZ for 100 pools: + # ~67 pools per AZ × 2 nodes = 134 instances max per AZ (Scenario A) + # Plus default nodegroup (2) + buffer = 80 minimum for 10 pools, 150 for 100 pools + concurrent = getattr(FLAGS, 'k8s_mgmt_concurrent_nodepools', 10) + nodes_per_az = max(80, concurrent * 2 + 20) + # Fetch cluster CA and endpoint for bootstrap user data + import json as _json + cluster_out, _, cluster_rc = vm_util.IssueCommand( + util.AWS_PREFIX + [ + 'eks', 'describe-cluster', + '--name', self.name, + '--region', self.region, + '--query', 'cluster.{endpoint:endpoint,ca:certificateAuthority.data,cidr:kubernetesNetworkConfig.serviceIpv4Cidr}', + '--output', 'json', + ], + raise_on_failure=False, + ) + cluster_ca = '' + cluster_endpoint = '' + cluster_service_cidr = '10.100.0.0/16' # default fallback + if cluster_rc == 0 and cluster_out.strip(): + cluster_info = _json.loads(cluster_out.strip()) + cluster_ca = cluster_info.get('ca', '') + cluster_endpoint = cluster_info.get('endpoint', '') + cluster_service_cidr = cluster_info.get('cidr', '10.100.0.0/16') + logging.info('[EKS] Fetched cluster endpoint=%s cidr=%s for bootstrap', + cluster_endpoint, cluster_service_cidr) + + # Query EKS-optimized AMI once for all AZs + # cluster_version may be None if not explicitly set — fetch from cluster + if not self.cluster_version: + ver_out, _, ver_rc = vm_util.IssueCommand( + util.AWS_PREFIX + [ + 'eks', 'describe-cluster', + '--name', self.name, + '--region', self.region, + '--query', 'cluster.version', + '--output', 'text', + ], + raise_on_failure=False, + ) + self.cluster_version = ver_out.strip() if ver_rc == 0 and ver_out.strip() else '1.34' + logging.info('[EKS] Resolved cluster version: %s', self.cluster_version) + k8s_minor_str = '.'.join(self.cluster_version.split('.')[:2]) + ami_out, _, ami_rc = vm_util.IssueCommand( + util.AWS_PREFIX + [ + 'ssm', 'get-parameter', + '--name', ( + 
f'/aws/service/eks/optimized-ami/{k8s_minor_str}/' + 'amazon-linux-2023/x86_64/standard/recommended/image_id' + ), + '--region', self.region, + '--query', 'Parameter.Value', + '--output', 'text', + ], + raise_on_failure=False, + ) + ami_id = ami_out.strip() if ami_rc == 0 and ami_out.strip() else '' + logging.info('[EKS] EKS AMI for K8s %s: %s', k8s_minor_str, ami_id) + + for az in cluster_azs: + logging.info('[EKS] Creating capacity reservation in %s (%d instances)...', az, nodes_per_az) + cap_out, _, cap_rc = vm_util.IssueCommand( + util.AWS_PREFIX + [ + 'ec2', 'create-capacity-reservation', + '--instance-type', 't3.medium', + '--instance-platform', 'Linux/UNIX', + '--availability-zone', az, + '--instance-count', str(nodes_per_az), + '--region', self.region, + '--query', 'CapacityReservation.CapacityReservationId', + '--output', 'text', + ], + raise_on_failure=False, + ) + if cap_rc == 0 and cap_out.strip() and cap_out.strip() != 'None': + res_id = cap_out.strip() + self._capacity_reservation_ids[az] = res_id + logging.info('[EKS] Created capacity reservation %s in %s', res_id, az) + if ami_id and cluster_ca and cluster_endpoint: + import base64 as _b64 + # AL2023 uses nodeadm YAML config — NOT the old bootstrap.sh + nodeadm_config = ( + 'apiVersion: node.eks.aws/v1alpha1' + chr(10) + + 'kind: NodeConfig' + chr(10) + + 'spec:' + chr(10) + + ' cluster:' + chr(10) + + f' name: {self.name}' + chr(10) + + f' apiServerEndpoint: {cluster_endpoint}' + chr(10) + + f' certificateAuthority: {cluster_ca}' + chr(10) + + f' cidr: {cluster_service_cidr}' + ) + user_data = _b64.b64encode(('MIME-Version: 1.0' + chr(10) + + 'Content-Type: multipart/mixed; boundary="==BOUNDARY=="' + chr(10) + + chr(10) + + '--==BOUNDARY==' + chr(10) + + 'Content-Type: application/node.eks.aws' + chr(10) + + chr(10) + + nodeadm_config + chr(10) + + '--==BOUNDARY==--').encode()).decode() + logging.info('[EKS] Using AL2023 nodeadm bootstrap for %s', az) + lt_data = ( + '{' + f'"ImageId":"{ami_id}",' 
+ '"CapacityReservationSpecification":{' + '"CapacityReservationPreference":"capacity-reservations-only",' + f'"CapacityReservationTarget":{{"CapacityReservationId":"{res_id}"}}}},' + f'"UserData":"{user_data}"' + '}' + ) + _, _, lt_rc = vm_util.IssueCommand( + util.AWS_PREFIX + [ + 'ec2', 'create-launch-template', + '--region', self.region, + '--launch-template-name', f'pkb-eks-lt-{az}', + '--launch-template-data', lt_data, + ], + raise_on_failure=False, + ) + if lt_rc == 0: + logging.info( + '[EKS] Created launch template pkb-eks-lt-%s (AMI=%s) -> %s', + az, ami_id, res_id, + ) + else: + logging.warning('[EKS] Failed to create launch template for %s', az) + else: + logging.warning('[EKS] Missing AMI/CA/endpoint — no launch template for %s', az) + else: + logging.warning('[EKS] Failed to create capacity reservation in %s — on-demand', az) + # Above create command passes "withOidc=true", but it doesn't seem to work & # therefore this command is needed. if not FLAGS.eks_skip_ebs_csi: @@ -693,13 +854,13 @@ def _DiscoverSubnetsPerAZ(self) -> dict[str, str]: self._cached_subnets_per_az = {} return {} - # Describe subnets to get their AZ mapping + # Describe subnets to get their AZ mapping AND public/private status out, _, rc = vm_util.IssueCommand( util.AWS_PREFIX + [ 'ec2', 'describe-subnets', '--region', self.region, '--subnet-ids', *subnet_ids, - '--query', 'Subnets[*].{SubnetId:SubnetId,AZ:AvailabilityZone}', + '--query', 'Subnets[*].{SubnetId:SubnetId,AZ:AvailabilityZone,Public:MapPublicIpOnLaunch}', '--output', 'json', ], raise_on_failure=False, @@ -716,15 +877,27 @@ def _DiscoverSubnetsPerAZ(self) -> dict[str, str]: # Accept all subnets the VPC has across all AZs. allowed_zones = None + # Build AZ map — always prefer public subnets (MapPublicIpOnLaunch=True) + # which have an internet gateway route. Private subnets lack IGW routes + # and nodes launched there cannot reach the EKS API server to join the cluster. 
az_map: dict[str, str] = {} + az_map_private: dict[str, str] = {} for s in subnets: az = s['AZ'] if allowed_zones and az not in allowed_zones: continue - # Keep only one subnet per AZ (prefer public subnets — already filtered - # by _DiscoverSubnets which returns the cluster's configured subnets) - if az not in az_map: + if s.get('Public'): + # Public subnet — always prefer this az_map[az] = s['SubnetId'] + logging.info('[EKS] AZ %s → public subnet %s', az, s['SubnetId']) + elif az not in az_map: + # Private subnet — only use as fallback if no public subnet found + az_map_private[az] = s['SubnetId'] + # Fill in any AZs that only have private subnets + for az, sid in az_map_private.items(): + if az not in az_map: + logging.warning('[EKS] AZ %s has no public subnet — using private %s', az, sid) + az_map[az] = sid logging.info( '[EKS] Subnet-per-AZ mapping: %s (from %d total subnets, ' @@ -837,7 +1010,9 @@ def CreateNodePoolAsync( # Extract numeric suffix from pool name to determine AZ assignment name = nodepool_config.name suffix = ''.join(c for c in name if c.isdigit()) - idx = int(suffix) if suffix else 0 + # pkbmb (Scenario B) has no suffix — use idx=1 (us-east-1b) to avoid + # competing with us-east-1a which already has the default nodegroup + idx = int(suffix) if suffix else 1 zones = sorted(az_subnets.keys()) assigned_az = zones[idx % len(zones)] subnets = [az_subnets[assigned_az]] @@ -864,16 +1039,45 @@ def CreateNodePoolAsync( 'nodeRole': self._DiscoverNodeRoleArn(), 'labels': {'pkb_nodepool': nodepool_config.name}, 'tags': util.MakeDefaultTags(), - # Target open capacity reservations first before falling back to - # regular on-demand. Ensures EC2 capacity reservations created - # before the benchmark are actually used by EKS nodegroups. 
- 'capacityReservationSpecification': { - 'capacityReservationPreference': 'open', - }, } + _az = assigned_az if az_subnets and len(az_subnets) > 1 else f'{self.region}a' + _lt_name = f'pkb-eks-lt-{_az}' + _lt_out, _, _lt_rc = vm_util.IssueCommand( + util.AWS_PREFIX + [ + 'ec2', 'describe-launch-templates', + '--region', self.region, + '--filters', f'Name=launch-template-name,Values={_lt_name}', + '--query', 'LaunchTemplates[0].LaunchTemplateId', + '--output', 'text', + ], + raise_on_failure=False, + ) + # Use launch template WITH correct EKS bootstrap to target capacity reservation. + # The launch template must specify the EKS-optimized AMI and bootstrap user data + # so nodes can join the cluster, while also targeting the capacity reservation. + res_id = self._capacity_reservation_ids.get(_az, '') + if res_id and _lt_rc == 0 and _lt_out.strip() and _lt_out.strip() not in ('None', 'null', ''): + payload['launchTemplate'] = {'id': _lt_out.strip(), 'version': '$Latest'} + # When launch template specifies an ImageId, EKS rejects these fields: + # - releaseVersion: conflicts with AMI + # - instanceTypes: must come from launch template only + # - amiType: conflicts with AMI + payload.pop('releaseVersion', None) + payload.pop('instanceTypes', None) + payload.pop('amiType', None) + logging.info( + '[EKS] Nodegroup %s using launch template %s targeting reservation %s in AZ %s', + nodepool_config.name, _lt_name, res_id, _az, + ) + else: + logging.warning('[EKS] No reservation/template for AZ %s — using on-demand', _az) + if node_version: - payload['version'] = node_version - payload['releaseVersion'] = self._ResolveReleaseVersion(node_version) + # EKS rejects both 'version' and 'releaseVersion' when a launch template + # with ImageId is specified — skip both when launchTemplate is in use. 
+ if 'launchTemplate' not in payload: + payload['version'] = node_version + payload['releaseVersion'] = self._ResolveReleaseVersion(node_version) filename = self._WriteJsonToFile(payload) cmd = util.AWS_PREFIX + [ 'eks', @@ -891,18 +1095,53 @@ def CreateNodePoolAsync( return f'ng_active:{nodepool_config.name}' def UpgradeNodePoolAsync(self, name: str, target_version: str) -> str: - cmd = util.AWS_PREFIX + [ - 'eks', - 'update-nodegroup-version', - '--cluster-name', - self.name, - '--nodegroup-name', - name, - '--region', - self.region, - '--kubernetes-version', - target_version, - ] + # For Custom AMI nodegroups (using launch template with ImageId), + # EKS requires the launch template to be passed on upgrade. + # Determine the AZ for this nodegroup to find the correct launch template. + suffix = ''.join(c for c in name if c.isdigit()) + # pkbmb (Scenario B) has no suffix — use idx=1 (us-east-1b) to avoid + # competing with us-east-1a which already has the default nodegroup + idx = int(suffix) if suffix else 1 + az_subnets = self._DiscoverSubnetsPerAZ() + if az_subnets and len(az_subnets) > 1: + zones = sorted(az_subnets.keys()) + _az = zones[idx % len(zones)] + else: + _az = f'{self.region}a' + _lt_name = f'pkb-eks-lt-{_az}' + + # Check if launch template exists for this AZ + lt_out, _, lt_rc = vm_util.IssueCommand( + util.AWS_PREFIX + [ + 'ec2', 'describe-launch-templates', + '--region', self.region, + '--filters', f'Name=launch-template-name,Values={_lt_name}', + '--query', 'LaunchTemplates[0].LaunchTemplateId', + '--output', 'text', + ], + raise_on_failure=False, + ) + lt_id = lt_out.strip() if lt_rc == 0 and lt_out.strip() not in ('', 'None', 'null') else '' + + # Custom AMI nodegroups cannot use --kubernetes-version — use launch template only + if lt_id: + cmd = util.AWS_PREFIX + [ + 'eks', 'update-nodegroup-version', + '--cluster-name', self.name, + '--nodegroup-name', name, + '--region', self.region, + '--launch-template', f'id={lt_id},version=$Latest', + ] 
+ logging.info('[EKS] Upgrading %s with launch template %s in AZ %s', + name, _lt_name, _az) + else: + cmd = util.AWS_PREFIX + [ + 'eks', 'update-nodegroup-version', + '--cluster-name', self.name, + '--nodegroup-name', name, + '--region', self.region, + '--kubernetes-version', target_version, + ] _, stderr, retcode = vm_util.IssueCommand( cmd, timeout=300, raise_on_failure=False ) @@ -1212,6 +1451,32 @@ def _PostCreate(self): def _Delete(self): """Deletes the control plane and worker nodes.""" + # Clean up SSH key pair — safety net in case _DeleteDependencies didn't run + try: + aws_virtual_machine.AwsKeyFileManager.DeleteKeyfile(self.region) + except Exception: # pylint: disable=broad-except + pass + # Clean up dynamically created launch templates and capacity reservations + for az in getattr(self, '_capacity_reservation_ids', {}).keys(): + vm_util.IssueCommand( + util.AWS_PREFIX + [ + 'ec2', 'delete-launch-template', + '--launch-template-name', f'pkb-eks-lt-{az}', + '--region', self.region, + ], + raise_on_failure=False, + ) + logging.info('[EKS] Deleted launch template pkb-eks-lt-%s', az) + for az, res_id in getattr(self, '_capacity_reservation_ids', {}).items(): + vm_util.IssueCommand( + util.AWS_PREFIX + [ + 'ec2', 'cancel-capacity-reservation', + '--capacity-reservation-id', res_id, + '--region', self.region, + ], + raise_on_failure=False, + ) + logging.info('[EKS] Cancelled capacity reservation %s in %s', res_id, az) super()._Delete() cmd = [ FLAGS.eksctl, From f9e72e58324f36fac2c0f45051804d8f353213a9 Mon Sep 17 00:00:00 2001 From: Srikant Patil Date: Wed, 27 May 2026 15:40:43 +0530 Subject: [PATCH 11/19] GCP: Upgrade command fixes and Test Cases update --- .../kubernetes_management_benchmark.py | 797 ++++++++++++ .../aws/elastic_kubernetes_service.py | 325 +++-- .../azure/azure_kubernetes_service.py | 52 +- .../providers/gcp/google_kubernetes_engine.py | 152 ++- .../container_service/kubernetes_cluster.py | 40 +- .../kubernetes_management_benchmark_test.py 
| 1105 +++++++++++++++++ .../aws/elastic_kubernetes_service_test.py | 339 ++++- .../azure/azure_kubernetes_service_test.py | 255 +++- .../gcp/google_kubernetes_engine_test.py | 366 +++++- 9 files changed, 3278 insertions(+), 153 deletions(-) create mode 100644 perfkitbenchmarker/linux_benchmarks/kubernetes_management_benchmark.py create mode 100644 tests/linux_benchmarks/kubernetes_management_benchmark_test.py diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes_management_benchmark.py b/perfkitbenchmarker/linux_benchmarks/kubernetes_management_benchmark.py new file mode 100644 index 0000000000..5819062b61 --- /dev/null +++ b/perfkitbenchmarker/linux_benchmarks/kubernetes_management_benchmark.py @@ -0,0 +1,797 @@ +# Copyright 2026 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Benchmark for Kubernetes management plane operations. + +Measures GKE/EKS/AKS control-plane API responsiveness via three scenarios: + A. Concurrent node-pool create/upgrade/delete. + B. Node-pool create overlapping with a long-running cluster update. + C. Large-scale node-pool provisioning (single scale or sweep). 
+ +Optimizations for minimum run time: + - Streaming concurrency in Scenario C (no batch barriers) + - Optional pipelined Scenario A (create->upgrade->delete per thread) + - Reduced poll_interval in provider WaitForOperation (5s vs 10s) + - Per-op threads capped at _MAX_CONCURRENT to avoid OS limits + - Accurate delete success rate via attempted_ops denominator +""" + +import copy +import statistics +import threading +import time +from typing import Callable + +from absl import flags +from absl import logging +from perfkitbenchmarker import background_tasks +from perfkitbenchmarker import benchmark_spec as bm_spec +from perfkitbenchmarker import configs +from perfkitbenchmarker import errors +from perfkitbenchmarker import sample +from perfkitbenchmarker.configs import benchmark_config_spec +from perfkitbenchmarker.resources.container_service import ( + container as container_lib) +from perfkitbenchmarker.resources.container_service import kubectl +from perfkitbenchmarker.resources.container_service import kubernetes_cluster + +_SLEEP_POD_NAME = 'pkb-mgmt-sleep' + +BENCHMARK_NAME = 'kubernetes_management' + +BENCHMARK_CONFIG = """ +kubernetes_management: + description: > + Benchmarks GKE/EKS/AKS management plane operations: concurrent node pool + create/upgrade/delete, overlapping cluster + node-pool ops, and large-scale + provisioning. Focused on control-plane API responsiveness. + Spec regions: GCP us-central1, AWS us-east-1 (closest), Azure eastus. + Equivalent machine types across clouds per Google benchmark spec. 
+ container_cluster: + type: Kubernetes + vm_count: 1 + vm_spec: + GCP: + # us-central1-a: spec primary region for GCP + # e2-standard-2: 2 vCPU 8GB — equivalent to t3.medium / D2s_v3 + machine_type: e2-standard-2 + zone: us-central1-a + AWS: + # us-east-1a: closest comparable region to GCP us-central1 + # t3.medium: 2 vCPU 4GB — closest equivalent to e2-standard-2 + machine_type: t3.medium + zone: us-east-1a + Azure: + # eastus: closest comparable region to GCP us-central1 + # Standard_D2s_v3: 2 vCPU 8GB — equivalent to e2-standard-2 + machine_type: Standard_D2s_v3 + zone: eastus +""" + +_VALID_SCENARIOS = frozenset({'A', 'B', 'C'}) + +_CONCURRENT_NODEPOOLS = flags.DEFINE_integer( + 'k8s_mgmt_concurrent_nodepools', + 5, + 'Number of node pools to create/upgrade/delete concurrently in Scenario A.', +) +_LARGE_SCALE_NODEPOOLS = flags.DEFINE_integer( + 'k8s_mgmt_large_scale_nodepools', + 1000, + 'Number of node pools to provision in the large-scale Scenario C. ' + + 'Spec target is 1000; ensure VPC/quota is available before running.', +) +_NODES_PER_NODEPOOL = flags.DEFINE_integer( + 'k8s_mgmt_nodes_per_nodepool', + 2, + 'Number of nodes per node pool. Google spec: 2 nodes per pool.', +) +_INITIAL_VERSION = flags.DEFINE_string( + 'k8s_mgmt_initial_version', + None, + 'Kubernetes version for newly-created node pools (N-1). None = auto.', +) +_TARGET_VERSION = flags.DEFINE_string( + 'k8s_mgmt_target_version', + None, + 'Kubernetes version to upgrade node pools to (N). None = cluster version.', +) +_SCENARIOS = flags.DEFINE_list( + 'k8s_mgmt_scenarios', + ['A', 'B', 'C'], + 'Comma-separated subset of scenarios to run. Valid values: A, B, C.', +) +_SCALE_SWEEP = flags.DEFINE_list( + 'k8s_mgmt_scale_sweep', + [], + 'Comma-separated list of node-pool counts for Scenario C scale sweep. ' + + 'Each scale runs as a separate sub-run with full create/delete cycle. ' + + 'Example: --k8s_mgmt_scale_sweep=10,50,100,500,1000. 
' + + 'If empty, uses --k8s_mgmt_large_scale_nodepools.', +) +_MAX_CONCURRENT = flags.DEFINE_integer( + 'k8s_mgmt_max_concurrent', + 50, + 'Cap on concurrent provider API calls within a batch. ' + + 'Higher = faster but more aggressive on connection pools.', +) +_PIPELINE_SCENARIO_A = flags.DEFINE_boolean( + 'k8s_mgmt_pipeline_scenario_a', + True, + 'If True, run Scenario A as per-pool pipeline (create->upgrade->delete ' + + 'back-to-back per thread). Minimizes wall time. ' + + 'Default False for spec-strict phase-by-phase.', +) + +# AKS caps node-pool names at 12 chars — keep all names within that limit. +_PREFIX = 'pkbm' + + +def _ScenarioAName(i): + return f'{_PREFIX}a{i:03d}' + + +_SCENARIO_B_NAME = f'{_PREFIX}b' + + +def _ScenarioCName(i): + return f'{_PREFIX}c{i:04d}' + + +def GetConfig(user_config): + return configs.LoadConfig(BENCHMARK_CONFIG, user_config, BENCHMARK_NAME) + + +def CheckPrerequisites( + benchmark_config: benchmark_config_spec.BenchmarkConfigSpec,): + """Validates flag values and cluster type before any cloud calls.""" + invalid = [ + s for s in _SCENARIOS.value if s.strip() not in _VALID_SCENARIOS + ] + if invalid: + raise errors.Config.InvalidValue( + f'Invalid value(s) for --k8s_mgmt_scenarios: {invalid}. 
def Prepare(benchmark_spec: bm_spec.BenchmarkSpec) -> None:
  """Checks the cluster object and launches the long-sleeping busybox pod.

  Also sets always_call_cleanup so Cleanup() runs even after a failed run.
  A non-zero kubectl return code is logged and otherwise ignored.

  Args:
    benchmark_spec: the benchmark specification holding the cluster.
  """
  cluster = benchmark_spec.container_cluster
  assert isinstance(cluster, kubernetes_cluster.KubernetesCluster)
  benchmark_spec.always_call_cleanup = True
  logging.info(
      'kubernetes_management Prepare: cluster=%s, version=%s',
      cluster.name,
      cluster.k8s_version,
  )
  # Spec workload: "a simple container that sleeps for a given time".
  # Confirms data-plane reachability; generates no data-plane load.
  run_pod_args = [
      'run',
      _SLEEP_POD_NAME,
      '--image=busybox',
      '--restart=Never',
      '--',
      'sleep',
      '86400',
  ]
  result = kubectl.RunKubectlCommand(run_pod_args, raise_on_failure=False)
  return_code = result[2]
  if return_code:
    logging.warning(
        'Sleep workload deploy returned rc=%d (non-fatal; continuing)',
        return_code,
    )


def _CleanStartSweep(cluster: kubernetes_cluster.KubernetesCluster) -> None:
  """Removes node pools left over from earlier runs (names start with pkbm).

  A failure to list pools is logged and the sweep is abandoned.

  Args:
    cluster: the cluster whose node pools are inspected.
  """
  try:
    all_pools = cluster.GetNodePoolNames()
    stale = [name for name in all_pools if name.startswith(_PREFIX)]
  except Exception:  # pylint: disable=broad-except
    logging.exception('CleanStart: failed to list node pools')
    return
  if stale:
    logging.warning(
        'CleanStart: deleting %d stale pools: %s', len(stale), stale
    )
    background_tasks.RunThreaded(cluster.DeleteNodePool, stale)
    return
  logging.info('CleanStart: no stale pools found — clean start confirmed.')


def Run(benchmark_spec: bm_spec.BenchmarkSpec) -> list[sample.Sample]:
  """Executes the requested scenarios and returns all samples in one list.

  Every sample is tagged with the version pair and run configuration;
  Scenario C samples additionally carry 'scenario_c_scale'.

  Args:
    benchmark_spec: the benchmark specification holding the cluster.

  Returns:
    The samples of scenarios A, B and C (those selected), in that order.
  """
  cluster = benchmark_spec.container_cluster
  assert isinstance(cluster, kubernetes_cluster.KubernetesCluster)

  # Spec C.2: start clean.
  _CleanStartSweep(cluster)

  # Resolve versions once; log clearly; tag every sample.
  # Google spec: initial=N-1, target=N (adjacent minor upgrade).
  flag_initial = _INITIAL_VERSION.value
  flag_target = _TARGET_VERSION.value
  if flag_initial and flag_target:
    initial = flag_initial
    target = flag_target
    source = 'flags'
  else:
    # At least one side is missing: ask the cluster, then let any flag win.
    resolved_initial, resolved_target = cluster.ResolveNodePoolVersions()
    initial = flag_initial or resolved_initial
    target = flag_target or resolved_target
    if flag_initial or flag_target:
      source = 'mixed'
    else:
      source = 'auto-resolved'

  if hasattr(cluster, 'default_nodepool'):
    machine_type = cluster.default_nodepool.machine_type
  else:
    machine_type = 'unknown'
  logging.info(
      'NodePool versions (%s): initial=%s -> target=%s '
      + '(cluster k8s_version=%s) | nodes_per_pool=%d | machine_type=%s',
      source,
      initial,
      target,
      cluster.k8s_version,
      _NODES_PER_NODEPOOL.value,
      machine_type,
  )

  scenarios = set()
  for raw in _SCENARIOS.value:
    scenarios.add(raw.strip().upper())
  samples: list[sample.Sample] = []

  if 'A' in scenarios:
    samples += _RunScenarioA(cluster, initial, target)
  if 'B' in scenarios:
    samples += _RunScenarioB(cluster, initial)
  if 'C' in scenarios:
    if _SCALE_SWEEP.value:
      scales = [int(x.strip()) for x in _SCALE_SWEEP.value]
    else:
      scales = [_LARGE_SCALE_NODEPOOLS.value]
    logging.info('Scenario C: scale sweep = %s', scales)
    for scale in scales:
      scale_samples = _RunScenarioC(cluster, initial, scale)
      for one in scale_samples:
        one.metadata['scenario_c_scale'] = str(scale)
      samples += scale_samples

  # Tag all samples with version path and run config for published results.
  run_meta = {
      'initial_version': str(initial),
      'target_version': str(target),
      'cluster_k8s_version': str(cluster.k8s_version),
      'nodes_per_nodepool': str(_NODES_PER_NODEPOOL.value),
      'concurrent_nodepools': str(_CONCURRENT_NODEPOOLS.value),
  }
  for one in samples:
    one.metadata.update(run_meta)

  return samples
def Cleanup(benchmark_spec: bm_spec.BenchmarkSpec) -> None:
  """Best-effort delete of leftover benchmark node pools and sleep pod.

  Does nothing when the spec has no cluster. A failure to list node pools is
  logged and ends the cleanup early.

  Args:
    benchmark_spec: the benchmark specification holding the cluster.
  """
  cluster = benchmark_spec.container_cluster
  if cluster is None:
    return
  kubectl.RunKubectlCommand(
      ['delete', 'pod', _SLEEP_POD_NAME, '--ignore-not-found'],
      raise_on_failure=False,
  )
  try:
    leftover = [
        n for n in cluster.GetNodePoolNames() if n.startswith(_PREFIX)
    ]
  except Exception:  # pylint: disable=broad-except
    logging.exception('Cleanup: failed to list node pools')
    return
  if not leftover:
    return
  logging.info('Cleanup: deleting %d leftover node pools', len(leftover))
  background_tasks.RunThreaded(cluster.DeleteNodePool, leftover)


# ---------------------------------------------------------------------------
# Scenario A
# ---------------------------------------------------------------------------


def _RunScenarioA(
    cluster: kubernetes_cluster.KubernetesCluster,
    initial: str,
    target: str,
) -> list[sample.Sample]:
  """Concurrent CreateNodePool, UpgradeNodePool, DeleteNodePool.

  Delegates to _RunScenarioAPipelined when --k8s_mgmt_pipeline_scenario_a is
  set; otherwise runs three phases, each finishing before the next starts.

  Args:
    cluster: the cluster to operate on.
    initial: node version passed to every create.
    target: node version passed to every upgrade.

  Returns:
    Samples for the create, upgrade and delete phases.
  """
  n = _CONCURRENT_NODEPOOLS.value
  if _PIPELINE_SCENARIO_A.value:
    logging.info(
        'Scenario A (pipelined): %d pools, initial=%s, target=%s',
        n, initial, target)
    return _RunScenarioAPipelined(cluster, n, initial, target)

  logging.info(
      'Scenario A (phase-by-phase): %d pools, initial=%s, target=%s',
      n, initial, target)
  pool_names = [_ScenarioAName(i) for i in range(n)]
  configs_ = [_MakeNodePoolConfig(cluster, name) for name in pool_names]
  samples: list[sample.Sample] = []

  # Phase 1: concurrent creates.
  create_results = _RunAsync(
      kickoff=lambda cfg: cluster.CreateNodePoolAsync(
          cfg, node_version=initial),
      wait_fn=cluster.WaitForOperation,
      items=configs_,
      get_name=lambda cfg: cfg.name,
  )
  samples += _OpSamples('ScenarioA_Create',
                        create_results,
                        attempted_ops=len(pool_names))

  # Phase 2: concurrent upgrades (only successfully created pools).
  created = [name for name, _, _, err in create_results if err is None]
  logging.info(
      'Scenario A: %d/%d pools created — proceeding to upgrade',
      len(created), n)
  upgrade_results = _RunAsync(
      kickoff=lambda name: cluster.UpgradeNodePoolAsync(name, target),
      wait_fn=cluster.WaitForOperation,
      items=created,
      get_name=str,
  )
  samples += _OpSamples('ScenarioA_Upgrade',
                        upgrade_results,
                        attempted_ops=len(created))

  # Phase 3: concurrent deletes (live-list to catch EKS rollbacks).
  # Listing is guarded like in _CleanStartSweep/Cleanup: an API hiccup here
  # must not throw away the create/upgrade samples gathered above. On failure
  # fall back to the pools this run is known to have created.
  try:
    alive = [
        p for p in cluster.GetNodePoolNames() if p.startswith(f'{_PREFIX}a')
    ]
  except Exception:  # pylint: disable=broad-except
    logging.exception(
        'Scenario A: failed to list node pools; deleting created pools')
    alive = list(created)
  logging.info(
      'Scenario A: %d live pools found for delete (originally %d)',
      len(alive), n)
  delete_results = _RunAsync(
      kickoff=cluster.DeleteNodePoolAsync,
      wait_fn=cluster.WaitForOperation,
      items=alive,
      get_name=str,
  )
  # attempted_ops=n: success rate reflects original request, not just live.
  # EKS rolls back timed-out pools silently — without this shows 100%.
  samples += _OpSamples('ScenarioA_Delete', delete_results, attempted_ops=n)
  return samples
def _RunScenarioAPipelined(
    cluster: kubernetes_cluster.KubernetesCluster,
    n: int,
    initial: str,
    target: str,
) -> list[sample.Sample]:
  """Per-pool pipeline: create->upgrade->delete back-to-back per thread.

  Minimizes wall time: max_i(create_i + upgrade_i + delete_i) vs
  max(creates)+max(upgrades)+max(deletes) in phase-by-phase mode.
  Trade-off: ops run under mixed-type concurrent load.

  Args:
    cluster: the cluster to operate on.
    n: number of pools (one worker pipeline per pool).
    initial: node version passed to every create.
    target: node version passed to every upgrade.

  Returns:
    Samples for create, upgrade and delete; empty when n is not positive.
  """
  pool_names = [_ScenarioAName(i) for i in range(n)]
  if not pool_names:
    # Mirrors _RunAsync's empty-input guard; avoids asking for 0 threads.
    return []
  creates = _Results()
  upgrades = _Results()
  deletes = _Results()

  def DoPool(name: str):
    cfg = _MakeNodePoolConfig(cluster, name)
    init, e2e, err = _TimedAsync(
        lambda: cluster.CreateNodePoolAsync(cfg, node_version=initial),
        cluster.WaitForOperation,
    )
    creates.add(name, init, e2e, err)
    if err is not None:
      # Nothing to upgrade or delete for this pool.
      return
    init, e2e, err = _TimedAsync(
        lambda: cluster.UpgradeNodePoolAsync(name, target),
        cluster.WaitForOperation,
    )
    upgrades.add(name, init, e2e, err)
    # Delete runs even after a failed upgrade so the pool is not leaked.
    init, e2e, err = _TimedAsync(
        lambda: cluster.DeleteNodePoolAsync(name),
        cluster.WaitForOperation,
    )
    deletes.add(name, init, e2e, err)

  background_tasks.RunThreaded(
      DoPool,
      pool_names,
      max_concurrent_threads=min(n, _MAX_CONCURRENT.value),
  )
  # Upgrades are only attempted on pools whose create succeeded, so that
  # count is the denominator — the same rule the phase-by-phase mode applies.
  created_ok = sum(1 for _, _, _, err in creates.entries if err is None)
  samples: list[sample.Sample] = []
  samples += _OpSamples('ScenarioA_Create', creates.entries, attempted_ops=n)
  samples += _OpSamples('ScenarioA_Upgrade',
                        upgrades.entries,
                        attempted_ops=created_ok)
  samples += _OpSamples('ScenarioA_Delete', deletes.entries, attempted_ops=n)
  return samples
# ---------------------------------------------------------------------------
# Scenario B
# ---------------------------------------------------------------------------


def _RunScenarioB(
    cluster: kubernetes_cluster.KubernetesCluster,
    initial: str,
) -> list[sample.Sample]:
  """Runs a cluster update and a node-pool create on two parallel threads.

  Each operation is timed on its own (initiation and end-to-end). The test
  pool is removed afterwards on a best-effort basis.

  Args:
    cluster: the cluster to operate on.
    initial: node version passed to the node-pool create.

  Returns:
    Samples for 'ScenarioB_ClusterUpdate' and 'ScenarioB_NodePoolCreate'.
  """
  logging.info('Scenario B: overlapping cluster update + node-pool create')
  pool_cfg = _MakeNodePoolConfig(cluster, _SCENARIO_B_NAME)
  collected = _Results()

  def DoClusterUpdate():
    timing = _TimedAsync(cluster.UpdateClusterAsync, cluster.WaitForOperation)
    init, e2e, err = timing
    collected.add('ScenarioB_ClusterUpdate', init, e2e, err)
    logging.info('Scenario B ClusterUpdate: init=%.2fs e2e=%.2fs ok=%s',
                 init, e2e, err is None)

  def DoCreate():
    def Kickoff():
      return cluster.CreateNodePoolAsync(pool_cfg, node_version=initial)

    init, e2e, err = _TimedAsync(Kickoff, cluster.WaitForOperation)
    collected.add('ScenarioB_NodePoolCreate', init, e2e, err)
    logging.info('Scenario B NodePoolCreate: init=%.2fs e2e=%.2fs ok=%s',
                 init, e2e, err is None)

  def Invoke(task):
    return task()

  background_tasks.RunThreaded(Invoke, [DoClusterUpdate, DoCreate])

  # One single-op sample group per recorded operation.
  samples: list[sample.Sample] = []
  for op_name, init_dur, e2e_dur, err in collected.entries:
    samples.extend(
        _OpSamples(op_name, [(op_name, init_dur, e2e_dur, err)],
                   attempted_ops=1))

  # Remove test pool (best-effort).
  try:
    cluster.DeleteNodePool(_SCENARIO_B_NAME)
  except Exception:  # pylint: disable=broad-except
    logging.exception('Scenario B: failed to delete test pool')
  return samples
# ---------------------------------------------------------------------------
# Scenario C
# ---------------------------------------------------------------------------


def _RunScenarioC(
    cluster: kubernetes_cluster.KubernetesCluster,
    initial: str,
    scale: int,
) -> list[sample.Sample]:
  """Large-scale node-pool provisioning at a given scale.

  Streams all `scale` creates through _RunAsync (capped at _MAX_CONCURRENT
  workers), then deletes whatever pools are actually listed by the cluster.
  The delete success rate is always computed against `scale`.

  Args:
    cluster: the cluster to operate on.
    initial: node version passed to every create.
    scale: number of node pools to create.

  Returns:
    Samples for 'ScenarioC_Create' and 'ScenarioC_Delete'.
  """
  logging.info(
      'Scenario C: scale=%d, max_concurrent=%d, initial_version=%s',
      scale,
      _MAX_CONCURRENT.value,
      initial,
  )
  pool_names = [_ScenarioCName(i) for i in range(scale)]
  configs_ = [_MakeNodePoolConfig(cluster, name) for name in pool_names]
  samples: list[sample.Sample] = []

  # Creates.
  create_results = _RunAsync(
      kickoff=lambda cfg: cluster.CreateNodePoolAsync(
          cfg, node_version=initial),
      wait_fn=cluster.WaitForOperation,
      items=configs_,
      get_name=lambda cfg: cfg.name,
  )
  created = [name for name, _, _, err in create_results if err is None]
  logging.info('Scenario C scale=%d: %d/%d creates succeeded', scale,
               len(created), scale)
  samples += _OpSamples('ScenarioC_Create',
                        create_results,
                        attempted_ops=scale)

  # Deletes (live-list). Guarded like _CleanStartSweep/Cleanup so a listing
  # failure does not discard the create samples; fall back to the pools this
  # run is known to have created.
  try:
    alive = [
        p for p in cluster.GetNodePoolNames() if p.startswith(f'{_PREFIX}c')
    ]
  except Exception:  # pylint: disable=broad-except
    logging.exception(
        'Scenario C: failed to list node pools; deleting created pools')
    alive = created
  logging.info(
      'Scenario C scale=%d: %d live pools for delete (originally %d;'
      + ' %d rolled back by cloud)',
      scale,
      len(alive),
      scale,
      # Leftovers from an earlier, larger sweep can make len(alive) exceed
      # scale; never report a negative rollback count.
      max(0, scale - len(alive)),
  )
  if not alive:
    logging.warning(
        'Scenario C scale=%d: 0 live pools — all timed-out creates were'
        + ' rolled back. Recording 0%% delete success rate.', scale)
    samples += _OpSamples('ScenarioC_Delete', [], attempted_ops=scale)
    return samples

  delete_results = _RunAsync(
      kickoff=cluster.DeleteNodePoolAsync,
      wait_fn=cluster.WaitForOperation,
      items=alive,
      get_name=str,
  )
  # attempted_ops=scale: accurate rate against original request count.
  samples += _OpSamples('ScenarioC_Delete',
                        delete_results,
                        attempted_ops=scale)
  return samples
+ samples += _OpSamples('ScenarioC_Delete', + delete_results, + attempted_ops=scale) + return samples + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +class _Results: + """Thread-safe collector for (name, init_latency, e2e_latency, error).""" + + def __init__(self): + self._lock = threading.Lock() + self.entries: list[tuple[str, float, float, Exception | None]] = [] + + def add(self, name: str, init_dur: float, e2e_dur: float, + err: Exception | None) -> None: + with self._lock: + self.entries.append((name, init_dur, e2e_dur, err)) + + +def _TimedAsync( + kickoff: Callable[[], str], + wait_fn: Callable[[str], None], +) -> tuple[float, float, Exception | None]: + """Runs kickoff() then wait_fn(handle); returns (init_lat, e2e_lat, err). + + init_lat = time for kickoff() to return (API accepted). + e2e_lat = total wall time including wait. On kickoff failure both are set + to elapsed time at failure point. + """ + init_start = time.time() + try: + handle = kickoff() + except Exception as exc: # pylint: disable=broad-except + elapsed = time.time() - init_start + return elapsed, elapsed, exc + init_dur = time.time() - init_start + try: + wait_fn(handle) + return init_dur, time.time() - init_start, None + except Exception as exc: # pylint: disable=broad-except + return init_dur, time.time() - init_start, exc + + +def _RunAsync( + kickoff: Callable, + wait_fn: Callable[[str], None], + items: list, + get_name: Callable[[object], str], +) -> list[tuple[str, float, float, Exception | None]]: + """Fires kickoff(item) concurrently for all items; returns timed results. + + Uses background_tasks.RunThreaded with a concurrency cap for streaming + execution — completed ops free their slot immediately for the next one. 
def _MakeNodePoolConfig(
    cluster: kubernetes_cluster.KubernetesCluster,
    name: str,
) -> container_lib.BaseNodePoolConfig:
  """Builds a node-pool config from the cluster's default pool.

  The default pool's config is shallow-copied, renamed, and pinned to
  --k8s_mgmt_nodes_per_nodepool nodes (num = min = max).

  Args:
    cluster: the cluster whose default_nodepool is used as the template.
    name: name for the new node pool.

  Returns:
    The new config; cluster.default_nodepool itself is not modified.
  """
  # Local import: `copy` is not part of the module's import block.
  import copy  # pylint: disable=g-import-not-at-top

  cfg = copy.copy(cluster.default_nodepool)
  cfg.name = name
  cfg.num_nodes = _NODES_PER_NODEPOOL.value
  cfg.min_nodes = _NODES_PER_NODEPOOL.value
  cfg.max_nodes = _NODES_PER_NODEPOOL.value
  return cfg


def _OpSamples(
    metric_prefix: str,
    results: list[tuple[str, float, float, Exception | None]],
    attempted_ops: int | None = None,
) -> list[sample.Sample]:
  """Per-op + aggregate samples for initiation and end-to-end latency.

  Args:
    metric_prefix: prefix for all metric names.
    results: list of (operation_name, init_lat, e2e_lat, err).
    attempted_ops: total ops originally requested. Used as the denominator
      for SuccessRate so ops that never appear in `results` count as
      failures. If None, len(results) is used. If more ops were executed
      than attempted, the executed count is used instead so the rate cannot
      exceed 100% and skipped_ops cannot go negative.

  Returns:
    Two samples per op (initiation, end-to-end), one SuccessRate sample when
    the denominator is positive, aggregate stats when at least 2 ops
    succeeded, and an outlier count when at least 4 succeeded.
  """
  samples: list[sample.Sample] = []
  init_latencies: list[float] = []
  e2e_latencies: list[float] = []
  success = 0

  for name, init_dur, e2e_dur, err in results:
    meta = {'operation_name': name, 'success': str(err is None)}
    if err is not None:
      # Truncated to keep sample metadata small.
      meta['error'] = str(err)[:200]
    else:
      success += 1
      init_latencies.append(init_dur)
      e2e_latencies.append(e2e_dur)
    samples.append(
        sample.Sample(f'{metric_prefix}_InitiationLatency', init_dur,
                      'seconds', dict(meta)))
    samples.append(
        sample.Sample(f'{metric_prefix}_EndToEndLatency', e2e_dur,
                      'seconds', dict(meta)))

  # Success rate.
  executed = len(results)
  total = executed if attempted_ops is None else max(attempted_ops, executed)
  if total > 0:
    samples.append(
        sample.Sample(
            f'{metric_prefix}_SuccessRate',
            100.0 * success / total,
            'percent',
            {
                'total_ops': str(total),
                'executed_ops': str(executed),
                'successful_ops': str(success),
                'skipped_ops': str(total - executed),
            },
        ))

  # Aggregate stats (successful ops only).
  for phase_label, latencies in (
      ('InitiationLatency', init_latencies),
      ('EndToEndLatency', e2e_latencies),
  ):
    if len(latencies) >= 2:
      samples += _AggregateSamples(metric_prefix, phase_label, latencies)
    if len(latencies) >= 4:
      samples += _OutlierSamples(metric_prefix, phase_label, latencies)

  return samples
def _Percentile(sorted_lats: list[float], p: float) -> float:
  """Returns the p-th percentile of an ascending, non-empty list.

  Uses linear interpolation between the two closest ranks, with the rank
  computed as (p / 100) * (n - 1).

  Args:
    sorted_lats: values sorted in ascending order; must not be empty.
    p: percentile in the range [0, 100].
  """
  n = len(sorted_lats)
  idx = (p / 100.0) * (n - 1)
  lo = int(idx)
  hi = min(lo + 1, n - 1)  # clamp so p=100 does not index past the end
  frac = idx - lo
  return sorted_lats[lo] * (1 - frac) + sorted_lats[hi] * frac


def _AggregateSamples(metric_prefix: str, phase_label: str,
                      latencies: list[float]) -> 'list[sample.Sample]':
  """Emits Mean/StdDev/Min/Median/P90/P99/Max samples for a latency series.

  Args:
    metric_prefix: prefix for all metric names.
    phase_label: middle part of the metric name (e.g. 'EndToEndLatency').
    latencies: non-empty list of latencies in seconds.

  Returns:
    Seven samples named '<prefix>_<phase>_<Stat>', each tagged with
    'sample_count'. StdDev is the population standard deviation.
  """
  sorted_lats = sorted(latencies)
  meta = {'sample_count': str(len(latencies))}
  stats = [
      ('Mean', statistics.mean(latencies)),
      ('StdDev', statistics.pstdev(latencies)),
      ('Min', sorted_lats[0]),
      ('Median', statistics.median(latencies)),
      ('P90', _Percentile(sorted_lats, 90)),
      ('P99', _Percentile(sorted_lats, 99)),
      ('Max', sorted_lats[-1]),
  ]
  return [
      sample.Sample(
          f'{metric_prefix}_{phase_label}_{label}',
          value,
          'seconds',
          dict(meta),  # fresh dict per sample so later tagging stays isolated
      )
      for label, value in stats
  ]


def _OutlierSamples(metric_prefix: str, phase_label: str,
                    latencies: list[float]) -> 'list[sample.Sample]':
  """Emits a single OutlierCount sample using IQR-fence outlier detection.

  A value is an outlier when it lies below Q1 - 1.5*IQR or above
  Q3 + 1.5*IQR.

  Args:
    metric_prefix: prefix for the metric name.
    phase_label: middle part of the metric name.
    latencies: non-empty list of latencies in seconds.

  Returns:
    A one-element list; the sample's metadata records the quartiles, the
    IQR, both fences and the sample count.
  """
  sorted_lats = sorted(latencies)
  q1 = _Percentile(sorted_lats, 25)
  q3 = _Percentile(sorted_lats, 75)
  iqr = q3 - q1
  lower_fence = q1 - 1.5 * iqr
  upper_fence = q3 + 1.5 * iqr
  outlier_count = sum(
      1 for v in latencies if v < lower_fence or v > upper_fence
  )
  meta = {
      'q1': str(q1),
      'q3': str(q3),
      'iqr': str(iqr),
      'upper_fence': str(upper_fence),
      'lower_fence': str(lower_fence),
      'sample_count': str(len(sorted_lats)),
  }
  return [
      sample.Sample(
          f'{metric_prefix}_{phase_label}_OutlierCount',
          outlier_count,
          'count',
          meta,
      )
  ]
+# The kubernetes_management benchmark does not use persistent volumes, so +# EBS CSI setup (OIDC + IAM role + addon install) is unnecessary and adds +# ~3 minutes to every run. Set to True to skip it and save time. +# Defined before FLAGS = flags.FLAGS so it is registered at import time +# and visible to PKB's flag parser before --cloud/--container_cluster_type +# are resolved. FLAGS = flags.FLAGS # GPU types which practically require spot to get. _RARE_GPU_TYPES = [ @@ -55,7 +62,7 @@ ] -def RecursivelyUpdateDictionary( +def _recursively_update_dictionary( original: dict[str, Any], updates: dict[str, Any] ) -> dict[str, Any]: """Updates a nested dictionary. @@ -73,14 +80,14 @@ def RecursivelyUpdateDictionary( # Copied from https://stackoverflow.com/questions/3232943 for k, v in updates.items(): if isinstance(v, abc.Mapping): - original[k] = RecursivelyUpdateDictionary(original.get(k, {}), v) + original[k] = _recursively_update_dictionary(original.get(k, {}), v) else: original[k] = v return original class BaseEksCluster(kubernetes_cluster.KubernetesCluster): - """Shared base class for Elastic Kubernetes Service cluster auto mode & not.""" + """Shared base class for EKS cluster (auto mode and standard).""" def __init__(self, spec): # EKS requires a region and optionally a list of one or zones. @@ -108,6 +115,9 @@ def __init__(self, spec): self.account: str = util.GetAccount() self.node_to_nodepool: dict[str, container.BaseNodePoolConfig | None] = {} self.node_to_machine_type: dict[str, str | None] = {} + self._cached_subnets: list[str] | None = None + self._cached_subnets_per_az: dict[str, str] | None = None + self._cached_node_role_arn: str | None = None def _ChooseSecondZone(self): """Choose a second zone for the control plane if only one is specified.""" @@ -129,13 +139,24 @@ def _DeleteDependencies(self): def _EksCtlCreate(self, create_json: dict[str, Any]): """Creates the EKS cluster.""" - # If multiple zones are passed use them for the control plane. 
- # Otherwise EKS will auto-select control plane zones in the region. - if self.control_plane_zones: - create_json['availabilityZones'] = self.control_plane_zones + # Pass all control_plane_zones to the cluster so eksctl creates VPC subnets + # in every requested AZ. Without this, eksctl may only create subnets in 2 + # AZs even when 3 are requested, preventing round-robin nodegroup placement. + # This is critical for distributing nodegroups across AZs to avoid per-AZ + # EC2 capacity limits. + # availabilityZones is already set in create_json by _CreateDependencies + # via the EC2 AZ query (bypassing PKB zone flag truncation). + # Log it here for visibility. + if 'availabilityZones' in create_json: + logging.info( + '[EKS] Creating cluster with AZs: %s — ' + + 'eksctl will auto-assign CIDRs for all %d zones.', + create_json['availabilityZones'], + len(create_json['availabilityZones']), + ) # Schema for the cluster create command is here: # https://schema.eksctl.io/ - create_json = RecursivelyUpdateDictionary( + create_json = _recursively_update_dictionary( { 'apiVersion': 'eksctl.io/v1alpha5', 'kind': 'ClusterConfig', @@ -186,6 +207,11 @@ def _RenderNodeGroupJson( if nodepool.min_nodes != nodepool.max_nodes: group_json['minSize'] = nodepool.min_nodes group_json['maxSize'] = nodepool.max_nodes + # Pin the default nodegroup to control_plane_zones[0] so it stays in a + # single known AZ. The benchmark nodegroups (pkbma*, pkbmc*) are placed + # via CreateNodePoolAsync using the round-robin _DiscoverSubnetsPerAZ logic. 
+ if self.control_plane_zones: + group_json['availabilityZones'] = [self.control_plane_zones[0]] return group_json def _WriteJsonToFile(self, json_dict: dict[str, Any]) -> str: @@ -398,55 +424,108 @@ def _Create(self): nodepool_jsons += [self._RenderNodeGroupJson(node_group)] create_json: dict[str, Any] = { 'managedNodeGroups': nodepool_jsons, - 'vpc': { - 'nat': {'gateway': 'Disable'}, - }, + 'vpc': {'nat': {'gateway': 'Disable'}}, } + # Explicitly set cluster-level availabilityZones so eksctl creates VPC + # public+private subnets in ALL AZs in the region. + # IMPORTANT: PKB's deprecated --zones flag gets truncated by its own + # translation layer to 2 AZs even when 3 are specified. We bypass this + # by querying EC2 directly for all available AZs in the region and + # passing all of them to eksctl. This ensures the VPC gets subnets in + # all AZs, enabling proper round-robin nodegroup placement. + try: + az_out, _, az_rc = vm_util.IssueCommand( + util.AWS_PREFIX + [ + 'ec2', 'describe-availability-zones', + '--region', self.region, + '--filters', 'Name=state,Values=available', + '--query', 'AvailabilityZones[*].ZoneName', + '--output', 'json', + ], + raise_on_failure=False, + ) + if az_rc == 0 and az_out.strip(): + all_azs = json.loads(az_out.strip()) + # Limit to 3 AZs maximum to avoid excessive subnet creation + cluster_azs = sorted(all_azs)[:3] + else: + # Fallback: use control_plane_zones or default to known us-east-1 AZs + cluster_azs = ( + self.control_plane_zones + if self.control_plane_zones + else [f'{self.region}a', f'{self.region}b', f'{self.region}c'] + ) + except Exception: # pylint: disable=broad-except + cluster_azs = ( + self.control_plane_zones + if self.control_plane_zones + else [f'{self.region}a', f'{self.region}b', f'{self.region}c'] + ) + + create_json['availabilityZones'] = cluster_azs + logging.info( + '[EKS] Cluster will have subnets in %d AZs: %s ' + + '(queried from EC2, bypassing PKB zone flag truncation)', + len(cluster_azs), 
cluster_azs, + ) self._EksCtlCreate(create_json) # Above create command passes "withOidc=true", but it doesn't seem to work & # therefore this command is needed. - cmd = [ - FLAGS.eksctl, - 'utils', - 'associate-iam-oidc-provider', - f'--cluster={self.name}', - f'--region={self.region}', - '--approve', - ] - vm_util.IssueCommand(cmd) + if not FLAGS.eks_skip_ebs_csi: + cmd = [ + FLAGS.eksctl, + 'utils', + 'associate-iam-oidc-provider', + f'--cluster={self.name}', + f'--region={self.region}', + '--approve', + ] + vm_util.IssueCommand(cmd) # EBS CSI driver is required for creating EBS volumes in version > 1.23 # https://docs.aws.amazon.com/eks/latest/userguide/ebs-csi.html + # Skip if --eks_skip_ebs_csi is set (saves ~3 min for benchmarks that + # do not use persistent volumes, such as kubernetes_management). + if FLAGS.eks_skip_ebs_csi: + logging.info( + '[EKS] Skipping EBS CSI driver setup (--eks_skip_ebs_csi=True). ' + + 'Saves ~3 min. Set to False if benchmark needs persistent volumes.' + ) + else: + # Name must be unique. + ebs_csi_driver_role = f'AmazonEKS_EBS_CSI_DriverRole_{self.name}' + + ebs_policy_arn = ( + 'arn:aws:iam::aws:policy/service-role/' + + 'AmazonEBSCSIDriverPolicy') + cmd = [ + FLAGS.eksctl, + 'create', + 'iamserviceaccount', + '--name=ebs-csi-controller-sa', + '--namespace=kube-system', + f'--region={self.region}', + f'--cluster={self.name}', + f'--attach-policy-arn={ebs_policy_arn}', + '--approve', + '--role-only', + f'--role-name={ebs_csi_driver_role}', + ] + vm_util.IssueCommand(cmd) - # Name must be unique. 
- ebs_csi_driver_role = f'AmazonEKS_EBS_CSI_DriverRole_{self.name}' - - cmd = [ - FLAGS.eksctl, - 'create', - 'iamserviceaccount', - '--name=ebs-csi-controller-sa', - '--namespace=kube-system', - f'--region={self.region}', - f'--cluster={self.name}', - '--attach-policy-arn=arn:aws:iam::aws:policy/service-role/AmazonEBSCSIDriverPolicy', - '--approve', - '--role-only', - f'--role-name={ebs_csi_driver_role}', - ] - vm_util.IssueCommand(cmd) - - cmd = [ - FLAGS.eksctl, - 'create', - 'addon', - '--name=aws-ebs-csi-driver', - f'--region={self.region}', - f'--cluster={self.name}', - f'--service-account-role-arn=arn:aws:iam::{self.account}:role/{ebs_csi_driver_role}', - ] - vm_util.IssueCommand(cmd) + svc_acct_arn = ( + f'arn:aws:iam::{self.account}:role/{ebs_csi_driver_role}') + cmd = [ + FLAGS.eksctl, + 'create', + 'addon', + '--name=aws-ebs-csi-driver', + f'--region={self.region}', + f'--cluster={self.name}', + f'--service-account-role-arn={svc_acct_arn}', + ] + vm_util.IssueCommand(cmd) if aws_flags.AWS_EKS_POD_IDENTITY_ROLE.value: cmd = util.AWS_PREFIX + [ @@ -606,6 +685,60 @@ def _DiscoverSubnets(self) -> list[str]: self._cached_subnets = info['cluster']['resourcesVpcConfig']['subnetIds'] return self._cached_subnets + def _DiscoverSubnetsPerAZ(self) -> dict[str, str]: + """Returns a mapping of {AZ: subnet_id} for the cluster's subnets. + + Used by CreateNodePoolAsync to distribute nodegroups round-robin across + AZs, avoiding per-AZ EC2 capacity limits when creating many pools. + Only returns AZs that are in control_plane_zones (if specified). + Cached after first call. 
+ """ + if getattr(self, '_cached_subnets_per_az', None) is not None: + return self._cached_subnets_per_az + + subnet_ids = self._DiscoverSubnets() + if not subnet_ids: + self._cached_subnets_per_az = {} + return {} + + # Describe subnets to get their AZ mapping + out, _, rc = vm_util.IssueCommand( + util.AWS_PREFIX + [ + 'ec2', 'describe-subnets', + '--region', self.region, + '--subnet-ids', *subnet_ids, + '--query', 'Subnets[*].{SubnetId:SubnetId,AZ:AvailabilityZone}', + '--output', 'json', + ], + raise_on_failure=False, + ) + if rc: + logging.warning( + '[EKS] Could not describe subnets for AZ mapping — ' + + 'falling back to all subnets' + ) + self._cached_subnets_per_az = {} + return {} + + subnets = json.loads(out) + + # Do NOT filter by control_plane_zones — PKB truncates it to 2 AZs. + # Accept all subnets the VPC has across all AZs. + az_map: dict[str, str] = {} + for s in subnets: + az = s['AZ'] + # Keep only one subnet per AZ (prefer public subnets — already filtered + # by _DiscoverSubnets which returns the cluster's configured subnets) + if az not in az_map: + az_map[az] = s['SubnetId'] + + logging.info( + '[EKS] Subnet-per-AZ mapping: %s (from %d total subnets)', + az_map, len(subnet_ids), + ) + self._cached_subnets_per_az = az_map + return az_map + def _ResolveReleaseVersion(self, minor: str) -> str: """Returns the EKS-optimized AMI release version (e.g. '1.33.10-20260124'). 
@@ -645,7 +778,7 @@ def _ResolveReleaseVersion(self, minor: str) -> str: return cache[minor] def _DiscoverNodeRoleArn(self) -> str: - """Returns a usable node IAM role ARN by inspecting an existing nodegroup.""" + """Returns a node IAM role ARN by inspecting an existing nodegroup.""" if getattr(self, '_cached_node_role_arn', None): return self._cached_node_role_arn out, _, _ = vm_util.IssueCommand( @@ -698,6 +831,30 @@ def CreateNodePoolAsync( # cluster's version, which (for the N-1 -> N benchmark path) # produces a "release version X is not valid for kubernetes # version Y" error. + + # ── AZ distribution ──────────────────────────────────────────────────── + # When multiple zones are specified (e.g. us-east-1a,1b,1c), distribute + # nodegroups round-robin across AZs to avoid per-AZ EC2 capacity limits. + # Without this, EKS places all nodegroups in a single AZ causing timeouts. + # Pool name format: pkbma000, pkbma001, ... — extract index from suffix. + az_subnets = self._DiscoverSubnetsPerAZ() + if az_subnets and len(az_subnets) > 1: + # Extract numeric suffix from pool name to determine AZ assignment + name = nodepool_config.name + suffix = ''.join(c for c in name if c.isdigit()) + idx = int(suffix) if suffix else 0 + zones = sorted(az_subnets.keys()) + assigned_az = zones[idx % len(zones)] + subnets = [az_subnets[assigned_az]] + logging.info( + '[EKS] CreateNodePool %s -> AZ=%s subnet=%s (round-robin idx=%d)', + name, assigned_az, subnets[0], idx, + ) + else: + subnets = self._DiscoverSubnets() + logging.info('[EKS] CreateNodePool %s -> using all subnets (single AZ)', + nodepool_config.name) + payload: dict[str, Any] = { 'clusterName': self.name, 'nodegroupName': nodepool_config.name, @@ -706,12 +863,18 @@ def CreateNodePoolAsync( 'maxSize': nodepool_config.num_nodes, 'desiredSize': nodepool_config.num_nodes, }, - 'subnets': self._DiscoverSubnets(), + 'subnets': subnets, 'instanceTypes': [nodepool_config.machine_type], 'amiType': 'AL2023_x86_64_STANDARD', 
'nodeRole': self._DiscoverNodeRoleArn(), 'labels': {'pkb_nodepool': nodepool_config.name}, 'tags': util.MakeDefaultTags(), + # Target open capacity reservations first before falling back to + # regular on-demand. Ensures EC2 capacity reservations created + # before the benchmark are actually used by EKS nodegroups. + 'capacityReservationSpecification': { + 'capacityReservationPreference': 'open', + }, } if node_version: payload['version'] = node_version @@ -881,7 +1044,7 @@ def _wait_ng_active(): retryable_exceptions=(errors.Resource.RetryableDeletionError,), ) def _wait_ng_gone(): - out, err, rc = vm_util.IssueCommand( + _, err, rc = vm_util.IssueCommand( util.AWS_PREFIX + [ 'eks', @@ -1022,7 +1185,7 @@ class EksAutoCluster(BaseEksCluster): def __init__(self, spec): super().__init__(spec) self._ChooseSecondZone() - is_rare_gpu = self.gpu_type in _RARE_GPU_TYPES + is_rare_gpu = virtual_machine.GPU_TYPE.value in _RARE_GPU_TYPES self.use_spot: bool = aws_flags.USE_AWS_SPOT_INSTANCES.value or is_rare_gpu def _Create(self): @@ -1087,14 +1250,15 @@ def ResizeNodePool( def GetNodeSelectors(self, machine_type: str | None = None) -> dict[str, str]: """Get the node selectors section of a yaml for the provider.""" del machine_type # Unused. - # Theoretically needed in mixed mode, but deployments fail without it: - # https://docs.aws.amazon.com/eks/latest/userguide/associate-workload.html#_require_a_workload_is_deployed_to_eks_auto_mode_nodes + # Theoretically needed in mixed mode, but deployments fail without it. 
+ # See: docs.aws.amazon.com/eks/latest/userguide/associate-workload.html + # #_require_a_workload_is_deployed_to_eks_auto_mode_nodes selectors = {'eks.amazonaws.com/compute-type': 'auto'} if self.use_spot: selectors['karpenter.sh/capacity-type'] = 'spot' - if self.gpu_type: + if virtual_machine.GPU_TYPE.value: selectors['eks.amazonaws.com/instance-gpu-name'] = ( - self.gpu_type + virtual_machine.GPU_TYPE.value ) return selectors @@ -1126,10 +1290,15 @@ def __init__(self, spec): def _Create(self): """Creates the control plane and worker nodes.""" template_filename = vm_util.PrependTempDir('cloud-formation-template.yaml') + cfn_url = ( + 'https://raw.githubusercontent.com/aws/karpenter-provider-aws/' + + f'v{_KARPENTER_VERSION}/website/content/en/preview/' + + 'getting-started/getting-started-with-karpenter/' + + 'cloudformation.yaml') vm_util.IssueCommand([ 'curl', '-fsSL', - f'https://raw.githubusercontent.com/aws/karpenter-provider-aws/v{_KARPENTER_VERSION}/website/content/en/preview/getting-started/getting-started-with-karpenter/cloudformation.yaml', + cfn_url, '-o', template_filename, ]) @@ -1161,6 +1330,12 @@ def _Create(self): bootstrapping_nodepool.min_nodes = 1 bootstrapping_nodepool.max_nodes = 1 bootstrapping_nodepool.machine_type = 'm7i.2xlarge' + karpenter_policy_arn = ( + f'arn:aws:iam::{self.account}:policy/' + + f'KarpenterControllerPolicy-{self.name}') + karpenter_node_role_arn = ( + f'arn:aws:iam::{self.account}:role/' + + f'KarpenterNodeRole-{self.name}') create_json: dict[str, Any] = { 'metadata': { 'tags': {'karpenter.sh/discovery': self.name}, @@ -1171,14 +1346,12 @@ def _Create(self): 'serviceAccountName': 'karpenter', 'roleName': f'{self.name}-karpenter', 'permissionPolicyARNs': [ - f'arn:aws:iam::{self.account}:policy/KarpenterControllerPolicy-{self.name}' + karpenter_policy_arn ], }], }, 'iamIdentityMappings': [{ - 'arn': ( - f'arn:aws:iam::{self.account}:role/KarpenterNodeRole-{self.name}' - ), + 'arn': karpenter_node_role_arn, 'username': 
'system:node:{{EC2PrivateDNSName}}', 'groups': ['system:bootstrappers', 'system:nodes'], }], @@ -1219,15 +1392,16 @@ def _InstallAwsLoadBalancerController(self) -> None: policy_arn = (stdout or '').strip() if not policy_arn or policy_arn == 'None': with vm_util.NamedTemporaryFile(dir=vm_util.GetTempDir(), mode='w') as tf: + alb_policy_url = ( + 'https://raw.githubusercontent.com/kubernetes-sigs/' + + 'aws-load-balancer-controller/' + + 'v2.13.4/docs/install/iam_policy.json') vm_util.IssueCommand([ 'curl', '-sSL', '-o', tf.name, - ( - 'https://raw.githubusercontent.com/kubernetes-sigs/aws-load-balancer-controller/' - 'v2.13.4/docs/install/iam_policy.json' - ), + alb_policy_url, ]) stdout, _, _ = vm_util.IssueCommand( util.AWS_PREFIX @@ -1268,11 +1442,14 @@ def _InstallAwsLoadBalancerController(self) -> None: in stderr, ) # 4) Apply CRDs + crds_url = ( + 'https://raw.githubusercontent.com/aws/eks-charts/master/' + + 'stable/aws-load-balancer-controller/crds/crds.yaml') kubectl.RunKubectlCommand( [ 'apply', '-f', - 'https://raw.githubusercontent.com/aws/eks-charts/master/stable/aws-load-balancer-controller/crds/crds.yaml', + crds_url, ], suppress_failure=lambda stdout, stderr, retcode: 'already exists' in stderr, @@ -1363,7 +1540,8 @@ def _WaitForIngress(self, name: str, namespace: str, port: int) -> str: def _PostIngressNetworkingFixups( self, namespace: str, name: str, port: int, address: str ) -> None: - """Fixs ALB -> nodes connectivity to prevent 504 errors from unhealthy targets.""" + """Fixes ALB -> node connectivity to prevent 504 errors.""" + del namespace, name # Unused # 1) Get ALB security group from address host = ( @@ -1488,7 +1666,7 @@ def _PostCreate(self): 'daemonset/aws-node', '-n', 'kube-system', - '--timeout=%ds' % vm_util.DEFAULT_TIMEOUT, + f'--timeout={vm_util.DEFAULT_TIMEOUT}s', ], timeout=vm_util.DEFAULT_TIMEOUT, ) @@ -1573,12 +1751,15 @@ def _PostCreate(self): # Get the AMI version for current kubernetes version. # See e.g. 
https://karpenter.sh/docs/tasks/managing-amis/ for not using # @latest. + ssm_ami_path = ( + f'/aws/service/eks/optimized-ami/{self.cluster_version}/' + + 'amazon-linux-2023/x86_64/standard/recommended/image_id') image_id, _, _ = vm_util.IssueCommand([ 'aws', 'ssm', 'get-parameter', '--name', - f'/aws/service/eks/optimized-ami/{self.cluster_version}/amazon-linux-2023/x86_64/standard/recommended/image_id', + ssm_ami_path, '--region', self.region, '--query', @@ -1699,7 +1880,7 @@ def _DeleteDependencies(self): else: logging.info( 'Karpenter node role %s not found or empty response; skipping' - ' instance profile cleanup', + + ' instance profile cleanup', node_role, ) profiles_json = {'InstanceProfiles': []} @@ -1851,7 +2032,7 @@ def _CleanupKarpenter(self): for eni_id in eni_ids: # Bind eni_id by default to avoid loop closure issues if # this is refactored. - def _DeleteOneEni(eni_id=eni_id) -> None: + def _delete_one_eni(eni_id=eni_id) -> None: _, stderr, retcode = vm_util.IssueCommand( [ 'aws', @@ -1882,7 +2063,7 @@ def _DeleteOneEni(eni_id=eni_id) -> None: poll_interval=10, max_retries=5, retryable_exceptions=(errors.Resource.RetryableDeletionError,), - )(_DeleteOneEni)() + )(_delete_one_eni)() def _IsReady(self): """Returns True if cluster is running. Autopilot defaults to 0 nodes.""" diff --git a/perfkitbenchmarker/providers/azure/azure_kubernetes_service.py b/perfkitbenchmarker/providers/azure/azure_kubernetes_service.py index 5863f551fa..f9c227a986 100644 --- a/perfkitbenchmarker/providers/azure/azure_kubernetes_service.py +++ b/perfkitbenchmarker/providers/azure/azure_kubernetes_service.py @@ -205,20 +205,35 @@ def _Create(self): if FLAGS.azure_aks_auto_node_provisioning: # For provision_node_pools benchmark, add auto provisioning mode cmd.append('--node-provisioning-mode=auto') - # TODO(pclay): expose quota and capacity errors - # Creating an AKS cluster with a fresh service principal usually fails due - # to a race condition. 
Active Directory knows the service principal exists, - # but AKS does not. (https://github.com/Azure/azure-cli/issues/9585) - # Use 5 min timeout on service principle retry. cmd will fail fast. - vm_util.Retry(timeout=300)(vm_util.IssueCommand)( - cmd, - # Half hour timeout on creating the cluster. - timeout=1800, - ) + + self._RunCreateClusterCmd(cmd) for _, nodepool in self.nodepools.items(): self._CreateNodePool(nodepool) + @vm_util.Retry( + timeout=3600, + retryable_exceptions=(errors.Resource.RetryableCreationError,), + ) + def _RunCreateClusterCmd(self, cmd: list[str]): + """Runs the create cluster command, retrying on race condition errors.""" + try: + _, err, retcode = vm_util.IssueCommand( + cmd, + # Half hour timeout on creating the cluster. + timeout=1800, + raise_on_failure=False, + ) + except errors.VmUtil.IssueCommandTimeoutError as e: + retcode = 1 + err = str(e) + if retcode: + if 'InvalidOutputTable' in err: + # This is a race condition where the logs analytics workspace hasn't + # finished being created. Retrying solves it. 
+ raise errors.Resource.RetryableCreationError(err) + raise errors.Resource.CreationError(err) + def _CreateNodePool(self, nodepool_config: container.BaseNodePoolConfig): """Creates a node pool.""" cmd = [ @@ -251,9 +266,7 @@ def _CreateNodePool(self, nodepool_config: container.BaseNodePoolConfig): raise errors.Resource.CreationError(stderr) def _GetNodeFlags( - self, - nodepool_config: container.BaseNodePoolConfig, - version_override: str | None = None, + self, nodepool_config: container.BaseNodePoolConfig ) -> List[str]: """Common flags for create and nodepools add.""" args = [] + self.resource_group.args @@ -276,9 +289,8 @@ def _GetNodeFlags( args += ['--zones', zones] if self.default_nodepool.disk_size: args += ['--node-osdisk-size', str(self.default_nodepool.disk_size)] - version = version_override or self.cluster_version - if version: - args += ['--kubernetes-version', version] + if self.cluster_version: + args += ['--kubernetes-version', self.cluster_version] return args def _Exists(self): @@ -564,7 +576,7 @@ def DeleteNodePool(self, name: str) -> None: '--name', _AzureNodePoolName(name), ] + self.resource_group.args - vm_util.IssueCommand(cmd, timeout=1800) + self._RunCreateClusterCmd(cmd) def UpgradeNodePool(self, name: str, target_version: str) -> None: """Upgrades the named node pool to target_version.""" @@ -893,11 +905,7 @@ def _Create(self): 'automatic', '--tags', ] + tags_list - vm_util.IssueCommand( - cmd, - # Half hour timeout on creating the cluster. 
- timeout=1800, - ) + self._RunCreateClusterCmd(cmd) def _CreateRoleAssignment(self): """Creates a role assignment for the current user.""" diff --git a/perfkitbenchmarker/providers/gcp/google_kubernetes_engine.py b/perfkitbenchmarker/providers/gcp/google_kubernetes_engine.py index 1da9197188..76bd8afb97 100644 --- a/perfkitbenchmarker/providers/gcp/google_kubernetes_engine.py +++ b/perfkitbenchmarker/providers/gcp/google_kubernetes_engine.py @@ -25,6 +25,7 @@ from absl import flags from perfkitbenchmarker import errors from perfkitbenchmarker import provider_info +from perfkitbenchmarker import virtual_machine from perfkitbenchmarker import virtual_machine_spec from perfkitbenchmarker import vm_util from perfkitbenchmarker.configs import container_spec as container_spec_lib @@ -54,7 +55,7 @@ def _CalculateCidrSize(nodes: int) -> int: # So 2^(32 - nodes) - 2^(32 - 20) >= 2^(32 - 24) * CIDR # OR CIDR <= 32 - log2(2^8 * nodes + 2^12) cidr_size = int(32 - math.log2((nodes << 8) + (1 << 12))) - # /17 is narrowest CIDR range GKE supports + # /16 is narrowest CIDR range GKE supports return min(cidr_size, 16) @@ -330,6 +331,8 @@ def InitializeNodePoolForCloud( nodepool_config.disk_size = vm_config.boot_disk_size nodepool_config.max_local_disks = vm_config.max_local_disks nodepool_config.ssd_interface = vm_config.ssd_interface + nodepool_config.gpu_type = vm_config.gpu_type + nodepool_config.gpu_count = vm_config.gpu_count nodepool_config.threads_per_core = vm_config.threads_per_core nodepool_config.gce_tags = vm_config.gce_tags nodepool_config.min_cpu_platform = vm_config.min_cpu_platform @@ -365,6 +368,9 @@ def GetResourceMetadata(self) -> dict[str, Any]: result['gce_local_ssd_count'] = self.default_nodepool.max_local_disks result['gce_local_ssd_interface'] = self.default_nodepool.ssd_interface result['gke_nccl_fast_socket'] = self.enable_nccl_fast_socket + if 'nccl' in self.nodepools: + result['gpu_type'] = self.nodepools['nccl'].gpu_type + result['gpu_count'] = 
self.nodepools['nccl'].gpu_count if self.image_type: result['image_type'] = self.image_type if gcp_flags.MAX_CPU.value: @@ -712,6 +718,97 @@ def _IssueAsync(self, cmd: util.GcloudCommand) -> str: ) return op_name + def _GetLatestOperationName( + self, + operation_type: str = 'UPGRADE_NODES', + target_name: str = '', + max_attempts: int = 5, + retry_delay: int = 3, + op_start_time: float = 0.0, + ) -> str: + """Returns the name of the most recent matching operation for this cluster. + + The async gcloud command may return before the GKE control plane has + transitioned the operation from PENDING to RUNNING. For fast operations + (e.g. label updates) the operation may already be DONE by the time this + method is called. Passing op_start_time handles both cases. + + Args: + operation_type: GKE operationType to filter on, e.g. 'UPGRADE_NODES' + for node pool upgrades or 'UPDATE_CLUSTER' for cluster-level + updates via 'gcloud container clusters update'. + target_name: Substring to match against targetLink (e.g. nodepool name + for UPGRADE_NODES, or cluster name for UPDATE_CLUSTER). If empty, + falls back to self.name (the cluster name). + max_attempts: Number of query attempts before giving up. + retry_delay: Seconds to wait between attempts. + op_start_time: Unix timestamp recorded just before the async gcloud + command was issued. When provided, the status filter is broadened + to include DONE (so fast-completing operations are found) and a + startTime >= guard is added to avoid matching old operations. + + Returns: + Operation name string, or empty string if none found. + """ + link_target = target_name or self.name + if op_start_time: + # Fast operations (e.g. --update-labels) may be DONE before we query. + # Broaden the status filter and add a startTime guard (with a 30-second + # buffer for clock skew) to avoid picking up older completed operations. 
+ from_time = time.strftime( + '%Y-%m-%dT%H:%M:%SZ', time.gmtime(op_start_time - 30) + ) + status_filter = '(status=RUNNING OR status=PENDING OR status=DONE)' + time_filter = f' AND startTime>="{from_time}"' + else: + # Slow operations (e.g. node pool upgrades): only look for active ops. + status_filter = '(status=RUNNING OR status=PENDING)' + time_filter = '' + + filter_str = ( + f'operationType={operation_type} AND ' + f'{status_filter} AND ' + f'targetLink ~ {link_target}' + f'{time_filter}' + ) + for attempt in range(1, max_attempts + 1): + list_cmd = self._GcloudCommand('container', 'operations', 'list') + list_cmd.flags['filter'] = filter_str + list_cmd.flags['sort-by'] = '~startTime' + list_cmd.flags['limit'] = 1 + list_cmd.flags['format'] = 'value(name)' + stdout, stderr, _ = list_cmd.Issue(raise_on_failure=False) + op_name = stdout.strip() + if op_name: + logging.info( + '_GetLatestOperationName: found op %s (type=%s target=%s) ' + '(attempt %d/%d)', op_name, operation_type, link_target, + attempt, max_attempts, + ) + return op_name + logging.warning( + '_GetLatestOperationName: no %s op found for target=%s ' + '(attempt %d/%d), retrying in %ds. 
stderr=%s', + operation_type, link_target, attempt, max_attempts, retry_delay, + stderr, + ) + time.sleep(retry_delay) + return '' + +# def HasActiveUpgradeOperations(self) -> bool: +# """Checks if there are any active node pool upgrades running on the cluster.""" +# cmd = self._GcloudCommand('container', 'operations', 'list') +# cmd.flags['project'] = self.project +# cmd.flags['zone'] = self.zone +# cmd.flags['filter'] = 'operationType=UPGRADE_NODES AND status=RUNNING' +# cmd.flags['sort-by'] = '~startTime' +# cmd.flags['limit'] = 1 +# cmd.flags['format'] = 'value(name)' + +# # Issue the command using PKB's native GcloudCommand wrapper +# stdout, _, _ = cmd.Issue(raise_on_failure=False) +# return bool(stdout.strip()) + def CreateNodePoolAsync( self, nodepool_config: container.BaseNodePoolConfig, @@ -744,8 +841,23 @@ def UpgradeNodePoolAsync(self, name: str, target_version: str) -> str: '--cluster-version', target_version, ) - cmd.args.append('--quiet') - return self._IssueAsync(cmd) + try: + return self._IssueAsync(cmd) + except errors.Resource.CreationError as e: + if 'returned no operation name' not in str(e): + raise + # Fallback: gcloud succeeded but printed nothing. Query the operations + # list scoped to this specific nodepool to find the operation name. + logging.warning( + 'UpgradeNodePoolAsync: falling back to operations list for ' + 'nodepool %s. 
Original error: %s', name, e + ) + op_name = self._GetLatestOperationName( + operation_type='UPGRADE_NODES', target_name=name + ) + if not op_name: + raise + return op_name def DeleteNodePoolAsync(self, name: str) -> str: cmd = self._GcloudCommand( @@ -762,7 +874,36 @@ def DeleteNodePoolAsync(self, name: str) -> str: def UpdateClusterAsync(self) -> str: cmd = self._GcloudCommand('container', 'clusters', 'update', self.name) cmd.flags['update-labels'] = f'k8s-mgmt-ts={int(time.time())}' - return self._IssueAsync(cmd) + # 'gcloud container clusters update --async' suppresses stdout when + # --quiet is active (same behaviour as 'clusters upgrade'), so the + # operation name is never printed. Remove --quiet here; the label-update + # is non-interactive so no confirmation prompt is needed. + cmd.flags.pop('quiet', None) + # Record start time BEFORE issuing. The label-update operation completes + # in seconds, so it may already be DONE by the time the fallback queries + # the operations list. The timestamp lets us safely include DONE ops + # without matching older completed operations from previous runs. + op_start_time = time.time() + try: + return self._IssueAsync(cmd) + except errors.Resource.CreationError as e: + if 'returned no operation name' not in str(e): + raise + # Fallback: gcloud returned retcode=0 but empty stdout. Query the + # operations list including DONE status (fast label-update ops complete + # before we query) guarded by op_start_time to avoid stale matches. + logging.warning( + 'UpdateClusterAsync: falling back to operations list for cluster %s.' + ' Original error: %s', self.name, e + ) + op_name = self._GetLatestOperationName( + operation_type='UPDATE_CLUSTER', + target_name=self.name, + op_start_time=op_start_time, + ) + if not op_name: + raise + return op_name def ResolveNodePoolVersions(self) -> tuple[str, str]: """Returns (initial, target) GKE node versions: initial=N-1, target=N. 
@@ -837,7 +978,6 @@ def _poll(): _poll() - class GkeAutopilotCluster(BaseGkeCluster): """Class representing an Autopilot GKE cluster, which has no nodepools.""" @@ -936,4 +1076,4 @@ def GetNodeSelectors(self, machine_type: str | None = None) -> dict[str, str]: def ResizeNodePool( self, new_size: int, node_pool: str = container_cluster.DEFAULT_NODEPOOL ): - raise NotImplementedError('Autopilot clusters do not support resizing.') + raise NotImplementedError('Autopilot clusters do not support resizing.') \ No newline at end of file diff --git a/perfkitbenchmarker/resources/container_service/kubernetes_cluster.py b/perfkitbenchmarker/resources/container_service/kubernetes_cluster.py index 1742b93470..4a5015819c 100644 --- a/perfkitbenchmarker/resources/container_service/kubernetes_cluster.py +++ b/perfkitbenchmarker/resources/container_service/kubernetes_cluster.py @@ -11,7 +11,7 @@ from perfkitbenchmarker import vm_util from perfkitbenchmarker.configs import container_spec as container_spec_lib from perfkitbenchmarker.resources import kubernetes_inference_server -from perfkitbenchmarker.resources.container_service import container as container_lib +from perfkitbenchmarker.resources.container_service import (container as container_lib) from perfkitbenchmarker.resources.container_service import container_cluster from perfkitbenchmarker.resources.container_service import kubectl from perfkitbenchmarker.resources.container_service import kubernetes @@ -55,9 +55,25 @@ def Create(self, restore: bool = False) -> None: self.inference_server.Create() def _PostCreate(self): + """Starts the event poller after the cluster has been created.""" super()._PostCreate() if self.event_poller: - self.event_poller.StartPolling() + try: + self.event_poller.StartPolling() + except Exception as exc: # pylint: disable=broad-except + # Python 3.14 tightened pickling rules for multiprocessing — local + # functions passed to Process cannot be pickled. 
Rather than crashing + # PKB entirely (which prevents cleanup and orphans cloud resources), + # log a warning and continue without the event poller. + # Impact: no Kubernetes event streaming during the run — benchmark + # metrics are unaffected. + logging.warning( + 'Event poller failed to start (non-fatal, continuing without ' + + 'event polling): %s. This is a known Python 3.14 pickling ' + + 'issue — switch to Python 3.13 to enable event polling.', + exc, + ) + self.event_poller = None def Delete(self, freeze: bool = False) -> None: if self.inference_server: @@ -152,6 +168,7 @@ def GetDefaultStorageClass(self) -> str: def GetNodeSelectors(self, machine_type: str | None = None) -> dict[str, str]: """Gets the node selectors section of a yaml for the provider.""" + del machine_type # Unused; subclasses may use it. return {} def ModifyPodSpecPlacementYaml( @@ -166,9 +183,9 @@ def ModifyPodSpecPlacementYaml( the most likely to change from cloud to cloud. Args: - yaml_dicts: The list of yaml dicts to search through & modify. See - https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.34/#podspec-v1-core - for documentation on the pod spec fields. This is modified in place. + yaml_dicts: The list of yaml dicts to search through & modify. See the + K8s PodSpec API docs for pod spec field documentation. Modified + in place. name: The name of the app. machine_type: A specified machine type to request. """ @@ -196,9 +213,8 @@ def _ModifyPodSpecPlacementYaml( the most likely to change from cloud to cloud. Args: - pod_spec_yaml: The pod spec yaml to modify. See - https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.34/#podspec-v1-core - for documentation on the pod spec fields. This is modified in place. + pod_spec_yaml: The pod spec yaml to modify. See the K8s PodSpec API + docs for pod spec field documentation. This is modified in place. name: The name of the app. machine_type: A specified machine type to request. 
""" @@ -358,14 +374,14 @@ def DeleteNodePoolAsync(self, name: str) -> str: raise NotImplementedError def UpdateClusterAsync(self) -> str: - """Initiates cluster-level update; returns opaque op handle. Does NOT wait.""" + """Initiates cluster-level update. Returns op handle; does NOT wait.""" raise NotImplementedError @abc.abstractmethod def GetNodePoolNames(self) -> list[str]: """Returns the names of all node pools currently in the cluster. - Used by the k8s_management benchmark to: + Used by the kubernetes_management benchmark to: - Sweep stale pkbm* pools before each run (clean-start spec requirement) - Re-list live pools after creates before deleting (avoids stale names) """ @@ -383,7 +399,7 @@ def WaitForOperation(self, op_handle: str) -> None: raise NotImplementedError def ResolveNodePoolVersions(self) -> tuple[str, str]: - """Returns (initial, target) node-pool Kubernetes versions per benchmark spec. + """Returns (initial, target) K8s versions per benchmark spec. Spec contract: target = cluster's current K8s version (the latest available) @@ -470,4 +486,4 @@ def _DeleteAllFromDefaultNamespace(): if 'kubeconfig1: no such file or directory' in str(e): logging.info('Kubeconfig not found, assuming cluster is already deleted.') return - raise e \ No newline at end of file + raise e diff --git a/tests/linux_benchmarks/kubernetes_management_benchmark_test.py b/tests/linux_benchmarks/kubernetes_management_benchmark_test.py new file mode 100644 index 0000000000..6852c8df46 --- /dev/null +++ b/tests/linux_benchmarks/kubernetes_management_benchmark_test.py @@ -0,0 +1,1105 @@ +# Copyright 2026 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for linux_benchmarks.kubernetes_management_benchmark.""" + +# pylint: disable=invalid-name,protected-access + +import threading +import time +import unittest +from unittest import mock + +from absl import flags +from absl.testing import flagsaver +from perfkitbenchmarker import errors +from perfkitbenchmarker import sample +from perfkitbenchmarker.linux_benchmarks import kubernetes_management_benchmark +from perfkitbenchmarker.resources.container_service import kubernetes_cluster +from tests import pkb_common_test_case + +FLAGS = flags.FLAGS + +_CLUSTER_NAME = 'test-cluster' + + +def _make_sample(metric, value, unit='seconds', metadata=None): + return sample.Sample(metric, value, unit, metadata or {}) + + +def _make_mock_cluster( + name=_CLUSTER_NAME, + k8s_version='1.34', + pool_names=None, +): + """Creates a fully-stubbed KubernetesCluster mock for use in tests.""" + cluster = mock.create_autospec( + kubernetes_cluster.KubernetesCluster, instance=True + ) + cluster.name = name + cluster.k8s_version = k8s_version + cluster.cluster_version = k8s_version + cluster.GetNodePoolNames.return_value = pool_names or [] + cluster.ResolveNodePoolVersions.return_value = ('1.33', '1.34') + cluster.CreateNodePoolAsync.return_value = 'op-create-1' + cluster.UpgradeNodePoolAsync.return_value = 'op-upgrade-1' + cluster.DeleteNodePoolAsync.return_value = 'op-delete-1' + cluster.UpdateClusterAsync.return_value = 'op-update-1' + cluster.WaitForOperation.return_value = None + default_np = mock.MagicMock() + default_np.machine_type = 'e2-standard-2' + 
default_np.num_nodes = 1 + default_np.min_nodes = 1 + default_np.max_nodes = 1 + default_np.zone = 'us-central1-a' + default_np.disk_size = 100 + default_np.name = 'default-pool' + cluster.default_nodepool = default_np + return cluster + + +def _make_mock_benchmark_spec(cluster=None): + spec = mock.MagicMock() + spec.container_cluster = cluster or _make_mock_cluster() + return spec + + +def _make_mock_config(cluster_type='Kubernetes'): + cfg = mock.MagicMock() + cfg.container_cluster.type = cluster_type + return cfg + + +class ScenarioNameTest(pkb_common_test_case.PkbCommonTestCase): + """Tests for _ScenarioAName, _SCENARIO_B_NAME and _ScenarioCName.""" + + def testScenarioANameZeroPadsToThreeDigits(self): + self.assertEqual( + 'pkbma000', + kubernetes_management_benchmark._ScenarioAName(0), + ) + + def testScenarioANameTwoDigitIndex(self): + self.assertEqual( + 'pkbma042', + kubernetes_management_benchmark._ScenarioAName(42), + ) + + def testScenarioANameMaxThreeDigits(self): + self.assertEqual( + 'pkbma999', + kubernetes_management_benchmark._ScenarioAName(999), + ) + + def testScenarioBNameIsConstant(self): + self.assertEqual( + 'pkbmb', + kubernetes_management_benchmark._SCENARIO_B_NAME, + ) + + def testScenarioCNameZeroPadsToFourDigits(self): + self.assertEqual( + 'pkbmc0000', + kubernetes_management_benchmark._ScenarioCName(0), + ) + + def testScenarioCNameSingleDigitIndex(self): + self.assertEqual( + 'pkbmc0007', + kubernetes_management_benchmark._ScenarioCName(7), + ) + + def testScenarioCNameFourDigitIndex(self): + self.assertEqual( + 'pkbmc1000', + kubernetes_management_benchmark._ScenarioCName(1000), + ) + + def testAllNamesWithinAksLimit(self): + for i in range(1000): + self.assertLessEqual( + len(kubernetes_management_benchmark._ScenarioAName(i)), 12 + ) + for i in range(10000): + self.assertLessEqual( + len(kubernetes_management_benchmark._ScenarioCName(i)), 12 + ) + self.assertLessEqual( + len(kubernetes_management_benchmark._SCENARIO_B_NAME), 12 + 
) + + +class CheckPrerequisitesTest(pkb_common_test_case.PkbCommonTestCase): + """Tests for the CheckPrerequisites validation function.""" + + def testValidScenariosPass(self): + with flagsaver.flagsaver(k8s_mgmt_scenarios=['A', 'B', 'C']): + kubernetes_management_benchmark.CheckPrerequisites(_make_mock_config()) + + def testInvalidScenarioRaises(self): + with flagsaver.flagsaver(k8s_mgmt_scenarios=['X']): + with self.assertRaises(errors.Config.InvalidValue): + kubernetes_management_benchmark.CheckPrerequisites(_make_mock_config()) + + def testMixedValidInvalidRaises(self): + with flagsaver.flagsaver(k8s_mgmt_scenarios=['A', 'Z']): + with self.assertRaises(errors.Config.InvalidValue): + kubernetes_management_benchmark.CheckPrerequisites(_make_mock_config()) + + def testNonKubernetesClusterTypeRaises(self): + with flagsaver.flagsaver(k8s_mgmt_scenarios=['A']): + with self.assertRaises(errors.Config.InvalidValue): + kubernetes_management_benchmark.CheckPrerequisites( + _make_mock_config(cluster_type='Mesos') + ) + + def testInvalidScaleSweepRaises(self): + with flagsaver.flagsaver( + k8s_mgmt_scenarios=['C'], k8s_mgmt_scale_sweep=['10', 'abc'] + ): + with self.assertRaises(errors.Config.InvalidValue): + kubernetes_management_benchmark.CheckPrerequisites(_make_mock_config()) + + def testValidScaleSweepPasses(self): + with flagsaver.flagsaver( + k8s_mgmt_scenarios=['C'], k8s_mgmt_scale_sweep=['10', '50', '100'] + ): + kubernetes_management_benchmark.CheckPrerequisites(_make_mock_config()) + + def testLowercaseScenarioRaises(self): + with flagsaver.flagsaver(k8s_mgmt_scenarios=['a']): + with self.assertRaises(errors.Config.InvalidValue): + kubernetes_management_benchmark.CheckPrerequisites(_make_mock_config()) + + +class PrepareTest(pkb_common_test_case.PkbCommonTestCase): + """Tests for the Prepare benchmark lifecycle function.""" + + def _patch_kubectl(self, rc=0): + return mock.patch( + 'perfkitbenchmarker.resources.container_service.kubectl' + + 
'.RunKubectlCommand', + return_value=('', '', rc), + ) + + def testPrepareRunsKubectlSleepPod(self): + cluster = _make_mock_cluster() + bm_spec = _make_mock_benchmark_spec(cluster) + with self._patch_kubectl() as mock_kubectl: + kubernetes_management_benchmark.Prepare(bm_spec) + mock_kubectl.assert_called_once() + args = mock_kubectl.call_args[0][0] + self.assertIn('run', args) + self.assertIn('pkb-mgmt-sleep', args) + self.assertIn('sleep', args) + + def testPrepareSetsAlwaysCallCleanup(self): + cluster = _make_mock_cluster() + bm_spec = _make_mock_benchmark_spec(cluster) + with self._patch_kubectl(): + kubernetes_management_benchmark.Prepare(bm_spec) + self.assertTrue(bm_spec.always_call_cleanup) + + def testPrepareToleratesKubectlNonZeroReturn(self): + cluster = _make_mock_cluster() + bm_spec = _make_mock_benchmark_spec(cluster) + with self._patch_kubectl(rc=1): + kubernetes_management_benchmark.Prepare(bm_spec) + + +class CleanupTest(pkb_common_test_case.PkbCommonTestCase): + """Tests for the Cleanup benchmark lifecycle function.""" + + def _patch_kubectl(self): + return mock.patch( + 'perfkitbenchmarker.resources.container_service.kubectl' + + '.RunKubectlCommand', + return_value=('', '', 0), + ) + + def testCleanupDeletesSleepPod(self): + cluster = _make_mock_cluster() + bm_spec = _make_mock_benchmark_spec(cluster) + with self._patch_kubectl() as mock_kubectl: + kubernetes_management_benchmark.Cleanup(bm_spec) + delete_calls = [ + str(c) for c in mock_kubectl.call_args_list + if 'pkb-mgmt-sleep' in str(c) + ] + self.assertNotEmpty(delete_calls) + + def testCleanupDeletesAllPkbmPrefixedPools(self): + cluster = _make_mock_cluster( + pool_names=['pkbma000', 'default-pool', 'pkbmc0001'] + ) + bm_spec = _make_mock_benchmark_spec(cluster) + with self._patch_kubectl(): + kubernetes_management_benchmark.Cleanup(bm_spec) + deleted = {c.args[0] for c in cluster.DeleteNodePool.call_args_list} + self.assertIn('pkbma000', deleted) + self.assertIn('pkbmc0001', deleted) + 
self.assertNotIn('default-pool', deleted) + + def testCleanupSkipsDeleteWhenNoLeftoverPools(self): + cluster = _make_mock_cluster(pool_names=['default-pool']) + bm_spec = _make_mock_benchmark_spec(cluster) + with self._patch_kubectl(): + kubernetes_management_benchmark.Cleanup(bm_spec) + cluster.DeleteNodePool.assert_not_called() + + def testCleanupHandlesNoneCluster(self): + bm_spec = _make_mock_benchmark_spec() + bm_spec.container_cluster = None + kubernetes_management_benchmark.Cleanup(bm_spec) + + +class CleanStartSweepTest(pkb_common_test_case.PkbCommonTestCase): + """Tests for the _CleanStartSweep helper function.""" + + def testDeletesStalePkbmPools(self): + cluster = _make_mock_cluster( + pool_names=['pkbma000', 'pkbmc0001', 'user-pool'] + ) + kubernetes_management_benchmark._CleanStartSweep(cluster) + deleted = {c.args[0] for c in cluster.DeleteNodePool.call_args_list} + self.assertIn('pkbma000', deleted) + self.assertIn('pkbmc0001', deleted) + self.assertNotIn('user-pool', deleted) + + def testDoesNothingWhenNoPkbmPools(self): + cluster = _make_mock_cluster(pool_names=['user-pool', 'default-pool']) + kubernetes_management_benchmark._CleanStartSweep(cluster) + cluster.DeleteNodePool.assert_not_called() + + def testToleratesGetNodePoolNamesException(self): + cluster = _make_mock_cluster() + cluster.GetNodePoolNames.side_effect = RuntimeError('API error') + kubernetes_management_benchmark._CleanStartSweep(cluster) + + +class ResultsTest(pkb_common_test_case.PkbCommonTestCase): + """Tests for the _Results result-accumulator helper.""" + + def testAddSingleEntry(self): + r = kubernetes_management_benchmark._Results() + r.add('op1', 0.1, 1.0, None) + self.assertLen(r.entries, 1) + name, init, e2e, err = r.entries[0] + self.assertEqual('op1', name) + self.assertAlmostEqual(0.1, init, places=5) + self.assertAlmostEqual(1.0, e2e, places=5) + self.assertIsNone(err) + + def testAddMultipleEntries(self): + r = kubernetes_management_benchmark._Results() + r.add('op1', 
0.1, 1.0, None) + r.add('op2', 0.2, 2.0, ValueError('fail')) + self.assertLen(r.entries, 2) + + def testAddIsThreadSafe(self): + """Tests that concurrent add() calls from multiple threads are safe.""" + r = kubernetes_management_benchmark._Results() + n = 100 + + def _add(i): + r.add(f'op{i}', float(i), float(i) * 2, None) + + threads = [threading.Thread(target=_add, args=(i,)) for i in range(n)] + for t in threads: + t.start() + for t in threads: + t.join() + self.assertLen(r.entries, n) + + def testAddPreservesError(self): + r = kubernetes_management_benchmark._Results() + exc = RuntimeError('test error') + r.add('failing-op', 0.5, 0.5, exc) + _, _, _, err = r.entries[0] + self.assertIs(exc, err) + + +class TimedAsyncTest(pkb_common_test_case.PkbCommonTestCase): + """Tests for the _TimedAsync timing helper.""" + + def testSuccessfulKickoffAndWait(self): + kickoff = mock.Mock(return_value='op-handle') + wait_fn = mock.Mock(return_value=None) + init_lat, e2e_lat, err = kubernetes_management_benchmark._TimedAsync( + kickoff, wait_fn + ) + kickoff.assert_called_once() + wait_fn.assert_called_once_with('op-handle') + self.assertIsNone(err) + self.assertGreaterEqual(init_lat, 0.0) + self.assertGreaterEqual(e2e_lat, init_lat) + + def testKickoffFailureReturnsError(self): + exc = RuntimeError('kickoff failed') + kickoff = mock.Mock(side_effect=exc) + wait_fn = mock.Mock() + init_lat, e2e_lat, err = kubernetes_management_benchmark._TimedAsync( + kickoff, wait_fn + ) + self.assertIs(exc, err) + wait_fn.assert_not_called() + self.assertAlmostEqual(init_lat, e2e_lat, places=2) + + def testWaitFailureReturnsError(self): + exc = RuntimeError('wait failed') + kickoff = mock.Mock(return_value='op-handle') + wait_fn = mock.Mock(side_effect=exc) + _, e2e_lat, err = kubernetes_management_benchmark._TimedAsync( + kickoff, wait_fn + ) + self.assertIs(exc, err) + self.assertGreater(e2e_lat, 0.0) + + def testInitLatencyNotGreaterThanE2eLatency(self): + kickoff = 
mock.Mock(return_value='handle') + wait_fn = mock.Mock(side_effect=lambda _: time.sleep(0.01)) + init_lat, e2e_lat, err = kubernetes_management_benchmark._TimedAsync( + kickoff, wait_fn + ) + self.assertIsNone(err) + self.assertLessEqual(init_lat, e2e_lat) + + def testHandlePassedToWaitFn(self): + kickoff = mock.Mock(return_value='my-op-handle') + wait_fn = mock.Mock() + kubernetes_management_benchmark._TimedAsync(kickoff, wait_fn) + wait_fn.assert_called_once_with('my-op-handle') + + +class RunAsyncTest(pkb_common_test_case.PkbCommonTestCase): + """Tests for the _RunAsync concurrent execution helper.""" + + def testEmptyItemsReturnsEmptyList(self): + results = kubernetes_management_benchmark._RunAsync( + kickoff=mock.Mock(), + wait_fn=mock.Mock(), + items=[], + get_name=str, + ) + self.assertEmpty(results) + + @flagsaver.flagsaver(k8s_mgmt_max_concurrent=50) + def testReturnsOneResultPerItem(self): + kickoff = mock.Mock(return_value='op-handle') + wait_fn = mock.Mock(return_value=None) + results = kubernetes_management_benchmark._RunAsync( + kickoff=kickoff, wait_fn=wait_fn, items=['a', 'b', 'c'], get_name=str + ) + self.assertLen(results, 3) + self.assertEqual({'a', 'b', 'c'}, {name for name, _, _, _ in results}) + + @flagsaver.flagsaver(k8s_mgmt_max_concurrent=50) + def testKickoffErrorCapturedInResults(self): + kickoff = mock.Mock(side_effect=RuntimeError('kaboom')) + results = kubernetes_management_benchmark._RunAsync( + kickoff=kickoff, wait_fn=mock.Mock(), items=['x'], get_name=str + ) + self.assertLen(results, 1) + _, _, _, err = results[0] + self.assertIsNotNone(err) + + @flagsaver.flagsaver(k8s_mgmt_max_concurrent=2) + def testConcurrencyCapDoesNotDropItems(self): + results = kubernetes_management_benchmark._RunAsync( + kickoff=mock.Mock(return_value='op'), + wait_fn=mock.Mock(return_value=None), + items=list(range(5)), + get_name=str, + ) + self.assertLen(results, 5) + + @flagsaver.flagsaver(k8s_mgmt_max_concurrent=50) + def 
testGetNameCallableApplied(self): + cfg = mock.MagicMock() + cfg.name = 'poolname' + results = kubernetes_management_benchmark._RunAsync( + kickoff=mock.Mock(return_value='h'), + wait_fn=mock.Mock(), + items=[cfg], + get_name=lambda c: c.name, + ) + name, _, _, _ = results[0] + self.assertEqual('poolname', name) + + +class MakeNodePoolConfigTest(pkb_common_test_case.PkbCommonTestCase): + """Tests for the _MakeNodePoolConfig factory.""" + + @flagsaver.flagsaver(k8s_mgmt_nodes_per_nodepool=3) + def testNameIsSet(self): + cluster = _make_mock_cluster() + cfg = kubernetes_management_benchmark._MakeNodePoolConfig(cluster, 'mypool') + self.assertEqual('mypool', cfg.name) + + @flagsaver.flagsaver(k8s_mgmt_nodes_per_nodepool=3) + def testNumNodesComesFromFlag(self): + cluster = _make_mock_cluster() + cfg = kubernetes_management_benchmark._MakeNodePoolConfig(cluster, 'p') + self.assertEqual(3, cfg.num_nodes) + self.assertEqual(3, cfg.min_nodes) + self.assertEqual(3, cfg.max_nodes) + + @flagsaver.flagsaver(k8s_mgmt_nodes_per_nodepool=1) + def testDoesNotMutateDefaultNodepool(self): + cluster = _make_mock_cluster() + original_name = cluster.default_nodepool.name + kubernetes_management_benchmark._MakeNodePoolConfig(cluster, 'newname') + self.assertEqual(original_name, cluster.default_nodepool.name) + + +class OpSamplesTest(pkb_common_test_case.PkbCommonTestCase): + """Tests for the _OpSamples sample-generation helper.""" + + def testEmptyResultsYieldsSuccessRateOfZero(self): + samples = kubernetes_management_benchmark._OpSamples( + 'PrefixOp', [], attempted_ops=5 + ) + rate = next(s for s in samples if s.metric == 'PrefixOp_SuccessRate') + self.assertEqual(0.0, rate.value) + + def testPerOpInitiationAndE2eSamplesGenerated(self): + results = [('op1', 0.1, 1.0, None), ('op2', 0.2, 2.0, None)] + samples = kubernetes_management_benchmark._OpSamples( + 'MyOp', results, attempted_ops=2 + ) + metrics = [s.metric for s in samples] + self.assertIn('MyOp_InitiationLatency', metrics) + 
self.assertIn('MyOp_EndToEndLatency', metrics) + + def testSuccessRateHundredPercentWhenAllSucceed(self): + results = [('op1', 1.0, 2.0, None), ('op2', 0.5, 1.5, None)] + samples = kubernetes_management_benchmark._OpSamples( + 'Op', results, attempted_ops=2 + ) + rate = next(s for s in samples if s.metric == 'Op_SuccessRate') + self.assertAlmostEqual(100.0, rate.value) + + def testSuccessRateFiftyPercentWhenHalfFail(self): + results = [ + ('op1', 1.0, 2.0, None), + ('op2', 0.5, 0.5, RuntimeError('fail')), + ] + samples = kubernetes_management_benchmark._OpSamples( + 'Op', results, attempted_ops=2 + ) + rate = next(s for s in samples if s.metric == 'Op_SuccessRate') + self.assertAlmostEqual(50.0, rate.value) + + def testAttemptedOpsExceedingExecutedOpsLowersRate(self): + results = [('op1', 1.0, 2.0, None)] + samples = kubernetes_management_benchmark._OpSamples( + 'Op', results, attempted_ops=3 + ) + rate = next(s for s in samples if s.metric == 'Op_SuccessRate') + self.assertAlmostEqual(100.0 / 3, rate.value, places=3) + + def testSuccessRateMetadataFields(self): + results = [('op1', 1.0, 2.0, None), ('op2', 0.5, 0.5, Exception('err'))] + samples = kubernetes_management_benchmark._OpSamples( + 'Op', results, attempted_ops=3 + ) + rate = next(s for s in samples if s.metric == 'Op_SuccessRate') + self.assertEqual('3', rate.metadata['total_ops']) + self.assertEqual('2', rate.metadata['executed_ops']) + self.assertEqual('1', rate.metadata['successful_ops']) + self.assertEqual('1', rate.metadata['skipped_ops']) + + def testFailedOpIncludesErrorMessage(self): + results = [('fail-op', 0.5, 0.5, RuntimeError('oops'))] + samples = kubernetes_management_benchmark._OpSamples( + 'Op', results, attempted_ops=1 + ) + init_s = next(s for s in samples if s.metric == 'Op_InitiationLatency') + self.assertIn('error', init_s.metadata) + self.assertIn('oops', init_s.metadata['error']) + + def testAggregatesGeneratedForTwoOrMoreSuccesses(self): + results = [(f'op{i}', float(i), float(i) 
* 2, None) for i in range(1, 4)] + samples = kubernetes_management_benchmark._OpSamples( + 'Op', results, attempted_ops=3 + ) + metrics = [s.metric for s in samples] + self.assertIn('Op_InitiationLatency_Mean', metrics) + self.assertIn('Op_EndToEndLatency_Mean', metrics) + + def testAggregatesNotGeneratedForSingleSuccess(self): + results = [('op1', 1.0, 2.0, None)] + samples = kubernetes_management_benchmark._OpSamples( + 'Op', results, attempted_ops=1 + ) + self.assertNotIn('Op_InitiationLatency_Mean', + [s.metric for s in samples]) + + def testOutliersGeneratedForFourOrMoreSuccesses(self): + results = [(f'op{i}', float(i), float(i) * 2, None) for i in range(1, 6)] + samples = kubernetes_management_benchmark._OpSamples( + 'Op', results, attempted_ops=5 + ) + metrics = [s.metric for s in samples] + self.assertIn('Op_InitiationLatency_OutlierCount', metrics) + self.assertIn('Op_EndToEndLatency_OutlierCount', metrics) + + def testOutliersNotGeneratedForThreeOrFewerSuccesses(self): + results = [(f'op{i}', float(i), float(i) * 2, None) for i in range(1, 4)] + samples = kubernetes_management_benchmark._OpSamples( + 'Op', results, attempted_ops=3 + ) + self.assertNotIn('Op_InitiationLatency_OutlierCount', + [s.metric for s in samples]) + + +class AggregateSamplesTest(pkb_common_test_case.PkbCommonTestCase): + """Tests for the _AggregateSamples statistics helper.""" + + def testProducesAllExpectedStatMetrics(self): + samples = kubernetes_management_benchmark._AggregateSamples( + 'Pfx', 'InitiationLatency', [1.0, 2.0, 3.0, 4.0, 5.0] + ) + metrics = {s.metric for s in samples} + for label in ('Mean', 'StdDev', 'Min', 'Median', 'P90', 'P99', 'Max'): + self.assertIn(f'Pfx_InitiationLatency_{label}', metrics) + + def testMeanValueCorrect(self): + samples = kubernetes_management_benchmark._AggregateSamples( + 'Op', 'E2E', [1.0, 2.0, 3.0, 4.0, 5.0] + ) + mean_s = next(s for s in samples if 'Mean' in s.metric) + self.assertAlmostEqual(3.0, mean_s.value, places=3) + + def 
testMinValueCorrect(self): + samples = kubernetes_management_benchmark._AggregateSamples( + 'Op', 'E2E', [10.0, 20.0, 30.0] + ) + min_s = next(s for s in samples if 'Min' in s.metric) + self.assertAlmostEqual(10.0, min_s.value, places=3) + + def testMaxValueCorrect(self): + samples = kubernetes_management_benchmark._AggregateSamples( + 'Op', 'E2E', [10.0, 20.0, 30.0] + ) + max_s = next(s for s in samples if 'Max' in s.metric) + self.assertAlmostEqual(30.0, max_s.value, places=3) + + def testSampleCountInMetadata(self): + samples = kubernetes_management_benchmark._AggregateSamples( + 'Op', 'E2E', [1.0, 2.0, 3.0] + ) + for s in samples: + self.assertEqual('3', s.metadata.get('sample_count')) + + def testUnitsAreSeconds(self): + samples = kubernetes_management_benchmark._AggregateSamples( + 'Op', 'E2E', [1.0, 2.0] + ) + for s in samples: + self.assertEqual('seconds', s.unit) + + +class OutlierSamplesTest(pkb_common_test_case.PkbCommonTestCase): + """Tests for the _OutlierSamples IQR-based outlier detection helper.""" + + def testNoOutliersYieldsZeroCount(self): + samples = kubernetes_management_benchmark._OutlierSamples( + 'Op', 'E2E', [1.0, 1.1, 1.2, 1.3, 1.4, 1.5] + ) + self.assertLen(samples, 1) + self.assertEqual(0, samples[0].value) + + def testClearOutlierDetected(self): + samples = kubernetes_management_benchmark._OutlierSamples( + 'Op', 'E2E', [1.0, 1.0, 1.0, 1.0, 100.0] + ) + self.assertEqual(1, samples[0].value) + + def testMetricNameFormatted(self): + samples = kubernetes_management_benchmark._OutlierSamples( + 'MyPrefix', 'InitiationLatency', [1.0, 2.0, 3.0, 4.0] + ) + self.assertEqual( + 'MyPrefix_InitiationLatency_OutlierCount', samples[0].metric + ) + + def testMetadataContainsFenceFields(self): + meta = kubernetes_management_benchmark._OutlierSamples( + 'Op', 'E2E', [1.0, 2.0, 3.0, 4.0, 5.0] + )[0].metadata + for field in ('q1', 'q3', 'iqr', 'upper_fence', 'lower_fence', + 'sample_count'): + self.assertIn(field, meta) + + def 
testSampleCountInMetadata(self): + samples = kubernetes_management_benchmark._OutlierSamples( + 'Op', 'E2E', [1.0, 2.0, 3.0, 4.0, 5.0] + ) + self.assertEqual('5', samples[0].metadata['sample_count']) + + def testUnitIsCount(self): + samples = kubernetes_management_benchmark._OutlierSamples( + 'Op', 'E2E', [1.0, 2.0, 3.0, 4.0] + ) + self.assertEqual('count', samples[0].unit) + + def testReturnsSingleSample(self): + samples = kubernetes_management_benchmark._OutlierSamples( + 'Op', 'E2E', list(range(1, 11)) + ) + self.assertLen(samples, 1) + + +class RunTest(pkb_common_test_case.PkbCommonTestCase): + """Tests for the Run benchmark entry-point function.""" + + @flagsaver.flagsaver( + k8s_mgmt_scenarios=['A', 'B', 'C'], + k8s_mgmt_scale_sweep=[], + k8s_mgmt_large_scale_nodepools=10, + ) + def testRunCallsCleanStartSweep(self): + """Tests that Run invokes _CleanStartSweep before executing scenarios.""" + cluster = _make_mock_cluster() + bm_spec = _make_mock_benchmark_spec(cluster) + with mock.patch.object( + kubernetes_management_benchmark, '_CleanStartSweep' + ) as mock_clean, mock.patch.object( + kubernetes_management_benchmark, '_RunScenarioA', return_value=[] + ), mock.patch.object( + kubernetes_management_benchmark, '_RunScenarioB', return_value=[] + ), mock.patch.object( + kubernetes_management_benchmark, '_RunScenarioC', return_value=[] + ): + kubernetes_management_benchmark.Run(bm_spec) + mock_clean.assert_called_once_with(cluster) + + @flagsaver.flagsaver( + k8s_mgmt_scenarios=['A'], + k8s_mgmt_scale_sweep=[], + k8s_mgmt_large_scale_nodepools=10, + ) + def testRunOnlyScenarioACallsOnlyA(self): + """Tests that Run only calls _RunScenarioA when scenarios=['A'].""" + cluster = _make_mock_cluster() + bm_spec = _make_mock_benchmark_spec(cluster) + with mock.patch.object( + kubernetes_management_benchmark, '_CleanStartSweep' + ), mock.patch.object( + kubernetes_management_benchmark, '_RunScenarioA', return_value=[] + ) as mock_a, mock.patch.object( + 
kubernetes_management_benchmark, '_RunScenarioB', return_value=[] + ) as mock_b, mock.patch.object( + kubernetes_management_benchmark, '_RunScenarioC', return_value=[] + ) as mock_c: + kubernetes_management_benchmark.Run(bm_spec) + mock_a.assert_called_once() + mock_b.assert_not_called() + mock_c.assert_not_called() + + @flagsaver.flagsaver( + k8s_mgmt_scenarios=['B'], + k8s_mgmt_scale_sweep=[], + k8s_mgmt_large_scale_nodepools=10, + ) + def testRunOnlyScenarioBCallsOnlyB(self): + """Tests that Run only calls _RunScenarioB when scenarios=['B'].""" + cluster = _make_mock_cluster() + bm_spec = _make_mock_benchmark_spec(cluster) + with mock.patch.object( + kubernetes_management_benchmark, '_CleanStartSweep' + ), mock.patch.object( + kubernetes_management_benchmark, '_RunScenarioA', return_value=[] + ) as mock_a, mock.patch.object( + kubernetes_management_benchmark, '_RunScenarioB', return_value=[] + ) as mock_b, mock.patch.object( + kubernetes_management_benchmark, '_RunScenarioC', return_value=[] + ) as mock_c: + kubernetes_management_benchmark.Run(bm_spec) + mock_a.assert_not_called() + mock_b.assert_called_once() + mock_c.assert_not_called() + + @flagsaver.flagsaver( + k8s_mgmt_scenarios=['C'], + k8s_mgmt_scale_sweep=[], + k8s_mgmt_large_scale_nodepools=42, + ) + def testRunScenarioCPassesLargeScaleFlag(self): + """Tests that Run passes the large-scale-nodepools flag to _RunScenarioC.""" + cluster = _make_mock_cluster() + bm_spec = _make_mock_benchmark_spec(cluster) + with mock.patch.object( + kubernetes_management_benchmark, '_CleanStartSweep' + ), mock.patch.object( + kubernetes_management_benchmark, '_RunScenarioA', return_value=[] + ), mock.patch.object( + kubernetes_management_benchmark, '_RunScenarioB', return_value=[] + ), mock.patch.object( + kubernetes_management_benchmark, '_RunScenarioC', return_value=[] + ) as mock_c: + kubernetes_management_benchmark.Run(bm_spec) + mock_c.assert_called_once() + _, _, scale = mock_c.call_args.args + self.assertEqual(42, 
scale) + + @flagsaver.flagsaver( + k8s_mgmt_scenarios=['C'], + k8s_mgmt_scale_sweep=['10', '50'], + k8s_mgmt_large_scale_nodepools=100, + ) + def testRunScenarioCScaleSweepRunsTwice(self): + """Tests that Run calls _RunScenarioC once per scale in the sweep.""" + cluster = _make_mock_cluster() + bm_spec = _make_mock_benchmark_spec(cluster) + with mock.patch.object( + kubernetes_management_benchmark, '_CleanStartSweep' + ), mock.patch.object( + kubernetes_management_benchmark, '_RunScenarioA', return_value=[] + ), mock.patch.object( + kubernetes_management_benchmark, '_RunScenarioB', return_value=[] + ), mock.patch.object( + kubernetes_management_benchmark, + '_RunScenarioC', + return_value=[_make_sample('m', 1.0)], + ) as mock_c: + kubernetes_management_benchmark.Run(bm_spec) + self.assertEqual(2, mock_c.call_count) + scales = [call.args[2] for call in mock_c.call_args_list] + self.assertIn(10, scales) + self.assertIn(50, scales) + + @flagsaver.flagsaver( + k8s_mgmt_scenarios=['C'], + k8s_mgmt_scale_sweep=['10'], + k8s_mgmt_large_scale_nodepools=10, + ) + def testRunTagsScenarioCScaleInMetadata(self): + """Tests that Run adds scenario_c_scale to each sample's metadata.""" + cluster = _make_mock_cluster() + bm_spec = _make_mock_benchmark_spec(cluster) + test_sample = _make_sample('metric', 1.0) + with mock.patch.object( + kubernetes_management_benchmark, '_CleanStartSweep' + ), mock.patch.object( + kubernetes_management_benchmark, '_RunScenarioA', return_value=[] + ), mock.patch.object( + kubernetes_management_benchmark, '_RunScenarioB', return_value=[] + ), mock.patch.object( + kubernetes_management_benchmark, + '_RunScenarioC', + return_value=[test_sample], + ): + samples = kubernetes_management_benchmark.Run(bm_spec) + self.assertIn('scenario_c_scale', samples[0].metadata) + self.assertEqual('10', samples[0].metadata['scenario_c_scale']) + + @flagsaver.flagsaver( + k8s_mgmt_scenarios=['A'], + k8s_mgmt_scale_sweep=[], + k8s_mgmt_large_scale_nodepools=10, + ) + def 
testRunTagsAllSamplesWithRunMetadata(self): + """Tests that Run adds version and config keys to all sample metadata.""" + cluster = _make_mock_cluster() + bm_spec = _make_mock_benchmark_spec(cluster) + test_sample = _make_sample('m', 1.0) + with mock.patch.object( + kubernetes_management_benchmark, '_CleanStartSweep' + ), mock.patch.object( + kubernetes_management_benchmark, + '_RunScenarioA', + return_value=[test_sample], + ), mock.patch.object( + kubernetes_management_benchmark, '_RunScenarioB', return_value=[] + ), mock.patch.object( + kubernetes_management_benchmark, '_RunScenarioC', return_value=[] + ): + samples = kubernetes_management_benchmark.Run(bm_spec) + meta = samples[0].metadata + for key in ('initial_version', 'target_version', 'cluster_k8s_version', + 'nodes_per_nodepool', 'concurrent_nodepools'): + self.assertIn(key, meta) + + @flagsaver.flagsaver( + k8s_mgmt_scenarios=['A'], + k8s_mgmt_initial_version='1.30', + k8s_mgmt_target_version='1.31', + k8s_mgmt_scale_sweep=[], + k8s_mgmt_large_scale_nodepools=10, + ) + def testRunUsesExplicitVersionFlags(self): + """Tests that Run uses explicit version flags over auto-resolved ones.""" + cluster = _make_mock_cluster() + bm_spec = _make_mock_benchmark_spec(cluster) + with mock.patch.object( + kubernetes_management_benchmark, '_CleanStartSweep' + ), mock.patch.object( + kubernetes_management_benchmark, + '_RunScenarioA', + return_value=[_make_sample('m', 1.0)], + ), mock.patch.object( + kubernetes_management_benchmark, '_RunScenarioB', return_value=[] + ), mock.patch.object( + kubernetes_management_benchmark, '_RunScenarioC', return_value=[] + ): + samples = kubernetes_management_benchmark.Run(bm_spec) + cluster.ResolveNodePoolVersions.assert_not_called() + self.assertEqual('1.30', samples[0].metadata['initial_version']) + self.assertEqual('1.31', samples[0].metadata['target_version']) + + @flagsaver.flagsaver( + k8s_mgmt_scenarios=['A'], + k8s_mgmt_scale_sweep=[], + k8s_mgmt_large_scale_nodepools=10, + ) + 
def testRunAutoResolvesVersionsWhenFlagsAbsent(self): + """Tests Run calls ResolveNodePoolVersions when version flags absent.""" + cluster = _make_mock_cluster() + cluster.ResolveNodePoolVersions.return_value = ('1.33', '1.34') + bm_spec = _make_mock_benchmark_spec(cluster) + with mock.patch.object( + kubernetes_management_benchmark, '_CleanStartSweep' + ), mock.patch.object( + kubernetes_management_benchmark, + '_RunScenarioA', + return_value=[_make_sample('m', 1.0)], + ), mock.patch.object( + kubernetes_management_benchmark, '_RunScenarioB', return_value=[] + ), mock.patch.object( + kubernetes_management_benchmark, '_RunScenarioC', return_value=[] + ): + samples = kubernetes_management_benchmark.Run(bm_spec) + cluster.ResolveNodePoolVersions.assert_called_once() + self.assertEqual('1.33', samples[0].metadata['initial_version']) + self.assertEqual('1.34', samples[0].metadata['target_version']) + + +class RunScenarioATest(pkb_common_test_case.PkbCommonTestCase): + """Tests for the _RunScenarioA phase-by-phase and pipelined modes.""" + + @flagsaver.flagsaver( + k8s_mgmt_concurrent_nodepools=2, + k8s_mgmt_nodes_per_nodepool=1, + k8s_mgmt_max_concurrent=50, + k8s_mgmt_pipeline_scenario_a=False, + ) + def testPhaseByPhaseProducesCreateUpgradeDeleteSamples(self): + """Tests Scenario A produces Create, Upgrade, and Delete samples.""" + cluster = _make_mock_cluster(pool_names=['pkbma000', 'pkbma001']) + samples = kubernetes_management_benchmark._RunScenarioA( + cluster, '1.33', '1.34' + ) + metrics = {s.metric for s in samples} + self.assertTrue(any('ScenarioA_Create' in m for m in metrics)) + self.assertTrue(any('ScenarioA_Upgrade' in m for m in metrics)) + self.assertTrue(any('ScenarioA_Delete' in m for m in metrics)) + + @flagsaver.flagsaver( + k8s_mgmt_concurrent_nodepools=2, + k8s_mgmt_nodes_per_nodepool=1, + k8s_mgmt_max_concurrent=50, + k8s_mgmt_pipeline_scenario_a=False, + ) + def testPhaseByPhasePassesInitialVersionToCreate(self): + """Tests _RunScenarioA passes 
initial_version to CreateNodePoolAsync.""" + cluster = _make_mock_cluster(pool_names=['pkbma000', 'pkbma001']) + kubernetes_management_benchmark._RunScenarioA(cluster, '1.33', '1.34') + for call in cluster.CreateNodePoolAsync.call_args_list: + kw = call.kwargs if call.kwargs else {} + pos = call.args + node_version = ( + kw.get('node_version') or (pos[1] if len(pos) > 1 else None) + ) + self.assertEqual('1.33', node_version) + + @flagsaver.flagsaver( + k8s_mgmt_concurrent_nodepools=2, + k8s_mgmt_nodes_per_nodepool=1, + k8s_mgmt_max_concurrent=50, + k8s_mgmt_pipeline_scenario_a=False, + ) + def testPhaseByPhaseDeleteUsesLivePoolList(self): + """Tests that _RunScenarioA deletes only the pools it finds at runtime.""" + cluster = _make_mock_cluster(pool_names=['pkbma000']) + kubernetes_management_benchmark._RunScenarioA(cluster, '1.33', '1.34') + self.assertEqual(1, cluster.DeleteNodePoolAsync.call_count) + + @flagsaver.flagsaver( + k8s_mgmt_concurrent_nodepools=2, + k8s_mgmt_nodes_per_nodepool=1, + k8s_mgmt_max_concurrent=50, + k8s_mgmt_pipeline_scenario_a=True, + ) + def testPipelinedModeActivatedByFlag(self): + """Tests pipelined mode is activated by the pipeline_scenario_a flag.""" + cluster = _make_mock_cluster(pool_names=[]) + samples = kubernetes_management_benchmark._RunScenarioA( + cluster, '1.33', '1.34' + ) + metrics = {s.metric for s in samples} + self.assertTrue(any('ScenarioA_Create' in m for m in metrics)) + self.assertTrue(any('ScenarioA_Upgrade' in m for m in metrics)) + self.assertTrue(any('ScenarioA_Delete' in m for m in metrics)) + + +class RunScenarioAPipelinedTest(pkb_common_test_case.PkbCommonTestCase): + """Tests for the _RunScenarioAPipelined pipelined execution path.""" + + @flagsaver.flagsaver( + k8s_mgmt_nodes_per_nodepool=1, + k8s_mgmt_max_concurrent=50, + ) + def testPipelinedProducesAllThreePhases(self): + """Tests pipelined Scenario A produces Create/Upgrade/Delete samples.""" + cluster = _make_mock_cluster(pool_names=[]) + samples = 
kubernetes_management_benchmark._RunScenarioAPipelined( + cluster, n=2, initial='1.33', target='1.34' + ) + metrics = {s.metric for s in samples} + self.assertTrue(any('ScenarioA_Create' in m for m in metrics)) + self.assertTrue(any('ScenarioA_Upgrade' in m for m in metrics)) + self.assertTrue(any('ScenarioA_Delete' in m for m in metrics)) + + @flagsaver.flagsaver( + k8s_mgmt_nodes_per_nodepool=1, + k8s_mgmt_max_concurrent=50, + ) + def testPipelinedSkipsUpgradeAfterCreateFailure(self): + """Tests pipelined mode skips upgrade when create fails.""" + cluster = _make_mock_cluster(pool_names=[]) + cluster.CreateNodePoolAsync.side_effect = RuntimeError('create failed') + samples = kubernetes_management_benchmark._RunScenarioAPipelined( + cluster, n=1, initial='1.33', target='1.34' + ) + cluster.UpgradeNodePoolAsync.assert_not_called() + upgrade_rate = next( + (s for s in samples if s.metric == 'ScenarioA_Upgrade_SuccessRate'), + None, + ) + if upgrade_rate is not None: + self.assertEqual(0.0, upgrade_rate.value) + + +class RunScenarioBTest(pkb_common_test_case.PkbCommonTestCase): + """Tests for the _RunScenarioB cluster-update + nodepool-create scenario.""" + + @flagsaver.flagsaver( + k8s_mgmt_nodes_per_nodepool=1, + k8s_mgmt_max_concurrent=50, + ) + def testProducesClusterUpdateAndNodePoolCreateSamples(self): + cluster = _make_mock_cluster(pool_names=[]) + samples = kubernetes_management_benchmark._RunScenarioB(cluster, '1.33') + metrics = {s.metric for s in samples} + self.assertTrue(any('ScenarioB_ClusterUpdate' in m for m in metrics)) + self.assertTrue(any('ScenarioB_NodePoolCreate' in m for m in metrics)) + + @flagsaver.flagsaver( + k8s_mgmt_nodes_per_nodepool=1, + k8s_mgmt_max_concurrent=50, + ) + def testDeletesTestPoolAfterRun(self): + cluster = _make_mock_cluster(pool_names=[]) + kubernetes_management_benchmark._RunScenarioB(cluster, '1.33') + cluster.DeleteNodePool.assert_called_once_with( + kubernetes_management_benchmark._SCENARIO_B_NAME + ) + + 
@flagsaver.flagsaver( + k8s_mgmt_nodes_per_nodepool=1, + k8s_mgmt_max_concurrent=50, + ) + def testToleratesDeleteFailure(self): + cluster = _make_mock_cluster(pool_names=[]) + cluster.DeleteNodePool.side_effect = RuntimeError('delete failed') + kubernetes_management_benchmark._RunScenarioB(cluster, '1.33') + + @flagsaver.flagsaver( + k8s_mgmt_nodes_per_nodepool=1, + k8s_mgmt_max_concurrent=50, + ) + def testPassesInitialVersionToCreate(self): + """Tests _RunScenarioB passes initial_version to CreateNodePoolAsync.""" + cluster = _make_mock_cluster(pool_names=[]) + kubernetes_management_benchmark._RunScenarioB(cluster, '1.33') + for call in cluster.CreateNodePoolAsync.call_args_list: + kw = call.kwargs if call.kwargs else {} + pos = call.args + node_version = ( + kw.get('node_version') or (pos[1] if len(pos) > 1 else None) + ) + self.assertEqual('1.33', node_version) + + +class RunScenarioCTest(pkb_common_test_case.PkbCommonTestCase): + """Tests for the _RunScenarioC large-scale create-and-delete scenario.""" + + @flagsaver.flagsaver( + k8s_mgmt_nodes_per_nodepool=1, + k8s_mgmt_max_concurrent=50, + ) + def testProducesCreateAndDeleteSamples(self): + cluster = _make_mock_cluster(pool_names=['pkbmc0000', 'pkbmc0001']) + samples = kubernetes_management_benchmark._RunScenarioC( + cluster, '1.33', scale=2 + ) + metrics = {s.metric for s in samples} + self.assertTrue(any('ScenarioC_Create' in m for m in metrics)) + self.assertTrue(any('ScenarioC_Delete' in m for m in metrics)) + + @flagsaver.flagsaver( + k8s_mgmt_nodes_per_nodepool=1, + k8s_mgmt_max_concurrent=50, + ) + def testZeroLivePoolsRecordsZeroDeleteSuccessRate(self): + """Tests Scenario C records 0% delete rate when no live pools exist.""" + cluster = _make_mock_cluster(pool_names=[]) + samples = kubernetes_management_benchmark._RunScenarioC( + cluster, '1.33', scale=3 + ) + delete_rate = next( + s for s in samples if s.metric == 'ScenarioC_Delete_SuccessRate' + ) + self.assertEqual(0.0, delete_rate.value) + 
cluster.DeleteNodePoolAsync.assert_not_called() + + @flagsaver.flagsaver( + k8s_mgmt_nodes_per_nodepool=1, + k8s_mgmt_max_concurrent=50, + ) + def testDeleteUsesLiveListNotOriginalCreateList(self): + cluster = _make_mock_cluster(pool_names=['pkbmc0000', 'pkbmc0001']) + kubernetes_management_benchmark._RunScenarioC(cluster, '1.33', scale=3) + self.assertEqual(2, cluster.DeleteNodePoolAsync.call_count) + + @flagsaver.flagsaver( + k8s_mgmt_nodes_per_nodepool=1, + k8s_mgmt_max_concurrent=50, + ) + def testCreateSuccessRateUsesScaleAsDenominator(self): + """Tests Scenario C create success rate uses scale as total_ops.""" + cluster = _make_mock_cluster(pool_names=['pkbmc0000']) + samples = kubernetes_management_benchmark._RunScenarioC( + cluster, '1.33', scale=3 + ) + create_rate = next( + s for s in samples if s.metric == 'ScenarioC_Create_SuccessRate' + ) + self.assertLessEqual(create_rate.value, 100.0) + self.assertEqual('3', create_rate.metadata['total_ops']) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/providers/aws/elastic_kubernetes_service_test.py b/tests/providers/aws/elastic_kubernetes_service_test.py index 90d28eb834..bc8744d565 100644 --- a/tests/providers/aws/elastic_kubernetes_service_test.py +++ b/tests/providers/aws/elastic_kubernetes_service_test.py @@ -1,12 +1,16 @@ +"""Tests for the AWS Elastic Kubernetes Service provider.""" +# pylint: disable=invalid-name,protected-access + import json import os import tempfile import unittest from unittest import mock from urllib import parse -from absl.testing import flagsaver -from absl.testing import parameterized +from absl.testing import flagsaver # pylint: disable=import-error +from absl.testing import parameterized # pylint: disable=import-error from perfkitbenchmarker import data +from perfkitbenchmarker import errors from perfkitbenchmarker import network from perfkitbenchmarker import vm_util from perfkitbenchmarker.configs import container_spec @@ -34,6 +38,7 @@ class 
BaseEksTest(pkb_common_test_case.PkbCommonTestCase): + """Base test class providing common EKS cluster setup and mock helpers.""" def setUp(self): super().setUp() @@ -80,11 +85,13 @@ def MockJsonRead(self, cluster: elastic_kubernetes_service.BaseEksCluster): class ElasticKubernetesServiceTest(BaseEksTest): + """Tests for the managed-nodegroup EksCluster provider.""" def testInitEksClusterWorks(self): elastic_kubernetes_service.EksCluster(EKS_SPEC) def testEksClusterCreateRegion(self): + """EksCluster._Create() without explicit AZ omits availabilityZones.""" self.MockIssueCommand({'create cluster': [('Cluster created', '', 0)]}) spec = container_spec.ContainerClusterSpec( 'NAME', @@ -121,6 +128,9 @@ def testEksClusterCreateRegion(self): ) def testEksClusterCreateZone(self): + """EksCluster._Create() with a zone issues the expected eksctl commands.""" + ebs_policy = 'arn:aws:iam::aws:policy/service-role/AmazonEBSCSIDriverPolicy' + ebs_role = 'arn:aws:iam::1234:role/AmazonEKS_EBS_CSI_DriverRole_pkb-123p' issue_command = self.MockIssueCommand( {'create cluster': [('Cluster created', '', 0)]} ) @@ -136,7 +146,7 @@ def testEksClusterCreateZone(self): '--namespace=kube-system', '--region=us-west-1', '--cluster=pkb-123p', - '--attach-policy-arn=arn:aws:iam::aws:policy/service-role/AmazonEBSCSIDriverPolicy', + f'--attach-policy-arn={ebs_policy}', '--approve', '--role-only', '--role-name=AmazonEKS_EBS_CSI_DriverRole_pkb-123p', @@ -148,7 +158,7 @@ def testEksClusterCreateZone(self): '--name=aws-ebs-csi-driver', '--region=us-west-1', '--cluster=pkb-123p', - '--service-account-role-arn=arn:aws:iam::1234:role/AmazonEKS_EBS_CSI_DriverRole_pkb-123p', + f'--service-account-role-arn={ebs_role}', ]), ]) assert self.patched_read_json is not None @@ -158,6 +168,7 @@ def testEksClusterCreateZone(self): ) def testEksClusterNodepools(self): + """Additional nodepools appear in the managedNodeGroups config.""" self.MockIssueCommand({'create cluster': [('Cluster created', '', 0)]}) spec2 = 
EKS_SPEC_DICT.copy() spec2['nodepools'] = { @@ -200,6 +211,7 @@ def testEksClusterNodepools(self): ) def testEksClusterNodepoolsAutoscaling(self): + """Autoscaling min/max/desired values propagate to managedNodeGroups.""" self.MockIssueCommand({'create cluster': [('Cluster created', '', 0)]}) spec2 = EKS_SPEC_DICT.copy() spec2['min_vm_count'] = 1 @@ -236,6 +248,7 @@ def testEksClusterNodepoolsAutoscaling(self): self.assertEqual(node_groups[1]['desiredCapacity'], 3) def testGetNodePoolNames(self): + """GetNodePoolNames returns list of nodegroup names from eksctl output.""" # Mock the output of the aws cli command cluster = elastic_kubernetes_service.EksCluster(EKS_SPEC) @@ -255,6 +268,7 @@ def testGetNodePoolNames(self): ) def testGetNodePoolNamesKarpenter(self): + """GetNodePoolNames on Karpenter cluster returns kubectl nodepool names.""" cluster = elastic_kubernetes_service.EksKarpenterCluster(EKS_SPEC) self.MockIssueCommand({ 'kubectl --kubeconfig get nodepool -o json': [( @@ -275,6 +289,7 @@ def testGetNodePoolNamesKarpenter(self): ('standard nodepool', 'nginx', 'nginx'), ) def testEksClusterGetNodepoolFromName(self, nodepool_name, expected_name): + """GetNodePoolFromNodeName resolves a node name to its nodepool.""" self.MockIssueCommand({'get node': [(nodepool_name, '', 0)]}) spec2 = EKS_SPEC_DICT.copy() spec2['nodepools'] = { @@ -296,6 +311,7 @@ def testEksClusterGetNodepoolFromName(self, nodepool_name, expected_name): self.assertEqual(nodepool.name, expected_name) def testEksClusterNotFound(self): + """GetNodePoolFromNodeName returns None when node is not found.""" self.MockIssueCommand({'get node': [('', '', 0)]}) spec2 = EKS_SPEC_DICT.copy() spec2['nodepools'] = { @@ -326,6 +342,7 @@ def testEksClusterGetMachineTypeFromNodeName(self): class EksAutoClusterTest(BaseEksTest): + """Tests for the auto-mode EksAutoCluster provider.""" def testInitEksClusterWorks(self): elastic_kubernetes_service.EksAutoCluster(EKS_SPEC) @@ -340,6 +357,7 @@ def 
testEksClusterCreate(self): self.assertEqual(called_json['autoModeConfig'], {'enabled': True}) def testEksClusterIsReady(self): + """EksAutoCluster._IsReady() returns True when cluster-info succeeds.""" self.enter_context( mock.patch.object( kubectl, @@ -347,7 +365,8 @@ def testEksClusterIsReady(self): return_value=( ( r'^[[0;32mKubernetes control plane^[[0m is running at' - r' ^[[0;33mhttps://RAND1234.gr7.us-west-1.eks.amazonaws.com^[[0mTo' + r' ^[[0;33mhttps://RAND1234.gr7.us-west-1.' + r'eks.amazonaws.com^[[0mTo' " further debug and diagnose cluster problems, use 'kubectl" " cluster-info dump'." ), @@ -361,6 +380,7 @@ def testEksClusterIsReady(self): class EksKarpenterTest(BaseEksTest): + """Tests for the Karpenter-based EksKarpenterCluster provider.""" def setUp(self): super().setUp() @@ -380,6 +400,7 @@ def testInitEksClusterWorks(self): @flagsaver.flagsaver(kubeconfig='/tmp/kubeconfig') def testEksYamlCreateFull(self): + """EksKarpenterCluster._Create() produces the expected eksctl yaml.""" cluster = elastic_kubernetes_service.EksKarpenterCluster(EKS_SPEC) self.MockJsonRead(cluster) mock_cmd = self.MockIssueCommand({ @@ -452,6 +473,7 @@ def testEksYamlCreateFull(self): ) @flagsaver.flagsaver(kubeconfig='/tmp/kubeconfig') def testEksYamlCreateFullNodepools(self, nodepool_config, expected_content): + """EksKarpenterCluster._PostCreate() logs expected nodepool yaml.""" # Mock resources for _PostCreate self.MockIssueCommand({ 'helm upgrade --install karpenter': [('', '', 0)], @@ -515,28 +537,17 @@ def testRecursiveDictionaryUpdate(self): expected = {'a': 3, 'deep': {'c': 2, 'd': 4}, 'f': 12} self.assertEqual( expected, - elastic_kubernetes_service.RecursivelyUpdateDictionary(base, update), + elastic_kubernetes_service._recursively_update_dictionary(base, update), ) def testIngressAddressParsing(self): """Test parsing AWS ALB address with dualstack prefix removal.""" + elb_host = 'k8s-test-ingress-abc12345ef-123456789.us-east-1.elb.amazonaws.com' test_cases = [ - 
( - 'http://dualstack.k8s-test-ingress-abc12345ef-123456789.us-east-1.elb.amazonaws.com', - 'k8s-test-ingress-abc12345ef-123456789.us-east-1.elb.amazonaws.com', - ), - ( - 'https://dualstack.k8s-test-ingress-abc12345ef-123456789.us-east-1.elb.amazonaws.com', - 'k8s-test-ingress-abc12345ef-123456789.us-east-1.elb.amazonaws.com', - ), - ( - 'dualstack.k8s-test-ingress-abc12345ef-123456789.us-east-1.elb.amazonaws.com', - 'k8s-test-ingress-abc12345ef-123456789.us-east-1.elb.amazonaws.com', - ), - ( - 'k8s-test-ingress-abc12345ef-123456789.us-east-1.elb.amazonaws.com', - 'k8s-test-ingress-abc12345ef-123456789.us-east-1.elb.amazonaws.com', - ), + (f'http://dualstack.{elb_host}', elb_host), + (f'https://dualstack.{elb_host}', elb_host), + (f'dualstack.{elb_host}', elb_host), + (elb_host, elb_host), ] for address, expected in test_cases: with self.subTest(address=address): @@ -549,5 +560,289 @@ def testIngressAddressParsing(self): self.assertEqual(normalized, expected) +class EksManagementPlaneTest(BaseEksTest): + """Tests for EKS management-plane methods (k8s_management_benchmark).""" + + def _make_cluster(self, spec_dict=None): + spec = container_spec.ContainerClusterSpec( + 'NAME', + **(spec_dict or EKS_SPEC_DICT), + ) + cluster = elastic_kubernetes_service.EksCluster(spec) + self.MockJsonRead(cluster) + # Individual tests override via MockIssueCommand. 
+ return cluster + + def _make_nodepool_config(self, name='pkbpool0', machine_type='m5.large', + num_nodes=2): + cfg = mock.MagicMock() + cfg.name = name + cfg.num_nodes = num_nodes + cfg.machine_type = machine_type + return cfg + + # ---- CreateNodePoolAsync -------------------------------------------------- + + def testCreateNodePoolAsyncIssuesCreateNodegroup(self): + """CreateNodePoolAsync calls create-nodegroup; returns ng_active handle.""" + cluster = self._make_cluster() + # Subnets / AZ discovery stubs + cluster._cached_subnets = ['subnet-1'] + cluster._cached_subnets_per_az = {} + cluster._cached_node_role_arn = 'arn:aws:iam::1234:role/NodeRole' + self.MockIssueCommand({'create-nodegroup': [('', '', 0)]}) + + handle = cluster.CreateNodePoolAsync(self._make_nodepool_config('poolA')) + + self.assertEqual('ng_active:poolA', handle) + # Verify the json file path was written + self.assertIsNotNone(self.patched_read_json) + + def testCreateNodePoolAsyncReturnsNgActiveHandle(self): + """CreateNodePoolAsync returns 'ng_active:' on success.""" + cluster = self._make_cluster() + cluster._cached_subnets = ['subnet-1'] + cluster._cached_subnets_per_az = {} + cluster._cached_node_role_arn = 'arn:aws:iam::1234:role/NodeRole' + self.MockIssueCommand({'': [('', '', 0)]}) + + handle = cluster.CreateNodePoolAsync(self._make_nodepool_config('myng')) + self.assertEqual('ng_active:myng', handle) + + def testCreateNodePoolAsyncRaisesOnFailure(self): + """CreateNodePoolAsync raises CreationError when the CLI fails.""" + cluster = self._make_cluster() + cluster._cached_subnets = ['subnet-1'] + cluster._cached_subnets_per_az = {} + cluster._cached_node_role_arn = 'arn:aws:iam::1234:role/NodeRole' + self.MockIssueCommand({'': [('', 'error msg', 1)]}) + + with self.assertRaises(Exception): + cluster.CreateNodePoolAsync(self._make_nodepool_config('failng')) + + # ---- UpgradeNodePoolAsync ------------------------------------------------- + + def 
testUpgradeNodePoolAsyncReturnsNgActiveHandle(self): + """UpgradeNodePoolAsync calls update-nodegroup-version; returns handle.""" + cluster = self._make_cluster() + mock_cmd = self.MockIssueCommand( + {'update-nodegroup-version': [('', '', 0)]} + ) + handle = cluster.UpgradeNodePoolAsync('my-ng', '1.34') + + self.assertEqual('ng_active:my-ng', handle) + self.assertIn('update-nodegroup-version', mock_cmd.all_commands) + self.assertIn('--kubernetes-version 1.34', mock_cmd.all_commands) + + def testUpgradeNodePoolAsyncRaisesOnFailure(self): + """UpgradeNodePoolAsync raises on non-zero exit code.""" + cluster = self._make_cluster() + self.MockIssueCommand({'': [('', 'oops', 1)]}) + with self.assertRaises(Exception): + cluster.UpgradeNodePoolAsync('bad-ng', '1.34') + + # ---- DeleteNodePoolAsync -------------------------------------------------- + + def testDeleteNodePoolAsyncReturnsNgGoneHandle(self): + """DeleteNodePoolAsync calls delete-nodegroup, returns ng_gone handle.""" + cluster = self._make_cluster() + mock_cmd = self.MockIssueCommand({'delete-nodegroup': [('', '', 0)]}) + handle = cluster.DeleteNodePoolAsync('old-ng') + + self.assertEqual('ng_gone:old-ng', handle) + self.assertIn('delete-nodegroup', mock_cmd.all_commands) + self.assertIn('--nodegroup-name old-ng', mock_cmd.all_commands) + + # ---- UpdateClusterAsync --------------------------------------------------- + + def testUpdateClusterAsyncReturnsClusterUpdateHandle(self): + """UpdateClusterAsync returns 'cluster_update:'.""" + cluster = self._make_cluster() + describe_out = json.dumps({ + 'cluster': {'logging': {'clusterLogging': []}} + }) + update_out = json.dumps({'update': {'id': 'u-abc123'}}) + self.MockIssueCommand({ + 'describe-cluster': [(describe_out, '', 0)], + 'update-cluster-config': [(update_out, '', 0)], + }) + handle = cluster.UpdateClusterAsync() + self.assertEqual('cluster_update:u-abc123', handle) + + def testUpdateClusterAsyncTogglesLogging(self): + """UpdateClusterAsync toggles 
logging enable state.""" + cluster = self._make_cluster() + # Current state: logging disabled + describe_out = json.dumps({ + 'cluster': {'logging': {'clusterLogging': [{'enabled': False}]}} + }) + update_out = json.dumps({'update': {'id': 'u-xyz'}}) + mock_cmd = self.MockIssueCommand({ + 'describe-cluster': [(describe_out, '', 0)], + 'update-cluster-config': [(update_out, '', 0)], + }) + cluster.UpdateClusterAsync() + self.assertIn('update-cluster-config', mock_cmd.all_commands) + self.assertIn('--logging', mock_cmd.all_commands) + + # ---- WaitForOperation ----------------------------------------------------- + + def testWaitForOperationNgActiveSuccess(self): + """WaitForOperation(ng_active:name) returns when nodegroup is ACTIVE.""" + cluster = self._make_cluster() + ng_out = json.dumps({'nodegroup': {'status': 'ACTIVE'}}) + self.MockIssueCommand({'describe-nodegroup': [(ng_out, '', 0)]}) + # Should not raise + cluster.WaitForOperation('ng_active:my-ng') + + def testWaitForOperationNgActiveFailedRaises(self): + """WaitForOperation raises CreationError on CREATE_FAILED nodegroup.""" + cluster = self._make_cluster() + ng_out = json.dumps({'nodegroup': {'status': 'CREATE_FAILED'}}) + self.MockIssueCommand({'describe-nodegroup': [(ng_out, '', 0)]}) + with self.assertRaises(Exception): + cluster.WaitForOperation('ng_active:bad-ng') + + def testWaitForOperationNgGoneSuccess(self): + """WaitForOperation(ng_gone:name) returns on ResourceNotFoundException.""" + cluster = self._make_cluster() + self.MockIssueCommand({ + 'describe-nodegroup': [('', 'ResourceNotFoundException', 1)] + }) + # Should not raise + cluster.WaitForOperation('ng_gone:deleted-ng') + + def testWaitForOperationClusterUpdateSuccess(self): + """WaitForOperation(cluster_update:id) returns when update is Successful.""" + cluster = self._make_cluster() + self.MockIssueCommand({'describe-update': [('Successful\n', '', 0)]}) + # Should not raise + cluster.WaitForOperation('cluster_update:u-999') + + def 
testWaitForOperationClusterUpdateFailedRaises(self): + """WaitForOperation raises when cluster update ends in Failed.""" + cluster = self._make_cluster() + self.MockIssueCommand({'describe-update': [('Failed\n', '', 0)]}) + with self.assertRaises(Exception): + cluster.WaitForOperation('cluster_update:u-fail') + + def testWaitForOperationUnknownHandleRaises(self): + """WaitForOperation raises ValueError for unknown handle prefix.""" + cluster = self._make_cluster() + with self.assertRaises(ValueError): + cluster.WaitForOperation('unknown_handle:xyz') + + # ---- ResolveNodePoolVersions ---------------------------------------------- + + def testResolveNodePoolVersionsNMinus1Math(self): + """ResolveNodePoolVersions returns (N-1, N) from cluster_version.""" + cluster = self._make_cluster() + cluster.cluster_version = '1.34' + initial, target = cluster.ResolveNodePoolVersions() + self.assertEqual('1.33', initial) + self.assertEqual('1.34', target) + + def testResolveNodePoolVersionsStripsMinorPatch(self): + """ResolveNodePoolVersions strips patch from version strings.""" + cluster = self._make_cluster() + cluster.cluster_version = '1.33.7' + initial, target = cluster.ResolveNodePoolVersions() + self.assertEqual('1.32', initial) + self.assertEqual('1.33', target) + + # ---- _DiscoverSubnets ----------------------------------------------------- + + def testDiscoverSubnets(self): + """_DiscoverSubnets returns subnet IDs from describe-cluster.""" + cluster = self._make_cluster() + describe_out = json.dumps({ + 'cluster': { + 'resourcesVpcConfig': { + 'subnetIds': ['subnet-aaa', 'subnet-bbb'] + } + } + }) + self.MockIssueCommand({'describe-cluster': [(describe_out, '', 0)]}) + subnets = cluster._DiscoverSubnets() + self.assertEqual(['subnet-aaa', 'subnet-bbb'], subnets) + + def testDiscoverSubnetsCached(self): + """_DiscoverSubnets uses cached result on second call.""" + cluster = self._make_cluster() + cluster._cached_subnets = ['subnet-cached'] + # No IssueCommand calls 
expected because cache is used + with mock.patch.object(vm_util, 'IssueCommand') as mock_issue: + result = cluster._DiscoverSubnets() + mock_issue.assert_not_called() + self.assertEqual(['subnet-cached'], result) + + # ---- _DiscoverSubnetsPerAZ ------------------------------------------------ + + def testDiscoverSubnetsPerAZBuildsAzMap(self): + """_DiscoverSubnetsPerAZ builds a {AZ: subnet_id} map from EC2.""" + cluster = self._make_cluster() + cluster._cached_subnets = ['subnet-a1', 'subnet-b2'] + subnets_out = json.dumps([ + {'SubnetId': 'subnet-a1', 'AZ': 'us-west-1a'}, + {'SubnetId': 'subnet-b2', 'AZ': 'us-west-1b'}, + ]) + self.MockIssueCommand({'describe-subnets': [(subnets_out, '', 0)]}) + az_map = cluster._DiscoverSubnetsPerAZ() + self.assertEqual({'us-west-1a': 'subnet-a1', 'us-west-1b': 'subnet-b2'}, + az_map) + + # ---- _DiscoverNodeRoleArn ------------------------------------------------- + + def testDiscoverNodeRoleArn(self): + """_DiscoverNodeRoleArn returns role ARN from the first nodegroup.""" + cluster = self._make_cluster() + list_out = json.dumps({'nodegroups': ['ng1']}) + describe_out = json.dumps({ + 'nodegroup': {'nodeRole': 'arn:aws:iam::1234:role/MyRole'} + }) + self.MockIssueCommand({ + 'list-nodegroups': [(list_out, '', 0)], + 'describe-nodegroup': [(describe_out, '', 0)], + }) + arn = cluster._DiscoverNodeRoleArn() + self.assertEqual('arn:aws:iam::1234:role/MyRole', arn) + + def testDiscoverNodeRoleArnRaisesWhenNoNodegroup(self): + """_DiscoverNodeRoleArn raises CreationError when no nodegroups found.""" + cluster = self._make_cluster() + list_out = json.dumps({'nodegroups': []}) + self.MockIssueCommand({'list-nodegroups': [(list_out, '', 0)]}) + with self.assertRaises(errors.Resource.CreationError): + cluster._DiscoverNodeRoleArn() + + # ---- _ResolveReleaseVersion ----------------------------------------------- + + def testResolveReleaseVersion(self): + """_ResolveReleaseVersion returns the SSM parameter value.""" + cluster = 
self._make_cluster() + self.MockIssueCommand({ + 'get-parameter': [('1.33.10-20260101\n', '', 0)] + }) + version = cluster._ResolveReleaseVersion('1.33') + self.assertEqual('1.33.10-20260101', version) + + def testResolveReleaseVersionCached(self): + """_ResolveReleaseVersion uses cache for repeated calls.""" + cluster = self._make_cluster() + self.MockIssueCommand({ + 'get-parameter': [('1.34.2-20260101\n', '', 0)] + }) + v1 = cluster._ResolveReleaseVersion('1.34') + v2 = cluster._ResolveReleaseVersion('1.34') + self.assertEqual(v1, v2) + + def testResolveReleaseVersionRaisesOnFailure(self): + """_ResolveReleaseVersion raises CreationError when SSM lookup fails.""" + cluster = self._make_cluster() + self.MockIssueCommand({'get-parameter': [('', 'not found', 1)]}) + with self.assertRaises(errors.Resource.CreationError): + cluster._ResolveReleaseVersion('1.99') + + if __name__ == '__main__': unittest.main() diff --git a/tests/providers/azure/azure_kubernetes_service_test.py b/tests/providers/azure/azure_kubernetes_service_test.py index 7ca09fb29c..3f6334e998 100644 --- a/tests/providers/azure/azure_kubernetes_service_test.py +++ b/tests/providers/azure/azure_kubernetes_service_test.py @@ -1,3 +1,6 @@ +"""Tests for the Azure Kubernetes Service provider.""" +# pylint: disable=invalid-name,protected-access + import unittest from unittest import mock from absl.testing import flagsaver @@ -7,12 +10,14 @@ from perfkitbenchmarker.providers.azure import azure_kubernetes_service from perfkitbenchmarker.providers.azure import azure_network from perfkitbenchmarker.providers.azure import util -from tests import pkb_common_test_case +from tests import pkb_common_test_case # pylint: disable=no-name-in-module class AzureKubernetesServiceTest(pkb_common_test_case.PkbCommonTestCase): + """Tests for the AksCluster provider.""" def setUp(self): + """Sets up mocks and creates a default AksCluster for each test.""" super().setUp() self.enter_context( mock.patch.object( @@ -70,6 +75,7 @@ 
def initAksCluster(self, spec_dict): self.aks.resource_group.args = [] def testCreate(self): + """AksCluster.Create() issues the expected az aks create command.""" mock_cmd = self.MockIssueCommand( { 'az aks create': [('', '', 0)], @@ -119,6 +125,7 @@ def testCreate(self): ) def testCreateError(self): + """AksCluster.Create() raises CreationError when az aks create fails.""" self.MockIssueCommand( { 'az aks create': [('out', 'Error could not create', 1)], @@ -141,6 +148,7 @@ def testCreateError(self): self.aks.Create() def testCreateNodepool(self): + """Additional nodepools appear in az aks nodepool add commands.""" mock_cmd = self.MockIssueCommand( { 'az aks create': [('', '', 0)], @@ -207,6 +215,7 @@ def testCreateAutoscaler(self): ) def testCreateAutoscaler_NodepoolAndClamps(self): + """Autoscaler min/max/desired values propagate to nodepool add commands.""" mock_cmd = self.MockIssueCommand( { 'az aks create': [('', '', 0)], @@ -229,12 +238,13 @@ def testCreateAutoscaler_NodepoolAndClamps(self): self.aks._Create() self.assertIn( '--enable-cluster-autoscaler --min-count=4 --max-count=6' - ' --node-count=4', + + ' --node-count=4', mock_cmd.all_commands, ) @flagsaver.flagsaver(kubectl='kubectl', kubeconfig='dummy') def testFullCreateAksAutomatic(self): + """AksAutomaticCluster.Create() issues RBAC and policy assignment cmds.""" aks_auto = azure_kubernetes_service.AksAutomaticCluster(self.spec) aks_auto.resource_group.name = 'resource-group' mock_cmd = self.MockIssueCommand( @@ -258,7 +268,8 @@ def testFullCreateAksAutomatic(self): ('servicePrincipal', '', 0), ('user-name', '', 0), ( - 'test-user@example.com\n12345678-1234-1234-1234-123456789abc', + 'test-user@example.com\n' + + '12345678-1234-1234-1234-123456789abc', '', 0, ), @@ -272,7 +283,7 @@ def testFullCreateAksAutomatic(self): aks_auto.Create() self.assertIn( 'az role assignment create --assignee user-name --role Azure Kubernetes' - ' Service RBAC Admin', + + ' Service RBAC Admin', mock_cmd.all_commands, ) 
self.assertIn( @@ -281,11 +292,12 @@ def testFullCreateAksAutomatic(self): ) self.assertIn( 'az policy assignment update --name' - ' aks-deployment-safeguards-policy-assignment', + + ' aks-deployment-safeguards-policy-assignment', mock_cmd.all_commands, ) def testGetNodePoolNames(self): + """GetNodePoolNames returns pool names from az aks nodepool list output.""" self.MockIssueCommand( { 'az aks nodepool list': [( @@ -308,5 +320,238 @@ def testGetNodePoolNames(self): self.assertEqual(self.aks.GetNodePoolNames(), ['default', 'nodepool1']) +class AksManagementPlaneTest(AzureKubernetesServiceTest): + """Tests for AKS management-plane methods (k8s_management_benchmark).""" + + # These tests are inherited from AzureKubernetesServiceTest but are not + # relevant to the management-plane test suite. Override to skip them so + # they don't pollute the AksManagementPlaneTest results. + def testCreate(self): + pass + + def testCreateError(self): + pass + + def _make_nodepool_config(self, name='pkbpool0', + machine_type='Standard_D2s_v5', + num_nodes=2): + cfg = mock.MagicMock() + cfg.name = name + cfg.num_nodes = num_nodes + cfg.machine_type = machine_type + cfg.min_nodes = num_nodes + cfg.max_nodes = num_nodes + cfg.disk_size = 100 + return cfg + + # ---- CreateNodePool ------------------------------------------------------- + + def testCreateNodePool(self): + """CreateNodePool issues 'az aks nodepool add' with cluster-name.""" + mock_cmd = self.MockIssueCommand({'az aks nodepool add': [('', '', 0)]}) + self.aks.CreateNodePool(self._make_nodepool_config('testpool')) + + self.assertIn('az aks nodepool add', mock_cmd.all_commands) + self.assertIn('--cluster-name', mock_cmd.all_commands) + self.assertIn('--labels', mock_cmd.all_commands) + + def testCreateNodePoolWithVersion(self): + """CreateNodePool passes --kubernetes-version when node_version is set.""" + self.aks.cluster_version = '1.33' + mock_cmd = self.MockIssueCommand({'az aks nodepool add': [('', '', 0)]}) + 
self.aks.CreateNodePool( + self._make_nodepool_config('verpool'), node_version='1.32' + ) + self.assertIn('--kubernetes-version 1.32', mock_cmd.all_commands) + + def testCreateNodePoolRaisesOnFailure(self): + """CreateNodePool raises CreationError when CLI fails.""" + self.MockIssueCommand({'az aks nodepool add': [('', 'error', 1)]}) + with self.assertRaises(errors.Resource.CreationError): + self.aks.CreateNodePool(self._make_nodepool_config('failpool')) + + # ---- DeleteNodePool ------------------------------------------------------- + + def testDeleteNodePool(self): + """DeleteNodePool issues 'az aks nodepool delete' with cluster-name.""" + mock_cmd = self.MockIssueCommand( + {'az aks nodepool delete': [('', '', 0)]} + ) + self.aks.DeleteNodePool('old-pool') + + self.assertIn('az aks nodepool delete', mock_cmd.all_commands) + self.assertIn('--cluster-name', mock_cmd.all_commands) + + # ---- UpgradeNodePool ------------------------------------------------------ + + def testUpgradeNodePool(self): + """UpgradeNodePool issues 'az aks nodepool upgrade' with version.""" + mock_cmd = self.MockIssueCommand( + {'az aks nodepool upgrade': [('', '', 0)]} + ) + self.aks.UpgradeNodePool('my-pool', '1.34') + + self.assertIn('az aks nodepool upgrade', mock_cmd.all_commands) + self.assertIn('--kubernetes-version 1.34', mock_cmd.all_commands) + + # ---- UpdateCluster -------------------------------------------------------- + + def testUpdateCluster(self): + """UpdateCluster issues 'az aks update' with a timestamp tag.""" + mock_cmd = self.MockIssueCommand({'az aks update': [('', '', 0)]}) + self.aks.UpdateCluster() + + self.assertIn('az aks update', mock_cmd.all_commands) + self.assertIn('--tags', mock_cmd.all_commands) + self.assertIn('k8s-mgmt-ts=', mock_cmd.all_commands) + + # ---- CreateNodePoolAsync -------------------------------------------------- + + def testCreateNodePoolAsyncReturnsNpSucceededHandle(self): + """CreateNodePoolAsync issues nodepool add with --no-wait.""" 
+ mock_cmd = self.MockIssueCommand( + {'az aks nodepool add': [('', '', 0)]} + ) + handle = self.aks.CreateNodePoolAsync(self._make_nodepool_config('apool')) + + self.assertIn('--no-wait', mock_cmd.all_commands) + self.assertTrue(handle.startswith('np_succeeded:')) + + def testCreateNodePoolAsyncRaisesOnFailure(self): + """CreateNodePoolAsync raises CreationError on CLI failure.""" + self.MockIssueCommand({'az aks nodepool add': [('', 'err', 1)]}) + with self.assertRaises(errors.Resource.CreationError): + self.aks.CreateNodePoolAsync(self._make_nodepool_config('failpool')) + + # ---- UpgradeNodePoolAsync ------------------------------------------------- + + def testUpgradeNodePoolAsyncReturnsNpSucceededHandle(self): + """UpgradeNodePoolAsync issues upgrade with --no-wait.""" + mock_cmd = self.MockIssueCommand( + {'az aks nodepool upgrade': [('', '', 0)]} + ) + handle = self.aks.UpgradeNodePoolAsync('my-pool', '1.34') + + self.assertIn('--no-wait', mock_cmd.all_commands) + self.assertTrue(handle.startswith('np_succeeded:')) + self.assertIn('--kubernetes-version 1.34', mock_cmd.all_commands) + + # ---- DeleteNodePoolAsync -------------------------------------------------- + + def testDeleteNodePoolAsyncReturnsNpGoneHandle(self): + """DeleteNodePoolAsync issues delete with --no-wait.""" + mock_cmd = self.MockIssueCommand( + {'az aks nodepool delete': [('', '', 0)]} + ) + handle = self.aks.DeleteNodePoolAsync('rm-pool') + + self.assertIn('--no-wait', mock_cmd.all_commands) + self.assertTrue(handle.startswith('np_gone:')) + + # ---- UpdateClusterAsync --------------------------------------------------- + + def testUpdateClusterAsyncScalesSystemPool(self): + """UpdateClusterAsync scales the system pool; returns cluster_succeeded.""" + pools_json = '[{"name": "nodepool1", "count": 2}]' + self.MockIssueCommand({ + 'az aks nodepool list': [(pools_json, '', 0)], + 'az aks nodepool scale': [('', '', 0)], + }) + handle = self.aks.UpdateClusterAsync() + 
self.assertEqual('cluster_succeeded', handle) + + def testUpdateClusterAsyncFallbackTagUpdate(self): + """UpdateClusterAsync falls back to tag update when nodepool list fails.""" + self.MockIssueCommand({ + 'az aks nodepool list': [('', 'err', 1)], + 'az aks update': [('', '', 0)], + }) + handle = self.aks.UpdateClusterAsync() + self.assertEqual('cluster_succeeded', handle) + + # ---- WaitForOperation ----------------------------------------------------- + + def testWaitForOperationNpSucceeded(self): + """WaitForOperation(np_succeeded:name) returns on Succeeded state.""" + self.MockIssueCommand( + {'az aks nodepool show': [('Succeeded\n', '', 0)]} + ) + # Should not raise + self.aks.WaitForOperation('np_succeeded:mypool') + + def testWaitForOperationNpSucceededFailedRaises(self): + """WaitForOperation raises CreationError on Failed provisioningState.""" + self.MockIssueCommand( + {'az aks nodepool show': [('Failed\n', '', 0)]} + ) + with self.assertRaises(errors.Resource.CreationError): + self.aks.WaitForOperation('np_succeeded:failpool') + + def testWaitForOperationNpGone(self): + """WaitForOperation(np_gone:name) returns when nodepool is not found.""" + self.MockIssueCommand({ + 'az aks nodepool show': [('', 'NotFound', 1)] + }) + # Should not raise + self.aks.WaitForOperation('np_gone:deleted-pool') + + def testWaitForOperationClusterSucceeded(self): + """WaitForOperation(cluster_succeeded) returns on Succeeded state.""" + self.MockIssueCommand({ + 'az aks show': [('Succeeded\n', '', 0)] + }) + # Should not raise + self.aks.WaitForOperation('cluster_succeeded') + + def testWaitForOperationClusterSucceededFailedRaises(self): + """WaitForOperation raises CreationError when cluster update is Failed.""" + self.MockIssueCommand({ + 'az aks show': [('Failed\n', '', 0)] + }) + with self.assertRaises(errors.Resource.CreationError): + self.aks.WaitForOperation('cluster_succeeded') + + def testWaitForOperationUnknownHandleRaises(self): + """WaitForOperation raises 
ValueError for an unknown handle prefix.""" + with self.assertRaises(ValueError): + self.aks.WaitForOperation('bad_handle:something') + + # ---- ResolveNodePoolVersions ---------------------------------------------- + + def testResolveNodePoolVersionsNMinus1Math(self): + """ResolveNodePoolVersions returns (N-1, N) from cluster_version.""" + self.aks.cluster_version = '1.34' + initial, target = self.aks.ResolveNodePoolVersions() + self.assertEqual('1.33', initial) + self.assertEqual('1.34', target) + + def testResolveNodePoolVersionsStripsMinorPatch(self): + """ResolveNodePoolVersions strips patch from full version string.""" + self.aks.cluster_version = '1.33.5' + initial, target = self.aks.ResolveNodePoolVersions() + self.assertEqual('1.32', initial) + self.assertEqual('1.33', target) + + # ---- _GetNodeFlags with version_override ---------------------------------- + + def testGetNodeFlagsVersionOverride(self): + """_GetNodeFlags uses version_override instead of cluster_version.""" + self.aks.cluster_version = '1.34' + cfg = self._make_nodepool_config() + flags = self.aks._GetNodeFlags(cfg, version_override='1.33') + self.assertIn('--kubernetes-version', flags) + idx = flags.index('--kubernetes-version') + self.assertEqual('1.33', flags[idx + 1]) + + def testGetNodeFlagsUsesClusterVersionWhenNoOverride(self): + """_GetNodeFlags uses cluster_version when version_override is None.""" + self.aks.cluster_version = '1.34' + cfg = self._make_nodepool_config() + flags = self.aks._GetNodeFlags(cfg, version_override=None) + self.assertIn('--kubernetes-version', flags) + idx = flags.index('--kubernetes-version') + self.assertEqual('1.34', flags[idx + 1]) + + if __name__ == '__main__': unittest.main() diff --git a/tests/providers/gcp/google_kubernetes_engine_test.py b/tests/providers/gcp/google_kubernetes_engine_test.py index dbf8232f5e..d49ac77d2a 100644 --- a/tests/providers/gcp/google_kubernetes_engine_test.py +++ b/tests/providers/gcp/google_kubernetes_engine_test.py @@ 
-13,10 +13,11 @@ # limitations under the License. """Tests for perfkitbenchmarker.providers.gcp.google_kubernetes_engine.""" -# pylint: disable=not-context-manager +# pylint: disable=not-context-manager,invalid-name,protected-access import builtins import contextlib +import json import os import tempfile import unittest @@ -61,6 +62,7 @@ class PatchedObjectsTestCase(pkb_common_test_case.PkbCommonTestCase): def patch_critical_objects( self, stdout='', stderr='', return_code=0, flags=FLAGS ): + """Patches common objects and yields a mock IssueCommand.""" with contextlib.ExitStack() as stack: flags.gcloud_path = 'gcloud' flags.run_uri = _RUN_URI @@ -99,10 +101,12 @@ def patch_critical_objects( class GoogleContainerRegistryTestCase(PatchedObjectsTestCase): + """Tests for the GoogleArtifactRegistry container registry.""" class FakeContainerImage(container.ContainerImage): + """Minimal ContainerImage stub for registry tests.""" - def __init__(self, name, directory=None): + def __init__(self, name, directory=None): # pylint: disable=super-init-not-called self.name = name self.directory = directory or f'docker/{name}/Dockerfile' @@ -117,6 +121,7 @@ def setUp(self): ) def testFullRegistryTag(self): + """Tests that full registry tag is constructed correctly.""" spec = container_spec.ContainerRegistrySpec( 'NAME', **{ @@ -132,6 +137,7 @@ def testFullRegistryTag(self): ) def testRemoteBuildCreateSucceeds(self): + """Tests that _Build succeeds when gcloud Issue returns success.""" spec = container_spec.ContainerRegistrySpec( 'NAME', **{ @@ -147,9 +153,11 @@ def testRemoteBuildCreateSucceeds(self): class GoogleKubernetesEngineCustomMachineTypeTestCase(PatchedObjectsTestCase): + """Tests for GKE cluster creation with a custom machine type.""" @staticmethod def create_kubernetes_engine_spec(): + """Creates a GKE spec with a custom CPU/memory machine type.""" kubernetes_engine_spec = container_spec.ContainerClusterSpec( 'NAME', **{ @@ -180,9 +188,11 @@ def testCreate(self): class 
GoogleKubernetesEngineTestCase(PatchedObjectsTestCase): + """Tests for standard GKE cluster create/delete/exists operations.""" @staticmethod def create_kubernetes_engine_spec(): + """Creates a standard GKE cluster spec with typical VM options.""" kubernetes_engine_spec = container_spec.ContainerClusterSpec( 'NAME', **{ @@ -204,6 +214,7 @@ def create_kubernetes_engine_spec(): return kubernetes_engine_spec def testCreate(self): + """Tests that _Create issues the correct gcloud command with all flags.""" spec = self.create_kubernetes_engine_spec() with self.patch_critical_objects() as issue_command: cluster = google_kubernetes_engine.GkeCluster(spec) @@ -242,6 +253,7 @@ def testCreateQuotaExceeded(self): cluster._Create() def testCreateResourcesExhausted(self): + """Tests _Create raises InsufficientCapacityCloudFailure on exhaustion.""" spec = self.create_kubernetes_engine_spec() with self.patch_critical_objects( stderr=""" @@ -258,6 +270,7 @@ def testCreateResourcesExhausted(self): cluster._Create() def testGetCredentials(self): + """Tests that _PostCreate issues get-credentials with KUBECONFIG set.""" spec = self.create_kubernetes_engine_spec() with self.patch_critical_objects() as issue_command, mock.patch.object( kubectl, 'RunKubectlCommand' @@ -266,7 +279,7 @@ def testGetCredentials(self): cluster._Create() cluster._PostCreate() self.assertIn( - 'gcloud container clusters get-credentials pkb-{}'.format(_RUN_URI), + f'gcloud container clusters get-credentials pkb-{_RUN_URI}', issue_command.all_commands, ) self.assertIn( @@ -282,7 +295,7 @@ def testDelete(self): cluster._Delete() self.assertEqual(issue_command.func_to_mock.call_count, 5) self.assertIn( - 'gcloud container clusters delete pkb-{}'.format(_RUN_URI), + f'gcloud container clusters delete pkb-{_RUN_URI}', issue_command.all_commands, ) self.assertIn('--zone us-central1-a', issue_command.all_commands) @@ -293,11 +306,12 @@ def testExists(self): cluster = google_kubernetes_engine.GkeCluster(spec) 
cluster._Exists() self.assertIn( - 'gcloud container clusters describe pkb-{}'.format(_RUN_URI), + f'gcloud container clusters describe pkb-{_RUN_URI}', issue_command.all_commands, ) def testGetResourceMetadata(self): + """Tests that GetResourceMetadata returns all expected fields.""" spec = self.create_kubernetes_engine_spec() with self.patch_critical_objects(stdout=_KUBECTL_VERSION): cluster = google_kubernetes_engine.GkeCluster(spec) @@ -328,9 +342,11 @@ def testCidrCalculations(self): class GoogleKubernetesEngineAutoscalingTestCase(PatchedObjectsTestCase): + """Tests for GKE cluster creation with cluster-level autoscaling.""" @staticmethod def create_kubernetes_engine_spec(): + """Creates a GKE spec with cluster-level autoscaling enabled.""" kubernetes_engine_spec = container_spec.ContainerClusterSpec( 'NAME', **{ @@ -350,6 +366,7 @@ def create_kubernetes_engine_spec(): return kubernetes_engine_spec def testCreate(self): + """Tests that _Create passes autoscaling flags to gcloud.""" spec = self.create_kubernetes_engine_spec() with self.patch_critical_objects() as issue_command: cluster = google_kubernetes_engine.GkeCluster(spec) @@ -364,6 +381,7 @@ def testCreate(self): self.assertIn('--cluster-ipv4-cidr /18', issue_command.all_commands) def testGetResourceMetadata(self): + """Tests that metadata includes autoscaling size fields.""" spec = self.create_kubernetes_engine_spec() with self.patch_critical_objects(stdout=_KUBECTL_VERSION): cluster = google_kubernetes_engine.GkeCluster(spec) @@ -393,9 +411,11 @@ def testLabelDisks(self): class GoogleKubernetesEngineVersionFlagTestCase(PatchedObjectsTestCase): + """Tests for GKE cluster creation with version and release-channel flags.""" @staticmethod def create_kubernetes_engine_spec(): + """Creates a GKE spec for testing version and release-channel flags.""" kubernetes_engine_spec = container_spec.ContainerClusterSpec( 'NAME', **{ @@ -443,9 +463,11 @@ def testCreateRapidChannel(self): class 
GoogleKubernetesEngineGvnicFlagTestCase(PatchedObjectsTestCase): + """Tests for GKE cluster creation with gVNIC enable/disable flags.""" @staticmethod def create_kubernetes_engine_spec(): + """Creates a GKE spec for testing the gVNIC flag.""" kubernetes_engine_spec = container_spec.ContainerClusterSpec( 'NAME', **{ @@ -478,9 +500,11 @@ def testCreateDisableGvnic(self): class GoogleKubernetesEngineWithGpusTestCase(PatchedObjectsTestCase): + """Tests for GKE cluster creation with GPU accelerator configuration.""" @staticmethod def create_kubernetes_engine_spec(gpu_type): + """Creates a GKE spec with the given GPU type and 2 GPUs.""" kubernetes_engine_spec = container_spec.ContainerClusterSpec( 'NAME', **{ @@ -501,6 +525,7 @@ def create_kubernetes_engine_spec(gpu_type): @flagsaver.flagsaver(gke_gpu_driver_version='latest') def testCreate(self): + """Tests that _Create includes the correct --accelerator flag for K80.""" spec = self.create_kubernetes_engine_spec('k80') with self.patch_critical_objects() as issue_command: cluster = google_kubernetes_engine.GkeCluster(spec) @@ -525,16 +550,19 @@ def testCreateGpuH100(self): cluster._Create() self.assertIn( '--accelerator ' - 'type=nvidia-h100-80gb,count=2,gpu-driver-version=default', + + 'type=nvidia-h100-80gb,count=2,gpu-driver-version=default', issue_command.all_commands, ) class GoogleKubernetesEngineGetNodesTestCase(GoogleKubernetesEngineTestCase): + """Tests for GKE node/instance-group enumeration methods.""" def testGetInstanceGroups(self): + """Tests that _GetInstanceGroups parses node-pools list output.""" path = os.path.join(os.path.dirname(__file__), _NODE_POOLS_LIST_OUTPUT) - output = open(path).read() + with open(path) as f: + output = f.read() spec = self.create_kubernetes_engine_spec() with self.patch_critical_objects(stdout=output) as issue_command: cluster = google_kubernetes_engine.GkeCluster(spec) @@ -552,9 +580,13 @@ def testGetInstanceGroups(self): self.assertEqual(expected, set(instance_groups)) # 
order doesn't matter def testGetNodePoolNames(self): - output = ['default-pool', 'nap-e2-standard-2-iu4vquho', 'test-pool'] + """Tests that GetNodePoolNames returns names from cluster describe.""" + pool_names = ['default-pool', 'nap-e2-standard-2-iu4vquho', 'test-pool'] + json_output = json.dumps( + {'nodePools': [{'name': n} for n in pool_names]} + ) spec = self.create_kubernetes_engine_spec() - with self.patch_critical_objects(stdout='\n'.join(output)) as issue_command: + with self.patch_critical_objects(stdout=json_output) as issue_command: cluster = google_kubernetes_engine.GkeCluster(spec) node_pools = cluster.GetNodePoolNames() @@ -562,8 +594,8 @@ def testGetNodePoolNames(self): 'gcloud container clusters describe ' + cluster.name, issue_command.all_commands, ) - self.assertIn('--flatten', issue_command.all_commands) - self.assertIn('--format', issue_command.all_commands) + self.assertIn('--format json', issue_command.all_commands) + self.assertNotIn('--flatten', issue_command.all_commands) expected = { 'default-pool', @@ -574,9 +606,11 @@ def testGetNodePoolNames(self): class GoogleKubernetesEngineRegionalTestCase(PatchedObjectsTestCase): + """Tests for GKE regional cluster creation with multiple nodepools.""" @staticmethod def create_kubernetes_engine_spec(use_zonal_nodepools=False): + """Creates a regional GKE spec with two nodepools.""" kubernetes_engine_spec = container_spec.ContainerClusterSpec( 'NAME', **{ @@ -619,6 +653,7 @@ def create_kubernetes_engine_spec(use_zonal_nodepools=False): return kubernetes_engine_spec def testCreateRegionalCluster(self): + """Tests regional cluster creation with region-wide nodepools.""" spec = self.create_kubernetes_engine_spec(use_zonal_nodepools=False) with self.patch_critical_objects() as issue_command: cluster = google_kubernetes_engine.GkeCluster(spec) @@ -668,6 +703,7 @@ def testCreateRegionalCluster(self): self.assertNotIn('--node-locations', create_nodepool2) def testCreateRegionalClusterZonalNodepool(self): + 
"""Tests regional cluster creation with zone-pinned nodepools.""" spec = self.create_kubernetes_engine_spec(use_zonal_nodepools=True) with self.patch_critical_objects() as issue_command: cluster = google_kubernetes_engine.GkeCluster(spec) @@ -706,9 +742,11 @@ def testCreateRegionalClusterZonalNodepool(self): class GoogleKubernetesEngineMachineFamiliesTestCase(PatchedObjectsTestCase): + """Tests for GKE nodepool creation with machine-family constraints.""" @staticmethod def create_kubernetes_engine_spec(): + """Creates a GKE spec with a nodepool using machine families.""" kubernetes_engine_spec = container_spec.ContainerClusterSpec( 'NAME', **{ @@ -735,6 +773,7 @@ def create_kubernetes_engine_spec(): return kubernetes_engine_spec def testCreateWithMachineFamilies(self): + """Tests that machine-family nodepool issues a node-pools update command.""" spec = self.create_kubernetes_engine_spec() with self.patch_critical_objects() as issue_command, mock.patch.object( kubernetes_commands, 'ApplyYaml' @@ -752,9 +791,11 @@ def testCreateWithMachineFamilies(self): class GoogleKubernetesEngineAutopilotTestCase(PatchedObjectsTestCase): + """Tests for GKE Autopilot cluster creation and metadata.""" @staticmethod def create_kubernetes_engine_spec(): + """Creates a GKE Autopilot cluster spec.""" kubernetes_engine_spec = container_spec.ContainerClusterSpec( 'NAME', **{ @@ -771,6 +812,7 @@ def create_kubernetes_engine_spec(): return kubernetes_engine_spec def testCreate(self): + """Tests Autopilot _Create uses create-auto without node flags.""" spec = self.create_kubernetes_engine_spec() with self.patch_critical_objects() as issue_command: cluster = google_kubernetes_engine.GkeAutopilotCluster(spec) @@ -788,6 +830,7 @@ def testCreate(self): self.assertNotIn('--num-nodes', issue_command.all_commands) def testGetResourceMetadata(self): + """Tests that Autopilot metadata includes Auto values for size/type.""" spec = self.create_kubernetes_engine_spec() with 
self.patch_critical_objects(): cluster = google_kubernetes_engine.GkeAutopilotCluster(spec) @@ -818,8 +861,9 @@ def testGetResourceMetadataIncludesReleaseChannel(self): metadata, ) - @flagsaver.flagsaver(run_uri='123') + @flagsaver.flagsaver(gpu_type='h100', gpu_count=1, run_uri='123') def testApplyYamlGpusH100(self): + """Tests Autopilot YAML generation for H100 GPU node selectors.""" self.enter_context( mock.patch( gce_network.__name__ + '.GceFirewall.GetFirewall', @@ -858,8 +902,6 @@ def testApplyYamlGpusH100(self): ) ) spec = self.create_kubernetes_engine_spec() - spec.vm_spec.gpu_count = 1 - spec.vm_spec.gpu_type = 'h100' with self.assertLogs(level='INFO') as logs: cluster = google_kubernetes_engine.GkeAutopilotCluster(spec) yamls = kubernetes_commands.ConvertManifestToYamlDicts( @@ -884,6 +926,7 @@ def testApplyYamlGpusH100(self): self.assertIn('cloud.google.com/compute-class: Accelerator', full_logs) def testGetMachineTypeFromNodeName(self): + """Tests GetMachineTypeFromNodeName queries kubectl for node type.""" spec = self.create_kubernetes_engine_spec() with self.patch_critical_objects(): cluster = google_kubernetes_engine.GkeAutopilotCluster(spec) @@ -899,8 +942,10 @@ def testGetMachineTypeFromNodeName(self): class GoogleKubernetesEngineNodepoolAutoscalingTestCase(PatchedObjectsTestCase): + """Tests GKE per-nodepool autoscaling overrides cluster-level settings.""" def testCreateWithPerNodepoolAutoscaling(self): + """Tests per-nodepool autoscaling settings override cluster defaults.""" kubernetes_engine_spec = container_spec.ContainerClusterSpec( 'NAME', **{ @@ -949,5 +994,298 @@ def testCreateWithPerNodepoolAutoscaling(self): self.assertIn('--max-nodes 10', nodepool_cmd) +class GkeManagementPlaneTestCase(PatchedObjectsTestCase): + """Tests for GKE management-plane methods (k8s_management_benchmark).""" + + @staticmethod + def create_kubernetes_engine_spec(): + """Creates a GKE spec for management-plane method tests.""" + return 
container_spec.ContainerClusterSpec( + 'NAME', + **{ + 'cloud': 'GCP', + 'vm_spec': { + 'GCP': { + 'machine_type': 'fake-machine-type', + 'zone': 'us-central1-a', + }, + }, + 'vm_count': 2, + 'poll_for_events': False, + }, + ) + + def _make_nodepool_config(self, name='pkbpool0'): + """Returns a minimal BaseNodePoolConfig-like object.""" + cfg = mock.MagicMock() + cfg.name = name + cfg.num_nodes = 1 + cfg.machine_type = 'n1-standard-2' + cfg.disk_size = 100 + cfg.max_local_disks = 0 + cfg.zone = None + return cfg + + # ---- GetNodePoolNames (JSON format) --------------------------------------- + + def testGetNodePoolNamesJsonFormat(self): + """GetNodePoolNames parses JSON cluster describe output.""" + cluster_json = ( + '{"nodePools": [{"name": "default-pool"}, {"name": "extra-pool"}]}' + ) + spec = self.create_kubernetes_engine_spec() + with self.patch_critical_objects(stdout=cluster_json) as issue_command: + cluster = google_kubernetes_engine.GkeCluster(spec) + names = cluster.GetNodePoolNames() + + self.assertIn( + 'gcloud container clusters describe ' + cluster.name, + issue_command.all_commands, + ) + self.assertIn('--format', issue_command.all_commands) + # Must NOT use --flatten (old format) + self.assertNotIn('--flatten', issue_command.all_commands) + self.assertEqual({'default-pool', 'extra-pool'}, set(names)) + + def testGetNodePoolNamesEmptyFallback(self): + """GetNodePoolNames falls back to split() on non-JSON output.""" + spec = self.create_kubernetes_engine_spec() + with self.patch_critical_objects(stdout='pool-a pool-b'): + cluster = google_kubernetes_engine.GkeCluster(spec) + names = cluster.GetNodePoolNames() + self.assertEqual({'pool-a', 'pool-b'}, set(names)) + + # ---- CreateNodePool ------------------------------------------------------- + + def testCreateNodePool(self): + """CreateNodePool issues gcloud node-pools create with cluster flag.""" + spec = self.create_kubernetes_engine_spec() + with self.patch_critical_objects() as issue_command: + 
cluster = google_kubernetes_engine.GkeCluster(spec) + cfg = self._make_nodepool_config('mypool') + cluster.CreateNodePool(cfg) + + cmd = issue_command.GetCommandWithSubstring('node-pools create mypool') + self.assertIn('--cluster', cmd) + self.assertNotIn('--node-version', cmd) + + def testCreateNodePoolWithVersion(self): + """CreateNodePool passes --node-version when provided.""" + spec = self.create_kubernetes_engine_spec() + with self.patch_critical_objects() as issue_command: + cluster = google_kubernetes_engine.GkeCluster(spec) + cfg = self._make_nodepool_config('mypool') + cluster.CreateNodePool(cfg, node_version='1.34.1-gke.100') + + cmd = issue_command.GetCommandWithSubstring('node-pools create mypool') + self.assertIn('--node-version 1.34.1-gke.100', cmd) + + # ---- DeleteNodePool ------------------------------------------------------- + + def testDeleteNodePool(self): + """DeleteNodePool issues gcloud node-pools delete with --quiet.""" + spec = self.create_kubernetes_engine_spec() + with self.patch_critical_objects() as issue_command: + cluster = google_kubernetes_engine.GkeCluster(spec) + cluster.DeleteNodePool('old-pool') + + cmd = issue_command.GetCommandWithSubstring('node-pools delete old-pool') + self.assertIn('--cluster', cmd) + self.assertIn('--quiet', cmd) + + # ---- UpgradeNodePool ------------------------------------------------------ + + def testUpgradeNodePool(self): + """UpgradeNodePool issues gcloud clusters upgrade with --node-pool.""" + spec = self.create_kubernetes_engine_spec() + with self.patch_critical_objects() as issue_command: + cluster = google_kubernetes_engine.GkeCluster(spec) + cluster.UpgradeNodePool('my-pool', '1.34.1-gke.200') + + cmd = issue_command.GetCommandWithSubstring('clusters upgrade') + self.assertIn('--node-pool my-pool', cmd) + self.assertIn('--cluster-version 1.34.1-gke.200', cmd) + self.assertIn('--quiet', cmd) + + # ---- UpdateCluster -------------------------------------------------------- + + def 
testUpdateCluster(self): + """UpdateCluster issues gcloud clusters update with a timestamp label.""" + spec = self.create_kubernetes_engine_spec() + with self.patch_critical_objects() as issue_command: + cluster = google_kubernetes_engine.GkeCluster(spec) + cluster.UpdateCluster() + + cmd = issue_command.GetCommandWithSubstring('clusters update') + self.assertIn('--update-labels', cmd) + self.assertIn('k8s-mgmt-ts=', cmd) + + # ---- Async variants ------------------------------------------------------- + + def testCreateNodePoolAsyncReturnsOpName(self): + """CreateNodePoolAsync returns the GKE operation name.""" + spec = self.create_kubernetes_engine_spec() + with self.patch_critical_objects( + stdout='extra line\noperation-1234\n' + ) as issue_command: + cluster = google_kubernetes_engine.GkeCluster(spec) + cfg = self._make_nodepool_config('asyncpool') + handle = cluster.CreateNodePoolAsync(cfg) + + cmd = issue_command.GetCommandWithSubstring('node-pools create asyncpool') + self.assertIn('--async', cmd) + self.assertNotIn('--timeout', cmd) + self.assertEqual('operation-1234', handle) + + def testCreateNodePoolAsyncWithVersion(self): + """CreateNodePoolAsync passes --node-version when provided.""" + spec = self.create_kubernetes_engine_spec() + stdout = 'operation-5678\n' + with self.patch_critical_objects(stdout=stdout) as issue_command: + cluster = google_kubernetes_engine.GkeCluster(spec) + cfg = self._make_nodepool_config('verpool') + cluster.CreateNodePoolAsync(cfg, node_version='1.33.5-gke.1') + + cmd = issue_command.GetCommandWithSubstring('node-pools create verpool') + self.assertIn('--node-version 1.33.5-gke.1', cmd) + + def testDeleteNodePoolAsyncReturnsOpName(self): + """DeleteNodePoolAsync issues delete with --async.""" + spec = self.create_kubernetes_engine_spec() + with self.patch_critical_objects(stdout='operation-del\n') as issue_command: + cluster = google_kubernetes_engine.GkeCluster(spec) + handle = cluster.DeleteNodePoolAsync('to-delete') + + 
cmd = issue_command.GetCommandWithSubstring('node-pools delete to-delete') + self.assertIn('--async', cmd) + self.assertIn('--quiet', cmd) + self.assertEqual('operation-del', handle) + + def testUpgradeNodePoolAsyncReturnsOpName(self): + """UpgradeNodePoolAsync issues upgrade with --async.""" + spec = self.create_kubernetes_engine_spec() + with self.patch_critical_objects(stdout='operation-upg\n') as issue_command: + cluster = google_kubernetes_engine.GkeCluster(spec) + handle = cluster.UpgradeNodePoolAsync('my-pool', '1.34.2-gke.100') + + cmd = issue_command.GetCommandWithSubstring('clusters upgrade') + self.assertIn('--async', cmd) + self.assertIn('--node-pool my-pool', cmd) + self.assertIn('--cluster-version 1.34.2-gke.100', cmd) + self.assertEqual('operation-upg', handle) + + def testUpdateClusterAsyncReturnsOpName(self): + """UpdateClusterAsync issues clusters update with --async.""" + spec = self.create_kubernetes_engine_spec() + with self.patch_critical_objects(stdout='operation-upd\n') as issue_command: + cluster = google_kubernetes_engine.GkeCluster(spec) + handle = cluster.UpdateClusterAsync() + + cmd = issue_command.GetCommandWithSubstring('clusters update') + self.assertIn('--async', cmd) + self.assertIn('k8s-mgmt-ts=', cmd) + self.assertEqual('operation-upd', handle) + + def testIssueAsyncRaisesOnNonZeroRetcode(self): + """_IssueAsync raises CreationError when the command fails.""" + spec = self.create_kubernetes_engine_spec() + with self.patch_critical_objects(stderr='boom', return_code=1): + cluster = google_kubernetes_engine.GkeCluster(spec) + cfg = self._make_nodepool_config('failpool') + with self.assertRaises(Exception): + cluster.CreateNodePoolAsync(cfg) + + def testIssueAsyncRaisesOnEmptyOpName(self): + """_IssueAsync raises CreationError when stdout produces no op name.""" + spec = self.create_kubernetes_engine_spec() + with self.patch_critical_objects(stdout=' \n '): + cluster = google_kubernetes_engine.GkeCluster(spec) + cfg = 
self._make_nodepool_config('emptypool') + with self.assertRaises(Exception): + cluster.CreateNodePoolAsync(cfg) + + # ---- WaitForOperation ----------------------------------------------------- + + def testWaitForOperationDone(self): + """WaitForOperation returns immediately when status is DONE.""" + spec = self.create_kubernetes_engine_spec() + done_json = '{"status": "DONE"}' + with self.patch_critical_objects(stdout=done_json): + cluster = google_kubernetes_engine.GkeCluster(spec) + # Should not raise + cluster.WaitForOperation('operation-xyz') + + def testWaitForOperationAbortingRaises(self): + """WaitForOperation raises CreationError when status is ABORTING.""" + spec = self.create_kubernetes_engine_spec() + aborted_json = '{"status": "ABORTING"}' + with self.patch_critical_objects(stdout=aborted_json): + cluster = google_kubernetes_engine.GkeCluster(spec) + with self.assertRaises(errors.Resource.CreationError): + cluster.WaitForOperation('operation-bad') + + # ---- ResolveNodePoolVersions ---------------------------------------------- + + def testResolveNodePoolVersions(self): + """ResolveNodePoolVersions returns (N-1 qualified, N qualified).""" + server_config = { + 'validNodeVersions': [ + '1.34.5-gke.100', + '1.34.3-gke.50', + '1.33.8-gke.200', + '1.33.5-gke.99', + '1.32.1-gke.10', + ] + } + spec = self.create_kubernetes_engine_spec() + with self.patch_critical_objects( + stdout=json.dumps(server_config) + ) as issue_command: + cluster = google_kubernetes_engine.GkeCluster(spec) + initial, target = cluster.ResolveNodePoolVersions() + + cmd = issue_command.GetCommandWithSubstring('get-server-config') + self.assertIn('--format', cmd) + # target = newest overall = 1.34.5-gke.100 + self.assertEqual('1.34.5-gke.100', target) + # initial = best version for minor 33 = 1.33.8-gke.200 + self.assertEqual('1.33.8-gke.200', initial) + + def testResolveNodePoolVersionsNoVersionsRaises(self): + """ResolveNodePoolVersions raises GetError when versions list is empty.""" + 
spec = self.create_kubernetes_engine_spec() + with self.patch_critical_objects(stdout='{"validNodeVersions": []}'): + cluster = google_kubernetes_engine.GkeCluster(spec) + with self.assertRaises(errors.Resource.GetError): + cluster.ResolveNodePoolVersions() + + # ---- HasActiveUpgradeOperations ------------------------------------------- + + def testHasActiveUpgradeOperationsTrue(self): + """HasActiveUpgradeOperations returns True when an upgrade is running.""" + spec = self.create_kubernetes_engine_spec() + with self.patch_critical_objects(stdout='operation-upgrade-123\n'): + cluster = google_kubernetes_engine.GkeCluster(spec) + self.assertTrue(cluster.HasActiveUpgradeOperations()) + + def testHasActiveUpgradeOperationsFalse(self): + """HasActiveUpgradeOperations returns False when no upgrade is running.""" + spec = self.create_kubernetes_engine_spec() + with self.patch_critical_objects(stdout=''): + cluster = google_kubernetes_engine.GkeCluster(spec) + self.assertFalse(cluster.HasActiveUpgradeOperations()) + + def testHasActiveUpgradeOperationsUsesCorrectFilter(self): + """HasActiveUpgradeOperations queries for UPGRADE_NODES AND RUNNING.""" + spec = self.create_kubernetes_engine_spec() + with self.patch_critical_objects(stdout='') as issue_command: + cluster = google_kubernetes_engine.GkeCluster(spec) + cluster.HasActiveUpgradeOperations() + + self.assertIn('operations list', issue_command.all_commands) + self.assertIn('UPGRADE_NODES', issue_command.all_commands) + self.assertIn('RUNNING', issue_command.all_commands) + + if __name__ == '__main__': unittest.main() From d658cf98e187a0588f5649d129a6438ca68c5a9a Mon Sep 17 00:00:00 2001 From: Srikant Patil Date: Wed, 27 May 2026 15:43:24 +0530 Subject: [PATCH 12/19] Removed duplicate file --- .../k8s_management_benchmark.py | 717 ------------------ 1 file changed, 717 deletions(-) delete mode 100644 perfkitbenchmarker/linux_benchmarks/k8s_management_benchmark.py diff --git 
a/perfkitbenchmarker/linux_benchmarks/k8s_management_benchmark.py b/perfkitbenchmarker/linux_benchmarks/k8s_management_benchmark.py deleted file mode 100644 index 72ffea3caf..0000000000 --- a/perfkitbenchmarker/linux_benchmarks/k8s_management_benchmark.py +++ /dev/null @@ -1,717 +0,0 @@ -# Copyright 2026 PerfKitBenchmarker Authors. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Benchmark for Kubernetes management plane operations. - -Measures GKE/EKS/AKS control-plane API responsiveness via three scenarios: - A. Concurrent node-pool create/upgrade/delete. - B. Node-pool create overlapping with a long-running cluster update. - C. Large-scale node-pool provisioning (single scale or sweep). 
- -Optimizations for minimum run time: - - Streaming concurrency in Scenario C (no batch barriers) - - Optional pipelined Scenario A (create->upgrade->delete per thread) - - Reduced poll_interval in provider WaitForOperation (5s vs 10s) - - Per-op threads capped at _MAX_CONCURRENT to avoid OS limits - - Accurate delete success rate via attempted_ops denominator -""" - -import copy -import threading -import time -from typing import Callable - -from absl import flags -from absl import logging -from perfkitbenchmarker import background_tasks -from perfkitbenchmarker import vm_util -from perfkitbenchmarker import benchmark_spec as bm_spec -from perfkitbenchmarker import configs -from perfkitbenchmarker import errors -from perfkitbenchmarker import sample -from perfkitbenchmarker.configs import benchmark_config_spec -from perfkitbenchmarker.resources.container_service import container as container_lib -from perfkitbenchmarker.resources.container_service import kubectl -from perfkitbenchmarker.resources.container_service import kubernetes_cluster - -_SLEEP_POD_NAME = 'pkb-mgmt-sleep' - -BENCHMARK_NAME = 'k8s_management' - -BENCHMARK_CONFIG = """ -k8s_management: - description: > - Benchmarks GKE/EKS/AKS management plane operations: concurrent node pool - create/upgrade/delete, overlapping cluster + node-pool ops, and large-scale - provisioning. Focused on control-plane API responsiveness. - Spec regions: GCP us-central1, AWS us-east-1 (closest), Azure eastus (closest). - Equivalent machine types across clouds per Google benchmark spec. 
- container_cluster: - type: Kubernetes - vm_count: 1 - vm_spec: - GCP: - # us-central1-a: spec primary region for GCP - # e2-standard-2: 2 vCPU 8GB — equivalent to t3.medium / Standard_D2s_v3 - machine_type: e2-standard-2 - zone: us-central1-a - AWS: - # us-east-1a: closest comparable region to GCP us-central1 - # t3.medium: 2 vCPU 4GB — closest equivalent to e2-standard-2 (Google spec) - machine_type: t3.medium - zone: us-east-1a - Azure: - # eastus: closest comparable region to GCP us-central1 - # Standard_D2s_v3: 2 vCPU 8GB — equivalent to e2-standard-2 - machine_type: Standard_D2s_v3 - zone: eastus -""" - -_VALID_SCENARIOS = frozenset({'A', 'B', 'C'}) - -_CONCURRENT_NODEPOOLS = flags.DEFINE_integer( - 'k8s_mgmt_concurrent_nodepools', - 5, - 'Number of node pools to create/upgrade/delete concurrently in Scenario A.', -) -_LARGE_SCALE_NODEPOOLS = flags.DEFINE_integer( - 'k8s_mgmt_large_scale_nodepools', - 1000, - 'Number of node pools to provision in the large-scale Scenario C. ' - 'Spec target is 1000; ensure VPC/quota is available before running.', -) -_NODES_PER_NODEPOOL = flags.DEFINE_integer( - 'k8s_mgmt_nodes_per_nodepool', - 2, - 'Number of nodes per node pool. Google spec: 2 nodes per pool.', -) -_INITIAL_VERSION = flags.DEFINE_string( - 'k8s_mgmt_initial_version', - None, - 'Kubernetes version for newly-created node pools (N-1). None = auto.', -) -_TARGET_VERSION = flags.DEFINE_string( - 'k8s_mgmt_target_version', - None, - 'Kubernetes version to upgrade node pools to (N). None = cluster version.', -) -_SCENARIOS = flags.DEFINE_list( - 'k8s_mgmt_scenarios', - ['A', 'B', 'C'], - 'Comma-separated subset of scenarios to run. Valid values: A, B, C.', -) -_SCALE_SWEEP = flags.DEFINE_list( - 'k8s_mgmt_scale_sweep', - [], - 'Comma-separated list of node-pool counts for Scenario C scale sweep. ' - 'Each scale runs as a separate sub-run with full create/delete cycle. ' - 'Example: --k8s_mgmt_scale_sweep=10,50,100,500,1000. 
' - 'If empty, uses --k8s_mgmt_large_scale_nodepools.', -) -_MAX_CONCURRENT = flags.DEFINE_integer( - 'k8s_mgmt_max_concurrent', - 50, - 'Cap on concurrent provider API calls within a batch. ' - 'Higher = faster but more aggressive on connection pools.', -) -_PIPELINE_SCENARIO_A = flags.DEFINE_boolean( - 'k8s_mgmt_pipeline_scenario_a', - False, - 'If True, run Scenario A as a per-pool pipeline (create->upgrade->delete ' - 'back-to-back per thread). Minimizes wall time but measures ops under ' - 'mixed-type concurrent load. Default False = phase-by-phase (spec-strict).', -) - -# AKS caps node-pool names at 12 chars — keep all names within that limit. -_PREFIX = 'pkbm' -_SCENARIO_A_NAME = lambda i: f'{_PREFIX}a{i:03d}' -_SCENARIO_B_NAME = f'{_PREFIX}b' -_SCENARIO_C_NAME = lambda i: f'{_PREFIX}c{i:04d}' - - -def GetConfig(user_config): - return configs.LoadConfig(BENCHMARK_CONFIG, user_config, BENCHMARK_NAME) - - -def CheckPrerequisites( - benchmark_config: benchmark_config_spec.BenchmarkConfigSpec, -): - """Validates flag values and cluster type before any cloud calls.""" - invalid = [ - s for s in _SCENARIOS.value if s.strip().upper() not in _VALID_SCENARIOS - ] - if invalid: - raise errors.Config.InvalidValue( - f'Invalid value(s) for --k8s_mgmt_scenarios: {invalid}. ' - f'Valid options: {sorted(_VALID_SCENARIOS)}.' - ) - for s in _SCALE_SWEEP.value: - try: - int(s.strip()) - except ValueError as e: - raise errors.Config.InvalidValue( - f'Non-integer value in --k8s_mgmt_scale_sweep: {s!r}' - ) from e - if benchmark_config.container_cluster.type != 'Kubernetes': - raise errors.Config.InvalidValue( - 'k8s_management benchmark requires a Kubernetes container cluster.' 
- ) - - -def Prepare(benchmark_spec: bm_spec.BenchmarkSpec) -> None: - """Asserts the cluster is reachable; deploys spec-defined sleep workload.""" - cluster = benchmark_spec.container_cluster - assert isinstance(cluster, kubernetes_cluster.KubernetesCluster) - benchmark_spec.always_call_cleanup = True - logging.info( - 'k8s_management Prepare: cluster=%s, version=%s', - cluster.name, - cluster.k8s_version, - ) - # Spec workload: "a simple container that sleeps for a given time". - # Confirms data-plane reachability; generates no data-plane load. - _, _, rc = kubectl.RunKubectlCommand( - [ - 'run', _SLEEP_POD_NAME, - '--image=busybox', - '--restart=Never', - '--', 'sleep', '86400', - ], - raise_on_failure=False, - ) - if rc: - logging.warning( - 'Sleep workload deploy returned rc=%d (non-fatal; continuing)', rc - ) - - -def _CleanStartSweep(cluster: kubernetes_cluster.KubernetesCluster) -> None: - """Deletes any stale pkbm* node pools so each run starts clean (spec C.2).""" - try: - stale = [n for n in cluster.GetNodePoolNames() if n.startswith(_PREFIX)] - except Exception: # pylint: disable=broad-except - logging.exception('CleanStart: failed to list node pools') - return - if not stale: - logging.info('CleanStart: no stale pools found — clean start confirmed.') - return - logging.warning('CleanStart: deleting %d stale pools: %s', len(stale), stale) - background_tasks.RunThreaded(cluster.DeleteNodePool, stale) - - -def Run(benchmark_spec: bm_spec.BenchmarkSpec) -> list[sample.Sample]: - """Runs the selected scenarios and returns flat list of samples.""" - cluster = benchmark_spec.container_cluster - assert isinstance(cluster, kubernetes_cluster.KubernetesCluster) - - # Spec C.2: start clean. - _CleanStartSweep(cluster) - - # Resolve versions once; log clearly; tag every sample. - # Google spec: initial=N-1, target=N (adjacent minor upgrade). 
- flag_initial = _INITIAL_VERSION.value - flag_target = _TARGET_VERSION.value - if flag_initial and flag_target: - initial, target = flag_initial, flag_target - source = 'flags' - else: - resolved_initial, resolved_target = cluster.ResolveNodePoolVersions() - initial = flag_initial or resolved_initial - target = flag_target or resolved_target - source = 'auto-resolved' if not (flag_initial or flag_target) else 'mixed' - - logging.info( - 'NodePool versions (%s): initial=%s -> target=%s ' - '(cluster k8s_version=%s) | nodes_per_pool=%d | machine_type=%s', - source, initial, target, cluster.k8s_version, - _NODES_PER_NODEPOOL.value, - cluster.default_nodepool.machine_type - if hasattr(cluster, 'default_nodepool') else 'unknown', - ) - - scenarios = {s.strip().upper() for s in _SCENARIOS.value} - samples: list[sample.Sample] = [] - - if 'A' in scenarios: - samples += _RunScenarioA(cluster, initial, target) - if 'B' in scenarios: - samples += _RunScenarioB(cluster, initial) - if 'C' in scenarios: - scales = ( - [int(x.strip()) for x in _SCALE_SWEEP.value] - if _SCALE_SWEEP.value - else [_LARGE_SCALE_NODEPOOLS.value] - ) - logging.info('Scenario C: scale sweep = %s', scales) - for scale in scales: - scenario_c_samples = _RunScenarioC(cluster, initial, scale) - for s in scenario_c_samples: - s.metadata['scenario_c_scale'] = str(scale) - samples += scenario_c_samples - - # Tag all samples with version path and run config for published results. 
- run_meta = { - 'initial_version': str(initial), - 'target_version': str(target), - 'cluster_k8s_version': str(cluster.k8s_version), - 'nodes_per_nodepool': str(_NODES_PER_NODEPOOL.value), - 'concurrent_nodepools': str(_CONCURRENT_NODEPOOLS.value), - } - for s in samples: - s.metadata.update(run_meta) - - return samples - - -def Cleanup(benchmark_spec: bm_spec.BenchmarkSpec) -> None: - """Best-effort delete of leftover benchmark node pools and sleep pod.""" - cluster = benchmark_spec.container_cluster - if cluster is None: - return - kubectl.RunKubectlCommand( - ['delete', 'pod', _SLEEP_POD_NAME, '--ignore-not-found'], - raise_on_failure=False, - ) - try: - leftover = [ - n for n in cluster.GetNodePoolNames() if n.startswith(_PREFIX) - ] - except Exception: # pylint: disable=broad-except - logging.exception('Cleanup: failed to list node pools') - return - if not leftover: - return - logging.info('Cleanup: deleting %d leftover node pools', len(leftover)) - background_tasks.RunThreaded(cluster.DeleteNodePool, leftover) - - -# --------------------------------------------------------------------------- -# Scenario A -# --------------------------------------------------------------------------- - -def _RunScenarioA( - cluster: kubernetes_cluster.KubernetesCluster, - initial: str, - target: str, -) -> list[sample.Sample]: - """Concurrent CreateNodePool, UpgradeNodePool, DeleteNodePool.""" - n = _CONCURRENT_NODEPOOLS.value - if _PIPELINE_SCENARIO_A.value: - logging.info( - 'Scenario A (pipelined): %d pools, initial=%s, target=%s', n, initial, target) - return _RunScenarioAPipelined(cluster, n, initial, target) - - logging.info( - 'Scenario A (phase-by-phase): %d pools, initial=%s, target=%s', n, initial, target) - pool_names = [_SCENARIO_A_NAME(i) for i in range(n)] - configs_ = [_MakeNodePoolConfig(cluster, name) for name in pool_names] - samples: list[sample.Sample] = [] - - # ── Phase 1: concurrent creates ────────────────────────────────────────── - create_results = 
_RunAsync( - kickoff = lambda cfg: cluster.CreateNodePoolAsync(cfg, node_version=initial), - wait_fn = cluster.WaitForOperation, - items = configs_, - get_name = lambda cfg: cfg.name, - ) - samples += _OpSamples('ScenarioA_Create', create_results, - attempted_ops=len(pool_names)) - - # ── Phase 2: concurrent upgrades (only successfully created pools) ──────── - created = [name for name, _, _, err in create_results if err is None] - logging.info('Scenario A: %d/%d pools created — proceeding to upgrade', - len(created), n) - upgrade_results = _RunAsync( - kickoff = lambda name: cluster.UpgradeNodePoolAsync(name, target), - wait_fn = cluster.WaitForOperation, - items = created, - get_name = str, - ) - samples += _OpSamples('ScenarioA_Upgrade', upgrade_results, - attempted_ops=len(created)) - - # ── Phase 3: concurrent deletes (live-list to catch EKS rollbacks) ──────── - alive = [p for p in cluster.GetNodePoolNames() if p.startswith(f'{_PREFIX}a')] - logging.info('Scenario A: %d live pools found for delete (originally %d)', - len(alive), n) - delete_results = _RunAsync( - kickoff = cluster.DeleteNodePoolAsync, - wait_fn = cluster.WaitForOperation, - items = alive, - get_name = str, - ) - # attempted_ops=n: success rate reflects original request, not just live pools. - # EKS rolls back timed-out pools silently — without this fix delete shows 100%. - samples += _OpSamples('ScenarioA_Delete', delete_results, - attempted_ops=n) - return samples - - -def _RunScenarioAPipelined( - cluster: kubernetes_cluster.KubernetesCluster, - n: int, - initial: str, - target: str, -) -> list[sample.Sample]: - """Per-pool pipeline: create->upgrade->delete back-to-back per thread. - - Minimizes wall time: max_i(create_i + upgrade_i + delete_i) vs - max(creates)+max(upgrades)+max(deletes) in phase-by-phase mode. - Trade-off: ops run under mixed-type concurrent load. 
- """ - pool_names = [_SCENARIO_A_NAME(i) for i in range(n)] - creates = _Results() - upgrades = _Results() - deletes = _Results() - - def _do_pool(name: str): - cfg = _MakeNodePoolConfig(cluster, name) - init, e2e, err = _TimedAsync( - lambda: cluster.CreateNodePoolAsync(cfg, node_version=initial), - cluster.WaitForOperation, - ) - creates.add(name, init, e2e, err) - if err is not None: - return - init, e2e, err = _TimedAsync( - lambda: cluster.UpgradeNodePoolAsync(name, target), - cluster.WaitForOperation, - ) - upgrades.add(name, init, e2e, err) - init, e2e, err = _TimedAsync( - lambda: cluster.DeleteNodePoolAsync(name), - cluster.WaitForOperation, - ) - deletes.add(name, init, e2e, err) - - background_tasks.RunThreaded( - _do_pool, pool_names, - max_concurrent_threads=min(n, _MAX_CONCURRENT.value), - ) - samples: list[sample.Sample] = [] - samples += _OpSamples('ScenarioA_Create', creates.entries, attempted_ops=n) - samples += _OpSamples('ScenarioA_Upgrade', upgrades.entries, attempted_ops=n) - samples += _OpSamples('ScenarioA_Delete', deletes.entries, attempted_ops=n) - return samples - - -# --------------------------------------------------------------------------- -# Scenario B -# --------------------------------------------------------------------------- - -def _RunScenarioB( - cluster: kubernetes_cluster.KubernetesCluster, - initial: str, -) -> list[sample.Sample]: - """CreateNodePool fired concurrently with a long-running cluster update. - - Both ops kick off async on separate threads; initiation + E2E latency - recorded independently. Overlap window duration = ClusterUpdate E2E latency. 
- """ - logging.info('Scenario B: overlapping cluster update + node-pool create') - cfg = _MakeNodePoolConfig(cluster, _SCENARIO_B_NAME) - results = _Results() - - def _do_cluster_update(): - init, e2e, err = _TimedAsync( - cluster.UpdateClusterAsync, cluster.WaitForOperation) - results.add('ScenarioB_ClusterUpdate', init, e2e, err) - logging.info('Scenario B ClusterUpdate: init=%.2fs e2e=%.2fs ok=%s', - init, e2e, err is None) - - def _do_create(): - init, e2e, err = _TimedAsync( - lambda: cluster.CreateNodePoolAsync(cfg, node_version=initial), - cluster.WaitForOperation, - ) - results.add('ScenarioB_NodePoolCreate', init, e2e, err) - logging.info('Scenario B NodePoolCreate: init=%.2fs e2e=%.2fs ok=%s', - init, e2e, err is None) - - background_tasks.RunThreaded(lambda fn: fn(), [_do_cluster_update, _do_create]) - - samples: list[sample.Sample] = [] - for entry in results.entries: - name, init_dur, e2e_dur, err = entry - samples += _OpSamples(name, [(name, init_dur, e2e_dur, err)], attempted_ops=1) - - # Remove test pool (best-effort). - try: - cluster.DeleteNodePool(_SCENARIO_B_NAME) - except Exception: # pylint: disable=broad-except - logging.exception('Scenario B: failed to delete test pool') - return samples - - -# --------------------------------------------------------------------------- -# Scenario C -# --------------------------------------------------------------------------- - -def _RunScenarioC( - cluster: kubernetes_cluster.KubernetesCluster, - initial: str, - scale: int, -) -> list[sample.Sample]: - """Large-scale node-pool provisioning at a given scale. - - Streams all `scale` creates through a single executor capped at - _MAX_CONCURRENT workers — as each op completes the next starts immediately - (no batch barriers). Delete uses a live-list so EKS-rolled-back pools are - excluded from the denominator correctly. 
- """ - logging.info( - 'Scenario C: scale=%d, max_concurrent=%d, initial_version=%s', - scale, _MAX_CONCURRENT.value, initial, - ) - pool_names = [_SCENARIO_C_NAME(i) for i in range(scale)] - configs_ = [_MakeNodePoolConfig(cluster, name) for name in pool_names] - samples: list[sample.Sample] = [] - - # ── Creates ─────────────────────────────────────────────────────────────── - create_results = _RunAsync( - kickoff = lambda cfg: cluster.CreateNodePoolAsync( - cfg, node_version=initial), - wait_fn = cluster.WaitForOperation, - items = configs_, - get_name = lambda cfg: cfg.name, - ) - created_ok = sum(1 for _, _, _, err in create_results if err is None) - logging.info('Scenario C scale=%d: %d/%d creates succeeded', - scale, created_ok, scale) - samples += _OpSamples('ScenarioC_Create', create_results, - attempted_ops=scale) - - # ── Deletes (live-list) ─────────────────────────────────────────────────── - alive = [p for p in cluster.GetNodePoolNames() if p.startswith(f'{_PREFIX}c')] - logging.info( - 'Scenario C scale=%d: %d live pools for delete (originally requested %d; ' - '%d rolled back by cloud)', - scale, len(alive), scale, scale - len(alive), - ) - if not alive: - logging.warning( - 'Scenario C scale=%d: 0 live pools — all timed-out creates were ' - 'rolled back. Recording 0%% delete success rate.', scale) - samples += _OpSamples('ScenarioC_Delete', [], attempted_ops=scale) - return samples - - delete_results = _RunAsync( - kickoff = cluster.DeleteNodePoolAsync, - wait_fn = cluster.WaitForOperation, - items = alive, - get_name = str, - ) - # attempted_ops=scale: accurate rate against original request count. 
- samples += _OpSamples('ScenarioC_Delete', delete_results, - attempted_ops=scale) - return samples - - -# --------------------------------------------------------------------------- -# Helpers -# --------------------------------------------------------------------------- - -class _Results: - """Thread-safe collector for (name, init_latency, e2e_latency, error).""" - - def __init__(self): - self._lock = threading.Lock() - self.entries: list[tuple[str, float, float, Exception | None]] = [] - - def add(self, name: str, init_dur: float, e2e_dur: float, - err: Exception | None) -> None: - with self._lock: - self.entries.append((name, init_dur, e2e_dur, err)) - - -def _TimedAsync( - kickoff: Callable[[], str], - wait_fn: Callable[[str], None], -) -> tuple[float, float, Exception | None]: - """Runs kickoff() then wait_fn(handle); returns (init_lat, e2e_lat, err). - - init_lat = time for kickoff() to return (API accepted). - e2e_lat = total wall time including wait. On kickoff failure both are set - to elapsed time at failure point. - """ - init_start = time.time() - try: - handle = kickoff() - except Exception as exc: # pylint: disable=broad-except - elapsed = time.time() - init_start - return elapsed, elapsed, exc - init_dur = time.time() - init_start - try: - wait_fn(handle) - return init_dur, time.time() - init_start, None - except Exception as exc: # pylint: disable=broad-except - return init_dur, time.time() - init_start, exc - - -def _RunAsync( - kickoff: Callable, - wait_fn: Callable[[str], None], - items: list, - get_name: Callable[[object], str], -) -> list[tuple[str, float, float, Exception | None]]: - """Fires kickoff(item) concurrently for all items; returns timed results. - - Uses background_tasks.RunThreaded with a concurrency cap for streaming - execution — completed ops free their slot immediately for the next one. 
- """ - if not items: - return [] - results = _Results() - cap = min(len(items), _MAX_CONCURRENT.value) - - def _wrap(item): - init_dur, e2e_dur, err = _TimedAsync(lambda: kickoff(item), wait_fn) - name = get_name(item) - results.add(name, init_dur, e2e_dur, err) - logging.info('%s ok=%s initiation=%.2fs end_to_end=%.2fs', - name, err is None, init_dur, e2e_dur) - - background_tasks.RunThreaded(_wrap, items, max_concurrent_threads=cap) - return results.entries - - -def _MakeNodePoolConfig( - cluster: kubernetes_cluster.KubernetesCluster, - name: str, -) -> container_lib.BaseNodePoolConfig: - """Builds a node-pool config from the cluster's default pool.""" - cfg = copy.copy(cluster.default_nodepool) - cfg.name = name - cfg.num_nodes = _NODES_PER_NODEPOOL.value - cfg.min_nodes = _NODES_PER_NODEPOOL.value - cfg.max_nodes = _NODES_PER_NODEPOOL.value - return cfg - - -def _OpSamples( - metric_prefix: str, - results: list[tuple[str, float, float, Exception | None]], - attempted_ops: int = None, -) -> list[sample.Sample]: - """Per-op + aggregate samples for initiation and end-to-end latency. - - Args: - metric_prefix: prefix for all metric names. - results: list of (operation_name, init_lat, e2e_lat, err). - attempted_ops: total ops originally requested. Used as the denominator - for SuccessRate so EKS-rolled-back pools (which never - appear in results) are counted as failures, not ignored. - If None, len(results) is used (original behavior). 
- """ - samples: list[sample.Sample] = [] - init_latencies: list[float] = [] - e2e_latencies: list[float] = [] - success = 0 - - for name, init_dur, e2e_dur, err in results: - meta = {'operation_name': name, 'success': str(err is None)} - if err is not None: - meta['error'] = str(err)[:200] - else: - success += 1 - init_latencies.append(init_dur) - e2e_latencies.append(e2e_dur) - samples.append(sample.Sample( - f'{metric_prefix}_InitiationLatency', init_dur, 'seconds', dict(meta))) - samples.append(sample.Sample( - f'{metric_prefix}_EndToEndLatency', e2e_dur, 'seconds', dict(meta))) - - # ── Success rate ────────────────────────────────────────────────────────── - total = attempted_ops if attempted_ops is not None else len(results) - executed = len(results) - if total > 0: - samples.append(sample.Sample( - f'{metric_prefix}_SuccessRate', - 100.0 * success / total, - 'percent', - { - 'total_ops': str(total), - 'executed_ops': str(executed), - 'successful_ops': str(success), - 'skipped_ops': str(total - executed), # cloud-rolled-back ops - }, - )) - - # ── Aggregate stats (successful ops only) ──────────────────────────────── - for phase_label, latencies in ( - ('InitiationLatency', init_latencies), - ('EndToEndLatency', e2e_latencies), - ): - if len(latencies) >= 2: - samples += _AggregateSamples(metric_prefix, phase_label, latencies) - if len(latencies) >= 4: - samples += _OutlierSamples(metric_prefix, phase_label, latencies) - - return samples - - -def _AggregateSamples( - metric_prefix: str, phase_label: str, latencies: list[float] -) -> list[sample.Sample]: - """Emits Mean/StdDev/Min/Median/P90/P99/Max samples.""" - pcts = sample.PercentileCalculator( - latencies, percentiles=(0, 50, 90, 99, 100)) - agg_meta = {'sample_count': str(len(latencies))} - out: list[sample.Sample] = [] - for label, key in ( - ('Mean', 'average'), - ('StdDev', 'stddev'), - ('Min', 'p0'), - ('Median', 'p50'), - ('P90', 'p90'), - ('P99', 'p99'), - ('Max', 'p100'), - ): - if key in pcts: - 
out.append(sample.Sample( - f'{metric_prefix}_{phase_label}_{label}', - pcts[key], 'seconds', agg_meta)) - return out - - -def _OutlierSamples( - metric_prefix: str, phase_label: str, latencies: list[float] -) -> list[sample.Sample]: - """Tukey IQR outlier detection; emits OutlierCount sample with metadata.""" - sorted_lats = sorted(latencies) - n = len(sorted_lats) - q1 = sorted_lats[n // 4] - q3 = sorted_lats[(3 * n) // 4] - iqr = q3 - q1 - upper_fence = q3 + 1.5 * iqr - lower_fence = q1 - 1.5 * iqr - outliers = [v for v in latencies if v > upper_fence or v < lower_fence] - meta = { - 'sample_count': str(n), - 'q1': f'{q1:.3f}', - 'q3': f'{q3:.3f}', - 'iqr': f'{iqr:.3f}', - 'upper_fence': f'{upper_fence:.3f}', - 'lower_fence': f'{lower_fence:.3f}', - 'outlier_values': ','.join(f'{v:.2f}' for v in outliers), - } - if outliers: - logging.warning( - '[Outliers] %s %s: %d outlier(s) detected: %s (fence: %.2f-%.2f)', - metric_prefix, phase_label, len(outliers), - [f'{v:.2f}s' for v in outliers], lower_fence, upper_fence, - ) - return [sample.Sample( - f'{metric_prefix}_{phase_label}_OutlierCount', - len(outliers), 'count', meta)] From 57ba3812d354ebc5dfb8e0c95a60876494584d69 Mon Sep 17 00:00:00 2001 From: Ashish Suneja Date: Wed, 27 May 2026 11:30:07 +0000 Subject: [PATCH 13/19] EKS: re-apply AWS fixes on top of merged remote --- .../aws/elastic_kubernetes_service.py | 87 ++++++++++++++++--- 1 file changed, 74 insertions(+), 13 deletions(-) diff --git a/perfkitbenchmarker/providers/aws/elastic_kubernetes_service.py b/perfkitbenchmarker/providers/aws/elastic_kubernetes_service.py index 10fed1e035..26fa0e5f1b 100644 --- a/perfkitbenchmarker/providers/aws/elastic_kubernetes_service.py +++ b/perfkitbenchmarker/providers/aws/elastic_kubernetes_service.py @@ -865,7 +865,7 @@ def _DiscoverSubnetsPerAZ(self) -> dict[str, str]: 'ec2', 'describe-subnets', '--region', self.region, '--subnet-ids', *subnet_ids, - '--query', 'Subnets[*].{SubnetId:SubnetId,AZ:AvailabilityZone}', + 
'--query', 'Subnets[*].{SubnetId:SubnetId,AZ:AvailabilityZone,Public:MapPublicIpOnLaunch}', '--output', 'json', ], raise_on_failure=False, @@ -882,13 +882,22 @@ def _DiscoverSubnetsPerAZ(self) -> dict[str, str]: # Do NOT filter by control_plane_zones — PKB truncates it to 2 AZs. # Accept all subnets the VPC has across all AZs. + # Build AZ map — always prefer public subnets (MapPublicIpOnLaunch=True) + # which have an internet gateway route. Private subnets lack IGW routes + # and nodes launched there cannot reach the EKS API server to join. az_map: dict[str, str] = {} + az_map_private: dict[str, str] = {} for s in subnets: az = s['AZ'] - # Keep only one subnet per AZ (prefer public subnets — already filtered - # by _DiscoverSubnets which returns the cluster's configured subnets) - if az not in az_map: + if s.get('Public'): az_map[az] = s['SubnetId'] + logging.info('[EKS] AZ %s → public subnet %s', az, s['SubnetId']) + elif az not in az_map: + az_map_private[az] = s['SubnetId'] + for az, sid in az_map_private.items(): + if az not in az_map: + logging.warning('[EKS] AZ %s has no public subnet — using private %s', az, sid) + az_map[az] = sid logging.info( '[EKS] Subnet-per-AZ mapping: %s (from %d total subnets)', @@ -1000,7 +1009,9 @@ def CreateNodePoolAsync( # Extract numeric suffix from pool name to determine AZ assignment name = nodepool_config.name suffix = ''.join(c for c in name if c.isdigit()) - idx = int(suffix) if suffix else 0 + # pkbmb (Scenario B) has no suffix — assign to us-east-1b (idx=1) + # to avoid competing with us-east-1a which has the default nodegroup. 
+ idx = int(suffix) if suffix else 1 zones = sorted(az_subnets.keys()) assigned_az = zones[idx % len(zones)] subnets = [az_subnets[assigned_az]] @@ -1081,11 +1092,29 @@ def CreateNodePoolAsync( '--cli-input-json', f'file://{filename}', ] - _, stderr, retcode = vm_util.IssueCommand( - cmd, timeout=300, raise_on_failure=False - ) - if retcode: + # Retry on EC2 RunInstances throttling at high concurrency (99 pools). + max_retries = 5 + base_delay = 10 + for attempt in range(max_retries): + _, stderr, retcode = vm_util.IssueCommand( + cmd, timeout=300, raise_on_failure=False + ) + if retcode == 0: + break + if 'Request limit exceeded' in stderr or 'ThrottlingException' in stderr: + if attempt < max_retries - 1: + delay = base_delay * (2 ** attempt) + logging.warning( + '[EKS] CreateNodegroup %s throttled — retry %d/%d in %ds', + nodepool_config.name, attempt + 1, max_retries, delay, + ) + time.sleep(delay) + continue raise errors.Resource.CreationError(stderr) + else: + raise errors.Resource.CreationError( + f'CreateNodegroup {nodepool_config.name} failed after retries: {stderr}' + ) return f'ng_active:{nodepool_config.name}' def UpgradeNodePoolAsync(self, name: str, target_version: str) -> str: @@ -1199,10 +1228,42 @@ def UpdateClusterAsync(self) -> str: '--logging', payload, ] - stdout, stderr, retcode = vm_util.IssueCommand( - upd, timeout=300, raise_on_failure=False - ) - if retcode: + # Wait for cluster ACTIVE before firing update — at 99-pool scale + # Scenario A leaves the cluster UPDATING causing ResourceInUseException. 
+ logging.info('[EKS] Waiting for cluster ACTIVE before ClusterUpdate...') + for _ in range(60): + status_out, _, status_rc = vm_util.IssueCommand( + util.AWS_PREFIX + [ + 'eks', 'describe-cluster', + '--name', self.name, + '--region', self.region, + '--query', 'cluster.status', + '--output', 'text', + ], + raise_on_failure=False, + ) + if status_rc == 0 and status_out.strip() == 'ACTIVE': + logging.info('[EKS] Cluster is ACTIVE — proceeding with ClusterUpdate') + break + logging.info('[EKS] Cluster status=%s — waiting 5s...', status_out.strip()) + time.sleep(5) + # Retry on ResourceInUseException race condition + upd_max_retries = 10 + upd_base_delay = 30 + for upd_attempt in range(upd_max_retries): + stdout, stderr, retcode = vm_util.IssueCommand( + upd, timeout=300, raise_on_failure=False + ) + if retcode == 0: + break + if 'ResourceInUseException' in stderr and upd_attempt < upd_max_retries - 1: + delay = upd_base_delay * (upd_attempt + 1) + logging.warning( + '[EKS] UpdateClusterConfig ResourceInUseException — retry %d/%d in %ds', + upd_attempt + 1, upd_max_retries, delay, + ) + time.sleep(delay) + continue raise errors.Resource.CreationError(stderr) update_id = json.loads(stdout)['update']['id'] return f'cluster_update:{update_id}' From 5f49bef0e7ed09012c60fb217717edf078791b61 Mon Sep 17 00:00:00 2001 From: Srikant Patil Date: Wed, 27 May 2026 17:37:09 +0530 Subject: [PATCH 14/19] PR comments fixes --- .pylintrc | 160 ------------------ .../kubernetes_management_benchmark.py | 64 +++---- .../container_service/kubernetes_cluster.py | 12 +- 3 files changed, 32 insertions(+), 204 deletions(-) delete mode 100644 .pylintrc diff --git a/.pylintrc b/.pylintrc deleted file mode 100644 index 5bfdc81cb3..0000000000 --- a/.pylintrc +++ /dev/null @@ -1,160 +0,0 @@ -# copybara:strip_begin(internal) -# This is based on http://google3/devtools/gpylint/config/base/rc -# copybara:strip_end - - -# Default configuration for pylint, which should pass for all (incremental) 
changes. -# See CONTRIBUTING.md for more. - -[MESSAGES CONTROL] -# List of checkers and warnings to enable. -enable=indexing-exception,old-raise-syntax - -disable=abstract-method, - attribute-defined-outside-init, - bad-option-value, - c-extension-no-member, - design, - file-ignored, - fixme, - global-statement, - invalid-metaclass, - locally-disabled, - locally-enabled, - misplaced-comparison-constant, - no-else-break, - no-else-continue, - no-else-raise, - no-else-return, - no-self-use, - pointless-except, - redundant-u-string-prefix, - similarities, - star-args, - suppressed-message, - trailing-newlines, - ungrouped-imports, - unnecessary-pass, - unspecified-encoding, - unsubscriptable-object, - useless-else-on-loop, - useless-object-inheritance, - useless-suppression, - -[BASIC] - -# Regular expression which should only match the name -# of functions or classes which do not require a docstring. -no-docstring-rgx=(__.*__|main) - -# Min length in lines of a function that requires a docstring. -docstring-min-length=12 - -# Regular expression which should only match correct module names. The -# leading underscore is sanctioned for private modules by Google's style -# guide. -# -# There are exceptions to the basic rule (_?[a-z][a-z0-9_]*) to cover -# requirements of Python's module system and of the presubmit framework. -module-rgx=^(_?[a-z][a-z0-9_]*)|__init__|PRESUBMIT|PRESUBMIT_unittest$ - -# Regular expression which should only match correct module level names -const-rgx=^(_?[A-Z][A-Z0-9_]*|__[a-z0-9_]+__|_?[a-z][a-z0-9_]*)$ - -# Regular expression which should only match correct class attribute -class-attribute-rgx=^(_?[A-Z][A-Z0-9_]*|__[a-z0-9_]+__|_?[a-z][a-z0-9_]*)$ - -# Regular expression which should only match correct class names -class-rgx=^_?[A-Z][a-zA-Z0-9]*$ - -# Regular expression which should only match correct function names. -# 'PascalCase' and 'snake_case' group names are used for consistency of naming -# styles across functions and methods. 
-function-rgx=^(?:(?P_?[A-Z][a-zA-Z0-9]*)|(?P_?[a-z][a-z0-9_]*))$ - -# Regular expression which should only match correct method names. -# 'PascalCase' and 'snake_case' group names are used for consistency of naming -# styles across functions and methods. 'exempt' indicates a name which is -# consistent with all naming styles. -method-rgx=(?x)^(?:(?P_[a-z0-9_]+__|next)|(?P_{0,2}[A-Z][a-zA-Z0-9_]*)|(?P_{0,2}[a-z][a-z0-9_]*))$ - -# Regular expression which should only match correct instance attribute names -attr-rgx=^_{0,2}[a-z][a-z0-9_]*$ - -# Regular expression which should only match correct argument names -argument-rgx=^[a-z][a-z0-9_]*$ - -# Regular expression which should only match correct variable names -variable-rgx=^[a-z][a-z0-9_]*$ - -# Regular expression which should only match correct list comprehension / -# generator expression variable names -inlinevar-rgx=^[a-z][a-z0-9_]*$ - -# Regular expression which should only match correct TypeVar names -typevar-rgx=^_{0,2}(?:[^\W\da-z_]+|(?:[^\W\da-z_]+[^\WA-Z_]+)+T?)(?:_co(?:ntra)?)?$ - -# Good variable names which should always be accepted, separated by a comma -good-names=main,_ - -# List of decorators that define properties, such as abc.abstractproperty. -property-classes=abc.abstractproperty,functools.cached_property,google3.pyglib.function_utils.cached.property,cached_property.cached_property,cached_property.threaded_cached_property,cached_property.cached_property_with_ttl,cached_property.threaded_cached_property_with_ttl,werkzeug.utils.cached_property - -[VARIABLES] - -# Tells whether we should check for unused import in __init__ files. -init-import=no - -# A regular expression matching names used for dummy variables (i.e. not used). -dummy-variables-rgx=^\*{0,2}(_$|unused_|dummy_) - -# List of additional names supposed to be defined in builtins. Remember that -# you should avoid to define new builtins when possible. -additional-builtins= - -# List of modules that are allowed to redefine builtins. 
-redefining-builtins-modules=six,six.moves,past.builtins,future.builtins,functools - -[STRING] - -# This flag controls whether the implicit-str-concat should -# generate a warning on implicit string concatenation in sequences defined over -# several lines. -check-str-concat-over-line-jumps=yes - -[CLASSES] - -# List of method names used to declare (i.e. assign) instance attributes. -defining-attr-methods=__init__,__new__,setUp - -# "class_" is also a valid for the first argument to a class method. -valid-classmethod-first-arg=cls,class_ - - -[FORMAT] - -# Maximum number of characters on a single line. -max-line-length=80 - -# Regexp for a line that is allowed to be longer than the limit. -# This "ignore" regex is today composed of: -# (1) p4 expansion $Id$ lines -# (2) Depot paths for go/ifthisthenthatlint directives. -# (3) Long string constants not containing whitespaces. This is needed now we -# have switched Pyformat to use Pyink, and it would wrap strings constants -# with a narrow range of lengths (less than 80 - indentation) in parens. -# This causes GPylint to complain otherwise allowed per -# go/pystyle#line-length. See b/262137806 for more information. -# Other lines might be allowed to be long by gpylint.pyformat_filter: see that -# module for more information. -ignore-long-lines=(?x)(\$Id:\s\/\/depot\/.+\#\d+\s\$|^\s*\#\ LINT\.ThenChange|^\s*\w+\ =\ (?P['"])\S+(?P=quote)$) - -# Maximum number of lines in a module -max-module-lines=99999 - -# String used as indentation unit. We differ from PEP8's normal 4 spaces. 
-indent-string=' ' - -# Do not warn about multiple statements on a single line for constructs like -# if test: stmt -single-line-if-stmt=y diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes_management_benchmark.py b/perfkitbenchmarker/linux_benchmarks/kubernetes_management_benchmark.py index 5819062b61..6b6569dca8 100644 --- a/perfkitbenchmarker/linux_benchmarks/kubernetes_management_benchmark.py +++ b/perfkitbenchmarker/linux_benchmarks/kubernetes_management_benchmark.py @@ -186,22 +186,17 @@ def Prepare(benchmark_spec: bm_spec.BenchmarkSpec) -> None: ) # Spec workload: "a simple container that sleeps for a given time". # Confirms data-plane reachability; generates no data-plane load. - _, _, rc = kubectl.RunKubectlCommand( - [ - 'run', - _SLEEP_POD_NAME, - '--image=busybox', - '--restart=Never', - '--', - 'sleep', - '86400', - ], - raise_on_failure=False, + kubectl.RunKubectlCommand( + [ + 'run', + _SLEEP_POD_NAME, + '--image=busybox', + '--restart=Never', + '--', + 'sleep', + '86400', + ], ) - if rc: - logging.warning( - 'Sleep workload deploy returned rc=%d (non-fatal; continuing)', rc) - def _CleanStartSweep(cluster: kubernetes_cluster.KubernetesCluster) -> None: """Deletes any stale pkbm* node pools so each run starts clean (spec C.2).""" @@ -727,24 +722,20 @@ def _AggregateSamples(metric_prefix: str, phase_label: str, latencies: list[float]) -> list[sample.Sample]: """Emits Mean/StdDev/Min/Median/P90/P99/Max samples for a latency series.""" n = len(latencies) - sorted_lats = sorted(latencies) meta = {'sample_count': str(n)} - def _Percentile(p): - idx = (p / 100.0) * (n - 1) - lo = int(idx) - hi = min(lo + 1, n - 1) - frac = idx - lo - return sorted_lats[lo] * (1 - frac) + sorted_lats[hi] * frac + # statistics.quantiles with method='inclusive' matches linear interpolation + # and returns n-1 cut points; index 89→P90, 98→P99. 
+ quantiles = statistics.quantiles(latencies, n=100, method='inclusive') stats = [ - ('Mean', statistics.mean(latencies)), - ('StdDev', statistics.pstdev(latencies)), - ('Min', sorted_lats[0]), - ('Median', statistics.median(latencies)), - ('P90', _Percentile(90)), - ('P99', _Percentile(99)), - ('Max', sorted_lats[-1]), + ('Mean', statistics.mean(latencies)), + ('StdDev', statistics.pstdev(latencies)), + ('Min', min(latencies)), + ('Median', statistics.median(latencies)), + ('P90', quantiles[89]), + ('P99', quantiles[98]), + ('Max', max(latencies)), ] result = [] for label, value in stats: @@ -761,18 +752,9 @@ def _Percentile(p): def _OutlierSamples(metric_prefix: str, phase_label: str, latencies: list[float]) -> list[sample.Sample]: """Emits a single OutlierCount sample using IQR-fence outlier detection.""" - sorted_lats = sorted(latencies) - n = len(sorted_lats) - - def _Percentile(p): - idx = (p / 100.0) * (n - 1) - lo = int(idx) - hi = min(lo + 1, n - 1) - frac = idx - lo - return sorted_lats[lo] * (1 - frac) + sorted_lats[hi] * frac - - q1 = _Percentile(25) - q3 = _Percentile(75) + # statistics.quantiles(n=4) returns [Q1, Q2, Q3]; indices 0 and 2. 
+ quartiles = statistics.quantiles(latencies, n=4, method='inclusive') + q1, q3 = quartiles[0], quartiles[2] iqr = q3 - q1 lower_fence = q1 - 1.5 * iqr upper_fence = q3 + 1.5 * iqr diff --git a/perfkitbenchmarker/resources/container_service/kubernetes_cluster.py b/perfkitbenchmarker/resources/container_service/kubernetes_cluster.py index 4a5015819c..8c36bfa84b 100644 --- a/perfkitbenchmarker/resources/container_service/kubernetes_cluster.py +++ b/perfkitbenchmarker/resources/container_service/kubernetes_cluster.py @@ -321,9 +321,15 @@ def _GetAddressFromIngress(self, ingress_out: str): ) return 'http://' + ip.strip() - def AddNodepool(self, batch_name: str, pool_id: str): - """Adds an additional nodepool with the given name to the cluster.""" - pass + def AddNodepool(self, batch_name: str, pool_id: str) -> None: + """Adds a node pool; delegates to CreateNodePool for standard clusters. + + Karpenter-based subclasses override this to apply a manifest instead. + """ + nodepool_config = container_lib.BaseNodePoolConfig( + name=f'{batch_name}-{pool_id}', + ) + self.CreateNodePool(nodepool_config) def CreateNodePool( self, From 79fc26db6499a5c5a5be6d71d05124dee2e81e7b Mon Sep 17 00:00:00 2001 From: Srikant Patil Date: Wed, 27 May 2026 17:55:53 +0530 Subject: [PATCH 15/19] Azure Report fixes: scenario running before previous one finished --- .../kubernetes_management_benchmark.py | 4 ++ .../azure/azure_kubernetes_service.py | 51 +++++++++++++++++-- 2 files changed, 50 insertions(+), 5 deletions(-) diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes_management_benchmark.py b/perfkitbenchmarker/linux_benchmarks/kubernetes_management_benchmark.py index 6b6569dca8..d153e08142 100644 --- a/perfkitbenchmarker/linux_benchmarks/kubernetes_management_benchmark.py +++ b/perfkitbenchmarker/linux_benchmarks/kubernetes_management_benchmark.py @@ -257,6 +257,10 @@ def Run(benchmark_spec: bm_spec.BenchmarkSpec) -> list[sample.Sample]: if 'B' in scenarios: samples += 
_RunScenarioB(cluster, initial) if 'C' in scenarios: + # fix: Scenario A/B pools may still be in Deleting state and count + # toward AKS's 100-pool cluster limit. Sweep them out before Scenario C + # so we don't hit MaxAgentPoolCountReached mid-run. + _CleanStartSweep(cluster) scales = ([int(x.strip()) for x in _SCALE_SWEEP.value] if _SCALE_SWEEP.value else [_LARGE_SCALE_NODEPOOLS.value]) logging.info('Scenario C: scale sweep = %s', scales) diff --git a/perfkitbenchmarker/providers/azure/azure_kubernetes_service.py b/perfkitbenchmarker/providers/azure/azure_kubernetes_service.py index f9c227a986..4e44726b4c 100644 --- a/perfkitbenchmarker/providers/azure/azure_kubernetes_service.py +++ b/perfkitbenchmarker/providers/azure/azure_kubernetes_service.py @@ -632,10 +632,32 @@ def CreateNodePoolAsync( f'pkb_nodepool={nodepool_config.name}', '--no-wait', ] + self._GetNodeFlags(nodepool_config, version_override=node_version) - _, stderr, retcode = vm_util.IssueCommand( - cmd, timeout=300, raise_on_failure=False + # fix: raise timeout to 600s (AKS can take >300s to accept a + # --no-wait request under concurrent load) and retry on transient errors + # that indicate the cluster is temporarily at its concurrent-op or + # pool-count limit. 
+ _RETRYABLE = ( + 'OperationNotAllowed', + 'ConflictingOperationInProgress', + 'MaxAgentPoolCountReached', ) - if retcode: + _MAX_RETRIES = 5 + _RETRY_SLEEP_S = 30 + for attempt in range(_MAX_RETRIES + 1): + _, stderr, retcode = vm_util.IssueCommand( + cmd, timeout=600, raise_on_failure=False + ) + if not retcode: + break + if attempt < _MAX_RETRIES and any(e in stderr for e in _RETRYABLE): + logging.warning( + '[AKS] CreateNodePoolAsync %s: retryable error (attempt %d/%d),' + ' sleeping %ds: %s', + _AzureNodePoolName(nodepool_config.name), + attempt + 1, _MAX_RETRIES, _RETRY_SLEEP_S, stderr[:120], + ) + time.sleep(_RETRY_SLEEP_S) + continue raise errors.Resource.CreationError(stderr) return f'np_succeeded:{_AzureNodePoolName(nodepool_config.name)}' @@ -653,8 +675,10 @@ def UpgradeNodePoolAsync(self, name: str, target_version: str) -> str: target_version, '--no-wait', ] + self.resource_group.args + # fix: raise timeout to 600s — az aks nodepool upgrade --no-wait + # can take >300s to be accepted by Azure under concurrent load. _, stderr, retcode = vm_util.IssueCommand( - cmd, timeout=300, raise_on_failure=False + cmd, timeout=600, raise_on_failure=False ) if retcode: raise errors.Resource.CreationError(stderr) @@ -672,10 +696,20 @@ def DeleteNodePoolAsync(self, name: str) -> str: _AzureNodePoolName(name), '--no-wait', ] + self.resource_group.args + # fix: raise timeout to 600s and treat NotFound as success. + # A pool that never existed or was already removed is the desired end-state + # for a delete — raising CreationError here caused all delete phases to + # fail for any pool whose create had previously failed. 
_, stderr, retcode = vm_util.IssueCommand( - cmd, timeout=300, raise_on_failure=False + cmd, timeout=600, raise_on_failure=False ) if retcode: + if 'NotFound' in stderr or 'not found' in stderr.lower(): + logging.info( + '[AKS] DeleteNodePoolAsync: %s already gone — treating as success', + _AzureNodePoolName(name), + ) + return f'np_gone:{_AzureNodePoolName(name)}' raise errors.Resource.CreationError(stderr) return f'np_gone:{_AzureNodePoolName(name)}' @@ -766,6 +800,8 @@ def WaitForOperation(self, op_handle: str) -> None: retryable_exceptions=(errors.Resource.RetryableCreationError,), ) def _wait_np_succeeded(): + # fix: bound each individual poll call to 120s so a hung + # az aks nodepool show doesn't block the retry loop indefinitely. out, err, rc = vm_util.IssueCommand( [ azure.AZURE_PATH, @@ -783,6 +819,7 @@ def _wait_np_succeeded(): ] + self.resource_group.args, raise_on_failure=False, + timeout=120, ) if rc: raise errors.Resource.RetryableCreationError(err) @@ -804,6 +841,7 @@ def _wait_np_succeeded(): retryable_exceptions=(errors.Resource.RetryableDeletionError,), ) def _wait_np_gone(): + # fix: per-poll timeout bound. _, err, rc = vm_util.IssueCommand( [ azure.AZURE_PATH, @@ -817,6 +855,7 @@ def _wait_np_gone(): ] + self.resource_group.args, raise_on_failure=False, + timeout=120, ) if rc and ('NotFound' in (err or '') or 'not found' in (err or '').lower()): return @@ -833,6 +872,7 @@ def _wait_np_gone(): retryable_exceptions=(errors.Resource.RetryableCreationError,), ) def _wait_cluster_succeeded(): + # fix: per-poll timeout bound. 
out, err, rc = vm_util.IssueCommand( [ azure.AZURE_PATH, @@ -847,6 +887,7 @@ def _wait_cluster_succeeded(): ] + self.resource_group.args, raise_on_failure=False, + timeout=120, ) if rc: raise errors.Resource.RetryableCreationError(err) From 73ec55061deae7141d047200558b35f1c15c7dc0 Mon Sep 17 00:00:00 2001 From: Ashish Suneja Date: Wed, 27 May 2026 12:32:00 +0000 Subject: [PATCH 16/19] EKS: gate capacity reservations + launch templates behind flag --- .../aws/elastic_kubernetes_service.py | 413 +++++++++--------- perfkitbenchmarker/providers/aws/flags.py | 8 + 2 files changed, 223 insertions(+), 198 deletions(-) diff --git a/perfkitbenchmarker/providers/aws/elastic_kubernetes_service.py b/perfkitbenchmarker/providers/aws/elastic_kubernetes_service.py index 26fa0e5f1b..e656ab49eb 100644 --- a/perfkitbenchmarker/providers/aws/elastic_kubernetes_service.py +++ b/perfkitbenchmarker/providers/aws/elastic_kubernetes_service.py @@ -240,26 +240,28 @@ def _Delete(self): except Exception: # pylint: disable=broad-except pass # Clean up dynamically created launch templates and capacity reservations - for az in getattr(self, '_capacity_reservation_ids', {}).keys(): - vm_util.IssueCommand( - util.AWS_PREFIX + [ - 'ec2', 'delete-launch-template', - '--launch-template-name', f'pkb-eks-lt-{az}', - '--region', self.region, - ], - raise_on_failure=False, - ) - logging.info('[EKS] Deleted launch template pkb-eks-lt-%s', az) - for az, res_id in getattr(self, '_capacity_reservation_ids', {}).items(): - vm_util.IssueCommand( - util.AWS_PREFIX + [ - 'ec2', 'cancel-capacity-reservation', - '--capacity-reservation-id', res_id, - '--region', self.region, - ], - raise_on_failure=False, - ) - logging.info('[EKS] Cancelled capacity reservation %s in %s', res_id, az) + # Only runs if capacity reservations were actually created this run. 
+ if getattr(FLAGS, 'eks_reserve_capacity_per_az', False): + for az in getattr(self, '_capacity_reservation_ids', {}).keys(): + vm_util.IssueCommand( + util.AWS_PREFIX + [ + 'ec2', 'delete-launch-template', + '--launch-template-name', f'pkb-eks-lt-{az}', + '--region', self.region, + ], + raise_on_failure=False, + ) + logging.info('[EKS] Deleted launch template pkb-eks-lt-%s', az) + for az, res_id in getattr(self, '_capacity_reservation_ids', {}).items(): + vm_util.IssueCommand( + util.AWS_PREFIX + [ + 'ec2', 'cancel-capacity-reservation', + '--capacity-reservation-id', res_id, + '--region', self.region, + ], + raise_on_failure=False, + ) + logging.info('[EKS] Cancelled capacity reservation %s in %s', res_id, az) super()._Delete() cmd = [ FLAGS.eksctl, @@ -496,137 +498,147 @@ def _Create(self): # Dynamically create capacity reservations + launch templates AFTER cluster # creation so cluster CA and endpoint are available for node bootstrap. - self._capacity_reservation_ids = {} - # Reserve enough capacity per AZ for 100 pools: - # ~67 pools per AZ × 2 nodes = 134 instances max per AZ (Scenario A) - # Plus default nodegroup (2) + buffer = 80 minimum for 10 pools, 150 for 100 pools - concurrent = getattr(FLAGS, 'k8s_mgmt_concurrent_nodepools', 10) - nodes_per_az = max(80, concurrent * 2 + 20) - # Fetch cluster CA and endpoint for bootstrap user data - import json as _json - cluster_out, _, cluster_rc = vm_util.IssueCommand( - util.AWS_PREFIX + [ - 'eks', 'describe-cluster', - '--name', self.name, - '--region', self.region, - '--query', 'cluster.{endpoint:endpoint,ca:certificateAuthority.data,cidr:kubernetesNetworkConfig.serviceIpv4Cidr}', - '--output', 'json', - ], - raise_on_failure=False, - ) - cluster_ca = '' - cluster_endpoint = '' - cluster_service_cidr = '10.100.0.0/16' # default fallback - if cluster_rc == 0 and cluster_out.strip(): - cluster_info = _json.loads(cluster_out.strip()) - cluster_ca = cluster_info.get('ca', '') - cluster_endpoint = 
cluster_info.get('endpoint', '') - cluster_service_cidr = cluster_info.get('cidr', '10.100.0.0/16') - logging.info('[EKS] Fetched cluster endpoint=%s cidr=%s for bootstrap', - cluster_endpoint, cluster_service_cidr) - - # Query EKS-optimized AMI once for all AZs - # cluster_version may be None if not explicitly set — fetch from cluster - if not self.cluster_version: - ver_out, _, ver_rc = vm_util.IssueCommand( + # Gate capacity reservations behind flag — disabled by default + # to avoid impacting other EKS benchmarks (kubernetes_nginx etc) + # that use different instance types and do not need reservations. + if not FLAGS.eks_reserve_capacity_per_az: + self._capacity_reservation_ids = {} + logging.info( + '[EKS] Skipping capacity reservations ' + '(--eks_reserve_capacity_per_az=False)' + ) + else: + self._capacity_reservation_ids = {} + # Reserve enough capacity per AZ for 100 pools: + # ~67 pools per AZ × 2 nodes = 134 instances max per AZ (Scenario A) + # Plus default nodegroup (2) + buffer = 80 minimum for 10 pools, 150 for 100 pools + concurrent = getattr(FLAGS, 'k8s_mgmt_concurrent_nodepools', 10) + nodes_per_az = max(80, concurrent * 2 + 20) + # Fetch cluster CA and endpoint for bootstrap user data + import json as _json + cluster_out, _, cluster_rc = vm_util.IssueCommand( util.AWS_PREFIX + [ 'eks', 'describe-cluster', '--name', self.name, '--region', self.region, - '--query', 'cluster.version', - '--output', 'text', + '--query', 'cluster.{endpoint:endpoint,ca:certificateAuthority.data,cidr:kubernetesNetworkConfig.serviceIpv4Cidr}', + '--output', 'json', ], raise_on_failure=False, ) - self.cluster_version = ver_out.strip() if ver_rc == 0 and ver_out.strip() else '1.34' - logging.info('[EKS] Resolved cluster version: %s', self.cluster_version) - k8s_minor_str = '.'.join(self.cluster_version.split('.')[:2]) - ami_out, _, ami_rc = vm_util.IssueCommand( - util.AWS_PREFIX + [ - 'ssm', 'get-parameter', - '--name', ( - 
f'/aws/service/eks/optimized-ami/{k8s_minor_str}/' - 'amazon-linux-2023/x86_64/standard/recommended/image_id' - ), - '--region', self.region, - '--query', 'Parameter.Value', - '--output', 'text', - ], - raise_on_failure=False, - ) - ami_id = ami_out.strip() if ami_rc == 0 and ami_out.strip() else '' - logging.info('[EKS] EKS AMI for K8s %s: %s', k8s_minor_str, ami_id) - - for az in cluster_azs: - logging.info('[EKS] Creating capacity reservation in %s (%d instances)...', az, nodes_per_az) - cap_out, _, cap_rc = vm_util.IssueCommand( + cluster_ca = '' + cluster_endpoint = '' + cluster_service_cidr = '10.100.0.0/16' # default fallback + if cluster_rc == 0 and cluster_out.strip(): + cluster_info = _json.loads(cluster_out.strip()) + cluster_ca = cluster_info.get('ca', '') + cluster_endpoint = cluster_info.get('endpoint', '') + cluster_service_cidr = cluster_info.get('cidr', '10.100.0.0/16') + logging.info('[EKS] Fetched cluster endpoint=%s cidr=%s for bootstrap', + cluster_endpoint, cluster_service_cidr) + + # Query EKS-optimized AMI once for all AZs + # cluster_version may be None if not explicitly set — fetch from cluster + if not self.cluster_version: + ver_out, _, ver_rc = vm_util.IssueCommand( + util.AWS_PREFIX + [ + 'eks', 'describe-cluster', + '--name', self.name, + '--region', self.region, + '--query', 'cluster.version', + '--output', 'text', + ], + raise_on_failure=False, + ) + self.cluster_version = ver_out.strip() if ver_rc == 0 and ver_out.strip() else '1.34' + logging.info('[EKS] Resolved cluster version: %s', self.cluster_version) + k8s_minor_str = '.'.join(self.cluster_version.split('.')[:2]) + ami_out, _, ami_rc = vm_util.IssueCommand( util.AWS_PREFIX + [ - 'ec2', 'create-capacity-reservation', - '--instance-type', 't3.medium', - '--instance-platform', 'Linux/UNIX', - '--availability-zone', az, - '--instance-count', str(nodes_per_az), + 'ssm', 'get-parameter', + '--name', ( + f'/aws/service/eks/optimized-ami/{k8s_minor_str}/' + 
'amazon-linux-2023/x86_64/standard/recommended/image_id' + ), '--region', self.region, - '--query', 'CapacityReservation.CapacityReservationId', + '--query', 'Parameter.Value', '--output', 'text', ], raise_on_failure=False, ) - if cap_rc == 0 and cap_out.strip() and cap_out.strip() != 'None': - res_id = cap_out.strip() - self._capacity_reservation_ids[az] = res_id - logging.info('[EKS] Created capacity reservation %s in %s', res_id, az) - if ami_id and cluster_ca and cluster_endpoint: - import base64 as _b64 - # AL2023 uses nodeadm YAML config — NOT the old bootstrap.sh - nodeadm_config = ( - 'apiVersion: node.eks.aws/v1alpha1' + chr(10) + - 'kind: NodeConfig' + chr(10) + - 'spec:' + chr(10) + - ' cluster:' + chr(10) + - f' name: {self.name}' + chr(10) + - f' apiServerEndpoint: {cluster_endpoint}' + chr(10) + - f' certificateAuthority: {cluster_ca}' + chr(10) + - f' cidr: {cluster_service_cidr}' - ) - user_data = _b64.b64encode(('MIME-Version: 1.0' + chr(10) + - 'Content-Type: multipart/mixed; boundary="==BOUNDARY=="' + chr(10) + - chr(10) + - '--==BOUNDARY==' + chr(10) + - 'Content-Type: application/node.eks.aws' + chr(10) + - chr(10) + - nodeadm_config + chr(10) + - '--==BOUNDARY==--').encode()).decode() - logging.info('[EKS] Using AL2023 nodeadm bootstrap for %s', az) - lt_data = ( - '{' - f'"ImageId":"{ami_id}",' - '"CapacityReservationSpecification":{' - '"CapacityReservationPreference":"capacity-reservations-only",' - f'"CapacityReservationTarget":{{"CapacityReservationId":"{res_id}"}}}},' - f'"UserData":"{user_data}"' - '}' - ) - _, _, lt_rc = vm_util.IssueCommand( - util.AWS_PREFIX + [ - 'ec2', 'create-launch-template', - '--region', self.region, - '--launch-template-name', f'pkb-eks-lt-{az}', - '--launch-template-data', lt_data, - ], - raise_on_failure=False, - ) - if lt_rc == 0: - logging.info( - '[EKS] Created launch template pkb-eks-lt-%s (AMI=%s) -> %s', - az, ami_id, res_id, + ami_id = ami_out.strip() if ami_rc == 0 and ami_out.strip() else '' + 
logging.info('[EKS] EKS AMI for K8s %s: %s', k8s_minor_str, ami_id) + + for az in cluster_azs: + logging.info('[EKS] Creating capacity reservation in %s (%d instances)...', az, nodes_per_az) + cap_out, _, cap_rc = vm_util.IssueCommand( + util.AWS_PREFIX + [ + 'ec2', 'create-capacity-reservation', + '--instance-type', 't3.medium', + '--instance-platform', 'Linux/UNIX', + '--availability-zone', az, + '--instance-count', str(nodes_per_az), + '--region', self.region, + '--query', 'CapacityReservation.CapacityReservationId', + '--output', 'text', + ], + raise_on_failure=False, + ) + if cap_rc == 0 and cap_out.strip() and cap_out.strip() != 'None': + res_id = cap_out.strip() + self._capacity_reservation_ids[az] = res_id + logging.info('[EKS] Created capacity reservation %s in %s', res_id, az) + if ami_id and cluster_ca and cluster_endpoint: + import base64 as _b64 + # AL2023 uses nodeadm YAML config — NOT the old bootstrap.sh + nodeadm_config = ( + 'apiVersion: node.eks.aws/v1alpha1' + chr(10) + + 'kind: NodeConfig' + chr(10) + + 'spec:' + chr(10) + + ' cluster:' + chr(10) + + f' name: {self.name}' + chr(10) + + f' apiServerEndpoint: {cluster_endpoint}' + chr(10) + + f' certificateAuthority: {cluster_ca}' + chr(10) + + f' cidr: {cluster_service_cidr}' + ) + user_data = _b64.b64encode(('MIME-Version: 1.0' + chr(10) + + 'Content-Type: multipart/mixed; boundary="==BOUNDARY=="' + chr(10) + + chr(10) + + '--==BOUNDARY==' + chr(10) + + 'Content-Type: application/node.eks.aws' + chr(10) + + chr(10) + + nodeadm_config + chr(10) + + '--==BOUNDARY==--').encode()).decode() + logging.info('[EKS] Using AL2023 nodeadm bootstrap for %s', az) + lt_data = ( + '{' + f'"ImageId":"{ami_id}",' + '"CapacityReservationSpecification":{' + '"CapacityReservationPreference":"capacity-reservations-only",' + f'"CapacityReservationTarget":{{"CapacityReservationId":"{res_id}"}}}},' + f'"UserData":"{user_data}"' + '}' + ) + _, _, lt_rc = vm_util.IssueCommand( + util.AWS_PREFIX + [ + 'ec2', 
'create-launch-template', + '--region', self.region, + '--launch-template-name', f'pkb-eks-lt-{az}', + '--launch-template-data', lt_data, + ], + raise_on_failure=False, ) + if lt_rc == 0: + logging.info( + '[EKS] Created launch template pkb-eks-lt-%s (AMI=%s) -> %s', + az, ami_id, res_id, + ) + else: + logging.warning('[EKS] Failed to create launch template for %s', az) else: - logging.warning('[EKS] Failed to create launch template for %s', az) + logging.warning('[EKS] Missing AMI/CA/endpoint — no launch template for %s', az) else: - logging.warning('[EKS] Missing AMI/CA/endpoint — no launch template for %s', az) - else: - logging.warning('[EKS] Failed to create capacity reservation in %s — on-demand', az) + logging.warning('[EKS] Failed to create capacity reservation in %s — on-demand', az) # Above create command passes "withOidc=true", but it doesn't seem to work & # therefore this command is needed. @@ -1046,36 +1058,36 @@ def CreateNodePoolAsync( }, } _az = assigned_az if az_subnets and len(az_subnets) > 1 else f'{self.region}a' - _lt_name = f'pkb-eks-lt-{_az}' - _lt_out, _, _lt_rc = vm_util.IssueCommand( - util.AWS_PREFIX + [ - 'ec2', 'describe-launch-templates', - '--region', self.region, - '--filters', f'Name=launch-template-name,Values={_lt_name}', - '--query', 'LaunchTemplates[0].LaunchTemplateId', - '--output', 'text', - ], - raise_on_failure=False, - ) - # Use launch template WITH correct EKS bootstrap to target capacity reservation. - # The launch template must specify the EKS-optimized AMI and bootstrap user data - # so nodes can join the cluster, while also targeting the capacity reservation. 
- res_id = self._capacity_reservation_ids.get(_az, '') - if res_id and _lt_rc == 0 and _lt_out.strip() and _lt_out.strip() not in ('None', 'null', ''): - payload['launchTemplate'] = {'id': _lt_out.strip(), 'version': '$Latest'} - # When launch template specifies an ImageId, EKS rejects these fields: - # - releaseVersion: conflicts with AMI - # - instanceTypes: must come from launch template only - # - amiType: conflicts with AMI - payload.pop('releaseVersion', None) - payload.pop('instanceTypes', None) - payload.pop('amiType', None) - logging.info( - '[EKS] Nodegroup %s using launch template %s targeting reservation %s in AZ %s', - nodepool_config.name, _lt_name, res_id, _az, + # Only look up launch templates and capacity reservations when + # --eks_reserve_capacity_per_az=true. Other benchmarks skip this entirely. + if FLAGS.eks_reserve_capacity_per_az: + _lt_name = f'pkb-eks-lt-{_az}' + _lt_out, _, _lt_rc = vm_util.IssueCommand( + util.AWS_PREFIX + [ + 'ec2', 'describe-launch-templates', + '--region', self.region, + '--filters', f'Name=launch-template-name,Values={_lt_name}', + '--query', 'LaunchTemplates[0].LaunchTemplateId', + '--output', 'text', + ], + raise_on_failure=False, ) - else: - logging.warning('[EKS] No reservation/template for AZ %s — using on-demand', _az) + res_id = self._capacity_reservation_ids.get(_az, '') + if res_id and _lt_rc == 0 and _lt_out.strip() and _lt_out.strip() not in ('None', 'null', ''): + payload['launchTemplate'] = {'id': _lt_out.strip(), 'version': '$Latest'} + # When launch template specifies an ImageId, EKS rejects these fields: + # - releaseVersion: conflicts with AMI + # - instanceTypes: must come from launch template only + # - amiType: conflicts with AMI + payload.pop('releaseVersion', None) + payload.pop('instanceTypes', None) + payload.pop('amiType', None) + logging.info( + '[EKS] Nodegroup %s using launch template %s targeting reservation %s in AZ %s', + nodepool_config.name, _lt_name, res_id, _az, + ) + else: + 
logging.warning('[EKS] No reservation/template for AZ %s — using on-demand', _az) if node_version: # EKS rejects both 'version' and 'releaseVersion' when a launch template @@ -1131,20 +1143,23 @@ def UpgradeNodePoolAsync(self, name: str, target_version: str) -> str: _az = zones[idx % len(zones)] else: _az = f'{self.region}a' - _lt_name = f'pkb-eks-lt-{_az}' - - # Check if launch template exists for this AZ - lt_out, _, lt_rc = vm_util.IssueCommand( - util.AWS_PREFIX + [ - 'ec2', 'describe-launch-templates', - '--region', self.region, - '--filters', f'Name=launch-template-name,Values={_lt_name}', - '--query', 'LaunchTemplates[0].LaunchTemplateId', - '--output', 'text', - ], - raise_on_failure=False, - ) - lt_id = lt_out.strip() if lt_rc == 0 and lt_out.strip() not in ('', 'None', 'null') else '' + # Only look up launch template when capacity reservations are enabled. + # For other benchmarks, always use standard kubernetes-version upgrade. + lt_id = '' + _lt_name = '' + if FLAGS.eks_reserve_capacity_per_az: + _lt_name = f'pkb-eks-lt-{_az}' + lt_out, _, lt_rc = vm_util.IssueCommand( + util.AWS_PREFIX + [ + 'ec2', 'describe-launch-templates', + '--region', self.region, + '--filters', f'Name=launch-template-name,Values={_lt_name}', + '--query', 'LaunchTemplates[0].LaunchTemplateId', + '--output', 'text', + ], + raise_on_failure=False, + ) + lt_id = lt_out.strip() if lt_rc == 0 and lt_out.strip() not in ('', 'None', 'null') else '' # Custom AMI nodegroups cannot use --kubernetes-version — use launch template only if lt_id: @@ -1512,26 +1527,28 @@ def _Delete(self): except Exception: # pylint: disable=broad-except pass # Clean up dynamically created launch templates and capacity reservations - for az in getattr(self, '_capacity_reservation_ids', {}).keys(): - vm_util.IssueCommand( - util.AWS_PREFIX + [ - 'ec2', 'delete-launch-template', - '--launch-template-name', f'pkb-eks-lt-{az}', - '--region', self.region, - ], - raise_on_failure=False, - ) - logging.info('[EKS] 
Deleted launch template pkb-eks-lt-%s', az) - for az, res_id in getattr(self, '_capacity_reservation_ids', {}).items(): - vm_util.IssueCommand( - util.AWS_PREFIX + [ - 'ec2', 'cancel-capacity-reservation', - '--capacity-reservation-id', res_id, - '--region', self.region, - ], - raise_on_failure=False, - ) - logging.info('[EKS] Cancelled capacity reservation %s in %s', res_id, az) + # Only runs if capacity reservations were actually created this run. + if getattr(FLAGS, 'eks_reserve_capacity_per_az', False): + for az in getattr(self, '_capacity_reservation_ids', {}).keys(): + vm_util.IssueCommand( + util.AWS_PREFIX + [ + 'ec2', 'delete-launch-template', + '--launch-template-name', f'pkb-eks-lt-{az}', + '--region', self.region, + ], + raise_on_failure=False, + ) + logging.info('[EKS] Deleted launch template pkb-eks-lt-%s', az) + for az, res_id in getattr(self, '_capacity_reservation_ids', {}).items(): + vm_util.IssueCommand( + util.AWS_PREFIX + [ + 'ec2', 'cancel-capacity-reservation', + '--capacity-reservation-id', res_id, + '--region', self.region, + ], + raise_on_failure=False, + ) + logging.info('[EKS] Cancelled capacity reservation %s in %s', res_id, az) super()._Delete() cmd = [ FLAGS.eksctl, diff --git a/perfkitbenchmarker/providers/aws/flags.py b/perfkitbenchmarker/providers/aws/flags.py index 414c8c3fa1..b7f6ca214c 100644 --- a/perfkitbenchmarker/providers/aws/flags.py +++ b/perfkitbenchmarker/providers/aws/flags.py @@ -380,6 +380,14 @@ def _ValidatePreprovisionedDataAccess(flag_values: dict[str, Any]) -> bool: # Flag to skip EBS CSI driver setup during EKS cluster creation. # Safe for benchmarks that do not use persistent volumes (e.g. k8s_management). # Saves ~3 minutes per run. +flags.DEFINE_boolean( + 'eks_reserve_capacity_per_az', + False, + 'If True, dynamically creates EC2 capacity reservations and launch ' + 'templates per AZ before nodegroup creation. Enable only for the ' + 'k8s_management benchmark. 
Leaving enabled for other benchmarks ' + 'wastes reserved capacity on wrong instance types.', +) flags.DEFINE_boolean( 'eks_skip_ebs_csi', False, From d07c14ed4c31dd3f8c08adba28e5d8f171ffcf46 Mon Sep 17 00:00:00 2001 From: Ashish Suneja Date: Wed, 27 May 2026 12:53:32 +0000 Subject: [PATCH 17/19] EKS: raise exception instead of hardcoding k8s version 1.34 --- .../providers/aws/elastic_kubernetes_service.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/perfkitbenchmarker/providers/aws/elastic_kubernetes_service.py b/perfkitbenchmarker/providers/aws/elastic_kubernetes_service.py index e656ab49eb..c02b820ee7 100644 --- a/perfkitbenchmarker/providers/aws/elastic_kubernetes_service.py +++ b/perfkitbenchmarker/providers/aws/elastic_kubernetes_service.py @@ -550,7 +550,13 @@ def _Create(self): ], raise_on_failure=False, ) - self.cluster_version = ver_out.strip() if ver_rc == 0 and ver_out.strip() else '1.34' + if ver_rc != 0 or not ver_out.strip(): + raise errors.Resource.CreationError( + '[EKS] Failed to determine cluster version from describe-cluster. ' + 'Cannot proceed without a valid Kubernetes version. 
' + f'rc={ver_rc} out={ver_out.strip()!r}' + ) + self.cluster_version = ver_out.strip() logging.info('[EKS] Resolved cluster version: %s', self.cluster_version) k8s_minor_str = '.'.join(self.cluster_version.split('.')[:2]) ami_out, _, ami_rc = vm_util.IssueCommand( From 6a7747f53ed03b2da4b2df7ea9d0b80d15fd0ebb Mon Sep 17 00:00:00 2001 From: Srikant Patil Date: Wed, 27 May 2026 21:33:32 +0530 Subject: [PATCH 18/19] ManagementPlane: Azure Report and Comment fixes --- .../kubernetes_management_benchmark.py | 59 +++++++++++-------- .../azure/azure_kubernetes_service.py | 22 ++++++- 2 files changed, 54 insertions(+), 27 deletions(-) diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes_management_benchmark.py b/perfkitbenchmarker/linux_benchmarks/kubernetes_management_benchmark.py index d153e08142..dbf07127ae 100644 --- a/perfkitbenchmarker/linux_benchmarks/kubernetes_management_benchmark.py +++ b/perfkitbenchmarker/linux_benchmarks/kubernetes_management_benchmark.py @@ -27,10 +27,12 @@ """ import copy +import dataclasses import statistics import threading import time from typing import Callable +from unicodedata import name from absl import flags from absl import logging @@ -147,6 +149,14 @@ def _ScenarioAName(i): def _ScenarioCName(i): return f'{_PREFIX}c{i:04d}' +@dataclasses.dataclass +class _OpResult: + """Holds timing and outcome for a single async management-plane operation.""" + name: str + init_dur: float + e2e_dur: float + error: Exception | None = None + def GetConfig(user_config): return configs.LoadConfig(BENCHMARK_CONFIG, user_config, BENCHMARK_NAME) @@ -344,7 +354,7 @@ def _RunScenarioA( attempted_ops=len(pool_names)) # ── Phase 2: concurrent upgrades (only successfully created pools) ─────── - created = [name for name, _, _, err in create_results if err is None] + created = [r.name for r in create_results if r.error is None] logging.info( 'Scenario A: %d/%d pools created — proceeding to upgrade', len(created), n) @@ -489,9 +499,7 @@ def DoCreate(): 
samples: list[sample.Sample] = [] for entry in results.entries: - name, init_dur, e2e_dur, err = entry - samples += _OpSamples(name, [(name, init_dur, e2e_dur, err)], - attempted_ops=1) + samples += _OpSamples(entry.name, [entry], attempted_ops=1) # Remove test pool (best-effort). try: @@ -536,7 +544,7 @@ def _RunScenarioC( items=configs_, get_name=lambda cfg: cfg.name, ) - created_ok = sum(1 for _, _, _, err in create_results if err is None) + created_ok = sum(1 for r in create_results if r.error is None) logging.info('Scenario C scale=%d: %d/%d creates succeeded', scale, created_ok, scale) samples += _OpSamples('ScenarioC_Create', @@ -585,12 +593,13 @@ class _Results: def __init__(self): self._lock = threading.Lock() - self.entries: list[tuple[str, float, float, Exception | None]] = [] + self.entries: list[_OpResult] = [] def add(self, name: str, init_dur: float, e2e_dur: float, - err: Exception | None) -> None: + err: Exception | None) -> None: + result = _OpResult(name, init_dur, e2e_dur, err) with self._lock: - self.entries.append((name, init_dur, e2e_dur, err)) + self.entries.append(result) def _TimedAsync( @@ -603,18 +612,18 @@ def _TimedAsync( e2e_lat = total wall time including wait. On kickoff failure both are set to elapsed time at failure point. 
""" - init_start = time.time() + init_start = time.monotonic() try: handle = kickoff() except Exception as exc: # pylint: disable=broad-except - elapsed = time.time() - init_start + elapsed = time.monotonic() - init_start return elapsed, elapsed, exc - init_dur = time.time() - init_start + init_dur = time.monotonic() - init_start try: wait_fn(handle) - return init_dur, time.time() - init_start, None + return init_dur, time.monotonic() - init_start, None except Exception as exc: # pylint: disable=broad-except - return init_dur, time.time() - init_start, exc + return init_dur, time.monotonic() - init_start, exc def _RunAsync( @@ -659,8 +668,8 @@ def _MakeNodePoolConfig( def _OpSamples( metric_prefix: str, - results: list[tuple[str, float, float, Exception | None]], - attempted_ops: int = None, + results: list[_OpResult], + attempted_ops: int | None = None, ) -> list[sample.Sample]: """Per-op + aggregate samples for initiation and end-to-end latency. @@ -677,19 +686,19 @@ def _OpSamples( e2e_latencies: list[float] = [] success = 0 - for name, init_dur, e2e_dur, err in results: - meta = {'operation_name': name, 'success': str(err is None)} - if err is not None: - meta['error'] = str(err)[:200] + for r in results: + meta = {'operation_name': r.name, 'success': str(r.error is None)} + if r.error is not None: + meta['error'] = str(r.error)[:200] else: - success += 1 - init_latencies.append(init_dur) - e2e_latencies.append(e2e_dur) + success += 1 + init_latencies.append(r.init_dur) + e2e_latencies.append(r.e2e_dur) samples.append( - sample.Sample(f'{metric_prefix}_InitiationLatency', init_dur, + sample.Sample(f'{metric_prefix}_InitiationLatency', r.init_dur, 'seconds', dict(meta))) samples.append( - sample.Sample(f'{metric_prefix}_EndToEndLatency', e2e_dur, + sample.Sample(f'{metric_prefix}_EndToEndLatency', r.e2e_dur, 'seconds', dict(meta))) # ── Success rate ───────────────────────────────────────────────────────── @@ -771,7 +780,7 @@ def _OutlierSamples(metric_prefix: 
str, phase_label: str, 'iqr': str(iqr), 'upper_fence': str(upper_fence), 'lower_fence': str(lower_fence), - 'sample_count': str(n), + 'sample_count': str(len(latencies)), } return [ sample.Sample( diff --git a/perfkitbenchmarker/providers/azure/azure_kubernetes_service.py b/perfkitbenchmarker/providers/azure/azure_kubernetes_service.py index 4e44726b4c..4ce6174edd 100644 --- a/perfkitbenchmarker/providers/azure/azure_kubernetes_service.py +++ b/perfkitbenchmarker/providers/azure/azure_kubernetes_service.py @@ -546,6 +546,13 @@ def CreateNodePool( node_version: str | None = None, ) -> None: """Creates a single named node pool on the cluster.""" + node_flags = self._GetNodeFlags(nodepool_config) + if node_version: + # _GetNodeFlags may have added self.cluster_version; replace or append. + if '--kubernetes-version' in node_flags: + node_flags[node_flags.index('--kubernetes-version') + 1] = node_version + else: + node_flags += ['--kubernetes-version', node_version] cmd = [ azure.AZURE_PATH, 'aks', @@ -557,7 +564,7 @@ def CreateNodePool( _AzureNodePoolName(nodepool_config.name), '--labels', f'pkb_nodepool={nodepool_config.name}', - ] + self._GetNodeFlags(nodepool_config, version_override=node_version) + ] + node_flags _, stderr, retcode = vm_util.IssueCommand( cmd, timeout=1800, raise_on_failure=False ) @@ -619,6 +626,13 @@ def CreateNodePoolAsync( nodepool_config: container.BaseNodePoolConfig, node_version: str | None = None, ) -> str: + node_flags = self._GetNodeFlags(nodepool_config) + if node_version: + # _GetNodeFlags may have added self.cluster_version; replace or append. 
+ if '--kubernetes-version' in node_flags: + node_flags[node_flags.index('--kubernetes-version') + 1] = node_version + else: + node_flags += ['--kubernetes-version', node_version] cmd = [ azure.AZURE_PATH, 'aks', @@ -631,7 +645,7 @@ def CreateNodePoolAsync( '--labels', f'pkb_nodepool={nodepool_config.name}', '--no-wait', - ] + self._GetNodeFlags(nodepool_config, version_override=node_version) + ] + node_flags # fix: raise timeout to 600s (AKS can take >300s to accept a # --no-wait request under concurrent load) and retry on transient errors # that indicate the cluster is temporarily at its concurrent-op or @@ -822,6 +836,10 @@ def _wait_np_succeeded(): timeout=120, ) if rc: + if 'NotFound' in (err or '') or 'not found' in (err or '').lower(): + raise errors.Resource.CreationError( + f'nodepool {name} not found while waiting for Succeeded: {err}' + ) raise errors.Resource.RetryableCreationError(err) status = out.strip() if status == 'Succeeded': From 1c12d9cc1550fb44e6503ffa84a7e635ab5d4443 Mon Sep 17 00:00:00 2001 From: Srikant Patil Date: Wed, 27 May 2026 21:53:13 +0530 Subject: [PATCH 19/19] ManagementPlane: event poller kubernetes clueter revert --- .../container_service/kubernetes_cluster.py | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/perfkitbenchmarker/resources/container_service/kubernetes_cluster.py b/perfkitbenchmarker/resources/container_service/kubernetes_cluster.py index 8c36bfa84b..fecb126114 100644 --- a/perfkitbenchmarker/resources/container_service/kubernetes_cluster.py +++ b/perfkitbenchmarker/resources/container_service/kubernetes_cluster.py @@ -58,22 +58,7 @@ def _PostCreate(self): """Starts the event poller after the cluster has been created.""" super()._PostCreate() if self.event_poller: - try: - self.event_poller.StartPolling() - except Exception as exc: # pylint: disable=broad-except - # Python 3.14 tightened pickling rules for multiprocessing — local - # functions passed to Process cannot be pickled. 
Rather than crashing - # PKB entirely (which prevents cleanup and orphans cloud resources), - # log a warning and continue without the event poller. - # Impact: no Kubernetes event streaming during the run — benchmark - # metrics are unaffected. - logging.warning( - 'Event poller failed to start (non-fatal, continuing without ' - + 'event polling): %s. This is a known Python 3.14 pickling ' - + 'issue — switch to Python 3.13 to enable event polling.', - exc, - ) - self.event_poller = None + self.event_poller.StartPolling() def Delete(self, freeze: bool = False) -> None: if self.inference_server: