From 8a6488b88344cbbb13832361b3217355f8b0ffb4 Mon Sep 17 00:00:00 2001 From: Patrick Clay Date: Mon, 20 Apr 2026 14:32:01 -0700 Subject: [PATCH] diffbase PiperOrigin-RevId: 902825962 --- CHANGES.next.md | 2 + perfkitbenchmarker/managed_vm_group.py | 8 + .../gcp/gce_managed_instance_group.py | 201 ++++++++++++++++++ .../gcp/gce_managed_instance_group_test.py | 83 ++++++++ 4 files changed, 294 insertions(+) create mode 100644 perfkitbenchmarker/providers/gcp/gce_managed_instance_group.py create mode 100644 tests/providers/gcp/gce_managed_instance_group_test.py diff --git a/CHANGES.next.md b/CHANGES.next.md index b06ed766d0..ff43ea32c2 100644 --- a/CHANGES.next.md +++ b/CHANGES.next.md @@ -453,6 +453,8 @@ maintenance_simulation_trigger.py. - Added `--retry_on_insufficient_capacity_cloud_failure` so that resource creation can be retried on stock outs. +- Add support for deploying VMs inside managed VM groups with + `--use_managed_vm_groups`. ### Bug fixes and maintenance updates: diff --git a/perfkitbenchmarker/managed_vm_group.py b/perfkitbenchmarker/managed_vm_group.py index af73711618..98634f2a66 100644 --- a/perfkitbenchmarker/managed_vm_group.py +++ b/perfkitbenchmarker/managed_vm_group.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. """Module containing class for managed VM groups.""" + import abc from collections.abc import Callable import copy @@ -46,6 +47,7 @@ def __init__( super().__init__() self.spec: vm_group_decoders.VmGroupSpec = spec self.vm_config = vm_config + self.vm_config.metadata['in_managed_vm_group'] = True # When we clone the VM config and rename it, our assumptions about the # disk names are wrong. # TODO(pclay): improve support for disks. 
@@ -64,6 +66,12 @@ def vms(self) -> Sequence[virtual_machine.BaseVirtualMachine]:
     """Returns the VMs in the managed VM group."""
     return list(self._vms.values())
 
+  def _CreateDependencies(self):
+    self.vm_config._CreateDependencies()  # pylint: disable=protected-access
+
+  def _DeleteDependencies(self):
+    self.vm_config._DeleteDependencies()  # pylint: disable=protected-access
+
   @dataclasses.dataclass
   class VmReference:
     name: str
diff --git a/perfkitbenchmarker/providers/gcp/gce_managed_instance_group.py b/perfkitbenchmarker/providers/gcp/gce_managed_instance_group.py
new file mode 100644
index 0000000000..b922487173
--- /dev/null
+++ b/perfkitbenchmarker/providers/gcp/gce_managed_instance_group.py
@@ -0,0 +1,258 @@
+# Copyright 2026 PerfKitBenchmarker Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""GCE Managed Instance Group resource."""
+
+import json
+from typing import Any, cast
+
+from perfkitbenchmarker import errors
+from perfkitbenchmarker import managed_vm_group
+from perfkitbenchmarker import provider_info
+from perfkitbenchmarker import resource
+from perfkitbenchmarker import virtual_machine
+from perfkitbenchmarker.configs import vm_group_decoders
+from perfkitbenchmarker.providers.gcp import gce_virtual_machine
+from perfkitbenchmarker.providers.gcp import util
+
+VmReference = managed_vm_group.BaseManagedVmGroup.VmReference
+
+
+class GceInstanceTemplate(resource.BaseResource):
+  """GCE Instance Template.
+
+  Contains an initialized GceVirtualMachine, which eventually has a
+  gcloud compute instances create command that shares most flags with the
+  instance template creation command.
+  """
+
+  CLOUD = provider_info.GCP
+
+  # Flags of the VM's create command that are not copied onto the instance
+  # template creation command.
+  FLAGS_NOT_SHARED_WITH_VM = ['async', 'zone']
+
+  def __init__(
+      self,
+      vm_config: gce_virtual_machine.GceVirtualMachine,
+      region: str,
+      name: str,
+  ):
+    super().__init__()
+    self.vm_config: gce_virtual_machine.GceVirtualMachine = vm_config
+    self.name: str = name
+    self.project: str = vm_config.project
+    self.region: str = region
+    self.qualified_name: str = (
+        f'projects/{self.project}/regions/{self.region}'
+        f'/instanceTemplates/{self.name}'
+    )
+
+  def _GcloudCmd(self, *args) -> util.GcloudCommand:
+    """Returns a `gcloud compute instance-templates` command with args."""
+    return util.GcloudCommand(self, 'compute', 'instance-templates', *args)
+
+  def _Create(self):
+    """Creates the regional instance template from the VM's create flags."""
+    # vm_config._CreateDependencies() pre-creates the gcloud instances create
+    # command, which has most of the flags that we need for the instance
+    # template.
+    assert self.vm_config.create_cmd
+    cmd = self._GcloudCmd(
+        'create',
+        self.name,
+        '--instance-template-region',
+        self.region,
+    )
+    cmd.flags = {
+        k: v
+        for k, v in self.vm_config.create_cmd.flags.items()
+        if k not in self.FLAGS_NOT_SHARED_WITH_VM
+    }
+    cmd.Issue()
+
+  def _Delete(self):
+    """Deletes the regional instance template."""
+    # _GcloudCmd() already supplies the resource and the
+    # `compute instance-templates` prefix; only the subcommand goes here.
+    cmd = self._GcloudCmd(
+        'delete',
+        self.name,
+        '--region',
+        self.region,
+    )
+    cmd.Issue()
+
+  def _Exists(self):
+    """Returns whether describing the instance template succeeds."""
+    cmd = self._GcloudCmd(
+        'describe',
+        self.name,
+        '--region',
+        self.region,
+    )
+    _, _, retcode = cmd.Issue(raise_on_failure=False)
+    return retcode == 0
+
+
+class GceManagedInstanceGroup(managed_vm_group.BaseManagedVmGroup):
+  """GCE Managed Instance Group."""
+
+  CLOUD = provider_info.GCP
+
+  def __init__(
+      self,
+      spec: vm_group_decoders.VmGroupSpec,
+      vm_config: virtual_machine.BaseVirtualMachine,
+  ):
+    super().__init__(spec, vm_config)
+    self.vm_config: gce_virtual_machine.GceVirtualMachine = cast(
+        gce_virtual_machine.GceVirtualMachine, self.vm_config
+    )
+    self.project = self.vm_config.project
+    # TODO(pclay): Add support for regional managed instance groups.
+    # It's unclear how multiple zones would be plumbed through BaseVmSpec.
+    self.zone = self.vm_config.zone
+    self.region = util.GetRegionFromZone(self.zone)
+
+    self.instance_template = GceInstanceTemplate(
+        self.vm_config, self.region, name=self.name
+    )
+
+  def _CreateDependencies(self):
+    """Creates the VM config's dependencies, then the instance template."""
+    super()._CreateDependencies()
+    self.instance_template.Create()
+
+  def _GcloudCmd(self, *args) -> util.GcloudCommand:
+    """Returns a `gcloud compute instance-groups managed` command with args."""
+    return util.GcloudCommand(
+        self, 'compute', 'instance-groups', 'managed', *args
+    )
+
+  def _Create(self):
+    """Creates the managed instance group from the instance template."""
+    cmd = self._GcloudCmd(
+        'create',
+        self.name,
+        '--template',
+        self.instance_template.qualified_name,
+        '--size',
+        str(self.vm_count),
+    )
+    # TODO(pclay): Consider beta and --resource-manager-tags for labels.
+    cmd.Issue()
+
+  def _DeleteDependencies(self):
+    """Deletes the instance template, then the VM config's dependencies."""
+    self.instance_template.Delete()
+    super()._DeleteDependencies()
+
+  def _Delete(self):
+    """Deletes the managed instance group."""
+    cmd = self._GcloudCmd('delete', self.name)
+    cmd.Issue()
+
+  def _Get(self) -> dict[str, Any] | None:
+    """Describes the managed instance group.
+
+    Returns:
+      The parsed JSON description, or None if the group was not found.
+
+    Raises:
+      errors.Resource.GetError: If describe fails for any other reason.
+    """
+    cmd = self._GcloudCmd('describe', self.name)
+    stdout, stderr, retcode = cmd.Issue(raise_on_failure=False)
+    if retcode != 0:
+      if 'not found' in stderr:
+        return None
+      raise errors.Resource.GetError(
+          f'Failed to describe managed instance group {self.name}:\n{stderr}\n'
+      )
+    return json.loads(stdout)
+
+  def _Exists(self) -> bool:
+    """Returns whether the managed instance group can be described."""
+    return bool(self._Get())
+
+  def _IsReady(self) -> bool:
+    """Returns whether the group exists and reports a stable status."""
+    mig = self._Get()
+    # _Get() returns None when the group is not found; report that as not
+    # ready instead of raising a TypeError from subscripting None.
+    if not mig:
+      return False
+    return bool(mig.get('status', {}).get('isStable', False))
+
+  def _GetCurrentVms(self) -> list[VmReference]:
+    """Lists the instances currently in the managed instance group.
+
+    Returns:
+      One VmReference per listed instance.
+
+    Raises:
+      errors.Resource.GetError: If the list-instances command fails.
+    """
+    cmd = self._GcloudCmd('list-instances', self.name)
+    stdout, stderr, retcode = cmd.Issue(raise_on_failure=False)
+    if retcode != 0:
+      raise errors.Resource.GetError(
+          f'Failed to list instances for managed instance group {self.name}:\n'
+          f'{stderr}\n'
+      )
+    instances = json.loads(stdout)
+    return [VmReference(name=instance['name']) for instance in instances]
+
+  def _AddVms(self, num_vms_to_add: int):
+    """Creates named instances in the managed instance group.
+
+    Instances are named '<group name>-<index>'. Names already present in the
+    group are skipped so that repeated calls do not collide.
+
+    Args:
+      num_vms_to_add: The number of instances to create.
+    """
+    if num_vms_to_add <= 0:
+      return
+    names_in_use = {vm.name for vm in self._GetCurrentVms()}
+    index = 0
+    num_added = 0
+    while num_added < num_vms_to_add:
+      vm_name = f'{self.name}-{index}'
+      index += 1
+      if vm_name in names_in_use:
+        continue
+      # create-instance takes a single --instance, so issue one command per VM.
+      self._GcloudCmd(
+          'create-instance', self.name, '--instance', vm_name
+      ).Issue()
+      num_added += 1
+
+  def _RemoveVms(self, vm_names: list[str]):
+    """Deletes the named instances from the managed instance group.
+
+    Args:
+      vm_names: Names of the instances to delete.
+    """
+    if not vm_names:
+      return
+    # delete-instances takes one comma-separated --instances list.
+    cmd = self._GcloudCmd(
+        'delete-instances', self.name, '--instances', ','.join(vm_names)
+    )
+    cmd.Issue()
+
+  def _Resize(self, new_vm_count: int):
+    """Resizes the managed instance group to new_vm_count instances."""
+    self._GcloudCmd('resize', self.name, '--size', str(new_vm_count)).Issue()
diff --git a/tests/providers/gcp/gce_managed_instance_group_test.py b/tests/providers/gcp/gce_managed_instance_group_test.py
new file mode 100644
index 0000000000..6f13bfdc5e
--- /dev/null
+++ b/tests/providers/gcp/gce_managed_instance_group_test.py
@@ -0,0 +1,102 @@
+import builtins
+import unittest
+from unittest import mock
+
+from absl import flags
+from perfkitbenchmarker import vm_util
+from perfkitbenchmarker.configs import vm_group_decoders
+from perfkitbenchmarker.providers.gcp import gce_managed_instance_group
+from perfkitbenchmarker.providers.gcp import gce_virtual_machine
+from perfkitbenchmarker.providers.gcp import util as gcp_utils
+from tests import pkb_common_test_case
+
+FLAGS = flags.FLAGS
+
+
+class GceManagedInstanceGroupTest(pkb_common_test_case.PkbCommonTestCase):
+
+  def setUp(self):
+    super().setUp()
+    FLAGS.run_uri = 'test_run'
+    self.mock_cmd = self.MockIssueCommand(
+        {
+            '': [('', '', 0)],
+        },
+    )
+
+  @mock.patch.object(gcp_utils, 'GetRegionFromZone', return_value='us-central1')
+  @mock.patch.object(gce_virtual_machine.gce_network.GceFirewall, 'GetFirewall')
+  @mock.patch.object(gce_virtual_machine.gce_network.GceNetwork, 'GetNetwork')
+  def TestMig(self, mock_get_network, *_):
+    mock_get_network.return_value.placement_group.name = 'test_placement_group'
+    vm_config = pkb_common_test_case.TestGceVirtualMachine(
+        gce_virtual_machine.GceVmSpec(
+            'test_component',
+            machine_type='n1-standard-4',
+            zone='us-central1-c',
+        )
+    )
+    return gce_managed_instance_group.GceManagedInstanceGroup(
+        vm_group_decoders.VmGroupSpec(
+            'test_component',
+            cloud='GCP',
+            os_type='debian12',
+            vm_spec={'GCP': {'machine_type': 'n1-standard-4'}},
+        ),
+        vm_config,
+    )
+
+  def testCreate(self, *_):
+    mig = self.TestMig()
+    mig._Create()
+    self.assertIn(
+        'gcloud compute instance-groups managed create pkb-test_run-0'
+        ' --template'
+        ' projects/test_project/regions/us-central1/instanceTemplates/pkb-test_run-0'
+        ' --size 1 --format json --project test_project --quiet --zone'
+        ' us-central1-c',
+        self.mock_cmd.all_commands,
+    )
+
+  # SSH keys
+  @mock.patch.object(builtins, 'open')
+  @mock.patch.object(vm_util, 'NamedTemporaryFile')
+  def testCreateDependencies(self, mock_named_temporary_file, _):
+    mig = self.TestMig()
+    mock_named_temporary_file.return_value.__enter__.return_value.name = (
+        'ssh_key_file'
+    )
+    mig._CreateDependencies()
+    self.assertIn(
+        'gcloud compute instance-templates create pkb-test_run-1'
+        ' --instance-template-region us-central1 --format json --labels '
+        ' --machine-type n1-standard-4 --maintenance-policy TERMINATE'
+        ' --metadata enable-oslogin=FALSE,vm_nature=ephemeral'
+        ' --metadata-from-file sshKeys=ssh_key_file --no-restart-on-failure'
+        ' --project test_project --quiet --resource-policies'
+        ' test_placement_group --tags perfkitbenchmarker',
+        self.mock_cmd.all_commands,
+    )
+
+  def testInstanceTemplateDelete(self):
+    mig = self.TestMig()
+    mig.instance_template._Delete()
+    self.assertIn(
+        f'gcloud compute instance-templates delete {mig.name}'
+        ' --region us-central1 --format json --project test_project --quiet',
+        self.mock_cmd.all_commands,
+    )
+
+  def testRemoveVms(self):
+    mig = self.TestMig()
+    mig._RemoveVms(['vm-a', 'vm-b'])
+    self.assertIn(
+        f'gcloud compute instance-groups managed delete-instances {mig.name}'
+        ' --instances vm-a,vm-b --format json --project test_project --quiet'
+        ' --zone us-central1-c',
+        self.mock_cmd.all_commands,
+    )
+
+
+if __name__ == '__main__':
+  unittest.main()