
Commit 36fb0e3

Fix under-utilized resource usage (#1398)
* Fixes case where setting num_gpus to zero was treated as None.
* Make under-resourced runs use minimal nodes needed.
* Ensure multi-node resource sets assigned correctly.
* Update unit tests for better coverage of resource configs.
1 parent 2d90eef commit 36fb0e3

9 files changed

Lines changed: 83 additions & 37 deletions


libensemble/executors/mpi_executor.py

Lines changed: 1 addition & 4 deletions
@@ -326,12 +326,9 @@ def submit(
         if not num_procs and not match_procs_to_gpus:
             num_procs = self.gen_nprocs

-        if not num_gpus:
+        if num_gpus is None:
             num_gpus = self.gen_ngpus

-        if not num_nodes and (self.gen_ngpus or self.gen_nprocs):
-            num_nodes = self.resources.worker_resources.local_node_count
-
         if mpi_runner_type is not None:
             if isinstance(mpi_runner_type, str):
                 mpi_config = {"mpi_runner": mpi_runner_type}
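The change above fixes the usual Python truthiness pitfall: `not 0` and `not None` are both true, so an explicit num_gpus=0 was indistinguishable from "unset" and fell back to the generator default. A minimal standalone sketch of the old versus new check (illustrative values, not the executor's real state):

# Sketch of the truthiness pitfall fixed above (illustrative values only).
gen_ngpus = 4  # stand-in for the generator-supplied default

for num_gpus in (None, 0, 2):
    old = gen_ngpus if not num_gpus else num_gpus      # 0 wrongly becomes 4
    new = gen_ngpus if num_gpus is None else num_gpus  # 0 stays 0
    print(f"num_gpus={num_gpus!r}: old={old}, new={new}")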

libensemble/executors/mpi_runner.py

Lines changed: 36 additions & 5 deletions
@@ -121,7 +121,7 @@ def _set_gpu_cli_option(self, wresources, extra_args, gpu_setting_name, gpu_valu
     def _set_gpu_env_var(self, wresources, task, gpus_per_node, gpus_env):
         """Add GPU environment variable setting to the task's environment"""
         jassert(wresources.matching_slots, f"Cannot assign CPUs/GPUs to non-matching slots per node {wresources.slots}")
-        slot_list = wresources.get_slots_as_string(multiplier=wresources.gpus_per_rset, limit=gpus_per_node)
+        slot_list = wresources.get_slots_as_string(multiplier=wresources.gpus_per_rset_per_node, limit=gpus_per_node)
         task._add_to_env(gpus_env, slot_list)

     def _local_runner_set_gpus(self, task, wresources, extra_args, gpus_per_node, ppn):
@@ -171,7 +171,7 @@ def _assign_gpus(self, task, resources, nprocs, nnodes, ppn, ngpus, extra_args,

         # gpus per node for this worker.
         if wresources.doihave_gpus():
-            gpus_avail_per_node = wresources.slot_count * wresources.gpus_per_rset
+            gpus_avail_per_node = wresources.slot_count * wresources.gpus_per_rset_per_node
         else:
             gpus_avail_per_node = 0

@@ -224,6 +224,35 @@ def _assign_gpus(self, task, resources, nprocs, nnodes, ppn, ngpus, extra_args,

         return nprocs, nnodes, ppn, extra_args

+    def _get_min_nodes(self, nprocs, ppn, nnodes, ngpus, resources):
+        """Get minimum nodes needed to match configuration"""
+        if nnodes is not None:
+            return nnodes
+        if ppn:
+            return None  # nnodes gets processed later.
+        if resources is not None:
+            wresources = resources.worker_resources
+            total_nodes = wresources.local_node_count
+            procs_on_node = wresources.slot_count * wresources.procs_per_rset_per_node
+
+            if not nprocs and ngpus is None:
+                # Delay node evaluation to GPU assignment code
+                return None
+            proc_min_nodes = 1
+            gpu_min_nodes = 1
+            if nprocs:
+                proc_min_nodes = (nprocs + procs_on_node - 1) // procs_on_node
+            if ngpus:
+                gpus_on_node = wresources.slot_count * wresources.gpus_per_rset_per_node
+                gpu_min_nodes = (ngpus + gpus_on_node - 1) // gpus_on_node
+
+            min_nodes = max(proc_min_nodes, gpu_min_nodes)
+            nnodes = min(min_nodes, total_nodes)
+            # Must have at least one processor per node to use GPUs
+            if nprocs:
+                nnodes = min(nnodes, nprocs)
+        return nnodes
+
     def _adjust_procs(self, nprocs, ppn, nnodes, ngpus, resources):
         """Adjust an invalid config"""

@@ -241,8 +270,8 @@ def adjust_resource(n_units, units_attr, units_name):

         if resources is not None:
             wresources = resources.worker_resources
-            ngpus = adjust_resource(ngpus, "gpus_per_rset", "ngpus")
-            nprocs = adjust_resource(nprocs, "procs_per_rset", "nprocs")
+            ngpus = adjust_resource(ngpus, "gpus_per_rset_per_node", "ngpus")
+            nprocs = adjust_resource(nprocs, "procs_per_rset_per_node", "nprocs")
         return nprocs, ngpus

     def get_mpi_specs(
@@ -284,6 +313,8 @@

         if match_procs_to_gpus:
             jassert(no_config_set, "match_procs_to_gpus is mutually exclusive with either of nprocs/ppn")
+
+        nnodes = self._get_min_nodes(nprocs, ppn, nnodes, ngpus, resources)
         nprocs, ngpus = self._adjust_procs(nprocs, ppn, nnodes, ngpus, resources)

         if auto_assign_gpus or ngpus is not None:
@@ -294,7 +325,7 @@
             task, resources, nprocs, nnodes, ppn, ngpus, extra_args, match_procs_to_gpus
         )

-        rm_rpn = True if self.rm_rpn and ppn is None and nnodes is None else False
+        rm_rpn = self.rm_rpn and ppn is None and nnodes is None

         hostlist = None
         if machinefile and not self.mfile_support:
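The new _get_min_nodes reduces to a ceiling division over per-node capacity: take the larger of the process-driven and GPU-driven node minimums, cap at the nodes available, and never use more nodes than processes. A standalone sketch with made-up capacities (the real method reads these from the worker's resource sets):

# Standalone sketch of the minimal-node calculation above (made-up capacities).
def min_nodes_needed(nprocs, ngpus, procs_on_node, gpus_on_node, total_nodes):
    proc_min = (nprocs + procs_on_node - 1) // procs_on_node if nprocs else 1
    gpu_min = (ngpus + gpus_on_node - 1) // gpus_on_node if ngpus else 1
    nnodes = min(max(proc_min, gpu_min), total_nodes)
    if nprocs:  # at least one process per node is needed to use each node
        nnodes = min(nnodes, nprocs)
    return nnodes

# 10 ranks on 4-rank nodes -> ceil(10/4) = 3 nodes
assert min_nodes_needed(10, None, 4, 4, 8) == 3
# 7 GPUs on 4-GPU nodes -> ceil(7/4) = 2 nodes
assert min_nodes_needed(None, 7, 4, 4, 8) == 2
# demand beyond the allocation is capped at the nodes available
assert min_nodes_needed(100, None, 4, 4, 8) == 8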

libensemble/resources/mpi_resources.py

Lines changed: 1 addition & 1 deletion
@@ -213,7 +213,7 @@ def get_resources(resources, num_procs=None, num_nodes=None, procs_per_node=None
         )

     if num_nodes < local_node_count:
-        logger.warning(
+        logger.debug(
             "User constraints mean fewer nodes being used "
             f"than available. {num_nodes} nodes used. {local_node_count} nodes available"
         )

libensemble/resources/rset_resources.py

Lines changed: 11 additions & 6 deletions
@@ -51,8 +51,9 @@ def __init__(self, num_workers, resources):
         self.num_workers = num_workers
         self.num_workers_2assign2 = RSetResources.get_workers2assign2(self.num_workers, resources)
         self.total_num_rsets = resources.num_resource_sets or self.num_workers_2assign2
-
+        self.num_nodes = len(resources.global_nodelist)
         self.split_list, self.local_rsets_list = RSetResources.get_partitioned_nodelist(self.total_num_rsets, resources)
+        self.nodes_in_rset = len(self.split_list[0])

         gpus_avail_per_node = resources.gpus_avail_per_node
         self.rsets_per_node = RSetResources.get_rsets_on_a_node(self.total_num_rsets, resources)
@@ -67,16 +68,20 @@ def __init__(self, num_workers, resources):
         self.total_num_gpu_rsets = np.count_nonzero(self.all_rsets["gpus"])
         self.total_num_nongpu_rsets = np.count_nonzero(~self.all_rsets["gpus"])

-        self.gpus_per_rset = gpus_avail_per_node // self.gpu_rsets_per_node if self.gpu_rsets_per_node else 0
-        self.cores_per_rset = resources.physical_cores_avail_per_node // self.rsets_per_node
+        self.gpus_per_rset_per_node = gpus_avail_per_node // self.gpu_rsets_per_node if self.gpu_rsets_per_node else 0
+        self.cores_per_rset_per_node = resources.physical_cores_avail_per_node // self.rsets_per_node

         # Oversubscribe
-        if self.cores_per_rset == 0:
+        if self.cores_per_rset_per_node == 0:
             cpn = resources.physical_cores_avail_per_node
             procs_per_core = self.rsets_per_node // cpn + (self.rsets_per_node % cpn > 0)
-            self.procs_per_rset = resources.physical_cores_avail_per_node * procs_per_core
+            self.procs_per_rset_per_node = resources.physical_cores_avail_per_node * procs_per_core
         else:
-            self.procs_per_rset = self.cores_per_rset
+            self.procs_per_rset_per_node = self.cores_per_rset_per_node
+
+        self.gpus_per_rset = self.gpus_per_rset_per_node * self.nodes_in_rset
+        self.cores_per_rset = self.cores_per_rset_per_node * self.nodes_in_rset
+        self.procs_per_rset = self.procs_per_rset_per_node * self.nodes_in_rset

     @staticmethod
     def get_group_list(split_list, gpus_per_node=0, gpus_per_group=None):
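The renaming keeps the per-node quantities under *_per_rset_per_node and makes the unqualified *_per_rset names whole-resource-set totals, so a resource set spanning several nodes reports its full capacity. A toy illustration with invented numbers:

# Toy illustration of per-node vs. whole-resource-set quantities (invented numbers).
nodes_in_rset = 2            # nodes spanned by one resource set
gpus_per_rset_per_node = 4   # GPUs available to the set on each node
cores_per_rset_per_node = 32

gpus_per_rset = gpus_per_rset_per_node * nodes_in_rset    # 8 GPUs in the whole set
cores_per_rset = cores_per_rset_per_node * nodes_in_rset  # 64 cores in the whole set
print(gpus_per_rset, cores_per_rset)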

libensemble/resources/worker_resources.py

Lines changed: 1 addition & 1 deletion
@@ -273,7 +273,7 @@ def set_env_to_gpus(self, env_var=None, delimiter=","):
         """
         assert self.matching_slots, f"Cannot assign GPUs to non-matching slots per node {self.slots}"
         if self.doihave_gpus():
-            env_value = self.get_slots_as_string(multiplier=self.gpus_per_rset, limit=self.gen_ngpus)
+            env_value = self.get_slots_as_string(multiplier=self.gpus_per_rset_per_node, limit=self.gen_ngpus)
             if env_var is None:
                 if self.platform_info is not None:
                     if self.platform_info.get("gpu_setting_type") == "env":

libensemble/sim_funcs/var_resources.py

Lines changed: 1 addition & 1 deletion
@@ -279,7 +279,7 @@ def CUDA_variable_resources(H, _, sim_specs, libE_info):
     cores_per_node = resources.slot_count

     # Set to detected GPUs
-    # gpus_per_slot = resources.gpus_per_rset
+    # gpus_per_slot = resources.gpus_per_rset_per_node
     # resources.set_env_to_slots("CUDA_VISIBLE_DEVICES", multiplier=gpus_per_slot)
     # cores_per_node = resources.slot_count * gpus_per_slot  # One CPU per GPU


libensemble/tests/functionality_tests/test_mpi_runners.py

Lines changed: 3 additions & 3 deletions
@@ -196,12 +196,12 @@
     "jsrun -n 32 /path/to/fakeapp.x --testid base2",
     "jsrun -n 32 --xarg 1 /path/to/fakeapp.x --testid base3",
     "jsrun -n 128 --xarg 1 /path/to/fakeapp.x --testid base4",
-    "jsrun -n 16 --xarg 1 /path/to/fakeapp.x --testid base5",
+    "jsrun -n 16 -r 16 --xarg 1 /path/to/fakeapp.x --testid base5",
     "jsrun -n 16 -r 8 --xarg 1 /path/to/fakeapp.x --testid base6",
     "jsrun -n 16 --xarg 1 -r 16 /path/to/fakeapp.x --testid jsr1",
     "jsrun -n 8 --xarg 1 -r 4 /path/to/fakeapp.x --testid jsr2",
-    'jsrun -n 3 -a 1 -c 1 -g 1 --bind=packed:1 --smpiargs="-gpu" /path/to/fakeapp.x --testid jsr3',
-    'jsrun -n 3 -a 1 -c 1 -g 1 --bind=packed:1 --smpiargs="-gpu" /path/to/fakeapp.x --testid jsr4',
+    'jsrun -n 3 -r 3 -a 1 -c 1 -g 1 --bind=packed:1 --smpiargs="-gpu" /path/to/fakeapp.x --testid jsr3',
+    'jsrun -r 3 -n 3 -a 1 -c 1 -g 1 --bind=packed:1 --smpiargs="-gpu" /path/to/fakeapp.x --testid jsr4',
 ]

 exp_custom = [

libensemble/tests/unit_tests/test_executor_gpus.py

Lines changed: 25 additions & 14 deletions
@@ -118,8 +118,10 @@ def run_check(exp_env, exp_cmd, **kwargs):
         args_for_sim = "sleep 0"
         exp_runline = exp_cmd + " simdir/my_simtask.x sleep 0"
         task = exctr.submit(calc_type="sim", app_args=args_for_sim, dry_run=True, **kwargs)
-        assert task.env == exp_env, f"task.env does not match expected: {task.env}"
-        assert task.runline == exp_runline, f"exp_runline does not match expected: {task.runline}"
+        assert task.env == exp_env, f"Task env does not match expected:\n Received: {task.env}\n Expected: {exp_env}"
+        assert (
+            task.runline == exp_runline
+        ), f"Run line does not match expected.\n Received: {task.runline}\n Expected: {exp_runline}"

     return run_check

@@ -307,37 +309,46 @@ def test_dry_run_ngpus_srun_plat3_2nodes():
     run_check(exp_env, exp_cmd, num_procs=2, num_nodes=2, auto_assign_gpus=True)
     run_check(exp_env, exp_cmd, procs_per_node=1, auto_assign_gpus=True)

+    # restrict with num_gpus - too many, restrict to those available
+    exp_env = {"TESTING_VISIBLE_DEVICES": "0,1,2,3,4"}
+    run_check(exp_env, exp_cmd, procs_per_node=1, auto_assign_gpus=True, num_gpus=10)
+
     # auto_assign_gpus
     exp_env = {"TESTING_VISIBLE_DEVICES": "0,1,2,3,4,5"}
     exp_cmd = "srun -w node-1 --ntasks 1 --nodes 1 --ntasks-per-node 1 --exact"
     run_check(exp_env, exp_cmd, num_procs=1, auto_assign_gpus=True)

-    # restrict with num_gpus - too many, restrict to those available
+    # restrict with num_gpus - too many, restrict to those available (now honor num_procs=1)
     run_check(exp_env, exp_cmd, num_procs=1, auto_assign_gpus=True, num_gpus=10)
     run_check(exp_env, exp_cmd, num_procs=1, num_gpus=10)

-    exp_env = {"TESTING_VISIBLE_DEVICES": "0,1,2,3,4,5"}
-    exp_cmd = "srun -w node-1,node-2 --ntasks 2 --nodes 2 --ntasks-per-node 1 --exact"
-    run_check(exp_env, exp_cmd, procs_per_node=1, auto_assign_gpus=True)
+    exp_env = {"TESTING_VISIBLE_DEVICES": "0,1"}
+    exp_cmd = "srun -w node-1 --ntasks 2 --nodes 1 --ntasks-per-node 2 --exact"
+    run_check(exp_env, exp_cmd, num_procs=2, auto_assign_gpus=True, num_gpus=2)
+    run_check(exp_env, exp_cmd, num_procs=2, num_gpus=2)

-    # restrict with num_gpus
     exp_env = {"TESTING_VISIBLE_DEVICES": "0"}
     exp_cmd = "srun -w node-1,node-2 --ntasks 2 --nodes 2 --ntasks-per-node 1 --exact"
-    run_check(exp_env, exp_cmd, num_procs=2, auto_assign_gpus=True, num_gpus=2)
-    run_check(exp_env, exp_cmd, num_procs=2, num_gpus=2)
+    run_check(exp_env, exp_cmd, num_procs=2, procs_per_node=1, auto_assign_gpus=True, num_gpus=2)
+    run_check(exp_env, exp_cmd, num_procs=2, num_nodes=2, num_gpus=2)

     # match_procs_to_gpus
     exp_env = {"TESTING_VISIBLE_DEVICES": "0,1,2,3,4,5"}
     exp_cmd = "srun -w node-1,node-2 --ntasks 12 --nodes 2 --ntasks-per-node 6 --exact"
     run_check(exp_env, exp_cmd, match_procs_to_gpus=True, auto_assign_gpus=True)

-    exp_env = {"TESTING_VISIBLE_DEVICES": "0,1"}
-    exp_cmd = "srun -w node-1,node-2 --ntasks 4 --nodes 2 --ntasks-per-node 2 --exact"
+    exp_env = {"TESTING_VISIBLE_DEVICES": "0,1,2,3"}
+    exp_cmd = "srun -w node-1 --ntasks 4 --nodes 1 --ntasks-per-node 4 --exact"
     run_check(exp_env, exp_cmd, match_procs_to_gpus=True, num_gpus=4)

-    exp_env = {"TESTING_VISIBLE_DEVICES": "0"}
-    exp_cmd = "srun -w node-1,node-2 --ntasks 2 --nodes 2 --ntasks-per-node 1 --exact"
-    run_check(exp_env, exp_cmd, match_procs_to_gpus=True, num_gpus=3)
+    exp_env = {"TESTING_VISIBLE_DEVICES": "0,1,2,3"}
+    exp_cmd = "srun -w node-1,node-2 --ntasks 8 --nodes 2 --ntasks-per-node 4 --exact"
+    run_check(exp_env, exp_cmd, match_procs_to_gpus=True, num_gpus=8)
+    run_check(exp_env, exp_cmd, match_procs_to_gpus=True, num_gpus=7)
+
+    exp_env = {"TESTING_VISIBLE_DEVICES": "0,1"}
+    exp_cmd = "srun -w node-1,node-2 --ntasks 4 --nodes 2 --ntasks-per-node 2 --exact"
+    run_check(exp_env, exp_cmd, procs_per_node=2, num_gpus=4)


 if __name__ == "__main__":
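The updated srun expectations follow from the minimal-node rule, assuming the test platform exposes two nodes with six GPUs each (as the 12-task match_procs_to_gpus case implies): round the node count up from the GPU request, spread the GPUs evenly across those nodes, and launch one rank per visible GPU. A quick arithmetic check of that reading:

# Quick check of the srun expectations above (assumes 6 GPUs per node, 2 nodes,
# as implied by the 12-task match_procs_to_gpus case).
from math import ceil

def expected(ngpus, gpus_per_node_avail=6, total_nodes=2):
    nnodes = min(ceil(ngpus / gpus_per_node_avail), total_nodes)
    gpus_per_node = ceil(ngpus / nnodes)   # GPUs made visible on each node
    ntasks = gpus_per_node * nnodes        # one rank per visible GPU
    return nnodes, gpus_per_node, ntasks

assert expected(4) == (1, 4, 4)    # "--ntasks 4 --nodes 1", devices 0-3
assert expected(8) == (2, 4, 8)    # "--ntasks 8 --nodes 2", devices 0-3
assert expected(7) == (2, 4, 8)    # rounds up to the same layout as 8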

libensemble/tools/test_support.py

Lines changed: 4 additions & 2 deletions
@@ -204,7 +204,7 @@ def check_gpu_setting(task, assert_setting=True, print_setting=False, resources=

     # Get expected numbers
     if cmd_line:
-        expected_nums = _safe_min(wresources.slot_count * wresources.gpus_per_rset, wresources.gen_ngpus)
+        expected_nums = _safe_min(wresources.slot_count * wresources.gpus_per_rset_per_node, wresources.gen_ngpus)
         if gpus_per_task:
             stype = "runline option: gpus per task"
             expected_nums //= int(ppn)
@@ -219,7 +219,9 @@ def check_gpu_setting(task, assert_setting=True, print_setting=False, resources=
         gpu_setting = _get_opt_value(expected_setting, task.runline)
     else:
         stype = "Env var"
-        expected_nums = wresources.get_slots_as_string(multiplier=wresources.gpus_per_rset, limit=wresources.gen_ngpus)
+        expected_nums = wresources.get_slots_as_string(
+            multiplier=wresources.gpus_per_rset_per_node, limit=wresources.gen_ngpus
+        )
     expected_nums = expected_nums if _set_gpus(task, wresources) else None
     if expected_nums is not None:
         expected = {expected_setting: expected_nums}
