@@ -118,8 +118,10 @@ def run_check(exp_env, exp_cmd, **kwargs):
118118 args_for_sim = "sleep 0"
119119 exp_runline = exp_cmd + " simdir/my_simtask.x sleep 0"
120120 task = exctr .submit (calc_type = "sim" , app_args = args_for_sim , dry_run = True , ** kwargs )
121- assert task .env == exp_env , f"task.env does not match expected: { task .env } "
122- assert task .runline == exp_runline , f"exp_runline does not match expected: { task .runline } "
121+ assert task .env == exp_env , f"Task env does not match expected:\n Received: { task .env } \n Expected: { exp_env } "
122+ assert (
123+ task .runline == exp_runline
124+ ), f"Run line does not match expected.\n Received: { task .runline } \n Expected: { exp_runline } "
123125
124126 return run_check
125127
@@ -307,37 +309,46 @@ def test_dry_run_ngpus_srun_plat3_2nodes():
307309 run_check (exp_env , exp_cmd , num_procs = 2 , num_nodes = 2 , auto_assign_gpus = True )
308310 run_check (exp_env , exp_cmd , procs_per_node = 1 , auto_assign_gpus = True )
309311
312+ # restrict with num_gpus - too many, restrict to those available
313+ exp_env = {"TESTING_VISIBLE_DEVICES" : "0,1,2,3,4" }
314+ run_check (exp_env , exp_cmd , procs_per_node = 1 , auto_assign_gpus = True , num_gpus = 10 )
315+
310316 # auto_assign_gpus
311317 exp_env = {"TESTING_VISIBLE_DEVICES" : "0,1,2,3,4,5" }
312318 exp_cmd = "srun -w node-1 --ntasks 1 --nodes 1 --ntasks-per-node 1 --exact"
313319 run_check (exp_env , exp_cmd , num_procs = 1 , auto_assign_gpus = True )
314320
315- # restrict with num_gpus - too many, restrict to those available
321+ # restrict with num_gpus - too many, restrict to those available (now honor num_procs=1)
316322 run_check (exp_env , exp_cmd , num_procs = 1 , auto_assign_gpus = True , num_gpus = 10 )
317323 run_check (exp_env , exp_cmd , num_procs = 1 , num_gpus = 10 )
318324
319- exp_env = {"TESTING_VISIBLE_DEVICES" : "0,1,2,3,4,5" }
320- exp_cmd = "srun -w node-1,node-2 --ntasks 2 --nodes 2 --ntasks-per-node 1 --exact"
321- run_check (exp_env , exp_cmd , procs_per_node = 1 , auto_assign_gpus = True )
325+ exp_env = {"TESTING_VISIBLE_DEVICES" : "0,1" }
326+ exp_cmd = "srun -w node-1 --ntasks 2 --nodes 1 --ntasks-per-node 2 --exact"
327+ run_check (exp_env , exp_cmd , num_procs = 2 , auto_assign_gpus = True , num_gpus = 2 )
328+ run_check (exp_env , exp_cmd , num_procs = 2 , num_gpus = 2 )
322329
323- # restrict with num_gpus
324330 exp_env = {"TESTING_VISIBLE_DEVICES" : "0" }
325331 exp_cmd = "srun -w node-1,node-2 --ntasks 2 --nodes 2 --ntasks-per-node 1 --exact"
326- run_check (exp_env , exp_cmd , num_procs = 2 , auto_assign_gpus = True , num_gpus = 2 )
327- run_check (exp_env , exp_cmd , num_procs = 2 , num_gpus = 2 )
332+ run_check (exp_env , exp_cmd , num_procs = 2 , procs_per_node = 1 , auto_assign_gpus = True , num_gpus = 2 )
333+ run_check (exp_env , exp_cmd , num_procs = 2 , num_nodes = 2 , num_gpus = 2 )
328334
329335 # match_procs_to_gpus
330336 exp_env = {"TESTING_VISIBLE_DEVICES" : "0,1,2,3,4,5" }
331337 exp_cmd = "srun -w node-1,node-2 --ntasks 12 --nodes 2 --ntasks-per-node 6 --exact"
332338 run_check (exp_env , exp_cmd , match_procs_to_gpus = True , auto_assign_gpus = True )
333339
334- exp_env = {"TESTING_VISIBLE_DEVICES" : "0,1" }
335- exp_cmd = "srun -w node-1,node-2 --ntasks 4 --nodes 2 --ntasks-per-node 2 --exact"
340+ exp_env = {"TESTING_VISIBLE_DEVICES" : "0,1,2,3 " }
341+ exp_cmd = "srun -w node-1 --ntasks 4 --nodes 1 --ntasks-per-node 4 --exact"
336342 run_check (exp_env , exp_cmd , match_procs_to_gpus = True , num_gpus = 4 )
337343
338- exp_env = {"TESTING_VISIBLE_DEVICES" : "0" }
339- exp_cmd = "srun -w node-1,node-2 --ntasks 2 --nodes 2 --ntasks-per-node 1 --exact"
340- run_check (exp_env , exp_cmd , match_procs_to_gpus = True , num_gpus = 3 )
344+ exp_env = {"TESTING_VISIBLE_DEVICES" : "0,1,2,3" }
345+ exp_cmd = "srun -w node-1,node-2 --ntasks 8 --nodes 2 --ntasks-per-node 4 --exact"
346+ run_check (exp_env , exp_cmd , match_procs_to_gpus = True , num_gpus = 8 )
347+ run_check (exp_env , exp_cmd , match_procs_to_gpus = True , num_gpus = 7 )
348+
349+ exp_env = {"TESTING_VISIBLE_DEVICES" : "0,1" }
350+ exp_cmd = "srun -w node-1,node-2 --ntasks 4 --nodes 2 --ntasks-per-node 2 --exact"
351+ run_check (exp_env , exp_cmd , procs_per_node = 2 , num_gpus = 4 )
341352
342353
343354if __name__ == "__main__" :
0 commit comments