Skip to content

Commit 94fc628

Browse files
committed
vibe coded approach lets see how this works
1 parent 9a8425e commit 94fc628

11 files changed

Lines changed: 78 additions & 43 deletions

File tree

libensemble/libE.py

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,7 @@ def libE(
155155
exit_criteria: ExitCriteria,
156156
persis_info: dict = {},
157157
alloc_specs: AllocSpecs = AllocSpecs(),
158-
libE_specs: LibeSpecs = {},
158+
libE_specs: LibeSpecs | dict = {},
159159
H0=None,
160160
) -> (np.ndarray, dict, int):
161161
"""
@@ -242,11 +242,16 @@ def libE(
242242
]
243243
exit_criteria = specs_dump(ensemble.exit_criteria, by_alias=True, exclude_none=True)
244244

245-
# Restore objects that don't survive serialization via model_dump
246-
if hasattr(ensemble.gen_specs, "generator") and ensemble.gen_specs.generator is not None:
247-
gen_specs["generator"] = ensemble.gen_specs.generator
248-
if hasattr(ensemble.gen_specs, "vocs") and ensemble.gen_specs.vocs is not None:
249-
gen_specs["vocs"] = ensemble.gen_specs.vocs
245+
if hasattr(ensemble.sim_specs, "simulator") and ensemble.sim_specs.simulator is not None:
246+
sim_specs["simulator"] = ensemble.sim_specs.simulator
247+
if hasattr(ensemble.sim_specs, "vocs") and ensemble.sim_specs.vocs is not None:
248+
sim_specs["vocs"] = ensemble.sim_specs.vocs
249+
250+
if ensemble.gen_specs is not None:
251+
if hasattr(ensemble.gen_specs, "generator") and ensemble.gen_specs.generator is not None:
252+
gen_specs["generator"] = ensemble.gen_specs.generator
253+
if hasattr(ensemble.gen_specs, "vocs") and ensemble.gen_specs.vocs is not None:
254+
gen_specs["vocs"] = ensemble.gen_specs.vocs
250255

251256
# Extract platform info from settings or environment
252257
platform_info = get_platform(libE_specs)
@@ -358,7 +363,7 @@ def libE_mpi(sim_specs, gen_specs, exit_criteria, persis_info, alloc_specs, libE
358363
logger.manager_warning("*WARNING* libEnsemble detected a NULL communicator")
359364
return [], persis_info, 3 # Process not in mpi_comm
360365

361-
assert libE_specs["mpi_comm"].Get_size() > 1, "Manager only - must be at least one worker (2 MPI tasks)"
366+
assert libE_specs["mpi_comm"].Get_size() >= 1, "Manager only - must be at least one MPI task"
362367

363368
with DupComm(libE_specs["mpi_comm"]) as mpi_comm:
364369
is_manager = mpi_comm.Get_rank() == 0
@@ -368,7 +373,6 @@ def libE_mpi(sim_specs, gen_specs, exit_criteria, persis_info, alloc_specs, libE
368373
local_host = socket.gethostname()
369374
libE_nodes = list(set(mpi_comm.allgather(local_host)))
370375
resources.add_comm_info(libE_nodes=libE_nodes)
371-
nworkers = mpi_comm.Get_size() - 1
372376

373377
exctr = Executor.executor
374378
if exctr is not None:
@@ -379,7 +383,8 @@ def libE_mpi(sim_specs, gen_specs, exit_criteria, persis_info, alloc_specs, libE
379383
# Run manager or worker code, depending
380384
if is_manager:
381385
if resources is not None:
382-
resources.set_resource_manager(nworkers)
386+
n_resource_workers = mpi_comm.Get_size()
387+
resources.set_resource_manager(n_resource_workers)
383388
return libE_mpi_manager(
384389
mpi_comm, sim_specs, gen_specs, exit_criteria, persis_info, alloc_specs, libE_specs, H0
385390
)
@@ -497,7 +502,8 @@ def libE_local(sim_specs, gen_specs, exit_criteria, persis_info, alloc_specs, li
497502

498503
# Set manager resources after the forkpoint.
499504
if resources is not None:
500-
resources.set_resource_manager(libE_specs["nworkers"])
505+
n_resource_workers = libE_specs["nworkers"] + (not libE_specs.get("gen_on_worker", False))
506+
resources.set_resource_manager(n_resource_workers)
501507

502508
if not libE_specs["disable_log_files"]:
503509
exit_logger = manager_logging_config(specs=libE_specs)

libensemble/manager.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -232,7 +232,7 @@ def __init__(
232232
(1, "stop_val", self.term_test_stop_val),
233233
]
234234

235-
gen_on_manager = self.libE_specs.get("gen_on_manager", False)
235+
gen_on_manager = not self.libE_specs.get("gen_on_worker", False)
236236

237237
self.W = np.zeros(len(self.wcomms) + gen_on_manager, dtype=Manager.worker_dtype)
238238
if gen_on_manager:
@@ -662,7 +662,7 @@ def _get_alloc_libE_info(self) -> dict:
662662
"use_resource_sets": self.use_resource_sets,
663663
"gen_num_procs": self.gen_num_procs,
664664
"gen_num_gpus": self.gen_num_gpus,
665-
"gen_on_manager": self.libE_specs.get("gen_on_manager", False),
665+
"gen_on_worker": self.libE_specs.get("gen_on_worker", False),
666666
}
667667

668668
def _alloc_work(self, H: npt.NDArray, persis_info: dict) -> dict:

libensemble/resources/resources.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,7 @@ def __init__(self, libE_specs: dict, platform_info: dict = {}, top_level_dir: st
166166
"""
167167
self.top_level_dir = top_level_dir
168168
self.dedicated_mode = libE_specs.get("dedicated_mode", False)
169-
self.zero_resource_workers = libE_specs.get("zero_resource_workers", [])
169+
self.zero_resource_workers = libE_specs.get("zero_resource_workers", [0])
170170
self.num_resource_sets = libE_specs.get("num_resource_sets", None)
171171
self.enforce_worker_core_bounds = libE_specs.get("enforce_worker_core_bounds", False)
172172
self.gpus_per_group = libE_specs.get("gpus_per_group")

libensemble/resources/worker_resources.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -105,8 +105,8 @@ def free_rsets(self, worker=None):
105105
def get_index_list(num_workers: int, num_rsets: int, zero_resource_list: list[int | Any]) -> list[int | None]:
106106
"""Map WorkerID to index into a nodelist"""
107107
index = 0
108-
index_list = []
109-
for i in range(1, num_workers + 1):
108+
index_list: list[int | None] = []
109+
for i in range(0, num_workers):
110110
if i in zero_resource_list:
111111
index_list.append(None)
112112
else:
@@ -116,6 +116,11 @@ def get_index_list(num_workers: int, num_rsets: int, zero_resource_list: list[in
116116
else:
117117
index_list.append(index)
118118
index += 1
119+
120+
for i in zero_resource_list:
121+
if i >= num_workers:
122+
logger.warning(f"Worker index {i} from zero_resource_workers is out of range (0-{num_workers - 1})")
123+
119124
return index_list
120125

121126

@@ -364,7 +369,7 @@ def get_local_nodelist(
364369
local_nodelist = list(OrderedDict.fromkeys(team_list)) # Maintain order of nodes
365370
logger.debug(f"Worker's local_nodelist is {local_nodelist}")
366371

367-
slots = {}
372+
slots: dict[str, list[int]] = {}
368373
for node in local_nodelist:
369374
slots[node] = []
370375

libensemble/specs.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -344,9 +344,9 @@ class LibeSpecs(BaseModel):
344344
nworkers: int | None = 0
345345
""" Number of worker processes in ``"local"``, ``"threads"``, or ``"tcp"``."""
346346

347-
gen_on_manager: bool | None = False
348-
""" Instructs Manager process to run generator functions.
349-
This generator function can access/modify user objects by reference.
347+
gen_on_worker: bool = False
348+
""" Instructs libEnsemble to run generator functions on a worker rank.
349+
By default, the generator runs on the manager process as a thread (Worker 0).
350350
"""
351351

352352
mpi_comm: object | None = None
@@ -635,10 +635,11 @@ class LibeSpecs(BaseModel):
635635
libEnsemble processes (manager and workers) are running.
636636
"""
637637

638-
zero_resource_workers: list[int] | None = []
638+
zero_resource_workers: list[int] | None = [0]
639639
"""
640640
list of workers that require no resources. For when a fixed mapping of workers
641641
to resources is required. Otherwise, use ``num_resource_sets``.
642+
By default, Worker 0 (manager thread) is a zero-resource worker.
642643
For use with supported allocation functions.
643644
"""
644645

libensemble/tests/functionality_tests/test_mpi_gpu_settings.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,15 @@
6666
# Main block is necessary only when using local comms with spawn start method (default on macOS and Windows).
6767
if __name__ == "__main__":
6868
nworkers, is_manager, libE_specs, _ = parse_args()
69-
libE_specs["num_resource_sets"] = nworkers - 1 # Persistent gen does not need resources
69+
70+
# If gen_on_worker is False (default), then all nworkers are available for sims.
71+
# Worker 0 is the generator (and it is a zero_resource_worker by default).
72+
if not libE_specs.get("gen_on_worker", False):
73+
nsim_workers = nworkers
74+
else:
75+
nsim_workers = nworkers - 1
76+
77+
libE_specs["num_resource_sets"] = nsim_workers
7078
libE_specs["use_workflow_dir"] = True # Only a place for Open MPI machinefiles
7179

7280
if libE_specs["comms"] == "tcp":
@@ -88,8 +96,8 @@
8896
"persis_in": ["f", "x", "sim_id"],
8997
"out": [("priority", float), ("resource_sets", int), ("x", float, n)],
9098
"user": {
91-
"initial_batch_size": nworkers - 1,
92-
"max_resource_sets": nworkers - 1, # Any sim created can req. 1 worker up to all.
99+
"initial_batch_size": nsim_workers,
100+
"max_resource_sets": nsim_workers, # Any sim created can req. 1 worker up to all.
93101
"lb": np.array([-3, -2]),
94102
"ub": np.array([3, 2]),
95103
},

libensemble/tests/functionality_tests/test_mpi_gpu_settings_env.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,15 @@
4343
# Main block is necessary only when using local comms with spawn start method (default on macOS and Windows).
4444
if __name__ == "__main__":
4545
nworkers, is_manager, libE_specs, _ = parse_args()
46-
libE_specs["num_resource_sets"] = nworkers - 1 # Persistent gen does not need resources
46+
47+
# If gen_on_worker is False (default), then all nworkers are available for sims.
48+
# Worker 0 is the generator (and it is a zero_resource_worker by default).
49+
if not libE_specs.get("gen_on_worker", False):
50+
nsim_workers = nworkers
51+
else:
52+
nsim_workers = nworkers - 1
53+
54+
libE_specs["num_resource_sets"] = nsim_workers
4755
libE_specs["use_workflow_dir"] = True # Only a place for Open MPI machinefiles
4856

4957
# Optional for organization of output scripts
@@ -70,8 +78,8 @@
7078
"persis_in": ["f", "x", "sim_id"],
7179
"out": [("priority", float), ("resource_sets", int), ("x", float, n)],
7280
"user": {
73-
"initial_batch_size": nworkers - 1,
74-
"max_resource_sets": nworkers - 1, # Any sim created can req. 1 worker up to all.
81+
"initial_batch_size": nsim_workers,
82+
"max_resource_sets": nsim_workers, # Any sim created can req. 1 worker up to all.
7583
"lb": np.array([-3, -2]),
7684
"ub": np.array([3, 2]),
7785
},

libensemble/tests/functionality_tests/test_persistent_uniform_gen_decides_stop.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,11 +33,18 @@
3333
nworkers, is_manager, libE_specs, _ = parse_args()
3434

3535
for ngens in range(1, 3):
36+
# If gen_on_worker is False (default), the first gen is on the manager (Worker 0).
37+
# Subsequent gens (if ngens > 1) move to worker ranks.
38+
if not libE_specs.get("gen_on_worker", False):
39+
nsim_workers = nworkers - (ngens - 1)
40+
else:
41+
nsim_workers = nworkers - ngens
42+
3643
n = 2
37-
init_batch_size = nworkers - ngens
44+
init_batch_size = nsim_workers
3845

39-
if ngens >= nworkers:
40-
sys.exit("The number of generators must be less than the number of workers -- aborting...")
46+
if nsim_workers <= 0:
47+
sys.exit("The number of generators must be less than the available workers -- aborting...")
4148

4249
sim_specs = {
4350
"sim_f": sim_f,

libensemble/tests/unit_tests/test_manager_main.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import libensemble.manager as man
77
import libensemble.tests.unit_tests.setup as setup
88

9-
libE_specs = {"comms": "local"}
9+
libE_specs = {"comms": "local", "gen_on_worker": True}
1010

1111

1212
def test_term_test_1():

libensemble/tests/unit_tests/test_resources.py

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -694,23 +694,23 @@ def test_map_workerid_to_index():
694694

695695
zero_resource_list = []
696696
index_list = ResourceManager.get_index_list(num_workers, num_rsets, zero_resource_list)
697-
for workerID in range(1, num_workers + 1):
698-
index = index_list[workerID - 1]
699-
assert index == workerID - 1, "index incorrect. Received: " + str(index)
697+
for workerID in range(0, num_workers):
698+
index = index_list[workerID]
699+
assert index == workerID, "index incorrect. Received: " + str(index)
700700

701-
zero_resource_list = [1]
701+
zero_resource_list = [0]
702702
index_list = ResourceManager.get_index_list(num_workers, num_rsets, zero_resource_list)
703-
for workerID in range(2, num_workers + 1):
704-
index = index_list[workerID - 1]
705-
assert index == workerID - 2, "index incorrect. Received: " + str(index)
703+
for workerID in range(1, num_workers):
704+
index = index_list[workerID]
705+
assert index == workerID - 1, "index incorrect. Received: " + str(index)
706706

707-
zero_resource_list = [1, 2]
707+
zero_resource_list = [0, 1]
708708
index_list = ResourceManager.get_index_list(num_workers, num_rsets, zero_resource_list)
709-
for workerID in range(3, num_workers + 1):
710-
index = index_list[workerID - 1]
711-
assert index == workerID - 3, "index incorrect. Received: " + str(index)
709+
for workerID in range(2, num_workers):
710+
index = index_list[workerID]
711+
assert index == workerID - 2, "index incorrect. Received: " + str(index)
712712

713-
zero_resource_list = [1, 3]
713+
zero_resource_list = [0, 2]
714714
index_list = ResourceManager.get_index_list(num_workers, num_rsets, zero_resource_list)
715715

716716
workerID = 2

0 commit comments

Comments
 (0)