|
11 | 11 | from concurrent.futures import ( |
12 | 12 | FIRST_COMPLETED, |
13 | 13 | ProcessPoolExecutor, |
| 14 | + ThreadPoolExecutor, |
14 | 15 | wait, |
15 | 16 | ) |
16 | 17 | from dataclasses import dataclass |
@@ -431,30 +432,42 @@ def _run(self): |
431 | 432 | yield "kind_graph", kind_graph |
432 | 433 |
|
433 | 434 | logger.info("Generating full task set") |
434 | | - # The short version of the below is: we only support parallel kind |
435 | | - # processing on Linux. |
| 435 | + |
| 436 | + # The next block deals with enabling parallel kind processing, which |
| 437 | + # currently has different support on different platforms. In summary: |
| 438 | + # * Parallel kind processing is supported and enabled by default on |
| 439 | + # Linux. We use multiple processes by default, but experimental |
| 440 | + # support for multiple threads can be enabled instead. |
| 441 | + # * On other platforms, we have experimental support for parallel |
| 442 | + # kind processing with multiple threads. |
436 | 443 | # |
437 | | - # Current parallel generation relies on multiprocessing, and more |
438 | | - # specifically: the "fork" multiprocessing method. This is not supported |
439 | | - # at all on Windows (it uses "spawn"). Forking is supported on macOS, |
440 | | - # but no longer works reliably in all cases, and our usage of it here |
441 | | - # causes crashes. See https://github.com/python/cpython/issues/77906 |
442 | | - # and http://sealiesoftware.com/blog/archive/2017/6/5/Objective-C_and_fork_in_macOS_1013.html |
443 | | - # for more details on that. |
444 | | - # Other methods of multiprocessing (both "spawn" and "forkserver") |
445 | | - # do not work for our use case, because they cause global variables |
446 | | - # to be reinitialized, which are sometimes modified earlier in graph |
447 | | - # generation. These issues can theoretically be worked around by |
448 | | - # eliminating all reliance on globals as part of task generation, but |
449 | | - # is far from a small amount of work in users like Gecko/Firefox. |
450 | | - # In the long term, the better path forward is likely to be switching |
451 | | - # to threading with a free-threaded python to achieve similar parallel |
452 | | - # processing. |
453 | | - if platform.system() != "Linux" or os.environ.get("TASKGRAPH_SERIAL"): |
454 | | - all_tasks = self._load_tasks_serial(kinds, kind_graph, parameters) |
455 | | - else: |
456 | | - executor = ProcessPoolExecutor(mp_context=multiprocessing.get_context("fork")) |
457 | | - all_tasks = self._load_tasks_parallel(kinds, kind_graph, parameters, executor) |
| 444 | + # On all platforms serial kind processing can be enabled by setting |
| 445 | + # TASKGRAPH_SERIAL in the environment. |
| 446 | + # |
| 447 | + # On all platforms, multiple threads can be enabled by setting |
| 448 | + # TASKGRAPH_USE_THREADS in the environment. Taskgraph must be running |
| 449 | + # from a free-threaded Python build to see any performance benefits. |
| 450 | + # |
| 451 | + # In the long term, the goal is to enable parallel kind processing for |
| 452 | + # all platforms by default using threads, and remove support for multiple |
| 453 | + # processes altogether. |
| 454 | + def load_tasks(): |
| 455 | + if platform.system() == "Linux": |
| 456 | + if os.environ.get("TASKGRAPH_SERIAL"): |
| 457 | + return self._load_tasks_serial(kinds, kind_graph, parameters) |
| 458 | + elif os.environ.get("TASKGRAPH_USE_THREADS"): |
| 459 | + executor = ThreadPoolExecutor(max_workers=os.process_cpu_count()) |
| 460 | + else: |
| 461 | + executor = ProcessPoolExecutor(mp_context=multiprocessing.get_context("fork")) |
| 462 | + return self._load_tasks_parallel(kinds, kind_graph, parameters, executor) |
| 463 | + else: |
| 464 | + if os.environ.get("TASKGRAPH_SERIAL") or not os.environ.get("TASKGRAPH_USE_THREADS"): |
| 465 | + return self._load_tasks_serial(kinds, kind_graph, parameters) |
| 466 | + else: |
| 467 | + executor = ThreadPoolExecutor(max_workers=os.process_cpu_count()) |
| 468 | + return self._load_tasks_parallel(kinds, kind_graph, parameters, executor) |
| 469 | + |
| 470 | + all_tasks = load_tasks() |
458 | 471 |
|
459 | 472 | full_task_set = TaskGraph(all_tasks, Graph(frozenset(all_tasks), frozenset())) |
460 | 473 | yield self.verify("full_task_set", full_task_set, graph_config, parameters) |
|
0 commit comments