Skip to content

Commit c01f67c

Browse files
committed
feat: add support for parallel kind processing with threads
Even with 3.14 free-threaded python, this is still a bit slower than multiprocessing on Linux, but it will allow us to start experimenting with it more, and may allow users on macOS and Windows to immediately see a speed-up.
1 parent fc3dff4 commit c01f67c

2 files changed

Lines changed: 66 additions & 25 deletions

File tree

src/taskgraph/generator.py

Lines changed: 36 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from concurrent.futures import (
1212
FIRST_COMPLETED,
1313
ProcessPoolExecutor,
14+
ThreadPoolExecutor,
1415
wait,
1516
)
1617
from dataclasses import dataclass
@@ -431,30 +432,42 @@ def _run(self):
431432
yield "kind_graph", kind_graph
432433

433434
logger.info("Generating full task set")
434-
# The short version of the below is: we only support parallel kind
435-
# processing on Linux.
435+
436+
# The next block deals with enabling parallel kind processing, which
437+
# currently has different support on different platforms. In summary:
438+
# * Parallel kind processing is supported and enabled by default on
439+
# Linux. We use multiple processes by default, but experimental
440+
# support for multiple threads can be enabled instead.
441+
# * On other platforms, we have experimental support for parallel
442+
# kind processing with multiple threads.
436443
#
437-
# Current parallel generation relies on multiprocessing, and more
438-
# specifically: the "fork" multiprocessing method. This is not supported
439-
# at all on Windows (it uses "spawn"). Forking is supported on macOS,
440-
# but no longer works reliably in all cases, and our usage of it here
441-
# causes crashes. See https://github.com/python/cpython/issues/77906
442-
# and http://sealiesoftware.com/blog/archive/2017/6/5/Objective-C_and_fork_in_macOS_1013.html
443-
# for more details on that.
444-
# Other methods of multiprocessing (both "spawn" and "forkserver")
445-
# do not work for our use case, because they cause global variables
446-
# to be reinitialized, which are sometimes modified earlier in graph
447-
# generation. These issues can theoretically be worked around by
448-
# eliminating all reliance on globals as part of task generation, but
449-
# is far from a small amount of work in users like Gecko/Firefox.
450-
# In the long term, the better path forward is likely to be switching
451-
# to threading with a free-threaded python to achieve similar parallel
452-
# processing.
453-
if platform.system() != "Linux" or os.environ.get("TASKGRAPH_SERIAL"):
454-
all_tasks = self._load_tasks_serial(kinds, kind_graph, parameters)
455-
else:
456-
executor = ProcessPoolExecutor(mp_context=multiprocessing.get_context("fork"))
457-
all_tasks = self._load_tasks_parallel(kinds, kind_graph, parameters, executor)
444+
# On all platforms serial kind processing can be enabled by setting
445+
# TASKGRAPH_SERIAL in the environment.
446+
#
447+
# On all platforms, multiple threads can be enabled by setting
448+
# TASKGRAPH_USE_THREADS in the environment. Taskgraph must be running
449+
# from a free-threaded Python build to see any performance benefits.
450+
#
451+
# In the long term, the goal is to enable parallel kind processing for
452+
# all platforms by default using threads, and remove support for multiple
453+
# processes altogether.
454+
def load_tasks():
    """Load the tasks of every kind using the strategy chosen via the environment.

    Selection order:

    * ``TASKGRAPH_SERIAL`` set: kinds are processed serially on every
      platform.
    * ``TASKGRAPH_USE_THREADS`` set: kinds are processed in parallel with a
      thread pool on every platform.
    * Otherwise: Linux uses a process pool with the "fork" start method;
      all other platforms fall back to serial processing.

    Returns whatever ``_load_tasks_serial`` / ``_load_tasks_parallel``
    return for the selected strategy.
    """
    if os.environ.get("TASKGRAPH_SERIAL"):
        return self._load_tasks_serial(kinds, kind_graph, parameters)

    if os.environ.get("TASKGRAPH_USE_THREADS"):
        # os.process_cpu_count() only exists on Python 3.13+. Fall back to
        # os.cpu_count() so enabling threads on an older interpreter does
        # not raise AttributeError.
        cpu_count = getattr(os, "process_cpu_count", os.cpu_count)
        executor = ThreadPoolExecutor(max_workers=cpu_count())
    elif platform.system() == "Linux":
        executor = ProcessPoolExecutor(mp_context=multiprocessing.get_context("fork"))
    else:
        # No process-based parallelism outside of Linux.
        return self._load_tasks_serial(kinds, kind_graph, parameters)

    return self._load_tasks_parallel(kinds, kind_graph, parameters, executor)
469+
470+
all_tasks = load_tasks()
458471

459472
full_task_set = TaskGraph(all_tasks, Graph(frozenset(all_tasks), frozenset()))
460473
yield self.verify("full_task_set", full_task_set, graph_config, parameters)

test/test_generator.py

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
44

55

6+
import os
67
import platform
78
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
89

@@ -14,9 +15,13 @@
1415
from taskgraph.loader.default import loader as default_loader
1516

1617
# Skip unless running on Linux with process-based parallelism, i.e. when
# TASKGRAPH_USE_THREADS is not set.
linuxonly = pytest.mark.skipif(
    # bool() matters here: without it the expression can evaluate to the raw
    # environment string, and pytest evaluates string conditions as Python
    # expressions (so e.g. "yes" would error and "0" would not skip).
    platform.system() != "Linux" or bool(os.environ.get("TASKGRAPH_USE_THREADS")),
    reason="requires Linux and 'fork' multiprocessing support",
)
# Skip unless thread-based parallelism was requested via TASKGRAPH_USE_THREADS.
threadsonly = pytest.mark.skipif(
    not os.environ.get("TASKGRAPH_USE_THREADS"),
    reason="requires multithreading to be enabled",
)
2025

2126

2227
class FakePPE(ProcessPoolExecutor):
@@ -27,8 +32,16 @@ def submit(self, kind_load_tasks, *args):
2732
return super().submit(kind_load_tasks, *args)
2833

2934

35+
class FakeTPE(ThreadPoolExecutor):
    """Thread pool executor that records the order in which kinds are submitted.

    Patched in place of ``ThreadPoolExecutor`` so tests can inspect
    ``loaded_kinds`` on the class after graph generation.
    """

    # Class-level on purpose: tests read the list from the class itself,
    # not from the executor instance created during generation.
    loaded_kinds = []

    def submit(self, kind_load_tasks, *args):
        """Record the submitting kind's name, then submit the work as usual.

        ``kind_load_tasks`` must be a bound method whose owner has a ``name``
        attribute.
        """
        self.loaded_kinds.append(kind_load_tasks.__self__.name)
        return super().submit(kind_load_tasks, *args)
41+
42+
3043
@linuxonly
31-
def test_kind_ordering(mocker, maketgg):
44+
def test_kind_ordering_multiprocess(mocker, maketgg):
3245
"When task kinds depend on each other, they are loaded in postorder"
3346
mocked_ppe = mocker.patch.object(generator, "ProcessPoolExecutor", new=FakePPE)
3447
tgg = maketgg(
@@ -42,6 +55,21 @@ def test_kind_ordering(mocker, maketgg):
4255
assert mocked_ppe.loaded_kinds == ["_fake1", "_fake2", "_fake3"]
4356

4457

58+
@threadsonly
def test_kind_ordering_threads(mocker, maketgg):
    """When task kinds depend on each other, they are loaded in postorder.

    Thread-pool variant of the kind ordering test. It needs its own name:
    reusing the multiprocess test's name rebinds it at module level, so the
    multiprocess variant would never be collected.
    """
    mocked_tpe = mocker.patch.object(generator, "ThreadPoolExecutor", new=FakeTPE)
    # Kinds are declared in reverse dependency order to prove that the
    # generator orders them by dependency rather than by declaration.
    tgg = maketgg(
        kinds=[
            ("_fake3", {"kind-dependencies": ["_fake2", "_fake1"]}),
            ("_fake2", {"kind-dependencies": ["_fake1"]}),
            ("_fake1", {"kind-dependencies": []}),
        ]
    )
    tgg._run_until("full_task_set")
    assert mocked_tpe.loaded_kinds == ["_fake1", "_fake2", "_fake3"]
71+
72+
4573
def test_full_task_set(maketgg):
4674
"The full_task_set property has all tasks"
4775
tgg = maketgg()

0 commit comments

Comments
 (0)