feat: add support for parallel kind processing with threads

bhearsum · bhearsum · commit c01f67c1395e · 2026-02-12T20:00:28.000-05:00
Even with 3.14 free-threaded python, this is still a bit slower than multiprocessing on Linux, but it will allow us to start experimenting with it more, and may allow users on macOS and Windows to immediately see a speed-up.
diff --git a/src/taskgraph/generator.py b/src/taskgraph/generator.py
@@ -11,6 +11,7 @@
 from concurrent.futures import (
     FIRST_COMPLETED,
     ProcessPoolExecutor,
+    ThreadPoolExecutor,
     wait,
 )
 from dataclasses import dataclass
@@ -431,30 +432,42 @@ def _run(self):
         yield "kind_graph", kind_graph
 
         logger.info("Generating full task set")
-        # The short version of the below is: we only support parallel kind
-        # processing on Linux.
+
+        # The next block deals with enabling parallel kind processing, which
+        # currently has different support on different platforms. In summary:
+        # * Parallel kind processing is supported and enabled by default on
+        #   Linux. We use multiple processes by default, but experimental
+        #   support for multiple threads can be enabled instead.
+        # * On other platforms, we have experimental support for parallel
+        #   kind processing with multiple threads.
         #
-        # Current parallel generation relies on multiprocessing, and more
-        # specifically: the "fork" multiprocessing method. This is not supported
-        # at all on Windows (it uses "spawn"). Forking is supported on macOS,
-        # but no longer works reliably in all cases, and our usage of it here
-        # causes crashes. See https://github.com/python/cpython/issues/77906
-        # and http://sealiesoftware.com/blog/archive/2017/6/5/Objective-C_and_fork_in_macOS_1013.html
-        # for more details on that.
-        # Other methods of multiprocessing (both "spawn" and "forkserver")
-        # do not work for our use case, because they cause global variables
-        # to be reinitialized, which are sometimes modified earlier in graph
-        # generation. These issues can theoretically be worked around by
-        # eliminating all reliance on globals as part of task generation, but
-        # is far from a small amount of work in users like Gecko/Firefox.
-        # In the long term, the better path forward is likely to be switching
-        # to threading with a free-threaded python to achieve similar parallel
-        # processing.
-        if platform.system() != "Linux" or os.environ.get("TASKGRAPH_SERIAL"):
-            all_tasks = self._load_tasks_serial(kinds, kind_graph, parameters)
-        else:
-            executor = ProcessPoolExecutor(mp_context=multiprocessing.get_context("fork"))
-            all_tasks = self._load_tasks_parallel(kinds, kind_graph, parameters, executor)
+        # On all platforms serial kind processing can be enabled by setting
+        # TASKGRAPH_SERIAL in the environment.
+        #
+        # On all platforms, multiple threads can be enabled by setting
+        # TASKGRAPH_USE_THREADS in the environment. Taskgraph must be running
+        # from a free-threaded Python build to see any performance benefits.
+        #
+        # In the long term, the goal is to enable parallel kind processing on
+        # all platforms by default using threads, and remove support for multiple
+        # processes altogether.
+        def load_tasks():
+            """Load all tasks serially, in threads, or in forked processes (Linux)."""
+            env = os.environ
+            if env.get("TASKGRAPH_SERIAL"):
+                return self._load_tasks_serial(kinds, kind_graph, parameters)
+            if env.get("TASKGRAPH_USE_THREADS"):
+                # os.process_cpu_count() only exists on Python 3.13+.
+                cpu_count = getattr(os, "process_cpu_count", os.cpu_count)
+                executor = ThreadPoolExecutor(max_workers=cpu_count())
+            elif platform.system() == "Linux":
+                # Processes are the default on Linux only; they rely on "fork".
+                executor = ProcessPoolExecutor(mp_context=multiprocessing.get_context("fork"))
+            else:
+                return self._load_tasks_serial(kinds, kind_graph, parameters)
+            return self._load_tasks_parallel(kinds, kind_graph, parameters, executor)
+
+        all_tasks = load_tasks()
 
         full_task_set = TaskGraph(all_tasks, Graph(frozenset(all_tasks), frozenset()))
         yield self.verify("full_task_set", full_task_set, graph_config, parameters)
diff --git a/test/test_generator.py b/test/test_generator.py
@@ -3,6 +3,7 @@
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 
+import os
 import platform
-from concurrent.futures import ProcessPoolExecutor
+from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
 
@@ -14,9 +15,13 @@
 from taskgraph.loader.default import loader as default_loader
 
 linuxonly = pytest.mark.skipif(
-    platform.system() != "Linux",
+    platform.system() != "Linux" or bool(os.environ.get("TASKGRAPH_USE_THREADS")),  # str is eval'd
     reason="requires Linux and 'fork' multiprocessing support",
 )
+threadsonly = pytest.mark.skipif(
+    not os.environ.get("TASKGRAPH_USE_THREADS"),  # skip when unset or empty
+    reason="requires multithreading to be enabled",
+)
 
 
 class FakePPE(ProcessPoolExecutor):
@@ -27,8 +32,16 @@ def submit(self, kind_load_tasks, *args):
         return super().submit(kind_load_tasks, *args)
 
 
+class FakeTPE(ThreadPoolExecutor):
+    loaded_kinds = []  # kind names in submit() order; shared on the class
+
+    def submit(self, kind_load_tasks, *args):
+        self.loaded_kinds.append(kind_load_tasks.__self__.name)  # record, then delegate
+        return super().submit(kind_load_tasks, *args)
+
+
 @linuxonly
-def test_kind_ordering(mocker, maketgg):
+def test_kind_ordering_multiprocess(mocker, maketgg):
     "When task kinds depend on each other, they are loaded in postorder"
     mocked_ppe = mocker.patch.object(generator, "ProcessPoolExecutor", new=FakePPE)
     tgg = maketgg(
@@ -42,6 +55,21 @@ def test_kind_ordering(mocker, maketgg):
     assert mocked_ppe.loaded_kinds == ["_fake1", "_fake2", "_fake3"]
 
 
+@threadsonly
+def test_kind_ordering_multithread(mocker, maketgg):
+    "With threads enabled, dependent task kinds are still loaded in postorder"
+    mocked_tpe = mocker.patch.object(generator, "ThreadPoolExecutor", new=FakeTPE)
+    tgg = maketgg(
+        kinds=[
+            ("_fake3", {"kind-dependencies": ["_fake2", "_fake1"]}),
+            ("_fake2", {"kind-dependencies": ["_fake1"]}),
+            ("_fake1", {"kind-dependencies": []}),
+        ]
+    )
+    tgg._run_until("full_task_set")
+    assert mocked_tpe.loaded_kinds == ["_fake1", "_fake2", "_fake3"]
+
+
 def test_full_task_set(maketgg):
     "The full_task_set property has all tasks"
     tgg = maketgg()