Skip to content

Commit 77a8d89

Browse files
committed
Fix --native subinterpreter merge for shared TIDs using shim stack anchors
When multiple subinterpreters execute on the same OS thread, each PyThread previously received the full native stack for that TID. That made native/Python merging fail because every thread in the group saw the same set of eval frames, so n_eval did not match each thread's entry-frame count. This change makes native merging deterministic for same-TID subinterpreter groups. The game is played like this: - Capture a per-thread stack anchor in the native layer: - add Thread::StackAnchor() and d_stack_anchor. - compute the anchor from the Python frame chain by walking backwards to the nearest stack/shim-owned frame (FRAME_OWNED_BY_INTERPRETER / FRAME_OWNED_BY_CSTACK on 3.14+, FRAME_OWNED_BY_CSTACK on 3.12/3.13). - Thread construction now forwards this anchor into PyThread as stack_anchor. - Switch process/core thread assembly from immediate yielding to collect-then-normalize. - Group Python threads by tid when native mode is enabled. - For groups with more than one thread: - pick a canonical native stack, - sort group members by stack_anchor (stable tie-breaker), - partition eval-frame ownership according to each thread's Python entry-frame count, - slice native frames accordingly per thread. - If counts are inconsistent, keep existing behavior for that group and skip slicing.
1 parent c46223d commit 77a8d89

7 files changed

Lines changed: 437 additions & 6 deletions

File tree

src/pystack/_pystack.pyx

Lines changed: 97 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ from .types import NativeFrame
6666
from .types import PyCodeObject
6767
from .types import PyFrame
6868
from .types import PyThread
69+
from .types import frame_type
6970

7071
LOGGER = logging.getLogger(__file__)
7172

@@ -490,6 +491,7 @@ cdef object _construct_threads_from_interpreter_state(
490491
python_version,
491492
interpreter_id,
492493
name=get_thread_name(pid, current_thread.Tid()),
494+
stack_anchor=current_thread.StackAnchor(),
493495
)
494496
)
495497
current_thread = (
@@ -498,6 +500,91 @@ cdef object _construct_threads_from_interpreter_state(
498500

499501
return threads
500502

503+
504+
def _entry_frame_count(thread: PyThread) -> int:
505+
return sum(1 for frame in thread.all_frames if frame.is_entry)
506+
507+
508+
def _eval_frame_positions(thread: PyThread):
509+
if not thread.python_version:
510+
return []
511+
return [
512+
index
513+
for index, native_frame in enumerate(thread.native_frames)
514+
if frame_type(native_frame, thread.python_version) == NativeFrame.FrameType.EVAL
515+
]
516+
517+
518+
def _slice_native_stacks_for_same_tid_threads(threads) -> None:
519+
if len(threads) < 2:
520+
return
521+
522+
canonical = next((thread for thread in threads if thread.native_frames), None)
523+
if canonical is None:
524+
return
525+
526+
canonical_frames = list(canonical.native_frames)
527+
eval_positions = [
528+
index
529+
for index, native_frame in enumerate(canonical_frames)
530+
if frame_type(native_frame, canonical.python_version) == NativeFrame.FrameType.EVAL
531+
]
532+
if not eval_positions:
533+
return
534+
535+
entry_counts = [_entry_frame_count(thread) for thread in threads]
536+
if sum(entry_counts) != len(eval_positions):
537+
LOGGER.debug(
538+
"Skipping same-tid native slicing for tid %s due to mismatched counts: "
539+
"entry=%s eval=%s",
540+
threads[0].tid,
541+
sum(entry_counts),
542+
len(eval_positions),
543+
)
544+
return
545+
546+
ordered_threads = sorted(
547+
enumerate(threads),
548+
key=lambda item: (
549+
item[1].stack_anchor is None,
550+
-(item[1].stack_anchor or 0),
551+
item[0],
552+
),
553+
)
554+
555+
cursor = 0
556+
for _, thread in ordered_threads:
557+
required_eval_frames = _entry_frame_count(thread)
558+
if required_eval_frames == 0:
559+
thread.native_frames = []
560+
continue
561+
562+
group_start = cursor
563+
group_end = cursor + required_eval_frames
564+
prev_eval = eval_positions[group_start - 1] if group_start > 0 else -1
565+
next_eval = (
566+
eval_positions[group_end]
567+
if group_end < len(eval_positions)
568+
else len(canonical_frames)
569+
)
570+
thread.native_frames = canonical_frames[prev_eval + 1 : next_eval]
571+
cursor = group_end
572+
573+
574+
def _normalize_python_threads(threads, native_mode: NativeReportingMode):
575+
if native_mode == NativeReportingMode.OFF:
576+
return threads
577+
578+
threads_by_tid = {}
579+
for thread in threads:
580+
threads_by_tid.setdefault(thread.tid, []).append(thread)
581+
582+
for group in threads_by_tid.values():
583+
if len(group) <= 1:
584+
continue
585+
_slice_native_stacks_for_same_tid_threads(group)
586+
return threads
587+
501588
cdef object _construct_os_thread(
502589
shared_ptr[AbstractProcessManager] manager, int pid, int tid
503590
):
@@ -625,6 +712,7 @@ def _get_process_threads(
625712
)
626713

627714
all_tids = list(manager.get().Tids())
715+
threads = []
628716
while head:
629717
add_native_traces = native_mode != NativeReportingMode.OFF
630718
for thread in _construct_threads_from_interpreter_state(
@@ -637,9 +725,12 @@ def _get_process_threads(
637725
):
638726
if thread.tid in all_tids:
639727
all_tids.remove(thread.tid)
640-
yield thread
728+
threads.append(thread)
641729
head = InterpreterUtils.getNextInterpreter(manager, head)
642730

731+
for thread in _normalize_python_threads(threads, native_mode):
732+
yield thread
733+
643734
if native_mode == NativeReportingMode.ALL:
644735
yield from _construct_os_threads(manager, pid, all_tids)
645736

@@ -772,6 +863,7 @@ def _get_process_threads_for_core(
772863
)
773864
774865
all_tids = list(manager.get().Tids())
866+
threads = []
775867
776868
while head:
777869
add_native_traces = native_mode != NativeReportingMode.OFF
@@ -785,8 +877,11 @@ def _get_process_threads_for_core(
785877
):
786878
if thread.tid in all_tids:
787879
all_tids.remove(thread.tid)
788-
yield thread
880+
threads.append(thread)
789881
head = InterpreterUtils.getNextInterpreter(manager, head)
790882
883+
for thread in _normalize_python_threads(threads, native_mode):
884+
yield thread
885+
791886
if native_mode == NativeReportingMode.ALL:
792887
yield from _construct_os_threads(manager, pymanager.pid, all_tids)

src/pystack/_pystack/pythread.cpp

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#include <cassert>
33
#include <memory>
44

5+
#include "cpython/frame.h"
56
#include "cpython/pthread.h"
67
#include "interpreter.h"
78
#include "logging.h"
@@ -18,6 +19,7 @@ namespace pystack {
1819
Thread::Thread(pid_t pid, pid_t tid)
1920
: d_pid(pid)
2021
, d_tid(tid)
22+
, d_stack_anchor(0)
2123
{
2224
}
2325

@@ -27,6 +29,12 @@ Thread::Tid() const
2729
return d_tid;
2830
}
2931

32+
remote_addr_t
33+
Thread::StackAnchor() const
34+
{
35+
return d_stack_anchor;
36+
}
37+
3038
const std::vector<NativeFrame>&
3139
Thread::NativeFrames() const
3240
{
@@ -148,6 +156,7 @@ PyThread::PyThread(const std::shared_ptr<const AbstractProcessManager>& manager,
148156
<< frame_addr;
149157
d_first_frame = std::make_unique<FrameObject>(manager, frame_addr, 0);
150158
}
159+
d_stack_anchor = getStackAnchor(manager, frame_addr);
151160

152161
d_addr = addr;
153162
remote_addr_t candidate_next_addr = ts.getField(&py_thread_v::o_next);
@@ -237,6 +246,44 @@ PyThread::getFrameAddr(
237246
}
238247
}
239248

249+
remote_addr_t
250+
PyThread::getStackAnchor(
251+
const std::shared_ptr<const AbstractProcessManager>& manager,
252+
remote_addr_t frame_addr)
253+
{
254+
if (!frame_addr) {
255+
return 0;
256+
}
257+
if (!manager->versionIsAtLeast(3, 12)) {
258+
return frame_addr;
259+
}
260+
261+
remote_addr_t current_addr = frame_addr;
262+
for (int i = 0; i < 4096 && current_addr; ++i) {
263+
Structure<py_frame_v> current_frame(manager, current_addr);
264+
auto owner = current_frame.getField(&py_frame_v::o_owner);
265+
266+
if (manager->versionIsAtLeast(3, 14)) {
267+
if (owner == Python3_14::FRAME_OWNED_BY_INTERPRETER
268+
|| owner == Python3_14::FRAME_OWNED_BY_CSTACK)
269+
{
270+
return current_addr;
271+
}
272+
} else {
273+
if (owner == Python3_12::FRAME_OWNED_BY_CSTACK) {
274+
return current_addr;
275+
}
276+
}
277+
278+
remote_addr_t next_addr = current_frame.getField(&py_frame_v::o_back);
279+
if (next_addr == current_addr) {
280+
break;
281+
}
282+
current_addr = next_addr;
283+
}
284+
return frame_addr;
285+
}
286+
240287
std::shared_ptr<FrameObject>
241288
PyThread::FirstFrame() const
242289
{

src/pystack/_pystack/pythread.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ class Thread
1616
public:
1717
Thread(pid_t pid, pid_t tid);
1818
pid_t Tid() const;
19+
remote_addr_t StackAnchor() const;
1920
const std::vector<NativeFrame>& NativeFrames() const;
2021

2122
// Methods
@@ -25,6 +26,7 @@ class Thread
2526
// Data members
2627
pid_t d_pid;
2728
pid_t d_tid;
29+
remote_addr_t d_stack_anchor;
2830
std::vector<NativeFrame> d_native_frames;
2931
};
3032

@@ -50,6 +52,9 @@ class PyThread : public Thread
5052
static remote_addr_t getFrameAddr(
5153
const std::shared_ptr<const AbstractProcessManager>& manager,
5254
Structure<py_thread_v>& ts);
55+
static remote_addr_t getStackAnchor(
56+
const std::shared_ptr<const AbstractProcessManager>& manager,
57+
remote_addr_t frame_addr);
5358

5459
private:
5560
// Data members

src/pystack/_pystack/pythread.pxd

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ cdef extern from "pythread.h" namespace "pystack":
1111
cdef cppclass NativeThread "pystack::Thread":
1212
NativeThread(int, int) except+
1313
int Tid()
14+
remote_addr_t StackAnchor()
1415
vector[NativeFrame]& NativeFrames()
1516
void populateNativeStackTrace(shared_ptr[AbstractProcessManager] manager) except+
1617

@@ -28,6 +29,7 @@ cdef extern from "pythread.h" namespace "pystack::PyThread":
2829
cdef extern from "pythread.h" namespace "pystack":
2930
cdef cppclass Thread "pystack::PyThread":
3031
int Tid()
32+
remote_addr_t StackAnchor()
3133
shared_ptr[FrameObject] FirstFrame()
3234
shared_ptr[Thread] NextThread()
3335
vector[NativeFrame]& NativeFrames()

src/pystack/types.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,7 @@ class PyThread:
117117
python_version: Optional[Tuple[int, int]]
118118
interpreter_id: Optional[int] = None
119119
name: Optional[str] = None
120+
stack_anchor: Optional[int] = None
120121

121122
@property
122123
def frames(self) -> Iterable[PyFrame]:

0 commit comments

Comments
 (0)