From d11a74d625aa76ab271761754ea7f5e063eed968 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Mon, 30 Mar 2026 11:30:25 +0100 Subject: [PATCH 1/4] Make _Py_get_machine_stack_pointer return the stack pointer (or close to it), not the frame pointer * Make ``_Py_ReachedRecursionLimit`` inline again * Remove ``_Py_MakeRecCheck``, replacing its use with ``_Py_ReachedRecursionLimit`` * Move stack switching check into ``_Py_CheckRecursiveCall`` --- Include/internal/pycore_ceval.h | 16 +++++------ Include/internal/pycore_pystate.h | 22 +++++++-------- Include/internal/pycore_pythonrun.h | 3 ++- Include/pyport.h | 5 ++++ Python/ceval.c | 42 +++++++---------------------- 5 files changed, 35 insertions(+), 53 deletions(-) diff --git a/Include/internal/pycore_ceval.h b/Include/internal/pycore_ceval.h index 9fd3be74404907..c86cd58e295e53 100644 --- a/Include/internal/pycore_ceval.h +++ b/Include/internal/pycore_ceval.h @@ -211,16 +211,16 @@ extern void _PyEval_DeactivateOpCache(void); /* --- _Py_EnterRecursiveCall() ----------------------------------------- */ -static inline int _Py_MakeRecCheck(PyThreadState *tstate) { +static inline int _Py_ReachedRecursionLimit(PyThreadState *tstate) { uintptr_t here_addr = _Py_get_machine_stack_pointer(); _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate; - // Overflow if stack pointer is between soft limit and the base of the hardware stack. - // If it is below the hardware stack base, assume that we have the wrong stack limits, and do nothing. - // We could have the wrong stack limits because of limited platform support, or user-space threads. + // Possible overflow if stack pointer is beyond the soft limit. + // _Py_CheckRecursiveCall will check for corner cases and + // report an error if there is an overflow. 
#if _Py_STACK_GROWS_DOWN - return here_addr < _tstate->c_stack_soft_limit && here_addr >= _tstate->c_stack_soft_limit - 2 * _PyOS_STACK_MARGIN_BYTES; + return here_addr < _tstate->c_stack_soft_limit; #else - return here_addr > _tstate->c_stack_soft_limit && here_addr <= _tstate->c_stack_soft_limit + 2 * _PyOS_STACK_MARGIN_BYTES; + return here_addr > _tstate->c_stack_soft_limit; #endif } @@ -235,7 +235,7 @@ PyAPI_FUNC(int) _Py_CheckRecursiveCallPy( static inline int _Py_EnterRecursiveCallTstate(PyThreadState *tstate, const char *where) { - return (_Py_MakeRecCheck(tstate) && _Py_CheckRecursiveCall(tstate, where)); + return (_Py_ReachedRecursionLimit(tstate) && _Py_CheckRecursiveCall(tstate, where)); } static inline int _Py_EnterRecursiveCall(const char *where) { @@ -249,8 +249,6 @@ static inline void _Py_LeaveRecursiveCallTstate(PyThreadState *tstate) { PyAPI_FUNC(void) _Py_InitializeRecursionLimits(PyThreadState *tstate); -PyAPI_FUNC(int) _Py_ReachedRecursionLimit(PyThreadState *tstate); - // Export for test_peg_generator PyAPI_FUNC(int) _Py_ReachedRecursionLimitWithMargin( PyThreadState *tstate, diff --git a/Include/internal/pycore_pystate.h b/Include/internal/pycore_pystate.h index 189a8dde9f09ed..bf580be3e35c35 100644 --- a/Include/internal/pycore_pystate.h +++ b/Include/internal/pycore_pystate.h @@ -306,23 +306,23 @@ _Py_AssertHoldsTstateFunc(const char *func) #define _Py_AssertHoldsTstate() #endif -#if !_Py__has_builtin(__builtin_frame_address) && !defined(__GNUC__) && !defined(_MSC_VER) -static uintptr_t return_pointer_as_int(char* p) { - return (uintptr_t)p; -} -#endif static inline uintptr_t _Py_get_machine_stack_pointer(void) { -#if _Py__has_builtin(__builtin_frame_address) || defined(__GNUC__) - return (uintptr_t)__builtin_frame_address(0); -#elif defined(_MSC_VER) - return (uintptr_t)_AddressOfReturnAddress(); + uintptr_t result; +#if defined(_MSC_VER) && defined(_M_ARM64) + result = __getReg(31); +#elif defined(_MSC_VER) && defined(_M_X64) + result = 
_AddressOfReturnAddress(); +#elif defined(__aarch64__) + __asm__ ("mov %0, sp" : "=r" (result)); +#elif defined(__x86_64__) + __asm__("movq %%rsp, %0" : "=r" (result)); +#else char here; - /* Avoid compiler warning about returning stack address */ - return return_pointer_as_int(&here); + result = (uintptr_t)&here; #endif + return result; } static inline intptr_t diff --git a/Include/internal/pycore_pythonrun.h b/Include/internal/pycore_pythonrun.h index 2a544edc431e6b..66dd7cd843b04f 100644 --- a/Include/internal/pycore_pythonrun.h +++ b/Include/internal/pycore_pythonrun.h @@ -46,7 +46,8 @@ extern PyObject * _Py_CompileStringObjectWithModule( * stack consumption of PyEval_EvalDefault */ #if (defined(Py_DEBUG) \ || defined(_Py_ADDRESS_SANITIZER) \ - || defined(_Py_THREAD_SANITIZER)) + || defined(_Py_THREAD_SANITIZER) \ + || defined(_Py_UNDEFINED_BEHAVIOR_SANITIZER)) # define _PyOS_LOG2_STACK_MARGIN 12 #else # define _PyOS_LOG2_STACK_MARGIN 11 diff --git a/Include/pyport.h b/Include/pyport.h index f7bb5d513b9ae6..ee90711c202482 100644 --- a/Include/pyport.h +++ b/Include/pyport.h @@ -598,6 +598,11 @@ extern "C" { # define _Py_NO_SANITIZE_THREAD __attribute__((no_sanitize_thread)) # endif # endif +# if __has_feature(undefined_behavior_sanitizer) +# if !defined(_Py_UNDEFINED_BEHAVIOR_SANITIZER) +# define _Py_UNDEFINED_BEHAVIOR_SANITIZER +# endif +# endif #elif defined(__GNUC__) # if defined(__SANITIZE_ADDRESS__) # define _Py_ADDRESS_SANITIZER diff --git a/Python/ceval.c b/Python/ceval.c index bf550f2da3662e..d4bc45f9c9130e 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -49,20 +49,6 @@ _Py_ReachedRecursionLimitWithMargin(PyThreadState *tstate, int margin_count) #endif } -void -_Py_EnterRecursiveCallUnchecked(PyThreadState *tstate) -{ - uintptr_t here_addr = _Py_get_machine_stack_pointer(); - _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate; -#if _Py_STACK_GROWS_DOWN - if (here_addr < _tstate->c_stack_hard_limit) { -#else - if (here_addr > 
_tstate->c_stack_hard_limit) { -#endif - Py_FatalError("Unchecked stack overflow."); - } -} - #if defined(__s390x__) # define Py_C_STACK_SIZE 320000 #elif defined(_WIN32) @@ -278,7 +264,7 @@ PyUnstable_ThreadState_ResetStackProtection(PyThreadState *tstate) /* The function _Py_EnterRecursiveCallTstate() only calls _Py_CheckRecursiveCall() - if the stack pointer is between the stack base and c_stack_hard_limit. */ + if the stack pointer is beyond c_stack_soft_limit. */ int _Py_CheckRecursiveCall(PyThreadState *tstate, const char *where) { @@ -287,16 +273,21 @@ _Py_CheckRecursiveCall(PyThreadState *tstate, const char *where) assert(_tstate->c_stack_soft_limit != 0); assert(_tstate->c_stack_hard_limit != 0); #if _Py_STACK_GROWS_DOWN - assert(here_addr >= _tstate->c_stack_hard_limit - _PyOS_STACK_MARGIN_BYTES); if (here_addr < _tstate->c_stack_hard_limit) { - /* Overflowing while handling an overflow. Give up. */ + if (here_addr < _tstate->c_stack_hard_limit - _PyOS_STACK_MARGIN_BYTES) { + // Far out of bounds -- Assume stack switching has occurred + return 0; + } int kbytes_used = (int)(_tstate->c_stack_top - here_addr)/1024; #else - assert(here_addr <= _tstate->c_stack_hard_limit + _PyOS_STACK_MARGIN_BYTES); if (here_addr > _tstate->c_stack_hard_limit) { - /* Overflowing while handling an overflow. Give up. */ + if (here_addr > _tstate->c_stack_hard_limit + _PyOS_STACK_MARGIN_BYTES) { + // Far out of bounds -- Assume stack switching has occurred + return 0; + } int kbytes_used = (int)(here_addr - _tstate->c_stack_top)/1024; #endif + /* Too much stack used to safely raise an exception. Give up. 
*/ char buffer[80]; snprintf(buffer, 80, "Unrecoverable stack overflow (used %d kB)%s", kbytes_used, where); Py_FatalError(buffer); @@ -1201,19 +1192,6 @@ _PyEval_GetIter(_PyStackRef iterable, _PyStackRef *index_or_null, int yield_from return PyStackRef_FromPyObjectSteal(iter_o); } -Py_NO_INLINE int -_Py_ReachedRecursionLimit(PyThreadState *tstate) { - uintptr_t here_addr = _Py_get_machine_stack_pointer(); - _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate; - assert(_tstate->c_stack_hard_limit != 0); -#if _Py_STACK_GROWS_DOWN - return here_addr <= _tstate->c_stack_soft_limit; -#else - return here_addr >= _tstate->c_stack_soft_limit; -#endif -} - - #if (defined(__GNUC__) && __GNUC__ >= 10 && !defined(__clang__)) && defined(__x86_64__) /* * gh-129987: The SLP autovectorizer can cause poor code generation for From c924dd6326418ff8d470bcfc97aac1486a8643d4 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 31 Mar 2026 18:00:55 +0100 Subject: [PATCH 2/4] Punctuation --- Python/jit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/jit.c b/Python/jit.c index 4990c743224d3c..d3a40ee6ff64d5 100644 --- a/Python/jit.c +++ b/Python/jit.c @@ -734,7 +734,7 @@ _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction trace[], siz return 0; } -/* One-off compilation of the jit entry shim +/* One-off compilation of the jit entry shim. * We compile this once only as it effectively a normal * function, but we need to use the JIT because it needs * to understand the jit-specific calling convention. 
From c90e42300fe231c165bece7714abcaee1d09fa93 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 1 Apr 2026 09:42:12 +0100 Subject: [PATCH 3/4] Add missing cast --- Include/internal/pycore_pystate.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Include/internal/pycore_pystate.h b/Include/internal/pycore_pystate.h index bf580be3e35c35..054360d69e6fae 100644 --- a/Include/internal/pycore_pystate.h +++ b/Include/internal/pycore_pystate.h @@ -313,7 +313,7 @@ _Py_get_machine_stack_pointer(void) { #if defined(_MSC_VER) && defined(_M_ARM64) result = __getReg(31); #elif defined(_MSC_VER) && defined(_M_X64) - result = _AddressOfReturnAddress(); + result = (uintptr_t)_AddressOfReturnAddress(); #elif defined(__aarch64__) __asm__ ("mov %0, sp" : "=r" (result)); #elif defined(__x86_64__) From 5f056eb8acea516951d328e145baaddd1e495b6e Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 1 Apr 2026 10:10:04 +0100 Subject: [PATCH 4/4] Increase depth for pyexpat recursion test --- Lib/test/test_pyexpat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py index cace780f79f515..0361d9f3da9069 100644 --- a/Lib/test/test_pyexpat.py +++ b/Lib/test/test_pyexpat.py @@ -707,7 +707,7 @@ def test_trigger_leak(self): def test_deeply_nested_content_model(self): # This should raise a RecursionError and not crash. # See https://github.com/python/cpython/issues/145986. - N = 500_000 + N = 800_000 data = ( b'