From a14685fde7b5999a953640de6ad53e24bedfc5af Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 1 Apr 2026 21:57:19 +0200 Subject: [PATCH 1/5] gh-147988: Initialize digits in long_alloc() in debug mode When Python is built in debug mode, long_alloc() now initializes digits with a pattern to detect usage of uninitialized digits. _PyLong_CompactValue() makes sure that the digit is zero when the sign is zero. --- Include/cpython/longintrepr.h | 5 +++++ Objects/longobject.c | 21 ++++++++++++++------- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/Include/cpython/longintrepr.h b/Include/cpython/longintrepr.h index 804c1e9427e063..5d3f928b1c7097 100644 --- a/Include/cpython/longintrepr.h +++ b/Include/cpython/longintrepr.h @@ -133,6 +133,11 @@ _PyLong_CompactValue(const PyLongObject *op) assert(PyType_HasFeature(op->ob_base.ob_type, Py_TPFLAGS_LONG_SUBCLASS)); assert(PyUnstable_Long_IsCompact(op)); sign = 1 - (op->long_value.lv_tag & _PyLong_SIGN_MASK); + if (sign == 0) { + // gh-147988: Make sure that the digit is zero, + // it helps detecting usage of uninitialized digits. + assert(op->long_value.ob_digit[0] == 0); + } return sign * (Py_ssize_t)op->long_value.ob_digit[0]; } diff --git a/Objects/longobject.c b/Objects/longobject.c index d416fc1747ecac..ee50466fa0edc4 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -187,9 +187,11 @@ long_alloc(Py_ssize_t size) _PyObject_Init((PyObject*)result, &PyLong_Type); } _PyLong_SetSignAndDigitCount(result, size != 0, size); - /* The digit has to be initialized explicitly to avoid - * use-of-uninitialized-value. */ - result->long_value.ob_digit[0] = 0; +#ifdef Py_DEBUG + // gh-147988: Fill digits with an invalid pattern to catch usage + // of uninitialized digits. + memset(result->long_value.ob_digit, 0xFF, ndigits * sizeof(digit)); +#endif return result; } @@ -1094,6 +1096,7 @@ _PyLong_FromByteArray(const unsigned char* bytes, size_t n, int sign = is_signed ? 
-1: 1; if (idigit == 0) { sign = 0; + v->long_value.ob_digit[0] = 0; } _PyLong_SetSignAndDigitCount(v, sign, idigit); return (PyObject *)maybe_small_long(long_normalize(v)); @@ -2852,6 +2855,7 @@ long_from_non_binary_base(const char *start, const char *end, Py_ssize_t digits, *res = NULL; return 0; } + z->long_value.ob_digit[0] = 0; _PyLong_SetSignAndDigitCount(z, 0, 0); /* `convwidth` consecutive input digits are treated as a single @@ -3365,6 +3369,7 @@ x_divrem(PyLongObject *v1, PyLongObject *w1, PyLongObject **prem) *prem = NULL; return NULL; } + a->long_value.ob_digit[0] = 0; v0 = v->long_value.ob_digit; w0 = w->long_value.ob_digit; wm1 = w0[size_w-1]; @@ -4141,10 +4146,6 @@ k_mul(PyLongObject *a, PyLongObject *b) /* 1. Allocate result space. */ ret = long_alloc(asize + bsize); if (ret == NULL) goto fail; -#ifdef Py_DEBUG - /* Fill with trash, to catch reference to uninitialized digits. */ - memset(ret->long_value.ob_digit, 0xDF, _PyLong_DigitCount(ret) * sizeof(digit)); -#endif /* 2. t1 <- ah*bh, and copy into high digits of result. */ if ((t1 = k_mul(ah, bh)) == NULL) goto fail; @@ -5633,6 +5634,12 @@ long_bitwise(PyLongObject *a, Py_UNREACHABLE(); } + if ((size_z + negz) == 0) { + Py_XDECREF(new_a); + Py_XDECREF(new_b); + return get_small_int(0); + } + /* We allow an extra digit if z is negative, to make sure that the final two's complement of z doesn't overflow. 
*/ z = long_alloc(size_z + negz); From 91ace76f43bc5bdd1d19225d158f7143b8697453 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 1 Apr 2026 22:51:31 +0200 Subject: [PATCH 2/5] PyLongWriter_Finish() detects uninitialized digits --- Include/cpython/longintrepr.h | 4 ++-- Objects/longobject.c | 14 ++++++++++++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/Include/cpython/longintrepr.h b/Include/cpython/longintrepr.h index 5d3f928b1c7097..998ebe6891577e 100644 --- a/Include/cpython/longintrepr.h +++ b/Include/cpython/longintrepr.h @@ -134,8 +134,8 @@ _PyLong_CompactValue(const PyLongObject *op) assert(PyUnstable_Long_IsCompact(op)); sign = 1 - (op->long_value.lv_tag & _PyLong_SIGN_MASK); if (sign == 0) { - // gh-147988: Make sure that the digit is zero, - // it helps detecting usage of uninitialized digits. + // gh-147988: Make sure that the digit is zero. + // It helps detecting the usage of uninitialized digits. assert(op->long_value.ob_digit[0] == 0); } return sign * (Py_ssize_t)op->long_value.ob_digit[0]; diff --git a/Objects/longobject.c b/Objects/longobject.c index ee50466fa0edc4..38466d96adbe8e 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -6958,6 +6958,20 @@ PyLongWriter_Finish(PyLongWriter *writer) PyLongObject *obj = (PyLongObject *)writer; assert(Py_REFCNT(obj) == 1); +#ifdef Py_DEBUG + // gh-147988: Detect uninitialized digits: + // long_alloc() fills digits with 0xFF byte pattern. 
+ Py_ssize_t ndigits = _PyLong_DigitCount(obj); + if (ndigits == 0) { + // Check ob_digit[0] digit for the number zero + ndigits = 1; + } + for (Py_ssize_t i=0; i < ndigits; i++) { + digit d = obj->long_value.ob_digit[i]; + assert(d < PyLong_BASE); + } +#endif + // Normalize and get singleton if possible obj = maybe_small_long(long_normalize(obj)); From bb7e393511aa18f8403d88d0ebb42d4808844ac2 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 2 Apr 2026 01:21:52 +0200 Subject: [PATCH 3/5] Apply suggestion from @serhiy-storchaka Co-authored-by: Serhiy Storchaka --- Objects/longobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Objects/longobject.c b/Objects/longobject.c index 38466d96adbe8e..7a352a4fc727bc 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -6966,7 +6966,7 @@ PyLongWriter_Finish(PyLongWriter *writer) // Check ob_digit[0] digit for the number zero ndigits = 1; } - for (Py_ssize_t i=0; i < ndigits; i++) { + for (Py_ssize_t i = 0; i < ndigits; i++) { digit d = obj->long_value.ob_digit[i]; assert(d < PyLong_BASE); } From 4be289eaf8c8df693e5d732c5190289382dc00e6 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 2 Apr 2026 13:07:16 +0200 Subject: [PATCH 4/5] PyLongWriter_Finish() now raises SystemError PyLongWriter_Finish() now raises SystemError instead of stopping the process with abort(). Add test on PyLongWriter_Finish() bug. 
--- Lib/test/test_capi/test_long.py | 10 ++++++++++ Modules/_testcapi/long.c | 20 ++++++++++++++++++++ Objects/longobject.c | 13 ++++++++++--- 3 files changed, 40 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_capi/test_long.py b/Lib/test/test_capi/test_long.py index fc0454b71cb780..d1467caf6880a6 100644 --- a/Lib/test/test_capi/test_long.py +++ b/Lib/test/test_capi/test_long.py @@ -803,6 +803,16 @@ def to_digits(num): self.assertEqual(pylongwriter_create(negative, digits), num, (negative, digits)) + @unittest.skipUnless(support.Py_DEBUG, "need a debug build (Py_DEBUG)") + def test_longwriter_finish(self): + # Test PyLongWriter_Create(0, 3, &digits) with PyLongWriter_Finish() + # where the last digit is left uninitialized + pylongwriter_finish_bug = _testcapi.pylongwriter_finish_bug + with self.assertRaises(SystemError) as cm: + pylongwriter_finish_bug() + self.assertEqual(str(cm.exception), + 'PyLongWriter_Finish: digit 2 is uninitialized') + def test_bug_143050(self): with support.adjust_int_max_str_digits(0): # Bug coming from using _pylong.int_from_string(), that diff --git a/Modules/_testcapi/long.c b/Modules/_testcapi/long.c index 6313abf5485fff..008a7d37726869 100644 --- a/Modules/_testcapi/long.c +++ b/Modules/_testcapi/long.c @@ -254,6 +254,25 @@ pylongwriter_create(PyObject *module, PyObject *args) } +static PyObject * +pylongwriter_finish_bug(PyObject *module, PyObject *Py_UNUSED(args)) +{ + void *writer_digits; + PyLongWriter *writer = PyLongWriter_Create(0, 3, &writer_digits); + if (writer == NULL) { + return NULL; + } + + assert(PyLong_GetNativeLayout()->digit_size == sizeof(digit)); + digit *digits = writer_digits; + digits[0] = 1; + digits[1] = 1; + // Oops, digits[2] is left uninitialized on purpose + // to test PyLongWriter_Finish() + return PyLongWriter_Finish(writer); +} + + static PyObject * get_pylong_layout(PyObject *module, PyObject *Py_UNUSED(args)) { @@ -271,6 +290,7 @@ static PyMethodDef test_methods[] = { {"pylong_aspid", 
pylong_aspid, METH_O}, {"pylong_export", pylong_export, METH_O}, {"pylongwriter_create", pylongwriter_create, METH_VARARGS}, + {"pylongwriter_finish_bug", pylongwriter_finish_bug, METH_NOARGS}, {"get_pylong_layout", get_pylong_layout, METH_NOARGS}, {"pylong_ispositive", pylong_ispositive, METH_O}, {"pylong_isnegative", pylong_isnegative, METH_O}, diff --git a/Objects/longobject.c b/Objects/longobject.c index 7a352a4fc727bc..a47cefc1f89ddc 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -6959,8 +6959,9 @@ PyLongWriter_Finish(PyLongWriter *writer) assert(Py_REFCNT(obj) == 1); #ifdef Py_DEBUG - // gh-147988: Detect uninitialized digits: - // long_alloc() fills digits with 0xFF byte pattern. + // gh-147988: Detect uninitialized digits: long_alloc() fills digits with + // 0xFF byte pattern. It's possible because PyLong_BASE is smaller than + // the maximum value of the C digit type (uint32_t or unsigned short). Py_ssize_t ndigits = _PyLong_DigitCount(obj); if (ndigits == 0) { // Check ob_digit[0] digit for the number zero @@ -6968,7 +6969,13 @@ PyLongWriter_Finish(PyLongWriter *writer) } for (Py_ssize_t i = 0; i < ndigits; i++) { digit d = obj->long_value.ob_digit[i]; - assert(d < PyLong_BASE); + if (d >= PyLong_BASE) { + Py_DECREF(obj); + PyErr_Format(PyExc_SystemError, + "PyLongWriter_Finish: digit %zd is uninitialized", + i); + return NULL; + } } #endif From d3293fde7421c0e69d6e441203850288854e0c5b Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 2 Apr 2026 13:26:14 +0200 Subject: [PATCH 5/5] PyLongWriter_Finish() checks most significant bits --- Objects/longobject.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Objects/longobject.c b/Objects/longobject.c index a47cefc1f89ddc..0af7755016bd90 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -6961,7 +6961,8 @@ PyLongWriter_Finish(PyLongWriter *writer) #ifdef Py_DEBUG // gh-147988: Detect uninitialized digits: long_alloc() fills digits with // 0xFF byte 
pattern. It's possible because PyLong_BASE is smaller than - // the maximum value of the C digit type (uint32_t or unsigned short). + // the maximum value of the C digit type (uint32_t or unsigned short): + // most significant bits are unused by the API. Py_ssize_t ndigits = _PyLong_DigitCount(obj); if (ndigits == 0) { // Check ob_digit[0] digit for the number zero @@ -6969,7 +6970,7 @@ PyLongWriter_Finish(PyLongWriter *writer) } for (Py_ssize_t i = 0; i < ndigits; i++) { digit d = obj->long_value.ob_digit[i]; - if (d >= PyLong_BASE) { + if (d & ~(digit)PyLong_MASK) { Py_DECREF(obj); PyErr_Format(PyExc_SystemError, "PyLongWriter_Finish: digit %zd is uninitialized",