-
-
Notifications
You must be signed in to change notification settings - Fork 34.4k
gh-147988: Initialize digits in long_alloc() in debug mode #147989
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
a14685f
91ace76
bb7e393
4be289e
d3293fd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -187,9 +187,11 @@ long_alloc(Py_ssize_t size) | |
| _PyObject_Init((PyObject*)result, &PyLong_Type); | ||
| } | ||
| _PyLong_SetSignAndDigitCount(result, size != 0, size); | ||
| /* The digit has to be initialized explicitly to avoid | ||
| * use-of-uninitialized-value. */ | ||
| result->long_value.ob_digit[0] = 0; | ||
| #ifdef Py_DEBUG | ||
| // gh-147988: Fill digits with an invalid pattern to catch usage | ||
| // of uninitialized digits. | ||
| memset(result->long_value.ob_digit, 0xFF, ndigits * sizeof(digit)); | ||
| #endif | ||
| return result; | ||
| } | ||
|
|
||
|
|
@@ -1094,6 +1096,7 @@ _PyLong_FromByteArray(const unsigned char* bytes, size_t n, | |
| int sign = is_signed ? -1: 1; | ||
| if (idigit == 0) { | ||
| sign = 0; | ||
| v->long_value.ob_digit[0] = 0; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is it needed in non-debug build?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As described in the issue, MSAN generates a memory error without this line, since If you ignore MSAN (and other sanitizers), This PR moves
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Technically, we could live without all this. But I think it's a good idea to set digit[0] for zero. Historically, we set number of digits to zero for zero. I think it should be 1 to reflect actual memory allocation. Then zero representation will make sense in both "big int" format (sign + magnitude as 1-element array) and as a "small int" ( |
||
| } | ||
| _PyLong_SetSignAndDigitCount(v, sign, idigit); | ||
| return (PyObject *)maybe_small_long(long_normalize(v)); | ||
|
|
@@ -2852,6 +2855,7 @@ long_from_non_binary_base(const char *start, const char *end, Py_ssize_t digits, | |
| *res = NULL; | ||
| return 0; | ||
| } | ||
| z->long_value.ob_digit[0] = 0; | ||
| _PyLong_SetSignAndDigitCount(z, 0, 0); | ||
|
|
||
| /* `convwidth` consecutive input digits are treated as a single | ||
|
|
@@ -3365,6 +3369,7 @@ x_divrem(PyLongObject *v1, PyLongObject *w1, PyLongObject **prem) | |
| *prem = NULL; | ||
| return NULL; | ||
| } | ||
| a->long_value.ob_digit[0] = 0; | ||
| v0 = v->long_value.ob_digit; | ||
| w0 = w->long_value.ob_digit; | ||
| wm1 = w0[size_w-1]; | ||
|
|
@@ -4141,10 +4146,6 @@ k_mul(PyLongObject *a, PyLongObject *b) | |
| /* 1. Allocate result space. */ | ||
| ret = long_alloc(asize + bsize); | ||
| if (ret == NULL) goto fail; | ||
| #ifdef Py_DEBUG | ||
| /* Fill with trash, to catch reference to uninitialized digits. */ | ||
| memset(ret->long_value.ob_digit, 0xDF, _PyLong_DigitCount(ret) * sizeof(digit)); | ||
| #endif | ||
|
|
||
| /* 2. t1 <- ah*bh, and copy into high digits of result. */ | ||
| if ((t1 = k_mul(ah, bh)) == NULL) goto fail; | ||
|
|
@@ -5633,6 +5634,12 @@ long_bitwise(PyLongObject *a, | |
| Py_UNREACHABLE(); | ||
| } | ||
|
|
||
| if ((size_z + negz) == 0) { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is equivalent to
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
My intent is to add a special case for
The alternative is to add Do you prefer adding
See my reply on
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Well, it does not matter. |
||
| Py_XDECREF(new_a); | ||
| Py_XDECREF(new_b); | ||
| return get_small_int(0); | ||
| } | ||
|
|
||
| /* We allow an extra digit if z is negative, to make sure that | ||
| the final two's complement of z doesn't overflow. */ | ||
| z = long_alloc(size_z + negz); | ||
|
|
@@ -6951,6 +6958,28 @@ PyLongWriter_Finish(PyLongWriter *writer) | |
| PyLongObject *obj = (PyLongObject *)writer; | ||
| assert(Py_REFCNT(obj) == 1); | ||
|
|
||
| #ifdef Py_DEBUG | ||
| // gh-147988: Detect uninitialized digits: long_alloc() fills digits with | ||
| // 0xFF byte pattern. It's posssible because PyLong_BASE is smaller than | ||
| // the maximum value of the C digit type (uint32_t or unsigned short): | ||
| // most significan bits are unused by the API. | ||
| Py_ssize_t ndigits = _PyLong_DigitCount(obj); | ||
| if (ndigits == 0) { | ||
| // Check ob_digit[0] digit for the number zero | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should not we simply assert
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I modified the code to raise |
||
| ndigits = 1; | ||
| } | ||
| for (Py_ssize_t i = 0; i < ndigits; i++) { | ||
| digit d = obj->long_value.ob_digit[i]; | ||
| if (d & ~(digit)PyLong_MASK) { | ||
| Py_DECREF(obj); | ||
| PyErr_Format(PyExc_SystemError, | ||
| "PyLongWriter_Finish: digit %zd is uninitialized", | ||
| i); | ||
| return NULL; | ||
| } | ||
| } | ||
| #endif | ||
|
|
||
| // Normalize and get singleton if possible | ||
| obj = maybe_small_long(long_normalize(obj)); | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does not it emit a warning or generate inefficient code in non-debug build?
You can write
assert(size != 0 || ...).There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think that sane compiler should recognize here no-op. I've tested assembly code for a toy program with
gcc -O2.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In release mode, assertions are removed and so the code becomes
if (size == 0) {}. C compilers are good to remove such dead code. I looked at the assembly code of Python built in release mode, and I cannot see any instruction related toif (size == 0).gcc -O3doesn't emit a warning on such code.