Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2,476 changes: 1,243 additions & 1,233 deletions Include/internal/pycore_uop_ids.h

Large diffs are not rendered by default.

53 changes: 53 additions & 0 deletions Include/internal/pycore_uop_metadata.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

123 changes: 123 additions & 0 deletions Lib/test/test_capi/test_opt.py
Original file line number Diff line number Diff line change
Expand Up @@ -3348,6 +3348,129 @@ def testfunc(args):
uops = get_opnames(ex)
self.assertNotIn("_UNARY_NEGATIVE_FLOAT_INPLACE", uops)

def test_float_truediv_inplace_unique_lhs(self):
    # Both operands of the division are unique floats produced by
    # additions: (a + b) / (c + d).  The quotient is expected to be
    # written into the left-hand operand in place.
    # NOTE: the shape of testfunc is deliberate; keep it as written.
    def testfunc(args):
        a, b, c, d, n = args
        total = 0.0
        for _ in range(n):
            total += (a + b) / (c + d)
        return total

    inputs = (2.0, 3.0, 1.0, 3.0, TIER2_THRESHOLD)
    result, executor = self._run_with_optimizer(testfunc, inputs)
    # (2.0 + 3.0) / (1.0 + 3.0) == 1.25 on every iteration.
    self.assertAlmostEqual(result, TIER2_THRESHOLD * 1.25)
    self.assertIsNotNone(executor)
    opnames = get_opnames(executor)
    self.assertIn("_BINARY_OP_TRUEDIV_FLOAT_INPLACE", opnames)

def test_float_truediv_inplace_unique_rhs(self):
    # The dividend is stored in the local x first, so it is not unique
    # when reloaded.  The divisor (a + b) is unique, so the division
    # should use the inplace variant that targets the RHS.
    # NOTE: the shape of testfunc is deliberate; keep it as written.
    def testfunc(args):
        a, b, c, d, n = args
        total = 0.0
        for _ in range(n):
            x = c + d
            total += x / (a + b)
        return total

    inputs = (2.0, 3.0, 4.0, 5.0, TIER2_THRESHOLD)
    result, executor = self._run_with_optimizer(testfunc, inputs)
    # (4.0 + 5.0) / (2.0 + 3.0) == 9.0 / 5.0 on every iteration.
    self.assertAlmostEqual(result, TIER2_THRESHOLD * (9.0 / 5.0))
    self.assertIsNotNone(executor)
    opnames = get_opnames(executor)
    self.assertIn("_BINARY_OP_TRUEDIV_FLOAT_INPLACE_RIGHT", opnames)

def test_float_truediv_type_propagation(self):
    # Check that _BINARY_OP_TRUEDIV_FLOAT propagates type information:
    # the two quotients feed a subtraction that is expected to be
    # specialized as an inplace float subtraction.
    # NOTE: the shape of testfunc is deliberate; keep it as written.
    def testfunc(args):
        a, b, n = args
        total = 0.0
        for _ in range(n):
            x = (a + b)  # type of x will specialize to float
            total += x / x - x / x
        return total

    inputs = (2.0, 3.0, TIER2_THRESHOLD)
    result, executor = self._run_with_optimizer(testfunc, inputs)
    x = 2.0 + 3.0
    self.assertAlmostEqual(result, TIER2_THRESHOLD * (x / x - x / x))
    self.assertIsNotNone(executor)
    opnames = get_opnames(executor)
    for expected_op in (
        "_BINARY_OP_TRUEDIV_FLOAT",
        "_BINARY_OP_SUBTRACT_FLOAT_INPLACE",
    ):
        self.assertIn(expected_op, opnames)

def test_float_truediv_unique_result_enables_inplace(self):
    # Chained divisions, (a+b) / (c+d) / (e+f): every intermediate
    # result is unique, which enables inplace for the divisions that
    # follow.
    # NOTE: the shape of testfunc is deliberate; keep it as written.
    def testfunc(args):
        a, b, c, d, e, f, n = args
        total = 0.0
        for _ in range(n):
            total += (a + b) / (c + d) / (e + f)
        return total

    inputs = (2.0, 3.0, 1.0, 1.0, 1.0, 1.0, TIER2_THRESHOLD)
    result, executor = self._run_with_optimizer(testfunc, inputs)
    dividend, first_divisor, second_divisor = 2.0 + 3.0, 1.0 + 1.0, 1.0 + 1.0
    per_iteration = dividend / first_divisor / second_divisor
    self.assertAlmostEqual(result, TIER2_THRESHOLD * per_iteration)
    self.assertIsNotNone(executor)
    opnames = get_opnames(executor)
    self.assertIn("_BINARY_OP_TRUEDIV_FLOAT_INPLACE", opnames)

def test_float_add_chain_both_unique(self):
    # (a+b) + (c+d): each inner addition produces a unique float, so
    # the outer addition should be able to reuse one of them in place.
    # NOTE: the shape of testfunc is deliberate; keep it as written.
    def testfunc(args):
        a, b, c, d, n = args
        total = 0.0
        for _ in range(n):
            total += (a + b) + (c + d)
        return total

    inputs = (1.0, 2.0, 3.0, 4.0, TIER2_THRESHOLD)
    result, executor = self._run_with_optimizer(testfunc, inputs)
    self.assertAlmostEqual(result, TIER2_THRESHOLD * 10.0)
    self.assertIsNotNone(executor)
    opnames = get_opnames(executor)
    # Either inplace variant is acceptable for the outer addition,
    # since at least one of its operands is unique.
    inplace_variants = (
        "_BINARY_OP_ADD_FLOAT_INPLACE",
        "_BINARY_OP_ADD_FLOAT_INPLACE_RIGHT",
    )
    used_inplace = any(variant in opnames for variant in inplace_variants)
    self.assertTrue(used_inplace, "Expected inplace add for unique sub-results")

def test_float_truediv_non_float_type_no_crash(self):
    # Dividing two Fractions goes through _BINARY_OP with
    # NB_TRUE_DIVIDE, yet the result is a Fraction rather than a float.
    # The optimizer must not assume a float result when the operands
    # are not int/float.  See gh-146306.
    from fractions import Fraction

    # NOTE: the shape of testfunc is deliberate; keep it as written.
    def testfunc(args):
        a, b, n = args
        total = Fraction(0)
        for _ in range(n):
            total += a / b
        return float(total)

    inputs = (Fraction(10), Fraction(3), TIER2_THRESHOLD)
    result, _executor = self._run_with_optimizer(testfunc, inputs)
    # Only the numeric result is checked; the test is about not crashing.
    self.assertAlmostEqual(result, float(TIER2_THRESHOLD * Fraction(10, 3)))

def test_float_truediv_mixed_float_fraction_no_crash(self):
    # float / Fraction: the LHS is a known float from a prior guard,
    # while the RHS is a Fraction.  The guard inserted for the RHS
    # should deopt cleanly at runtime instead of crashing.
    from fractions import Fraction

    # NOTE: the shape of testfunc is deliberate; keep it as written.
    def testfunc(args):
        a, b, c, n = args
        total = 0.0
        for _ in range(n):
            total += (a + b) / c  # (a+b) is float, c is Fraction
        return total

    inputs = (2.0, 3.0, Fraction(4), TIER2_THRESHOLD)
    result, _executor = self._run_with_optimizer(testfunc, inputs)
    # Only the numeric result is checked; the test is about not crashing.
    expected = float(TIER2_THRESHOLD * (5.0 / Fraction(4)))
    self.assertAlmostEqual(result, expected)

def test_load_attr_instance_value(self):
def testfunc(n):
class C():
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Specialize float true division in the tier 2 optimizer, with in-place
mutation of uniquely referenced operands.
47 changes: 47 additions & 0 deletions Python/bytecodes.c
Original file line number Diff line number Diff line change
Expand Up @@ -838,6 +838,53 @@ dummy_func(
INPUTS_DEAD();
}

// Float true division — not specialized at tier 1, emitted by the
// tier 2 optimizer when both operands are known floats.
//
// Stack effect: (left, right -- res, l, r)
//   res  a newly allocated float holding left / right
//   l,r  the original operands, passed through unchanged
//
// Raises ZeroDivisionError ("float division by zero") when the divisor
// compares equal to 0.0 (in C this is also true for -0.0).
tier2 op(_BINARY_OP_TRUEDIV_FLOAT, (left, right -- res, l, r)) {
    PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
    PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
    // Exact-float operands are a precondition of this uop; it is only
    // checked through assert() here.
    assert(PyFloat_CheckExact(left_o));
    assert(PyFloat_CheckExact(right_o));
    // The hit is recorded before the zero check, so a division that
    // raises is still counted.
    STAT_INC(BINARY_OP, hit);
    double divisor = ((PyFloatObject *)right_o)->ob_fval;
    if (divisor == 0.0) {
        PyErr_SetString(PyExc_ZeroDivisionError,
                        "float division by zero");
        // NOTE(review): presumably the error exit that leaves both
        // inputs on the stack -- confirm against ERROR_NO_POP's
        // definition.
        ERROR_NO_POP();
    }
    double dres = ((PyFloatObject *)left_o)->ob_fval / divisor;
    PyObject *d = PyFloat_FromDouble(dres);
    if (d == NULL) {
        // Allocation of the result failed.
        ERROR_NO_POP();
    }
    // The new float's reference is stolen by the result stackref.
    res = PyStackRef_FromPyObjectSteal(d);
    l = left;
    r = right;
    INPUTS_DEAD();
}

// Float true division that stores the quotient into the LEFT operand
// instead of allocating a new float.
//
// Stack effect: (left, right -- res, l, r)
//   res  the left operand, now holding left / right
//   l    PyStackRef_NULL (the left operand has become the result)
//   r    the right operand, passed through unchanged
tier2 op(_BINARY_OP_TRUEDIV_FLOAT_INPLACE, (left, right -- res, l, r)) {
    // Third argument selects the operand that is overwritten.  The
    // macro declares _divop_err in this scope and sets it to 1 (with
    // ZeroDivisionError set) when the divisor is zero.
    FLOAT_INPLACE_DIVOP(left, right, left);
    if (_divop_err) {
        ERROR_NO_POP();
    }
    res = left;
    l = PyStackRef_NULL;
    r = right;
    INPUTS_DEAD();
}

// Float true division that stores the quotient into the RIGHT operand
// instead of allocating a new float.
//
// Stack effect: (left, right -- res, l, r)
//   res  the right operand, now holding left / right
//   l    the left operand, passed through unchanged
//   r    PyStackRef_NULL (the right operand has become the result)
tier2 op(_BINARY_OP_TRUEDIV_FLOAT_INPLACE_RIGHT, (left, right -- res, l, r)) {
    // Third argument selects the operand that is overwritten.  The
    // macro declares _divop_err in this scope and sets it to 1 (with
    // ZeroDivisionError set) when the divisor is zero.
    FLOAT_INPLACE_DIVOP(left, right, right);
    if (_divop_err) {
        ERROR_NO_POP();
    }
    res = right;
    l = left;
    r = PyStackRef_NULL;
    INPUTS_DEAD();
}

pure op(_BINARY_OP_ADD_UNICODE, (left, right -- res, l, r)) {
PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
Expand Down
24 changes: 24 additions & 0 deletions Python/ceval_macros.h
Original file line number Diff line number Diff line change
Expand Up @@ -562,3 +562,27 @@ gen_try_set_executing(PyGenObject *gen)
((PyFloatObject *)PyStackRef_AsPyObjectBorrow(TARGET)) \
->ob_fval = _dres; \
} while (0)

// Inplace float true division. Sets _divop_err to 1 on zero division.
// Caller must check _divop_err and call ERROR_NO_POP() if set.
//
// Computes left / right and stores the quotient into TARGET's ob_fval.
//
// Arguments:
//   left, right  stack references to the operands; both are asserted to
//                be exact floats.
//   TARGET       the stack reference that is overwritten; asserted to
//                be uniquely referenced.
//
// On zero division (divisor compares equal to 0.0) ZeroDivisionError is
// set, _divop_err becomes 1, and TARGET is left unmodified.
//
// Usage notes:
//   * `int _divop_err` is declared in the caller's scope, outside the
//     do/while, so the macro can be expanded only once per scope and
//     only where a declaration is permitted.
//   * TARGET is expanded twice (once inside an assert, once for the
//     store), so it must be a side-effect-free expression.
//   * The statistics hit is recorded before the zero check.
#define FLOAT_INPLACE_DIVOP(left, right, TARGET)                       \
    int _divop_err = 0;                                                \
    do {                                                               \
        PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);          \
        PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);        \
        assert(PyFloat_CheckExact(left_o));                            \
        assert(PyFloat_CheckExact(right_o));                           \
        assert(_PyObject_IsUniquelyReferenced(                         \
            PyStackRef_AsPyObjectBorrow(TARGET)));                     \
        STAT_INC(BINARY_OP, hit);                                      \
        double _divisor = ((PyFloatObject *)right_o)->ob_fval;         \
        if (_divisor == 0.0) {                                         \
            PyErr_SetString(PyExc_ZeroDivisionError,                   \
                            "float division by zero");                 \
            _divop_err = 1;                                            \
            break;                                                     \
        }                                                              \
        double _dres = ((PyFloatObject *)left_o)->ob_fval / _divisor;  \
        ((PyFloatObject *)PyStackRef_AsPyObjectBorrow(TARGET))         \
            ->ob_fval = _dres;                                         \
    } while (0)
Loading
Loading