diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index 2986afb142b5d1..101909d11baccb 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -393,6 +393,7 @@ extern JitOptRef _Py_uop_sym_new_type( extern JitOptRef _Py_uop_sym_new_const(JitOptContext *ctx, PyObject *const_val); extern JitOptRef _Py_uop_sym_new_const_steal(JitOptContext *ctx, PyObject *const_val); +extern bool _Py_uop_sym_is_safe_type(JitOptRef sym); bool _Py_uop_sym_is_safe_const(JitOptContext *ctx, JitOptRef sym); _PyStackRef _Py_uop_sym_get_const_as_stackref(JitOptContext *ctx, JitOptRef sym); extern JitOptRef _Py_uop_sym_new_null(JitOptContext *ctx); diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 2005dd9b0866bd..bda26bc7464c5a 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -2263,6 +2263,44 @@ def testfunc(n): self.assertNotIn("_GUARD_TOS_UNICODE", uops) self.assertIn("_BINARY_OP_ADD_UNICODE", uops) + def test_format_simple_narrows_to_str(self): + def testfunc(n): + x = [] + for _ in range(n): + v = 42 + s = f"{v}" + t = "hello" + s + x.append(t) + return x + + res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD) + self.assertEqual(res, ["hello42"] * TIER2_THRESHOLD) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + + self.assertIn("_FORMAT_SIMPLE", uops) + self.assertNotIn("_GUARD_TOS_UNICODE", uops) + self.assertIn("_BINARY_OP_ADD_UNICODE", uops) + + def test_format_with_spec_narrows_to_str(self): + def testfunc(n): + x = [] + for _ in range(n): + v = 3.14 + s = f"{v:.2f}" + t = "pi=" + s + x.append(t) + return x + + res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD) + self.assertEqual(res, ["pi=3.14"] * TIER2_THRESHOLD) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + + self.assertIn("_FORMAT_WITH_SPEC", uops) + self.assertNotIn("_GUARD_TOS_UNICODE", uops) + self.assertIn("_BINARY_OP_ADD_UNICODE", uops) + def test_binary_op_subscr_str_int(self): def testfunc(n): x = 0 diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-03-30-17-01-34.gh-issue-131798.WSefcr.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-03-30-17-01-34.gh-issue-131798.WSefcr.rst new file mode 100644 index 00000000000000..b587598be65b7d --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-03-30-17-01-34.gh-issue-131798.WSefcr.rst @@ -0,0 +1,2 @@ +Allow the JIT to remove unicode guards after ``_FORMAT_SIMPLE`` and +``_FORMAT_WITH_SPEC`` when the input type is a known built-in type. diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 4672a272fc9203..3e4942c483b7ec 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -250,6 +250,7 @@ add_op(JitOptContext *ctx, _PyUOpInstruction *this_instr, /* Shortened forms for convenience, used in optimizer_bytecodes.c */ #define sym_is_not_null _Py_uop_sym_is_not_null #define sym_is_const _Py_uop_sym_is_const +#define sym_is_safe_type _Py_uop_sym_is_safe_type #define sym_is_safe_const _Py_uop_sym_is_safe_const #define sym_get_const _Py_uop_sym_get_const #define sym_new_const_steal _Py_uop_sym_new_const_steal diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 792f83cdbd2d3a..ec449ab0dccd46 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -1551,6 +1551,22 @@ dummy_func(void) { set = sym_new_type(ctx, &PySet_Type); } + op(_FORMAT_SIMPLE, (value -- res)) { + if (sym_is_safe_type(value)) { + res = sym_new_type(ctx, &PyUnicode_Type); + } else { + res = sym_new_not_null(ctx); + } + } + + op(_FORMAT_WITH_SPEC, (value, fmt_spec -- res)) { + if (sym_is_safe_type(value)) { + res = sym_new_type(ctx, &PyUnicode_Type); + } else { + res = sym_new_not_null(ctx); + } + } + op(_SET_UPDATE, (set, unused[oparg-1], iterable -- set, unused[oparg-1], i)) { (void)set; i = iterable; diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 7539133fb92096..86d6fa6a77872c 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -4279,15 +4279,27 @@ } case _FORMAT_SIMPLE: { + JitOptRef value; JitOptRef res; - res = sym_new_not_null(ctx); + value = stack_pointer[-1]; + if (sym_is_safe_type(value)) { + res = sym_new_type(ctx, &PyUnicode_Type); + } else { + res = sym_new_not_null(ctx); + } stack_pointer[-1] = res; break; } case _FORMAT_WITH_SPEC: { + JitOptRef value; JitOptRef res; - res = sym_new_not_null(ctx); + value = stack_pointer[-2]; + if (sym_is_safe_type(value)) { + res = sym_new_type(ctx, &PyUnicode_Type); + } else { + res = sym_new_not_null(ctx); + } CHECK_STACK_BOUNDS(-1); stack_pointer[-2] = res; stack_pointer += -1; diff --git a/Python/optimizer_symbols.c b/Python/optimizer_symbols.c index d6f1c09490aac9..d6c014a838cc5d 100644 --- a/Python/optimizer_symbols.c +++ b/Python/optimizer_symbols.c @@ -264,6 +264,30 @@ _Py_uop_sym_get_const_as_stackref(JitOptContext *ctx, JitOptRef sym) return PyStackRef_FromPyObjectBorrow(const_val); } +static bool +is_safe_builtin_type(PyTypeObject *typ) +{ + return (typ == &PyUnicode_Type) || + (typ == &PyFloat_Type) || + (typ == &_PyNone_Type) || + (typ == &PyBool_Type) || + (typ == &PyFrozenDict_Type); +} + +/* + Indicates whether the type is a known built-in type + that is safe to narrow. + */ +bool +_Py_uop_sym_is_safe_type(JitOptRef sym) +{ + PyTypeObject *typ = _Py_uop_sym_get_type(sym); + if (typ == NULL) { + return false; + } + return (typ == &PyLong_Type) || is_safe_builtin_type(typ); +} + /* Indicates whether the constant is safe to constant evaluate (without side effects). @@ -279,11 +303,7 @@ _Py_uop_sym_is_safe_const(JitOptContext *ctx, JitOptRef sym) return true; } PyTypeObject *typ = Py_TYPE(const_val); - return (typ == &PyUnicode_Type) || - (typ == &PyFloat_Type) || - (typ == &_PyNone_Type) || - (typ == &PyBool_Type) || - (typ == &PyFrozenDict_Type); + return is_safe_builtin_type(typ); } void