https://github.com/python/cpython/commit/1384f025f5e8ad943c1ac699fd60877b046c0183
commit: 1384f025f5e8ad943c1ac699fd60877b046c0183
branch: main
author: Ken Jin <[email protected]>
committer: Fidget-Spinner <[email protected]>
date: 2026-03-28T03:38:54+08:00
summary:

gh-126910: Verify that JIT stencils preserve frame pointer (GH-146524)

files:
M Include/internal/pycore_ceval.h
M Include/internal/pycore_pystate.h
M Python/ceval.c
M Python/pystate.c
M Tools/jit/_optimizers.py
M Tools/jit/_targets.py

diff --git a/Include/internal/pycore_ceval.h b/Include/internal/pycore_ceval.h
index 16913289a02f59..2c83101b6b26fe 100644
--- a/Include/internal/pycore_ceval.h
+++ b/Include/internal/pycore_ceval.h
@@ -249,16 +249,7 @@ static inline void _Py_LeaveRecursiveCallTstate(PyThreadState *tstate) {
 
 PyAPI_FUNC(void) _Py_InitializeRecursionLimits(PyThreadState *tstate);
 
-static inline int _Py_ReachedRecursionLimit(PyThreadState *tstate)  {
-    uintptr_t here_addr = _Py_get_machine_stack_pointer();
-    _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate;
-    assert(_tstate->c_stack_hard_limit != 0);
-#if _Py_STACK_GROWS_DOWN
-    return here_addr <= _tstate->c_stack_soft_limit;
-#else
-    return here_addr >= _tstate->c_stack_soft_limit;
-#endif
-}
+PyAPI_FUNC(int) _Py_ReachedRecursionLimit(PyThreadState *tstate);
 
 // Export for test_peg_generator
 PyAPI_FUNC(int) _Py_ReachedRecursionLimitWithMargin(
diff --git a/Include/internal/pycore_pystate.h b/Include/internal/pycore_pystate.h
index a66543cf1eb164..189a8dde9f09ed 100644
--- a/Include/internal/pycore_pystate.h
+++ b/Include/internal/pycore_pystate.h
@@ -312,7 +312,18 @@ static uintptr_t return_pointer_as_int(char* p) {
 }
 #endif
 
-PyAPI_DATA(uintptr_t) _Py_get_machine_stack_pointer(void);
+static inline uintptr_t
+_Py_get_machine_stack_pointer(void) {
+#if _Py__has_builtin(__builtin_frame_address) || defined(__GNUC__)
+    return (uintptr_t)__builtin_frame_address(0);
+#elif defined(_MSC_VER)
+    return (uintptr_t)_AddressOfReturnAddress();
+#else
+    char here;
+    /* Avoid compiler warning about returning stack address */
+    return return_pointer_as_int(&here);
+#endif
+}
 
 static inline intptr_t
 _Py_RecursionLimit_GetMargin(PyThreadState *tstate)
diff --git a/Python/ceval.c b/Python/ceval.c
index b4c57b65d13d18..f95900ae01a6af 100644
--- a/Python/ceval.c
+++ b/Python/ceval.c
@@ -1201,6 +1201,19 @@ _PyEval_GetIter(_PyStackRef iterable, _PyStackRef *index_or_null, int yield_from
     return PyStackRef_FromPyObjectSteal(iter_o);
 }
 
+Py_NO_INLINE int
+_Py_ReachedRecursionLimit(PyThreadState *tstate)  {
+    uintptr_t here_addr = _Py_get_machine_stack_pointer();
+    _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate;
+    assert(_tstate->c_stack_hard_limit != 0);
+#if _Py_STACK_GROWS_DOWN
+    return here_addr <= _tstate->c_stack_soft_limit;
+#else
+    return here_addr >= _tstate->c_stack_soft_limit;
+#endif
+}
+
+
 #if (defined(__GNUC__) && __GNUC__ >= 10 && !defined(__clang__)) && defined(__x86_64__)
 /*
  * gh-129987: The SLP autovectorizer can cause poor code generation for
diff --git a/Python/pystate.c b/Python/pystate.c
index f974c82c391f6a..143175da0f45c7 100644
--- a/Python/pystate.c
+++ b/Python/pystate.c
@@ -3286,16 +3286,3 @@ _Py_GetMainConfig(void)
     }
     return _PyInterpreterState_GetConfig(interp);
 }
-
-uintptr_t
-_Py_get_machine_stack_pointer(void) {
-#if _Py__has_builtin(__builtin_frame_address) || defined(__GNUC__)
-    return (uintptr_t)__builtin_frame_address(0);
-#elif defined(_MSC_VER)
-    return (uintptr_t)_AddressOfReturnAddress();
-#else
-    char here;
-    /* Avoid compiler warning about returning stack address */
-    return return_pointer_as_int(&here);
-#endif
-}
diff --git a/Tools/jit/_optimizers.py b/Tools/jit/_optimizers.py
index 83c878d8fe205b..ef28e0c0ddeac8 100644
--- a/Tools/jit/_optimizers.py
+++ b/Tools/jit/_optimizers.py
@@ -162,6 +162,7 @@ class Optimizer:
     label_prefix: str
     symbol_prefix: str
     re_global: re.Pattern[str]
+    frame_pointers: bool
     # The first block in the linked list:
     _root: _Block = dataclasses.field(init=False, default_factory=_Block)
     _labels: dict[str, _Block] = dataclasses.field(init=False, default_factory=dict)
@@ -193,6 +194,7 @@ class Optimizer:
     _re_small_const_1 = _RE_NEVER_MATCH
     _re_small_const_2 = _RE_NEVER_MATCH
     const_reloc = "<Not supported>"
+    _frame_pointer_modify: typing.ClassVar[re.Pattern[str]] = _RE_NEVER_MATCH
 
     def __post_init__(self) -> None:
         # Split the code into a linked list of basic blocks. A basic block is an
@@ -553,6 +555,16 @@ def _small_const_2(self, inst: Instruction) -> tuple[str, Instruction | None]:
     def _small_consts_match(self, inst1: Instruction, inst2: Instruction) -> bool:
         raise NotImplementedError()
 
+    def _validate(self) -> None:
+        for block in self._blocks():
+            if not block.instructions:
+                continue
+            for inst in block.instructions:
+                if self.frame_pointers:
+                    assert (
+                        self._frame_pointer_modify.match(inst.text) is None
+                    ), "Frame pointer should not be modified"
+
     def run(self) -> None:
         """Run this optimizer."""
         self._insert_continue_label()
@@ -565,6 +577,7 @@ def run(self) -> None:
             self._remove_unreachable()
         self._fixup_external_labels()
         self._fixup_constants()
+        self._validate()
         self.path.write_text(self._body())
 
 
@@ -595,6 +608,7 @@ class OptimizerAArch64(Optimizer):  # pylint: disable = too-few-public-methods
         r"\s*(?P<instruction>ldr)\s+.*(?P<value>_JIT_OP(ARG|ERAND(0|1))_(16|32)).*"
     )
     const_reloc = "CUSTOM_AARCH64_CONST"
+    _frame_pointer_modify = re.compile(r"\s*stp\s+x29.*")
 
     def _get_reg(self, inst: Instruction) -> str:
         _, rest = inst.text.split(inst.name)
@@ -649,4 +663,5 @@ class OptimizerX86(Optimizer):  # pylint: disable = too-few-public-methods
     # https://www.felixcloutier.com/x86/jmp
     _re_jump = re.compile(r"\s*jmp\s+(?P<target>[\w.]+)")
     # https://www.felixcloutier.com/x86/ret
-    _re_return = re.compile(r"\s*ret\b")
+    _re_return = re.compile(r"\s*retq?\b")
+    _frame_pointer_modify = re.compile(r"\s*movq?\s+%(\w+),\s+%rbp.*")
diff --git a/Tools/jit/_targets.py b/Tools/jit/_targets.py
index ad2d5b3c780d54..787fcf53260f3d 100644
--- a/Tools/jit/_targets.py
+++ b/Tools/jit/_targets.py
@@ -176,8 +176,9 @@ async def _compile(
             f"{s}",
             f"{c}",
         ]
+        is_shim = opname == "shim"
         if self.frame_pointers:
-            frame_pointer = "all" if opname == "shim" else "reserved"
+            frame_pointer = "all" if is_shim else "reserved"
             args_s += ["-Xclang", f"-mframe-pointer={frame_pointer}"]
         args_s += self.args
         # Allow user-provided CFLAGS to override any defaults
@@ -185,12 +186,14 @@ async def _compile(
         await _llvm.run(
             "clang", args_s, echo=self.verbose, llvm_version=self.llvm_version
         )
-        self.optimizer(
-            s,
-            label_prefix=self.label_prefix,
-            symbol_prefix=self.symbol_prefix,
-            re_global=self.re_global,
-        ).run()
+        if not is_shim:
+            self.optimizer(
+                s,
+                label_prefix=self.label_prefix,
+                symbol_prefix=self.symbol_prefix,
+                re_global=self.re_global,
+                frame_pointers=self.frame_pointers,
+            ).run()
         args_o = [f"--target={self.triple}", "-c", "-o", f"{o}", f"{s}"]
         await _llvm.run(
             "clang", args_o, echo=self.verbose, llvm_version=self.llvm_version

_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: [email protected]

Reply via email to