https://github.com/python/cpython/commit/3f37b94c7377a971a063aaf13387b940cb4cac01
commit: 3f37b94c7377a971a063aaf13387b940cb4cac01
branch: main
author: Mark Shannon <[email protected]>
committer: markshannon <[email protected]>
date: 2026-02-19T11:52:57Z
summary:

GH-144651: Optimize the new uops added when recording values during tracing. 
(GH-144948)

* Handle dependencies in the optimizer, not the tracer
* Strengthen some checks to avoid relying on the optimizer for correctness

files:
M Include/internal/pycore_optimizer.h
M Include/internal/pycore_optimizer_types.h
M Include/internal/pycore_uop_ids.h
M Include/internal/pycore_uop_metadata.h
M Lib/test/test_capi/test_opt.py
M Modules/_testinternalcapi/test_cases.c.h
M Objects/codeobject.c
M Objects/frameobject.c
M Objects/funcobject.c
M Python/bytecodes.c
M Python/executor_cases.c.h
M Python/generated_cases.c.h
M Python/instrumentation.c
M Python/optimizer.c
M Python/optimizer_analysis.c
M Python/optimizer_bytecodes.c
M Python/optimizer_cases.c.h
M Python/optimizer_symbols.c

diff --git a/Include/internal/pycore_optimizer.h 
b/Include/internal/pycore_optimizer.h
index 79a2d60eb788ea..d9f7f59de1798e 100644
--- a/Include/internal/pycore_optimizer.h
+++ b/Include/internal/pycore_optimizer.h
@@ -22,6 +22,10 @@ typedef struct _PyJitUopBuffer {
     _PyUOpInstruction *end;
 } _PyJitUopBuffer;
 
+typedef struct _JitOptRefBuffer {
+    JitOptRef *used;
+    JitOptRef *end;
+} _JitOptRefBuffer;
 
 typedef struct _JitOptContext {
     char done;
@@ -37,10 +41,15 @@ typedef struct _JitOptContext {
     // Arena for the symbolic types.
     ty_arena t_arena;
 
-    JitOptRef *n_consumed;
-    JitOptRef *limit;
-    JitOptRef locals_and_stack[MAX_ABSTRACT_INTERP_SIZE];
+    /* To do -- We could make this more space efficient
+     * by using a single array and growing the stack and
+     * locals toward each other. */
+    _JitOptRefBuffer locals;
+    _JitOptRefBuffer stack;
+    JitOptRef locals_array[ABSTRACT_INTERP_LOCALS_SIZE];
+    JitOptRef stack_array[ABSTRACT_INTERP_STACK_SIZE];
     _PyJitUopBuffer out_buffer;
+    _PyBloomFilter *dependencies;
 } JitOptContext;
 
 
@@ -83,13 +92,11 @@ typedef struct _PyJitTracerInitialState {
 } _PyJitTracerInitialState;
 
 typedef struct _PyJitTracerPreviousState {
-    bool dependencies_still_valid;
     int instr_oparg;
     int instr_stacklevel;
     _Py_CODEUNIT *instr;
     PyCodeObject *instr_code; // Strong
     struct _PyInterpreterFrame *instr_frame;
-    _PyBloomFilter dependencies;
     PyObject *recorded_value; // Strong, may be NULL
 } _PyJitTracerPreviousState;
 
@@ -303,25 +310,24 @@ extern void _Py_uop_sym_set_recorded_type(JitOptContext 
*ctx, JitOptRef sym, PyT
 extern void _Py_uop_sym_set_recorded_gen_func(JitOptContext *ctx, JitOptRef 
ref, PyFunctionObject *value);
 extern PyCodeObject *_Py_uop_sym_get_probable_func_code(JitOptRef sym);
 extern PyObject *_Py_uop_sym_get_probable_value(JitOptRef sym);
+extern JitOptRef *_Py_uop_sym_set_stack_depth(JitOptContext *ctx, int 
stack_depth, JitOptRef *current_sp);
 
-extern void _Py_uop_abstractcontext_init(JitOptContext *ctx);
+extern void _Py_uop_abstractcontext_init(JitOptContext *ctx, _PyBloomFilter 
*dependencies);
 extern void _Py_uop_abstractcontext_fini(JitOptContext *ctx);
 
 extern _Py_UOpsAbstractFrame *_Py_uop_frame_new(
     JitOptContext *ctx,
     PyCodeObject *co,
-    int curr_stackentries,
     JitOptRef *args,
     int arg_len);
 
 extern _Py_UOpsAbstractFrame *_Py_uop_frame_new_from_symbol(
     JitOptContext *ctx,
     JitOptRef callable,
-    int curr_stackentries,
     JitOptRef *args,
     int arg_len);
 
-extern int _Py_uop_frame_pop(JitOptContext *ctx, PyCodeObject *co, int 
curr_stackentries);
+extern int _Py_uop_frame_pop(JitOptContext *ctx, PyCodeObject *co);
 
 PyAPI_FUNC(PyObject *) _Py_uop_symbols_test(PyObject *self, PyObject *ignored);
 
@@ -357,8 +363,6 @@ PyAPI_FUNC(void) _PyJit_FinalizeTracing(PyThreadState 
*tstate, int err);
 void _PyPrintExecutor(_PyExecutorObject *executor, const _PyUOpInstruction 
*marker);
 void _PyJit_TracerFree(_PyThreadStateImpl *_tstate);
 
-void _PyJit_Tracer_InvalidateDependency(PyThreadState *old_tstate, void *obj);
-
 #ifdef _Py_TIER2
 typedef void (*_Py_RecordFuncPtr)(_PyInterpreterFrame *frame, _PyStackRef 
*stackpointer, int oparg, PyObject **recorded_value);
 PyAPI_DATA(const _Py_RecordFuncPtr) _PyOpcode_RecordFunctions[];
diff --git a/Include/internal/pycore_optimizer_types.h 
b/Include/internal/pycore_optimizer_types.h
index 57c0c828c2aabd..2958db5b787975 100644
--- a/Include/internal/pycore_optimizer_types.h
+++ b/Include/internal/pycore_optimizer_types.h
@@ -11,8 +11,9 @@ extern "C" {
 #include <stdbool.h>
 #include "pycore_uop.h"  // UOP_MAX_TRACE_LENGTH
 
-// Holds locals, stack, locals, stack ... (in that order)
-#define MAX_ABSTRACT_INTERP_SIZE 512
+#define ABSTRACT_INTERP_STACK_SIZE 256
+#define ABSTRACT_INTERP_LOCALS_SIZE 512
+
 
 #define TY_ARENA_SIZE (UOP_MAX_TRACE_LENGTH * 5)
 
@@ -138,6 +139,7 @@ typedef struct _Py_UOpsAbstractFrame {
     // Max stacklen
     int stack_len;
     int locals_len;
+    bool caller; // We have made a call from this frame during the trace
     PyFunctionObject *func;
     PyCodeObject *code;
 
diff --git a/Include/internal/pycore_uop_ids.h 
b/Include/internal/pycore_uop_ids.h
index 94b05b736ed277..ebf21b12633c78 100644
--- a/Include/internal/pycore_uop_ids.h
+++ b/Include/internal/pycore_uop_ids.h
@@ -147,7 +147,7 @@ extern "C" {
 #define _GUARD_CALLABLE_STR_1 402
 #define _GUARD_CALLABLE_TUPLE_1 403
 #define _GUARD_CALLABLE_TYPE_1 404
-#define _GUARD_CODE 405
+#define _GUARD_CODE_VERSION 405
 #define _GUARD_DORV_NO_DICT 406
 #define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 407
 #define _GUARD_GLOBALS_VERSION 408
@@ -658,10 +658,10 @@ extern "C" {
 #define _GUARD_CALLABLE_TYPE_1_r13 855
 #define _GUARD_CALLABLE_TYPE_1_r23 856
 #define _GUARD_CALLABLE_TYPE_1_r33 857
-#define _GUARD_CODE_r00 858
-#define _GUARD_CODE_r11 859
-#define _GUARD_CODE_r22 860
-#define _GUARD_CODE_r33 861
+#define _GUARD_CODE_VERSION_r00 858
+#define _GUARD_CODE_VERSION_r11 859
+#define _GUARD_CODE_VERSION_r22 860
+#define _GUARD_CODE_VERSION_r33 861
 #define _GUARD_DORV_NO_DICT_r01 862
 #define _GUARD_DORV_NO_DICT_r11 863
 #define _GUARD_DORV_NO_DICT_r22 864
diff --git a/Include/internal/pycore_uop_metadata.h 
b/Include/internal/pycore_uop_metadata.h
index 5a47eae7a9abb1..7921d229f11db3 100644
--- a/Include/internal/pycore_uop_metadata.h
+++ b/Include/internal/pycore_uop_metadata.h
@@ -370,7 +370,7 @@ const uint32_t _PyUop_Flags[MAX_UOP_ID+1] = {
     [_TIER2_RESUME_CHECK] = HAS_PERIODIC_FLAG,
     [_COLD_EXIT] = HAS_SYNC_SP_FLAG,
     [_COLD_DYNAMIC_EXIT] = HAS_SYNC_SP_FLAG,
-    [_GUARD_CODE] = HAS_EXIT_FLAG,
+    [_GUARD_CODE_VERSION] = HAS_EXIT_FLAG,
     [_GUARD_IP__PUSH_FRAME] = HAS_EXIT_FLAG,
     [_GUARD_IP_YIELD_VALUE] = HAS_EXIT_FLAG,
     [_GUARD_IP_RETURN_VALUE] = HAS_EXIT_FLAG,
@@ -3404,13 +3404,13 @@ const _PyUopCachingInfo _PyUop_Caching[MAX_UOP_ID+1] = {
             { -1, -1, -1 },
         },
     },
-    [_GUARD_CODE] = {
+    [_GUARD_CODE_VERSION] = {
         .best = { 0, 1, 2, 3 },
         .entries = {
-            { 0, 0, _GUARD_CODE_r00 },
-            { 1, 1, _GUARD_CODE_r11 },
-            { 2, 2, _GUARD_CODE_r22 },
-            { 3, 3, _GUARD_CODE_r33 },
+            { 0, 0, _GUARD_CODE_VERSION_r00 },
+            { 1, 1, _GUARD_CODE_VERSION_r11 },
+            { 2, 2, _GUARD_CODE_VERSION_r22 },
+            { 3, 3, _GUARD_CODE_VERSION_r33 },
         },
     },
     [_GUARD_IP__PUSH_FRAME] = {
@@ -4221,10 +4221,10 @@ const uint16_t _PyUop_Uncached[MAX_UOP_REGS_ID+1] = {
     [_TIER2_RESUME_CHECK_r33] = _TIER2_RESUME_CHECK,
     [_COLD_EXIT_r00] = _COLD_EXIT,
     [_COLD_DYNAMIC_EXIT_r00] = _COLD_DYNAMIC_EXIT,
-    [_GUARD_CODE_r00] = _GUARD_CODE,
-    [_GUARD_CODE_r11] = _GUARD_CODE,
-    [_GUARD_CODE_r22] = _GUARD_CODE,
-    [_GUARD_CODE_r33] = _GUARD_CODE,
+    [_GUARD_CODE_VERSION_r00] = _GUARD_CODE_VERSION,
+    [_GUARD_CODE_VERSION_r11] = _GUARD_CODE_VERSION,
+    [_GUARD_CODE_VERSION_r22] = _GUARD_CODE_VERSION,
+    [_GUARD_CODE_VERSION_r33] = _GUARD_CODE_VERSION,
     [_GUARD_IP__PUSH_FRAME_r00] = _GUARD_IP__PUSH_FRAME,
     [_GUARD_IP__PUSH_FRAME_r11] = _GUARD_IP__PUSH_FRAME,
     [_GUARD_IP__PUSH_FRAME_r22] = _GUARD_IP__PUSH_FRAME,
@@ -4655,11 +4655,11 @@ const char *const _PyOpcode_uop_name[MAX_UOP_REGS_ID+1] 
= {
     [_GUARD_CALLABLE_TYPE_1_r13] = "_GUARD_CALLABLE_TYPE_1_r13",
     [_GUARD_CALLABLE_TYPE_1_r23] = "_GUARD_CALLABLE_TYPE_1_r23",
     [_GUARD_CALLABLE_TYPE_1_r33] = "_GUARD_CALLABLE_TYPE_1_r33",
-    [_GUARD_CODE] = "_GUARD_CODE",
-    [_GUARD_CODE_r00] = "_GUARD_CODE_r00",
-    [_GUARD_CODE_r11] = "_GUARD_CODE_r11",
-    [_GUARD_CODE_r22] = "_GUARD_CODE_r22",
-    [_GUARD_CODE_r33] = "_GUARD_CODE_r33",
+    [_GUARD_CODE_VERSION] = "_GUARD_CODE_VERSION",
+    [_GUARD_CODE_VERSION_r00] = "_GUARD_CODE_VERSION_r00",
+    [_GUARD_CODE_VERSION_r11] = "_GUARD_CODE_VERSION_r11",
+    [_GUARD_CODE_VERSION_r22] = "_GUARD_CODE_VERSION_r22",
+    [_GUARD_CODE_VERSION_r33] = "_GUARD_CODE_VERSION_r33",
     [_GUARD_DORV_NO_DICT] = "_GUARD_DORV_NO_DICT",
     [_GUARD_DORV_NO_DICT_r01] = "_GUARD_DORV_NO_DICT_r01",
     [_GUARD_DORV_NO_DICT_r11] = "_GUARD_DORV_NO_DICT_r11",
@@ -6070,7 +6070,7 @@ int _PyUop_num_popped(int opcode, int oparg)
             return 0;
         case _COLD_DYNAMIC_EXIT:
             return 0;
-        case _GUARD_CODE:
+        case _GUARD_CODE_VERSION:
             return 0;
         case _GUARD_IP__PUSH_FRAME:
             return 0;
diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py
index 2cad53d9c0728b..7ac71fbfab1fe0 100644
--- a/Lib/test/test_capi/test_opt.py
+++ b/Lib/test/test_capi/test_opt.py
@@ -110,6 +110,7 @@ def f{n}():
             for exe in executors[:i]:
                 self.assertTrue(exe.is_valid())
 
+    @unittest.skipIf(os.getenv("PYTHON_UOPS_OPTIMIZE") == "0", "Needs uop 
optimizer to run.")
     def test_uop_optimizer_invalidation(self):
         # Generate a new function at each call
         ns = {}
diff --git a/Modules/_testinternalcapi/test_cases.c.h 
b/Modules/_testinternalcapi/test_cases.c.h
index a9cd0574a596a1..fde6db4933f74a 100644
--- a/Modules/_testinternalcapi/test_cases.c.h
+++ b/Modules/_testinternalcapi/test_cases.c.h
@@ -5675,7 +5675,8 @@
             assert(executor->vm_data.code == code);
             assert(executor->vm_data.valid);
             assert(tstate->current_executor == NULL);
-            if (_Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker) & 
_PY_EVAL_EVENTS_MASK) {
+            uintptr_t iversion = 
FT_ATOMIC_LOAD_UINTPTR_ACQUIRE(code->_co_instrumentation_version);
+            if (_Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker) != 
iversion) {
                 opcode = executor->vm_data.opcode;
                 oparg = (oparg & ~255) | executor->vm_data.oparg;
                 next_instr = this_instr;
diff --git a/Objects/codeobject.c b/Objects/codeobject.c
index ed3cc41480ab5c..776444a0cc2086 100644
--- a/Objects/codeobject.c
+++ b/Objects/codeobject.c
@@ -2433,7 +2433,6 @@ code_dealloc(PyObject *self)
         PyMem_Free(co_extra);
     }
 #ifdef _Py_TIER2
-    _PyJit_Tracer_InvalidateDependency(tstate, self);
     if (co->co_executors != NULL) {
         clear_executors(co);
     }
diff --git a/Objects/frameobject.c b/Objects/frameobject.c
index 9d774a71edb797..9a7abfc0ec26ab 100644
--- a/Objects/frameobject.c
+++ b/Objects/frameobject.c
@@ -262,7 +262,6 @@ framelocalsproxy_setitem(PyObject *self, PyObject *key, 
PyObject *value)
 
 #if _Py_TIER2
         _Py_Executors_InvalidateDependency(_PyInterpreterState_GET(), co, 1);
-        _PyJit_Tracer_InvalidateDependency(_PyThreadState_GET(), co);
 #endif
 
         _PyLocals_Kind kind = _PyLocals_GetKind(co->co_localspluskinds, i);
diff --git a/Objects/funcobject.c b/Objects/funcobject.c
index 8099b82f4835fb..efe27a2b70c4de 100644
--- a/Objects/funcobject.c
+++ b/Objects/funcobject.c
@@ -12,7 +12,7 @@
 #include "pycore_setobject.h"     // _PySet_NextEntry()
 #include "pycore_stats.h"
 #include "pycore_weakref.h"       // FT_CLEAR_WEAKREFS()
-#include "pycore_optimizer.h"     // _PyJit_Tracer_InvalidateDependency
+#include "pycore_optimizer.h"     // _Py_Executors_InvalidateDependency
 
 static const char *
 func_event_name(PyFunction_WatchEvent event) {
diff --git a/Python/bytecodes.c b/Python/bytecodes.c
index 63a4222264985a..01eaf4a59b645a 100644
--- a/Python/bytecodes.c
+++ b/Python/bytecodes.c
@@ -3125,10 +3125,10 @@ dummy_func(
             assert(executor->vm_data.code == code);
             assert(executor->vm_data.valid);
             assert(tstate->current_executor == NULL);
-            /* If the eval breaker is set then stay in tier 1.
-             * This avoids any potentially infinite loops
-             * involving _RESUME_CHECK */
-            if (_Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker) & 
_PY_EVAL_EVENTS_MASK) {
+            /* If the eval breaker is set, or instrumentation is needed, then 
stay in tier 1.
+             * This avoids any potentially infinite loops involving 
_RESUME_CHECK */
+            uintptr_t iversion = 
FT_ATOMIC_LOAD_UINTPTR_ACQUIRE(code->_co_instrumentation_version);
+            if (_Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker) != 
iversion) {
                 opcode = executor->vm_data.opcode;
                 oparg = (oparg & ~255) | executor->vm_data.oparg;
                 next_instr = this_instr;
@@ -5616,9 +5616,9 @@ dummy_func(
             HANDLE_PENDING_AND_DEOPT_IF(_Py_emscripten_signal_clock == 0);
             _Py_emscripten_signal_clock -= Py_EMSCRIPTEN_SIGNAL_HANDLING;
 #endif
+            uintptr_t iversion = 
FT_ATOMIC_LOAD_UINTPTR_ACQUIRE(_PyFrame_GetCode(frame)->_co_instrumentation_version);
             uintptr_t eval_breaker = 
_Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker);
-            HANDLE_PENDING_AND_DEOPT_IF(eval_breaker & _PY_EVAL_EVENTS_MASK);
-            assert(tstate->tracing || eval_breaker == 
FT_ATOMIC_LOAD_UINTPTR_ACQUIRE(_PyFrame_GetCode(frame)->_co_instrumentation_version));
+            HANDLE_PENDING_AND_DEOPT_IF(eval_breaker != iversion);
         }
 
         tier2 op(_COLD_EXIT, ( -- )) {
@@ -5668,9 +5668,9 @@ dummy_func(
             Py_UNREACHABLE();
         }
 
-        tier2 op(_GUARD_CODE, (version/2 -- )) {
+        tier2 op(_GUARD_CODE_VERSION, (version/2 -- )) {
             PyObject *code = PyStackRef_AsPyObjectBorrow(frame->f_executable);
-            EXIT_IF(code == Py_None);
+            assert(PyCode_Check(code));
             EXIT_IF(((PyCodeObject *)code)->co_version != version);
         }
 
diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h
index 1b3de80e4443b1..8b36d1abf2e916 100644
--- a/Python/executor_cases.c.h
+++ b/Python/executor_cases.c.h
@@ -20034,13 +20034,13 @@
             }
             _Py_emscripten_signal_clock -= Py_EMSCRIPTEN_SIGNAL_HANDLING;
             #endif
+            uintptr_t iversion = 
FT_ATOMIC_LOAD_UINTPTR_ACQUIRE(_PyFrame_GetCode(frame)->_co_instrumentation_version);
             uintptr_t eval_breaker = 
_Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker);
-            if (eval_breaker & _PY_EVAL_EVENTS_MASK) {
+            if (eval_breaker != iversion) {
                 UOP_STAT_INC(uopcode, miss);
                 SET_CURRENT_CACHED_VALUES(0);
                 JUMP_TO_JUMP_TARGET();
             }
-            assert(tstate->tracing || eval_breaker == 
FT_ATOMIC_LOAD_UINTPTR_ACQUIRE(_PyFrame_GetCode(frame)->_co_instrumentation_version));
             SET_CURRENT_CACHED_VALUES(0);
             assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE());
             break;
@@ -20059,14 +20059,14 @@
             }
             _Py_emscripten_signal_clock -= Py_EMSCRIPTEN_SIGNAL_HANDLING;
             #endif
+            uintptr_t iversion = 
FT_ATOMIC_LOAD_UINTPTR_ACQUIRE(_PyFrame_GetCode(frame)->_co_instrumentation_version);
             uintptr_t eval_breaker = 
_Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker);
-            if (eval_breaker & _PY_EVAL_EVENTS_MASK) {
+            if (eval_breaker != iversion) {
                 UOP_STAT_INC(uopcode, miss);
                 _tos_cache0 = _stack_item_0;
                 SET_CURRENT_CACHED_VALUES(1);
                 JUMP_TO_JUMP_TARGET();
             }
-            assert(tstate->tracing || eval_breaker == 
FT_ATOMIC_LOAD_UINTPTR_ACQUIRE(_PyFrame_GetCode(frame)->_co_instrumentation_version));
             _tos_cache0 = _stack_item_0;
             SET_CURRENT_CACHED_VALUES(1);
             assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE());
@@ -20088,15 +20088,15 @@
             }
             _Py_emscripten_signal_clock -= Py_EMSCRIPTEN_SIGNAL_HANDLING;
             #endif
+            uintptr_t iversion = 
FT_ATOMIC_LOAD_UINTPTR_ACQUIRE(_PyFrame_GetCode(frame)->_co_instrumentation_version);
             uintptr_t eval_breaker = 
_Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker);
-            if (eval_breaker & _PY_EVAL_EVENTS_MASK) {
+            if (eval_breaker != iversion) {
                 UOP_STAT_INC(uopcode, miss);
                 _tos_cache1 = _stack_item_1;
                 _tos_cache0 = _stack_item_0;
                 SET_CURRENT_CACHED_VALUES(2);
                 JUMP_TO_JUMP_TARGET();
             }
-            assert(tstate->tracing || eval_breaker == 
FT_ATOMIC_LOAD_UINTPTR_ACQUIRE(_PyFrame_GetCode(frame)->_co_instrumentation_version));
             _tos_cache1 = _stack_item_1;
             _tos_cache0 = _stack_item_0;
             SET_CURRENT_CACHED_VALUES(2);
@@ -20121,8 +20121,9 @@
             }
             _Py_emscripten_signal_clock -= Py_EMSCRIPTEN_SIGNAL_HANDLING;
             #endif
+            uintptr_t iversion = 
FT_ATOMIC_LOAD_UINTPTR_ACQUIRE(_PyFrame_GetCode(frame)->_co_instrumentation_version);
             uintptr_t eval_breaker = 
_Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker);
-            if (eval_breaker & _PY_EVAL_EVENTS_MASK) {
+            if (eval_breaker != iversion) {
                 UOP_STAT_INC(uopcode, miss);
                 _tos_cache2 = _stack_item_2;
                 _tos_cache1 = _stack_item_1;
@@ -20130,7 +20131,6 @@
                 SET_CURRENT_CACHED_VALUES(3);
                 JUMP_TO_JUMP_TARGET();
             }
-            assert(tstate->tracing || eval_breaker == 
FT_ATOMIC_LOAD_UINTPTR_ACQUIRE(_PyFrame_GetCode(frame)->_co_instrumentation_version));
             _tos_cache2 = _stack_item_2;
             _tos_cache1 = _stack_item_1;
             _tos_cache0 = _stack_item_0;
@@ -20184,16 +20184,12 @@
             GOTO_TIER_ONE(target);
         }
 
-        case _GUARD_CODE_r00: {
+        case _GUARD_CODE_VERSION_r00: {
             CHECK_CURRENT_CACHED_VALUES(0);
             assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE());
             uint32_t version = (uint32_t)CURRENT_OPERAND0_32();
             PyObject *code = PyStackRef_AsPyObjectBorrow(frame->f_executable);
-            if (code == Py_None) {
-                UOP_STAT_INC(uopcode, miss);
-                SET_CURRENT_CACHED_VALUES(0);
-                JUMP_TO_JUMP_TARGET();
-            }
+            assert(PyCode_Check(code));
             if (((PyCodeObject *)code)->co_version != version) {
                 UOP_STAT_INC(uopcode, miss);
                 SET_CURRENT_CACHED_VALUES(0);
@@ -20204,18 +20200,13 @@
             break;
         }
 
-        case _GUARD_CODE_r11: {
+        case _GUARD_CODE_VERSION_r11: {
             CHECK_CURRENT_CACHED_VALUES(1);
             assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE());
             _PyStackRef _stack_item_0 = _tos_cache0;
             uint32_t version = (uint32_t)CURRENT_OPERAND0_32();
             PyObject *code = PyStackRef_AsPyObjectBorrow(frame->f_executable);
-            if (code == Py_None) {
-                UOP_STAT_INC(uopcode, miss);
-                _tos_cache0 = _stack_item_0;
-                SET_CURRENT_CACHED_VALUES(1);
-                JUMP_TO_JUMP_TARGET();
-            }
+            assert(PyCode_Check(code));
             if (((PyCodeObject *)code)->co_version != version) {
                 UOP_STAT_INC(uopcode, miss);
                 _tos_cache0 = _stack_item_0;
@@ -20228,20 +20219,14 @@
             break;
         }
 
-        case _GUARD_CODE_r22: {
+        case _GUARD_CODE_VERSION_r22: {
             CHECK_CURRENT_CACHED_VALUES(2);
             assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE());
             _PyStackRef _stack_item_0 = _tos_cache0;
             _PyStackRef _stack_item_1 = _tos_cache1;
             uint32_t version = (uint32_t)CURRENT_OPERAND0_32();
             PyObject *code = PyStackRef_AsPyObjectBorrow(frame->f_executable);
-            if (code == Py_None) {
-                UOP_STAT_INC(uopcode, miss);
-                _tos_cache1 = _stack_item_1;
-                _tos_cache0 = _stack_item_0;
-                SET_CURRENT_CACHED_VALUES(2);
-                JUMP_TO_JUMP_TARGET();
-            }
+            assert(PyCode_Check(code));
             if (((PyCodeObject *)code)->co_version != version) {
                 UOP_STAT_INC(uopcode, miss);
                 _tos_cache1 = _stack_item_1;
@@ -20256,7 +20241,7 @@
             break;
         }
 
-        case _GUARD_CODE_r33: {
+        case _GUARD_CODE_VERSION_r33: {
             CHECK_CURRENT_CACHED_VALUES(3);
             assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE());
             _PyStackRef _stack_item_0 = _tos_cache0;
@@ -20264,14 +20249,7 @@
             _PyStackRef _stack_item_2 = _tos_cache2;
             uint32_t version = (uint32_t)CURRENT_OPERAND0_32();
             PyObject *code = PyStackRef_AsPyObjectBorrow(frame->f_executable);
-            if (code == Py_None) {
-                UOP_STAT_INC(uopcode, miss);
-                _tos_cache2 = _stack_item_2;
-                _tos_cache1 = _stack_item_1;
-                _tos_cache0 = _stack_item_0;
-                SET_CURRENT_CACHED_VALUES(3);
-                JUMP_TO_JUMP_TARGET();
-            }
+            assert(PyCode_Check(code));
             if (((PyCodeObject *)code)->co_version != version) {
                 UOP_STAT_INC(uopcode, miss);
                 _tos_cache2 = _stack_item_2;
diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h
index 829a6988954e5f..bc9ae7e0ab3be3 100644
--- a/Python/generated_cases.c.h
+++ b/Python/generated_cases.c.h
@@ -5675,7 +5675,8 @@
             assert(executor->vm_data.code == code);
             assert(executor->vm_data.valid);
             assert(tstate->current_executor == NULL);
-            if (_Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker) & 
_PY_EVAL_EVENTS_MASK) {
+            uintptr_t iversion = 
FT_ATOMIC_LOAD_UINTPTR_ACQUIRE(code->_co_instrumentation_version);
+            if (_Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker) != 
iversion) {
                 opcode = executor->vm_data.opcode;
                 oparg = (oparg & ~255) | executor->vm_data.oparg;
                 next_instr = this_instr;
diff --git a/Python/instrumentation.c b/Python/instrumentation.c
index 28bbe1d82a3b88..b074d23277878b 100644
--- a/Python/instrumentation.c
+++ b/Python/instrumentation.c
@@ -1785,7 +1785,6 @@ force_instrument_lock_held(PyCodeObject *code, 
PyInterpreterState *interp)
         _PyCode_Clear_Executors(code);
     }
     _Py_Executors_InvalidateDependency(interp, code, 1);
-    _PyJit_Tracer_InvalidateDependency(PyThreadState_GET(), code);
 #endif
     int code_len = (int)Py_SIZE(code);
     /* Exit early to avoid creating instrumentation
@@ -2115,6 +2114,9 @@ int _PyMonitoring_ClearToolId(int tool_id)
     // Set the new global version so all the code objects can refresh the
     // instrumentation.
     set_global_version(_PyThreadState_GET(), version);
+#ifdef _Py_TIER2
+    _Py_Executors_InvalidateAll(interp, 1);
+#endif
     int res = instrument_all_executing_code_objects(interp);
     _PyEval_StartTheWorld(interp);
     return res;
@@ -2457,6 +2459,9 @@ monitoring_restart_events_impl(PyObject *module)
     }
     interp->last_restart_version = restart_version;
     set_global_version(tstate, new_version);
+#ifdef _Py_TIER2
+    _Py_Executors_InvalidateAll(interp, 1);
+#endif
     int res = instrument_all_executing_code_objects(interp);
     _PyEval_StartTheWorld(interp);
 
diff --git a/Python/optimizer.c b/Python/optimizer.c
index 466729b158d345..f075e28d71e0f8 100644
--- a/Python/optimizer.c
+++ b/Python/optimizer.c
@@ -160,11 +160,6 @@ _PyOptimizer_Optimize(
         interp->compiling = false;
         return 0;
     }
-    // One of our dependencies while tracing was invalidated. Not worth 
compiling.
-    if (!_tstate->jit_tracer_state->prev_state.dependencies_still_valid) {
-        interp->compiling = false;
-        return 0;
-    }
     _PyExecutorObject *executor;
     int err = uop_optimize(frame, tstate, &executor, progress_needed);
     if (err <= 0) {
@@ -615,7 +610,6 @@ _PyJit_translate_single_bytecode_to_trace(
     _PyJitTracerState *tracer = _tstate->jit_tracer_state;
     PyCodeObject *old_code = tracer->prev_state.instr_code;
     bool progress_needed = (tracer->initial_state.chain_depth % 
MAX_CHAIN_DEPTH) == 0;
-    _PyBloomFilter *dependencies = &tracer->prev_state.dependencies;
     _PyJitUopBuffer *trace = &tracer->code_buffer;
 
     _Py_CODEUNIT *this_instr =  tracer->prev_state.instr;
@@ -701,10 +695,6 @@ _PyJit_translate_single_bytecode_to_trace(
     }
 #endif
 
-    if (!tracer->prev_state.dependencies_still_valid) {
-        goto done;
-    }
-
     // This happens when a recursive call happens that we can't trace. Such as 
Python -> C -> Python calls
     // If we haven't guarded the IP, then it's untraceable.
     if (frame != tracer->prev_state.instr_frame && !needs_guard_ip) {
@@ -784,11 +774,6 @@ _PyJit_translate_single_bytecode_to_trace(
         ADD_TO_TRACE(_SET_IP, 0, (uintptr_t)target_instr, target);
     }
 
-    // Can be NULL for the entry frame.
-    if (old_code != NULL) {
-        _Py_BloomFilter_Add(dependencies, old_code);
-    }
-
     switch (opcode) {
         case POP_JUMP_IF_NONE:
         case POP_JUMP_IF_NOT_NONE:
@@ -925,15 +910,6 @@ _PyJit_translate_single_bytecode_to_trace(
                                 expansion->uops[i].offset);
                         Py_FatalError("garbled expansion");
                 }
-                if (uop == _PUSH_FRAME || uop == _RETURN_VALUE || uop == 
_RETURN_GENERATOR || uop == _YIELD_VALUE) {
-                    PyCodeObject *new_code = (PyCodeObject 
*)PyStackRef_AsPyObjectBorrow(frame->f_executable);
-                    if (new_code != NULL && !Py_IsNone((PyObject*)new_code)) {
-                        _Py_BloomFilter_Add(dependencies, new_code);
-                    }
-                    ADD_TO_TRACE(uop, oparg, operand, target);
-                    uop_buffer_last(trace)->operand1 = 
PyStackRef_IsNone(frame->f_executable) ? 2 : ((int)(frame->stackpointer - 
_PyFrame_Stackbase(frame)));
-                    break;
-                }
                 if (uop == _BINARY_OP_INPLACE_ADD_UNICODE) {
                     assert(i + 1 == nuops);
                     _Py_CODEUNIT *next = target_instr + 1 + 
_PyOpcode_Caches[_PyOpcode_Deopt[opcode]];
@@ -964,7 +940,10 @@ _PyJit_translate_single_bytecode_to_trace(
         ADD_TO_TRACE(_RECORD_CODE, 0, (uintptr_t)code, 0);
         ADD_TO_TRACE(guard_ip, 0, (uintptr_t)next_instr, 0);
         if (PyCode_Check(code)) {
-            ADD_TO_TRACE(_GUARD_CODE, 0, ((PyCodeObject *)code)->co_version, 
0);
+            /* Record stack depth, in operand1 */
+            int stack_depth = (int)(frame->stackpointer - 
_PyFrame_Stackbase(frame));
+            uop_buffer_last(trace)->operand1 = stack_depth;
+            ADD_TO_TRACE(_GUARD_CODE_VERSION, 0, ((PyCodeObject 
*)code)->co_version, 0);
         }
     }
     // Loop back to the start
@@ -1046,7 +1025,6 @@ _PyJit_TryInitializeTracing(
     tracer->initial_state.exit = exit;
     tracer->initial_state.stack_depth = (int)(stack_pointer - 
_PyFrame_Stackbase(frame));
     tracer->initial_state.chain_depth = chain_depth;
-    tracer->prev_state.dependencies_still_valid = true;
     tracer->prev_state.instr_code = (PyCodeObject 
*)Py_NewRef(_PyFrame_GetCode(frame));
     tracer->prev_state.instr = curr_instr;
     tracer->prev_state.instr_frame = frame;
@@ -1064,7 +1042,6 @@ _PyJit_TryInitializeTracing(
     if (_PyOpcode_Caches[_PyOpcode_Deopt[close_loop_instr->op.code]]) {
         close_loop_instr[1].counter = trigger_backoff_counter();
     }
-    _Py_BloomFilter_Init(&tracer->prev_state.dependencies);
     tracer->is_tracing = true;
     return 1;
 }
@@ -1216,7 +1193,7 @@ prepare_for_execution(_PyUOpInstruction *buffer, int 
length)
                 base_opcode == _GUARD_IP_RETURN_VALUE ||
                 base_opcode == _GUARD_IP_YIELD_VALUE ||
                 base_opcode == _GUARD_IP_RETURN_GENERATOR ||
-                base_opcode == _GUARD_CODE
+                base_opcode == _GUARD_CODE_VERSION
             ) {
                 base_exit_op = _DYNAMIC_EXIT;
             }
@@ -1498,7 +1475,6 @@ uop_optimize(
 {
     _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate;
     assert(_tstate->jit_tracer_state != NULL);
-    _PyBloomFilter *dependencies = 
&_tstate->jit_tracer_state->prev_state.dependencies;
     _PyUOpInstruction *buffer = _tstate->jit_tracer_state->code_buffer.start;
     OPT_STAT_INC(attempts);
     bool is_noopt = !tstate->interp->opt_config.uops_optimize_enabled;
@@ -1510,11 +1486,15 @@ uop_optimize(
     assert(length > 0);
     assert(length < UOP_MAX_TRACE_LENGTH);
     OPT_STAT_INC(traces_created);
+
+    _PyBloomFilter dependencies;
+    _Py_BloomFilter_Init(&dependencies);
     if (!is_noopt) {
         _PyUOpInstruction *output = 
&_tstate->jit_tracer_state->uop_array[UOP_MAX_TRACE_LENGTH];
         length = _Py_uop_analyze_and_optimize(
             _tstate, buffer, length, curr_stackentries,
-            output, dependencies);
+            output, &dependencies);
+
         if (length <= 0) {
             return length;
         }
@@ -1546,7 +1526,7 @@ uop_optimize(
     length = prepare_for_execution(buffer, length);
     assert(length <= UOP_MAX_TRACE_LENGTH);
     _PyExecutorObject *executor = make_executor_from_uops(
-        _tstate, buffer, length, dependencies);
+        _tstate, buffer, length, &dependencies);
     if (executor == NULL) {
         return -1;
     }
@@ -1861,21 +1841,6 @@ _Py_Executors_InvalidateDependency(PyInterpreterState 
*interp, void *obj, int is
     _Py_Executors_InvalidateAll(interp, is_invalidation);
 }
 
-void
-_PyJit_Tracer_InvalidateDependency(PyThreadState *tstate, void *obj)
-{
-    _PyBloomFilter obj_filter;
-    _Py_BloomFilter_Init(&obj_filter);
-    _Py_BloomFilter_Add(&obj_filter, obj);
-    _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate;
-    if (_tstate->jit_tracer_state == NULL) {
-        return;
-    }
-    if 
(bloom_filter_may_contain(&_tstate->jit_tracer_state->prev_state.dependencies, 
&obj_filter))
-    {
-        _tstate->jit_tracer_state->prev_state.dependencies_still_valid = false;
-    }
-}
 /* Invalidate all executors */
 void
 _Py_Executors_InvalidateAll(PyInterpreterState *interp, int is_invalidation)
diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c
index c6a513ad220b63..45dd42c96064bc 100644
--- a/Python/optimizer_analysis.c
+++ b/Python/optimizer_analysis.c
@@ -55,23 +55,21 @@
 static void
 dump_abstract_stack(_Py_UOpsAbstractFrame *frame, JitOptRef *stack_pointer)
 {
-    JitOptRef *stack_base = frame->stack;
-    JitOptRef *locals_base = frame->locals;
     printf("    locals=[");
-    for (JitOptRef *ptr = locals_base; ptr < stack_base; ptr++) {
-        if (ptr != locals_base) {
+    for (int i = 0 ; i < frame->locals_len; i++) {
+        if (i > 0) {
             printf(", ");
         }
-        _PyUOpSymPrint(*ptr);
+        _PyUOpSymPrint(frame->locals[i]);
     }
     printf("]\n");
-    if (stack_pointer < stack_base) {
-        printf("    stack=%d\n", (int)(stack_pointer - stack_base));
+    if (stack_pointer < frame->stack) {
+        printf("    stack=%d\n", (int)(stack_pointer - frame->stack));
     }
     else {
         printf("    stack=[");
-        for (JitOptRef *ptr = stack_base; ptr < stack_pointer; ptr++) {
-            if (ptr != stack_base) {
+        for (JitOptRef *ptr = frame->stack; ptr < stack_pointer; ptr++) {
+            if (ptr != frame->stack) {
                 printf(", ");
             }
             _PyUOpSymPrint(*ptr);
@@ -291,6 +289,7 @@ add_op(JitOptContext *ctx, _PyUOpInstruction *this_instr,
 #define sym_set_recorded_gen_func(SYM, VAL) 
_Py_uop_sym_set_recorded_gen_func(ctx, SYM, VAL)
 #define sym_get_probable_func_code _Py_uop_sym_get_probable_func_code
 #define sym_get_probable_value _Py_uop_sym_get_probable_value
+#define sym_set_stack_depth(DEPTH, SP) _Py_uop_sym_set_stack_depth(ctx, DEPTH, 
SP)
 
 /* Comparison oparg masks */
 #define COMPARE_LT_MASK 2
@@ -473,14 +472,15 @@ optimize_uops(
         interp->type_watchers[TYPE_WATCHER_ID] = type_watcher_callback;
     }
 
-    _Py_uop_abstractcontext_init(ctx);
-    _Py_UOpsAbstractFrame *frame = _Py_uop_frame_new(ctx, (PyCodeObject 
*)func->func_code, curr_stacklen, NULL, 0);
+    _Py_uop_abstractcontext_init(ctx, dependencies);
+    _Py_UOpsAbstractFrame *frame = _Py_uop_frame_new(ctx, (PyCodeObject 
*)func->func_code, NULL, 0);
     if (frame == NULL) {
         return 0;
     }
     frame->func = func;
     ctx->curr_frame_depth++;
     ctx->frame = frame;
+    _Py_uop_sym_set_stack_depth(ctx, curr_stacklen, frame->stack_pointer);
 
     _PyUOpInstruction *this_instr = NULL;
     JitOptRef *stack_pointer = ctx->frame->stack_pointer;
@@ -718,8 +718,7 @@ _Py_uop_analyze_and_optimize(
     OPT_STAT_INC(optimizer_attempts);
 
     length = optimize_uops(
-         tstate, buffer, length, curr_stacklen,
-         output, dependencies);
+        tstate, buffer, length, curr_stacklen, output, dependencies);
 
     if (length == 0) {
         return length;
diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c
index 2b35628ad99999..228bd51a28bb69 100644
--- a/Python/optimizer_bytecodes.c
+++ b/Python/optimizer_bytecodes.c
@@ -46,6 +46,7 @@ typedef struct _Py_UOpsAbstractFrame _Py_UOpsAbstractFrame;
 #define sym_set_recorded_gen_func(SYM, VAL) 
_Py_uop_sym_set_recorded_gen_func(ctx, SYM, VAL)
 #define sym_get_probable_func_code _Py_uop_sym_get_probable_func_code
 #define sym_get_probable_value _Py_uop_sym_get_probable_value
+#define sym_set_stack_depth(DEPTH, SP) _Py_uop_sym_set_stack_depth(ctx, DEPTH, 
SP)
 
 extern int
 optimize_to_bool(
@@ -362,7 +363,7 @@ dummy_func(void) {
     }
 
     op(_BINARY_OP_SUBSCR_INIT_CALL, (container, sub, getitem -- new_frame)) {
-        _Py_UOpsAbstractFrame *f = frame_new_from_symbol(ctx, getitem, 0, 
NULL, 0);
+        _Py_UOpsAbstractFrame *f = frame_new_from_symbol(ctx, getitem, NULL, 
0);
         if (f == NULL) {
             break;
         }
@@ -833,7 +834,7 @@ dummy_func(void) {
         // + 1 for _SAVE_RETURN_OFFSET
         // FIX ME -- This needs a version check and function watcher
         PyCodeObject *co = (PyCodeObject *)((PyFunctionObject 
*)fget)->func_code;
-        _Py_UOpsAbstractFrame *f = frame_new(ctx, co, 0, NULL, 0);
+        _Py_UOpsAbstractFrame *f = frame_new(ctx, co, NULL, 0);
         if (f == NULL) {
             break;
         }
@@ -894,9 +895,9 @@ dummy_func(void) {
         }
 
         if (sym_is_null(self_or_null) || sym_is_not_null(self_or_null)) {
-            new_frame = PyJitRef_WrapInvalid(frame_new_from_symbol(ctx, 
callable, 0, args, argcount));
+            new_frame = PyJitRef_WrapInvalid(frame_new_from_symbol(ctx, 
callable, args, argcount));
         } else {
-            new_frame = PyJitRef_WrapInvalid(frame_new_from_symbol(ctx, 
callable, 0, NULL, 0));
+            new_frame = PyJitRef_WrapInvalid(frame_new_from_symbol(ctx, 
callable, NULL, 0));
         }
     }
 
@@ -907,15 +908,15 @@ dummy_func(void) {
     }
 
     op(_PY_FRAME_GENERAL, (callable, self_or_null, args[oparg] -- new_frame)) {
-        new_frame = PyJitRef_WrapInvalid(frame_new_from_symbol(ctx, callable, 
0, NULL, 0));
+        new_frame = PyJitRef_WrapInvalid(frame_new_from_symbol(ctx, callable, 
NULL, 0));
     }
 
     op(_PY_FRAME_KW, (callable, self_or_null, args[oparg], kwnames -- 
new_frame)) {
-        new_frame = PyJitRef_WrapInvalid(frame_new_from_symbol(ctx, callable, 
0, NULL, 0));
+        new_frame = PyJitRef_WrapInvalid(frame_new_from_symbol(ctx, callable, 
NULL, 0));
     }
 
     op(_PY_FRAME_EX, (func_st, null, callargs_st, kwargs_st -- ex_frame)) {
-        ex_frame = PyJitRef_WrapInvalid(frame_new_from_symbol(ctx, func_st, 0, 
NULL, 0));
+        ex_frame = PyJitRef_WrapInvalid(frame_new_from_symbol(ctx, func_st, 
NULL, 0));
     }
 
     op(_CHECK_AND_ALLOCATE_OBJECT, (type_version/2, callable, self_or_null, 
args[oparg] -- callable, self_or_null, args[oparg])) {
@@ -927,18 +928,18 @@ dummy_func(void) {
 
     op(_CREATE_INIT_FRAME, (init, self, args[oparg] -- init_frame)) {
         ctx->frame->stack_pointer = stack_pointer - oparg - 2;
-        _Py_UOpsAbstractFrame *shim = frame_new(ctx, (PyCodeObject 
*)&_Py_InitCleanup, 0, NULL, 0);
+        _Py_UOpsAbstractFrame *shim = frame_new(ctx, (PyCodeObject 
*)&_Py_InitCleanup, NULL, 0);
         if (shim == NULL) {
             break;
         }
         /* Push self onto stack of shim */
-        shim->stack[0] = self;
+        shim->stack_pointer[0] = self;
         shim->stack_pointer++;
         assert((int)(shim->stack_pointer - shim->stack) == 1);
         ctx->frame = shim;
         ctx->curr_frame_depth++;
         assert((this_instr + 1)->opcode == _PUSH_FRAME);
-        init_frame = PyJitRef_WrapInvalid(frame_new_from_symbol(ctx, init, 0, 
args-1, oparg+1));
+        init_frame = PyJitRef_WrapInvalid(frame_new_from_symbol(ctx, init, 
args-1, oparg+1));
     }
 
     op(_RETURN_VALUE, (retval -- res)) {
@@ -954,15 +955,7 @@ dummy_func(void) {
             ctx->done = true;
             break;
         }
-        int returning_stacklevel = (int)this_instr->operand1;
-        if (ctx->curr_frame_depth >= 2) {
-            PyCodeObject *expected_code = ctx->frames[ctx->curr_frame_depth - 
2].code;
-            if (expected_code == returning_code) {
-                assert(this_instr[2].opcode == _GUARD_IP_RETURN_VALUE);
-                REPLACE_OP((this_instr + 2), _NOP, 0, 0);
-            }
-        }
-        if (frame_pop(ctx, returning_code, returning_stacklevel)) {
+        if (frame_pop(ctx, returning_code)) {
             break;
         }
         stack_pointer = ctx->frame->stack_pointer;
@@ -976,14 +969,12 @@ dummy_func(void) {
         ctx->frame->stack_pointer = stack_pointer;
         assert(this_instr[1].opcode == _RECORD_CODE);
         PyCodeObject *returning_code = (PyCodeObject *)this_instr[1].operand0;
-        assert(PyCode_Check(returning_code));
         if (returning_code == NULL) {
             ctx->done = true;
             break;
         }
-        _Py_BloomFilter_Add(dependencies, returning_code);
-        int returning_stacklevel = (int)this_instr->operand1;
-        if (frame_pop(ctx, returning_code, returning_stacklevel)) {
+        assert(PyCode_Check(returning_code));
+        if (frame_pop(ctx, returning_code)) {
             break;
         }
         stack_pointer = ctx->frame->stack_pointer;
@@ -998,14 +989,12 @@ dummy_func(void) {
         ctx->frame->stack_pointer = stack_pointer;
         assert(this_instr[1].opcode == _RECORD_CODE);
         PyCodeObject *returning_code = (PyCodeObject *)this_instr[1].operand0;
-        assert(PyCode_Check(returning_code));
         if (returning_code == NULL) {
             ctx->done = true;
             break;
         }
-        _Py_BloomFilter_Add(dependencies, returning_code);
-        int returning_stacklevel = (int)this_instr->operand1;
-        if (frame_pop(ctx, returning_code, returning_stacklevel)) {
+        assert(PyCode_Check(returning_code));
+        if (frame_pop(ctx, returning_code)) {
             break;
         }
         stack_pointer = ctx->frame->stack_pointer;
@@ -1025,22 +1014,24 @@ dummy_func(void) {
     }
 
     op(_FOR_ITER_GEN_FRAME, (iter, unused -- iter, unused, gen_frame)) {
-        _Py_UOpsAbstractFrame *new_frame = frame_new_from_symbol(ctx, iter, 1, 
NULL, 0);
+        _Py_UOpsAbstractFrame *new_frame = frame_new_from_symbol(ctx, iter, 
NULL, 0);
         if (new_frame == NULL) {
             ctx->done = true;
             break;
         }
-        new_frame->stack[0] = sym_new_const(ctx, Py_None);
+        new_frame->stack_pointer[0] = sym_new_const(ctx, Py_None);
+        new_frame->stack_pointer++;
         gen_frame = PyJitRef_WrapInvalid(new_frame);
     }
 
     op(_SEND_GEN_FRAME, (receiver, v -- receiver, gen_frame)) {
-        _Py_UOpsAbstractFrame *new_frame = frame_new_from_symbol(ctx, 
receiver, 1, NULL, 0);
+        _Py_UOpsAbstractFrame *new_frame = frame_new_from_symbol(ctx, 
receiver, NULL, 0);
         if (new_frame == NULL) {
             ctx->done = true;
             break;
         }
-        new_frame->stack[0] = PyJitRef_StripReferenceInfo(v);
+        new_frame->stack_pointer[0] = PyJitRef_StripReferenceInfo(v);
+        new_frame->stack_pointer++;
         gen_frame = PyJitRef_WrapInvalid(new_frame);
     }
 
@@ -1062,14 +1053,10 @@ dummy_func(void) {
         if (!CURRENT_FRAME_IS_INIT_SHIM()) {
             ctx->frame->stack_pointer = stack_pointer;
         }
+        ctx->frame->caller = true;
         ctx->frame = (_Py_UOpsAbstractFrame *)PyJitRef_Unwrap(new_frame);
         ctx->curr_frame_depth++;
         stack_pointer = ctx->frame->stack_pointer;
-        // Fixed calls don't need IP guards.
-        if ((this_instr-1)->opcode == _CREATE_INIT_FRAME) {
-            assert((this_instr+1)->opcode == _GUARD_IP__PUSH_FRAME);
-            REPLACE_OP(this_instr+1, _NOP, 0, 0);
-        }
         assert(ctx->frame->locals != NULL);
     }
 
@@ -1653,6 +1640,47 @@ dummy_func(void) {
         sym_set_recorded_gen_func(nos, func);
     }
 
+    op(_GUARD_IP__PUSH_FRAME, (ip/4 --)) {
+        stack_pointer = sym_set_stack_depth(this_instr->operand1, 
stack_pointer);
+        // TO DO
+        // Normal function calls to known functions
+        // do not need an IP guard.
+    }
+
+    op(_GUARD_CODE_VERSION, (version/2 -- )) {
+        PyCodeObject *co = get_current_code_object(ctx);
+        if (co->co_version == version) {
+            _Py_BloomFilter_Add(dependencies, co);
+            REPLACE_OP(this_instr, _NOP, 0, 0);
+        }
+        else {
+            ctx->done = true;
+        }
+    }
+
+    op(_GUARD_IP_YIELD_VALUE, (ip/4 --)) {
+        if (ctx->frame->caller) {
+            REPLACE_OP(this_instr, _NOP, 0, 0);
+        }
+        stack_pointer = sym_set_stack_depth(this_instr->operand1, 
stack_pointer);
+    }
+
+    op(_GUARD_IP_RETURN_VALUE, (ip/4 --)) {
+        if (ctx->frame->caller) {
+            REPLACE_OP(this_instr, _NOP, 0, 0);
+        }
+        stack_pointer = sym_set_stack_depth(this_instr->operand1, 
stack_pointer);
+    }
+
+    op(_GUARD_IP_RETURN_GENERATOR, (ip/4 --)) {
+        if (ctx->frame->caller) {
+            REPLACE_OP(this_instr, _NOP, 0, 0);
+        }
+        stack_pointer = sym_set_stack_depth(this_instr->operand1, 
stack_pointer);
+    }
+
+
+
 // END BYTECODES //
 
 }
diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h
index 7faa699a058249..a93e85329297cd 100644
--- a/Python/optimizer_cases.c.h
+++ b/Python/optimizer_cases.c.h
@@ -1162,7 +1162,7 @@
             getitem = stack_pointer[-1];
             sub = stack_pointer[-2];
             container = stack_pointer[-3];
-            _Py_UOpsAbstractFrame *f = frame_new_from_symbol(ctx, getitem, 0, 
NULL, 0);
+            _Py_UOpsAbstractFrame *f = frame_new_from_symbol(ctx, getitem, 
NULL, 0);
             if (f == NULL) {
                 break;
             }
@@ -1272,15 +1272,7 @@
                 ctx->done = true;
                 break;
             }
-            int returning_stacklevel = (int)this_instr->operand1;
-            if (ctx->curr_frame_depth >= 2) {
-                PyCodeObject *expected_code = 
ctx->frames[ctx->curr_frame_depth - 2].code;
-                if (expected_code == returning_code) {
-                    assert(this_instr[2].opcode == _GUARD_IP_RETURN_VALUE);
-                    REPLACE_OP((this_instr + 2), _NOP, 0, 0);
-                }
-            }
-            if (frame_pop(ctx, returning_code, returning_stacklevel)) {
+            if (frame_pop(ctx, returning_code)) {
                 break;
             }
             stack_pointer = ctx->frame->stack_pointer;
@@ -1324,12 +1316,13 @@
             JitOptRef gen_frame;
             v = stack_pointer[-1];
             receiver = stack_pointer[-2];
-            _Py_UOpsAbstractFrame *new_frame = frame_new_from_symbol(ctx, 
receiver, 1, NULL, 0);
+            _Py_UOpsAbstractFrame *new_frame = frame_new_from_symbol(ctx, 
receiver, NULL, 0);
             if (new_frame == NULL) {
                 ctx->done = true;
                 break;
             }
-            new_frame->stack[0] = PyJitRef_StripReferenceInfo(v);
+            new_frame->stack_pointer[0] = PyJitRef_StripReferenceInfo(v);
+            new_frame->stack_pointer++;
             gen_frame = PyJitRef_WrapInvalid(new_frame);
             stack_pointer[-1] = gen_frame;
             break;
@@ -1346,14 +1339,12 @@
             ctx->frame->stack_pointer = stack_pointer;
             assert(this_instr[1].opcode == _RECORD_CODE);
             PyCodeObject *returning_code = (PyCodeObject 
*)this_instr[1].operand0;
-            assert(PyCode_Check(returning_code));
             if (returning_code == NULL) {
                 ctx->done = true;
                 break;
             }
-            _Py_BloomFilter_Add(dependencies, returning_code);
-            int returning_stacklevel = (int)this_instr->operand1;
-            if (frame_pop(ctx, returning_code, returning_stacklevel)) {
+            assert(PyCode_Check(returning_code));
+            if (frame_pop(ctx, returning_code)) {
                 break;
             }
             stack_pointer = ctx->frame->stack_pointer;
@@ -2011,7 +2002,7 @@
             owner = stack_pointer[-1];
             PyObject *fget = (PyObject *)this_instr->operand0;
             PyCodeObject *co = (PyCodeObject *)((PyFunctionObject 
*)fget)->func_code;
-            _Py_UOpsAbstractFrame *f = frame_new(ctx, co, 0, NULL, 0);
+            _Py_UOpsAbstractFrame *f = frame_new(ctx, co, NULL, 0);
             if (f == NULL) {
                 break;
             }
@@ -2711,12 +2702,13 @@
             JitOptRef iter;
             JitOptRef gen_frame;
             iter = stack_pointer[-2];
-            _Py_UOpsAbstractFrame *new_frame = frame_new_from_symbol(ctx, 
iter, 1, NULL, 0);
+            _Py_UOpsAbstractFrame *new_frame = frame_new_from_symbol(ctx, 
iter, NULL, 0);
             if (new_frame == NULL) {
                 ctx->done = true;
                 break;
             }
-            new_frame->stack[0] = sym_new_const(ctx, Py_None);
+            new_frame->stack_pointer[0] = sym_new_const(ctx, Py_None);
+            new_frame->stack_pointer++;
             gen_frame = PyJitRef_WrapInvalid(new_frame);
             CHECK_STACK_BOUNDS(1);
             stack_pointer[0] = gen_frame;
@@ -2897,7 +2889,7 @@
             JitOptRef callable;
             JitOptRef new_frame;
             callable = stack_pointer[-2 - oparg];
-            new_frame = PyJitRef_WrapInvalid(frame_new_from_symbol(ctx, 
callable, 0, NULL, 0));
+            new_frame = PyJitRef_WrapInvalid(frame_new_from_symbol(ctx, 
callable, NULL, 0));
             CHECK_STACK_BOUNDS(-1 - oparg);
             stack_pointer[-2 - oparg] = new_frame;
             stack_pointer += -1 - oparg;
@@ -3033,9 +3025,9 @@
                 argcount++;
             }
             if (sym_is_null(self_or_null) || sym_is_not_null(self_or_null)) {
-                new_frame = PyJitRef_WrapInvalid(frame_new_from_symbol(ctx, 
callable, 0, args, argcount));
+                new_frame = PyJitRef_WrapInvalid(frame_new_from_symbol(ctx, 
callable, args, argcount));
             } else {
-                new_frame = PyJitRef_WrapInvalid(frame_new_from_symbol(ctx, 
callable, 0, NULL, 0));
+                new_frame = PyJitRef_WrapInvalid(frame_new_from_symbol(ctx, 
callable, NULL, 0));
             }
             CHECK_STACK_BOUNDS(-1 - oparg);
             stack_pointer[-2 - oparg] = new_frame;
@@ -3053,13 +3045,10 @@
             if (!CURRENT_FRAME_IS_INIT_SHIM()) {
                 ctx->frame->stack_pointer = stack_pointer;
             }
+            ctx->frame->caller = true;
             ctx->frame = (_Py_UOpsAbstractFrame *)PyJitRef_Unwrap(new_frame);
             ctx->curr_frame_depth++;
             stack_pointer = ctx->frame->stack_pointer;
-            if ((this_instr-1)->opcode == _CREATE_INIT_FRAME) {
-                assert((this_instr+1)->opcode == _GUARD_IP__PUSH_FRAME);
-                REPLACE_OP(this_instr+1, _NOP, 0, 0);
-            }
             assert(ctx->frame->locals != NULL);
             break;
         }
@@ -3213,17 +3202,17 @@
             self = stack_pointer[-1 - oparg];
             init = stack_pointer[-2 - oparg];
             ctx->frame->stack_pointer = stack_pointer - oparg - 2;
-            _Py_UOpsAbstractFrame *shim = frame_new(ctx, (PyCodeObject 
*)&_Py_InitCleanup, 0, NULL, 0);
+            _Py_UOpsAbstractFrame *shim = frame_new(ctx, (PyCodeObject 
*)&_Py_InitCleanup, NULL, 0);
             if (shim == NULL) {
                 break;
             }
-            shim->stack[0] = self;
+            shim->stack_pointer[0] = self;
             shim->stack_pointer++;
             assert((int)(shim->stack_pointer - shim->stack) == 1);
             ctx->frame = shim;
             ctx->curr_frame_depth++;
             assert((this_instr + 1)->opcode == _PUSH_FRAME);
-            init_frame = PyJitRef_WrapInvalid(frame_new_from_symbol(ctx, init, 
0, args-1, oparg+1));
+            init_frame = PyJitRef_WrapInvalid(frame_new_from_symbol(ctx, init, 
args-1, oparg+1));
             CHECK_STACK_BOUNDS(-1 - oparg);
             stack_pointer[-2 - oparg] = init_frame;
             stack_pointer += -1 - oparg;
@@ -3500,7 +3489,7 @@
             JitOptRef callable;
             JitOptRef new_frame;
             callable = stack_pointer[-3 - oparg];
-            new_frame = PyJitRef_WrapInvalid(frame_new_from_symbol(ctx, 
callable, 0, NULL, 0));
+            new_frame = PyJitRef_WrapInvalid(frame_new_from_symbol(ctx, 
callable, NULL, 0));
             CHECK_STACK_BOUNDS(-2 - oparg);
             stack_pointer[-3 - oparg] = new_frame;
             stack_pointer += -2 - oparg;
@@ -3548,7 +3537,7 @@
             JitOptRef func_st;
             JitOptRef ex_frame;
             func_st = stack_pointer[-4];
-            ex_frame = PyJitRef_WrapInvalid(frame_new_from_symbol(ctx, 
func_st, 0, NULL, 0));
+            ex_frame = PyJitRef_WrapInvalid(frame_new_from_symbol(ctx, 
func_st, NULL, 0));
             CHECK_STACK_BOUNDS(-3);
             stack_pointer[-4] = ex_frame;
             stack_pointer += -3;
@@ -3592,14 +3581,12 @@
             ctx->frame->stack_pointer = stack_pointer;
             assert(this_instr[1].opcode == _RECORD_CODE);
             PyCodeObject *returning_code = (PyCodeObject 
*)this_instr[1].operand0;
-            assert(PyCode_Check(returning_code));
             if (returning_code == NULL) {
                 ctx->done = true;
                 break;
             }
-            _Py_BloomFilter_Add(dependencies, returning_code);
-            int returning_stacklevel = (int)this_instr->operand1;
-            if (frame_pop(ctx, returning_code, returning_stacklevel)) {
+            assert(PyCode_Check(returning_code));
+            if (frame_pop(ctx, returning_code)) {
                 break;
             }
             stack_pointer = ctx->frame->stack_pointer;
@@ -4157,23 +4144,49 @@
             break;
         }
 
-        case _GUARD_CODE: {
+        case _GUARD_CODE_VERSION: {
+            uint32_t version = (uint32_t)this_instr->operand0;
+            PyCodeObject *co = get_current_code_object(ctx);
+            if (co->co_version == version) {
+                _Py_BloomFilter_Add(dependencies, co);
+                REPLACE_OP(this_instr, _NOP, 0, 0);
+            }
+            else {
+                ctx->done = true;
+            }
             break;
         }
 
         case _GUARD_IP__PUSH_FRAME: {
+            PyObject *ip = (PyObject *)this_instr->operand0;
+            stack_pointer = sym_set_stack_depth(this_instr->operand1, 
stack_pointer);
             break;
         }
 
         case _GUARD_IP_YIELD_VALUE: {
+            PyObject *ip = (PyObject *)this_instr->operand0;
+            if (ctx->frame->caller) {
+                REPLACE_OP(this_instr, _NOP, 0, 0);
+            }
+            stack_pointer = sym_set_stack_depth(this_instr->operand1, 
stack_pointer);
             break;
         }
 
         case _GUARD_IP_RETURN_VALUE: {
+            PyObject *ip = (PyObject *)this_instr->operand0;
+            if (ctx->frame->caller) {
+                REPLACE_OP(this_instr, _NOP, 0, 0);
+            }
+            stack_pointer = sym_set_stack_depth(this_instr->operand1, 
stack_pointer);
             break;
         }
 
         case _GUARD_IP_RETURN_GENERATOR: {
+            PyObject *ip = (PyObject *)this_instr->operand0;
+            if (ctx->frame->caller) {
+                REPLACE_OP(this_instr, _NOP, 0, 0);
+            }
+            stack_pointer = sym_set_stack_depth(this_instr->operand1, 
stack_pointer);
             break;
         }
 
diff --git a/Python/optimizer_symbols.c b/Python/optimizer_symbols.c
index 635ce622c3c589..dcbe093fd6d74c 100644
--- a/Python/optimizer_symbols.c
+++ b/Python/optimizer_symbols.c
@@ -1284,7 +1284,6 @@ _Py_UOpsAbstractFrame *
 _Py_uop_frame_new_from_symbol(
     JitOptContext *ctx,
     JitOptRef callable,
-    int curr_stackentries,
     JitOptRef *args,
     int arg_len)
 {
@@ -1293,7 +1292,7 @@ _Py_uop_frame_new_from_symbol(
         ctx->done = true;
         return NULL;
     }
-    _Py_UOpsAbstractFrame *frame = _Py_uop_frame_new(ctx, co, 
curr_stackentries, args, arg_len);
+    _Py_UOpsAbstractFrame *frame = _Py_uop_frame_new(ctx, co, args, arg_len);
     if (frame == NULL) {
         return NULL;
     }
@@ -1311,7 +1310,6 @@ _Py_UOpsAbstractFrame *
 _Py_uop_frame_new(
     JitOptContext *ctx,
     PyCodeObject *co,
-    int curr_stackentries,
     JitOptRef *args,
     int arg_len)
 {
@@ -1324,17 +1322,21 @@ _Py_uop_frame_new(
     }
     _Py_UOpsAbstractFrame *frame = &ctx->frames[ctx->curr_frame_depth];
     frame->code = co;
-    frame->stack_len = co->co_stacksize;
+
+    frame->locals = ctx->locals.used;
+    ctx->locals.used += co->co_nlocalsplus;
     frame->locals_len = co->co_nlocalsplus;
 
-    frame->locals = ctx->n_consumed;
-    frame->stack = frame->locals + co->co_nlocalsplus;
-    frame->stack_pointer = frame->stack + curr_stackentries;
+    frame->stack = ctx->stack.used;
+    ctx->stack.used += co->co_stacksize;
+    frame->stack_len = co->co_stacksize;
+
+    frame->stack_pointer = frame->stack;
     frame->globals_checked_version = 0;
     frame->globals_watched = false;
     frame->func = NULL;
-    ctx->n_consumed = ctx->n_consumed + (co->co_nlocalsplus + 
co->co_stacksize);
-    if (ctx->n_consumed >= ctx->limit) {
+    frame->caller = false;
+    if (ctx->locals.used > ctx->locals.end || ctx->stack.used > 
ctx->stack.end) {
         ctx->done = true;
         ctx->out_of_space = true;
         return NULL;
@@ -1354,16 +1356,45 @@ _Py_uop_frame_new(
         frame->locals[i] = local;
     }
 
-    // Initialize the stack as well
-    for (int i = 0; i < curr_stackentries; i++) {
-        JitOptRef stackvar = _Py_uop_sym_new_unknown(ctx);
-        frame->stack[i] = stackvar;
-    }
+    /* Most optimizations rely on code objects being immutable (including sys._getframe modifications),
+     * and up to date for instrumentation. */
+    _Py_BloomFilter_Add(ctx->dependencies, co);
 
     assert(frame->locals != NULL);
     return frame;
 }
 
+JitOptRef *
+_Py_uop_sym_set_stack_depth(JitOptContext *ctx, int stack_depth, JitOptRef 
*current_sp) {
+    _Py_UOpsAbstractFrame *frame = ctx->frame;
+    assert(frame->stack != NULL);
+    JitOptRef *new_stack_pointer = frame->stack + stack_depth;
+    if (current_sp > new_stack_pointer) {
+        ctx->done = true;
+        ctx->contradiction = true;
+        return NULL;
+    }
+    if (new_stack_pointer > ctx->stack.end) {
+        ctx->done = true;
+        ctx->out_of_space = true;
+        return NULL;
+    }
+    int delta = (int)(new_stack_pointer - current_sp);
+    assert(delta >= 0);
+    if (delta) {
+        /* Shift existing stack elements up */
+        for (JitOptRef *p = current_sp-1; p >= frame->stack; p--) {
+            p[delta] = *p;
+        }
+        /* Fill rest of stack with unknowns */
+        for (int i = 0; i < delta; i++) {
+            frame->stack[i] = _Py_uop_sym_new_unknown(ctx);
+        }
+    }
+    return frame->stack_pointer = new_stack_pointer;
+}
+
+
 void
 _Py_uop_abstractcontext_fini(JitOptContext *ctx)
 {
@@ -1380,15 +1411,24 @@ _Py_uop_abstractcontext_fini(JitOptContext *ctx)
     }
 }
 
+// Leave a bit of space to push values before checking that there is space for a new frame
+#define STACK_HEADROOM 2
+
 void
-_Py_uop_abstractcontext_init(JitOptContext *ctx)
+_Py_uop_abstractcontext_init(JitOptContext *ctx, _PyBloomFilter *dependencies)
 {
     static_assert(sizeof(JitOptSymbol) <= 3 * sizeof(uint64_t), "JitOptSymbol 
has grown");
-    ctx->limit = ctx->locals_and_stack + MAX_ABSTRACT_INTERP_SIZE;
-    ctx->n_consumed = ctx->locals_and_stack;
+
+    ctx->stack.used = ctx->stack_array;
+    ctx->stack.end = 
&ctx->stack_array[ABSTRACT_INTERP_STACK_SIZE-STACK_HEADROOM];
+    ctx->locals.used = ctx->locals_array;
+    ctx->locals.end = 
&ctx->locals_array[ABSTRACT_INTERP_LOCALS_SIZE-STACK_HEADROOM];
 #ifdef Py_DEBUG // Aids debugging a little. There should never be NULL in the abstract interpreter.
-    for (int i = 0 ; i < MAX_ABSTRACT_INTERP_SIZE; i++) {
-        ctx->locals_and_stack[i] = PyJitRef_NULL;
+    for (int i = 0 ; i < ABSTRACT_INTERP_STACK_SIZE; i++) {
+        ctx->stack_array[i] = PyJitRef_NULL;
+    }
+    for (int i = 0 ; i < ABSTRACT_INTERP_LOCALS_SIZE; i++) {
+        ctx->locals_array[i] = PyJitRef_NULL;
     }
 #endif
 
@@ -1406,13 +1446,15 @@ _Py_uop_abstractcontext_init(JitOptContext *ctx)
     ctx->out_of_space = false;
     ctx->contradiction = false;
     ctx->builtins_watched = false;
+    ctx->dependencies = dependencies;
 }
 
 int
-_Py_uop_frame_pop(JitOptContext *ctx, PyCodeObject *co, int curr_stackentries)
+_Py_uop_frame_pop(JitOptContext *ctx, PyCodeObject *co)
 {
     _Py_UOpsAbstractFrame *frame = ctx->frame;
-    ctx->n_consumed = frame->locals;
+    ctx->stack.used = frame->stack;
+    ctx->locals.used = frame->locals;
 
     ctx->curr_frame_depth--;
 
@@ -1436,9 +1478,7 @@ _Py_uop_frame_pop(JitOptContext *ctx, PyCodeObject *co, int curr_stackentries)
     // Else: trace stack underflow.
 
     // This handles swapping out frames.
-    assert(curr_stackentries >= 1);
-    // -1 to stackentries as we push to the stack our return value after this.
-    _Py_UOpsAbstractFrame *new_frame = _Py_uop_frame_new(ctx, co, 
curr_stackentries - 1, NULL, 0);
+    _Py_UOpsAbstractFrame *new_frame = _Py_uop_frame_new(ctx, co, NULL, 0);
     if (new_frame == NULL) {
         ctx->done = true;
         return 1;
@@ -1474,7 +1514,7 @@ _Py_uop_symbols_test(PyObject *Py_UNUSED(self), PyObject *Py_UNUSED(ignored))
 {
     JitOptContext context;
     JitOptContext *ctx = &context;
-    _Py_uop_abstractcontext_init(ctx);
+    _Py_uop_abstractcontext_init(ctx, NULL);
     PyObject *val_42 = NULL;
     PyObject *val_43 = NULL;
     PyObject *val_big = NULL;

_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: [email protected]

Reply via email to