https://github.com/python/cpython/commit/3f37b94c7377a971a063aaf13387b940cb4cac01
commit: 3f37b94c7377a971a063aaf13387b940cb4cac01
branch: main
author: Mark Shannon <[email protected]>
committer: markshannon <[email protected]>
date: 2026-02-19T11:52:57Z
summary:
GH-144651: Optimize the new uops added when recording values during tracing. (GH-144948)
* Handle dependencies in the optimizer, not the tracer
* Strengthen some checks to avoid relying on optimizer for correctness
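
Background for the first bullet: executor invalidation in CPython's tier-2
JIT is driven by bloom filters. Each executor records the objects it depends
on in a small fixed-size filter; invalidating an object probes the filter,
where a hit may be a false positive but a miss is definitive, so invalidation
is conservative and cheap. This commit drops the tracer-side filter
(prev_state.dependencies) and instead has the optimizer pass record
dependencies as it proves facts about code objects. Below is a minimal,
self-contained C sketch of the idea; the BloomFilter type, sizes, and hash
mixing are simplified stand-ins, not CPython's real _PyBloomFilter:

    #include <stdint.h>
    #include <stdio.h>

    #define BLOOM_WORDS 8

    typedef struct { uint64_t bits[BLOOM_WORDS]; } BloomFilter;

    static void bloom_init(BloomFilter *bf) {
        for (int i = 0; i < BLOOM_WORDS; i++) bf->bits[i] = 0;
    }

    /* Derive a few bit positions from the object's address and set them. */
    static void bloom_add(BloomFilter *bf, void *obj) {
        uintptr_t h = (uintptr_t)obj;
        for (int i = 0; i < 3; i++) {
            h = h * 0x9E3779B97F4A7C15ULL + 1;  /* cheap hash mixing */
            unsigned bit = (unsigned)(h % (BLOOM_WORDS * 64));
            bf->bits[bit / 64] |= 1ULL << (bit % 64);
        }
    }

    /* True if every bit of `needle` is set in `haystack`: the executor
     * *may* depend on the invalidated object (false positives allowed). */
    static int bloom_may_contain(const BloomFilter *haystack,
                                 const BloomFilter *needle) {
        for (int i = 0; i < BLOOM_WORDS; i++) {
            if ((haystack->bits[i] & needle->bits[i]) != needle->bits[i]) {
                return 0;
            }
        }
        return 1;
    }

    int main(void) {
        int code_a, code_b;  /* stand-ins for two code objects */
        BloomFilter deps, probe;
        bloom_init(&deps);
        bloom_add(&deps, &code_a);  /* optimizer records a dependency */

        bloom_init(&probe);
        bloom_add(&probe, &code_a);
        printf("may depend on code_a: %d\n", bloom_may_contain(&deps, &probe));

        bloom_init(&probe);
        bloom_add(&probe, &code_b);
        /* Usually 0; a rare 1 would be a false positive, which is safe. */
        printf("may depend on code_b: %d\n", bloom_may_contain(&deps, &probe));
        return 0;
    }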
files:
M Include/internal/pycore_optimizer.h
M Include/internal/pycore_optimizer_types.h
M Include/internal/pycore_uop_ids.h
M Include/internal/pycore_uop_metadata.h
M Lib/test/test_capi/test_opt.py
M Modules/_testinternalcapi/test_cases.c.h
M Objects/codeobject.c
M Objects/frameobject.c
M Objects/funcobject.c
M Python/bytecodes.c
M Python/executor_cases.c.h
M Python/generated_cases.c.h
M Python/instrumentation.c
M Python/optimizer.c
M Python/optimizer_analysis.c
M Python/optimizer_bytecodes.c
M Python/optimizer_cases.c.h
M Python/optimizer_symbols.c
diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h
index 79a2d60eb788ea..d9f7f59de1798e 100644
--- a/Include/internal/pycore_optimizer.h
+++ b/Include/internal/pycore_optimizer.h
@@ -22,6 +22,10 @@ typedef struct _PyJitUopBuffer {
_PyUOpInstruction *end;
} _PyJitUopBuffer;
+typedef struct _JitOptRefBuffer {
+ JitOptRef *used;
+ JitOptRef *end;
+} _JitOptRefBuffer;
typedef struct _JitOptContext {
char done;
@@ -37,10 +41,15 @@ typedef struct _JitOptContext {
// Arena for the symbolic types.
ty_arena t_arena;
- JitOptRef *n_consumed;
- JitOptRef *limit;
- JitOptRef locals_and_stack[MAX_ABSTRACT_INTERP_SIZE];
+ /* To do -- We could make this more space efficient
+ * by using a single array and growing the stack and
+ * locals toward each other. */
+ _JitOptRefBuffer locals;
+ _JitOptRefBuffer stack;
+ JitOptRef locals_array[ABSTRACT_INTERP_LOCALS_SIZE];
+ JitOptRef stack_array[ABSTRACT_INTERP_STACK_SIZE];
_PyJitUopBuffer out_buffer;
+ _PyBloomFilter *dependencies;
} JitOptContext;
@@ -83,13 +92,11 @@ typedef struct _PyJitTracerInitialState {
} _PyJitTracerInitialState;
typedef struct _PyJitTracerPreviousState {
- bool dependencies_still_valid;
int instr_oparg;
int instr_stacklevel;
_Py_CODEUNIT *instr;
PyCodeObject *instr_code; // Strong
struct _PyInterpreterFrame *instr_frame;
- _PyBloomFilter dependencies;
PyObject *recorded_value; // Strong, may be NULL
} _PyJitTracerPreviousState;
@@ -303,25 +310,24 @@ extern void _Py_uop_sym_set_recorded_type(JitOptContext *ctx, JitOptRef sym, PyT
 extern void _Py_uop_sym_set_recorded_gen_func(JitOptContext *ctx, JitOptRef ref, PyFunctionObject *value);
extern PyCodeObject *_Py_uop_sym_get_probable_func_code(JitOptRef sym);
extern PyObject *_Py_uop_sym_get_probable_value(JitOptRef sym);
+extern JitOptRef *_Py_uop_sym_set_stack_depth(JitOptContext *ctx, int stack_depth, JitOptRef *current_sp);
-extern void _Py_uop_abstractcontext_init(JitOptContext *ctx);
+extern void _Py_uop_abstractcontext_init(JitOptContext *ctx, _PyBloomFilter *dependencies);
extern void _Py_uop_abstractcontext_fini(JitOptContext *ctx);
extern _Py_UOpsAbstractFrame *_Py_uop_frame_new(
JitOptContext *ctx,
PyCodeObject *co,
- int curr_stackentries,
JitOptRef *args,
int arg_len);
extern _Py_UOpsAbstractFrame *_Py_uop_frame_new_from_symbol(
JitOptContext *ctx,
JitOptRef callable,
- int curr_stackentries,
JitOptRef *args,
int arg_len);
-extern int _Py_uop_frame_pop(JitOptContext *ctx, PyCodeObject *co, int curr_stackentries);
+extern int _Py_uop_frame_pop(JitOptContext *ctx, PyCodeObject *co);
PyAPI_FUNC(PyObject *) _Py_uop_symbols_test(PyObject *self, PyObject *ignored);
@@ -357,8 +363,6 @@ PyAPI_FUNC(void) _PyJit_FinalizeTracing(PyThreadState *tstate, int err);
 void _PyPrintExecutor(_PyExecutorObject *executor, const _PyUOpInstruction *marker);
void _PyJit_TracerFree(_PyThreadStateImpl *_tstate);
-void _PyJit_Tracer_InvalidateDependency(PyThreadState *old_tstate, void *obj);
-
#ifdef _Py_TIER2
 typedef void (*_Py_RecordFuncPtr)(_PyInterpreterFrame *frame, _PyStackRef *stackpointer, int oparg, PyObject **recorded_value);
PyAPI_DATA(const _Py_RecordFuncPtr) _PyOpcode_RecordFunctions[];
diff --git a/Include/internal/pycore_optimizer_types.h b/Include/internal/pycore_optimizer_types.h
index 57c0c828c2aabd..2958db5b787975 100644
--- a/Include/internal/pycore_optimizer_types.h
+++ b/Include/internal/pycore_optimizer_types.h
@@ -11,8 +11,9 @@ extern "C" {
#include <stdbool.h>
#include "pycore_uop.h" // UOP_MAX_TRACE_LENGTH
-// Holds locals, stack, locals, stack ... (in that order)
-#define MAX_ABSTRACT_INTERP_SIZE 512
+#define ABSTRACT_INTERP_STACK_SIZE 256
+#define ABSTRACT_INTERP_LOCALS_SIZE 512
+
#define TY_ARENA_SIZE (UOP_MAX_TRACE_LENGTH * 5)
@@ -138,6 +139,7 @@ typedef struct _Py_UOpsAbstractFrame {
// Max stacklen
int stack_len;
int locals_len;
+ bool caller; // We have made a call from this frame during the trace
PyFunctionObject *func;
PyCodeObject *code;
diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h
index 94b05b736ed277..ebf21b12633c78 100644
--- a/Include/internal/pycore_uop_ids.h
+++ b/Include/internal/pycore_uop_ids.h
@@ -147,7 +147,7 @@ extern "C" {
#define _GUARD_CALLABLE_STR_1 402
#define _GUARD_CALLABLE_TUPLE_1 403
#define _GUARD_CALLABLE_TYPE_1 404
-#define _GUARD_CODE 405
+#define _GUARD_CODE_VERSION 405
#define _GUARD_DORV_NO_DICT 406
#define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 407
#define _GUARD_GLOBALS_VERSION 408
@@ -658,10 +658,10 @@ extern "C" {
#define _GUARD_CALLABLE_TYPE_1_r13 855
#define _GUARD_CALLABLE_TYPE_1_r23 856
#define _GUARD_CALLABLE_TYPE_1_r33 857
-#define _GUARD_CODE_r00 858
-#define _GUARD_CODE_r11 859
-#define _GUARD_CODE_r22 860
-#define _GUARD_CODE_r33 861
+#define _GUARD_CODE_VERSION_r00 858
+#define _GUARD_CODE_VERSION_r11 859
+#define _GUARD_CODE_VERSION_r22 860
+#define _GUARD_CODE_VERSION_r33 861
#define _GUARD_DORV_NO_DICT_r01 862
#define _GUARD_DORV_NO_DICT_r11 863
#define _GUARD_DORV_NO_DICT_r22 864
diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h
index 5a47eae7a9abb1..7921d229f11db3 100644
--- a/Include/internal/pycore_uop_metadata.h
+++ b/Include/internal/pycore_uop_metadata.h
@@ -370,7 +370,7 @@ const uint32_t _PyUop_Flags[MAX_UOP_ID+1] = {
[_TIER2_RESUME_CHECK] = HAS_PERIODIC_FLAG,
[_COLD_EXIT] = HAS_SYNC_SP_FLAG,
[_COLD_DYNAMIC_EXIT] = HAS_SYNC_SP_FLAG,
- [_GUARD_CODE] = HAS_EXIT_FLAG,
+ [_GUARD_CODE_VERSION] = HAS_EXIT_FLAG,
[_GUARD_IP__PUSH_FRAME] = HAS_EXIT_FLAG,
[_GUARD_IP_YIELD_VALUE] = HAS_EXIT_FLAG,
[_GUARD_IP_RETURN_VALUE] = HAS_EXIT_FLAG,
@@ -3404,13 +3404,13 @@ const _PyUopCachingInfo _PyUop_Caching[MAX_UOP_ID+1] = {
{ -1, -1, -1 },
},
},
- [_GUARD_CODE] = {
+ [_GUARD_CODE_VERSION] = {
.best = { 0, 1, 2, 3 },
.entries = {
- { 0, 0, _GUARD_CODE_r00 },
- { 1, 1, _GUARD_CODE_r11 },
- { 2, 2, _GUARD_CODE_r22 },
- { 3, 3, _GUARD_CODE_r33 },
+ { 0, 0, _GUARD_CODE_VERSION_r00 },
+ { 1, 1, _GUARD_CODE_VERSION_r11 },
+ { 2, 2, _GUARD_CODE_VERSION_r22 },
+ { 3, 3, _GUARD_CODE_VERSION_r33 },
},
},
[_GUARD_IP__PUSH_FRAME] = {
@@ -4221,10 +4221,10 @@ const uint16_t _PyUop_Uncached[MAX_UOP_REGS_ID+1] = {
[_TIER2_RESUME_CHECK_r33] = _TIER2_RESUME_CHECK,
[_COLD_EXIT_r00] = _COLD_EXIT,
[_COLD_DYNAMIC_EXIT_r00] = _COLD_DYNAMIC_EXIT,
- [_GUARD_CODE_r00] = _GUARD_CODE,
- [_GUARD_CODE_r11] = _GUARD_CODE,
- [_GUARD_CODE_r22] = _GUARD_CODE,
- [_GUARD_CODE_r33] = _GUARD_CODE,
+ [_GUARD_CODE_VERSION_r00] = _GUARD_CODE_VERSION,
+ [_GUARD_CODE_VERSION_r11] = _GUARD_CODE_VERSION,
+ [_GUARD_CODE_VERSION_r22] = _GUARD_CODE_VERSION,
+ [_GUARD_CODE_VERSION_r33] = _GUARD_CODE_VERSION,
[_GUARD_IP__PUSH_FRAME_r00] = _GUARD_IP__PUSH_FRAME,
[_GUARD_IP__PUSH_FRAME_r11] = _GUARD_IP__PUSH_FRAME,
[_GUARD_IP__PUSH_FRAME_r22] = _GUARD_IP__PUSH_FRAME,
@@ -4655,11 +4655,11 @@ const char *const _PyOpcode_uop_name[MAX_UOP_REGS_ID+1] = {
[_GUARD_CALLABLE_TYPE_1_r13] = "_GUARD_CALLABLE_TYPE_1_r13",
[_GUARD_CALLABLE_TYPE_1_r23] = "_GUARD_CALLABLE_TYPE_1_r23",
[_GUARD_CALLABLE_TYPE_1_r33] = "_GUARD_CALLABLE_TYPE_1_r33",
- [_GUARD_CODE] = "_GUARD_CODE",
- [_GUARD_CODE_r00] = "_GUARD_CODE_r00",
- [_GUARD_CODE_r11] = "_GUARD_CODE_r11",
- [_GUARD_CODE_r22] = "_GUARD_CODE_r22",
- [_GUARD_CODE_r33] = "_GUARD_CODE_r33",
+ [_GUARD_CODE_VERSION] = "_GUARD_CODE_VERSION",
+ [_GUARD_CODE_VERSION_r00] = "_GUARD_CODE_VERSION_r00",
+ [_GUARD_CODE_VERSION_r11] = "_GUARD_CODE_VERSION_r11",
+ [_GUARD_CODE_VERSION_r22] = "_GUARD_CODE_VERSION_r22",
+ [_GUARD_CODE_VERSION_r33] = "_GUARD_CODE_VERSION_r33",
[_GUARD_DORV_NO_DICT] = "_GUARD_DORV_NO_DICT",
[_GUARD_DORV_NO_DICT_r01] = "_GUARD_DORV_NO_DICT_r01",
[_GUARD_DORV_NO_DICT_r11] = "_GUARD_DORV_NO_DICT_r11",
@@ -6070,7 +6070,7 @@ int _PyUop_num_popped(int opcode, int oparg)
return 0;
case _COLD_DYNAMIC_EXIT:
return 0;
- case _GUARD_CODE:
+ case _GUARD_CODE_VERSION:
return 0;
case _GUARD_IP__PUSH_FRAME:
return 0;
diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py
index 2cad53d9c0728b..7ac71fbfab1fe0 100644
--- a/Lib/test/test_capi/test_opt.py
+++ b/Lib/test/test_capi/test_opt.py
@@ -110,6 +110,7 @@ def f{n}():
for exe in executors[:i]:
self.assertTrue(exe.is_valid())
+    @unittest.skipIf(os.getenv("PYTHON_UOPS_OPTIMIZE") == "0", "Needs uop optimizer to run.")
def test_uop_optimizer_invalidation(self):
# Generate a new function at each call
ns = {}
diff --git a/Modules/_testinternalcapi/test_cases.c.h b/Modules/_testinternalcapi/test_cases.c.h
index a9cd0574a596a1..fde6db4933f74a 100644
--- a/Modules/_testinternalcapi/test_cases.c.h
+++ b/Modules/_testinternalcapi/test_cases.c.h
@@ -5675,7 +5675,8 @@
assert(executor->vm_data.code == code);
assert(executor->vm_data.valid);
assert(tstate->current_executor == NULL);
-        if (_Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker) & _PY_EVAL_EVENTS_MASK) {
+        uintptr_t iversion = FT_ATOMIC_LOAD_UINTPTR_ACQUIRE(code->_co_instrumentation_version);
+        if (_Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker) != iversion) {
opcode = executor->vm_data.opcode;
oparg = (oparg & ~255) | executor->vm_data.oparg;
next_instr = this_instr;
diff --git a/Objects/codeobject.c b/Objects/codeobject.c
index ed3cc41480ab5c..776444a0cc2086 100644
--- a/Objects/codeobject.c
+++ b/Objects/codeobject.c
@@ -2433,7 +2433,6 @@ code_dealloc(PyObject *self)
PyMem_Free(co_extra);
}
#ifdef _Py_TIER2
- _PyJit_Tracer_InvalidateDependency(tstate, self);
if (co->co_executors != NULL) {
clear_executors(co);
}
diff --git a/Objects/frameobject.c b/Objects/frameobject.c
index 9d774a71edb797..9a7abfc0ec26ab 100644
--- a/Objects/frameobject.c
+++ b/Objects/frameobject.c
@@ -262,7 +262,6 @@ framelocalsproxy_setitem(PyObject *self, PyObject *key, PyObject *value)
#if _Py_TIER2
_Py_Executors_InvalidateDependency(_PyInterpreterState_GET(), co, 1);
- _PyJit_Tracer_InvalidateDependency(_PyThreadState_GET(), co);
#endif
_PyLocals_Kind kind = _PyLocals_GetKind(co->co_localspluskinds, i);
diff --git a/Objects/funcobject.c b/Objects/funcobject.c
index 8099b82f4835fb..efe27a2b70c4de 100644
--- a/Objects/funcobject.c
+++ b/Objects/funcobject.c
@@ -12,7 +12,7 @@
#include "pycore_setobject.h" // _PySet_NextEntry()
#include "pycore_stats.h"
#include "pycore_weakref.h" // FT_CLEAR_WEAKREFS()
-#include "pycore_optimizer.h" // _PyJit_Tracer_InvalidateDependency
+#include "pycore_optimizer.h" // _Py_Executors_InvalidateDependency
static const char *
func_event_name(PyFunction_WatchEvent event) {
diff --git a/Python/bytecodes.c b/Python/bytecodes.c
index 63a4222264985a..01eaf4a59b645a 100644
--- a/Python/bytecodes.c
+++ b/Python/bytecodes.c
@@ -3125,10 +3125,10 @@ dummy_func(
assert(executor->vm_data.code == code);
assert(executor->vm_data.valid);
assert(tstate->current_executor == NULL);
- /* If the eval breaker is set then stay in tier 1.
- * This avoids any potentially infinite loops
- * involving _RESUME_CHECK */
-        if (_Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker) & _PY_EVAL_EVENTS_MASK) {
+        /* If the eval breaker is set, or instrumentation is needed, then stay in tier 1.
+         * This avoids any potentially infinite loops involving _RESUME_CHECK */
+        uintptr_t iversion = FT_ATOMIC_LOAD_UINTPTR_ACQUIRE(code->_co_instrumentation_version);
+        if (_Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker) != iversion) {
opcode = executor->vm_data.opcode;
oparg = (oparg & ~255) | executor->vm_data.oparg;
next_instr = this_instr;
@@ -5616,9 +5616,9 @@ dummy_func(
HANDLE_PENDING_AND_DEOPT_IF(_Py_emscripten_signal_clock == 0);
_Py_emscripten_signal_clock -= Py_EMSCRIPTEN_SIGNAL_HANDLING;
#endif
+        uintptr_t iversion = FT_ATOMIC_LOAD_UINTPTR_ACQUIRE(_PyFrame_GetCode(frame)->_co_instrumentation_version);
         uintptr_t eval_breaker = _Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker);
-        HANDLE_PENDING_AND_DEOPT_IF(eval_breaker & _PY_EVAL_EVENTS_MASK);
-        assert(tstate->tracing || eval_breaker == FT_ATOMIC_LOAD_UINTPTR_ACQUIRE(_PyFrame_GetCode(frame)->_co_instrumentation_version));
+        HANDLE_PENDING_AND_DEOPT_IF(eval_breaker != iversion);
}
tier2 op(_COLD_EXIT, ( -- )) {
@@ -5668,9 +5668,9 @@ dummy_func(
Py_UNREACHABLE();
}
- tier2 op(_GUARD_CODE, (version/2 -- )) {
+ tier2 op(_GUARD_CODE_VERSION, (version/2 -- )) {
PyObject *code = PyStackRef_AsPyObjectBorrow(frame->f_executable);
- EXIT_IF(code == Py_None);
+ assert(PyCode_Check(code));
EXIT_IF(((PyCodeObject *)code)->co_version != version);
}
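
The strengthened checks above lean on how the eval-breaker word is packed:
its low bits carry pending-event flags (_PY_EVAL_EVENTS_MASK) and the
remaining bits carry the global instrumentation version, while a code
object's _co_instrumentation_version keeps the event bits clear. A single
`eval_breaker != iversion` compare therefore deopts both when any event is
pending and when the code's instrumentation is stale, which is what lets the
old mask test plus assert collapse into one comparison. A rough illustration
follows; the mask width and shift here are made-up values, not CPython's
actual layout:

    #include <stdint.h>
    #include <stdio.h>

    /* Hypothetical packing: low 8 bits = event flags, rest = version. */
    #define EVAL_EVENTS_MASK ((uintptr_t)0xFF)
    #define VERSION_SHIFT 8

    static uintptr_t pack(uintptr_t version, uintptr_t events) {
        return (version << VERSION_SHIFT) | (events & EVAL_EVENTS_MASK);
    }

    int main(void) {
        /* code->_co_instrumentation_version: version 7, event bits clear */
        uintptr_t iversion = pack(7, 0);

        /* No pending events, instrumentation current: compare passes. */
        uintptr_t eval_breaker = pack(7, 0);
        printf("enter executor: %d\n", eval_breaker == iversion);

        /* A pending signal sets an event bit: compare fails, stay tier 1. */
        eval_breaker = pack(7, 1);
        printf("enter executor: %d\n", eval_breaker == iversion);

        /* Instrumentation bumped the global version: compare also fails. */
        eval_breaker = pack(8, 0);
        printf("enter executor: %d\n", eval_breaker == iversion);
        return 0;
    }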
diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h
index 1b3de80e4443b1..8b36d1abf2e916 100644
--- a/Python/executor_cases.c.h
+++ b/Python/executor_cases.c.h
@@ -20034,13 +20034,13 @@
}
_Py_emscripten_signal_clock -= Py_EMSCRIPTEN_SIGNAL_HANDLING;
#endif
+        uintptr_t iversion = FT_ATOMIC_LOAD_UINTPTR_ACQUIRE(_PyFrame_GetCode(frame)->_co_instrumentation_version);
         uintptr_t eval_breaker = _Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker);
-        if (eval_breaker & _PY_EVAL_EVENTS_MASK) {
+        if (eval_breaker != iversion) {
             UOP_STAT_INC(uopcode, miss);
             SET_CURRENT_CACHED_VALUES(0);
             JUMP_TO_JUMP_TARGET();
         }
-        assert(tstate->tracing || eval_breaker == FT_ATOMIC_LOAD_UINTPTR_ACQUIRE(_PyFrame_GetCode(frame)->_co_instrumentation_version));
SET_CURRENT_CACHED_VALUES(0);
assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE());
break;
@@ -20059,14 +20059,14 @@
}
_Py_emscripten_signal_clock -= Py_EMSCRIPTEN_SIGNAL_HANDLING;
#endif
+        uintptr_t iversion = FT_ATOMIC_LOAD_UINTPTR_ACQUIRE(_PyFrame_GetCode(frame)->_co_instrumentation_version);
         uintptr_t eval_breaker = _Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker);
-        if (eval_breaker & _PY_EVAL_EVENTS_MASK) {
+        if (eval_breaker != iversion) {
             UOP_STAT_INC(uopcode, miss);
             _tos_cache0 = _stack_item_0;
             SET_CURRENT_CACHED_VALUES(1);
             JUMP_TO_JUMP_TARGET();
         }
-        assert(tstate->tracing || eval_breaker == FT_ATOMIC_LOAD_UINTPTR_ACQUIRE(_PyFrame_GetCode(frame)->_co_instrumentation_version));
_tos_cache0 = _stack_item_0;
SET_CURRENT_CACHED_VALUES(1);
assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE());
@@ -20088,15 +20088,15 @@
}
_Py_emscripten_signal_clock -= Py_EMSCRIPTEN_SIGNAL_HANDLING;
#endif
+        uintptr_t iversion = FT_ATOMIC_LOAD_UINTPTR_ACQUIRE(_PyFrame_GetCode(frame)->_co_instrumentation_version);
         uintptr_t eval_breaker = _Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker);
-        if (eval_breaker & _PY_EVAL_EVENTS_MASK) {
+        if (eval_breaker != iversion) {
             UOP_STAT_INC(uopcode, miss);
             _tos_cache1 = _stack_item_1;
             _tos_cache0 = _stack_item_0;
             SET_CURRENT_CACHED_VALUES(2);
             JUMP_TO_JUMP_TARGET();
         }
-        assert(tstate->tracing || eval_breaker == FT_ATOMIC_LOAD_UINTPTR_ACQUIRE(_PyFrame_GetCode(frame)->_co_instrumentation_version));
_tos_cache1 = _stack_item_1;
_tos_cache0 = _stack_item_0;
SET_CURRENT_CACHED_VALUES(2);
@@ -20121,8 +20121,9 @@
}
_Py_emscripten_signal_clock -= Py_EMSCRIPTEN_SIGNAL_HANDLING;
#endif
+        uintptr_t iversion = FT_ATOMIC_LOAD_UINTPTR_ACQUIRE(_PyFrame_GetCode(frame)->_co_instrumentation_version);
         uintptr_t eval_breaker = _Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker);
- if (eval_breaker & _PY_EVAL_EVENTS_MASK) {
+ if (eval_breaker != iversion) {
UOP_STAT_INC(uopcode, miss);
_tos_cache2 = _stack_item_2;
_tos_cache1 = _stack_item_1;
@@ -20130,7 +20131,6 @@
SET_CURRENT_CACHED_VALUES(3);
JUMP_TO_JUMP_TARGET();
}
-        assert(tstate->tracing || eval_breaker == FT_ATOMIC_LOAD_UINTPTR_ACQUIRE(_PyFrame_GetCode(frame)->_co_instrumentation_version));
_tos_cache2 = _stack_item_2;
_tos_cache1 = _stack_item_1;
_tos_cache0 = _stack_item_0;
@@ -20184,16 +20184,12 @@
GOTO_TIER_ONE(target);
}
- case _GUARD_CODE_r00: {
+ case _GUARD_CODE_VERSION_r00: {
CHECK_CURRENT_CACHED_VALUES(0);
assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE());
uint32_t version = (uint32_t)CURRENT_OPERAND0_32();
PyObject *code = PyStackRef_AsPyObjectBorrow(frame->f_executable);
- if (code == Py_None) {
- UOP_STAT_INC(uopcode, miss);
- SET_CURRENT_CACHED_VALUES(0);
- JUMP_TO_JUMP_TARGET();
- }
+ assert(PyCode_Check(code));
if (((PyCodeObject *)code)->co_version != version) {
UOP_STAT_INC(uopcode, miss);
SET_CURRENT_CACHED_VALUES(0);
@@ -20204,18 +20200,13 @@
break;
}
- case _GUARD_CODE_r11: {
+ case _GUARD_CODE_VERSION_r11: {
CHECK_CURRENT_CACHED_VALUES(1);
assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE());
_PyStackRef _stack_item_0 = _tos_cache0;
uint32_t version = (uint32_t)CURRENT_OPERAND0_32();
PyObject *code = PyStackRef_AsPyObjectBorrow(frame->f_executable);
- if (code == Py_None) {
- UOP_STAT_INC(uopcode, miss);
- _tos_cache0 = _stack_item_0;
- SET_CURRENT_CACHED_VALUES(1);
- JUMP_TO_JUMP_TARGET();
- }
+ assert(PyCode_Check(code));
if (((PyCodeObject *)code)->co_version != version) {
UOP_STAT_INC(uopcode, miss);
_tos_cache0 = _stack_item_0;
@@ -20228,20 +20219,14 @@
break;
}
- case _GUARD_CODE_r22: {
+ case _GUARD_CODE_VERSION_r22: {
CHECK_CURRENT_CACHED_VALUES(2);
assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE());
_PyStackRef _stack_item_0 = _tos_cache0;
_PyStackRef _stack_item_1 = _tos_cache1;
uint32_t version = (uint32_t)CURRENT_OPERAND0_32();
PyObject *code = PyStackRef_AsPyObjectBorrow(frame->f_executable);
- if (code == Py_None) {
- UOP_STAT_INC(uopcode, miss);
- _tos_cache1 = _stack_item_1;
- _tos_cache0 = _stack_item_0;
- SET_CURRENT_CACHED_VALUES(2);
- JUMP_TO_JUMP_TARGET();
- }
+ assert(PyCode_Check(code));
if (((PyCodeObject *)code)->co_version != version) {
UOP_STAT_INC(uopcode, miss);
_tos_cache1 = _stack_item_1;
@@ -20256,7 +20241,7 @@
break;
}
- case _GUARD_CODE_r33: {
+ case _GUARD_CODE_VERSION_r33: {
CHECK_CURRENT_CACHED_VALUES(3);
assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE());
_PyStackRef _stack_item_0 = _tos_cache0;
@@ -20264,14 +20249,7 @@
_PyStackRef _stack_item_2 = _tos_cache2;
uint32_t version = (uint32_t)CURRENT_OPERAND0_32();
PyObject *code = PyStackRef_AsPyObjectBorrow(frame->f_executable);
- if (code == Py_None) {
- UOP_STAT_INC(uopcode, miss);
- _tos_cache2 = _stack_item_2;
- _tos_cache1 = _stack_item_1;
- _tos_cache0 = _stack_item_0;
- SET_CURRENT_CACHED_VALUES(3);
- JUMP_TO_JUMP_TARGET();
- }
+ assert(PyCode_Check(code));
if (((PyCodeObject *)code)->co_version != version) {
UOP_STAT_INC(uopcode, miss);
_tos_cache2 = _stack_item_2;
diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h
index 829a6988954e5f..bc9ae7e0ab3be3 100644
--- a/Python/generated_cases.c.h
+++ b/Python/generated_cases.c.h
@@ -5675,7 +5675,8 @@
assert(executor->vm_data.code == code);
assert(executor->vm_data.valid);
assert(tstate->current_executor == NULL);
-        if (_Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker) & _PY_EVAL_EVENTS_MASK) {
+        uintptr_t iversion = FT_ATOMIC_LOAD_UINTPTR_ACQUIRE(code->_co_instrumentation_version);
+        if (_Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker) != iversion) {
opcode = executor->vm_data.opcode;
oparg = (oparg & ~255) | executor->vm_data.oparg;
next_instr = this_instr;
diff --git a/Python/instrumentation.c b/Python/instrumentation.c
index 28bbe1d82a3b88..b074d23277878b 100644
--- a/Python/instrumentation.c
+++ b/Python/instrumentation.c
@@ -1785,7 +1785,6 @@ force_instrument_lock_held(PyCodeObject *code, PyInterpreterState *interp)
_PyCode_Clear_Executors(code);
}
_Py_Executors_InvalidateDependency(interp, code, 1);
- _PyJit_Tracer_InvalidateDependency(PyThreadState_GET(), code);
#endif
int code_len = (int)Py_SIZE(code);
/* Exit early to avoid creating instrumentation
@@ -2115,6 +2114,9 @@ int _PyMonitoring_ClearToolId(int tool_id)
// Set the new global version so all the code objects can refresh the
// instrumentation.
set_global_version(_PyThreadState_GET(), version);
+#ifdef _Py_TIER2
+ _Py_Executors_InvalidateAll(interp, 1);
+#endif
int res = instrument_all_executing_code_objects(interp);
_PyEval_StartTheWorld(interp);
return res;
@@ -2457,6 +2459,9 @@ monitoring_restart_events_impl(PyObject *module)
}
interp->last_restart_version = restart_version;
set_global_version(tstate, new_version);
+#ifdef _Py_TIER2
+ _Py_Executors_InvalidateAll(interp, 1);
+#endif
int res = instrument_all_executing_code_objects(interp);
_PyEval_StartTheWorld(interp);
diff --git a/Python/optimizer.c b/Python/optimizer.c
index 466729b158d345..f075e28d71e0f8 100644
--- a/Python/optimizer.c
+++ b/Python/optimizer.c
@@ -160,11 +160,6 @@ _PyOptimizer_Optimize(
interp->compiling = false;
return 0;
}
-    // One of our dependencies while tracing was invalidated. Not worth compiling.
- if (!_tstate->jit_tracer_state->prev_state.dependencies_still_valid) {
- interp->compiling = false;
- return 0;
- }
_PyExecutorObject *executor;
int err = uop_optimize(frame, tstate, &executor, progress_needed);
if (err <= 0) {
@@ -615,7 +610,6 @@ _PyJit_translate_single_bytecode_to_trace(
_PyJitTracerState *tracer = _tstate->jit_tracer_state;
PyCodeObject *old_code = tracer->prev_state.instr_code;
     bool progress_needed = (tracer->initial_state.chain_depth % MAX_CHAIN_DEPTH) == 0;
- _PyBloomFilter *dependencies = &tracer->prev_state.dependencies;
_PyJitUopBuffer *trace = &tracer->code_buffer;
_Py_CODEUNIT *this_instr = tracer->prev_state.instr;
@@ -701,10 +695,6 @@ _PyJit_translate_single_bytecode_to_trace(
}
#endif
- if (!tracer->prev_state.dependencies_still_valid) {
- goto done;
- }
-
     // This happens when a recursive call happens that we can't trace. Such as Python -> C -> Python calls
// If we haven't guarded the IP, then it's untraceable.
if (frame != tracer->prev_state.instr_frame && !needs_guard_ip) {
@@ -784,11 +774,6 @@ _PyJit_translate_single_bytecode_to_trace(
ADD_TO_TRACE(_SET_IP, 0, (uintptr_t)target_instr, target);
}
- // Can be NULL for the entry frame.
- if (old_code != NULL) {
- _Py_BloomFilter_Add(dependencies, old_code);
- }
-
switch (opcode) {
case POP_JUMP_IF_NONE:
case POP_JUMP_IF_NOT_NONE:
@@ -925,15 +910,6 @@ _PyJit_translate_single_bytecode_to_trace(
expansion->uops[i].offset);
Py_FatalError("garbled expansion");
}
-            if (uop == _PUSH_FRAME || uop == _RETURN_VALUE || uop == _RETURN_GENERATOR || uop == _YIELD_VALUE) {
-                PyCodeObject *new_code = (PyCodeObject *)PyStackRef_AsPyObjectBorrow(frame->f_executable);
-                if (new_code != NULL && !Py_IsNone((PyObject*)new_code)) {
-                    _Py_BloomFilter_Add(dependencies, new_code);
-                }
-                ADD_TO_TRACE(uop, oparg, operand, target);
-                uop_buffer_last(trace)->operand1 = PyStackRef_IsNone(frame->f_executable) ? 2 : ((int)(frame->stackpointer - _PyFrame_Stackbase(frame)));
-                break;
-            }
if (uop == _BINARY_OP_INPLACE_ADD_UNICODE) {
assert(i + 1 == nuops);
                 _Py_CODEUNIT *next = target_instr + 1 + _PyOpcode_Caches[_PyOpcode_Deopt[opcode]];
@@ -964,7 +940,10 @@ _PyJit_translate_single_bytecode_to_trace(
ADD_TO_TRACE(_RECORD_CODE, 0, (uintptr_t)code, 0);
ADD_TO_TRACE(guard_ip, 0, (uintptr_t)next_instr, 0);
if (PyCode_Check(code)) {
-            ADD_TO_TRACE(_GUARD_CODE, 0, ((PyCodeObject *)code)->co_version, 0);
+            /* Record stack depth, in operand1 */
+            int stack_depth = (int)(frame->stackpointer - _PyFrame_Stackbase(frame));
+            uop_buffer_last(trace)->operand1 = stack_depth;
+            ADD_TO_TRACE(_GUARD_CODE_VERSION, 0, ((PyCodeObject *)code)->co_version, 0);
}
}
// Loop back to the start
@@ -1046,7 +1025,6 @@ _PyJit_TryInitializeTracing(
tracer->initial_state.exit = exit;
     tracer->initial_state.stack_depth = (int)(stack_pointer - _PyFrame_Stackbase(frame));
     tracer->initial_state.chain_depth = chain_depth;
-    tracer->prev_state.dependencies_still_valid = true;
     tracer->prev_state.instr_code = (PyCodeObject *)Py_NewRef(_PyFrame_GetCode(frame));
tracer->prev_state.instr = curr_instr;
tracer->prev_state.instr_frame = frame;
@@ -1064,7 +1042,6 @@ _PyJit_TryInitializeTracing(
if (_PyOpcode_Caches[_PyOpcode_Deopt[close_loop_instr->op.code]]) {
close_loop_instr[1].counter = trigger_backoff_counter();
}
- _Py_BloomFilter_Init(&tracer->prev_state.dependencies);
tracer->is_tracing = true;
return 1;
}
@@ -1216,7 +1193,7 @@ prepare_for_execution(_PyUOpInstruction *buffer, int length)
base_opcode == _GUARD_IP_RETURN_VALUE ||
base_opcode == _GUARD_IP_YIELD_VALUE ||
base_opcode == _GUARD_IP_RETURN_GENERATOR ||
- base_opcode == _GUARD_CODE
+ base_opcode == _GUARD_CODE_VERSION
) {
base_exit_op = _DYNAMIC_EXIT;
}
@@ -1498,7 +1475,6 @@ uop_optimize(
{
_PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate;
assert(_tstate->jit_tracer_state != NULL);
-    _PyBloomFilter *dependencies = &_tstate->jit_tracer_state->prev_state.dependencies;
_PyUOpInstruction *buffer = _tstate->jit_tracer_state->code_buffer.start;
OPT_STAT_INC(attempts);
bool is_noopt = !tstate->interp->opt_config.uops_optimize_enabled;
@@ -1510,11 +1486,15 @@ uop_optimize(
assert(length > 0);
assert(length < UOP_MAX_TRACE_LENGTH);
OPT_STAT_INC(traces_created);
+
+ _PyBloomFilter dependencies;
+ _Py_BloomFilter_Init(&dependencies);
if (!is_noopt) {
         _PyUOpInstruction *output = &_tstate->jit_tracer_state->uop_array[UOP_MAX_TRACE_LENGTH];
length = _Py_uop_analyze_and_optimize(
_tstate, buffer, length, curr_stackentries,
- output, dependencies);
+ output, &dependencies);
+
if (length <= 0) {
return length;
}
@@ -1546,7 +1526,7 @@ uop_optimize(
length = prepare_for_execution(buffer, length);
assert(length <= UOP_MAX_TRACE_LENGTH);
_PyExecutorObject *executor = make_executor_from_uops(
- _tstate, buffer, length, dependencies);
+ _tstate, buffer, length, &dependencies);
if (executor == NULL) {
return -1;
}
@@ -1861,21 +1841,6 @@ _Py_Executors_InvalidateDependency(PyInterpreterState *interp, void *obj, int is
_Py_Executors_InvalidateAll(interp, is_invalidation);
}
-void
-_PyJit_Tracer_InvalidateDependency(PyThreadState *tstate, void *obj)
-{
- _PyBloomFilter obj_filter;
- _Py_BloomFilter_Init(&obj_filter);
- _Py_BloomFilter_Add(&obj_filter, obj);
- _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate;
- if (_tstate->jit_tracer_state == NULL) {
- return;
- }
-    if (bloom_filter_may_contain(&_tstate->jit_tracer_state->prev_state.dependencies, &obj_filter))
- {
- _tstate->jit_tracer_state->prev_state.dependencies_still_valid = false;
- }
-}
/* Invalidate all executors */
void
_Py_Executors_InvalidateAll(PyInterpreterState *interp, int is_invalidation)
diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c
index c6a513ad220b63..45dd42c96064bc 100644
--- a/Python/optimizer_analysis.c
+++ b/Python/optimizer_analysis.c
@@ -55,23 +55,21 @@
static void
dump_abstract_stack(_Py_UOpsAbstractFrame *frame, JitOptRef *stack_pointer)
{
- JitOptRef *stack_base = frame->stack;
- JitOptRef *locals_base = frame->locals;
printf(" locals=[");
- for (JitOptRef *ptr = locals_base; ptr < stack_base; ptr++) {
- if (ptr != locals_base) {
+ for (int i = 0 ; i < frame->locals_len; i++) {
+ if (i > 0) {
printf(", ");
}
- _PyUOpSymPrint(*ptr);
+ _PyUOpSymPrint(frame->locals[i]);
}
printf("]\n");
- if (stack_pointer < stack_base) {
- printf(" stack=%d\n", (int)(stack_pointer - stack_base));
+ if (stack_pointer < frame->stack) {
+ printf(" stack=%d\n", (int)(stack_pointer - frame->stack));
}
else {
printf(" stack=[");
- for (JitOptRef *ptr = stack_base; ptr < stack_pointer; ptr++) {
- if (ptr != stack_base) {
+ for (JitOptRef *ptr = frame->stack; ptr < stack_pointer; ptr++) {
+ if (ptr != frame->stack) {
printf(", ");
}
_PyUOpSymPrint(*ptr);
@@ -291,6 +289,7 @@ add_op(JitOptContext *ctx, _PyUOpInstruction *this_instr,
 #define sym_set_recorded_gen_func(SYM, VAL) _Py_uop_sym_set_recorded_gen_func(ctx, SYM, VAL)
 #define sym_get_probable_func_code _Py_uop_sym_get_probable_func_code
 #define sym_get_probable_value _Py_uop_sym_get_probable_value
+#define sym_set_stack_depth(DEPTH, SP) _Py_uop_sym_set_stack_depth(ctx, DEPTH, SP)
/* Comparison oparg masks */
#define COMPARE_LT_MASK 2
@@ -473,14 +472,15 @@ optimize_uops(
interp->type_watchers[TYPE_WATCHER_ID] = type_watcher_callback;
}
- _Py_uop_abstractcontext_init(ctx);
-    _Py_UOpsAbstractFrame *frame = _Py_uop_frame_new(ctx, (PyCodeObject *)func->func_code, curr_stacklen, NULL, 0);
+    _Py_uop_abstractcontext_init(ctx, dependencies);
+    _Py_UOpsAbstractFrame *frame = _Py_uop_frame_new(ctx, (PyCodeObject *)func->func_code, NULL, 0);
if (frame == NULL) {
return 0;
}
frame->func = func;
ctx->curr_frame_depth++;
ctx->frame = frame;
+ _Py_uop_sym_set_stack_depth(ctx, curr_stacklen, frame->stack_pointer);
_PyUOpInstruction *this_instr = NULL;
JitOptRef *stack_pointer = ctx->frame->stack_pointer;
@@ -718,8 +718,7 @@ _Py_uop_analyze_and_optimize(
OPT_STAT_INC(optimizer_attempts);
length = optimize_uops(
- tstate, buffer, length, curr_stacklen,
- output, dependencies);
+ tstate, buffer, length, curr_stacklen, output, dependencies);
if (length == 0) {
return length;
diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c
index 2b35628ad99999..228bd51a28bb69 100644
--- a/Python/optimizer_bytecodes.c
+++ b/Python/optimizer_bytecodes.c
@@ -46,6 +46,7 @@ typedef struct _Py_UOpsAbstractFrame _Py_UOpsAbstractFrame;
 #define sym_set_recorded_gen_func(SYM, VAL) _Py_uop_sym_set_recorded_gen_func(ctx, SYM, VAL)
 #define sym_get_probable_func_code _Py_uop_sym_get_probable_func_code
 #define sym_get_probable_value _Py_uop_sym_get_probable_value
+#define sym_set_stack_depth(DEPTH, SP) _Py_uop_sym_set_stack_depth(ctx, DEPTH, SP)
extern int
optimize_to_bool(
@@ -362,7 +363,7 @@ dummy_func(void) {
}
op(_BINARY_OP_SUBSCR_INIT_CALL, (container, sub, getitem -- new_frame)) {
-        _Py_UOpsAbstractFrame *f = frame_new_from_symbol(ctx, getitem, 0, NULL, 0);
+        _Py_UOpsAbstractFrame *f = frame_new_from_symbol(ctx, getitem, NULL, 0);
if (f == NULL) {
break;
}
@@ -833,7 +834,7 @@ dummy_func(void) {
// + 1 for _SAVE_RETURN_OFFSET
// FIX ME -- This needs a version check and function watcher
         PyCodeObject *co = (PyCodeObject *)((PyFunctionObject *)fget)->func_code;
- _Py_UOpsAbstractFrame *f = frame_new(ctx, co, 0, NULL, 0);
+ _Py_UOpsAbstractFrame *f = frame_new(ctx, co, NULL, 0);
if (f == NULL) {
break;
}
@@ -894,9 +895,9 @@ dummy_func(void) {
}
if (sym_is_null(self_or_null) || sym_is_not_null(self_or_null)) {
-            new_frame = PyJitRef_WrapInvalid(frame_new_from_symbol(ctx, callable, 0, args, argcount));
+            new_frame = PyJitRef_WrapInvalid(frame_new_from_symbol(ctx, callable, args, argcount));
         } else {
-            new_frame = PyJitRef_WrapInvalid(frame_new_from_symbol(ctx, callable, 0, NULL, 0));
+            new_frame = PyJitRef_WrapInvalid(frame_new_from_symbol(ctx, callable, NULL, 0));
}
}
@@ -907,15 +908,15 @@ dummy_func(void) {
}
op(_PY_FRAME_GENERAL, (callable, self_or_null, args[oparg] -- new_frame)) {
-        new_frame = PyJitRef_WrapInvalid(frame_new_from_symbol(ctx, callable, 0, NULL, 0));
+        new_frame = PyJitRef_WrapInvalid(frame_new_from_symbol(ctx, callable, NULL, 0));
     }
     op(_PY_FRAME_KW, (callable, self_or_null, args[oparg], kwnames -- new_frame)) {
-        new_frame = PyJitRef_WrapInvalid(frame_new_from_symbol(ctx, callable, 0, NULL, 0));
+        new_frame = PyJitRef_WrapInvalid(frame_new_from_symbol(ctx, callable, NULL, 0));
     }
     op(_PY_FRAME_EX, (func_st, null, callargs_st, kwargs_st -- ex_frame)) {
-        ex_frame = PyJitRef_WrapInvalid(frame_new_from_symbol(ctx, func_st, 0, NULL, 0));
+        ex_frame = PyJitRef_WrapInvalid(frame_new_from_symbol(ctx, func_st, NULL, 0));
}
     op(_CHECK_AND_ALLOCATE_OBJECT, (type_version/2, callable, self_or_null, args[oparg] -- callable, self_or_null, args[oparg])) {
@@ -927,18 +928,18 @@ dummy_func(void) {
op(_CREATE_INIT_FRAME, (init, self, args[oparg] -- init_frame)) {
ctx->frame->stack_pointer = stack_pointer - oparg - 2;
-        _Py_UOpsAbstractFrame *shim = frame_new(ctx, (PyCodeObject *)&_Py_InitCleanup, 0, NULL, 0);
+        _Py_UOpsAbstractFrame *shim = frame_new(ctx, (PyCodeObject *)&_Py_InitCleanup, NULL, 0);
if (shim == NULL) {
break;
}
/* Push self onto stack of shim */
- shim->stack[0] = self;
+ shim->stack_pointer[0] = self;
shim->stack_pointer++;
assert((int)(shim->stack_pointer - shim->stack) == 1);
ctx->frame = shim;
ctx->curr_frame_depth++;
assert((this_instr + 1)->opcode == _PUSH_FRAME);
-        init_frame = PyJitRef_WrapInvalid(frame_new_from_symbol(ctx, init, 0, args-1, oparg+1));
+        init_frame = PyJitRef_WrapInvalid(frame_new_from_symbol(ctx, init, args-1, oparg+1));
}
op(_RETURN_VALUE, (retval -- res)) {
@@ -954,15 +955,7 @@ dummy_func(void) {
ctx->done = true;
break;
}
- int returning_stacklevel = (int)this_instr->operand1;
- if (ctx->curr_frame_depth >= 2) {
-        PyCodeObject *expected_code = ctx->frames[ctx->curr_frame_depth - 2].code;
- if (expected_code == returning_code) {
- assert(this_instr[2].opcode == _GUARD_IP_RETURN_VALUE);
- REPLACE_OP((this_instr + 2), _NOP, 0, 0);
- }
- }
- if (frame_pop(ctx, returning_code, returning_stacklevel)) {
+ if (frame_pop(ctx, returning_code)) {
break;
}
stack_pointer = ctx->frame->stack_pointer;
@@ -976,14 +969,12 @@ dummy_func(void) {
ctx->frame->stack_pointer = stack_pointer;
assert(this_instr[1].opcode == _RECORD_CODE);
PyCodeObject *returning_code = (PyCodeObject *)this_instr[1].operand0;
- assert(PyCode_Check(returning_code));
if (returning_code == NULL) {
ctx->done = true;
break;
}
- _Py_BloomFilter_Add(dependencies, returning_code);
- int returning_stacklevel = (int)this_instr->operand1;
- if (frame_pop(ctx, returning_code, returning_stacklevel)) {
+ assert(PyCode_Check(returning_code));
+ if (frame_pop(ctx, returning_code)) {
break;
}
stack_pointer = ctx->frame->stack_pointer;
@@ -998,14 +989,12 @@ dummy_func(void) {
ctx->frame->stack_pointer = stack_pointer;
assert(this_instr[1].opcode == _RECORD_CODE);
PyCodeObject *returning_code = (PyCodeObject *)this_instr[1].operand0;
- assert(PyCode_Check(returning_code));
if (returning_code == NULL) {
ctx->done = true;
break;
}
- _Py_BloomFilter_Add(dependencies, returning_code);
- int returning_stacklevel = (int)this_instr->operand1;
- if (frame_pop(ctx, returning_code, returning_stacklevel)) {
+ assert(PyCode_Check(returning_code));
+ if (frame_pop(ctx, returning_code)) {
break;
}
stack_pointer = ctx->frame->stack_pointer;
@@ -1025,22 +1014,24 @@ dummy_func(void) {
}
op(_FOR_ITER_GEN_FRAME, (iter, unused -- iter, unused, gen_frame)) {
-        _Py_UOpsAbstractFrame *new_frame = frame_new_from_symbol(ctx, iter, 1, NULL, 0);
+        _Py_UOpsAbstractFrame *new_frame = frame_new_from_symbol(ctx, iter, NULL, 0);
if (new_frame == NULL) {
ctx->done = true;
break;
}
- new_frame->stack[0] = sym_new_const(ctx, Py_None);
+ new_frame->stack_pointer[0] = sym_new_const(ctx, Py_None);
+ new_frame->stack_pointer++;
gen_frame = PyJitRef_WrapInvalid(new_frame);
}
op(_SEND_GEN_FRAME, (receiver, v -- receiver, gen_frame)) {
-        _Py_UOpsAbstractFrame *new_frame = frame_new_from_symbol(ctx, receiver, 1, NULL, 0);
+        _Py_UOpsAbstractFrame *new_frame = frame_new_from_symbol(ctx, receiver, NULL, 0);
if (new_frame == NULL) {
ctx->done = true;
break;
}
- new_frame->stack[0] = PyJitRef_StripReferenceInfo(v);
+ new_frame->stack_pointer[0] = PyJitRef_StripReferenceInfo(v);
+ new_frame->stack_pointer++;
gen_frame = PyJitRef_WrapInvalid(new_frame);
}
@@ -1062,14 +1053,10 @@ dummy_func(void) {
if (!CURRENT_FRAME_IS_INIT_SHIM()) {
ctx->frame->stack_pointer = stack_pointer;
}
+ ctx->frame->caller = true;
ctx->frame = (_Py_UOpsAbstractFrame *)PyJitRef_Unwrap(new_frame);
ctx->curr_frame_depth++;
stack_pointer = ctx->frame->stack_pointer;
- // Fixed calls don't need IP guards.
- if ((this_instr-1)->opcode == _CREATE_INIT_FRAME) {
- assert((this_instr+1)->opcode == _GUARD_IP__PUSH_FRAME);
- REPLACE_OP(this_instr+1, _NOP, 0, 0);
- }
assert(ctx->frame->locals != NULL);
}
@@ -1653,6 +1640,47 @@ dummy_func(void) {
sym_set_recorded_gen_func(nos, func);
}
+ op(_GUARD_IP__PUSH_FRAME, (ip/4 --)) {
+        stack_pointer = sym_set_stack_depth(this_instr->operand1, stack_pointer);
+ // TO DO
+ // Normal function calls to known functions
+ // do not need an IP guard.
+ }
+
+ op(_GUARD_CODE_VERSION, (version/2 -- )) {
+ PyCodeObject *co = get_current_code_object(ctx);
+ if (co->co_version == version) {
+ _Py_BloomFilter_Add(dependencies, co);
+ REPLACE_OP(this_instr, _NOP, 0, 0);
+ }
+ else {
+ ctx->done = true;
+ }
+ }
+
+ op(_GUARD_IP_YIELD_VALUE, (ip/4 --)) {
+ if (ctx->frame->caller) {
+ REPLACE_OP(this_instr, _NOP, 0, 0);
+ }
+        stack_pointer = sym_set_stack_depth(this_instr->operand1, stack_pointer);
+ }
+
+ op(_GUARD_IP_RETURN_VALUE, (ip/4 --)) {
+ if (ctx->frame->caller) {
+ REPLACE_OP(this_instr, _NOP, 0, 0);
+ }
+        stack_pointer = sym_set_stack_depth(this_instr->operand1, stack_pointer);
+ }
+
+ op(_GUARD_IP_RETURN_GENERATOR, (ip/4 --)) {
+ if (ctx->frame->caller) {
+ REPLACE_OP(this_instr, _NOP, 0, 0);
+ }
+        stack_pointer = sym_set_stack_depth(this_instr->operand1, stack_pointer);
+ }
+
+
+
// END BYTECODES //
}
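
The new _GUARD_CODE_VERSION handler above shows the optimizer's general
pattern for strength-reducing guards: if the guarded fact is already known at
optimization time, the guard becomes a _NOP and the code object is added to
the executor's dependency filter instead, trading a check on every execution
for an invalidation edge. A compact sketch of that rewrite follows; the
UopInstr type and helper names are hypothetical stand-ins, not the real uop
structures:

    #include <stdint.h>
    #include <stdio.h>

    typedef struct { int opcode; uint32_t operand0; } UopInstr;
    enum { OP_GUARD_CODE_VERSION = 1, OP_NOP = 2 };

    /* Returns 1 if optimization may continue, 0 if the trace is dead. */
    static int optimize_guard(UopInstr *instr, uint32_t co_version,
                              void (*add_dependency)(void *), void *code)
    {
        if (co_version == instr->operand0) {
            add_dependency(code);   /* invalidate executor if code changes */
            instr->opcode = OP_NOP; /* runtime check no longer needed */
            return 1;
        }
        return 0;  /* version mismatch: give up on the trace (ctx->done) */
    }

    static void record(void *obj) { printf("dependency on %p\n", obj); }

    int main(void) {
        int fake_code;
        UopInstr guard = { OP_GUARD_CODE_VERSION, 42 };
        optimize_guard(&guard, 42, record, &fake_code);
        printf("guard is now %s\n", guard.opcode == OP_NOP ? "_NOP" : "live");
        return 0;
    }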
diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h
index 7faa699a058249..a93e85329297cd 100644
--- a/Python/optimizer_cases.c.h
+++ b/Python/optimizer_cases.c.h
@@ -1162,7 +1162,7 @@
getitem = stack_pointer[-1];
sub = stack_pointer[-2];
container = stack_pointer[-3];
-            _Py_UOpsAbstractFrame *f = frame_new_from_symbol(ctx, getitem, 0, NULL, 0);
+            _Py_UOpsAbstractFrame *f = frame_new_from_symbol(ctx, getitem, NULL, 0);
if (f == NULL) {
break;
}
@@ -1272,15 +1272,7 @@
ctx->done = true;
break;
}
- int returning_stacklevel = (int)this_instr->operand1;
- if (ctx->curr_frame_depth >= 2) {
-                PyCodeObject *expected_code = ctx->frames[ctx->curr_frame_depth - 2].code;
- if (expected_code == returning_code) {
- assert(this_instr[2].opcode == _GUARD_IP_RETURN_VALUE);
- REPLACE_OP((this_instr + 2), _NOP, 0, 0);
- }
- }
- if (frame_pop(ctx, returning_code, returning_stacklevel)) {
+ if (frame_pop(ctx, returning_code)) {
break;
}
stack_pointer = ctx->frame->stack_pointer;
@@ -1324,12 +1316,13 @@
JitOptRef gen_frame;
v = stack_pointer[-1];
receiver = stack_pointer[-2];
-            _Py_UOpsAbstractFrame *new_frame = frame_new_from_symbol(ctx, receiver, 1, NULL, 0);
+            _Py_UOpsAbstractFrame *new_frame = frame_new_from_symbol(ctx, receiver, NULL, 0);
if (new_frame == NULL) {
ctx->done = true;
break;
}
- new_frame->stack[0] = PyJitRef_StripReferenceInfo(v);
+ new_frame->stack_pointer[0] = PyJitRef_StripReferenceInfo(v);
+ new_frame->stack_pointer++;
gen_frame = PyJitRef_WrapInvalid(new_frame);
stack_pointer[-1] = gen_frame;
break;
@@ -1346,14 +1339,12 @@
ctx->frame->stack_pointer = stack_pointer;
assert(this_instr[1].opcode == _RECORD_CODE);
             PyCodeObject *returning_code = (PyCodeObject *)this_instr[1].operand0;
- assert(PyCode_Check(returning_code));
if (returning_code == NULL) {
ctx->done = true;
break;
}
- _Py_BloomFilter_Add(dependencies, returning_code);
- int returning_stacklevel = (int)this_instr->operand1;
- if (frame_pop(ctx, returning_code, returning_stacklevel)) {
+ assert(PyCode_Check(returning_code));
+ if (frame_pop(ctx, returning_code)) {
break;
}
stack_pointer = ctx->frame->stack_pointer;
@@ -2011,7 +2002,7 @@
owner = stack_pointer[-1];
PyObject *fget = (PyObject *)this_instr->operand0;
             PyCodeObject *co = (PyCodeObject *)((PyFunctionObject *)fget)->func_code;
- _Py_UOpsAbstractFrame *f = frame_new(ctx, co, 0, NULL, 0);
+ _Py_UOpsAbstractFrame *f = frame_new(ctx, co, NULL, 0);
if (f == NULL) {
break;
}
@@ -2711,12 +2702,13 @@
JitOptRef iter;
JitOptRef gen_frame;
iter = stack_pointer[-2];
-            _Py_UOpsAbstractFrame *new_frame = frame_new_from_symbol(ctx, iter, 1, NULL, 0);
+            _Py_UOpsAbstractFrame *new_frame = frame_new_from_symbol(ctx, iter, NULL, 0);
if (new_frame == NULL) {
ctx->done = true;
break;
}
- new_frame->stack[0] = sym_new_const(ctx, Py_None);
+ new_frame->stack_pointer[0] = sym_new_const(ctx, Py_None);
+ new_frame->stack_pointer++;
gen_frame = PyJitRef_WrapInvalid(new_frame);
CHECK_STACK_BOUNDS(1);
stack_pointer[0] = gen_frame;
@@ -2897,7 +2889,7 @@
JitOptRef callable;
JitOptRef new_frame;
callable = stack_pointer[-2 - oparg];
-            new_frame = PyJitRef_WrapInvalid(frame_new_from_symbol(ctx, callable, 0, NULL, 0));
+            new_frame = PyJitRef_WrapInvalid(frame_new_from_symbol(ctx, callable, NULL, 0));
CHECK_STACK_BOUNDS(-1 - oparg);
stack_pointer[-2 - oparg] = new_frame;
stack_pointer += -1 - oparg;
@@ -3033,9 +3025,9 @@
argcount++;
}
if (sym_is_null(self_or_null) || sym_is_not_null(self_or_null)) {
-            new_frame = PyJitRef_WrapInvalid(frame_new_from_symbol(ctx, callable, 0, args, argcount));
+            new_frame = PyJitRef_WrapInvalid(frame_new_from_symbol(ctx, callable, args, argcount));
         } else {
-            new_frame = PyJitRef_WrapInvalid(frame_new_from_symbol(ctx, callable, 0, NULL, 0));
+            new_frame = PyJitRef_WrapInvalid(frame_new_from_symbol(ctx, callable, NULL, 0));
}
CHECK_STACK_BOUNDS(-1 - oparg);
stack_pointer[-2 - oparg] = new_frame;
@@ -3053,13 +3045,10 @@
if (!CURRENT_FRAME_IS_INIT_SHIM()) {
ctx->frame->stack_pointer = stack_pointer;
}
+ ctx->frame->caller = true;
ctx->frame = (_Py_UOpsAbstractFrame *)PyJitRef_Unwrap(new_frame);
ctx->curr_frame_depth++;
stack_pointer = ctx->frame->stack_pointer;
- if ((this_instr-1)->opcode == _CREATE_INIT_FRAME) {
- assert((this_instr+1)->opcode == _GUARD_IP__PUSH_FRAME);
- REPLACE_OP(this_instr+1, _NOP, 0, 0);
- }
assert(ctx->frame->locals != NULL);
break;
}
@@ -3213,17 +3202,17 @@
self = stack_pointer[-1 - oparg];
init = stack_pointer[-2 - oparg];
ctx->frame->stack_pointer = stack_pointer - oparg - 2;
-            _Py_UOpsAbstractFrame *shim = frame_new(ctx, (PyCodeObject *)&_Py_InitCleanup, 0, NULL, 0);
+            _Py_UOpsAbstractFrame *shim = frame_new(ctx, (PyCodeObject *)&_Py_InitCleanup, NULL, 0);
if (shim == NULL) {
break;
}
- shim->stack[0] = self;
+ shim->stack_pointer[0] = self;
shim->stack_pointer++;
assert((int)(shim->stack_pointer - shim->stack) == 1);
ctx->frame = shim;
ctx->curr_frame_depth++;
assert((this_instr + 1)->opcode == _PUSH_FRAME);
-            init_frame = PyJitRef_WrapInvalid(frame_new_from_symbol(ctx, init, 0, args-1, oparg+1));
+            init_frame = PyJitRef_WrapInvalid(frame_new_from_symbol(ctx, init, args-1, oparg+1));
CHECK_STACK_BOUNDS(-1 - oparg);
stack_pointer[-2 - oparg] = init_frame;
stack_pointer += -1 - oparg;
@@ -3500,7 +3489,7 @@
JitOptRef callable;
JitOptRef new_frame;
callable = stack_pointer[-3 - oparg];
-            new_frame = PyJitRef_WrapInvalid(frame_new_from_symbol(ctx, callable, 0, NULL, 0));
+            new_frame = PyJitRef_WrapInvalid(frame_new_from_symbol(ctx, callable, NULL, 0));
CHECK_STACK_BOUNDS(-2 - oparg);
stack_pointer[-3 - oparg] = new_frame;
stack_pointer += -2 - oparg;
@@ -3548,7 +3537,7 @@
JitOptRef func_st;
JitOptRef ex_frame;
func_st = stack_pointer[-4];
-            ex_frame = PyJitRef_WrapInvalid(frame_new_from_symbol(ctx, func_st, 0, NULL, 0));
+            ex_frame = PyJitRef_WrapInvalid(frame_new_from_symbol(ctx, func_st, NULL, 0));
CHECK_STACK_BOUNDS(-3);
stack_pointer[-4] = ex_frame;
stack_pointer += -3;
@@ -3592,14 +3581,12 @@
ctx->frame->stack_pointer = stack_pointer;
assert(this_instr[1].opcode == _RECORD_CODE);
             PyCodeObject *returning_code = (PyCodeObject *)this_instr[1].operand0;
- assert(PyCode_Check(returning_code));
if (returning_code == NULL) {
ctx->done = true;
break;
}
- _Py_BloomFilter_Add(dependencies, returning_code);
- int returning_stacklevel = (int)this_instr->operand1;
- if (frame_pop(ctx, returning_code, returning_stacklevel)) {
+ assert(PyCode_Check(returning_code));
+ if (frame_pop(ctx, returning_code)) {
break;
}
stack_pointer = ctx->frame->stack_pointer;
@@ -4157,23 +4144,49 @@
break;
}
- case _GUARD_CODE: {
+ case _GUARD_CODE_VERSION: {
+ uint32_t version = (uint32_t)this_instr->operand0;
+ PyCodeObject *co = get_current_code_object(ctx);
+ if (co->co_version == version) {
+ _Py_BloomFilter_Add(dependencies, co);
+ REPLACE_OP(this_instr, _NOP, 0, 0);
+ }
+ else {
+ ctx->done = true;
+ }
break;
}
case _GUARD_IP__PUSH_FRAME: {
+ PyObject *ip = (PyObject *)this_instr->operand0;
+            stack_pointer = sym_set_stack_depth(this_instr->operand1, stack_pointer);
break;
}
case _GUARD_IP_YIELD_VALUE: {
+ PyObject *ip = (PyObject *)this_instr->operand0;
+ if (ctx->frame->caller) {
+ REPLACE_OP(this_instr, _NOP, 0, 0);
+ }
+            stack_pointer = sym_set_stack_depth(this_instr->operand1, stack_pointer);
break;
}
case _GUARD_IP_RETURN_VALUE: {
+ PyObject *ip = (PyObject *)this_instr->operand0;
+ if (ctx->frame->caller) {
+ REPLACE_OP(this_instr, _NOP, 0, 0);
+ }
+            stack_pointer = sym_set_stack_depth(this_instr->operand1, stack_pointer);
break;
}
case _GUARD_IP_RETURN_GENERATOR: {
+ PyObject *ip = (PyObject *)this_instr->operand0;
+ if (ctx->frame->caller) {
+ REPLACE_OP(this_instr, _NOP, 0, 0);
+ }
+            stack_pointer = sym_set_stack_depth(this_instr->operand1, stack_pointer);
break;
}
diff --git a/Python/optimizer_symbols.c b/Python/optimizer_symbols.c
index 635ce622c3c589..dcbe093fd6d74c 100644
--- a/Python/optimizer_symbols.c
+++ b/Python/optimizer_symbols.c
@@ -1284,7 +1284,6 @@ _Py_UOpsAbstractFrame *
_Py_uop_frame_new_from_symbol(
JitOptContext *ctx,
JitOptRef callable,
- int curr_stackentries,
JitOptRef *args,
int arg_len)
{
@@ -1293,7 +1292,7 @@ _Py_uop_frame_new_from_symbol(
ctx->done = true;
return NULL;
}
-    _Py_UOpsAbstractFrame *frame = _Py_uop_frame_new(ctx, co, curr_stackentries, args, arg_len);
+ _Py_UOpsAbstractFrame *frame = _Py_uop_frame_new(ctx, co, args, arg_len);
if (frame == NULL) {
return NULL;
}
@@ -1311,7 +1310,6 @@ _Py_UOpsAbstractFrame *
_Py_uop_frame_new(
JitOptContext *ctx,
PyCodeObject *co,
- int curr_stackentries,
JitOptRef *args,
int arg_len)
{
@@ -1324,17 +1322,21 @@ _Py_uop_frame_new(
}
_Py_UOpsAbstractFrame *frame = &ctx->frames[ctx->curr_frame_depth];
frame->code = co;
- frame->stack_len = co->co_stacksize;
+
+ frame->locals = ctx->locals.used;
+ ctx->locals.used += co->co_nlocalsplus;
frame->locals_len = co->co_nlocalsplus;
- frame->locals = ctx->n_consumed;
- frame->stack = frame->locals + co->co_nlocalsplus;
- frame->stack_pointer = frame->stack + curr_stackentries;
+ frame->stack = ctx->stack.used;
+ ctx->stack.used += co->co_stacksize;
+ frame->stack_len = co->co_stacksize;
+
+ frame->stack_pointer = frame->stack;
frame->globals_checked_version = 0;
frame->globals_watched = false;
frame->func = NULL;
-    ctx->n_consumed = ctx->n_consumed + (co->co_nlocalsplus + co->co_stacksize);
-    if (ctx->n_consumed >= ctx->limit) {
+    frame->caller = false;
+    if (ctx->locals.used > ctx->locals.end || ctx->stack.used > ctx->stack.end) {
ctx->done = true;
ctx->out_of_space = true;
return NULL;
@@ -1354,16 +1356,45 @@ _Py_uop_frame_new(
frame->locals[i] = local;
}
- // Initialize the stack as well
- for (int i = 0; i < curr_stackentries; i++) {
- JitOptRef stackvar = _Py_uop_sym_new_unknown(ctx);
- frame->stack[i] = stackvar;
- }
+    /* Most optimizations rely on code objects being immutable (including sys._getframe modifications),
+ * and up to date for instrumentation. */
+ _Py_BloomFilter_Add(ctx->dependencies, co);
assert(frame->locals != NULL);
return frame;
}
+JitOptRef *
+_Py_uop_sym_set_stack_depth(JitOptContext *ctx, int stack_depth, JitOptRef *current_sp) {
+ _Py_UOpsAbstractFrame *frame = ctx->frame;
+ assert(frame->stack != NULL);
+ JitOptRef *new_stack_pointer = frame->stack + stack_depth;
+ if (current_sp > new_stack_pointer) {
+ ctx->done = true;
+ ctx->contradiction = true;
+ return NULL;
+ }
+ if (new_stack_pointer > ctx->stack.end) {
+ ctx->done = true;
+ ctx->out_of_space = true;
+ return NULL;
+ }
+ int delta = (int)(new_stack_pointer - current_sp);
+ assert(delta >= 0);
+ if (delta) {
+ /* Shift existing stack elements up */
+ for (JitOptRef *p = current_sp-1; p >= frame->stack; p--) {
+ p[delta] = *p;
+ }
+ /* Fill rest of stack with unknowns */
+ for (int i = 0; i < delta; i++) {
+ frame->stack[i] = _Py_uop_sym_new_unknown(ctx);
+ }
+ }
+ return frame->stack_pointer = new_stack_pointer;
+}
+
+
void
_Py_uop_abstractcontext_fini(JitOptContext *ctx)
{
@@ -1380,15 +1411,24 @@ _Py_uop_abstractcontext_fini(JitOptContext *ctx)
}
}
+// Leave a bit of space to push values before checking that there is space for a new frame
+#define STACK_HEADROOM 2
+
void
-_Py_uop_abstractcontext_init(JitOptContext *ctx)
+_Py_uop_abstractcontext_init(JitOptContext *ctx, _PyBloomFilter *dependencies)
{
     static_assert(sizeof(JitOptSymbol) <= 3 * sizeof(uint64_t), "JitOptSymbol has grown");
- ctx->limit = ctx->locals_and_stack + MAX_ABSTRACT_INTERP_SIZE;
- ctx->n_consumed = ctx->locals_and_stack;
+
+ ctx->stack.used = ctx->stack_array;
+    ctx->stack.end = &ctx->stack_array[ABSTRACT_INTERP_STACK_SIZE-STACK_HEADROOM];
+    ctx->locals.used = ctx->locals_array;
+    ctx->locals.end = &ctx->locals_array[ABSTRACT_INTERP_LOCALS_SIZE-STACK_HEADROOM];
 #ifdef Py_DEBUG // Aids debugging a little. There should never be NULL in the abstract interpreter.
- for (int i = 0 ; i < MAX_ABSTRACT_INTERP_SIZE; i++) {
- ctx->locals_and_stack[i] = PyJitRef_NULL;
+ for (int i = 0 ; i < ABSTRACT_INTERP_STACK_SIZE; i++) {
+ ctx->stack_array[i] = PyJitRef_NULL;
+ }
+ for (int i = 0 ; i < ABSTRACT_INTERP_LOCALS_SIZE; i++) {
+ ctx->locals_array[i] = PyJitRef_NULL;
}
#endif
@@ -1406,13 +1446,15 @@ _Py_uop_abstractcontext_init(JitOptContext *ctx)
ctx->out_of_space = false;
ctx->contradiction = false;
ctx->builtins_watched = false;
+ ctx->dependencies = dependencies;
}
int
-_Py_uop_frame_pop(JitOptContext *ctx, PyCodeObject *co, int curr_stackentries)
+_Py_uop_frame_pop(JitOptContext *ctx, PyCodeObject *co)
{
_Py_UOpsAbstractFrame *frame = ctx->frame;
- ctx->n_consumed = frame->locals;
+ ctx->stack.used = frame->stack;
+ ctx->locals.used = frame->locals;
ctx->curr_frame_depth--;
@@ -1436,9 +1478,7 @@ _Py_uop_frame_pop(JitOptContext *ctx, PyCodeObject *co, int curr_stackentries)
// Else: trace stack underflow.
// This handles swapping out frames.
- assert(curr_stackentries >= 1);
- // -1 to stackentries as we push to the stack our return value after this.
-    _Py_UOpsAbstractFrame *new_frame = _Py_uop_frame_new(ctx, co, curr_stackentries - 1, NULL, 0);
+ _Py_UOpsAbstractFrame *new_frame = _Py_uop_frame_new(ctx, co, NULL, 0);
if (new_frame == NULL) {
ctx->done = true;
return 1;
@@ -1474,7 +1514,7 @@ _Py_uop_symbols_test(PyObject *Py_UNUSED(self), PyObject *Py_UNUSED(ignored))
{
JitOptContext context;
JitOptContext *ctx = &context;
- _Py_uop_abstractcontext_init(ctx);
+ _Py_uop_abstractcontext_init(ctx, NULL);
PyObject *val_42 = NULL;
PyObject *val_43 = NULL;
PyObject *val_big = NULL;
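
As a closing note, the shifting step inside the new _Py_uop_sym_set_stack_depth
can be pictured in isolation: when the depth recorded in operand1 exceeds the
abstract stack's current depth, the symbols already known become the top
entries and the newly exposed bottom slots are filled with fresh unknowns. An
illustrative sketch with ints standing in for JitOptRef symbols
(set_stack_depth and UNKNOWN are hypothetical names, not the real API):

    #include <stdio.h>

    typedef int Sym;      /* stand-in for JitOptRef; 0 means "unknown" */
    #define UNKNOWN 0

    /* Grow the abstract stack from n known symbols to `depth` entries. */
    static int set_stack_depth(Sym *stack, int n, int depth) {
        int delta = depth - n;
        if (delta < 0) {
            return -1;                     /* contradiction in the trace */
        }
        for (int i = n - 1; i >= 0; i--) { /* shift known symbols up */
            stack[i + delta] = stack[i];
        }
        for (int i = 0; i < delta; i++) {  /* fill the gap with unknowns */
            stack[i] = UNKNOWN;
        }
        return depth;
    }

    int main(void) {
        Sym stack[8] = {101, 102};         /* two known symbols */
        int depth = set_stack_depth(stack, 2, 5);
        for (int i = 0; i < depth; i++) {
            printf("%d ", stack[i]);       /* prints: 0 0 0 101 102 */
        }
        printf("\n");
        return 0;
    }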