https://github.com/python/cpython/commit/053c285f6b41f92fbdd1d4ff0c959cceefacd7cd commit: 053c285f6b41f92fbdd1d4ff0c959cceefacd7cd branch: main author: mpage <mp...@meta.com> committer: mpage <mp...@cs.stanford.edu> date: 2025-04-01T10:18:42-07:00 summary:
gh-130704: Strength reduce `LOAD_FAST{_LOAD_FAST}` (#130708) Optimize `LOAD_FAST` opcodes into faster versions that load borrowed references onto the operand stack when we can prove that the lifetime of the local outlives the lifetime of the temporary that is loaded onto the stack. files: A Misc/NEWS.d/next/Core_and_Builtins/2025-02-28-11-29-35.gh-issue-130704.7RDVLE.rst M Doc/library/dis.rst M Include/internal/pycore_frame.h M Include/internal/pycore_opcode_metadata.h M Include/internal/pycore_stackref.h M Include/internal/pycore_uop_ids.h M Include/internal/pycore_uop_metadata.h M Include/opcode_ids.h M Lib/_opcode_metadata.py M Lib/dis.py M Lib/test/test_capi/test_opt.py M Lib/test/test_ctypes/test_memfunctions.py M Lib/test/test_ctypes/test_refcounts.py M Lib/test/test_ctypes/test_stringptr.py M Lib/test/test_dis.py M Lib/test/test_frame.py M Lib/test/test_generators.py M Lib/test/test_importlib/test_abc.py M Lib/test/test_peepholer.py M Lib/test/test_sys.py M Lib/test/test_traceback.py M Objects/floatobject.c M Objects/frameobject.c M Programs/test_frozenmain.h M Python/bytecodes.c M Python/executor_cases.c.h M Python/flowgraph.c M Python/gc.c M Python/generated_cases.c.h M Python/opcode_targets.h M Python/optimizer_analysis.c M Python/optimizer_bytecodes.c M Python/optimizer_cases.c.h M Tools/cases_generator/analyzer.py M Tools/cases_generator/opcode_metadata_generator.py diff --git a/Doc/library/dis.rst b/Doc/library/dis.rst index 90e937ab2a91a6..4fde5bf903880b 100644 --- a/Doc/library/dis.rst +++ b/Doc/library/dis.rst @@ -76,7 +76,7 @@ the following command can be used to display the disassembly of 2 RESUME 0 <BLANKLINE> 3 LOAD_GLOBAL 1 (len + NULL) - LOAD_FAST 0 (alist) + LOAD_FAST_BORROW 0 (alist) CALL 1 RETURN_VALUE @@ -215,7 +215,7 @@ Example: ... RESUME LOAD_GLOBAL - LOAD_FAST + LOAD_FAST_BORROW CALL RETURN_VALUE @@ -1402,6 +1402,13 @@ iterations of the loop. 
This opcode is now only used in situations where the local variable is guaranteed to be initialized. It cannot raise :exc:`UnboundLocalError`. +.. opcode:: LOAD_FAST_BORROW (var_num) + + Pushes a borrowed reference to the local ``co_varnames[var_num]`` onto the + stack. + + .. versionadded:: 3.14 + .. opcode:: LOAD_FAST_LOAD_FAST (var_nums) Pushes references to ``co_varnames[var_nums >> 4]`` and @@ -1409,6 +1416,14 @@ iterations of the loop. .. versionadded:: 3.13 + +.. opcode:: LOAD_FAST_BORROW_LOAD_FAST_BORROW (var_nums) + + Pushes borrowed references to ``co_varnames[var_nums >> 4]`` and + ``co_varnames[var_nums & 15]`` onto the stack. + + .. versionadded:: 3.14 + .. opcode:: LOAD_FAST_CHECK (var_num) Pushes a reference to the local ``co_varnames[var_num]`` onto the stack, @@ -2023,4 +2038,3 @@ instructions: .. deprecated:: 3.13 All jumps are now relative. This list is empty. - diff --git a/Include/internal/pycore_frame.h b/Include/internal/pycore_frame.h index dde211c5eac015..8c410e9e208340 100644 --- a/Include/internal/pycore_frame.h +++ b/Include/internal/pycore_frame.h @@ -28,6 +28,12 @@ struct _frame { PyEval_GetLocals requires a borrowed reference so the actual reference is stored here */ PyObject *f_locals_cache; + /* A tuple containing strong references to fast locals that were overwritten + * via f_locals. Borrowed references to these locals may exist in frames + * closer to the top of the stack. The references in this tuple act as + * "support" for the borrowed references, ensuring that they remain valid. 
+ */ + PyObject *f_overwritten_fast_locals; /* The frame data, if this frame object owns the frame */ PyObject *_f_frame_data[1]; }; diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 32bafaf09ce3da..073c002b34c30a 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -335,6 +335,10 @@ int _PyOpcode_num_popped(int opcode, int oparg) { return 0; case LOAD_FAST_AND_CLEAR: return 0; + case LOAD_FAST_BORROW: + return 0; + case LOAD_FAST_BORROW_LOAD_FAST_BORROW: + return 0; case LOAD_FAST_CHECK: return 0; case LOAD_FAST_LOAD_FAST: @@ -810,6 +814,10 @@ int _PyOpcode_num_pushed(int opcode, int oparg) { return 1; case LOAD_FAST_AND_CLEAR: return 1; + case LOAD_FAST_BORROW: + return 1; + case LOAD_FAST_BORROW_LOAD_FAST_BORROW: + return 2; case LOAD_FAST_CHECK: return 1; case LOAD_FAST_LOAD_FAST: @@ -1198,6 +1206,8 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[266] = { [LOAD_DEREF] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [LOAD_FAST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_PURE_FLAG }, [LOAD_FAST_AND_CLEAR] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG }, + [LOAD_FAST_BORROW] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_PURE_FLAG }, + [LOAD_FAST_BORROW_LOAD_FAST_BORROW] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG }, [LOAD_FAST_CHECK] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [LOAD_FAST_LOAD_FAST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG }, [LOAD_FROM_DICT_OR_DEREF] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_FREE_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, @@ -1406,6 +1416,8 @@ _PyOpcode_macro_expansion[256] = { [LOAD_DEREF] = { .nuops = 1, .uops = { { _LOAD_DEREF, OPARG_SIMPLE, 0 } } }, [LOAD_FAST] = { .nuops = 1, .uops = { { _LOAD_FAST, OPARG_SIMPLE, 0 } } }, 
[LOAD_FAST_AND_CLEAR] = { .nuops = 1, .uops = { { _LOAD_FAST_AND_CLEAR, OPARG_SIMPLE, 0 } } }, + [LOAD_FAST_BORROW] = { .nuops = 1, .uops = { { _LOAD_FAST_BORROW, OPARG_SIMPLE, 0 } } }, + [LOAD_FAST_BORROW_LOAD_FAST_BORROW] = { .nuops = 2, .uops = { { _LOAD_FAST_BORROW, OPARG_TOP, 0 }, { _LOAD_FAST_BORROW, OPARG_BOTTOM, 0 } } }, [LOAD_FAST_CHECK] = { .nuops = 1, .uops = { { _LOAD_FAST_CHECK, OPARG_SIMPLE, 0 } } }, [LOAD_FAST_LOAD_FAST] = { .nuops = 2, .uops = { { _LOAD_FAST, OPARG_TOP, 0 }, { _LOAD_FAST, OPARG_BOTTOM, 0 } } }, [LOAD_FROM_DICT_OR_DEREF] = { .nuops = 1, .uops = { { _LOAD_FROM_DICT_OR_DEREF, OPARG_SIMPLE, 0 } } }, @@ -1632,6 +1644,8 @@ const char *_PyOpcode_OpName[266] = { [LOAD_DEREF] = "LOAD_DEREF", [LOAD_FAST] = "LOAD_FAST", [LOAD_FAST_AND_CLEAR] = "LOAD_FAST_AND_CLEAR", + [LOAD_FAST_BORROW] = "LOAD_FAST_BORROW", + [LOAD_FAST_BORROW_LOAD_FAST_BORROW] = "LOAD_FAST_BORROW_LOAD_FAST_BORROW", [LOAD_FAST_CHECK] = "LOAD_FAST_CHECK", [LOAD_FAST_LOAD_FAST] = "LOAD_FAST_LOAD_FAST", [LOAD_FROM_DICT_OR_DEREF] = "LOAD_FROM_DICT_OR_DEREF", @@ -1890,6 +1904,8 @@ const uint8_t _PyOpcode_Deopt[256] = { [LOAD_DEREF] = LOAD_DEREF, [LOAD_FAST] = LOAD_FAST, [LOAD_FAST_AND_CLEAR] = LOAD_FAST_AND_CLEAR, + [LOAD_FAST_BORROW] = LOAD_FAST_BORROW, + [LOAD_FAST_BORROW_LOAD_FAST_BORROW] = LOAD_FAST_BORROW_LOAD_FAST_BORROW, [LOAD_FAST_CHECK] = LOAD_FAST_CHECK, [LOAD_FAST_LOAD_FAST] = LOAD_FAST_LOAD_FAST, [LOAD_FROM_DICT_OR_DEREF] = LOAD_FROM_DICT_OR_DEREF, @@ -1972,8 +1988,6 @@ const uint8_t _PyOpcode_Deopt[256] = { #endif // NEED_OPCODE_METADATA #define EXTRA_CASES \ - case 117: \ - case 118: \ case 119: \ case 120: \ case 121: \ diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index e2e32ed4122838..6664a747e2e91b 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -172,6 +172,12 @@ PyStackRef_MakeHeapSafe(_PyStackRef ref) return ref; } +static inline _PyStackRef +PyStackRef_Borrow(_PyStackRef ref) +{ 
+ return PyStackRef_DUP(ref); +} + #define PyStackRef_CLEAR(REF) \ do { \ _PyStackRef *_tmp_op_ptr = &(REF); \ @@ -253,6 +259,25 @@ _PyStackRef_FromPyObjectSteal(PyObject *obj) } # define PyStackRef_FromPyObjectSteal(obj) _PyStackRef_FromPyObjectSteal(_PyObject_CAST(obj)) +static inline bool +PyStackRef_IsHeapSafe(_PyStackRef stackref) +{ + if (PyStackRef_IsDeferred(stackref)) { + PyObject *obj = PyStackRef_AsPyObjectBorrow(stackref); + return obj == NULL || _Py_IsImmortal(obj) || _PyObject_HasDeferredRefcount(obj); + } + return true; +} + +static inline _PyStackRef +PyStackRef_MakeHeapSafe(_PyStackRef stackref) +{ + if (PyStackRef_IsHeapSafe(stackref)) { + return stackref; + } + PyObject *obj = PyStackRef_AsPyObjectBorrow(stackref); + return (_PyStackRef){ .bits = (uintptr_t)(Py_NewRef(obj)) | Py_TAG_PTR }; +} static inline _PyStackRef PyStackRef_FromPyObjectStealMortal(PyObject *obj) @@ -311,25 +336,16 @@ PyStackRef_DUP(_PyStackRef stackref) { assert(!PyStackRef_IsNull(stackref)); if (PyStackRef_IsDeferred(stackref)) { - assert(_Py_IsImmortal(PyStackRef_AsPyObjectBorrow(stackref)) || - _PyObject_HasDeferredRefcount(PyStackRef_AsPyObjectBorrow(stackref)) - ); return stackref; } Py_INCREF(PyStackRef_AsPyObjectBorrow(stackref)); return stackref; } -static inline bool -PyStackRef_IsHeapSafe(_PyStackRef ref) -{ - return true; -} - static inline _PyStackRef -PyStackRef_MakeHeapSafe(_PyStackRef ref) +PyStackRef_Borrow(_PyStackRef stackref) { - return ref; + return (_PyStackRef){ .bits = stackref.bits | Py_TAG_DEFERRED }; } // Convert a possibly deferred reference to a strong reference. 
@@ -399,7 +415,6 @@ static inline void PyStackRef_CheckValid(_PyStackRef ref) { assert(!_Py_IsStaticImmortal(obj)); break; case Py_TAG_REFCNT: - assert(obj == NULL || _Py_IsImmortal(obj)); break; default: assert(0); @@ -413,14 +428,15 @@ static inline void PyStackRef_CheckValid(_PyStackRef ref) { #endif #ifdef _WIN32 -#define PyStackRef_RefcountOnObject(REF) (((REF).bits & Py_TAG_BITS) == 0) +#define PyStackRef_RefcountOnObject(REF) (((REF).bits & Py_TAG_REFCNT) == 0) #define PyStackRef_AsPyObjectBorrow BITS_TO_PTR_MASKED +#define PyStackRef_Borrow(REF) (_PyStackRef){ .bits = ((REF).bits) | Py_TAG_REFCNT}; #else /* Does this ref not have an embedded refcount and thus not refer to a declared immmortal object? */ static inline int PyStackRef_RefcountOnObject(_PyStackRef ref) { - return (ref.bits & Py_TAG_BITS) == 0; + return (ref.bits & Py_TAG_REFCNT) == 0; } static inline PyObject * @@ -428,6 +444,12 @@ PyStackRef_AsPyObjectBorrow(_PyStackRef ref) { return BITS_TO_PTR_MASKED(ref); } + +static inline _PyStackRef +PyStackRef_Borrow(_PyStackRef ref) +{ + return (_PyStackRef){ .bits = ref.bits | Py_TAG_REFCNT }; +} #endif static inline PyObject * diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index 1d9c2bef4cedda..6fb63bfabfe798 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -203,106 +203,116 @@ extern "C" { #define _LOAD_FAST_6 428 #define _LOAD_FAST_7 429 #define _LOAD_FAST_AND_CLEAR LOAD_FAST_AND_CLEAR +#define _LOAD_FAST_BORROW 430 +#define _LOAD_FAST_BORROW_0 431 +#define _LOAD_FAST_BORROW_1 432 +#define _LOAD_FAST_BORROW_2 433 +#define _LOAD_FAST_BORROW_3 434 +#define _LOAD_FAST_BORROW_4 435 +#define _LOAD_FAST_BORROW_5 436 +#define _LOAD_FAST_BORROW_6 437 +#define _LOAD_FAST_BORROW_7 438 +#define _LOAD_FAST_BORROW_LOAD_FAST_BORROW LOAD_FAST_BORROW_LOAD_FAST_BORROW #define _LOAD_FAST_CHECK LOAD_FAST_CHECK #define _LOAD_FAST_LOAD_FAST LOAD_FAST_LOAD_FAST #define _LOAD_FROM_DICT_OR_DEREF 
LOAD_FROM_DICT_OR_DEREF #define _LOAD_FROM_DICT_OR_GLOBALS LOAD_FROM_DICT_OR_GLOBALS -#define _LOAD_GLOBAL 430 -#define _LOAD_GLOBAL_BUILTINS 431 -#define _LOAD_GLOBAL_MODULE 432 +#define _LOAD_GLOBAL 439 +#define _LOAD_GLOBAL_BUILTINS 440 +#define _LOAD_GLOBAL_MODULE 441 #define _LOAD_LOCALS LOAD_LOCALS #define _LOAD_NAME LOAD_NAME -#define _LOAD_SMALL_INT 433 -#define _LOAD_SMALL_INT_0 434 -#define _LOAD_SMALL_INT_1 435 -#define _LOAD_SMALL_INT_2 436 -#define _LOAD_SMALL_INT_3 437 +#define _LOAD_SMALL_INT 442 +#define _LOAD_SMALL_INT_0 443 +#define _LOAD_SMALL_INT_1 444 +#define _LOAD_SMALL_INT_2 445 +#define _LOAD_SMALL_INT_3 446 #define _LOAD_SPECIAL LOAD_SPECIAL #define _LOAD_SUPER_ATTR_ATTR LOAD_SUPER_ATTR_ATTR #define _LOAD_SUPER_ATTR_METHOD LOAD_SUPER_ATTR_METHOD -#define _MAKE_CALLARGS_A_TUPLE 438 +#define _MAKE_CALLARGS_A_TUPLE 447 #define _MAKE_CELL MAKE_CELL #define _MAKE_FUNCTION MAKE_FUNCTION -#define _MAKE_WARM 439 +#define _MAKE_WARM 448 #define _MAP_ADD MAP_ADD #define _MATCH_CLASS MATCH_CLASS #define _MATCH_KEYS MATCH_KEYS #define _MATCH_MAPPING MATCH_MAPPING #define _MATCH_SEQUENCE MATCH_SEQUENCE -#define _MAYBE_EXPAND_METHOD 440 -#define _MAYBE_EXPAND_METHOD_KW 441 -#define _MONITOR_CALL 442 -#define _MONITOR_CALL_KW 443 -#define _MONITOR_JUMP_BACKWARD 444 -#define _MONITOR_RESUME 445 +#define _MAYBE_EXPAND_METHOD 449 +#define _MAYBE_EXPAND_METHOD_KW 450 +#define _MONITOR_CALL 451 +#define _MONITOR_CALL_KW 452 +#define _MONITOR_JUMP_BACKWARD 453 +#define _MONITOR_RESUME 454 #define _NOP NOP #define _POP_EXCEPT POP_EXCEPT -#define _POP_JUMP_IF_FALSE 446 -#define _POP_JUMP_IF_TRUE 447 +#define _POP_JUMP_IF_FALSE 455 +#define _POP_JUMP_IF_TRUE 456 #define _POP_TOP POP_TOP -#define _POP_TOP_LOAD_CONST_INLINE 448 -#define _POP_TOP_LOAD_CONST_INLINE_BORROW 449 -#define _POP_TWO_LOAD_CONST_INLINE_BORROW 450 +#define _POP_TOP_LOAD_CONST_INLINE 457 +#define _POP_TOP_LOAD_CONST_INLINE_BORROW 458 +#define _POP_TWO_LOAD_CONST_INLINE_BORROW 459 #define 
_PUSH_EXC_INFO PUSH_EXC_INFO -#define _PUSH_FRAME 451 +#define _PUSH_FRAME 460 #define _PUSH_NULL PUSH_NULL -#define _PUSH_NULL_CONDITIONAL 452 -#define _PY_FRAME_GENERAL 453 -#define _PY_FRAME_KW 454 -#define _QUICKEN_RESUME 455 -#define _REPLACE_WITH_TRUE 456 +#define _PUSH_NULL_CONDITIONAL 461 +#define _PY_FRAME_GENERAL 462 +#define _PY_FRAME_KW 463 +#define _QUICKEN_RESUME 464 +#define _REPLACE_WITH_TRUE 465 #define _RESUME_CHECK RESUME_CHECK #define _RETURN_GENERATOR RETURN_GENERATOR #define _RETURN_VALUE RETURN_VALUE -#define _SAVE_RETURN_OFFSET 457 -#define _SEND 458 -#define _SEND_GEN_FRAME 459 +#define _SAVE_RETURN_OFFSET 466 +#define _SEND 467 +#define _SEND_GEN_FRAME 468 #define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS #define _SET_ADD SET_ADD #define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE #define _SET_UPDATE SET_UPDATE -#define _START_EXECUTOR 460 -#define _STORE_ATTR 461 -#define _STORE_ATTR_INSTANCE_VALUE 462 -#define _STORE_ATTR_SLOT 463 -#define _STORE_ATTR_WITH_HINT 464 +#define _START_EXECUTOR 469 +#define _STORE_ATTR 470 +#define _STORE_ATTR_INSTANCE_VALUE 471 +#define _STORE_ATTR_SLOT 472 +#define _STORE_ATTR_WITH_HINT 473 #define _STORE_DEREF STORE_DEREF -#define _STORE_FAST 465 -#define _STORE_FAST_0 466 -#define _STORE_FAST_1 467 -#define _STORE_FAST_2 468 -#define _STORE_FAST_3 469 -#define _STORE_FAST_4 470 -#define _STORE_FAST_5 471 -#define _STORE_FAST_6 472 -#define _STORE_FAST_7 473 +#define _STORE_FAST 474 +#define _STORE_FAST_0 475 +#define _STORE_FAST_1 476 +#define _STORE_FAST_2 477 +#define _STORE_FAST_3 478 +#define _STORE_FAST_4 479 +#define _STORE_FAST_5 480 +#define _STORE_FAST_6 481 +#define _STORE_FAST_7 482 #define _STORE_FAST_LOAD_FAST STORE_FAST_LOAD_FAST #define _STORE_FAST_STORE_FAST STORE_FAST_STORE_FAST #define _STORE_GLOBAL STORE_GLOBAL #define _STORE_NAME STORE_NAME -#define _STORE_SLICE 474 -#define _STORE_SUBSCR 475 +#define _STORE_SLICE 483 +#define _STORE_SUBSCR 484 #define _STORE_SUBSCR_DICT 
STORE_SUBSCR_DICT #define _STORE_SUBSCR_LIST_INT STORE_SUBSCR_LIST_INT #define _SWAP SWAP -#define _TIER2_RESUME_CHECK 476 -#define _TO_BOOL 477 +#define _TIER2_RESUME_CHECK 485 +#define _TO_BOOL 486 #define _TO_BOOL_BOOL TO_BOOL_BOOL #define _TO_BOOL_INT TO_BOOL_INT #define _TO_BOOL_LIST TO_BOOL_LIST #define _TO_BOOL_NONE TO_BOOL_NONE -#define _TO_BOOL_STR 478 +#define _TO_BOOL_STR 487 #define _UNARY_INVERT UNARY_INVERT #define _UNARY_NEGATIVE UNARY_NEGATIVE #define _UNARY_NOT UNARY_NOT #define _UNPACK_EX UNPACK_EX -#define _UNPACK_SEQUENCE 479 +#define _UNPACK_SEQUENCE 488 #define _UNPACK_SEQUENCE_LIST UNPACK_SEQUENCE_LIST #define _UNPACK_SEQUENCE_TUPLE UNPACK_SEQUENCE_TUPLE #define _UNPACK_SEQUENCE_TWO_TUPLE UNPACK_SEQUENCE_TWO_TUPLE #define _WITH_EXCEPT_START WITH_EXCEPT_START #define _YIELD_VALUE YIELD_VALUE -#define MAX_UOP_ID 479 +#define MAX_UOP_ID 488 #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 4f5f6bbde2571a..32007d0d917e2f 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -33,8 +33,18 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_LOAD_FAST_6] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, [_LOAD_FAST_7] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, [_LOAD_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_PURE_FLAG, + [_LOAD_FAST_BORROW_0] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, + [_LOAD_FAST_BORROW_1] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, + [_LOAD_FAST_BORROW_2] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, + [_LOAD_FAST_BORROW_3] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, + [_LOAD_FAST_BORROW_4] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, + [_LOAD_FAST_BORROW_5] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, + [_LOAD_FAST_BORROW_6] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, + [_LOAD_FAST_BORROW_7] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, + [_LOAD_FAST_BORROW] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_PURE_FLAG, [_LOAD_FAST_AND_CLEAR] = HAS_ARG_FLAG | HAS_LOCAL_FLAG, [_LOAD_FAST_LOAD_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG, + 
[_LOAD_FAST_BORROW_LOAD_FAST_BORROW] = HAS_ARG_FLAG | HAS_LOCAL_FLAG, [_LOAD_CONST_MORTAL] = HAS_ARG_FLAG | HAS_CONST_FLAG, [_LOAD_CONST_IMMORTAL] = HAS_ARG_FLAG | HAS_CONST_FLAG, [_LOAD_SMALL_INT_0] = 0, @@ -287,6 +297,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { const uint8_t _PyUop_Replication[MAX_UOP_ID+1] = { [_LOAD_FAST] = 8, + [_LOAD_FAST_BORROW] = 8, [_LOAD_SMALL_INT] = 4, [_STORE_FAST] = 8, [_INIT_CALL_PY_EXACT_ARGS] = 5, @@ -466,6 +477,16 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_LOAD_FAST_6] = "_LOAD_FAST_6", [_LOAD_FAST_7] = "_LOAD_FAST_7", [_LOAD_FAST_AND_CLEAR] = "_LOAD_FAST_AND_CLEAR", + [_LOAD_FAST_BORROW] = "_LOAD_FAST_BORROW", + [_LOAD_FAST_BORROW_0] = "_LOAD_FAST_BORROW_0", + [_LOAD_FAST_BORROW_1] = "_LOAD_FAST_BORROW_1", + [_LOAD_FAST_BORROW_2] = "_LOAD_FAST_BORROW_2", + [_LOAD_FAST_BORROW_3] = "_LOAD_FAST_BORROW_3", + [_LOAD_FAST_BORROW_4] = "_LOAD_FAST_BORROW_4", + [_LOAD_FAST_BORROW_5] = "_LOAD_FAST_BORROW_5", + [_LOAD_FAST_BORROW_6] = "_LOAD_FAST_BORROW_6", + [_LOAD_FAST_BORROW_7] = "_LOAD_FAST_BORROW_7", + [_LOAD_FAST_BORROW_LOAD_FAST_BORROW] = "_LOAD_FAST_BORROW_LOAD_FAST_BORROW", [_LOAD_FAST_CHECK] = "_LOAD_FAST_CHECK", [_LOAD_FAST_LOAD_FAST] = "_LOAD_FAST_LOAD_FAST", [_LOAD_FROM_DICT_OR_DEREF] = "_LOAD_FROM_DICT_OR_DEREF", @@ -589,10 +610,30 @@ int _PyUop_num_popped(int opcode, int oparg) return 0; case _LOAD_FAST: return 0; + case _LOAD_FAST_BORROW_0: + return 0; + case _LOAD_FAST_BORROW_1: + return 0; + case _LOAD_FAST_BORROW_2: + return 0; + case _LOAD_FAST_BORROW_3: + return 0; + case _LOAD_FAST_BORROW_4: + return 0; + case _LOAD_FAST_BORROW_5: + return 0; + case _LOAD_FAST_BORROW_6: + return 0; + case _LOAD_FAST_BORROW_7: + return 0; + case _LOAD_FAST_BORROW: + return 0; case _LOAD_FAST_AND_CLEAR: return 0; case _LOAD_FAST_LOAD_FAST: return 0; + case _LOAD_FAST_BORROW_LOAD_FAST_BORROW: + return 0; case _LOAD_CONST_MORTAL: return 0; case _LOAD_CONST_IMMORTAL: diff --git a/Include/opcode_ids.h 
b/Include/opcode_ids.h index e4e6a88276655e..898dc580f4148e 100644 --- a/Include/opcode_ids.h +++ b/Include/opcode_ids.h @@ -94,39 +94,41 @@ extern "C" { #define LOAD_DEREF 81 #define LOAD_FAST 82 #define LOAD_FAST_AND_CLEAR 83 -#define LOAD_FAST_CHECK 84 -#define LOAD_FAST_LOAD_FAST 85 -#define LOAD_FROM_DICT_OR_DEREF 86 -#define LOAD_FROM_DICT_OR_GLOBALS 87 -#define LOAD_GLOBAL 88 -#define LOAD_NAME 89 -#define LOAD_SMALL_INT 90 -#define LOAD_SPECIAL 91 -#define LOAD_SUPER_ATTR 92 -#define MAKE_CELL 93 -#define MAP_ADD 94 -#define MATCH_CLASS 95 -#define POP_JUMP_IF_FALSE 96 -#define POP_JUMP_IF_NONE 97 -#define POP_JUMP_IF_NOT_NONE 98 -#define POP_JUMP_IF_TRUE 99 -#define RAISE_VARARGS 100 -#define RERAISE 101 -#define SEND 102 -#define SET_ADD 103 -#define SET_FUNCTION_ATTRIBUTE 104 -#define SET_UPDATE 105 -#define STORE_ATTR 106 -#define STORE_DEREF 107 -#define STORE_FAST 108 -#define STORE_FAST_LOAD_FAST 109 -#define STORE_FAST_STORE_FAST 110 -#define STORE_GLOBAL 111 -#define STORE_NAME 112 -#define SWAP 113 -#define UNPACK_EX 114 -#define UNPACK_SEQUENCE 115 -#define YIELD_VALUE 116 +#define LOAD_FAST_BORROW 84 +#define LOAD_FAST_BORROW_LOAD_FAST_BORROW 85 +#define LOAD_FAST_CHECK 86 +#define LOAD_FAST_LOAD_FAST 87 +#define LOAD_FROM_DICT_OR_DEREF 88 +#define LOAD_FROM_DICT_OR_GLOBALS 89 +#define LOAD_GLOBAL 90 +#define LOAD_NAME 91 +#define LOAD_SMALL_INT 92 +#define LOAD_SPECIAL 93 +#define LOAD_SUPER_ATTR 94 +#define MAKE_CELL 95 +#define MAP_ADD 96 +#define MATCH_CLASS 97 +#define POP_JUMP_IF_FALSE 98 +#define POP_JUMP_IF_NONE 99 +#define POP_JUMP_IF_NOT_NONE 100 +#define POP_JUMP_IF_TRUE 101 +#define RAISE_VARARGS 102 +#define RERAISE 103 +#define SEND 104 +#define SET_ADD 105 +#define SET_FUNCTION_ATTRIBUTE 106 +#define SET_UPDATE 107 +#define STORE_ATTR 108 +#define STORE_DEREF 109 +#define STORE_FAST 110 +#define STORE_FAST_LOAD_FAST 111 +#define STORE_FAST_STORE_FAST 112 +#define STORE_GLOBAL 113 +#define STORE_NAME 114 +#define SWAP 115 +#define 
UNPACK_EX 116 +#define UNPACK_SEQUENCE 117 +#define YIELD_VALUE 118 #define RESUME 128 #define BINARY_OP_ADD_FLOAT 129 #define BINARY_OP_ADD_INT 130 diff --git a/Lib/_opcode_metadata.py b/Lib/_opcode_metadata.py index 2c399d2f7f4631..15900265a01270 100644 --- a/Lib/_opcode_metadata.py +++ b/Lib/_opcode_metadata.py @@ -295,39 +295,41 @@ 'LOAD_DEREF': 81, 'LOAD_FAST': 82, 'LOAD_FAST_AND_CLEAR': 83, - 'LOAD_FAST_CHECK': 84, - 'LOAD_FAST_LOAD_FAST': 85, - 'LOAD_FROM_DICT_OR_DEREF': 86, - 'LOAD_FROM_DICT_OR_GLOBALS': 87, - 'LOAD_GLOBAL': 88, - 'LOAD_NAME': 89, - 'LOAD_SMALL_INT': 90, - 'LOAD_SPECIAL': 91, - 'LOAD_SUPER_ATTR': 92, - 'MAKE_CELL': 93, - 'MAP_ADD': 94, - 'MATCH_CLASS': 95, - 'POP_JUMP_IF_FALSE': 96, - 'POP_JUMP_IF_NONE': 97, - 'POP_JUMP_IF_NOT_NONE': 98, - 'POP_JUMP_IF_TRUE': 99, - 'RAISE_VARARGS': 100, - 'RERAISE': 101, - 'SEND': 102, - 'SET_ADD': 103, - 'SET_FUNCTION_ATTRIBUTE': 104, - 'SET_UPDATE': 105, - 'STORE_ATTR': 106, - 'STORE_DEREF': 107, - 'STORE_FAST': 108, - 'STORE_FAST_LOAD_FAST': 109, - 'STORE_FAST_STORE_FAST': 110, - 'STORE_GLOBAL': 111, - 'STORE_NAME': 112, - 'SWAP': 113, - 'UNPACK_EX': 114, - 'UNPACK_SEQUENCE': 115, - 'YIELD_VALUE': 116, + 'LOAD_FAST_BORROW': 84, + 'LOAD_FAST_BORROW_LOAD_FAST_BORROW': 85, + 'LOAD_FAST_CHECK': 86, + 'LOAD_FAST_LOAD_FAST': 87, + 'LOAD_FROM_DICT_OR_DEREF': 88, + 'LOAD_FROM_DICT_OR_GLOBALS': 89, + 'LOAD_GLOBAL': 90, + 'LOAD_NAME': 91, + 'LOAD_SMALL_INT': 92, + 'LOAD_SPECIAL': 93, + 'LOAD_SUPER_ATTR': 94, + 'MAKE_CELL': 95, + 'MAP_ADD': 96, + 'MATCH_CLASS': 97, + 'POP_JUMP_IF_FALSE': 98, + 'POP_JUMP_IF_NONE': 99, + 'POP_JUMP_IF_NOT_NONE': 100, + 'POP_JUMP_IF_TRUE': 101, + 'RAISE_VARARGS': 102, + 'RERAISE': 103, + 'SEND': 104, + 'SET_ADD': 105, + 'SET_FUNCTION_ATTRIBUTE': 106, + 'SET_UPDATE': 107, + 'STORE_ATTR': 108, + 'STORE_DEREF': 109, + 'STORE_FAST': 110, + 'STORE_FAST_LOAD_FAST': 111, + 'STORE_FAST_STORE_FAST': 112, + 'STORE_GLOBAL': 113, + 'STORE_NAME': 114, + 'SWAP': 115, + 'UNPACK_EX': 116, + 
'UNPACK_SEQUENCE': 117, + 'YIELD_VALUE': 118, 'INSTRUMENTED_END_FOR': 234, 'INSTRUMENTED_POP_ITER': 235, 'INSTRUMENTED_END_SEND': 236, diff --git a/Lib/dis.py b/Lib/dis.py index c0a25dea2a9a95..cb6d077a391677 100644 --- a/Lib/dis.py +++ b/Lib/dis.py @@ -48,6 +48,7 @@ LOAD_COMMON_CONSTANT = opmap['LOAD_COMMON_CONSTANT'] LOAD_SPECIAL = opmap['LOAD_SPECIAL'] LOAD_FAST_LOAD_FAST = opmap['LOAD_FAST_LOAD_FAST'] +LOAD_FAST_BORROW_LOAD_FAST_BORROW = opmap['LOAD_FAST_BORROW_LOAD_FAST_BORROW'] STORE_FAST_LOAD_FAST = opmap['STORE_FAST_LOAD_FAST'] STORE_FAST_STORE_FAST = opmap['STORE_FAST_STORE_FAST'] IS_OP = opmap['IS_OP'] @@ -608,7 +609,7 @@ def get_argval_argrepr(self, op, arg, offset): assert lbl is not None preposition = "from" if deop == END_ASYNC_FOR else "to" argrepr = f"{preposition} L{lbl}" - elif deop in (LOAD_FAST_LOAD_FAST, STORE_FAST_LOAD_FAST, STORE_FAST_STORE_FAST): + elif deop in (LOAD_FAST_LOAD_FAST, LOAD_FAST_BORROW_LOAD_FAST_BORROW, STORE_FAST_LOAD_FAST, STORE_FAST_STORE_FAST): arg1 = arg >> 4 arg2 = arg & 15 val1, argrepr1 = _get_name_info(arg1, self.varname_from_oparg) diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index feab3b8b84f566..57556daa3ae74b 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -134,7 +134,7 @@ def testfunc(x): self.assertIsNotNone(ex) uops = get_opnames(ex) self.assertIn("_JUMP_TO_TOP", uops) - self.assertIn("_LOAD_FAST_0", uops) + self.assertIn("_LOAD_FAST_BORROW_0", uops) def test_extended_arg(self): "Check EXTENDED_ARG handling in superblock creation" @@ -180,7 +180,7 @@ def many_vars(): ex = get_first_executor(many_vars) self.assertIsNotNone(ex) - self.assertTrue(any((opcode, oparg, operand) == ("_LOAD_FAST", 259, 0) + self.assertTrue(any((opcode, oparg, operand) == ("_LOAD_FAST_BORROW", 259, 0) for opcode, oparg, _, operand in list(ex))) def test_unspecialized_unpack(self): diff --git a/Lib/test/test_ctypes/test_memfunctions.py 
b/Lib/test/test_ctypes/test_memfunctions.py index 325487618137f6..e3cb5db775ef62 100644 --- a/Lib/test/test_ctypes/test_memfunctions.py +++ b/Lib/test/test_ctypes/test_memfunctions.py @@ -60,9 +60,6 @@ def test_cast(self): @support.refcount_test def test_string_at(self): s = string_at(b"foo bar") - # XXX The following may be wrong, depending on how Python - # manages string instances - self.assertEqual(2, sys.getrefcount(s)) self.assertTrue(s, "foo bar") self.assertEqual(string_at(b"foo bar", 7), b"foo bar") diff --git a/Lib/test/test_ctypes/test_refcounts.py b/Lib/test/test_ctypes/test_refcounts.py index 012722d8486218..1fe4b3eca2c50e 100644 --- a/Lib/test/test_ctypes/test_refcounts.py +++ b/Lib/test/test_ctypes/test_refcounts.py @@ -24,36 +24,35 @@ def test_1(self): def callback(value): return value - self.assertEqual(sys.getrefcount(callback), 2) + orig_refcount = sys.getrefcount(callback) cb = MyCallback(callback) - self.assertGreater(sys.getrefcount(callback), 2) + self.assertGreater(sys.getrefcount(callback), orig_refcount) result = f(-10, cb) self.assertEqual(result, -18) cb = None gc.collect() - self.assertEqual(sys.getrefcount(callback), 2) + self.assertEqual(sys.getrefcount(callback), orig_refcount) @support.refcount_test def test_refcount(self): def func(*args): pass - # this is the standard refcount for func - self.assertEqual(sys.getrefcount(func), 2) + orig_refcount = sys.getrefcount(func) # the CFuncPtr instance holds at least one refcount on func: f = OtherCallback(func) - self.assertGreater(sys.getrefcount(func), 2) + self.assertGreater(sys.getrefcount(func), orig_refcount) # and may release it again del f - self.assertGreaterEqual(sys.getrefcount(func), 2) + self.assertGreaterEqual(sys.getrefcount(func), orig_refcount) # but now it must be gone gc.collect() - self.assertEqual(sys.getrefcount(func), 2) + self.assertEqual(sys.getrefcount(func), orig_refcount) class X(ctypes.Structure): _fields_ = [("a", OtherCallback)] @@ -61,27 +60,27 @@ class 
X(ctypes.Structure): x.a = OtherCallback(func) # the CFuncPtr instance holds at least one refcount on func: - self.assertGreater(sys.getrefcount(func), 2) + self.assertGreater(sys.getrefcount(func), orig_refcount) # and may release it again del x - self.assertGreaterEqual(sys.getrefcount(func), 2) + self.assertGreaterEqual(sys.getrefcount(func), orig_refcount) # and now it must be gone again gc.collect() - self.assertEqual(sys.getrefcount(func), 2) + self.assertEqual(sys.getrefcount(func), orig_refcount) f = OtherCallback(func) # the CFuncPtr instance holds at least one refcount on func: - self.assertGreater(sys.getrefcount(func), 2) + self.assertGreater(sys.getrefcount(func), orig_refcount) # create a cycle f.cycle = f del f gc.collect() - self.assertEqual(sys.getrefcount(func), 2) + self.assertEqual(sys.getrefcount(func), orig_refcount) class AnotherLeak(unittest.TestCase): diff --git a/Lib/test/test_ctypes/test_stringptr.py b/Lib/test/test_ctypes/test_stringptr.py index bb6045b250ffce..a6a2dec68df68e 100644 --- a/Lib/test/test_ctypes/test_stringptr.py +++ b/Lib/test/test_ctypes/test_stringptr.py @@ -20,9 +20,9 @@ class X(Structure): # NULL pointer access self.assertRaises(ValueError, getattr, x.str, "contents") b = create_string_buffer(b"Hello, World") - self.assertEqual(sys.getrefcount(b), 2) + orig_refcount = sys.getrefcount(b) x.str = b - self.assertEqual(sys.getrefcount(b), 3) + self.assertEqual(sys.getrefcount(b), orig_refcount + 1) # POINTER(c_char) and Python string is NOT compatible # POINTER(c_char) and create_string_buffer() is compatible diff --git a/Lib/test/test_dis.py b/Lib/test/test_dis.py index 726af931a38ecd..58ba86fb43092a 100644 --- a/Lib/test/test_dis.py +++ b/Lib/test/test_dis.py @@ -51,10 +51,10 @@ def cm(cls, x): dis_c_instance_method = """\ %3d RESUME 0 -%3d LOAD_FAST 1 (x) +%3d LOAD_FAST_BORROW 1 (x) LOAD_SMALL_INT 1 COMPARE_OP 72 (==) - LOAD_FAST 0 (self) + LOAD_FAST_BORROW 0 (self) STORE_ATTR 0 (x) LOAD_CONST 1 (None) RETURN_VALUE @@ 
-62,10 +62,10 @@ def cm(cls, x): dis_c_instance_method_bytes = """\ RESUME 0 - LOAD_FAST 1 + LOAD_FAST_BORROW 1 LOAD_SMALL_INT 1 COMPARE_OP 72 (==) - LOAD_FAST 0 + LOAD_FAST_BORROW 0 STORE_ATTR 0 LOAD_CONST 1 RETURN_VALUE @@ -74,10 +74,10 @@ def cm(cls, x): dis_c_class_method = """\ %3d RESUME 0 -%3d LOAD_FAST 1 (x) +%3d LOAD_FAST_BORROW 1 (x) LOAD_SMALL_INT 1 COMPARE_OP 72 (==) - LOAD_FAST 0 (cls) + LOAD_FAST_BORROW 0 (cls) STORE_ATTR 0 (x) LOAD_CONST 1 (None) RETURN_VALUE @@ -86,7 +86,7 @@ def cm(cls, x): dis_c_static_method = """\ %3d RESUME 0 -%3d LOAD_FAST 0 (x) +%3d LOAD_FAST_BORROW 0 (x) LOAD_SMALL_INT 1 COMPARE_OP 72 (==) STORE_FAST 0 (x) @@ -114,7 +114,7 @@ def _f(a): %3d RESUME 0 %3d LOAD_GLOBAL 1 (print + NULL) - LOAD_FAST 0 (a) + LOAD_FAST_BORROW 0 (a) CALL 1 POP_TOP @@ -128,7 +128,7 @@ def _f(a): %3d 0 RESUME 0 %3d 2 LOAD_GLOBAL 1 (print + NULL) - 12 LOAD_FAST 0 (a) + 12 LOAD_FAST_BORROW 0 (a) 14 CALL 1 22 POP_TOP @@ -142,7 +142,7 @@ def _f(a): %-14s RESUME 0 %-14s LOAD_GLOBAL 1 (print + NULL) -%-14s LOAD_FAST 0 (a) +%-14s LOAD_FAST_BORROW 0 (a) %-14s CALL 1 %-14s POP_TOP @@ -153,7 +153,7 @@ def _f(a): dis_f_co_code = """\ RESUME 0 LOAD_GLOBAL 1 - LOAD_FAST 0 + LOAD_FAST_BORROW 0 CALL 1 POP_TOP LOAD_SMALL_INT 1 @@ -203,7 +203,7 @@ def bug1333982(x=[]): %3d LOAD_COMMON_CONSTANT 0 (AssertionError) LOAD_CONST 1 (<code object <genexpr> at 0x..., file "%s", line %d>) MAKE_FUNCTION - LOAD_FAST 0 (x) + LOAD_FAST_BORROW 0 (x) GET_ITER CALL 0 @@ -503,18 +503,18 @@ def _fstring(a, b, c, d): dis_fstring = """\ %3d RESUME 0 -%3d LOAD_FAST 0 (a) +%3d LOAD_FAST_BORROW 0 (a) FORMAT_SIMPLE LOAD_CONST 0 (' ') - LOAD_FAST 1 (b) + LOAD_FAST_BORROW 1 (b) LOAD_CONST 1 ('4') FORMAT_WITH_SPEC LOAD_CONST 0 (' ') - LOAD_FAST 2 (c) + LOAD_FAST_BORROW 2 (c) CONVERT_VALUE 2 (repr) FORMAT_SIMPLE LOAD_CONST 0 (' ') - LOAD_FAST 3 (d) + LOAD_FAST_BORROW 3 (d) CONVERT_VALUE 2 (repr) LOAD_CONST 1 ('4') FORMAT_WITH_SPEC @@ -530,7 +530,7 @@ def _with(c): dis_with = """\ %4d RESUME 0 -%4d 
LOAD_FAST 0 (c) +%4d LOAD_FAST_BORROW 0 (c) COPY 1 LOAD_SPECIAL 1 (__exit__) SWAP 2 @@ -595,7 +595,7 @@ async def _asyncwith(c): POP_TOP L1: RESUME 0 -%4d LOAD_FAST 0 (c) +%4d LOAD_FAST_BORROW 0 (c) COPY 1 LOAD_SPECIAL 3 (__aexit__) SWAP 2 @@ -707,9 +707,9 @@ def _tryfinallyconst(b): %4d NOP -%4d L1: LOAD_FAST 0 (a) +%4d L1: LOAD_FAST_BORROW 0 (a) -%4d L2: LOAD_FAST 1 (b) +%4d L2: LOAD_FAST_BORROW 1 (b) PUSH_NULL CALL 0 POP_TOP @@ -743,7 +743,7 @@ def _tryfinallyconst(b): %4d NOP -%4d LOAD_FAST 0 (b) +%4d LOAD_FAST_BORROW 0 (b) PUSH_NULL CALL 0 POP_TOP @@ -791,14 +791,14 @@ def foo(x): %4d RESUME 0 -%4d LOAD_FAST 0 (y) +%4d LOAD_FAST_BORROW 0 (y) BUILD_TUPLE 1 LOAD_CONST 0 (<code object foo at 0x..., file "%s", line %d>) MAKE_FUNCTION SET_FUNCTION_ATTRIBUTE 8 (closure) STORE_FAST 1 (foo) -%4d LOAD_FAST 1 (foo) +%4d LOAD_FAST_BORROW 1 (foo) RETURN_VALUE """ % (_h.__code__.co_firstlineno, _h.__code__.co_firstlineno + 1, @@ -815,7 +815,7 @@ def foo(x): %4d RESUME 0 %4d LOAD_GLOBAL 1 (list + NULL) - LOAD_FAST 0 (x) + LOAD_FAST_BORROW 0 (x) BUILD_TUPLE 1 LOAD_CONST 1 (<code object <genexpr> at 0x..., file "%s", line %d>) MAKE_FUNCTION @@ -841,12 +841,12 @@ def foo(x): %4d RETURN_GENERATOR POP_TOP L1: RESUME 0 - LOAD_FAST 0 (.0) + LOAD_FAST_BORROW 0 (.0) GET_ITER L2: FOR_ITER 14 (to L3) STORE_FAST 1 (z) LOAD_DEREF 2 (x) - LOAD_FAST 1 (z) + LOAD_FAST_BORROW 1 (z) BINARY_OP 0 (+) YIELD_VALUE 0 RESUME 5 @@ -877,7 +877,7 @@ def load_test(x, y=0): %3d LOAD_FAST_LOAD_FAST 1 (x, y) STORE_FAST_STORE_FAST 50 (b, a) -%3d LOAD_FAST_LOAD_FAST 35 (a, b) +%3d LOAD_FAST_BORROW_LOAD_FAST_BORROW 35 (a, b) BUILD_TUPLE 2 RETURN_VALUE """ % (load_test.__code__.co_firstlineno, @@ -901,7 +901,7 @@ def loop_test(): STORE_FAST 0 (i) %3d LOAD_GLOBAL_MODULE 1 (load_test + NULL) - LOAD_FAST 0 (i) + LOAD_FAST_BORROW 0 (i) CALL_PY_GENERAL 1 POP_TOP JUMP_BACKWARD_{: <6} 16 (to L1) @@ -996,6 +996,7 @@ def test_boundaries(self): def test_widths(self): long_opcodes = set(['JUMP_BACKWARD_NO_INTERRUPT', + 
'LOAD_FAST_BORROW_LOAD_FAST_BORROW', 'INSTRUMENTED_CALL_FUNCTION_EX']) for op, opname in enumerate(dis.opname): if opname in long_opcodes or opname.startswith("INSTRUMENTED"): @@ -1739,8 +1740,8 @@ def _prepare_test_cases(): make_inst(opname='MAKE_CELL', arg=1, argval='b', argrepr='b', offset=2, start_offset=2, starts_line=False, line_number=None), make_inst(opname='RESUME', arg=0, argval=0, argrepr='', offset=4, start_offset=4, starts_line=True, line_number=1), make_inst(opname='LOAD_CONST', arg=4, argval=(3, 4), argrepr='(3, 4)', offset=6, start_offset=6, starts_line=True, line_number=2), - make_inst(opname='LOAD_FAST', arg=0, argval='a', argrepr='a', offset=8, start_offset=8, starts_line=False, line_number=2), - make_inst(opname='LOAD_FAST', arg=1, argval='b', argrepr='b', offset=10, start_offset=10, starts_line=False, line_number=2), + make_inst(opname='LOAD_FAST_BORROW', arg=0, argval='a', argrepr='a', offset=8, start_offset=8, starts_line=False, line_number=2), + make_inst(opname='LOAD_FAST_BORROW', arg=1, argval='b', argrepr='b', offset=10, start_offset=10, starts_line=False, line_number=2), make_inst(opname='BUILD_TUPLE', arg=2, argval=2, argrepr='', offset=12, start_offset=12, starts_line=False, line_number=2), make_inst(opname='LOAD_CONST', arg=1, argval=code_object_f, argrepr=repr(code_object_f), offset=14, start_offset=14, starts_line=False, line_number=2), make_inst(opname='MAKE_FUNCTION', arg=None, argval=None, argrepr='', offset=16, start_offset=16, starts_line=False, line_number=2), @@ -1757,7 +1758,7 @@ def _prepare_test_cases(): make_inst(opname='LOAD_CONST', arg=3, argval='Hello world!', argrepr="'Hello world!'", offset=46, start_offset=46, starts_line=False, line_number=7), make_inst(opname='CALL', arg=7, argval=7, argrepr='', offset=48, start_offset=48, starts_line=False, line_number=7, cache_info=[('counter', 1, b'\x00\x00'), ('func_version', 2, b'\x00\x00\x00\x00')]), make_inst(opname='POP_TOP', arg=None, argval=None, argrepr='', offset=56, 
start_offset=56, starts_line=False, line_number=7), - make_inst(opname='LOAD_FAST', arg=2, argval='f', argrepr='f', offset=58, start_offset=58, starts_line=True, line_number=8), + make_inst(opname='LOAD_FAST_BORROW', arg=2, argval='f', argrepr='f', offset=58, start_offset=58, starts_line=True, line_number=8), make_inst(opname='RETURN_VALUE', arg=None, argval=None, argrepr='', offset=60, start_offset=60, starts_line=False, line_number=8), ] @@ -1767,10 +1768,10 @@ def _prepare_test_cases(): make_inst(opname='MAKE_CELL', arg=1, argval='d', argrepr='d', offset=4, start_offset=4, starts_line=False, line_number=None), make_inst(opname='RESUME', arg=0, argval=0, argrepr='', offset=6, start_offset=6, starts_line=True, line_number=2), make_inst(opname='LOAD_CONST', arg=2, argval=(5, 6), argrepr='(5, 6)', offset=8, start_offset=8, starts_line=True, line_number=3), - make_inst(opname='LOAD_FAST', arg=3, argval='a', argrepr='a', offset=10, start_offset=10, starts_line=False, line_number=3), - make_inst(opname='LOAD_FAST', arg=4, argval='b', argrepr='b', offset=12, start_offset=12, starts_line=False, line_number=3), - make_inst(opname='LOAD_FAST', arg=0, argval='c', argrepr='c', offset=14, start_offset=14, starts_line=False, line_number=3), - make_inst(opname='LOAD_FAST', arg=1, argval='d', argrepr='d', offset=16, start_offset=16, starts_line=False, line_number=3), + make_inst(opname='LOAD_FAST_BORROW', arg=3, argval='a', argrepr='a', offset=10, start_offset=10, starts_line=False, line_number=3), + make_inst(opname='LOAD_FAST_BORROW', arg=4, argval='b', argrepr='b', offset=12, start_offset=12, starts_line=False, line_number=3), + make_inst(opname='LOAD_FAST_BORROW', arg=0, argval='c', argrepr='c', offset=14, start_offset=14, starts_line=False, line_number=3), + make_inst(opname='LOAD_FAST_BORROW', arg=1, argval='d', argrepr='d', offset=16, start_offset=16, starts_line=False, line_number=3), make_inst(opname='BUILD_TUPLE', arg=4, argval=4, argrepr='', offset=18, 
start_offset=18, starts_line=False, line_number=3), make_inst(opname='LOAD_CONST', arg=1, argval=code_object_inner, argrepr=repr(code_object_inner), offset=20, start_offset=20, starts_line=False, line_number=3), make_inst(opname='MAKE_FUNCTION', arg=None, argval=None, argrepr='', offset=22, start_offset=22, starts_line=False, line_number=3), @@ -1784,7 +1785,7 @@ def _prepare_test_cases(): make_inst(opname='LOAD_DEREF', arg=1, argval='d', argrepr='d', offset=46, start_offset=46, starts_line=False, line_number=5), make_inst(opname='CALL', arg=4, argval=4, argrepr='', offset=48, start_offset=48, starts_line=False, line_number=5, cache_info=[('counter', 1, b'\x00\x00'), ('func_version', 2, b'\x00\x00\x00\x00')]), make_inst(opname='POP_TOP', arg=None, argval=None, argrepr='', offset=56, start_offset=56, starts_line=False, line_number=5), - make_inst(opname='LOAD_FAST', arg=2, argval='inner', argrepr='inner', offset=58, start_offset=58, starts_line=True, line_number=6), + make_inst(opname='LOAD_FAST_BORROW', arg=2, argval='inner', argrepr='inner', offset=58, start_offset=58, starts_line=True, line_number=6), make_inst(opname='RETURN_VALUE', arg=None, argval=None, argrepr='', offset=60, start_offset=60, starts_line=False, line_number=6), ] @@ -1796,7 +1797,7 @@ def _prepare_test_cases(): make_inst(opname='LOAD_DEREF', arg=3, argval='b', argrepr='b', offset=16, start_offset=16, starts_line=False, line_number=4), make_inst(opname='LOAD_DEREF', arg=4, argval='c', argrepr='c', offset=18, start_offset=18, starts_line=False, line_number=4), make_inst(opname='LOAD_DEREF', arg=5, argval='d', argrepr='d', offset=20, start_offset=20, starts_line=False, line_number=4), - make_inst(opname='LOAD_FAST_LOAD_FAST', arg=1, argval=('e', 'f'), argrepr='e, f', offset=22, start_offset=22, starts_line=False, line_number=4), + make_inst(opname='LOAD_FAST_BORROW_LOAD_FAST_BORROW', arg=1, argval=('e', 'f'), argrepr='e, f', offset=22, start_offset=22, starts_line=False, line_number=4), 
make_inst(opname='CALL', arg=6, argval=6, argrepr='', offset=24, start_offset=24, starts_line=False, line_number=4, cache_info=[('counter', 1, b'\x00\x00'), ('func_version', 2, b'\x00\x00\x00\x00')]), make_inst(opname='POP_TOP', arg=None, argval=None, argrepr='', offset=32, start_offset=32, starts_line=False, line_number=4), make_inst(opname='LOAD_CONST', arg=0, argval=None, argrepr='None', offset=34, start_offset=34, starts_line=False, line_number=4), @@ -1812,16 +1813,16 @@ def _prepare_test_cases(): make_inst(opname='FOR_ITER', arg=32, argval=92, argrepr='to L4', offset=24, start_offset=24, starts_line=False, line_number=3, label=1, cache_info=[('counter', 1, b'\x00\x00')]), make_inst(opname='STORE_FAST', arg=0, argval='i', argrepr='i', offset=28, start_offset=28, starts_line=False, line_number=3), make_inst(opname='LOAD_GLOBAL', arg=3, argval='print', argrepr='print + NULL', offset=30, start_offset=30, starts_line=True, line_number=4, cache_info=[('counter', 1, b'\x00\x00'), ('index', 1, b'\x00\x00'), ('module_keys_version', 1, b'\x00\x00'), ('builtin_keys_version', 1, b'\x00\x00')]), - make_inst(opname='LOAD_FAST', arg=0, argval='i', argrepr='i', offset=40, start_offset=40, starts_line=False, line_number=4), + make_inst(opname='LOAD_FAST_BORROW', arg=0, argval='i', argrepr='i', offset=40, start_offset=40, starts_line=False, line_number=4), make_inst(opname='CALL', arg=1, argval=1, argrepr='', offset=42, start_offset=42, starts_line=False, line_number=4, cache_info=[('counter', 1, b'\x00\x00'), ('func_version', 2, b'\x00\x00\x00\x00')]), make_inst(opname='POP_TOP', arg=None, argval=None, argrepr='', offset=50, start_offset=50, starts_line=False, line_number=4), - make_inst(opname='LOAD_FAST', arg=0, argval='i', argrepr='i', offset=52, start_offset=52, starts_line=True, line_number=5), + make_inst(opname='LOAD_FAST_BORROW', arg=0, argval='i', argrepr='i', offset=52, start_offset=52, starts_line=True, line_number=5), make_inst(opname='LOAD_SMALL_INT', arg=4, 
argval=4, argrepr='', offset=54, start_offset=54, starts_line=False, line_number=5), make_inst(opname='COMPARE_OP', arg=18, argval='<', argrepr='bool(<)', offset=56, start_offset=56, starts_line=False, line_number=5, cache_info=[('counter', 1, b'\x00\x00')]), make_inst(opname='POP_JUMP_IF_FALSE', arg=3, argval=70, argrepr='to L2', offset=60, start_offset=60, starts_line=False, line_number=5, cache_info=[('counter', 1, b'\x00\x00')]), make_inst(opname='NOT_TAKEN', arg=None, argval=None, argrepr='', offset=64, start_offset=64, starts_line=False, line_number=5), make_inst(opname='JUMP_BACKWARD', arg=23, argval=24, argrepr='to L1', offset=66, start_offset=66, starts_line=True, line_number=6, cache_info=[('counter', 1, b'\x00\x00')]), - make_inst(opname='LOAD_FAST', arg=0, argval='i', argrepr='i', offset=70, start_offset=70, starts_line=True, line_number=7, label=2), + make_inst(opname='LOAD_FAST_BORROW', arg=0, argval='i', argrepr='i', offset=70, start_offset=70, starts_line=True, line_number=7, label=2), make_inst(opname='LOAD_SMALL_INT', arg=6, argval=6, argrepr='', offset=72, start_offset=72, starts_line=False, line_number=7), make_inst(opname='COMPARE_OP', arg=148, argval='>', argrepr='bool(>)', offset=74, start_offset=74, starts_line=False, line_number=7, cache_info=[('counter', 1, b'\x00\x00')]), make_inst(opname='POP_JUMP_IF_TRUE', arg=3, argval=88, argrepr='to L3', offset=78, start_offset=78, starts_line=False, line_number=7, cache_info=[('counter', 1, b'\x00\x00')]), @@ -1840,20 +1841,20 @@ def _prepare_test_cases(): make_inst(opname='POP_JUMP_IF_FALSE', arg=40, argval=212, argrepr='to L8', offset=128, start_offset=128, starts_line=False, line_number=11, cache_info=[('counter', 1, b'\x00\x00')]), make_inst(opname='NOT_TAKEN', arg=None, argval=None, argrepr='', offset=132, start_offset=132, starts_line=False, line_number=11), make_inst(opname='LOAD_GLOBAL', arg=3, argval='print', argrepr='print + NULL', offset=134, start_offset=134, starts_line=True, 
line_number=12, cache_info=[('counter', 1, b'\x00\x00'), ('index', 1, b'\x00\x00'), ('module_keys_version', 1, b'\x00\x00'), ('builtin_keys_version', 1, b'\x00\x00')]), - make_inst(opname='LOAD_FAST', arg=0, argval='i', argrepr='i', offset=144, start_offset=144, starts_line=False, line_number=12), + make_inst(opname='LOAD_FAST_BORROW', arg=0, argval='i', argrepr='i', offset=144, start_offset=144, starts_line=False, line_number=12), make_inst(opname='CALL', arg=1, argval=1, argrepr='', offset=146, start_offset=146, starts_line=False, line_number=12, cache_info=[('counter', 1, b'\x00\x00'), ('func_version', 2, b'\x00\x00\x00\x00')]), make_inst(opname='POP_TOP', arg=None, argval=None, argrepr='', offset=154, start_offset=154, starts_line=False, line_number=12), - make_inst(opname='LOAD_FAST', arg=0, argval='i', argrepr='i', offset=156, start_offset=156, starts_line=True, line_number=13), + make_inst(opname='LOAD_FAST_BORROW', arg=0, argval='i', argrepr='i', offset=156, start_offset=156, starts_line=True, line_number=13), make_inst(opname='LOAD_SMALL_INT', arg=1, argval=1, argrepr='', offset=158, start_offset=158, starts_line=False, line_number=13), make_inst(opname='BINARY_OP', arg=23, argval=23, argrepr='-=', offset=160, start_offset=160, starts_line=False, line_number=13, cache_info=[('counter', 1, b'\x00\x00'), ('descr', 4, b'\x00\x00\x00\x00\x00\x00\x00\x00')]), make_inst(opname='STORE_FAST', arg=0, argval='i', argrepr='i', offset=172, start_offset=172, starts_line=False, line_number=13), - make_inst(opname='LOAD_FAST', arg=0, argval='i', argrepr='i', offset=174, start_offset=174, starts_line=True, line_number=14), + make_inst(opname='LOAD_FAST_BORROW', arg=0, argval='i', argrepr='i', offset=174, start_offset=174, starts_line=True, line_number=14), make_inst(opname='LOAD_SMALL_INT', arg=6, argval=6, argrepr='', offset=176, start_offset=176, starts_line=False, line_number=14), make_inst(opname='COMPARE_OP', arg=148, argval='>', argrepr='bool(>)', offset=178, 
start_offset=178, starts_line=False, line_number=14, cache_info=[('counter', 1, b'\x00\x00')]), make_inst(opname='POP_JUMP_IF_FALSE', arg=3, argval=192, argrepr='to L6', offset=182, start_offset=182, starts_line=False, line_number=14, cache_info=[('counter', 1, b'\x00\x00')]), make_inst(opname='NOT_TAKEN', arg=None, argval=None, argrepr='', offset=186, start_offset=186, starts_line=False, line_number=14), make_inst(opname='JUMP_BACKWARD', arg=37, argval=118, argrepr='to L5', offset=188, start_offset=188, starts_line=True, line_number=15, cache_info=[('counter', 1, b'\x00\x00')]), - make_inst(opname='LOAD_FAST', arg=0, argval='i', argrepr='i', offset=192, start_offset=192, starts_line=True, line_number=16, label=6), + make_inst(opname='LOAD_FAST_BORROW', arg=0, argval='i', argrepr='i', offset=192, start_offset=192, starts_line=True, line_number=16, label=6), make_inst(opname='LOAD_SMALL_INT', arg=4, argval=4, argrepr='', offset=194, start_offset=194, starts_line=False, line_number=16), make_inst(opname='COMPARE_OP', arg=18, argval='<', argrepr='bool(<)', offset=196, start_offset=196, starts_line=False, line_number=16, cache_info=[('counter', 1, b'\x00\x00')]), make_inst(opname='POP_JUMP_IF_TRUE', arg=3, argval=210, argrepr='to L7', offset=200, start_offset=200, starts_line=False, line_number=16, cache_info=[('counter', 1, b'\x00\x00')]), @@ -1869,7 +1870,7 @@ def _prepare_test_cases(): make_inst(opname='LOAD_SMALL_INT', arg=0, argval=0, argrepr='', offset=238, start_offset=238, starts_line=False, line_number=21), make_inst(opname='BINARY_OP', arg=11, argval=11, argrepr='/', offset=240, start_offset=240, starts_line=False, line_number=21, cache_info=[('counter', 1, b'\x00\x00'), ('descr', 4, b'\x00\x00\x00\x00\x00\x00\x00\x00')]), make_inst(opname='POP_TOP', arg=None, argval=None, argrepr='', offset=252, start_offset=252, starts_line=False, line_number=21), - make_inst(opname='LOAD_FAST', arg=0, argval='i', argrepr='i', offset=254, start_offset=254, starts_line=True, 
line_number=25), + make_inst(opname='LOAD_FAST_BORROW', arg=0, argval='i', argrepr='i', offset=254, start_offset=254, starts_line=True, line_number=25), make_inst(opname='COPY', arg=1, argval=1, argrepr='', offset=256, start_offset=256, starts_line=False, line_number=25), make_inst(opname='LOAD_SPECIAL', arg=1, argval=1, argrepr='__exit__', offset=258, start_offset=258, starts_line=False, line_number=25), make_inst(opname='SWAP', arg=2, argval=2, argrepr='', offset=260, start_offset=260, starts_line=False, line_number=25), diff --git a/Lib/test/test_frame.py b/Lib/test/test_frame.py index a6e11f1a5342b6..18ade18d1a1708 100644 --- a/Lib/test/test_frame.py +++ b/Lib/test/test_frame.py @@ -597,6 +597,22 @@ def make_frame(): with self.assertRaises(TypeError): FrameLocalsProxy(frame=sys._getframe()) # no keyword arguments + def test_overwrite_locals(self): + # Verify we do not crash if we overwrite a local passed as an argument + # from an ancestor in the call stack. + def f(): + xs = [1, 2, 3] + ys = [4, 5, 6] + return g(xs) + + def g(xs): + f = sys._getframe() + f.f_back.f_locals["xs"] = None + f.f_back.f_locals["ys"] = None + return xs[1] + + self.assertEqual(f(), 2) + class FrameLocalsProxyMappingTests(mapping_tests.TestHashMappingProtocol): """Test that FrameLocalsProxy behaves like a Mapping (with exceptions)""" diff --git a/Lib/test/test_generators.py b/Lib/test/test_generators.py index bf4b88cd9c4450..8bce42f037478c 100644 --- a/Lib/test/test_generators.py +++ b/Lib/test/test_generators.py @@ -83,7 +83,7 @@ def gen(): g = gen() next(g) g.send(g) - self.assertGreater(sys.getrefcount(g), 2) + self.assertGreaterEqual(sys.getrefcount(g), 2) self.assertFalse(finalized) del g support.gc_collect() diff --git a/Lib/test/test_importlib/test_abc.py b/Lib/test/test_importlib/test_abc.py index b1ab52f966ffdb..070920d0da7e19 100644 --- a/Lib/test/test_importlib/test_abc.py +++ b/Lib/test/test_importlib/test_abc.py @@ -801,6 +801,9 @@ def verify_code(self, code_object, *, 
bytecode_written=False): data.extend(self.init._pack_uint32(0)) data.extend(self.init._pack_uint32(self.loader.source_mtime)) data.extend(self.init._pack_uint32(self.loader.source_size)) + # Make sure there's > 1 reference to code_object so that the + # marshaled representation below matches the cached representation + l = [code_object] data.extend(marshal.dumps(code_object)) self.assertEqual(self.loader.written[self.cached], bytes(data)) diff --git a/Lib/test/test_peepholer.py b/Lib/test/test_peepholer.py index 8b97e76fa346ff..565e42b04a68d0 100644 --- a/Lib/test/test_peepholer.py +++ b/Lib/test/test_peepholer.py @@ -4,9 +4,14 @@ import sys import textwrap import unittest +try: + import _testinternalcapi +except ImportError: + _testinternalcapi = None from test import support -from test.support.bytecode_helper import BytecodeTestCase, CfgOptimizationTestCase +from test.support.bytecode_helper import ( + BytecodeTestCase, CfgOptimizationTestCase, CompilationStepTestCase) def compile_pattern_with_fast_locals(pattern): @@ -839,7 +844,7 @@ def test_load_fast_known_simple(self): def f(): x = 1 y = x + x - self.assertInBytecode(f, 'LOAD_FAST_LOAD_FAST') + self.assertInBytecode(f, 'LOAD_FAST_BORROW_LOAD_FAST_BORROW') def test_load_fast_unknown_simple(self): def f(): @@ -860,27 +865,27 @@ def f(): def test_load_fast_known_because_parameter(self): def f1(x): print(x) - self.assertInBytecode(f1, 'LOAD_FAST') + self.assertInBytecode(f1, 'LOAD_FAST_BORROW') self.assertNotInBytecode(f1, 'LOAD_FAST_CHECK') def f2(*, x): print(x) - self.assertInBytecode(f2, 'LOAD_FAST') + self.assertInBytecode(f2, 'LOAD_FAST_BORROW') self.assertNotInBytecode(f2, 'LOAD_FAST_CHECK') def f3(*args): print(args) - self.assertInBytecode(f3, 'LOAD_FAST') + self.assertInBytecode(f3, 'LOAD_FAST_BORROW') self.assertNotInBytecode(f3, 'LOAD_FAST_CHECK') def f4(**kwargs): print(kwargs) - self.assertInBytecode(f4, 'LOAD_FAST') + self.assertInBytecode(f4, 'LOAD_FAST_BORROW') self.assertNotInBytecode(f4, 
'LOAD_FAST_CHECK') def f5(x=0): print(x) - self.assertInBytecode(f5, 'LOAD_FAST') + self.assertInBytecode(f5, 'LOAD_FAST_BORROW') self.assertNotInBytecode(f5, 'LOAD_FAST_CHECK') def test_load_fast_known_because_already_loaded(self): @@ -890,7 +895,7 @@ def f(): print(x) print(x) self.assertInBytecode(f, 'LOAD_FAST_CHECK') - self.assertInBytecode(f, 'LOAD_FAST') + self.assertInBytecode(f, 'LOAD_FAST_BORROW') def test_load_fast_known_multiple_branches(self): def f(): @@ -899,7 +904,7 @@ def f(): else: x = 2 print(x) - self.assertInBytecode(f, 'LOAD_FAST') + self.assertInBytecode(f, 'LOAD_FAST_BORROW') self.assertNotInBytecode(f, 'LOAD_FAST_CHECK') def test_load_fast_unknown_after_error(self): @@ -943,12 +948,12 @@ def f(): print(a00, a01, a62, a63) print(a64, a65, a78, a79) - self.assertInBytecode(f, 'LOAD_FAST_LOAD_FAST', ("a00", "a01")) + self.assertInBytecode(f, 'LOAD_FAST_BORROW_LOAD_FAST_BORROW', ("a00", "a01")) self.assertNotInBytecode(f, 'LOAD_FAST_CHECK', "a00") self.assertNotInBytecode(f, 'LOAD_FAST_CHECK', "a01") for i in 62, 63: # First 64 locals: analyze completely - self.assertInBytecode(f, 'LOAD_FAST', f"a{i:02}") + self.assertInBytecode(f, 'LOAD_FAST_BORROW', f"a{i:02}") self.assertNotInBytecode(f, 'LOAD_FAST_CHECK', f"a{i:02}") for i in 64, 65, 78, 79: # Locals >=64 not in the same basicblock @@ -956,14 +961,14 @@ def f(): self.assertNotInBytecode(f, 'LOAD_FAST', f"a{i:02}") for i in 70, 71: # Locals >=64 in the same basicblock - self.assertInBytecode(f, 'LOAD_FAST', f"a{i:02}") + self.assertInBytecode(f, 'LOAD_FAST_BORROW', f"a{i:02}") self.assertNotInBytecode(f, 'LOAD_FAST_CHECK', f"a{i:02}") # del statements should invalidate within basicblocks. 
self.assertInBytecode(f, 'LOAD_FAST_CHECK', "a72") self.assertNotInBytecode(f, 'LOAD_FAST', "a72") # previous checked loads within a basicblock enable unchecked loads self.assertInBytecode(f, 'LOAD_FAST_CHECK', "a73") - self.assertInBytecode(f, 'LOAD_FAST', "a73") + self.assertInBytecode(f, 'LOAD_FAST_BORROW', "a73") def test_setting_lineno_no_undefined(self): code = textwrap.dedent("""\ @@ -981,7 +986,7 @@ def f(): ns = {} exec(code, ns) f = ns['f'] - self.assertInBytecode(f, "LOAD_FAST") + self.assertInBytecode(f, "LOAD_FAST_BORROW") self.assertNotInBytecode(f, "LOAD_FAST_CHECK") co_code = f.__code__.co_code def trace(frame, event, arg): @@ -993,7 +998,7 @@ def trace(frame, event, arg): sys.settrace(trace) result = f() self.assertIsNone(result) - self.assertInBytecode(f, "LOAD_FAST") + self.assertInBytecode(f, "LOAD_FAST_BORROW") self.assertNotInBytecode(f, "LOAD_FAST_CHECK") self.assertEqual(f.__code__.co_code, co_code) @@ -1013,7 +1018,7 @@ def f(): ns = {} exec(code, ns) f = ns['f'] - self.assertInBytecode(f, "LOAD_FAST") + self.assertInBytecode(f, "LOAD_FAST_BORROW") self.assertNotInBytecode(f, "LOAD_FAST_CHECK") co_code = f.__code__.co_code def trace(frame, event, arg): @@ -1027,7 +1032,7 @@ def trace(frame, event, arg): sys.settrace(trace) result = f() self.assertEqual(result, 4) - self.assertInBytecode(f, "LOAD_FAST") + self.assertInBytecode(f, "LOAD_FAST_BORROW") self.assertNotInBytecode(f, "LOAD_FAST_CHECK") self.assertEqual(f.__code__.co_code, co_code) @@ -1047,7 +1052,7 @@ def f(): ns = {} exec(code, ns) f = ns['f'] - self.assertInBytecode(f, "LOAD_FAST") + self.assertInBytecode(f, "LOAD_FAST_BORROW") self.assertNotInBytecode(f, "LOAD_FAST_CHECK") co_code = f.__code__.co_code def trace(frame, event, arg): @@ -1061,7 +1066,7 @@ def trace(frame, event, arg): sys.settrace(trace) result = f() self.assertEqual(result, 4) - self.assertInBytecode(f, "LOAD_FAST") + self.assertInBytecode(f, "LOAD_FAST_BORROW") self.assertNotInBytecode(f, "LOAD_FAST_CHECK") 
self.assertEqual(f.__code__.co_code, co_code) @@ -1079,7 +1084,7 @@ def f(): ns = {} exec(code, ns) f = ns['f'] - self.assertInBytecode(f, "LOAD_FAST") + self.assertInBytecode(f, "LOAD_FAST_BORROW") self.assertNotInBytecode(f, "LOAD_FAST_CHECK") return f @@ -1093,7 +1098,7 @@ def trace(frame, event, arg): return trace sys.settrace(trace) f() - self.assertInBytecode(f, "LOAD_FAST") + self.assertInBytecode(f, "LOAD_FAST_BORROW") self.assertNotInBytecode(f, "LOAD_FAST_CHECK") def test_initializing_local_does_not_add_check(self): @@ -1106,7 +1111,7 @@ def trace(frame, event, arg): return trace sys.settrace(trace) f() - self.assertInBytecode(f, "LOAD_FAST") + self.assertInBytecode(f, "LOAD_FAST_BORROW") self.assertNotInBytecode(f, "LOAD_FAST_CHECK") @@ -1263,15 +1268,6 @@ def test_fold_tuple_of_constants(self): ] self.cfg_optimization_test(before, after, consts=[], expected_consts=[(1, 2, 3)]) - # not enough consts - same = [ - ('LOAD_SMALL_INT', 1, 0), - ('LOAD_SMALL_INT', 2, 0), - ('BUILD_TUPLE', 3, 0), - ('RETURN_VALUE', None, 0) - ] - self.cfg_optimization_test(same, same, consts=[]) - # not all consts same = [ ('LOAD_SMALL_INT', 1, 0), @@ -1374,19 +1370,6 @@ def test_fold_constant_intrinsic_list_to_tuple(self): ] self.cfg_optimization_test(before, after, consts=[], expected_consts=[(1, 2, 3)]) - # no sequence start - same = [ - ('LOAD_SMALL_INT', 1, 0), - ('LIST_APPEND', 1, 0), - ('LOAD_SMALL_INT', 2, 0), - ('LIST_APPEND', 1, 0), - ('LOAD_SMALL_INT', 3, 0), - ('LIST_APPEND', 1, 0), - ('CALL_INTRINSIC_1', INTRINSIC_LIST_TO_TUPLE, 0), - ('RETURN_VALUE', None, 0) - ] - self.cfg_optimization_test(same, same, consts=[]) - def test_optimize_if_const_list(self): before = [ ('NOP', None, 0), @@ -1417,16 +1400,6 @@ def test_optimize_if_const_list(self): ] self.cfg_optimization_test(same, same, consts=[]) - # not enough consts - same = [ - ('LOAD_SMALL_INT', 1, 0), - ('LOAD_SMALL_INT', 2, 0), - ('LOAD_SMALL_INT', 3, 0), - ('BUILD_LIST', 4, 0), - ('RETURN_VALUE', None, 0), - 
] - self.cfg_optimization_test(same, same, consts=[]) - # not all consts same = [ ('LOAD_SMALL_INT', 1, 0), @@ -1467,16 +1440,6 @@ def test_optimize_if_const_set(self): ] self.cfg_optimization_test(same, same, consts=[]) - # not enough consts - same = [ - ('LOAD_SMALL_INT', 1, 0), - ('LOAD_SMALL_INT', 2, 0), - ('LOAD_SMALL_INT', 3, 0), - ('BUILD_SET', 4, 0), - ('RETURN_VALUE', None, 0), - ] - self.cfg_optimization_test(same, same, consts=[]) - # not all consts same = [ ('LOAD_SMALL_INT', 1, 0), @@ -2331,13 +2294,13 @@ def get_insts(lno1, lno2, op1, op2): return [ lbl2 := self.Label(), ('LOAD_NAME', 0, 10), + ('POP_TOP', None, 10), (op1, lbl1 := self.Label(), lno1), ('LOAD_NAME', 1, 20), lbl1, (op2, lbl2, lno2), ] - for op1 in ('JUMP', 'JUMP_NO_INTERRUPT'): for op2 in ('JUMP', 'JUMP_NO_INTERRUPT'): # different lines @@ -2347,6 +2310,7 @@ def get_insts(lno1, lno2, op1, op2): op = 'JUMP' if 'JUMP' in (op1, op2) else 'JUMP_NO_INTERRUPT' expected_insts = [ ('LOAD_NAME', 0, 10), + ('POP_TOP', None, 10), ('NOP', None, 4), (op, 0, 5), ] @@ -2363,6 +2327,7 @@ def get_insts(lno1, lno2, op1, op2): op = 'JUMP' if 'JUMP' in (op1, op2) else 'JUMP_NO_INTERRUPT' expected_insts = [ ('LOAD_NAME', 0, 10), + ('POP_TOP', None, 10), (op, 0, lno), ] self.cfg_optimization_test(insts, expected_insts, consts=list(range(5))) @@ -2390,9 +2355,9 @@ def test_list_to_tuple_get_iter(self): ] expected_insts = [ ("BUILD_LIST", 0, 1), - ("LOAD_FAST", 0, 2), + ("LOAD_FAST_BORROW", 0, 2), ("LIST_EXTEND", 1, 3), - ("LOAD_FAST", 1, 4), + ("LOAD_FAST_BORROW", 1, 4), ("LIST_EXTEND", 1, 5), ("NOP", None, 6), # ("CALL_INTRINSIC_1", INTRINSIC_LIST_TO_TUPLE, 6), ("GET_ITER", None, 7), @@ -2418,5 +2383,230 @@ def test_list_to_tuple_get_iter_is_safe(self): self.assertEqual(items, []) +class OptimizeLoadFastTestCase(DirectCfgOptimizerTests): + def make_bb(self, insts): + last_loc = insts[-1][2] + maxconst = 0 + for op, arg, _ in insts: + if op == "LOAD_CONST": + maxconst = max(maxconst, arg) + consts = [None for 
_ in range(maxconst + 1)] + return insts + [ + ("LOAD_CONST", 0, last_loc + 1), + ("RETURN_VALUE", None, last_loc + 2), + ], consts + + def check(self, insts, expected_insts, consts=None): + insts_bb, insts_consts = self.make_bb(insts) + expected_insts_bb, exp_consts = self.make_bb(expected_insts) + self.cfg_optimization_test(insts_bb, expected_insts_bb, + consts=insts_consts, expected_consts=exp_consts) + + def test_optimized(self): + insts = [ + ("LOAD_FAST", 0, 1), + ("LOAD_FAST", 1, 2), + ("BINARY_OP", 2, 3), + ] + expected = [ + ("LOAD_FAST_BORROW", 0, 1), + ("LOAD_FAST_BORROW", 1, 2), + ("BINARY_OP", 2, 3), + ] + self.check(insts, expected) + + insts = [ + ("LOAD_FAST", 0, 1), + ("LOAD_CONST", 1, 2), + ("SWAP", 2, 3), + ("POP_TOP", None, 4), + ] + expected = [ + ("LOAD_FAST_BORROW", 0, 1), + ("LOAD_CONST", 1, 2), + ("SWAP", 2, 3), + ("POP_TOP", None, 4), + ] + self.check(insts, expected) + + def test_unoptimized_if_unconsumed(self): + insts = [ + ("LOAD_FAST", 0, 1), + ("LOAD_FAST", 1, 2), + ("POP_TOP", None, 3), + ] + expected = [ + ("LOAD_FAST", 0, 1), + ("LOAD_FAST_BORROW", 1, 2), + ("POP_TOP", None, 3), + ] + self.check(insts, expected) + + insts = [ + ("LOAD_FAST", 0, 1), + ("COPY", 1, 2), + ("POP_TOP", None, 3), + ] + expected = [ + ("LOAD_FAST", 0, 1), + ("NOP", None, 2), + ("NOP", None, 3), + ] + self.check(insts, expected) + + def test_unoptimized_if_support_killed(self): + insts = [ + ("LOAD_FAST", 0, 1), + ("LOAD_CONST", 0, 2), + ("STORE_FAST", 0, 3), + ("POP_TOP", None, 4), + ] + self.check(insts, insts) + + insts = [ + ("LOAD_FAST", 0, 1), + ("LOAD_CONST", 0, 2), + ("LOAD_CONST", 0, 3), + ("STORE_FAST_STORE_FAST", ((0 << 4) | 1), 4), + ("POP_TOP", None, 5), + ] + self.check(insts, insts) + + def test_unoptimized_if_aliased(self): + insts = [ + ("LOAD_FAST", 0, 1), + ("STORE_FAST", 1, 2), + ] + self.check(insts, insts) + + insts = [ + ("LOAD_FAST", 0, 1), + ("LOAD_CONST", 0, 3), + ("STORE_FAST_STORE_FAST", ((0 << 4) | 1), 4), + ] + 
self.check(insts, insts) + + def test_consume_no_inputs(self): + insts = [ + ("LOAD_FAST", 0, 1), + ("GET_LEN", None, 2), + ("STORE_FAST", 1 , 3), + ("STORE_FAST", 2, 4), + ] + self.check(insts, insts) + + def test_consume_some_inputs_no_outputs(self): + insts = [ + ("LOAD_FAST", 0, 1), + ("GET_LEN", None, 2), + ("LIST_APPEND", 0, 3), + ] + self.check(insts, insts) + + def test_check_exc_match(self): + insts = [ + ("LOAD_FAST", 0, 1), + ("LOAD_FAST", 1, 2), + ("CHECK_EXC_MATCH", None, 3) + ] + expected = [ + ("LOAD_FAST", 0, 1), + ("LOAD_FAST_BORROW", 1, 2), + ("CHECK_EXC_MATCH", None, 3) + ] + self.check(insts, expected) + + def test_for_iter(self): + insts = [ + ("LOAD_FAST", 0, 1), + top := self.Label(), + ("FOR_ITER", end := self.Label(), 2), + ("STORE_FAST", 2, 3), + ("JUMP", top, 4), + end, + ("END_FOR", None, 5), + ("POP_TOP", None, 6), + ("LOAD_CONST", 0, 7), + ("RETURN_VALUE", None, 8), + ] + self.cfg_optimization_test(insts, insts, consts=[None]) + + def test_load_attr(self): + insts = [ + ("LOAD_FAST", 0, 1), + ("LOAD_ATTR", 0, 2), + ] + expected = [ + ("LOAD_FAST_BORROW", 0, 1), + ("LOAD_ATTR", 0, 2), + ] + self.check(insts, expected) + + # Method call, leaves self on stack unconsumed + insts = [ + ("LOAD_FAST", 0, 1), + ("LOAD_ATTR", 1, 2), + ] + expected = [ + ("LOAD_FAST", 0, 1), + ("LOAD_ATTR", 1, 2), + ] + self.check(insts, expected) + + def test_super_attr(self): + insts = [ + ("LOAD_FAST", 0, 1), + ("LOAD_FAST", 1, 2), + ("LOAD_FAST", 2, 3), + ("LOAD_SUPER_ATTR", 0, 4), + ] + expected = [ + ("LOAD_FAST_BORROW", 0, 1), + ("LOAD_FAST_BORROW", 1, 2), + ("LOAD_FAST_BORROW", 2, 3), + ("LOAD_SUPER_ATTR", 0, 4), + ] + self.check(insts, expected) + + # Method call, leaves self on stack unconsumed + insts = [ + ("LOAD_FAST", 0, 1), + ("LOAD_FAST", 1, 2), + ("LOAD_FAST", 2, 3), + ("LOAD_SUPER_ATTR", 1, 4), + ] + expected = [ + ("LOAD_FAST_BORROW", 0, 1), + ("LOAD_FAST_BORROW", 1, 2), + ("LOAD_FAST", 2, 3), + ("LOAD_SUPER_ATTR", 1, 4), + ] + 
self.check(insts, expected) + + def test_send(self): + insts = [ + ("LOAD_FAST", 0, 1), + ("LOAD_FAST", 1, 2), + ("SEND", end := self.Label(), 3), + ("LOAD_CONST", 0, 4), + ("RETURN_VALUE", None, 5), + end, + ("LOAD_CONST", 0, 6), + ("RETURN_VALUE", None, 7) + ] + expected = [ + ("LOAD_FAST", 0, 1), + ("LOAD_FAST_BORROW", 1, 2), + ("SEND", end := self.Label(), 3), + ("LOAD_CONST", 0, 4), + ("RETURN_VALUE", None, 5), + end, + ("LOAD_CONST", 0, 6), + ("RETURN_VALUE", None, 7) + ] + self.cfg_optimization_test(insts, expected, consts=[None]) + + + if __name__ == "__main__": unittest.main() diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index c64a80d83f154e..368a5ba413b6ce 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -1664,7 +1664,7 @@ def func(): INTERPRETER_FRAME = '9PihcP' else: INTERPRETER_FRAME = '9PhcP' - check(x, size('3PiccPP' + INTERPRETER_FRAME + 'P')) + check(x, size('3PiccPPP' + INTERPRETER_FRAME + 'P')) # function def func(): pass check(func, size('16Pi')) diff --git a/Lib/test/test_traceback.py b/Lib/test/test_traceback.py index 6ae8cf5c9a5130..5c390fd056b3e3 100644 --- a/Lib/test/test_traceback.py +++ b/Lib/test/test_traceback.py @@ -3598,6 +3598,7 @@ def test_no_save_exc_type(self): self.assertIsNone(te.exc_type) def test_no_refs_to_exception_and_traceback_objects(self): + exc_obj = None try: 1/0 except Exception as e: diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-02-28-11-29-35.gh-issue-130704.7RDVLE.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-02-28-11-29-35.gh-issue-130704.7RDVLE.rst new file mode 100644 index 00000000000000..42fba2933c31b3 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-02-28-11-29-35.gh-issue-130704.7RDVLE.rst @@ -0,0 +1,4 @@ +Optimize ``LOAD_FAST`` and its superinstruction form to reduce reference +counting overhead. 
These instructions are replaced with faster variants that +load borrowed references onto the operand stack when we can prove that the +reference in the frame outlives the reference loaded onto the stack. diff --git a/Objects/floatobject.c b/Objects/floatobject.c index af43357439d309..87a00bf1a458ea 100644 --- a/Objects/floatobject.c +++ b/Objects/floatobject.c @@ -135,8 +135,6 @@ PyFloat_FromDouble(double fval) return (PyObject *) op; } -#ifdef Py_GIL_DISABLED - _PyStackRef _PyFloat_FromDouble_ConsumeInputs(_PyStackRef left, _PyStackRef right, double value) { PyStackRef_CLOSE_SPECIALIZED(left, _PyFloat_ExactDealloc); @@ -144,35 +142,6 @@ _PyStackRef _PyFloat_FromDouble_ConsumeInputs(_PyStackRef left, _PyStackRef righ return PyStackRef_FromPyObjectSteal(PyFloat_FromDouble(value)); } -#else // Py_GIL_DISABLED - -_PyStackRef _PyFloat_FromDouble_ConsumeInputs(_PyStackRef left, _PyStackRef right, double value) -{ - PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); - PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); - if (Py_REFCNT(left_o) == 1) { - ((PyFloatObject *)left_o)->ob_fval = value; - PyStackRef_CLOSE_SPECIALIZED(right, _PyFloat_ExactDealloc); - return left; - } - else if (Py_REFCNT(right_o) == 1) { - ((PyFloatObject *)right_o)->ob_fval = value; - PyStackRef_CLOSE_SPECIALIZED(left, _PyFloat_ExactDealloc); - return right; - } - else { - PyObject *result = PyFloat_FromDouble(value); - PyStackRef_CLOSE_SPECIALIZED(left, _PyFloat_ExactDealloc); - PyStackRef_CLOSE_SPECIALIZED(right, _PyFloat_ExactDealloc); - if (result == NULL) { - return PyStackRef_NULL; - } - return PyStackRef_FromPyObjectStealMortal(result); - } -} - -#endif // Py_GIL_DISABLED - static PyObject * float_from_string_inner(const char *s, Py_ssize_t len, void *obj) { diff --git a/Objects/frameobject.c b/Objects/frameobject.c index e6a124ef94c3a1..db4e4b7e1939de 100644 --- a/Objects/frameobject.c +++ b/Objects/frameobject.c @@ -215,6 +215,33 @@ framelocalsproxy_getitem(PyObject *self, 
PyObject *key) return NULL; } +static int +add_overwritten_fast_local(PyFrameObject *frame, PyObject *obj) +{ + Py_ssize_t new_size; + if (frame->f_overwritten_fast_locals == NULL) { + new_size = 1; + } + else { + Py_ssize_t size = PyTuple_Size(frame->f_overwritten_fast_locals); + if (size == -1) { + return -1; + } + new_size = size + 1; + } + PyObject *new_tuple = PyTuple_New(new_size); + if (new_tuple == NULL) { + return -1; + } + for (Py_ssize_t i = 0; i < new_size - 1; i++) { + PyObject *o = PyTuple_GET_ITEM(frame->f_overwritten_fast_locals, i); + PyTuple_SET_ITEM(new_tuple, i, Py_NewRef(o)); + } + PyTuple_SET_ITEM(new_tuple, new_size - 1, Py_NewRef(obj)); + Py_XSETREF(frame->f_overwritten_fast_locals, new_tuple); + return 0; +} + static int framelocalsproxy_setitem(PyObject *self, PyObject *key, PyObject *value) { @@ -253,7 +280,13 @@ framelocalsproxy_setitem(PyObject *self, PyObject *key, PyObject *value) Py_XINCREF(value); PyCell_SetTakeRef((PyCellObject *)cell, value); } else if (value != PyStackRef_AsPyObjectBorrow(oldvalue)) { - PyStackRef_XCLOSE(fast[i]); + PyObject *old_obj = PyStackRef_AsPyObjectBorrow(fast[i]); + if (old_obj != NULL && !_Py_IsImmortal(old_obj)) { + if (add_overwritten_fast_local(frame, old_obj) < 0) { + return -1; + } + PyStackRef_CLOSE(fast[i]); + } fast[i] = PyStackRef_FromPyObjectNew(value); } return 0; @@ -1906,6 +1939,7 @@ frame_dealloc(PyObject *op) Py_CLEAR(f->f_trace); Py_CLEAR(f->f_extra_locals); Py_CLEAR(f->f_locals_cache); + Py_CLEAR(f->f_overwritten_fast_locals); PyObject_GC_Del(f); Py_TRASHCAN_END; } @@ -1918,6 +1952,7 @@ frame_traverse(PyObject *op, visitproc visit, void *arg) Py_VISIT(f->f_trace); Py_VISIT(f->f_extra_locals); Py_VISIT(f->f_locals_cache); + Py_VISIT(f->f_overwritten_fast_locals); if (f->f_frame->owner != FRAME_OWNED_BY_FRAME_OBJECT) { return 0; } @@ -1932,6 +1967,7 @@ frame_tp_clear(PyObject *op) Py_CLEAR(f->f_trace); Py_CLEAR(f->f_extra_locals); Py_CLEAR(f->f_locals_cache); + 
Py_CLEAR(f->f_overwritten_fast_locals); /* locals and stack */ _PyStackRef *locals = _PyFrame_GetLocalsArray(f->f_frame); @@ -2081,6 +2117,7 @@ _PyFrame_New_NoTrack(PyCodeObject *code) f->f_lineno = 0; f->f_extra_locals = NULL; f->f_locals_cache = NULL; + f->f_overwritten_fast_locals = NULL; return f; } diff --git a/Programs/test_frozenmain.h b/Programs/test_frozenmain.h index 76a335086c9b11..8cedee31e08a00 100644 --- a/Programs/test_frozenmain.h +++ b/Programs/test_frozenmain.h @@ -1,16 +1,16 @@ // Auto-generated by Programs/freeze_test_frozenmain.py unsigned char M_test_frozenmain[] = { 227,0,0,0,0,0,0,0,0,0,0,0,0,8,0,0, - 0,0,0,0,0,243,184,0,0,0,128,0,90,0,80,1, - 71,0,112,0,90,0,80,1,71,1,112,1,89,2,32,0, - 80,2,50,1,0,0,0,0,0,0,30,0,89,2,32,0, - 80,3,89,0,78,6,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,243,184,0,0,0,128,0,92,0,80,1, + 71,0,114,0,92,0,80,1,71,1,114,1,91,2,32,0, + 80,2,50,1,0,0,0,0,0,0,30,0,91,2,32,0, + 80,3,91,0,78,6,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,50,2,0,0,0,0,0,0, - 30,0,89,1,78,8,0,0,0,0,0,0,0,0,0,0, + 30,0,91,1,78,8,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,32,0,50,0,0,0,0,0, 0,0,80,4,43,26,0,0,0,0,0,0,0,0,0,0, - 112,5,80,7,15,0,68,24,0,0,112,6,89,2,32,0, - 80,5,89,6,11,0,80,6,89,5,89,6,43,26,0,0, + 114,5,80,7,15,0,68,24,0,0,114,6,91,2,32,0, + 80,5,91,6,11,0,80,6,91,5,91,6,43,26,0,0, 0,0,0,0,0,0,0,0,11,0,48,4,50,1,0,0, 0,0,0,0,30,0,73,26,0,0,8,0,29,0,80,1, 34,0,41,8,233,0,0,0,0,78,122,18,70,114,111,122, diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 43424447bb068d..83847e31ef4dc7 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -270,6 +270,11 @@ dummy_func( value = PyStackRef_DUP(GETLOCAL(oparg)); } + replicate(8) pure inst (LOAD_FAST_BORROW, (-- value)) { + assert(!PyStackRef_IsNull(GETLOCAL(oparg))); + value = PyStackRef_Borrow(GETLOCAL(oparg)); + } + inst(LOAD_FAST_AND_CLEAR, (-- value)) { value = GETLOCAL(oparg); GETLOCAL(oparg) = PyStackRef_NULL; @@ -282,6 +287,13 @@ dummy_func( value2 = PyStackRef_DUP(GETLOCAL(oparg2)); } 
+ inst(LOAD_FAST_BORROW_LOAD_FAST_BORROW, ( -- value1, value2)) { + uint32_t oparg1 = oparg >> 4; + uint32_t oparg2 = oparg & 15; + value1 = PyStackRef_Borrow(GETLOCAL(oparg1)); + value2 = PyStackRef_Borrow(GETLOCAL(oparg2)); + } + family(LOAD_CONST, 0) = { LOAD_CONST_MORTAL, LOAD_CONST_IMMORTAL, @@ -741,9 +753,8 @@ dummy_func( // At the end we just skip over the STORE_FAST. op(_BINARY_OP_INPLACE_ADD_UNICODE, (left, right --)) { PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); - PyObject *right_o = PyStackRef_AsPyObjectSteal(right); assert(PyUnicode_CheckExact(left_o)); - assert(PyUnicode_CheckExact(right_o)); + assert(PyUnicode_CheckExact(PyStackRef_AsPyObjectBorrow(right))); int next_oparg; #if TIER_ONE @@ -767,10 +778,11 @@ dummy_func( * only the locals reference, so PyUnicode_Append knows * that the string is safe to mutate. */ - assert(Py_REFCNT(left_o) >= 2); + assert(Py_REFCNT(left_o) >= 2 || !PyStackRef_IsHeapSafe(left)); PyStackRef_CLOSE_SPECIALIZED(left, _PyUnicode_ExactDealloc); DEAD(left); PyObject *temp = PyStackRef_AsPyObjectSteal(*target_local); + PyObject *right_o = PyStackRef_AsPyObjectSteal(right); PyUnicode_Append(&temp, right_o); *target_local = PyStackRef_FromPyObjectSteal(temp); Py_DECREF(right_o); @@ -1110,8 +1122,7 @@ dummy_func( // is pushed to a different frame, the callers' frame. 
inst(RETURN_VALUE, (retval -- res)) { assert(frame->owner != FRAME_OWNED_BY_INTERPRETER); - _PyStackRef temp = retval; - assert(PyStackRef_IsHeapSafe(temp)); + _PyStackRef temp = PyStackRef_MakeHeapSafe(retval); DEAD(retval); SAVE_STACK(); assert(EMPTY()); @@ -1206,7 +1217,6 @@ dummy_func( op(_SEND, (receiver, v -- receiver, retval)) { PyObject *receiver_o = PyStackRef_AsPyObjectBorrow(receiver); - PyObject *retval_o; assert(frame->owner != FRAME_OWNED_BY_INTERPRETER); if ((tstate->interp->eval_frame == NULL) && @@ -1216,7 +1226,7 @@ dummy_func( PyGenObject *gen = (PyGenObject *)receiver_o; _PyInterpreterFrame *gen_frame = &gen->gi_iframe; STACK_SHRINK(1); - _PyFrame_StackPush(gen_frame, v); + _PyFrame_StackPush(gen_frame, PyStackRef_MakeHeapSafe(v)); gen->gi_frame_state = FRAME_EXECUTING; gen->gi_exc_state.previous_item = tstate->exc_info; tstate->exc_info = &gen->gi_exc_state; @@ -1261,7 +1271,7 @@ dummy_func( DEOPT_IF(gen->gi_frame_state >= FRAME_EXECUTING); STAT_INC(SEND, hit); gen_frame = &gen->gi_iframe; - _PyFrame_StackPush(gen_frame, v); + _PyFrame_StackPush(gen_frame, PyStackRef_MakeHeapSafe(v)); DEAD(v); gen->gi_frame_state = FRAME_EXECUTING; gen->gi_exc_state.previous_item = tstate->exc_info; @@ -1308,7 +1318,7 @@ dummy_func( #endif RELOAD_STACK(); LOAD_IP(1 + INLINE_CACHE_ENTRIES_SEND); - value = temp; + value = PyStackRef_MakeHeapSafe(temp); LLTRACE_RESUME_FRAME(); } diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 29c3a270a27526..260d52be02c136 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -201,6 +201,113 @@ break; } + case _LOAD_FAST_BORROW_0: { + _PyStackRef value; + oparg = 0; + assert(oparg == CURRENT_OPARG()); + assert(!PyStackRef_IsNull(GETLOCAL(oparg))); + value = PyStackRef_Borrow(GETLOCAL(oparg)); + stack_pointer[0] = value; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _LOAD_FAST_BORROW_1: { + _PyStackRef value; + oparg = 1; + assert(oparg == CURRENT_OPARG()); 
+ assert(!PyStackRef_IsNull(GETLOCAL(oparg))); + value = PyStackRef_Borrow(GETLOCAL(oparg)); + stack_pointer[0] = value; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _LOAD_FAST_BORROW_2: { + _PyStackRef value; + oparg = 2; + assert(oparg == CURRENT_OPARG()); + assert(!PyStackRef_IsNull(GETLOCAL(oparg))); + value = PyStackRef_Borrow(GETLOCAL(oparg)); + stack_pointer[0] = value; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _LOAD_FAST_BORROW_3: { + _PyStackRef value; + oparg = 3; + assert(oparg == CURRENT_OPARG()); + assert(!PyStackRef_IsNull(GETLOCAL(oparg))); + value = PyStackRef_Borrow(GETLOCAL(oparg)); + stack_pointer[0] = value; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _LOAD_FAST_BORROW_4: { + _PyStackRef value; + oparg = 4; + assert(oparg == CURRENT_OPARG()); + assert(!PyStackRef_IsNull(GETLOCAL(oparg))); + value = PyStackRef_Borrow(GETLOCAL(oparg)); + stack_pointer[0] = value; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _LOAD_FAST_BORROW_5: { + _PyStackRef value; + oparg = 5; + assert(oparg == CURRENT_OPARG()); + assert(!PyStackRef_IsNull(GETLOCAL(oparg))); + value = PyStackRef_Borrow(GETLOCAL(oparg)); + stack_pointer[0] = value; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _LOAD_FAST_BORROW_6: { + _PyStackRef value; + oparg = 6; + assert(oparg == CURRENT_OPARG()); + assert(!PyStackRef_IsNull(GETLOCAL(oparg))); + value = PyStackRef_Borrow(GETLOCAL(oparg)); + stack_pointer[0] = value; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _LOAD_FAST_BORROW_7: { + _PyStackRef value; + oparg = 7; + assert(oparg == CURRENT_OPARG()); + assert(!PyStackRef_IsNull(GETLOCAL(oparg))); + value = PyStackRef_Borrow(GETLOCAL(oparg)); + stack_pointer[0] = value; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _LOAD_FAST_BORROW: { + _PyStackRef value; + oparg = 
CURRENT_OPARG(); + assert(!PyStackRef_IsNull(GETLOCAL(oparg))); + value = PyStackRef_Borrow(GETLOCAL(oparg)); + stack_pointer[0] = value; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + case _LOAD_FAST_AND_CLEAR: { _PyStackRef value; oparg = CURRENT_OPARG(); @@ -1022,9 +1129,8 @@ right = stack_pointer[-1]; left = stack_pointer[-2]; PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); - PyObject *right_o = PyStackRef_AsPyObjectSteal(right); assert(PyUnicode_CheckExact(left_o)); - assert(PyUnicode_CheckExact(right_o)); + assert(PyUnicode_CheckExact(PyStackRef_AsPyObjectBorrow(right))); int next_oparg; #if TIER_ONE assert(next_instr->op.code == STORE_FAST); @@ -1050,9 +1156,10 @@ * only the locals reference, so PyUnicode_Append knows * that the string is safe to mutate. */ - assert(Py_REFCNT(left_o) >= 2); + assert(Py_REFCNT(left_o) >= 2 || !PyStackRef_IsHeapSafe(left)); PyStackRef_CLOSE_SPECIALIZED(left, _PyUnicode_ExactDealloc); PyObject *temp = PyStackRef_AsPyObjectSteal(*target_local); + PyObject *right_o = PyStackRef_AsPyObjectSteal(right); stack_pointer += -2; assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); @@ -1698,8 +1805,7 @@ _PyStackRef res; retval = stack_pointer[-1]; assert(frame->owner != FRAME_OWNED_BY_INTERPRETER); - _PyStackRef temp = retval; - assert(PyStackRef_IsHeapSafe(temp)); + _PyStackRef temp = PyStackRef_MakeHeapSafe(retval); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); @@ -1833,7 +1939,7 @@ } STAT_INC(SEND, hit); gen_frame = &gen->gi_iframe; - _PyFrame_StackPush(gen_frame, v); + _PyFrame_StackPush(gen_frame, PyStackRef_MakeHeapSafe(v)); gen->gi_frame_state = FRAME_EXECUTING; gen->gi_exc_state.previous_item = tstate->exc_info; tstate->exc_info = &gen->gi_exc_state; @@ -1880,7 +1986,7 @@ #endif stack_pointer = _PyFrame_GetStackPointer(frame); LOAD_IP(1 + INLINE_CACHE_ENTRIES_SEND); - value = temp; + value = PyStackRef_MakeHeapSafe(temp); 
LLTRACE_RESUME_FRAME(); stack_pointer[0] = value; stack_pointer += 1; diff --git a/Python/flowgraph.c b/Python/flowgraph.c index 4f101a602c555e..a0d5690250cffb 100644 --- a/Python/flowgraph.c +++ b/Python/flowgraph.c @@ -574,6 +574,7 @@ normalize_jumps_in_block(cfg_builder *g, basicblock *b) { basicblock_addop(backwards_jump, NOT_TAKEN, 0, last->i_loc)); RETURN_IF_ERROR( basicblock_add_jump(backwards_jump, JUMP, target, last->i_loc)); + backwards_jump->b_startdepth = target->b_startdepth; last->i_opcode = reversed_opcode; last->i_target = b->b_next; @@ -2581,6 +2582,422 @@ insert_superinstructions(cfg_builder *g) return res; } +#define NOT_LOCAL -1 +#define DUMMY_INSTR -1 + +typedef struct { + // Index of instruction that produced the reference or DUMMY_INSTR. + int instr; + + // The local to which the reference refers or NOT_LOCAL. + int local; +} ref; + +typedef struct { + ref *refs; + Py_ssize_t size; + Py_ssize_t capacity; +} ref_stack; + +static int +ref_stack_push(ref_stack *stack, ref r) +{ + if (stack->size == stack->capacity) { + Py_ssize_t new_cap = Py_MAX(32, stack->capacity * 2); + ref *refs = PyMem_Realloc(stack->refs, sizeof(*stack->refs) * new_cap); + if (refs == NULL) { + PyErr_NoMemory(); + return -1; + } + stack->refs = refs; + stack->capacity = new_cap; + } + stack->refs[stack->size] = r; + stack->size++; + return 0; +} + +static ref +ref_stack_pop(ref_stack *stack) +{ + assert(stack->size > 0); + stack->size--; + ref r = stack->refs[stack->size]; + return r; +} + +static void +ref_stack_swap_top(ref_stack *stack, Py_ssize_t off) +{ + Py_ssize_t idx = stack->size - off; + assert(idx >= 0 && idx < stack->size); + ref tmp = stack->refs[idx]; + stack->refs[idx] = stack->refs[stack->size - 1]; + stack->refs[stack->size - 1] = tmp; +} + +static ref +ref_stack_at(ref_stack *stack, Py_ssize_t idx) +{ + assert(idx >= 0 && idx < stack->size); + return stack->refs[idx]; +} + +static void +ref_stack_clear(ref_stack *stack) +{ + stack->size = 0; +} + +static 
void +ref_stack_fini(ref_stack *stack) +{ + if (stack->refs != NULL) { + PyMem_Free(stack->refs); + } + stack->refs = NULL; + stack->capacity = 0; + stack->size = 0; +} + +typedef enum { + // The loaded reference is still on the stack when the local is killed + SUPPORT_KILLED = 1, + // The loaded reference is stored into a local + STORED_AS_LOCAL = 2, + // The loaded reference is still on the stack at the end of the basic block + REF_UNCONSUMED = 4, +} LoadFastInstrFlag; + +static void +kill_local(uint8_t *instr_flags, ref_stack *refs, int local) +{ + for (Py_ssize_t i = 0; i < refs->size; i++) { + ref r = ref_stack_at(refs, i); + if (r.local == local) { + assert(r.instr >= 0); + instr_flags[r.instr] |= SUPPORT_KILLED; + } + } +} + +static void +store_local(uint8_t *instr_flags, ref_stack *refs, int local, ref r) +{ + kill_local(instr_flags, refs, local); + if (r.instr != DUMMY_INSTR) { + instr_flags[r.instr] |= STORED_AS_LOCAL; + } +} + +static void +load_fast_push_block(basicblock ***sp, basicblock *target, + Py_ssize_t start_depth) +{ + assert(target->b_startdepth >= 0 && target->b_startdepth == start_depth); + if (!target->b_visited) { + target->b_visited = 1; + *(*sp)++ = target; + } +} + +/* + * Strength reduce LOAD_FAST{_LOAD_FAST} instructions into faster variants that + * load borrowed references onto the operand stack. + * + * This is only safe when we can prove that the reference in the frame outlives + * the borrowed reference produced by the instruction. We make this tractable + * by enforcing the following lifetimes: + * + * 1. Borrowed references loaded onto the operand stack live until the end of + * the instruction that consumes them from the stack. Any borrowed + * references that would escape into the heap (e.g. into frame objects or + * generators) are converted into new, strong references. + * + * 2. Locals live until they are either killed by an instruction + * (e.g. STORE_FAST) or the frame is unwound. 
Any local that is overwritten + * via `f_locals` is added to a tuple owned by the frame object. + * + * To simplify the problem of detecting which supporting references in the + * frame are killed by instructions that overwrite locals, we only allow + * borrowed references to be stored as a local in the frame if they were passed + * as an argument. {RETURN,YIELD}_VALUE convert borrowed references into new, + * strong references. + * + * Using the above, we can optimize any LOAD_FAST{_LOAD_FAST} instructions + * that meet the following criteria: + * + * 1. The produced reference must be consumed from the stack before the + * supporting reference in the frame is killed. + * + * 2. The produced reference cannot be stored as a local. + * + * We use abstract interpretation to identify instructions that meet these + * criteria. For each basic block, we simulate the effect the bytecode has on a + * stack of abstract references and note any instructions that violate the + * criteria above. Once we've processed all the instructions in a block, any + * non-violating LOAD_FAST{_LOAD_FAST} can be optimized. 
+ */ +static int +optimize_load_fast(cfg_builder *g) +{ + int status; + ref_stack refs = {0}; + int max_instrs = 0; + basicblock *entryblock = g->g_entryblock; + for (basicblock *b = entryblock; b != NULL; b = b->b_next) { + max_instrs = Py_MAX(max_instrs, b->b_iused); + } + size_t instr_flags_size = max_instrs * sizeof(uint8_t); + uint8_t *instr_flags = PyMem_Malloc(instr_flags_size); + if (instr_flags == NULL) { + PyErr_NoMemory(); + return ERROR; + } + basicblock **blocks = make_cfg_traversal_stack(entryblock); + if (blocks == NULL) { + status = ERROR; + goto done; + } + basicblock **sp = blocks; + *sp = entryblock; + sp++; + entryblock->b_startdepth = 0; + entryblock->b_visited = 1; + + #define PUSH_REF(instr, local) \ + do { \ + if (ref_stack_push(&refs, (ref){(instr), (local)}) < 0) { \ + status = ERROR; \ + goto done; \ + } \ + } while(0) + + while (sp != blocks) { + basicblock *block = *--sp; + assert(block->b_startdepth > -1); + + // Reset per-block state. + memset(instr_flags, 0, block->b_iused * sizeof(*instr_flags)); + + // Reset the stack of refs. We don't track references on the stack + // across basic blocks, but the bytecode will expect their + // presence. Add dummy references as necessary.
+ ref_stack_clear(&refs); + for (int i = 0; i < block->b_startdepth; i++) { + PUSH_REF(DUMMY_INSTR, NOT_LOCAL); + } + + for (int i = 0; i < block->b_iused; i++) { + cfg_instr *instr = &block->b_instr[i]; + int opcode = instr->i_opcode; + int oparg = instr->i_oparg; + assert(opcode != EXTENDED_ARG); + switch (opcode) { + // Opcodes that load and store locals + case LOAD_FAST: { + PUSH_REF(i, oparg); + break; + } + + case LOAD_FAST_AND_CLEAR: { + kill_local(instr_flags, &refs, oparg); + PUSH_REF(i, oparg); + break; + } + + case LOAD_FAST_LOAD_FAST: { + PUSH_REF(i, oparg >> 4); + PUSH_REF(i, oparg & 15); + break; + } + + case STORE_FAST: { + ref r = ref_stack_pop(&refs); + store_local(instr_flags, &refs, oparg, r); + break; + } + + case STORE_FAST_LOAD_FAST: { + // STORE_FAST + ref r = ref_stack_pop(&refs); + store_local(instr_flags, &refs, oparg >> 4, r); + // LOAD_FAST + PUSH_REF(i, oparg & 15); + break; + } + + case STORE_FAST_STORE_FAST: { + // STORE_FAST + ref r = ref_stack_pop(&refs); + store_local(instr_flags, &refs, oparg >> 4, r); + // STORE_FAST + r = ref_stack_pop(&refs); + store_local(instr_flags, &refs, oparg & 15, r); + break; + } + + // Opcodes that shuffle values on the stack + case COPY: { + assert(oparg > 0); + Py_ssize_t idx = refs.size - oparg; + ref r = ref_stack_at(&refs, idx); + PUSH_REF(r.instr, r.local); + break; + } + + case SWAP: { + assert(oparg >= 2); + ref_stack_swap_top(&refs, oparg); + break; + } + + // We treat opcodes that do not consume all of their inputs on + // a case by case basis, as we have no generic way of knowing + // how many inputs should be left on the stack.
+ + // Opcodes that consume no inputs + case GET_ANEXT: + case GET_LEN: + case IMPORT_FROM: + case MATCH_KEYS: + case MATCH_MAPPING: + case MATCH_SEQUENCE: + case WITH_EXCEPT_START: { + int num_popped = _PyOpcode_num_popped(opcode, oparg); + int num_pushed = _PyOpcode_num_pushed(opcode, oparg); + int net_pushed = num_pushed - num_popped; + assert(net_pushed >= 0); + for (int j = 0; j < net_pushed; j++) { + PUSH_REF(i, NOT_LOCAL); // i is the producing instruction; the counter must not shadow it + } + break; + } + + // Opcodes that consume some inputs and push no new values + case DICT_MERGE: + case DICT_UPDATE: + case LIST_APPEND: + case LIST_EXTEND: + case MAP_ADD: + case RERAISE: + case SET_ADD: + case SET_UPDATE: { + int num_popped = _PyOpcode_num_popped(opcode, oparg); + int num_pushed = _PyOpcode_num_pushed(opcode, oparg); + int net_popped = num_popped - num_pushed; + assert(net_popped > 0); + for (int j = 0; j < net_popped; j++) { + ref_stack_pop(&refs); + } + break; + } + + // Opcodes that consume some inputs and push new values + case CHECK_EXC_MATCH: { + ref_stack_pop(&refs); + PUSH_REF(i, NOT_LOCAL); + break; + } + + case FOR_ITER: { + load_fast_push_block(&sp, instr->i_target, refs.size + 1); + PUSH_REF(i, NOT_LOCAL); + break; + } + + case LOAD_ATTR: + case LOAD_SUPER_ATTR: { + ref self = ref_stack_pop(&refs); + if (opcode == LOAD_SUPER_ATTR) { + ref_stack_pop(&refs); + ref_stack_pop(&refs); + } + PUSH_REF(i, NOT_LOCAL); + if (oparg & 1) { + // A method call; conservatively assume that self is pushed + // back onto the stack + PUSH_REF(self.instr, self.local); + } + break; + } + + case SEND: { + load_fast_push_block(&sp, instr->i_target, refs.size); + ref_stack_pop(&refs); + PUSH_REF(i, NOT_LOCAL); + break; + } + + // Opcodes that consume all of their inputs + default: { + int num_popped = _PyOpcode_num_popped(opcode, oparg); + int num_pushed = _PyOpcode_num_pushed(opcode, oparg); + if (HAS_TARGET(instr->i_opcode)) { + load_fast_push_block(&sp, instr->i_target, refs.size - num_popped + num_pushed); + } + if
(!IS_BLOCK_PUSH_OPCODE(instr->i_opcode)) { + // Block push opcodes only affect the stack when jumping + // to the target. + for (int j = 0; j < num_popped; j++) { + ref_stack_pop(&refs); + } + for (int j = 0; j < num_pushed; j++) { + PUSH_REF(i, NOT_LOCAL); + } + } + break; + } + } + } + + // Push fallthrough block + cfg_instr *term = basicblock_last_instr(block); + if (term != NULL && block->b_next != NULL && + !(IS_UNCONDITIONAL_JUMP_OPCODE(term->i_opcode) || + IS_SCOPE_EXIT_OPCODE(term->i_opcode))) { + assert(BB_HAS_FALLTHROUGH(block)); + load_fast_push_block(&sp, block->b_next, refs.size); + } + + // Mark instructions that produce values that are on the stack at the + // end of the basic block + for (Py_ssize_t i = 0; i < refs.size; i++) { + ref r = ref_stack_at(&refs, i); + if (r.instr != DUMMY_INSTR) { + instr_flags[r.instr] |= REF_UNCONSUMED; + } + } + + // Optimize instructions + for (int i = 0; i < block->b_iused; i++) { + if (!instr_flags[i]) { + cfg_instr *instr = &block->b_instr[i]; + switch (instr->i_opcode) { + case LOAD_FAST: + instr->i_opcode = LOAD_FAST_BORROW; + break; + case LOAD_FAST_LOAD_FAST: + instr->i_opcode = LOAD_FAST_BORROW_LOAD_FAST_BORROW; + break; + default: + break; + } + } + } + } + + #undef PUSH_REF + + status = SUCCESS; + +done: + ref_stack_fini(&refs); + PyMem_Free(instr_flags); + PyMem_Free(blocks); + return status; +} + // helper functions for add_checks_for_loads_of_unknown_variables static inline void maybe_push(basicblock *b, uint64_t unsafe_mask, basicblock ***sp) @@ -3525,6 +3942,11 @@ _PyCfg_OptimizedCfgToInstructionSequence(cfg_builder *g, RETURN_IF_ERROR(normalize_jumps(g)); assert(no_redundant_jumps(g)); + /* Can't modify the bytecode after inserting instructions that produce + * borrowed references. + */ + RETURN_IF_ERROR(optimize_load_fast(g)); + /* Can't modify the bytecode after computing jump offsets.
*/ if (_PyCfg_ToInstructionSequence(g, seq) < 0) { return ERROR; @@ -3608,6 +4030,15 @@ _PyCompile_OptimizeCfg(PyObject *seq, PyObject *consts, int nlocals) nparams, firstlineno) < 0) { goto error; } + + if (calculate_stackdepth(g) == ERROR) { + goto error; + } + + if (optimize_load_fast(g) != SUCCESS) { + goto error; + } + res = cfg_to_instruction_sequence(g); error: Py_DECREF(const_cache); diff --git a/Python/gc.c b/Python/gc.c index e37d4b76456acc..dad088e09f872f 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -544,6 +544,12 @@ visit_decref(PyObject *op, void *parent) int _PyGC_VisitStackRef(_PyStackRef *ref, visitproc visit, void *arg) { + // This is a bit tricky! We want to ignore stackrefs with embedded + // refcounts when computing the incoming references, but otherwise treat + // them like normal. + if (!PyStackRef_RefcountOnObject(*ref) && (visit == visit_decref)) { + return 0; + } Py_VISIT(PyStackRef_AsPyObjectBorrow(*ref)); return 0; } @@ -554,7 +560,7 @@ _PyGC_VisitFrameStack(_PyInterpreterFrame *frame, visitproc visit, void *arg) _PyStackRef *ref = _PyFrame_GetLocalsArray(frame); /* locals and stack */ for (; ref < frame->stackpointer; ref++) { - Py_VISIT(PyStackRef_AsPyObjectBorrow(*ref)); + _Py_VISIT_STACKREF(*ref); } return 0; } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 50db2c867c89e4..a4b5673c9e54b4 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -340,9 +340,8 @@ // _BINARY_OP_INPLACE_ADD_UNICODE { PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); - PyObject *right_o = PyStackRef_AsPyObjectSteal(right); assert(PyUnicode_CheckExact(left_o)); - assert(PyUnicode_CheckExact(right_o)); + assert(PyUnicode_CheckExact(PyStackRef_AsPyObjectBorrow(right))); int next_oparg; #if TIER_ONE assert(next_instr->op.code == STORE_FAST); @@ -369,9 +368,10 @@ * only the locals reference, so PyUnicode_Append knows * that the string is safe to mutate. 
*/ - assert(Py_REFCNT(left_o) >= 2); + assert(Py_REFCNT(left_o) >= 2 || !PyStackRef_IsHeapSafe(left)); PyStackRef_CLOSE_SPECIALIZED(left, _PyUnicode_ExactDealloc); PyObject *temp = PyStackRef_AsPyObjectSteal(*target_local); + PyObject *right_o = PyStackRef_AsPyObjectSteal(right); stack_pointer += -2; assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); @@ -7291,8 +7291,7 @@ { retval = val; assert(frame->owner != FRAME_OWNED_BY_INTERPRETER); - _PyStackRef temp = retval; - assert(PyStackRef_IsHeapSafe(temp)); + _PyStackRef temp = PyStackRef_MakeHeapSafe(retval); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); @@ -7376,7 +7375,7 @@ #endif stack_pointer = _PyFrame_GetStackPointer(frame); LOAD_IP(1 + INLINE_CACHE_ENTRIES_SEND); - value = temp; + value = PyStackRef_MakeHeapSafe(temp); LLTRACE_RESUME_FRAME(); } stack_pointer[0] = value; @@ -8873,6 +8872,44 @@ DISPATCH(); } + TARGET(LOAD_FAST_BORROW) { + #if Py_TAIL_CALL_INTERP + int opcode = LOAD_FAST_BORROW; + (void)(opcode); + #endif + frame->instr_ptr = next_instr; + next_instr += 1; + INSTRUCTION_STATS(LOAD_FAST_BORROW); + _PyStackRef value; + assert(!PyStackRef_IsNull(GETLOCAL(oparg))); + value = PyStackRef_Borrow(GETLOCAL(oparg)); + stack_pointer[0] = value; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + DISPATCH(); + } + + TARGET(LOAD_FAST_BORROW_LOAD_FAST_BORROW) { + #if Py_TAIL_CALL_INTERP + int opcode = LOAD_FAST_BORROW_LOAD_FAST_BORROW; + (void)(opcode); + #endif + frame->instr_ptr = next_instr; + next_instr += 1; + INSTRUCTION_STATS(LOAD_FAST_BORROW_LOAD_FAST_BORROW); + _PyStackRef value1; + _PyStackRef value2; + uint32_t oparg1 = oparg >> 4; + uint32_t oparg2 = oparg & 15; + value1 = PyStackRef_Borrow(GETLOCAL(oparg1)); + value2 = PyStackRef_Borrow(GETLOCAL(oparg2)); + stack_pointer[0] = value1; + stack_pointer[1] = value2; + stack_pointer += 2; + assert(WITHIN_STACK_BOUNDS()); + DISPATCH(); + } + TARGET(LOAD_FAST_CHECK) { 
#if Py_TAIL_CALL_INTERP int opcode = LOAD_FAST_CHECK; @@ -10346,8 +10383,7 @@ _PyStackRef res; retval = stack_pointer[-1]; assert(frame->owner != FRAME_OWNED_BY_INTERPRETER); - _PyStackRef temp = retval; - assert(PyStackRef_IsHeapSafe(temp)); + _PyStackRef temp = PyStackRef_MakeHeapSafe(retval); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); @@ -10411,7 +10447,7 @@ PyGenObject *gen = (PyGenObject *)receiver_o; _PyInterpreterFrame *gen_frame = &gen->gi_iframe; STACK_SHRINK(1); - _PyFrame_StackPush(gen_frame, v); + _PyFrame_StackPush(gen_frame, PyStackRef_MakeHeapSafe(v)); gen->gi_frame_state = FRAME_EXECUTING; gen->gi_exc_state.previous_item = tstate->exc_info; tstate->exc_info = &gen->gi_exc_state; @@ -10512,7 +10548,7 @@ } STAT_INC(SEND, hit); gen_frame = &gen->gi_iframe; - _PyFrame_StackPush(gen_frame, v); + _PyFrame_StackPush(gen_frame, PyStackRef_MakeHeapSafe(v)); gen->gi_frame_state = FRAME_EXECUTING; gen->gi_exc_state.previous_item = tstate->exc_info; tstate->exc_info = &gen->gi_exc_state; @@ -12033,7 +12069,7 @@ #endif stack_pointer = _PyFrame_GetStackPointer(frame); LOAD_IP(1 + INLINE_CACHE_ENTRIES_SEND); - value = temp; + value = PyStackRef_MakeHeapSafe(temp); LLTRACE_RESUME_FRAME(); stack_pointer[0] = value; stack_pointer += 1; diff --git a/Python/opcode_targets.h b/Python/opcode_targets.h index c0dac90aebd458..5b9fb794c6bddc 100644 --- a/Python/opcode_targets.h +++ b/Python/opcode_targets.h @@ -84,6 +84,8 @@ static void *opcode_targets[256] = { &&TARGET_LOAD_DEREF, &&TARGET_LOAD_FAST, &&TARGET_LOAD_FAST_AND_CLEAR, + &&TARGET_LOAD_FAST_BORROW, + &&TARGET_LOAD_FAST_BORROW_LOAD_FAST_BORROW, &&TARGET_LOAD_FAST_CHECK, &&TARGET_LOAD_FAST_LOAD_FAST, &&TARGET_LOAD_FROM_DICT_OR_DEREF, @@ -126,8 +128,6 @@ static void *opcode_targets[256] = { &&_unknown_opcode, &&_unknown_opcode, &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, &&TARGET_RESUME, &&TARGET_BINARY_OP_ADD_FLOAT, &&TARGET_BINARY_OP_ADD_INT, 
@@ -414,6 +414,8 @@ Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_LOAD_CONST_MORTAL(TAIL_CALL_PARA Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_LOAD_DEREF(TAIL_CALL_PARAMS); Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_LOAD_FAST(TAIL_CALL_PARAMS); Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_LOAD_FAST_AND_CLEAR(TAIL_CALL_PARAMS); +Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_LOAD_FAST_BORROW(TAIL_CALL_PARAMS); +Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_LOAD_FAST_BORROW_LOAD_FAST_BORROW(TAIL_CALL_PARAMS); Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_LOAD_FAST_CHECK(TAIL_CALL_PARAMS); Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_LOAD_FAST_LOAD_FAST(TAIL_CALL_PARAMS); Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_LOAD_FROM_DICT_OR_DEREF(TAIL_CALL_PARAMS); @@ -648,6 +650,8 @@ static py_tail_call_funcptr INSTRUCTION_TABLE[256] = { [LOAD_DEREF] = _TAIL_CALL_LOAD_DEREF, [LOAD_FAST] = _TAIL_CALL_LOAD_FAST, [LOAD_FAST_AND_CLEAR] = _TAIL_CALL_LOAD_FAST_AND_CLEAR, + [LOAD_FAST_BORROW] = _TAIL_CALL_LOAD_FAST_BORROW, + [LOAD_FAST_BORROW_LOAD_FAST_BORROW] = _TAIL_CALL_LOAD_FAST_BORROW_LOAD_FAST_BORROW, [LOAD_FAST_CHECK] = _TAIL_CALL_LOAD_FAST_CHECK, [LOAD_FAST_LOAD_FAST] = _TAIL_CALL_LOAD_FAST_LOAD_FAST, [LOAD_FROM_DICT_OR_DEREF] = _TAIL_CALL_LOAD_FROM_DICT_OR_DEREF, @@ -725,8 +729,6 @@ static py_tail_call_funcptr INSTRUCTION_TABLE[256] = { [UNPACK_SEQUENCE_TWO_TUPLE] = _TAIL_CALL_UNPACK_SEQUENCE_TWO_TUPLE, [WITH_EXCEPT_START] = _TAIL_CALL_WITH_EXCEPT_START, [YIELD_VALUE] = _TAIL_CALL_YIELD_VALUE, - [117] = _TAIL_CALL_UNKNOWN_OPCODE, - [118] = _TAIL_CALL_UNKNOWN_OPCODE, [119] = _TAIL_CALL_UNKNOWN_OPCODE, [120] = _TAIL_CALL_UNKNOWN_OPCODE, [121] = _TAIL_CALL_UNKNOWN_OPCODE, diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 017a2eeca0741e..afaf2e059e1b4a 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -573,6 +573,7 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) if 
(last->opcode == _LOAD_CONST_INLINE || last->opcode == _LOAD_CONST_INLINE_BORROW || last->opcode == _LOAD_FAST || + last->opcode == _LOAD_FAST_BORROW || last->opcode == _COPY ) { last->opcode = _NOP; diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 8d9e94a2445182..163a8bfd2679db 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -86,6 +86,10 @@ dummy_func(void) { value = GETLOCAL(oparg); } + op(_LOAD_FAST_BORROW, (-- value)) { + value = GETLOCAL(oparg); + } + op(_LOAD_FAST_AND_CLEAR, (-- value)) { value = GETLOCAL(oparg); JitOptSymbol *temp = sym_new_null(ctx); diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 6a8ac75b63eb0e..f1280ba1e40a7f 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -47,6 +47,15 @@ break; } + case _LOAD_FAST_BORROW: { + JitOptSymbol *value; + value = GETLOCAL(oparg); + stack_pointer[0] = value; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + case _LOAD_FAST_AND_CLEAR: { JitOptSymbol *value; value = GETLOCAL(oparg); diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index 491b5d127cf0f5..5d75b9f036b524 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -580,6 +580,7 @@ def has_error_without_pop(op: parser.CodeDef) -> bool: "PyStackRef_AsPyObjectNew", "PyStackRef_FromPyObjectNewMortal", "PyStackRef_AsPyObjectSteal", + "PyStackRef_Borrow", "PyStackRef_CLEAR", "PyStackRef_CLOSE_SPECIALIZED", "PyStackRef_DUP", @@ -595,6 +596,7 @@ def has_error_without_pop(op: parser.CodeDef) -> bool: "PyStackRef_IsTrue", "PyStackRef_IsFalse", "PyStackRef_IsNull", + "PyStackRef_MakeHeapSafe", "PyStackRef_None", "PyStackRef_TYPE", "PyStackRef_True", diff --git a/Tools/cases_generator/opcode_metadata_generator.py b/Tools/cases_generator/opcode_metadata_generator.py index 02283a0b647e84..620e4b6f1f4a69 100644 --- a/Tools/cases_generator/opcode_metadata_generator.py 
+++ b/Tools/cases_generator/opcode_metadata_generator.py @@ -227,9 +227,10 @@ def generate_expansion_table(analysis: Analysis, out: CWriter) -> None: expansions: list[tuple[str, str, int]] = [] # [(name, size, offset), ...] if inst.is_super(): pieces = inst.name.split("_") - assert len(pieces) == 4, f"{inst.name} doesn't look like a super-instr" - name1 = "_".join(pieces[:2]) - name2 = "_".join(pieces[2:]) + assert len(pieces) % 2 == 0, f"{inst.name} doesn't look like a super-instr" + parts_per_piece = int(len(pieces) / 2) + name1 = "_".join(pieces[:parts_per_piece]) + name2 = "_".join(pieces[parts_per_piece:]) assert name1 in analysis.instructions, f"{name1} doesn't match any instr" assert name2 in analysis.instructions, f"{name2} doesn't match any instr" instr1 = analysis.instructions[name1] _______________________________________________ Python-checkins mailing list -- python-checkins@python.org To unsubscribe send an email to python-checkins-le...@python.org https://mail.python.org/mailman3/lists/python-checkins.python.org/ Member address: arch...@mail-archive.com