https://github.com/python/cpython/commit/053c285f6b41f92fbdd1d4ff0c959cceefacd7cd
commit: 053c285f6b41f92fbdd1d4ff0c959cceefacd7cd
branch: main
author: mpage <mp...@meta.com>
committer: mpage <mp...@cs.stanford.edu>
date: 2025-04-01T10:18:42-07:00
summary:

gh-130704: Strength reduce `LOAD_FAST{_LOAD_FAST}` (#130708)

Optimize `LOAD_FAST` opcodes into faster versions that load borrowed references 
onto the operand stack when we can prove that the lifetime of the local 
outlives the lifetime of the temporary that is loaded onto the stack.

files:
A 
Misc/NEWS.d/next/Core_and_Builtins/2025-02-28-11-29-35.gh-issue-130704.7RDVLE.rst
M Doc/library/dis.rst
M Include/internal/pycore_frame.h
M Include/internal/pycore_opcode_metadata.h
M Include/internal/pycore_stackref.h
M Include/internal/pycore_uop_ids.h
M Include/internal/pycore_uop_metadata.h
M Include/opcode_ids.h
M Lib/_opcode_metadata.py
M Lib/dis.py
M Lib/test/test_capi/test_opt.py
M Lib/test/test_ctypes/test_memfunctions.py
M Lib/test/test_ctypes/test_refcounts.py
M Lib/test/test_ctypes/test_stringptr.py
M Lib/test/test_dis.py
M Lib/test/test_frame.py
M Lib/test/test_generators.py
M Lib/test/test_importlib/test_abc.py
M Lib/test/test_peepholer.py
M Lib/test/test_sys.py
M Lib/test/test_traceback.py
M Objects/floatobject.c
M Objects/frameobject.c
M Programs/test_frozenmain.h
M Python/bytecodes.c
M Python/executor_cases.c.h
M Python/flowgraph.c
M Python/gc.c
M Python/generated_cases.c.h
M Python/opcode_targets.h
M Python/optimizer_analysis.c
M Python/optimizer_bytecodes.c
M Python/optimizer_cases.c.h
M Tools/cases_generator/analyzer.py
M Tools/cases_generator/opcode_metadata_generator.py

diff --git a/Doc/library/dis.rst b/Doc/library/dis.rst
index 90e937ab2a91a6..4fde5bf903880b 100644
--- a/Doc/library/dis.rst
+++ b/Doc/library/dis.rst
@@ -76,7 +76,7 @@ the following command can be used to display the disassembly 
of
      2           RESUME                   0
    <BLANKLINE>
      3           LOAD_GLOBAL              1 (len + NULL)
-                 LOAD_FAST                0 (alist)
+                 LOAD_FAST_BORROW         0 (alist)
                  CALL                     1
                  RETURN_VALUE
 
@@ -215,7 +215,7 @@ Example:
     ...
     RESUME
     LOAD_GLOBAL
-    LOAD_FAST
+    LOAD_FAST_BORROW
     CALL
     RETURN_VALUE
 
@@ -1402,6 +1402,13 @@ iterations of the loop.
       This opcode is now only used in situations where the local variable is
       guaranteed to be initialized. It cannot raise :exc:`UnboundLocalError`.
 
+.. opcode:: LOAD_FAST_BORROW (var_num)
+
+   Pushes a borrowed reference to the local ``co_varnames[var_num]`` onto the
+   stack.
+
+   .. versionadded:: 3.14
+
 .. opcode:: LOAD_FAST_LOAD_FAST (var_nums)
 
    Pushes references to ``co_varnames[var_nums >> 4]`` and
@@ -1409,6 +1416,14 @@ iterations of the loop.
 
    .. versionadded:: 3.13
 
+
+.. opcode:: LOAD_FAST_BORROW_LOAD_FAST_BORROW (var_nums)
+
+   Pushes borrowed references to ``co_varnames[var_nums >> 4]`` and
+   ``co_varnames[var_nums & 15]`` onto the stack.
+
+   .. versionadded:: 3.14
+
 .. opcode:: LOAD_FAST_CHECK (var_num)
 
    Pushes a reference to the local ``co_varnames[var_num]`` onto the stack,
@@ -2023,4 +2038,3 @@ instructions:
 
    .. deprecated:: 3.13
       All jumps are now relative. This list is empty.
-
diff --git a/Include/internal/pycore_frame.h b/Include/internal/pycore_frame.h
index dde211c5eac015..8c410e9e208340 100644
--- a/Include/internal/pycore_frame.h
+++ b/Include/internal/pycore_frame.h
@@ -28,6 +28,12 @@ struct _frame {
        PyEval_GetLocals requires a borrowed reference so the actual reference
        is stored here */
     PyObject *f_locals_cache;
+    /* A tuple containing strong references to fast locals that were 
overwritten
+     * via f_locals. Borrowed references to these locals may exist in frames
+     * closer to the top of the stack. The references in this tuple act as
+     * "support" for the borrowed references, ensuring that they remain valid.
+     */
+    PyObject *f_overwritten_fast_locals;
     /* The frame data, if this frame object owns the frame */
     PyObject *_f_frame_data[1];
 };
diff --git a/Include/internal/pycore_opcode_metadata.h 
b/Include/internal/pycore_opcode_metadata.h
index 32bafaf09ce3da..073c002b34c30a 100644
--- a/Include/internal/pycore_opcode_metadata.h
+++ b/Include/internal/pycore_opcode_metadata.h
@@ -335,6 +335,10 @@ int _PyOpcode_num_popped(int opcode, int oparg)  {
             return 0;
         case LOAD_FAST_AND_CLEAR:
             return 0;
+        case LOAD_FAST_BORROW:
+            return 0;
+        case LOAD_FAST_BORROW_LOAD_FAST_BORROW:
+            return 0;
         case LOAD_FAST_CHECK:
             return 0;
         case LOAD_FAST_LOAD_FAST:
@@ -810,6 +814,10 @@ int _PyOpcode_num_pushed(int opcode, int oparg)  {
             return 1;
         case LOAD_FAST_AND_CLEAR:
             return 1;
+        case LOAD_FAST_BORROW:
+            return 1;
+        case LOAD_FAST_BORROW_LOAD_FAST_BORROW:
+            return 2;
         case LOAD_FAST_CHECK:
             return 1;
         case LOAD_FAST_LOAD_FAST:
@@ -1198,6 +1206,8 @@ const struct opcode_metadata 
_PyOpcode_opcode_metadata[266] = {
     [LOAD_DEREF] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG | 
HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
     [LOAD_FAST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG | 
HAS_PURE_FLAG },
     [LOAD_FAST_AND_CLEAR] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | 
HAS_LOCAL_FLAG },
+    [LOAD_FAST_BORROW] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG | 
HAS_PURE_FLAG },
+    [LOAD_FAST_BORROW_LOAD_FAST_BORROW] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | 
HAS_LOCAL_FLAG },
     [LOAD_FAST_CHECK] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG | 
HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
     [LOAD_FAST_LOAD_FAST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | 
HAS_LOCAL_FLAG },
     [LOAD_FROM_DICT_OR_DEREF] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | 
HAS_FREE_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG },
@@ -1406,6 +1416,8 @@ _PyOpcode_macro_expansion[256] = {
     [LOAD_DEREF] = { .nuops = 1, .uops = { { _LOAD_DEREF, OPARG_SIMPLE, 0 } } 
},
     [LOAD_FAST] = { .nuops = 1, .uops = { { _LOAD_FAST, OPARG_SIMPLE, 0 } } },
     [LOAD_FAST_AND_CLEAR] = { .nuops = 1, .uops = { { _LOAD_FAST_AND_CLEAR, 
OPARG_SIMPLE, 0 } } },
+    [LOAD_FAST_BORROW] = { .nuops = 1, .uops = { { _LOAD_FAST_BORROW, 
OPARG_SIMPLE, 0 } } },
+    [LOAD_FAST_BORROW_LOAD_FAST_BORROW] = { .nuops = 2, .uops = { { 
_LOAD_FAST_BORROW, OPARG_TOP, 0 }, { _LOAD_FAST_BORROW, OPARG_BOTTOM, 0 } } },
     [LOAD_FAST_CHECK] = { .nuops = 1, .uops = { { _LOAD_FAST_CHECK, 
OPARG_SIMPLE, 0 } } },
     [LOAD_FAST_LOAD_FAST] = { .nuops = 2, .uops = { { _LOAD_FAST, OPARG_TOP, 0 
}, { _LOAD_FAST, OPARG_BOTTOM, 0 } } },
     [LOAD_FROM_DICT_OR_DEREF] = { .nuops = 1, .uops = { { 
_LOAD_FROM_DICT_OR_DEREF, OPARG_SIMPLE, 0 } } },
@@ -1632,6 +1644,8 @@ const char *_PyOpcode_OpName[266] = {
     [LOAD_DEREF] = "LOAD_DEREF",
     [LOAD_FAST] = "LOAD_FAST",
     [LOAD_FAST_AND_CLEAR] = "LOAD_FAST_AND_CLEAR",
+    [LOAD_FAST_BORROW] = "LOAD_FAST_BORROW",
+    [LOAD_FAST_BORROW_LOAD_FAST_BORROW] = "LOAD_FAST_BORROW_LOAD_FAST_BORROW",
     [LOAD_FAST_CHECK] = "LOAD_FAST_CHECK",
     [LOAD_FAST_LOAD_FAST] = "LOAD_FAST_LOAD_FAST",
     [LOAD_FROM_DICT_OR_DEREF] = "LOAD_FROM_DICT_OR_DEREF",
@@ -1890,6 +1904,8 @@ const uint8_t _PyOpcode_Deopt[256] = {
     [LOAD_DEREF] = LOAD_DEREF,
     [LOAD_FAST] = LOAD_FAST,
     [LOAD_FAST_AND_CLEAR] = LOAD_FAST_AND_CLEAR,
+    [LOAD_FAST_BORROW] = LOAD_FAST_BORROW,
+    [LOAD_FAST_BORROW_LOAD_FAST_BORROW] = LOAD_FAST_BORROW_LOAD_FAST_BORROW,
     [LOAD_FAST_CHECK] = LOAD_FAST_CHECK,
     [LOAD_FAST_LOAD_FAST] = LOAD_FAST_LOAD_FAST,
     [LOAD_FROM_DICT_OR_DEREF] = LOAD_FROM_DICT_OR_DEREF,
@@ -1972,8 +1988,6 @@ const uint8_t _PyOpcode_Deopt[256] = {
 #endif // NEED_OPCODE_METADATA
 
 #define EXTRA_CASES \
-    case 117: \
-    case 118: \
     case 119: \
     case 120: \
     case 121: \
diff --git a/Include/internal/pycore_stackref.h 
b/Include/internal/pycore_stackref.h
index e2e32ed4122838..6664a747e2e91b 100644
--- a/Include/internal/pycore_stackref.h
+++ b/Include/internal/pycore_stackref.h
@@ -172,6 +172,12 @@ PyStackRef_MakeHeapSafe(_PyStackRef ref)
     return ref;
 }
 
+static inline _PyStackRef
+PyStackRef_Borrow(_PyStackRef ref)
+{
+    return PyStackRef_DUP(ref);  /* hands back a duplicate of ref */
+}
+
 #define PyStackRef_CLEAR(REF) \
     do { \
         _PyStackRef *_tmp_op_ptr = &(REF); \
@@ -253,6 +259,25 @@ _PyStackRef_FromPyObjectSteal(PyObject *obj)
 }
 #   define PyStackRef_FromPyObjectSteal(obj) 
_PyStackRef_FromPyObjectSteal(_PyObject_CAST(obj))
 
+static inline bool
+PyStackRef_IsHeapSafe(_PyStackRef stackref)
+{
+    if (PyStackRef_IsDeferred(stackref)) {
+        PyObject *obj = PyStackRef_AsPyObjectBorrow(stackref);
+        return obj == NULL || _Py_IsImmortal(obj) || 
_PyObject_HasDeferredRefcount(obj);
+    }
+    return true;
+}
+
+static inline _PyStackRef
+PyStackRef_MakeHeapSafe(_PyStackRef stackref)
+{
+    if (PyStackRef_IsHeapSafe(stackref)) {
+        return stackref;
+    }
+    PyObject *obj = PyStackRef_AsPyObjectBorrow(stackref);
+    return (_PyStackRef){ .bits = (uintptr_t)(Py_NewRef(obj)) | Py_TAG_PTR };
+}
 
 static inline _PyStackRef
 PyStackRef_FromPyObjectStealMortal(PyObject *obj)
@@ -311,25 +336,16 @@ PyStackRef_DUP(_PyStackRef stackref)
 {
     assert(!PyStackRef_IsNull(stackref));
     if (PyStackRef_IsDeferred(stackref)) {
-        assert(_Py_IsImmortal(PyStackRef_AsPyObjectBorrow(stackref)) ||
-               
_PyObject_HasDeferredRefcount(PyStackRef_AsPyObjectBorrow(stackref))
-        );
         return stackref;
     }
     Py_INCREF(PyStackRef_AsPyObjectBorrow(stackref));
     return stackref;
 }
 
-static inline bool
-PyStackRef_IsHeapSafe(_PyStackRef ref)
-{
-    return true;
-}
-
 static inline _PyStackRef
-PyStackRef_MakeHeapSafe(_PyStackRef ref)
+PyStackRef_Borrow(_PyStackRef stackref)
 {
-    return ref;
+    return (_PyStackRef){ .bits = stackref.bits | Py_TAG_DEFERRED };
 }
 
 // Convert a possibly deferred reference to a strong reference.
@@ -399,7 +415,6 @@ static inline void PyStackRef_CheckValid(_PyStackRef ref) {
             assert(!_Py_IsStaticImmortal(obj));
             break;
         case Py_TAG_REFCNT:
-            assert(obj == NULL || _Py_IsImmortal(obj));
             break;
         default:
             assert(0);
@@ -413,14 +428,15 @@ static inline void PyStackRef_CheckValid(_PyStackRef ref) 
{
 #endif
 
 #ifdef _WIN32
-#define PyStackRef_RefcountOnObject(REF) (((REF).bits & Py_TAG_BITS) == 0)
+#define PyStackRef_RefcountOnObject(REF) (((REF).bits & Py_TAG_REFCNT) == 0)
 #define PyStackRef_AsPyObjectBorrow BITS_TO_PTR_MASKED
+#define PyStackRef_Borrow(REF) ((_PyStackRef){ .bits = ((REF).bits) | 
Py_TAG_REFCNT })  /* expression macro: no trailing ';' */
 #else
 /* Does this ref not have an embedded refcount and thus not refer to a 
declared immmortal object? */
 static inline int
 PyStackRef_RefcountOnObject(_PyStackRef ref)
 {
-    return (ref.bits & Py_TAG_BITS) == 0;
+    return (ref.bits & Py_TAG_REFCNT) == 0;
 }
 
 static inline PyObject *
@@ -428,6 +444,12 @@ PyStackRef_AsPyObjectBorrow(_PyStackRef ref)
 {
     return BITS_TO_PTR_MASKED(ref);
 }
+
+static inline _PyStackRef
+PyStackRef_Borrow(_PyStackRef ref)
+{
+    return (_PyStackRef){ .bits = ref.bits | Py_TAG_REFCNT };
+}
 #endif
 
 static inline PyObject *
diff --git a/Include/internal/pycore_uop_ids.h 
b/Include/internal/pycore_uop_ids.h
index 1d9c2bef4cedda..6fb63bfabfe798 100644
--- a/Include/internal/pycore_uop_ids.h
+++ b/Include/internal/pycore_uop_ids.h
@@ -203,106 +203,116 @@ extern "C" {
 #define _LOAD_FAST_6 428
 #define _LOAD_FAST_7 429
 #define _LOAD_FAST_AND_CLEAR LOAD_FAST_AND_CLEAR
+#define _LOAD_FAST_BORROW 430
+#define _LOAD_FAST_BORROW_0 431
+#define _LOAD_FAST_BORROW_1 432
+#define _LOAD_FAST_BORROW_2 433
+#define _LOAD_FAST_BORROW_3 434
+#define _LOAD_FAST_BORROW_4 435
+#define _LOAD_FAST_BORROW_5 436
+#define _LOAD_FAST_BORROW_6 437
+#define _LOAD_FAST_BORROW_7 438
+#define _LOAD_FAST_BORROW_LOAD_FAST_BORROW LOAD_FAST_BORROW_LOAD_FAST_BORROW
 #define _LOAD_FAST_CHECK LOAD_FAST_CHECK
 #define _LOAD_FAST_LOAD_FAST LOAD_FAST_LOAD_FAST
 #define _LOAD_FROM_DICT_OR_DEREF LOAD_FROM_DICT_OR_DEREF
 #define _LOAD_FROM_DICT_OR_GLOBALS LOAD_FROM_DICT_OR_GLOBALS
-#define _LOAD_GLOBAL 430
-#define _LOAD_GLOBAL_BUILTINS 431
-#define _LOAD_GLOBAL_MODULE 432
+#define _LOAD_GLOBAL 439
+#define _LOAD_GLOBAL_BUILTINS 440
+#define _LOAD_GLOBAL_MODULE 441
 #define _LOAD_LOCALS LOAD_LOCALS
 #define _LOAD_NAME LOAD_NAME
-#define _LOAD_SMALL_INT 433
-#define _LOAD_SMALL_INT_0 434
-#define _LOAD_SMALL_INT_1 435
-#define _LOAD_SMALL_INT_2 436
-#define _LOAD_SMALL_INT_3 437
+#define _LOAD_SMALL_INT 442
+#define _LOAD_SMALL_INT_0 443
+#define _LOAD_SMALL_INT_1 444
+#define _LOAD_SMALL_INT_2 445
+#define _LOAD_SMALL_INT_3 446
 #define _LOAD_SPECIAL LOAD_SPECIAL
 #define _LOAD_SUPER_ATTR_ATTR LOAD_SUPER_ATTR_ATTR
 #define _LOAD_SUPER_ATTR_METHOD LOAD_SUPER_ATTR_METHOD
-#define _MAKE_CALLARGS_A_TUPLE 438
+#define _MAKE_CALLARGS_A_TUPLE 447
 #define _MAKE_CELL MAKE_CELL
 #define _MAKE_FUNCTION MAKE_FUNCTION
-#define _MAKE_WARM 439
+#define _MAKE_WARM 448
 #define _MAP_ADD MAP_ADD
 #define _MATCH_CLASS MATCH_CLASS
 #define _MATCH_KEYS MATCH_KEYS
 #define _MATCH_MAPPING MATCH_MAPPING
 #define _MATCH_SEQUENCE MATCH_SEQUENCE
-#define _MAYBE_EXPAND_METHOD 440
-#define _MAYBE_EXPAND_METHOD_KW 441
-#define _MONITOR_CALL 442
-#define _MONITOR_CALL_KW 443
-#define _MONITOR_JUMP_BACKWARD 444
-#define _MONITOR_RESUME 445
+#define _MAYBE_EXPAND_METHOD 449
+#define _MAYBE_EXPAND_METHOD_KW 450
+#define _MONITOR_CALL 451
+#define _MONITOR_CALL_KW 452
+#define _MONITOR_JUMP_BACKWARD 453
+#define _MONITOR_RESUME 454
 #define _NOP NOP
 #define _POP_EXCEPT POP_EXCEPT
-#define _POP_JUMP_IF_FALSE 446
-#define _POP_JUMP_IF_TRUE 447
+#define _POP_JUMP_IF_FALSE 455
+#define _POP_JUMP_IF_TRUE 456
 #define _POP_TOP POP_TOP
-#define _POP_TOP_LOAD_CONST_INLINE 448
-#define _POP_TOP_LOAD_CONST_INLINE_BORROW 449
-#define _POP_TWO_LOAD_CONST_INLINE_BORROW 450
+#define _POP_TOP_LOAD_CONST_INLINE 457
+#define _POP_TOP_LOAD_CONST_INLINE_BORROW 458
+#define _POP_TWO_LOAD_CONST_INLINE_BORROW 459
 #define _PUSH_EXC_INFO PUSH_EXC_INFO
-#define _PUSH_FRAME 451
+#define _PUSH_FRAME 460
 #define _PUSH_NULL PUSH_NULL
-#define _PUSH_NULL_CONDITIONAL 452
-#define _PY_FRAME_GENERAL 453
-#define _PY_FRAME_KW 454
-#define _QUICKEN_RESUME 455
-#define _REPLACE_WITH_TRUE 456
+#define _PUSH_NULL_CONDITIONAL 461
+#define _PY_FRAME_GENERAL 462
+#define _PY_FRAME_KW 463
+#define _QUICKEN_RESUME 464
+#define _REPLACE_WITH_TRUE 465
 #define _RESUME_CHECK RESUME_CHECK
 #define _RETURN_GENERATOR RETURN_GENERATOR
 #define _RETURN_VALUE RETURN_VALUE
-#define _SAVE_RETURN_OFFSET 457
-#define _SEND 458
-#define _SEND_GEN_FRAME 459
+#define _SAVE_RETURN_OFFSET 466
+#define _SEND 467
+#define _SEND_GEN_FRAME 468
 #define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS
 #define _SET_ADD SET_ADD
 #define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE
 #define _SET_UPDATE SET_UPDATE
-#define _START_EXECUTOR 460
-#define _STORE_ATTR 461
-#define _STORE_ATTR_INSTANCE_VALUE 462
-#define _STORE_ATTR_SLOT 463
-#define _STORE_ATTR_WITH_HINT 464
+#define _START_EXECUTOR 469
+#define _STORE_ATTR 470
+#define _STORE_ATTR_INSTANCE_VALUE 471
+#define _STORE_ATTR_SLOT 472
+#define _STORE_ATTR_WITH_HINT 473
 #define _STORE_DEREF STORE_DEREF
-#define _STORE_FAST 465
-#define _STORE_FAST_0 466
-#define _STORE_FAST_1 467
-#define _STORE_FAST_2 468
-#define _STORE_FAST_3 469
-#define _STORE_FAST_4 470
-#define _STORE_FAST_5 471
-#define _STORE_FAST_6 472
-#define _STORE_FAST_7 473
+#define _STORE_FAST 474
+#define _STORE_FAST_0 475
+#define _STORE_FAST_1 476
+#define _STORE_FAST_2 477
+#define _STORE_FAST_3 478
+#define _STORE_FAST_4 479
+#define _STORE_FAST_5 480
+#define _STORE_FAST_6 481
+#define _STORE_FAST_7 482
 #define _STORE_FAST_LOAD_FAST STORE_FAST_LOAD_FAST
 #define _STORE_FAST_STORE_FAST STORE_FAST_STORE_FAST
 #define _STORE_GLOBAL STORE_GLOBAL
 #define _STORE_NAME STORE_NAME
-#define _STORE_SLICE 474
-#define _STORE_SUBSCR 475
+#define _STORE_SLICE 483
+#define _STORE_SUBSCR 484
 #define _STORE_SUBSCR_DICT STORE_SUBSCR_DICT
 #define _STORE_SUBSCR_LIST_INT STORE_SUBSCR_LIST_INT
 #define _SWAP SWAP
-#define _TIER2_RESUME_CHECK 476
-#define _TO_BOOL 477
+#define _TIER2_RESUME_CHECK 485
+#define _TO_BOOL 486
 #define _TO_BOOL_BOOL TO_BOOL_BOOL
 #define _TO_BOOL_INT TO_BOOL_INT
 #define _TO_BOOL_LIST TO_BOOL_LIST
 #define _TO_BOOL_NONE TO_BOOL_NONE
-#define _TO_BOOL_STR 478
+#define _TO_BOOL_STR 487
 #define _UNARY_INVERT UNARY_INVERT
 #define _UNARY_NEGATIVE UNARY_NEGATIVE
 #define _UNARY_NOT UNARY_NOT
 #define _UNPACK_EX UNPACK_EX
-#define _UNPACK_SEQUENCE 479
+#define _UNPACK_SEQUENCE 488
 #define _UNPACK_SEQUENCE_LIST UNPACK_SEQUENCE_LIST
 #define _UNPACK_SEQUENCE_TUPLE UNPACK_SEQUENCE_TUPLE
 #define _UNPACK_SEQUENCE_TWO_TUPLE UNPACK_SEQUENCE_TWO_TUPLE
 #define _WITH_EXCEPT_START WITH_EXCEPT_START
 #define _YIELD_VALUE YIELD_VALUE
-#define MAX_UOP_ID 479
+#define MAX_UOP_ID 488
 
 #ifdef __cplusplus
 }
diff --git a/Include/internal/pycore_uop_metadata.h 
b/Include/internal/pycore_uop_metadata.h
index 4f5f6bbde2571a..32007d0d917e2f 100644
--- a/Include/internal/pycore_uop_metadata.h
+++ b/Include/internal/pycore_uop_metadata.h
@@ -33,8 +33,18 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = {
     [_LOAD_FAST_6] = HAS_LOCAL_FLAG | HAS_PURE_FLAG,
     [_LOAD_FAST_7] = HAS_LOCAL_FLAG | HAS_PURE_FLAG,
     [_LOAD_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_PURE_FLAG,
+    [_LOAD_FAST_BORROW_0] = HAS_LOCAL_FLAG | HAS_PURE_FLAG,
+    [_LOAD_FAST_BORROW_1] = HAS_LOCAL_FLAG | HAS_PURE_FLAG,
+    [_LOAD_FAST_BORROW_2] = HAS_LOCAL_FLAG | HAS_PURE_FLAG,
+    [_LOAD_FAST_BORROW_3] = HAS_LOCAL_FLAG | HAS_PURE_FLAG,
+    [_LOAD_FAST_BORROW_4] = HAS_LOCAL_FLAG | HAS_PURE_FLAG,
+    [_LOAD_FAST_BORROW_5] = HAS_LOCAL_FLAG | HAS_PURE_FLAG,
+    [_LOAD_FAST_BORROW_6] = HAS_LOCAL_FLAG | HAS_PURE_FLAG,
+    [_LOAD_FAST_BORROW_7] = HAS_LOCAL_FLAG | HAS_PURE_FLAG,
+    [_LOAD_FAST_BORROW] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_PURE_FLAG,
     [_LOAD_FAST_AND_CLEAR] = HAS_ARG_FLAG | HAS_LOCAL_FLAG,
     [_LOAD_FAST_LOAD_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG,
+    [_LOAD_FAST_BORROW_LOAD_FAST_BORROW] = HAS_ARG_FLAG | HAS_LOCAL_FLAG,
     [_LOAD_CONST_MORTAL] = HAS_ARG_FLAG | HAS_CONST_FLAG,
     [_LOAD_CONST_IMMORTAL] = HAS_ARG_FLAG | HAS_CONST_FLAG,
     [_LOAD_SMALL_INT_0] = 0,
@@ -287,6 +297,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = {
 
 const uint8_t _PyUop_Replication[MAX_UOP_ID+1] = {
     [_LOAD_FAST] = 8,
+    [_LOAD_FAST_BORROW] = 8,
     [_LOAD_SMALL_INT] = 4,
     [_STORE_FAST] = 8,
     [_INIT_CALL_PY_EXACT_ARGS] = 5,
@@ -466,6 +477,16 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = {
     [_LOAD_FAST_6] = "_LOAD_FAST_6",
     [_LOAD_FAST_7] = "_LOAD_FAST_7",
     [_LOAD_FAST_AND_CLEAR] = "_LOAD_FAST_AND_CLEAR",
+    [_LOAD_FAST_BORROW] = "_LOAD_FAST_BORROW",
+    [_LOAD_FAST_BORROW_0] = "_LOAD_FAST_BORROW_0",
+    [_LOAD_FAST_BORROW_1] = "_LOAD_FAST_BORROW_1",
+    [_LOAD_FAST_BORROW_2] = "_LOAD_FAST_BORROW_2",
+    [_LOAD_FAST_BORROW_3] = "_LOAD_FAST_BORROW_3",
+    [_LOAD_FAST_BORROW_4] = "_LOAD_FAST_BORROW_4",
+    [_LOAD_FAST_BORROW_5] = "_LOAD_FAST_BORROW_5",
+    [_LOAD_FAST_BORROW_6] = "_LOAD_FAST_BORROW_6",
+    [_LOAD_FAST_BORROW_7] = "_LOAD_FAST_BORROW_7",
+    [_LOAD_FAST_BORROW_LOAD_FAST_BORROW] = 
"_LOAD_FAST_BORROW_LOAD_FAST_BORROW",
     [_LOAD_FAST_CHECK] = "_LOAD_FAST_CHECK",
     [_LOAD_FAST_LOAD_FAST] = "_LOAD_FAST_LOAD_FAST",
     [_LOAD_FROM_DICT_OR_DEREF] = "_LOAD_FROM_DICT_OR_DEREF",
@@ -589,10 +610,30 @@ int _PyUop_num_popped(int opcode, int oparg)
             return 0;
         case _LOAD_FAST:
             return 0;
+        case _LOAD_FAST_BORROW_0:
+            return 0;
+        case _LOAD_FAST_BORROW_1:
+            return 0;
+        case _LOAD_FAST_BORROW_2:
+            return 0;
+        case _LOAD_FAST_BORROW_3:
+            return 0;
+        case _LOAD_FAST_BORROW_4:
+            return 0;
+        case _LOAD_FAST_BORROW_5:
+            return 0;
+        case _LOAD_FAST_BORROW_6:
+            return 0;
+        case _LOAD_FAST_BORROW_7:
+            return 0;
+        case _LOAD_FAST_BORROW:
+            return 0;
         case _LOAD_FAST_AND_CLEAR:
             return 0;
         case _LOAD_FAST_LOAD_FAST:
             return 0;
+        case _LOAD_FAST_BORROW_LOAD_FAST_BORROW:
+            return 0;
         case _LOAD_CONST_MORTAL:
             return 0;
         case _LOAD_CONST_IMMORTAL:
diff --git a/Include/opcode_ids.h b/Include/opcode_ids.h
index e4e6a88276655e..898dc580f4148e 100644
--- a/Include/opcode_ids.h
+++ b/Include/opcode_ids.h
@@ -94,39 +94,41 @@ extern "C" {
 #define LOAD_DEREF                              81
 #define LOAD_FAST                               82
 #define LOAD_FAST_AND_CLEAR                     83
-#define LOAD_FAST_CHECK                         84
-#define LOAD_FAST_LOAD_FAST                     85
-#define LOAD_FROM_DICT_OR_DEREF                 86
-#define LOAD_FROM_DICT_OR_GLOBALS               87
-#define LOAD_GLOBAL                             88
-#define LOAD_NAME                               89
-#define LOAD_SMALL_INT                          90
-#define LOAD_SPECIAL                            91
-#define LOAD_SUPER_ATTR                         92
-#define MAKE_CELL                               93
-#define MAP_ADD                                 94
-#define MATCH_CLASS                             95
-#define POP_JUMP_IF_FALSE                       96
-#define POP_JUMP_IF_NONE                        97
-#define POP_JUMP_IF_NOT_NONE                    98
-#define POP_JUMP_IF_TRUE                        99
-#define RAISE_VARARGS                          100
-#define RERAISE                                101
-#define SEND                                   102
-#define SET_ADD                                103
-#define SET_FUNCTION_ATTRIBUTE                 104
-#define SET_UPDATE                             105
-#define STORE_ATTR                             106
-#define STORE_DEREF                            107
-#define STORE_FAST                             108
-#define STORE_FAST_LOAD_FAST                   109
-#define STORE_FAST_STORE_FAST                  110
-#define STORE_GLOBAL                           111
-#define STORE_NAME                             112
-#define SWAP                                   113
-#define UNPACK_EX                              114
-#define UNPACK_SEQUENCE                        115
-#define YIELD_VALUE                            116
+#define LOAD_FAST_BORROW                        84
+#define LOAD_FAST_BORROW_LOAD_FAST_BORROW       85
+#define LOAD_FAST_CHECK                         86
+#define LOAD_FAST_LOAD_FAST                     87
+#define LOAD_FROM_DICT_OR_DEREF                 88
+#define LOAD_FROM_DICT_OR_GLOBALS               89
+#define LOAD_GLOBAL                             90
+#define LOAD_NAME                               91
+#define LOAD_SMALL_INT                          92
+#define LOAD_SPECIAL                            93
+#define LOAD_SUPER_ATTR                         94
+#define MAKE_CELL                               95
+#define MAP_ADD                                 96
+#define MATCH_CLASS                             97
+#define POP_JUMP_IF_FALSE                       98
+#define POP_JUMP_IF_NONE                        99
+#define POP_JUMP_IF_NOT_NONE                   100
+#define POP_JUMP_IF_TRUE                       101
+#define RAISE_VARARGS                          102
+#define RERAISE                                103
+#define SEND                                   104
+#define SET_ADD                                105
+#define SET_FUNCTION_ATTRIBUTE                 106
+#define SET_UPDATE                             107
+#define STORE_ATTR                             108
+#define STORE_DEREF                            109
+#define STORE_FAST                             110
+#define STORE_FAST_LOAD_FAST                   111
+#define STORE_FAST_STORE_FAST                  112
+#define STORE_GLOBAL                           113
+#define STORE_NAME                             114
+#define SWAP                                   115
+#define UNPACK_EX                              116
+#define UNPACK_SEQUENCE                        117
+#define YIELD_VALUE                            118
 #define RESUME                                 128
 #define BINARY_OP_ADD_FLOAT                    129
 #define BINARY_OP_ADD_INT                      130
diff --git a/Lib/_opcode_metadata.py b/Lib/_opcode_metadata.py
index 2c399d2f7f4631..15900265a01270 100644
--- a/Lib/_opcode_metadata.py
+++ b/Lib/_opcode_metadata.py
@@ -295,39 +295,41 @@
     'LOAD_DEREF': 81,
     'LOAD_FAST': 82,
     'LOAD_FAST_AND_CLEAR': 83,
-    'LOAD_FAST_CHECK': 84,
-    'LOAD_FAST_LOAD_FAST': 85,
-    'LOAD_FROM_DICT_OR_DEREF': 86,
-    'LOAD_FROM_DICT_OR_GLOBALS': 87,
-    'LOAD_GLOBAL': 88,
-    'LOAD_NAME': 89,
-    'LOAD_SMALL_INT': 90,
-    'LOAD_SPECIAL': 91,
-    'LOAD_SUPER_ATTR': 92,
-    'MAKE_CELL': 93,
-    'MAP_ADD': 94,
-    'MATCH_CLASS': 95,
-    'POP_JUMP_IF_FALSE': 96,
-    'POP_JUMP_IF_NONE': 97,
-    'POP_JUMP_IF_NOT_NONE': 98,
-    'POP_JUMP_IF_TRUE': 99,
-    'RAISE_VARARGS': 100,
-    'RERAISE': 101,
-    'SEND': 102,
-    'SET_ADD': 103,
-    'SET_FUNCTION_ATTRIBUTE': 104,
-    'SET_UPDATE': 105,
-    'STORE_ATTR': 106,
-    'STORE_DEREF': 107,
-    'STORE_FAST': 108,
-    'STORE_FAST_LOAD_FAST': 109,
-    'STORE_FAST_STORE_FAST': 110,
-    'STORE_GLOBAL': 111,
-    'STORE_NAME': 112,
-    'SWAP': 113,
-    'UNPACK_EX': 114,
-    'UNPACK_SEQUENCE': 115,
-    'YIELD_VALUE': 116,
+    'LOAD_FAST_BORROW': 84,
+    'LOAD_FAST_BORROW_LOAD_FAST_BORROW': 85,
+    'LOAD_FAST_CHECK': 86,
+    'LOAD_FAST_LOAD_FAST': 87,
+    'LOAD_FROM_DICT_OR_DEREF': 88,
+    'LOAD_FROM_DICT_OR_GLOBALS': 89,
+    'LOAD_GLOBAL': 90,
+    'LOAD_NAME': 91,
+    'LOAD_SMALL_INT': 92,
+    'LOAD_SPECIAL': 93,
+    'LOAD_SUPER_ATTR': 94,
+    'MAKE_CELL': 95,
+    'MAP_ADD': 96,
+    'MATCH_CLASS': 97,
+    'POP_JUMP_IF_FALSE': 98,
+    'POP_JUMP_IF_NONE': 99,
+    'POP_JUMP_IF_NOT_NONE': 100,
+    'POP_JUMP_IF_TRUE': 101,
+    'RAISE_VARARGS': 102,
+    'RERAISE': 103,
+    'SEND': 104,
+    'SET_ADD': 105,
+    'SET_FUNCTION_ATTRIBUTE': 106,
+    'SET_UPDATE': 107,
+    'STORE_ATTR': 108,
+    'STORE_DEREF': 109,
+    'STORE_FAST': 110,
+    'STORE_FAST_LOAD_FAST': 111,
+    'STORE_FAST_STORE_FAST': 112,
+    'STORE_GLOBAL': 113,
+    'STORE_NAME': 114,
+    'SWAP': 115,
+    'UNPACK_EX': 116,
+    'UNPACK_SEQUENCE': 117,
+    'YIELD_VALUE': 118,
     'INSTRUMENTED_END_FOR': 234,
     'INSTRUMENTED_POP_ITER': 235,
     'INSTRUMENTED_END_SEND': 236,
diff --git a/Lib/dis.py b/Lib/dis.py
index c0a25dea2a9a95..cb6d077a391677 100644
--- a/Lib/dis.py
+++ b/Lib/dis.py
@@ -48,6 +48,7 @@
 LOAD_COMMON_CONSTANT = opmap['LOAD_COMMON_CONSTANT']
 LOAD_SPECIAL = opmap['LOAD_SPECIAL']
 LOAD_FAST_LOAD_FAST = opmap['LOAD_FAST_LOAD_FAST']
+LOAD_FAST_BORROW_LOAD_FAST_BORROW = opmap['LOAD_FAST_BORROW_LOAD_FAST_BORROW']
 STORE_FAST_LOAD_FAST = opmap['STORE_FAST_LOAD_FAST']
 STORE_FAST_STORE_FAST = opmap['STORE_FAST_STORE_FAST']
 IS_OP = opmap['IS_OP']
@@ -608,7 +609,7 @@ def get_argval_argrepr(self, op, arg, offset):
                 assert lbl is not None
                 preposition = "from" if deop == END_ASYNC_FOR else "to"
                 argrepr = f"{preposition} L{lbl}"
-            elif deop in (LOAD_FAST_LOAD_FAST, STORE_FAST_LOAD_FAST, 
STORE_FAST_STORE_FAST):
+            elif deop in (LOAD_FAST_LOAD_FAST, 
LOAD_FAST_BORROW_LOAD_FAST_BORROW, STORE_FAST_LOAD_FAST, STORE_FAST_STORE_FAST):
                 arg1 = arg >> 4
                 arg2 = arg & 15
                 val1, argrepr1 = _get_name_info(arg1, self.varname_from_oparg)
diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py
index feab3b8b84f566..57556daa3ae74b 100644
--- a/Lib/test/test_capi/test_opt.py
+++ b/Lib/test/test_capi/test_opt.py
@@ -134,7 +134,7 @@ def testfunc(x):
         self.assertIsNotNone(ex)
         uops = get_opnames(ex)
         self.assertIn("_JUMP_TO_TOP", uops)
-        self.assertIn("_LOAD_FAST_0", uops)
+        self.assertIn("_LOAD_FAST_BORROW_0", uops)
 
     def test_extended_arg(self):
         "Check EXTENDED_ARG handling in superblock creation"
@@ -180,7 +180,7 @@ def many_vars():
 
         ex = get_first_executor(many_vars)
         self.assertIsNotNone(ex)
-        self.assertTrue(any((opcode, oparg, operand) == ("_LOAD_FAST", 259, 0)
+        self.assertTrue(any((opcode, oparg, operand) == ("_LOAD_FAST_BORROW", 
259, 0)
                             for opcode, oparg, _, operand in list(ex)))
 
     def test_unspecialized_unpack(self):
diff --git a/Lib/test/test_ctypes/test_memfunctions.py 
b/Lib/test/test_ctypes/test_memfunctions.py
index 325487618137f6..e3cb5db775ef62 100644
--- a/Lib/test/test_ctypes/test_memfunctions.py
+++ b/Lib/test/test_ctypes/test_memfunctions.py
@@ -60,9 +60,6 @@ def test_cast(self):
     @support.refcount_test
     def test_string_at(self):
         s = string_at(b"foo bar")
-        # XXX The following may be wrong, depending on how Python
-        # manages string instances
-        self.assertEqual(2, sys.getrefcount(s))
         self.assertTrue(s, "foo bar")
 
         self.assertEqual(string_at(b"foo bar", 7), b"foo bar")
diff --git a/Lib/test/test_ctypes/test_refcounts.py 
b/Lib/test/test_ctypes/test_refcounts.py
index 012722d8486218..1fe4b3eca2c50e 100644
--- a/Lib/test/test_ctypes/test_refcounts.py
+++ b/Lib/test/test_ctypes/test_refcounts.py
@@ -24,36 +24,35 @@ def test_1(self):
         def callback(value):
             return value
 
-        self.assertEqual(sys.getrefcount(callback), 2)
+        orig_refcount = sys.getrefcount(callback)
         cb = MyCallback(callback)
 
-        self.assertGreater(sys.getrefcount(callback), 2)
+        self.assertGreater(sys.getrefcount(callback), orig_refcount)
         result = f(-10, cb)
         self.assertEqual(result, -18)
         cb = None
 
         gc.collect()
 
-        self.assertEqual(sys.getrefcount(callback), 2)
+        self.assertEqual(sys.getrefcount(callback), orig_refcount)
 
     @support.refcount_test
     def test_refcount(self):
         def func(*args):
             pass
-        # this is the standard refcount for func
-        self.assertEqual(sys.getrefcount(func), 2)
+        orig_refcount = sys.getrefcount(func)
 
         # the CFuncPtr instance holds at least one refcount on func:
         f = OtherCallback(func)
-        self.assertGreater(sys.getrefcount(func), 2)
+        self.assertGreater(sys.getrefcount(func), orig_refcount)
 
         # and may release it again
         del f
-        self.assertGreaterEqual(sys.getrefcount(func), 2)
+        self.assertGreaterEqual(sys.getrefcount(func), orig_refcount)
 
         # but now it must be gone
         gc.collect()
-        self.assertEqual(sys.getrefcount(func), 2)
+        self.assertEqual(sys.getrefcount(func), orig_refcount)
 
         class X(ctypes.Structure):
             _fields_ = [("a", OtherCallback)]
@@ -61,27 +60,27 @@ class X(ctypes.Structure):
         x.a = OtherCallback(func)
 
         # the CFuncPtr instance holds at least one refcount on func:
-        self.assertGreater(sys.getrefcount(func), 2)
+        self.assertGreater(sys.getrefcount(func), orig_refcount)
 
         # and may release it again
         del x
-        self.assertGreaterEqual(sys.getrefcount(func), 2)
+        self.assertGreaterEqual(sys.getrefcount(func), orig_refcount)
 
         # and now it must be gone again
         gc.collect()
-        self.assertEqual(sys.getrefcount(func), 2)
+        self.assertEqual(sys.getrefcount(func), orig_refcount)
 
         f = OtherCallback(func)
 
         # the CFuncPtr instance holds at least one refcount on func:
-        self.assertGreater(sys.getrefcount(func), 2)
+        self.assertGreater(sys.getrefcount(func), orig_refcount)
 
         # create a cycle
         f.cycle = f
 
         del f
         gc.collect()
-        self.assertEqual(sys.getrefcount(func), 2)
+        self.assertEqual(sys.getrefcount(func), orig_refcount)
 
 
 class AnotherLeak(unittest.TestCase):
diff --git a/Lib/test/test_ctypes/test_stringptr.py b/Lib/test/test_ctypes/test_stringptr.py
index bb6045b250ffce..a6a2dec68df68e 100644
--- a/Lib/test/test_ctypes/test_stringptr.py
+++ b/Lib/test/test_ctypes/test_stringptr.py
@@ -20,9 +20,9 @@ class X(Structure):
         # NULL pointer access
         self.assertRaises(ValueError, getattr, x.str, "contents")
         b = create_string_buffer(b"Hello, World")
-        self.assertEqual(sys.getrefcount(b), 2)
+        orig_refcount = sys.getrefcount(b)
         x.str = b
-        self.assertEqual(sys.getrefcount(b), 3)
+        self.assertEqual(sys.getrefcount(b), orig_refcount + 1)
 
         # POINTER(c_char) and Python string is NOT compatible
         # POINTER(c_char) and create_string_buffer() is compatible
diff --git a/Lib/test/test_dis.py b/Lib/test/test_dis.py
index 726af931a38ecd..58ba86fb43092a 100644
--- a/Lib/test/test_dis.py
+++ b/Lib/test/test_dis.py
@@ -51,10 +51,10 @@ def cm(cls, x):
 dis_c_instance_method = """\
 %3d           RESUME                   0
 
-%3d           LOAD_FAST                1 (x)
+%3d           LOAD_FAST_BORROW         1 (x)
               LOAD_SMALL_INT           1
               COMPARE_OP              72 (==)
-              LOAD_FAST                0 (self)
+              LOAD_FAST_BORROW         0 (self)
               STORE_ATTR               0 (x)
               LOAD_CONST               1 (None)
               RETURN_VALUE
@@ -62,10 +62,10 @@ def cm(cls, x):
 
 dis_c_instance_method_bytes = """\
           RESUME                   0
-          LOAD_FAST                1
+          LOAD_FAST_BORROW         1
           LOAD_SMALL_INT           1
           COMPARE_OP              72 (==)
-          LOAD_FAST                0
+          LOAD_FAST_BORROW         0
           STORE_ATTR               0
           LOAD_CONST               1
           RETURN_VALUE
@@ -74,10 +74,10 @@ def cm(cls, x):
 dis_c_class_method = """\
 %3d           RESUME                   0
 
-%3d           LOAD_FAST                1 (x)
+%3d           LOAD_FAST_BORROW         1 (x)
               LOAD_SMALL_INT           1
               COMPARE_OP              72 (==)
-              LOAD_FAST                0 (cls)
+              LOAD_FAST_BORROW         0 (cls)
               STORE_ATTR               0 (x)
               LOAD_CONST               1 (None)
               RETURN_VALUE
@@ -86,7 +86,7 @@ def cm(cls, x):
 dis_c_static_method = """\
 %3d           RESUME                   0
 
-%3d           LOAD_FAST                0 (x)
+%3d           LOAD_FAST_BORROW         0 (x)
               LOAD_SMALL_INT           1
               COMPARE_OP              72 (==)
               STORE_FAST               0 (x)
@@ -114,7 +114,7 @@ def _f(a):
 %3d           RESUME                   0
 
 %3d           LOAD_GLOBAL              1 (print + NULL)
-              LOAD_FAST                0 (a)
+              LOAD_FAST_BORROW         0 (a)
               CALL                     1
               POP_TOP
 
@@ -128,7 +128,7 @@ def _f(a):
 %3d          0       RESUME                   0
 
 %3d          2       LOAD_GLOBAL              1 (print + NULL)
-            12       LOAD_FAST                0 (a)
+            12       LOAD_FAST_BORROW         0 (a)
             14       CALL                     1
             22       POP_TOP
 
@@ -142,7 +142,7 @@ def _f(a):
 %-14s           RESUME                   0
 
 %-14s           LOAD_GLOBAL              1 (print + NULL)
-%-14s           LOAD_FAST                0 (a)
+%-14s           LOAD_FAST_BORROW         0 (a)
 %-14s           CALL                     1
 %-14s           POP_TOP
 
@@ -153,7 +153,7 @@ def _f(a):
 dis_f_co_code = """\
           RESUME                   0
           LOAD_GLOBAL              1
-          LOAD_FAST                0
+          LOAD_FAST_BORROW         0
           CALL                     1
           POP_TOP
           LOAD_SMALL_INT           1
@@ -203,7 +203,7 @@ def bug1333982(x=[]):
 %3d           LOAD_COMMON_CONSTANT     0 (AssertionError)
               LOAD_CONST               1 (<code object <genexpr> at 0x..., file "%s", line %d>)
               MAKE_FUNCTION
-              LOAD_FAST                0 (x)
+              LOAD_FAST_BORROW         0 (x)
               GET_ITER
               CALL                     0
 
@@ -503,18 +503,18 @@ def _fstring(a, b, c, d):
 dis_fstring = """\
 %3d           RESUME                   0
 
-%3d           LOAD_FAST                0 (a)
+%3d           LOAD_FAST_BORROW         0 (a)
               FORMAT_SIMPLE
               LOAD_CONST               0 (' ')
-              LOAD_FAST                1 (b)
+              LOAD_FAST_BORROW         1 (b)
               LOAD_CONST               1 ('4')
               FORMAT_WITH_SPEC
               LOAD_CONST               0 (' ')
-              LOAD_FAST                2 (c)
+              LOAD_FAST_BORROW         2 (c)
               CONVERT_VALUE            2 (repr)
               FORMAT_SIMPLE
               LOAD_CONST               0 (' ')
-              LOAD_FAST                3 (d)
+              LOAD_FAST_BORROW         3 (d)
               CONVERT_VALUE            2 (repr)
               LOAD_CONST               1 ('4')
               FORMAT_WITH_SPEC
@@ -530,7 +530,7 @@ def _with(c):
 dis_with = """\
 %4d           RESUME                   0
 
-%4d           LOAD_FAST                0 (c)
+%4d           LOAD_FAST_BORROW         0 (c)
                COPY                     1
                LOAD_SPECIAL             1 (__exit__)
                SWAP                     2
@@ -595,7 +595,7 @@ async def _asyncwith(c):
                 POP_TOP
         L1:     RESUME                   0
 
-%4d            LOAD_FAST                0 (c)
+%4d            LOAD_FAST_BORROW         0 (c)
                 COPY                     1
                 LOAD_SPECIAL             3 (__aexit__)
                 SWAP                     2
@@ -707,9 +707,9 @@ def _tryfinallyconst(b):
 
 %4d           NOP
 
-%4d   L1:     LOAD_FAST                0 (a)
+%4d   L1:     LOAD_FAST_BORROW         0 (a)
 
-%4d   L2:     LOAD_FAST                1 (b)
+%4d   L2:     LOAD_FAST_BORROW         1 (b)
                PUSH_NULL
                CALL                     0
                POP_TOP
@@ -743,7 +743,7 @@ def _tryfinallyconst(b):
 
 %4d           NOP
 
-%4d           LOAD_FAST                0 (b)
+%4d           LOAD_FAST_BORROW         0 (b)
                PUSH_NULL
                CALL                     0
                POP_TOP
@@ -791,14 +791,14 @@ def foo(x):
 
 %4d           RESUME                   0
 
-%4d           LOAD_FAST                0 (y)
+%4d           LOAD_FAST_BORROW         0 (y)
                BUILD_TUPLE              1
                LOAD_CONST               0 (<code object foo at 0x..., file "%s", line %d>)
                MAKE_FUNCTION
                SET_FUNCTION_ATTRIBUTE   8 (closure)
                STORE_FAST               1 (foo)
 
-%4d           LOAD_FAST                1 (foo)
+%4d           LOAD_FAST_BORROW         1 (foo)
                RETURN_VALUE
 """ % (_h.__code__.co_firstlineno,
        _h.__code__.co_firstlineno + 1,
@@ -815,7 +815,7 @@ def foo(x):
 %4d           RESUME                   0
 
 %4d           LOAD_GLOBAL              1 (list + NULL)
-               LOAD_FAST                0 (x)
+               LOAD_FAST_BORROW         0 (x)
                BUILD_TUPLE              1
                LOAD_CONST               1 (<code object <genexpr> at 0x..., file "%s", line %d>)
                MAKE_FUNCTION
@@ -841,12 +841,12 @@ def foo(x):
 %4d           RETURN_GENERATOR
                POP_TOP
        L1:     RESUME                   0
-               LOAD_FAST                0 (.0)
+               LOAD_FAST_BORROW         0 (.0)
                GET_ITER
        L2:     FOR_ITER                14 (to L3)
                STORE_FAST               1 (z)
                LOAD_DEREF               2 (x)
-               LOAD_FAST                1 (z)
+               LOAD_FAST_BORROW         1 (z)
                BINARY_OP                0 (+)
                YIELD_VALUE              0
                RESUME                   5
@@ -877,7 +877,7 @@ def load_test(x, y=0):
 %3d           LOAD_FAST_LOAD_FAST      1 (x, y)
               STORE_FAST_STORE_FAST   50 (b, a)
 
-%3d           LOAD_FAST_LOAD_FAST     35 (a, b)
+%3d           LOAD_FAST_BORROW_LOAD_FAST_BORROW 35 (a, b)
               BUILD_TUPLE              2
               RETURN_VALUE
 """ % (load_test.__code__.co_firstlineno,
@@ -901,7 +901,7 @@ def loop_test():
               STORE_FAST               0 (i)
 
 %3d           LOAD_GLOBAL_MODULE       1 (load_test + NULL)
-              LOAD_FAST                0 (i)
+              LOAD_FAST_BORROW         0 (i)
               CALL_PY_GENERAL          1
               POP_TOP
               JUMP_BACKWARD_{: <6}    16 (to L1)
@@ -996,6 +996,7 @@ def test_boundaries(self):
 
     def test_widths(self):
         long_opcodes = set(['JUMP_BACKWARD_NO_INTERRUPT',
+                            'LOAD_FAST_BORROW_LOAD_FAST_BORROW',
                             'INSTRUMENTED_CALL_FUNCTION_EX'])
         for op, opname in enumerate(dis.opname):
             if opname in long_opcodes or opname.startswith("INSTRUMENTED"):
@@ -1739,8 +1740,8 @@ def _prepare_test_cases():
   make_inst(opname='MAKE_CELL', arg=1, argval='b', argrepr='b', offset=2, 
start_offset=2, starts_line=False, line_number=None),
   make_inst(opname='RESUME', arg=0, argval=0, argrepr='', offset=4, 
start_offset=4, starts_line=True, line_number=1),
   make_inst(opname='LOAD_CONST', arg=4, argval=(3, 4), argrepr='(3, 4)', 
offset=6, start_offset=6, starts_line=True, line_number=2),
-  make_inst(opname='LOAD_FAST', arg=0, argval='a', argrepr='a', offset=8, 
start_offset=8, starts_line=False, line_number=2),
-  make_inst(opname='LOAD_FAST', arg=1, argval='b', argrepr='b', offset=10, 
start_offset=10, starts_line=False, line_number=2),
+  make_inst(opname='LOAD_FAST_BORROW', arg=0, argval='a', argrepr='a', 
offset=8, start_offset=8, starts_line=False, line_number=2),
+  make_inst(opname='LOAD_FAST_BORROW', arg=1, argval='b', argrepr='b', 
offset=10, start_offset=10, starts_line=False, line_number=2),
   make_inst(opname='BUILD_TUPLE', arg=2, argval=2, argrepr='', offset=12, 
start_offset=12, starts_line=False, line_number=2),
   make_inst(opname='LOAD_CONST', arg=1, argval=code_object_f, 
argrepr=repr(code_object_f), offset=14, start_offset=14, starts_line=False, 
line_number=2),
   make_inst(opname='MAKE_FUNCTION', arg=None, argval=None, argrepr='', 
offset=16, start_offset=16, starts_line=False, line_number=2),
@@ -1757,7 +1758,7 @@ def _prepare_test_cases():
   make_inst(opname='LOAD_CONST', arg=3, argval='Hello world!', argrepr="'Hello 
world!'", offset=46, start_offset=46, starts_line=False, line_number=7),
   make_inst(opname='CALL', arg=7, argval=7, argrepr='', offset=48, 
start_offset=48, starts_line=False, line_number=7, cache_info=[('counter', 1, 
b'\x00\x00'), ('func_version', 2, b'\x00\x00\x00\x00')]),
   make_inst(opname='POP_TOP', arg=None, argval=None, argrepr='', offset=56, 
start_offset=56, starts_line=False, line_number=7),
-  make_inst(opname='LOAD_FAST', arg=2, argval='f', argrepr='f', offset=58, 
start_offset=58, starts_line=True, line_number=8),
+  make_inst(opname='LOAD_FAST_BORROW', arg=2, argval='f', argrepr='f', 
offset=58, start_offset=58, starts_line=True, line_number=8),
   make_inst(opname='RETURN_VALUE', arg=None, argval=None, argrepr='', 
offset=60, start_offset=60, starts_line=False, line_number=8),
 ]
 
@@ -1767,10 +1768,10 @@ def _prepare_test_cases():
   make_inst(opname='MAKE_CELL', arg=1, argval='d', argrepr='d', offset=4, 
start_offset=4, starts_line=False, line_number=None),
   make_inst(opname='RESUME', arg=0, argval=0, argrepr='', offset=6, 
start_offset=6, starts_line=True, line_number=2),
   make_inst(opname='LOAD_CONST', arg=2, argval=(5, 6), argrepr='(5, 6)', 
offset=8, start_offset=8, starts_line=True, line_number=3),
-  make_inst(opname='LOAD_FAST', arg=3, argval='a', argrepr='a', offset=10, 
start_offset=10, starts_line=False, line_number=3),
-  make_inst(opname='LOAD_FAST', arg=4, argval='b', argrepr='b', offset=12, 
start_offset=12, starts_line=False, line_number=3),
-  make_inst(opname='LOAD_FAST', arg=0, argval='c', argrepr='c', offset=14, 
start_offset=14, starts_line=False, line_number=3),
-  make_inst(opname='LOAD_FAST', arg=1, argval='d', argrepr='d', offset=16, 
start_offset=16, starts_line=False, line_number=3),
+  make_inst(opname='LOAD_FAST_BORROW', arg=3, argval='a', argrepr='a', 
offset=10, start_offset=10, starts_line=False, line_number=3),
+  make_inst(opname='LOAD_FAST_BORROW', arg=4, argval='b', argrepr='b', 
offset=12, start_offset=12, starts_line=False, line_number=3),
+  make_inst(opname='LOAD_FAST_BORROW', arg=0, argval='c', argrepr='c', 
offset=14, start_offset=14, starts_line=False, line_number=3),
+  make_inst(opname='LOAD_FAST_BORROW', arg=1, argval='d', argrepr='d', 
offset=16, start_offset=16, starts_line=False, line_number=3),
   make_inst(opname='BUILD_TUPLE', arg=4, argval=4, argrepr='', offset=18, 
start_offset=18, starts_line=False, line_number=3),
   make_inst(opname='LOAD_CONST', arg=1, argval=code_object_inner, 
argrepr=repr(code_object_inner), offset=20, start_offset=20, starts_line=False, 
line_number=3),
   make_inst(opname='MAKE_FUNCTION', arg=None, argval=None, argrepr='', 
offset=22, start_offset=22, starts_line=False, line_number=3),
@@ -1784,7 +1785,7 @@ def _prepare_test_cases():
   make_inst(opname='LOAD_DEREF', arg=1, argval='d', argrepr='d', offset=46, 
start_offset=46, starts_line=False, line_number=5),
   make_inst(opname='CALL', arg=4, argval=4, argrepr='', offset=48, 
start_offset=48, starts_line=False, line_number=5, cache_info=[('counter', 1, 
b'\x00\x00'), ('func_version', 2, b'\x00\x00\x00\x00')]),
   make_inst(opname='POP_TOP', arg=None, argval=None, argrepr='', offset=56, 
start_offset=56, starts_line=False, line_number=5),
-  make_inst(opname='LOAD_FAST', arg=2, argval='inner', argrepr='inner', 
offset=58, start_offset=58, starts_line=True, line_number=6),
+  make_inst(opname='LOAD_FAST_BORROW', arg=2, argval='inner', argrepr='inner', 
offset=58, start_offset=58, starts_line=True, line_number=6),
   make_inst(opname='RETURN_VALUE', arg=None, argval=None, argrepr='', 
offset=60, start_offset=60, starts_line=False, line_number=6),
 ]
 
@@ -1796,7 +1797,7 @@ def _prepare_test_cases():
   make_inst(opname='LOAD_DEREF', arg=3, argval='b', argrepr='b', offset=16, 
start_offset=16, starts_line=False, line_number=4),
   make_inst(opname='LOAD_DEREF', arg=4, argval='c', argrepr='c', offset=18, 
start_offset=18, starts_line=False, line_number=4),
   make_inst(opname='LOAD_DEREF', arg=5, argval='d', argrepr='d', offset=20, 
start_offset=20, starts_line=False, line_number=4),
-  make_inst(opname='LOAD_FAST_LOAD_FAST', arg=1, argval=('e', 'f'), 
argrepr='e, f', offset=22, start_offset=22, starts_line=False, line_number=4),
+  make_inst(opname='LOAD_FAST_BORROW_LOAD_FAST_BORROW', arg=1, argval=('e', 
'f'), argrepr='e, f', offset=22, start_offset=22, starts_line=False, 
line_number=4),
   make_inst(opname='CALL', arg=6, argval=6, argrepr='', offset=24, 
start_offset=24, starts_line=False, line_number=4, cache_info=[('counter', 1, 
b'\x00\x00'), ('func_version', 2, b'\x00\x00\x00\x00')]),
   make_inst(opname='POP_TOP', arg=None, argval=None, argrepr='', offset=32, 
start_offset=32, starts_line=False, line_number=4),
   make_inst(opname='LOAD_CONST', arg=0, argval=None, argrepr='None', 
offset=34, start_offset=34, starts_line=False, line_number=4),
@@ -1812,16 +1813,16 @@ def _prepare_test_cases():
   make_inst(opname='FOR_ITER', arg=32, argval=92, argrepr='to L4', offset=24, 
start_offset=24, starts_line=False, line_number=3, label=1, 
cache_info=[('counter', 1, b'\x00\x00')]),
   make_inst(opname='STORE_FAST', arg=0, argval='i', argrepr='i', offset=28, 
start_offset=28, starts_line=False, line_number=3),
   make_inst(opname='LOAD_GLOBAL', arg=3, argval='print', argrepr='print + 
NULL', offset=30, start_offset=30, starts_line=True, line_number=4, 
cache_info=[('counter', 1, b'\x00\x00'), ('index', 1, b'\x00\x00'), 
('module_keys_version', 1, b'\x00\x00'), ('builtin_keys_version', 1, 
b'\x00\x00')]),
-  make_inst(opname='LOAD_FAST', arg=0, argval='i', argrepr='i', offset=40, 
start_offset=40, starts_line=False, line_number=4),
+  make_inst(opname='LOAD_FAST_BORROW', arg=0, argval='i', argrepr='i', 
offset=40, start_offset=40, starts_line=False, line_number=4),
   make_inst(opname='CALL', arg=1, argval=1, argrepr='', offset=42, 
start_offset=42, starts_line=False, line_number=4, cache_info=[('counter', 1, 
b'\x00\x00'), ('func_version', 2, b'\x00\x00\x00\x00')]),
   make_inst(opname='POP_TOP', arg=None, argval=None, argrepr='', offset=50, 
start_offset=50, starts_line=False, line_number=4),
-  make_inst(opname='LOAD_FAST', arg=0, argval='i', argrepr='i', offset=52, 
start_offset=52, starts_line=True, line_number=5),
+  make_inst(opname='LOAD_FAST_BORROW', arg=0, argval='i', argrepr='i', 
offset=52, start_offset=52, starts_line=True, line_number=5),
   make_inst(opname='LOAD_SMALL_INT', arg=4, argval=4, argrepr='', offset=54, 
start_offset=54, starts_line=False, line_number=5),
   make_inst(opname='COMPARE_OP', arg=18, argval='<', argrepr='bool(<)', 
offset=56, start_offset=56, starts_line=False, line_number=5, 
cache_info=[('counter', 1, b'\x00\x00')]),
   make_inst(opname='POP_JUMP_IF_FALSE', arg=3, argval=70, argrepr='to L2', 
offset=60, start_offset=60, starts_line=False, line_number=5, 
cache_info=[('counter', 1, b'\x00\x00')]),
   make_inst(opname='NOT_TAKEN', arg=None, argval=None, argrepr='', offset=64, 
start_offset=64, starts_line=False, line_number=5),
   make_inst(opname='JUMP_BACKWARD', arg=23, argval=24, argrepr='to L1', 
offset=66, start_offset=66, starts_line=True, line_number=6, 
cache_info=[('counter', 1, b'\x00\x00')]),
-  make_inst(opname='LOAD_FAST', arg=0, argval='i', argrepr='i', offset=70, 
start_offset=70, starts_line=True, line_number=7, label=2),
+  make_inst(opname='LOAD_FAST_BORROW', arg=0, argval='i', argrepr='i', 
offset=70, start_offset=70, starts_line=True, line_number=7, label=2),
   make_inst(opname='LOAD_SMALL_INT', arg=6, argval=6, argrepr='', offset=72, 
start_offset=72, starts_line=False, line_number=7),
   make_inst(opname='COMPARE_OP', arg=148, argval='>', argrepr='bool(>)', 
offset=74, start_offset=74, starts_line=False, line_number=7, 
cache_info=[('counter', 1, b'\x00\x00')]),
   make_inst(opname='POP_JUMP_IF_TRUE', arg=3, argval=88, argrepr='to L3', 
offset=78, start_offset=78, starts_line=False, line_number=7, 
cache_info=[('counter', 1, b'\x00\x00')]),
@@ -1840,20 +1841,20 @@ def _prepare_test_cases():
   make_inst(opname='POP_JUMP_IF_FALSE', arg=40, argval=212, argrepr='to L8', 
offset=128, start_offset=128, starts_line=False, line_number=11, 
cache_info=[('counter', 1, b'\x00\x00')]),
   make_inst(opname='NOT_TAKEN', arg=None, argval=None, argrepr='', offset=132, 
start_offset=132, starts_line=False, line_number=11),
   make_inst(opname='LOAD_GLOBAL', arg=3, argval='print', argrepr='print + 
NULL', offset=134, start_offset=134, starts_line=True, line_number=12, 
cache_info=[('counter', 1, b'\x00\x00'), ('index', 1, b'\x00\x00'), 
('module_keys_version', 1, b'\x00\x00'), ('builtin_keys_version', 1, 
b'\x00\x00')]),
-  make_inst(opname='LOAD_FAST', arg=0, argval='i', argrepr='i', offset=144, 
start_offset=144, starts_line=False, line_number=12),
+  make_inst(opname='LOAD_FAST_BORROW', arg=0, argval='i', argrepr='i', 
offset=144, start_offset=144, starts_line=False, line_number=12),
   make_inst(opname='CALL', arg=1, argval=1, argrepr='', offset=146, 
start_offset=146, starts_line=False, line_number=12, cache_info=[('counter', 1, 
b'\x00\x00'), ('func_version', 2, b'\x00\x00\x00\x00')]),
   make_inst(opname='POP_TOP', arg=None, argval=None, argrepr='', offset=154, 
start_offset=154, starts_line=False, line_number=12),
-  make_inst(opname='LOAD_FAST', arg=0, argval='i', argrepr='i', offset=156, 
start_offset=156, starts_line=True, line_number=13),
+  make_inst(opname='LOAD_FAST_BORROW', arg=0, argval='i', argrepr='i', 
offset=156, start_offset=156, starts_line=True, line_number=13),
   make_inst(opname='LOAD_SMALL_INT', arg=1, argval=1, argrepr='', offset=158, 
start_offset=158, starts_line=False, line_number=13),
   make_inst(opname='BINARY_OP', arg=23, argval=23, argrepr='-=', offset=160, 
start_offset=160, starts_line=False, line_number=13, cache_info=[('counter', 1, 
b'\x00\x00'), ('descr', 4, b'\x00\x00\x00\x00\x00\x00\x00\x00')]),
   make_inst(opname='STORE_FAST', arg=0, argval='i', argrepr='i', offset=172, 
start_offset=172, starts_line=False, line_number=13),
-  make_inst(opname='LOAD_FAST', arg=0, argval='i', argrepr='i', offset=174, 
start_offset=174, starts_line=True, line_number=14),
+  make_inst(opname='LOAD_FAST_BORROW', arg=0, argval='i', argrepr='i', 
offset=174, start_offset=174, starts_line=True, line_number=14),
   make_inst(opname='LOAD_SMALL_INT', arg=6, argval=6, argrepr='', offset=176, 
start_offset=176, starts_line=False, line_number=14),
   make_inst(opname='COMPARE_OP', arg=148, argval='>', argrepr='bool(>)', 
offset=178, start_offset=178, starts_line=False, line_number=14, 
cache_info=[('counter', 1, b'\x00\x00')]),
   make_inst(opname='POP_JUMP_IF_FALSE', arg=3, argval=192, argrepr='to L6', 
offset=182, start_offset=182, starts_line=False, line_number=14, 
cache_info=[('counter', 1, b'\x00\x00')]),
   make_inst(opname='NOT_TAKEN', arg=None, argval=None, argrepr='', offset=186, 
start_offset=186, starts_line=False, line_number=14),
   make_inst(opname='JUMP_BACKWARD', arg=37, argval=118, argrepr='to L5', 
offset=188, start_offset=188, starts_line=True, line_number=15, 
cache_info=[('counter', 1, b'\x00\x00')]),
-  make_inst(opname='LOAD_FAST', arg=0, argval='i', argrepr='i', offset=192, 
start_offset=192, starts_line=True, line_number=16, label=6),
+  make_inst(opname='LOAD_FAST_BORROW', arg=0, argval='i', argrepr='i', 
offset=192, start_offset=192, starts_line=True, line_number=16, label=6),
   make_inst(opname='LOAD_SMALL_INT', arg=4, argval=4, argrepr='', offset=194, 
start_offset=194, starts_line=False, line_number=16),
   make_inst(opname='COMPARE_OP', arg=18, argval='<', argrepr='bool(<)', 
offset=196, start_offset=196, starts_line=False, line_number=16, 
cache_info=[('counter', 1, b'\x00\x00')]),
   make_inst(opname='POP_JUMP_IF_TRUE', arg=3, argval=210, argrepr='to L7', 
offset=200, start_offset=200, starts_line=False, line_number=16, 
cache_info=[('counter', 1, b'\x00\x00')]),
@@ -1869,7 +1870,7 @@ def _prepare_test_cases():
   make_inst(opname='LOAD_SMALL_INT', arg=0, argval=0, argrepr='', offset=238, 
start_offset=238, starts_line=False, line_number=21),
   make_inst(opname='BINARY_OP', arg=11, argval=11, argrepr='/', offset=240, 
start_offset=240, starts_line=False, line_number=21, cache_info=[('counter', 1, 
b'\x00\x00'), ('descr', 4, b'\x00\x00\x00\x00\x00\x00\x00\x00')]),
   make_inst(opname='POP_TOP', arg=None, argval=None, argrepr='', offset=252, 
start_offset=252, starts_line=False, line_number=21),
-  make_inst(opname='LOAD_FAST', arg=0, argval='i', argrepr='i', offset=254, 
start_offset=254, starts_line=True, line_number=25),
+  make_inst(opname='LOAD_FAST_BORROW', arg=0, argval='i', argrepr='i', 
offset=254, start_offset=254, starts_line=True, line_number=25),
   make_inst(opname='COPY', arg=1, argval=1, argrepr='', offset=256, 
start_offset=256, starts_line=False, line_number=25),
   make_inst(opname='LOAD_SPECIAL', arg=1, argval=1, argrepr='__exit__', 
offset=258, start_offset=258, starts_line=False, line_number=25),
   make_inst(opname='SWAP', arg=2, argval=2, argrepr='', offset=260, 
start_offset=260, starts_line=False, line_number=25),
diff --git a/Lib/test/test_frame.py b/Lib/test/test_frame.py
index a6e11f1a5342b6..18ade18d1a1708 100644
--- a/Lib/test/test_frame.py
+++ b/Lib/test/test_frame.py
@@ -597,6 +597,22 @@ def make_frame():
         with self.assertRaises(TypeError):
             FrameLocalsProxy(frame=sys._getframe())  # no keyword arguments
 
+    def test_overwrite_locals(self):
+        # Verify we do not crash if we overwrite a local passed as an argument
+        # from an ancestor in the call stack.
+        def f():
+            xs = [1, 2, 3]
+            ys = [4, 5, 6]
+            return g(xs)
+
+        def g(xs):
+            f = sys._getframe()
+            f.f_back.f_locals["xs"] = None
+            f.f_back.f_locals["ys"] = None
+            return xs[1]
+
+        self.assertEqual(f(), 2)
+
 
 class FrameLocalsProxyMappingTests(mapping_tests.TestHashMappingProtocol):
     """Test that FrameLocalsProxy behaves like a Mapping (with exceptions)"""
diff --git a/Lib/test/test_generators.py b/Lib/test/test_generators.py
index bf4b88cd9c4450..8bce42f037478c 100644
--- a/Lib/test/test_generators.py
+++ b/Lib/test/test_generators.py
@@ -83,7 +83,7 @@ def gen():
         g = gen()
         next(g)
         g.send(g)
-        self.assertGreater(sys.getrefcount(g), 2)
+        self.assertGreaterEqual(sys.getrefcount(g), 2)
         self.assertFalse(finalized)
         del g
         support.gc_collect()
diff --git a/Lib/test/test_importlib/test_abc.py b/Lib/test/test_importlib/test_abc.py
index b1ab52f966ffdb..070920d0da7e19 100644
--- a/Lib/test/test_importlib/test_abc.py
+++ b/Lib/test/test_importlib/test_abc.py
@@ -801,6 +801,9 @@ def verify_code(self, code_object, *, bytecode_written=False):
             data.extend(self.init._pack_uint32(0))
             data.extend(self.init._pack_uint32(self.loader.source_mtime))
             data.extend(self.init._pack_uint32(self.loader.source_size))
+            # Make sure there's > 1 reference to code_object so that the
+            # marshaled representation below matches the cached representation
+            l = [code_object]
             data.extend(marshal.dumps(code_object))
             self.assertEqual(self.loader.written[self.cached], bytes(data))
 
diff --git a/Lib/test/test_peepholer.py b/Lib/test/test_peepholer.py
index 8b97e76fa346ff..565e42b04a68d0 100644
--- a/Lib/test/test_peepholer.py
+++ b/Lib/test/test_peepholer.py
@@ -4,9 +4,14 @@
 import sys
 import textwrap
 import unittest
+try:
+    import _testinternalcapi
+except ImportError:
+    _testinternalcapi = None
 
 from test import support
-from test.support.bytecode_helper import BytecodeTestCase, CfgOptimizationTestCase
+from test.support.bytecode_helper import (
+    BytecodeTestCase, CfgOptimizationTestCase, CompilationStepTestCase)
 
 
 def compile_pattern_with_fast_locals(pattern):
@@ -839,7 +844,7 @@ def test_load_fast_known_simple(self):
         def f():
             x = 1
             y = x + x
-        self.assertInBytecode(f, 'LOAD_FAST_LOAD_FAST')
+        self.assertInBytecode(f, 'LOAD_FAST_BORROW_LOAD_FAST_BORROW')
 
     def test_load_fast_unknown_simple(self):
         def f():
@@ -860,27 +865,27 @@ def f():
     def test_load_fast_known_because_parameter(self):
         def f1(x):
             print(x)
-        self.assertInBytecode(f1, 'LOAD_FAST')
+        self.assertInBytecode(f1, 'LOAD_FAST_BORROW')
         self.assertNotInBytecode(f1, 'LOAD_FAST_CHECK')
 
         def f2(*, x):
             print(x)
-        self.assertInBytecode(f2, 'LOAD_FAST')
+        self.assertInBytecode(f2, 'LOAD_FAST_BORROW')
         self.assertNotInBytecode(f2, 'LOAD_FAST_CHECK')
 
         def f3(*args):
             print(args)
-        self.assertInBytecode(f3, 'LOAD_FAST')
+        self.assertInBytecode(f3, 'LOAD_FAST_BORROW')
         self.assertNotInBytecode(f3, 'LOAD_FAST_CHECK')
 
         def f4(**kwargs):
             print(kwargs)
-        self.assertInBytecode(f4, 'LOAD_FAST')
+        self.assertInBytecode(f4, 'LOAD_FAST_BORROW')
         self.assertNotInBytecode(f4, 'LOAD_FAST_CHECK')
 
         def f5(x=0):
             print(x)
-        self.assertInBytecode(f5, 'LOAD_FAST')
+        self.assertInBytecode(f5, 'LOAD_FAST_BORROW')
         self.assertNotInBytecode(f5, 'LOAD_FAST_CHECK')
 
     def test_load_fast_known_because_already_loaded(self):
@@ -890,7 +895,7 @@ def f():
             print(x)
             print(x)
         self.assertInBytecode(f, 'LOAD_FAST_CHECK')
-        self.assertInBytecode(f, 'LOAD_FAST')
+        self.assertInBytecode(f, 'LOAD_FAST_BORROW')
 
     def test_load_fast_known_multiple_branches(self):
         def f():
@@ -899,7 +904,7 @@ def f():
             else:
                 x = 2
             print(x)
-        self.assertInBytecode(f, 'LOAD_FAST')
+        self.assertInBytecode(f, 'LOAD_FAST_BORROW')
         self.assertNotInBytecode(f, 'LOAD_FAST_CHECK')
 
     def test_load_fast_unknown_after_error(self):
@@ -943,12 +948,12 @@ def f():
                 print(a00, a01, a62, a63)
                 print(a64, a65, a78, a79)
 
-        self.assertInBytecode(f, 'LOAD_FAST_LOAD_FAST', ("a00", "a01"))
+        self.assertInBytecode(f, 'LOAD_FAST_BORROW_LOAD_FAST_BORROW', ("a00", "a01"))
         self.assertNotInBytecode(f, 'LOAD_FAST_CHECK', "a00")
         self.assertNotInBytecode(f, 'LOAD_FAST_CHECK', "a01")
         for i in 62, 63:
             # First 64 locals: analyze completely
-            self.assertInBytecode(f, 'LOAD_FAST', f"a{i:02}")
+            self.assertInBytecode(f, 'LOAD_FAST_BORROW', f"a{i:02}")
             self.assertNotInBytecode(f, 'LOAD_FAST_CHECK', f"a{i:02}")
         for i in 64, 65, 78, 79:
             # Locals >=64 not in the same basicblock
@@ -956,14 +961,14 @@ def f():
             self.assertNotInBytecode(f, 'LOAD_FAST', f"a{i:02}")
         for i in 70, 71:
             # Locals >=64 in the same basicblock
-            self.assertInBytecode(f, 'LOAD_FAST', f"a{i:02}")
+            self.assertInBytecode(f, 'LOAD_FAST_BORROW', f"a{i:02}")
             self.assertNotInBytecode(f, 'LOAD_FAST_CHECK', f"a{i:02}")
         # del statements should invalidate within basicblocks.
         self.assertInBytecode(f, 'LOAD_FAST_CHECK', "a72")
         self.assertNotInBytecode(f, 'LOAD_FAST', "a72")
         # previous checked loads within a basicblock enable unchecked loads
         self.assertInBytecode(f, 'LOAD_FAST_CHECK', "a73")
-        self.assertInBytecode(f, 'LOAD_FAST', "a73")
+        self.assertInBytecode(f, 'LOAD_FAST_BORROW', "a73")
 
     def test_setting_lineno_no_undefined(self):
         code = textwrap.dedent("""\
@@ -981,7 +986,7 @@ def f():
         ns = {}
         exec(code, ns)
         f = ns['f']
-        self.assertInBytecode(f, "LOAD_FAST")
+        self.assertInBytecode(f, "LOAD_FAST_BORROW")
         self.assertNotInBytecode(f, "LOAD_FAST_CHECK")
         co_code = f.__code__.co_code
         def trace(frame, event, arg):
@@ -993,7 +998,7 @@ def trace(frame, event, arg):
         sys.settrace(trace)
         result = f()
         self.assertIsNone(result)
-        self.assertInBytecode(f, "LOAD_FAST")
+        self.assertInBytecode(f, "LOAD_FAST_BORROW")
         self.assertNotInBytecode(f, "LOAD_FAST_CHECK")
         self.assertEqual(f.__code__.co_code, co_code)
 
@@ -1013,7 +1018,7 @@ def f():
         ns = {}
         exec(code, ns)
         f = ns['f']
-        self.assertInBytecode(f, "LOAD_FAST")
+        self.assertInBytecode(f, "LOAD_FAST_BORROW")
         self.assertNotInBytecode(f, "LOAD_FAST_CHECK")
         co_code = f.__code__.co_code
         def trace(frame, event, arg):
@@ -1027,7 +1032,7 @@ def trace(frame, event, arg):
             sys.settrace(trace)
             result = f()
         self.assertEqual(result, 4)
-        self.assertInBytecode(f, "LOAD_FAST")
+        self.assertInBytecode(f, "LOAD_FAST_BORROW")
         self.assertNotInBytecode(f, "LOAD_FAST_CHECK")
         self.assertEqual(f.__code__.co_code, co_code)
 
@@ -1047,7 +1052,7 @@ def f():
         ns = {}
         exec(code, ns)
         f = ns['f']
-        self.assertInBytecode(f, "LOAD_FAST")
+        self.assertInBytecode(f, "LOAD_FAST_BORROW")
         self.assertNotInBytecode(f, "LOAD_FAST_CHECK")
         co_code = f.__code__.co_code
         def trace(frame, event, arg):
@@ -1061,7 +1066,7 @@ def trace(frame, event, arg):
             sys.settrace(trace)
             result = f()
         self.assertEqual(result, 4)
-        self.assertInBytecode(f, "LOAD_FAST")
+        self.assertInBytecode(f, "LOAD_FAST_BORROW")
         self.assertNotInBytecode(f, "LOAD_FAST_CHECK")
         self.assertEqual(f.__code__.co_code, co_code)
 
@@ -1079,7 +1084,7 @@ def f():
         ns = {}
         exec(code, ns)
         f = ns['f']
-        self.assertInBytecode(f, "LOAD_FAST")
+        self.assertInBytecode(f, "LOAD_FAST_BORROW")
         self.assertNotInBytecode(f, "LOAD_FAST_CHECK")
         return f
 
@@ -1093,7 +1098,7 @@ def trace(frame, event, arg):
             return trace
         sys.settrace(trace)
         f()
-        self.assertInBytecode(f, "LOAD_FAST")
+        self.assertInBytecode(f, "LOAD_FAST_BORROW")
         self.assertNotInBytecode(f, "LOAD_FAST_CHECK")
 
     def test_initializing_local_does_not_add_check(self):
@@ -1106,7 +1111,7 @@ def trace(frame, event, arg):
             return trace
         sys.settrace(trace)
         f()
-        self.assertInBytecode(f, "LOAD_FAST")
+        self.assertInBytecode(f, "LOAD_FAST_BORROW")
         self.assertNotInBytecode(f, "LOAD_FAST_CHECK")
 
 
@@ -1263,15 +1268,6 @@ def test_fold_tuple_of_constants(self):
         ]
         self.cfg_optimization_test(before, after, consts=[], expected_consts=[(1, 2, 3)])
 
-        # not enough consts
-        same = [
-            ('LOAD_SMALL_INT', 1, 0),
-            ('LOAD_SMALL_INT', 2, 0),
-            ('BUILD_TUPLE', 3, 0),
-            ('RETURN_VALUE', None, 0)
-        ]
-        self.cfg_optimization_test(same, same, consts=[])
-
         # not all consts
         same = [
             ('LOAD_SMALL_INT', 1, 0),
@@ -1374,19 +1370,6 @@ def test_fold_constant_intrinsic_list_to_tuple(self):
         ]
         self.cfg_optimization_test(before, after, consts=[], expected_consts=[(1, 2, 3)])
 
-        # no sequence start
-        same = [
-            ('LOAD_SMALL_INT', 1, 0),
-            ('LIST_APPEND', 1, 0),
-            ('LOAD_SMALL_INT', 2, 0),
-            ('LIST_APPEND', 1, 0),
-            ('LOAD_SMALL_INT', 3, 0),
-            ('LIST_APPEND', 1, 0),
-            ('CALL_INTRINSIC_1', INTRINSIC_LIST_TO_TUPLE, 0),
-            ('RETURN_VALUE', None, 0)
-        ]
-        self.cfg_optimization_test(same, same, consts=[])
-
     def test_optimize_if_const_list(self):
         before = [
             ('NOP', None, 0),
@@ -1417,16 +1400,6 @@ def test_optimize_if_const_list(self):
         ]
         self.cfg_optimization_test(same, same, consts=[])
 
-        # not enough consts
-        same = [
-            ('LOAD_SMALL_INT', 1, 0),
-            ('LOAD_SMALL_INT', 2, 0),
-            ('LOAD_SMALL_INT', 3, 0),
-            ('BUILD_LIST', 4, 0),
-            ('RETURN_VALUE', None, 0),
-        ]
-        self.cfg_optimization_test(same, same, consts=[])
-
         # not all consts
         same = [
             ('LOAD_SMALL_INT', 1, 0),
@@ -1467,16 +1440,6 @@ def test_optimize_if_const_set(self):
         ]
         self.cfg_optimization_test(same, same, consts=[])
 
-        # not enough consts
-        same = [
-            ('LOAD_SMALL_INT', 1, 0),
-            ('LOAD_SMALL_INT', 2, 0),
-            ('LOAD_SMALL_INT', 3, 0),
-            ('BUILD_SET', 4, 0),
-            ('RETURN_VALUE', None, 0),
-        ]
-        self.cfg_optimization_test(same, same, consts=[])
-
         # not all consts
         same = [
             ('LOAD_SMALL_INT', 1, 0),
@@ -2331,13 +2294,13 @@ def get_insts(lno1, lno2, op1, op2):
             return [
                        lbl2 := self.Label(),
                        ('LOAD_NAME', 0, 10),
+                       ('POP_TOP', None, 10),
                        (op1, lbl1 := self.Label(), lno1),
                        ('LOAD_NAME', 1, 20),
                        lbl1,
                        (op2, lbl2, lno2),
                    ]
 
-
         for op1 in ('JUMP', 'JUMP_NO_INTERRUPT'):
             for op2 in ('JUMP', 'JUMP_NO_INTERRUPT'):
                 # different lines
@@ -2347,6 +2310,7 @@ def get_insts(lno1, lno2, op1, op2):
                     op = 'JUMP' if 'JUMP' in (op1, op2) else 'JUMP_NO_INTERRUPT'
                     expected_insts = [
                         ('LOAD_NAME', 0, 10),
+                        ('POP_TOP', None, 10),
                         ('NOP', None, 4),
                         (op, 0, 5),
                     ]
@@ -2363,6 +2327,7 @@ def get_insts(lno1, lno2, op1, op2):
                         op = 'JUMP' if 'JUMP' in (op1, op2) else 'JUMP_NO_INTERRUPT'
                         expected_insts = [
                             ('LOAD_NAME', 0, 10),
+                            ('POP_TOP', None, 10),
                             (op, 0, lno),
                         ]
                         self.cfg_optimization_test(insts, expected_insts, consts=list(range(5)))
@@ -2390,9 +2355,9 @@ def test_list_to_tuple_get_iter(self):
         ]
         expected_insts = [
             ("BUILD_LIST", 0, 1),
-            ("LOAD_FAST", 0, 2),
+            ("LOAD_FAST_BORROW", 0, 2),
             ("LIST_EXTEND", 1, 3),
-            ("LOAD_FAST", 1, 4),
+            ("LOAD_FAST_BORROW", 1, 4),
             ("LIST_EXTEND", 1, 5),
             ("NOP", None, 6),  # ("CALL_INTRINSIC_1", INTRINSIC_LIST_TO_TUPLE, 6),
             ("GET_ITER", None, 7),
@@ -2418,5 +2383,230 @@ def test_list_to_tuple_get_iter_is_safe(self):
         self.assertEqual(items, [])
 
 
+class OptimizeLoadFastTestCase(DirectCfgOptimizerTests):
+    def make_bb(self, insts):
+        last_loc = insts[-1][2]
+        maxconst = 0
+        for op, arg, _ in insts:
+            if op == "LOAD_CONST":
+                maxconst = max(maxconst, arg)
+        consts = [None for _ in range(maxconst + 1)]
+        return insts + [
+            ("LOAD_CONST", 0, last_loc + 1),
+            ("RETURN_VALUE", None, last_loc + 2),
+        ], consts
+
+    def check(self, insts, expected_insts, consts=None):
+        insts_bb, insts_consts = self.make_bb(insts)
+        expected_insts_bb, exp_consts = self.make_bb(expected_insts)
+        self.cfg_optimization_test(insts_bb, expected_insts_bb,
+                                   consts=insts_consts, expected_consts=exp_consts)
+
+    def test_optimized(self):
+        insts = [
+            ("LOAD_FAST", 0, 1),
+            ("LOAD_FAST", 1, 2),
+            ("BINARY_OP", 2, 3),
+        ]
+        expected = [
+            ("LOAD_FAST_BORROW", 0, 1),
+            ("LOAD_FAST_BORROW", 1, 2),
+            ("BINARY_OP", 2, 3),
+        ]
+        self.check(insts, expected)
+
+        insts = [
+            ("LOAD_FAST", 0, 1),
+            ("LOAD_CONST", 1, 2),
+            ("SWAP", 2, 3),
+            ("POP_TOP", None, 4),
+        ]
+        expected = [
+            ("LOAD_FAST_BORROW", 0, 1),
+            ("LOAD_CONST", 1, 2),
+            ("SWAP", 2, 3),
+            ("POP_TOP", None, 4),
+        ]
+        self.check(insts, expected)
+
+    def test_unoptimized_if_unconsumed(self):
+        insts = [
+            ("LOAD_FAST", 0, 1),
+            ("LOAD_FAST", 1, 2),
+            ("POP_TOP", None, 3),
+        ]
+        expected = [
+            ("LOAD_FAST", 0, 1),
+            ("LOAD_FAST_BORROW", 1, 2),
+            ("POP_TOP", None, 3),
+        ]
+        self.check(insts, expected)
+
+        insts = [
+            ("LOAD_FAST", 0, 1),
+            ("COPY", 1, 2),
+            ("POP_TOP", None, 3),
+        ]
+        expected = [
+            ("LOAD_FAST", 0, 1),
+            ("NOP", None, 2),
+            ("NOP", None, 3),
+        ]
+        self.check(insts, expected)
+
+    def test_unoptimized_if_support_killed(self):
+        insts = [
+            ("LOAD_FAST", 0, 1),
+            ("LOAD_CONST", 0, 2),
+            ("STORE_FAST", 0, 3),
+            ("POP_TOP", None, 4),
+        ]
+        self.check(insts, insts)
+
+        insts = [
+            ("LOAD_FAST", 0, 1),
+            ("LOAD_CONST", 0, 2),
+            ("LOAD_CONST", 0, 3),
+            ("STORE_FAST_STORE_FAST", ((0 << 4) | 1), 4),
+            ("POP_TOP", None, 5),
+        ]
+        self.check(insts, insts)
+
+    def test_unoptimized_if_aliased(self):
+        insts = [
+            ("LOAD_FAST", 0, 1),
+            ("STORE_FAST", 1, 2),
+        ]
+        self.check(insts, insts)
+
+        insts = [
+            ("LOAD_FAST", 0, 1),
+            ("LOAD_CONST", 0, 3),
+            ("STORE_FAST_STORE_FAST", ((0 << 4) | 1), 4),
+        ]
+        self.check(insts, insts)
+
+    def test_consume_no_inputs(self):
+        insts = [
+            ("LOAD_FAST", 0, 1),
+            ("GET_LEN", None, 2),
+            ("STORE_FAST", 1 , 3),
+            ("STORE_FAST", 2, 4),
+        ]
+        self.check(insts, insts)
+
+    def test_consume_some_inputs_no_outputs(self):
+        insts = [
+            ("LOAD_FAST", 0, 1),
+            ("GET_LEN", None, 2),
+            ("LIST_APPEND", 0, 3),
+        ]
+        self.check(insts, insts)
+
+    def test_check_exc_match(self):
+        insts = [
+            ("LOAD_FAST", 0, 1),
+            ("LOAD_FAST", 1, 2),
+            ("CHECK_EXC_MATCH", None, 3)
+        ]
+        expected = [
+            ("LOAD_FAST", 0, 1),
+            ("LOAD_FAST_BORROW", 1, 2),
+            ("CHECK_EXC_MATCH", None, 3)
+        ]
+        self.check(insts, expected)
+
+    def test_for_iter(self):
+        insts = [
+            ("LOAD_FAST", 0, 1),
+            top := self.Label(),
+            ("FOR_ITER", end := self.Label(), 2),
+            ("STORE_FAST", 2, 3),
+            ("JUMP", top, 4),
+            end,
+            ("END_FOR", None, 5),
+            ("POP_TOP", None, 6),
+            ("LOAD_CONST", 0, 7),
+            ("RETURN_VALUE", None, 8),
+        ]
+        self.cfg_optimization_test(insts, insts, consts=[None])
+
+    def test_load_attr(self):
+        insts = [
+            ("LOAD_FAST", 0, 1),
+            ("LOAD_ATTR", 0, 2),
+        ]
+        expected = [
+            ("LOAD_FAST_BORROW", 0, 1),
+            ("LOAD_ATTR", 0, 2),
+        ]
+        self.check(insts, expected)
+
+        # Method call, leaves self on stack unconsumed
+        insts = [
+            ("LOAD_FAST", 0, 1),
+            ("LOAD_ATTR", 1, 2),
+        ]
+        expected = [
+            ("LOAD_FAST", 0, 1),
+            ("LOAD_ATTR", 1, 2),
+        ]
+        self.check(insts, expected)
+
+    def test_super_attr(self):
+        insts = [
+            ("LOAD_FAST", 0, 1),
+            ("LOAD_FAST", 1, 2),
+            ("LOAD_FAST", 2, 3),
+            ("LOAD_SUPER_ATTR", 0, 4),
+        ]
+        expected = [
+            ("LOAD_FAST_BORROW", 0, 1),
+            ("LOAD_FAST_BORROW", 1, 2),
+            ("LOAD_FAST_BORROW", 2, 3),
+            ("LOAD_SUPER_ATTR", 0, 4),
+        ]
+        self.check(insts, expected)
+
+        # Method call, leaves self on stack unconsumed
+        insts = [
+            ("LOAD_FAST", 0, 1),
+            ("LOAD_FAST", 1, 2),
+            ("LOAD_FAST", 2, 3),
+            ("LOAD_SUPER_ATTR", 1, 4),
+        ]
+        expected = [
+            ("LOAD_FAST_BORROW", 0, 1),
+            ("LOAD_FAST_BORROW", 1, 2),
+            ("LOAD_FAST", 2, 3),
+            ("LOAD_SUPER_ATTR", 1, 4),
+        ]
+        self.check(insts, expected)
+
+    def test_send(self):
+        insts = [
+            ("LOAD_FAST", 0, 1),
+            ("LOAD_FAST", 1, 2),
+            ("SEND", end := self.Label(), 3),
+            ("LOAD_CONST", 0, 4),
+            ("RETURN_VALUE", None, 5),
+            end,
+            ("LOAD_CONST", 0, 6),
+            ("RETURN_VALUE", None, 7)
+        ]
+        expected = [
+            ("LOAD_FAST", 0, 1),
+            ("LOAD_FAST_BORROW", 1, 2),
+            ("SEND", end := self.Label(), 3),
+            ("LOAD_CONST", 0, 4),
+            ("RETURN_VALUE", None, 5),
+            end,
+            ("LOAD_CONST", 0, 6),
+            ("RETURN_VALUE", None, 7)
+        ]
+        self.cfg_optimization_test(insts, expected, consts=[None])
+
+
+
 if __name__ == "__main__":
     unittest.main()
diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py
index c64a80d83f154e..368a5ba413b6ce 100644
--- a/Lib/test/test_sys.py
+++ b/Lib/test/test_sys.py
@@ -1664,7 +1664,7 @@ def func():
             INTERPRETER_FRAME = '9PihcP'
         else:
             INTERPRETER_FRAME = '9PhcP'
-        check(x, size('3PiccPP' + INTERPRETER_FRAME + 'P'))
+        check(x, size('3PiccPPP' + INTERPRETER_FRAME + 'P'))
         # function
         def func(): pass
         check(func, size('16Pi'))
diff --git a/Lib/test/test_traceback.py b/Lib/test/test_traceback.py
index 6ae8cf5c9a5130..5c390fd056b3e3 100644
--- a/Lib/test/test_traceback.py
+++ b/Lib/test/test_traceback.py
@@ -3598,6 +3598,7 @@ def test_no_save_exc_type(self):
             self.assertIsNone(te.exc_type)
 
     def test_no_refs_to_exception_and_traceback_objects(self):
+        exc_obj = None
         try:
             1/0
         except Exception as e:
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-02-28-11-29-35.gh-issue-130704.7RDVLE.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-02-28-11-29-35.gh-issue-130704.7RDVLE.rst
new file mode 100644
index 00000000000000..42fba2933c31b3
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-02-28-11-29-35.gh-issue-130704.7RDVLE.rst
@@ -0,0 +1,4 @@
+Optimize ``LOAD_FAST`` and its superinstruction form to reduce reference
+counting overhead. These instructions are replaced with faster variants that
+load borrowed references onto the operand stack when we can prove that the
+reference in the frame outlives the reference loaded onto the stack.
diff --git a/Objects/floatobject.c b/Objects/floatobject.c
index af43357439d309..87a00bf1a458ea 100644
--- a/Objects/floatobject.c
+++ b/Objects/floatobject.c
@@ -135,8 +135,6 @@ PyFloat_FromDouble(double fval)
     return (PyObject *) op;
 }
 
-#ifdef Py_GIL_DISABLED
-
 _PyStackRef _PyFloat_FromDouble_ConsumeInputs(_PyStackRef left, _PyStackRef right, double value)
 {
     PyStackRef_CLOSE_SPECIALIZED(left, _PyFloat_ExactDealloc);
@@ -144,35 +142,6 @@ _PyStackRef _PyFloat_FromDouble_ConsumeInputs(_PyStackRef left, _PyStackRef righ
     return PyStackRef_FromPyObjectSteal(PyFloat_FromDouble(value));
 }
 
-#else // Py_GIL_DISABLED
-
-_PyStackRef _PyFloat_FromDouble_ConsumeInputs(_PyStackRef left, _PyStackRef right, double value)
-{
-    PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
-    PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
-    if (Py_REFCNT(left_o) == 1) {
-        ((PyFloatObject *)left_o)->ob_fval = value;
-        PyStackRef_CLOSE_SPECIALIZED(right, _PyFloat_ExactDealloc);
-        return left;
-    }
-    else if (Py_REFCNT(right_o) == 1)  {
-        ((PyFloatObject *)right_o)->ob_fval = value;
-        PyStackRef_CLOSE_SPECIALIZED(left, _PyFloat_ExactDealloc);
-        return right;
-    }
-    else {
-        PyObject *result = PyFloat_FromDouble(value);
-        PyStackRef_CLOSE_SPECIALIZED(left, _PyFloat_ExactDealloc);
-        PyStackRef_CLOSE_SPECIALIZED(right, _PyFloat_ExactDealloc);
-        if (result == NULL) {
-            return PyStackRef_NULL;
-        }
-        return PyStackRef_FromPyObjectStealMortal(result);
-    }
-}
-
-#endif // Py_GIL_DISABLED
-
 static PyObject *
 float_from_string_inner(const char *s, Py_ssize_t len, void *obj)
 {
diff --git a/Objects/frameobject.c b/Objects/frameobject.c
index e6a124ef94c3a1..db4e4b7e1939de 100644
--- a/Objects/frameobject.c
+++ b/Objects/frameobject.c
@@ -215,6 +215,33 @@ framelocalsproxy_getitem(PyObject *self, PyObject *key)
     return NULL;
 }
 
+static int
+add_overwritten_fast_local(PyFrameObject *frame, PyObject *obj)
+{
+    Py_ssize_t new_size;
+    if (frame->f_overwritten_fast_locals == NULL) {
+        new_size = 1;
+    }
+    else {
+        Py_ssize_t size = PyTuple_Size(frame->f_overwritten_fast_locals);
+        if (size == -1) {
+            return -1;
+        }
+        new_size = size + 1;
+    }
+    PyObject *new_tuple = PyTuple_New(new_size);
+    if (new_tuple == NULL) {
+        return -1;
+    }
+    for (Py_ssize_t i = 0; i < new_size - 1; i++) {
+        PyObject *o = PyTuple_GET_ITEM(frame->f_overwritten_fast_locals, i);
+        PyTuple_SET_ITEM(new_tuple, i, Py_NewRef(o));
+    }
+    PyTuple_SET_ITEM(new_tuple, new_size - 1, Py_NewRef(obj));
+    Py_XSETREF(frame->f_overwritten_fast_locals, new_tuple);
+    return 0;
+}
+
 static int
 framelocalsproxy_setitem(PyObject *self, PyObject *key, PyObject *value)
 {
@@ -253,7 +280,13 @@ framelocalsproxy_setitem(PyObject *self, PyObject *key, PyObject *value)
             Py_XINCREF(value);
             PyCell_SetTakeRef((PyCellObject *)cell, value);
         } else if (value != PyStackRef_AsPyObjectBorrow(oldvalue)) {
-            PyStackRef_XCLOSE(fast[i]);
+            PyObject *old_obj = PyStackRef_AsPyObjectBorrow(fast[i]);
+            if (old_obj != NULL && !_Py_IsImmortal(old_obj)) {
+                if (add_overwritten_fast_local(frame, old_obj) < 0) {
+                    return -1;
+                }
+                PyStackRef_CLOSE(fast[i]);
+            }
             fast[i] = PyStackRef_FromPyObjectNew(value);
         }
         return 0;
@@ -1906,6 +1939,7 @@ frame_dealloc(PyObject *op)
     Py_CLEAR(f->f_trace);
     Py_CLEAR(f->f_extra_locals);
     Py_CLEAR(f->f_locals_cache);
+    Py_CLEAR(f->f_overwritten_fast_locals);
     PyObject_GC_Del(f);
     Py_TRASHCAN_END;
 }
@@ -1918,6 +1952,7 @@ frame_traverse(PyObject *op, visitproc visit, void *arg)
     Py_VISIT(f->f_trace);
     Py_VISIT(f->f_extra_locals);
     Py_VISIT(f->f_locals_cache);
+    Py_VISIT(f->f_overwritten_fast_locals);
     if (f->f_frame->owner != FRAME_OWNED_BY_FRAME_OBJECT) {
         return 0;
     }
@@ -1932,6 +1967,7 @@ frame_tp_clear(PyObject *op)
     Py_CLEAR(f->f_trace);
     Py_CLEAR(f->f_extra_locals);
     Py_CLEAR(f->f_locals_cache);
+    Py_CLEAR(f->f_overwritten_fast_locals);
 
     /* locals and stack */
     _PyStackRef *locals = _PyFrame_GetLocalsArray(f->f_frame);
@@ -2081,6 +2117,7 @@ _PyFrame_New_NoTrack(PyCodeObject *code)
     f->f_lineno = 0;
     f->f_extra_locals = NULL;
     f->f_locals_cache = NULL;
+    f->f_overwritten_fast_locals = NULL;
     return f;
 }
 
diff --git a/Programs/test_frozenmain.h b/Programs/test_frozenmain.h
index 76a335086c9b11..8cedee31e08a00 100644
--- a/Programs/test_frozenmain.h
+++ b/Programs/test_frozenmain.h
@@ -1,16 +1,16 @@
 // Auto-generated by Programs/freeze_test_frozenmain.py
 unsigned char M_test_frozenmain[] = {
     227,0,0,0,0,0,0,0,0,0,0,0,0,8,0,0,
-    0,0,0,0,0,243,184,0,0,0,128,0,90,0,80,1,
-    71,0,112,0,90,0,80,1,71,1,112,1,89,2,32,0,
-    80,2,50,1,0,0,0,0,0,0,30,0,89,2,32,0,
-    80,3,89,0,78,6,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,243,184,0,0,0,128,0,92,0,80,1,
+    71,0,114,0,92,0,80,1,71,1,114,1,91,2,32,0,
+    80,2,50,1,0,0,0,0,0,0,30,0,91,2,32,0,
+    80,3,91,0,78,6,0,0,0,0,0,0,0,0,0,0,
     0,0,0,0,0,0,0,0,50,2,0,0,0,0,0,0,
-    30,0,89,1,78,8,0,0,0,0,0,0,0,0,0,0,
+    30,0,91,1,78,8,0,0,0,0,0,0,0,0,0,0,
     0,0,0,0,0,0,0,0,32,0,50,0,0,0,0,0,
     0,0,80,4,43,26,0,0,0,0,0,0,0,0,0,0,
-    112,5,80,7,15,0,68,24,0,0,112,6,89,2,32,0,
-    80,5,89,6,11,0,80,6,89,5,89,6,43,26,0,0,
+    114,5,80,7,15,0,68,24,0,0,114,6,91,2,32,0,
+    80,5,91,6,11,0,80,6,91,5,91,6,43,26,0,0,
     0,0,0,0,0,0,0,0,11,0,48,4,50,1,0,0,
     0,0,0,0,30,0,73,26,0,0,8,0,29,0,80,1,
     34,0,41,8,233,0,0,0,0,78,122,18,70,114,111,122,
diff --git a/Python/bytecodes.c b/Python/bytecodes.c
index 43424447bb068d..83847e31ef4dc7 100644
--- a/Python/bytecodes.c
+++ b/Python/bytecodes.c
@@ -270,6 +270,11 @@ dummy_func(
             value = PyStackRef_DUP(GETLOCAL(oparg));
         }
 
+        replicate(8) pure inst (LOAD_FAST_BORROW, (-- value)) {
+            assert(!PyStackRef_IsNull(GETLOCAL(oparg)));
+            value = PyStackRef_Borrow(GETLOCAL(oparg));
+        }
+
         inst(LOAD_FAST_AND_CLEAR, (-- value)) {
             value = GETLOCAL(oparg);
             GETLOCAL(oparg) = PyStackRef_NULL;
@@ -282,6 +287,13 @@ dummy_func(
             value2 = PyStackRef_DUP(GETLOCAL(oparg2));
         }
 
+        inst(LOAD_FAST_BORROW_LOAD_FAST_BORROW, ( -- value1, value2)) {
+            uint32_t oparg1 = oparg >> 4;
+            uint32_t oparg2 = oparg & 15;
+            value1 = PyStackRef_Borrow(GETLOCAL(oparg1));
+            value2 = PyStackRef_Borrow(GETLOCAL(oparg2));
+        }
+
         family(LOAD_CONST, 0) = {
             LOAD_CONST_MORTAL,
             LOAD_CONST_IMMORTAL,
@@ -741,9 +753,8 @@ dummy_func(
         // At the end we just skip over the STORE_FAST.
         op(_BINARY_OP_INPLACE_ADD_UNICODE, (left, right --)) {
             PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
-            PyObject *right_o = PyStackRef_AsPyObjectSteal(right);
             assert(PyUnicode_CheckExact(left_o));
-            assert(PyUnicode_CheckExact(right_o));
+            assert(PyUnicode_CheckExact(PyStackRef_AsPyObjectBorrow(right)));
 
             int next_oparg;
         #if TIER_ONE
@@ -767,10 +778,11 @@ dummy_func(
              * only the locals reference, so PyUnicode_Append knows
              * that the string is safe to mutate.
              */
-            assert(Py_REFCNT(left_o) >= 2);
+            assert(Py_REFCNT(left_o) >= 2 || !PyStackRef_IsHeapSafe(left));
             PyStackRef_CLOSE_SPECIALIZED(left, _PyUnicode_ExactDealloc);
             DEAD(left);
             PyObject *temp = PyStackRef_AsPyObjectSteal(*target_local);
+            PyObject *right_o = PyStackRef_AsPyObjectSteal(right);
             PyUnicode_Append(&temp, right_o);
             *target_local = PyStackRef_FromPyObjectSteal(temp);
             Py_DECREF(right_o);
@@ -1110,8 +1122,7 @@ dummy_func(
         // is pushed to a different frame, the callers' frame.
         inst(RETURN_VALUE, (retval -- res)) {
             assert(frame->owner != FRAME_OWNED_BY_INTERPRETER);
-            _PyStackRef temp = retval;
-            assert(PyStackRef_IsHeapSafe(temp));
+            _PyStackRef temp = PyStackRef_MakeHeapSafe(retval);
             DEAD(retval);
             SAVE_STACK();
             assert(EMPTY());
@@ -1206,7 +1217,6 @@ dummy_func(
 
         op(_SEND, (receiver, v -- receiver, retval)) {
             PyObject *receiver_o = PyStackRef_AsPyObjectBorrow(receiver);
-
             PyObject *retval_o;
             assert(frame->owner != FRAME_OWNED_BY_INTERPRETER);
             if ((tstate->interp->eval_frame == NULL) &&
@@ -1216,7 +1226,7 @@ dummy_func(
                 PyGenObject *gen = (PyGenObject *)receiver_o;
                 _PyInterpreterFrame *gen_frame = &gen->gi_iframe;
                 STACK_SHRINK(1);
-                _PyFrame_StackPush(gen_frame, v);
+                _PyFrame_StackPush(gen_frame, PyStackRef_MakeHeapSafe(v));
                 gen->gi_frame_state = FRAME_EXECUTING;
                 gen->gi_exc_state.previous_item = tstate->exc_info;
                 tstate->exc_info = &gen->gi_exc_state;
@@ -1261,7 +1271,7 @@ dummy_func(
             DEOPT_IF(gen->gi_frame_state >= FRAME_EXECUTING);
             STAT_INC(SEND, hit);
             gen_frame = &gen->gi_iframe;
-            _PyFrame_StackPush(gen_frame, v);
+            _PyFrame_StackPush(gen_frame, PyStackRef_MakeHeapSafe(v));
             DEAD(v);
             gen->gi_frame_state = FRAME_EXECUTING;
             gen->gi_exc_state.previous_item = tstate->exc_info;
@@ -1308,7 +1318,7 @@ dummy_func(
             #endif
             RELOAD_STACK();
             LOAD_IP(1 + INLINE_CACHE_ENTRIES_SEND);
-            value = temp;
+            value = PyStackRef_MakeHeapSafe(temp);
             LLTRACE_RESUME_FRAME();
         }
 
diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h
index 29c3a270a27526..260d52be02c136 100644
--- a/Python/executor_cases.c.h
+++ b/Python/executor_cases.c.h
@@ -201,6 +201,113 @@
             break;
         }
 
+        case _LOAD_FAST_BORROW_0: {
+            _PyStackRef value;
+            oparg = 0;
+            assert(oparg == CURRENT_OPARG());
+            assert(!PyStackRef_IsNull(GETLOCAL(oparg)));
+            value = PyStackRef_Borrow(GETLOCAL(oparg));
+            stack_pointer[0] = value;
+            stack_pointer += 1;
+            assert(WITHIN_STACK_BOUNDS());
+            break;
+        }
+
+        case _LOAD_FAST_BORROW_1: {
+            _PyStackRef value;
+            oparg = 1;
+            assert(oparg == CURRENT_OPARG());
+            assert(!PyStackRef_IsNull(GETLOCAL(oparg)));
+            value = PyStackRef_Borrow(GETLOCAL(oparg));
+            stack_pointer[0] = value;
+            stack_pointer += 1;
+            assert(WITHIN_STACK_BOUNDS());
+            break;
+        }
+
+        case _LOAD_FAST_BORROW_2: {
+            _PyStackRef value;
+            oparg = 2;
+            assert(oparg == CURRENT_OPARG());
+            assert(!PyStackRef_IsNull(GETLOCAL(oparg)));
+            value = PyStackRef_Borrow(GETLOCAL(oparg));
+            stack_pointer[0] = value;
+            stack_pointer += 1;
+            assert(WITHIN_STACK_BOUNDS());
+            break;
+        }
+
+        case _LOAD_FAST_BORROW_3: {
+            _PyStackRef value;
+            oparg = 3;
+            assert(oparg == CURRENT_OPARG());
+            assert(!PyStackRef_IsNull(GETLOCAL(oparg)));
+            value = PyStackRef_Borrow(GETLOCAL(oparg));
+            stack_pointer[0] = value;
+            stack_pointer += 1;
+            assert(WITHIN_STACK_BOUNDS());
+            break;
+        }
+
+        case _LOAD_FAST_BORROW_4: {
+            _PyStackRef value;
+            oparg = 4;
+            assert(oparg == CURRENT_OPARG());
+            assert(!PyStackRef_IsNull(GETLOCAL(oparg)));
+            value = PyStackRef_Borrow(GETLOCAL(oparg));
+            stack_pointer[0] = value;
+            stack_pointer += 1;
+            assert(WITHIN_STACK_BOUNDS());
+            break;
+        }
+
+        case _LOAD_FAST_BORROW_5: {
+            _PyStackRef value;
+            oparg = 5;
+            assert(oparg == CURRENT_OPARG());
+            assert(!PyStackRef_IsNull(GETLOCAL(oparg)));
+            value = PyStackRef_Borrow(GETLOCAL(oparg));
+            stack_pointer[0] = value;
+            stack_pointer += 1;
+            assert(WITHIN_STACK_BOUNDS());
+            break;
+        }
+
+        case _LOAD_FAST_BORROW_6: {
+            _PyStackRef value;
+            oparg = 6;
+            assert(oparg == CURRENT_OPARG());
+            assert(!PyStackRef_IsNull(GETLOCAL(oparg)));
+            value = PyStackRef_Borrow(GETLOCAL(oparg));
+            stack_pointer[0] = value;
+            stack_pointer += 1;
+            assert(WITHIN_STACK_BOUNDS());
+            break;
+        }
+
+        case _LOAD_FAST_BORROW_7: {
+            _PyStackRef value;
+            oparg = 7;
+            assert(oparg == CURRENT_OPARG());
+            assert(!PyStackRef_IsNull(GETLOCAL(oparg)));
+            value = PyStackRef_Borrow(GETLOCAL(oparg));
+            stack_pointer[0] = value;
+            stack_pointer += 1;
+            assert(WITHIN_STACK_BOUNDS());
+            break;
+        }
+
+        case _LOAD_FAST_BORROW: {
+            _PyStackRef value;
+            oparg = CURRENT_OPARG();
+            assert(!PyStackRef_IsNull(GETLOCAL(oparg)));
+            value = PyStackRef_Borrow(GETLOCAL(oparg));
+            stack_pointer[0] = value;
+            stack_pointer += 1;
+            assert(WITHIN_STACK_BOUNDS());
+            break;
+        }
+
         case _LOAD_FAST_AND_CLEAR: {
             _PyStackRef value;
             oparg = CURRENT_OPARG();
@@ -1022,9 +1129,8 @@
             right = stack_pointer[-1];
             left = stack_pointer[-2];
             PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
-            PyObject *right_o = PyStackRef_AsPyObjectSteal(right);
             assert(PyUnicode_CheckExact(left_o));
-            assert(PyUnicode_CheckExact(right_o));
+            assert(PyUnicode_CheckExact(PyStackRef_AsPyObjectBorrow(right)));
             int next_oparg;
             #if TIER_ONE
             assert(next_instr->op.code == STORE_FAST);
@@ -1050,9 +1156,10 @@
              * only the locals reference, so PyUnicode_Append knows
              * that the string is safe to mutate.
              */
-            assert(Py_REFCNT(left_o) >= 2);
+            assert(Py_REFCNT(left_o) >= 2 || !PyStackRef_IsHeapSafe(left));
             PyStackRef_CLOSE_SPECIALIZED(left, _PyUnicode_ExactDealloc);
             PyObject *temp = PyStackRef_AsPyObjectSteal(*target_local);
+            PyObject *right_o = PyStackRef_AsPyObjectSteal(right);
             stack_pointer += -2;
             assert(WITHIN_STACK_BOUNDS());
             _PyFrame_SetStackPointer(frame, stack_pointer);
@@ -1698,8 +1805,7 @@
             _PyStackRef res;
             retval = stack_pointer[-1];
             assert(frame->owner != FRAME_OWNED_BY_INTERPRETER);
-            _PyStackRef temp = retval;
-            assert(PyStackRef_IsHeapSafe(temp));
+            _PyStackRef temp = PyStackRef_MakeHeapSafe(retval);
             stack_pointer += -1;
             assert(WITHIN_STACK_BOUNDS());
             _PyFrame_SetStackPointer(frame, stack_pointer);
@@ -1833,7 +1939,7 @@
             }
             STAT_INC(SEND, hit);
             gen_frame = &gen->gi_iframe;
-            _PyFrame_StackPush(gen_frame, v);
+            _PyFrame_StackPush(gen_frame, PyStackRef_MakeHeapSafe(v));
             gen->gi_frame_state = FRAME_EXECUTING;
             gen->gi_exc_state.previous_item = tstate->exc_info;
             tstate->exc_info = &gen->gi_exc_state;
@@ -1880,7 +1986,7 @@
             #endif
             stack_pointer = _PyFrame_GetStackPointer(frame);
             LOAD_IP(1 + INLINE_CACHE_ENTRIES_SEND);
-            value = temp;
+            value = PyStackRef_MakeHeapSafe(temp);
             LLTRACE_RESUME_FRAME();
             stack_pointer[0] = value;
             stack_pointer += 1;
diff --git a/Python/flowgraph.c b/Python/flowgraph.c
index 4f101a602c555e..a0d5690250cffb 100644
--- a/Python/flowgraph.c
+++ b/Python/flowgraph.c
@@ -574,6 +574,7 @@ normalize_jumps_in_block(cfg_builder *g, basicblock *b) {
         basicblock_addop(backwards_jump, NOT_TAKEN, 0, last->i_loc));
     RETURN_IF_ERROR(
         basicblock_add_jump(backwards_jump, JUMP, target, last->i_loc));
+    backwards_jump->b_startdepth = target->b_startdepth;
     last->i_opcode = reversed_opcode;
     last->i_target = b->b_next;
 
@@ -2581,6 +2582,422 @@ insert_superinstructions(cfg_builder *g)
     return res;
 }
 
+#define NOT_LOCAL -1
+#define DUMMY_INSTR -1
+
+typedef struct {
+    // Index of instruction that produced the reference or DUMMY_INSTR.
+    int instr;
+
+    // The local to which the reference refers or NOT_LOCAL.
+    int local;
+} ref;
+
+typedef struct {
+    ref *refs;
+    Py_ssize_t size;
+    Py_ssize_t capacity;
+} ref_stack;
+
+static int
+ref_stack_push(ref_stack *stack, ref r)
+{
+    if (stack->size == stack->capacity) {
+        Py_ssize_t new_cap = Py_MAX(32, stack->capacity * 2);
+        ref *refs = PyMem_Realloc(stack->refs, sizeof(*stack->refs) * new_cap);
+        if (refs == NULL) {
+            PyErr_NoMemory();
+            return -1;
+        }
+        stack->refs = refs;
+        stack->capacity = new_cap;
+    }
+    stack->refs[stack->size] = r;
+    stack->size++;
+    return 0;
+}
+
+static ref
+ref_stack_pop(ref_stack *stack)
+{
+    assert(stack->size > 0);
+    stack->size--;
+    ref r = stack->refs[stack->size];
+    return r;
+}
+
+static void
+ref_stack_swap_top(ref_stack *stack, Py_ssize_t off)
+{
+    Py_ssize_t idx = stack->size - off;
+    assert(idx >= 0 && idx < stack->size);
+    ref tmp = stack->refs[idx];
+    stack->refs[idx] = stack->refs[stack->size - 1];
+    stack->refs[stack->size - 1] = tmp;
+}
+
+static ref
+ref_stack_at(ref_stack *stack, Py_ssize_t idx)
+{
+    assert(idx >= 0 && idx < stack->size);
+    return stack->refs[idx];
+}
+
+static void
+ref_stack_clear(ref_stack *stack)
+{
+    stack->size = 0;
+}
+
+static void
+ref_stack_fini(ref_stack *stack)
+{
+    if (stack->refs != NULL) {
+        PyMem_Free(stack->refs);
+    }
+    stack->refs = NULL;
+    stack->capacity = 0;
+    stack->size = 0;
+}
+
+typedef enum {
+    // The loaded reference is still on the stack when the local is killed
+    SUPPORT_KILLED  = 1,
+    // The loaded reference is stored into a local
+    STORED_AS_LOCAL = 2,
+    // The loaded reference is still on the stack at the end of the basic block
+    REF_UNCONSUMED  = 4,
+} LoadFastInstrFlag;
+
+static void
+kill_local(uint8_t *instr_flags, ref_stack *refs, int local)
+{
+    for (Py_ssize_t i = 0; i < refs->size; i++) {
+        ref r = ref_stack_at(refs, i);
+        if (r.local == local) {
+            assert(r.instr >= 0);
+            instr_flags[r.instr] |= SUPPORT_KILLED;
+        }
+    }
+}
+
+static void
+store_local(uint8_t *instr_flags, ref_stack *refs, int local, ref r)
+{
+    kill_local(instr_flags, refs, local);
+    if (r.instr != DUMMY_INSTR) {
+        instr_flags[r.instr] |= STORED_AS_LOCAL;
+    }
+}
+
+static void
+load_fast_push_block(basicblock ***sp, basicblock *target,
+                     Py_ssize_t start_depth)
+{
+    assert(target->b_startdepth >= 0 && target->b_startdepth == start_depth);
+    if (!target->b_visited) {
+        target->b_visited = 1;
+        *(*sp)++ = target;
+    }
+}
+
+/*
+ * Strength reduce LOAD_FAST{_LOAD_FAST} instructions into faster variants that
+ * load borrowed references onto the operand stack.
+ *
+ * This is only safe when we can prove that the reference in the frame outlives
+ * the borrowed reference produced by the instruction. We make this tractable
+ * by enforcing the following lifetimes:
+ *
+ * 1. Borrowed references loaded onto the operand stack live until the end of
+ *    the instruction that consumes them from the stack. Any borrowed
+ *    references that would escape into the heap (e.g. into frame objects or
+ *    generators) are converted into new, strong references.
+ *
+ * 2. Locals live until they are either killed by an instruction
+ *    (e.g. STORE_FAST) or the frame is unwound. Any local that is overwritten
+ *    via `f_locals` is added to a tuple owned by the frame object.
+ *
+ * To simplify the problem of detecting which supporting references in the
+ * frame are killed by instructions that overwrite locals, we only allow
+ * borrowed references to be stored as a local in the frame if they were passed
+ * as an argument. {RETURN,YIELD}_VALUE convert borrowed references into new,
+ * strong references.
+ *
+ * Using the above, we can optimize any LOAD_FAST{_LOAD_FAST} instructions
+ * that meet the following criteria:
+ *
+ * 1. The produced reference must be consumed from the stack before the
+ *    supporting reference in the frame is killed.
+ *
+ * 2. The produced reference cannot be stored as a local.
+ *
+ * We use abstract interpretation to identify instructions that meet these
+ * criteria. For each basic block, we simulate the effect the bytecode has on a
+ * stack of abstract references and note any instructions that violate the
+ * criteria above. Once we've processed all the instructions in a block, any
+ * non-violating LOAD_FAST{_LOAD_FAST} can be optimized.
+ */
+static int
+optimize_load_fast(cfg_builder *g)
+{
+    int status;
+    ref_stack refs = {0};
+    int max_instrs = 0;
+    basicblock *entryblock = g->g_entryblock;
+    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
+        max_instrs = Py_MAX(max_instrs, b->b_iused);
+    }
+    size_t instr_flags_size = max_instrs * sizeof(uint8_t);
+    uint8_t *instr_flags = PyMem_Malloc(instr_flags_size);
+    if (instr_flags == NULL) {
+        PyErr_NoMemory();
+        return ERROR;
+    }
+    basicblock **blocks = make_cfg_traversal_stack(entryblock);
+    if (blocks == NULL) {
+        status = ERROR;
+        goto done;
+    }
+    basicblock **sp = blocks;
+    *sp = entryblock;
+    sp++;
+    entryblock->b_startdepth = 0;
+    entryblock->b_visited = 1;
+
+    #define PUSH_REF(instr, local)                \
+        do {                                      \
+            if (ref_stack_push(&refs, (ref){(instr), (local)}) < 0) { \
+                status = ERROR;                   \
+                goto done;                        \
+            }                                     \
+        } while(0)
+
+    while (sp != blocks) {
+        basicblock *block = *--sp;
+        assert(block->b_startdepth > -1);
+
+        // Reset per-block state.
+        memset(instr_flags, 0, block->b_iused * sizeof(*instr_flags));
+
+        // Reset the stack of refs. We don't track references on the stack
+        // across basic blocks, but the bytecode will expect their
+        // presence. Add dummy references as necessary.
+        ref_stack_clear(&refs);
+        for (int i = 0; i < block->b_startdepth; i++) {
+            PUSH_REF(DUMMY_INSTR, NOT_LOCAL);
+        }
+
+        for (int i = 0; i < block->b_iused; i++) {
+            cfg_instr *instr = &block->b_instr[i];
+            int opcode = instr->i_opcode;
+            int oparg = instr->i_oparg;
+            assert(opcode != EXTENDED_ARG);
+            switch (opcode) {
+                // Opcodes that load and store locals
+                case LOAD_FAST: {
+                    PUSH_REF(i, oparg);
+                    break;
+                }
+
+                case LOAD_FAST_AND_CLEAR: {
+                    kill_local(instr_flags, &refs, oparg);
+                    PUSH_REF(i, oparg);
+                    break;
+                }
+
+                case LOAD_FAST_LOAD_FAST: {
+                    PUSH_REF(i, oparg >> 4);
+                    PUSH_REF(i, oparg & 15);
+                    break;
+                }
+
+                case STORE_FAST: {
+                    ref r = ref_stack_pop(&refs);
+                    store_local(instr_flags, &refs, oparg, r);
+                    break;
+                }
+
+                case STORE_FAST_LOAD_FAST: {
+                    // STORE_FAST
+                    ref r = ref_stack_pop(&refs);
+                    store_local(instr_flags, &refs, oparg >> 4, r);
+                    // LOAD_FAST
+                    PUSH_REF(i, oparg & 15);
+                    break;
+                }
+
+                case STORE_FAST_STORE_FAST: {
+                    // STORE_FAST
+                    ref r = ref_stack_pop(&refs);
+                    store_local(instr_flags, &refs, oparg >> 4, r);
+                    // STORE_FAST
+                    r = ref_stack_pop(&refs);
+                    store_local(instr_flags, &refs, oparg & 15, r);
+                    break;
+                }
+
+                // Opcodes that shuffle values on the stack
+                case COPY: {
+                    assert(oparg > 0);
+                    Py_ssize_t idx = refs.size - oparg;
+                    ref r = ref_stack_at(&refs, idx);
+                    PUSH_REF(r.instr, r.local);
+                    break;
+                }
+
+                case SWAP: {
+                    assert(oparg >= 2);
+                    ref_stack_swap_top(&refs, oparg);
+                    break;
+                }
+
+                // We treat opcodes that do not consume all of their inputs on
+                // a case by case basis, as we have no generic way of knowing
+                // how many inputs should be left on the stack.
+
+                // Opcodes that consume no inputs
+                case GET_ANEXT:
+                case GET_LEN:
+                case IMPORT_FROM:
+                case MATCH_KEYS:
+                case MATCH_MAPPING:
+                case MATCH_SEQUENCE:
+                case WITH_EXCEPT_START: {
+                    int num_popped = _PyOpcode_num_popped(opcode, oparg);
+                    int num_pushed = _PyOpcode_num_pushed(opcode, oparg);
+                    int net_pushed = num_pushed - num_popped;
+                    assert(net_pushed >= 0);
+                    for (int i = 0; i < net_pushed; i++) {
+                        PUSH_REF(i, NOT_LOCAL);
+                    }
+                    break;
+                }
+
+                // Opcodes that consume some inputs and push no new values
+                case DICT_MERGE:
+                case DICT_UPDATE:
+                case LIST_APPEND:
+                case LIST_EXTEND:
+                case MAP_ADD:
+                case RERAISE:
+                case SET_ADD:
+                case SET_UPDATE: {
+                    int num_popped = _PyOpcode_num_popped(opcode, oparg);
+                    int num_pushed = _PyOpcode_num_pushed(opcode, oparg);
+                    int net_popped = num_popped - num_pushed;
+                    assert(net_popped > 0);
+                    for (int i = 0; i < net_popped; i++) {
+                        ref_stack_pop(&refs);
+                    }
+                    break;
+                }
+
+                // Opcodes that consume some inputs and push new values
+                case CHECK_EXC_MATCH: {
+                    ref_stack_pop(&refs);
+                    PUSH_REF(i, NOT_LOCAL);
+                    break;
+                }
+
+                case FOR_ITER: {
+                    load_fast_push_block(&sp, instr->i_target, refs.size + 1);
+                    PUSH_REF(i, NOT_LOCAL);
+                    break;
+                }
+
+                case LOAD_ATTR:
+                case LOAD_SUPER_ATTR: {
+                    ref self = ref_stack_pop(&refs);
+                    if (opcode == LOAD_SUPER_ATTR) {
+                        ref_stack_pop(&refs);
+                        ref_stack_pop(&refs);
+                    }
+                    PUSH_REF(i, NOT_LOCAL);
+                    if (oparg & 1) {
+                        // A method call; conservatively assume that self is pushed
+                        // back onto the stack
+                        PUSH_REF(self.instr, self.local);
+                    }
+                    break;
+                }
+
+                case SEND: {
+                    load_fast_push_block(&sp, instr->i_target, refs.size);
+                    ref_stack_pop(&refs);
+                    PUSH_REF(i, NOT_LOCAL);
+                    break;
+                }
+
+                // Opcodes that consume all of their inputs
+                default: {
+                    int num_popped = _PyOpcode_num_popped(opcode, oparg);
+                    int num_pushed = _PyOpcode_num_pushed(opcode, oparg);
+                    if (HAS_TARGET(instr->i_opcode)) {
+                        load_fast_push_block(&sp, instr->i_target, refs.size - num_popped + num_pushed);
+                    }
+                    if (!IS_BLOCK_PUSH_OPCODE(instr->i_opcode)) {
+                        // Block push opcodes only affect the stack when jumping
+                        // to the target.
+                        for (int j = 0; j < num_popped; j++) {
+                            ref_stack_pop(&refs);
+                        }
+                        for (int j = 0; j < num_pushed; j++) {
+                            PUSH_REF(i, NOT_LOCAL);
+                        }
+                    }
+                    break;
+                }
+            }
+        }
+
+        // Push fallthrough block
+        cfg_instr *term = basicblock_last_instr(block);
+        if (term != NULL && block->b_next != NULL &&
+            !(IS_UNCONDITIONAL_JUMP_OPCODE(term->i_opcode) ||
+              IS_SCOPE_EXIT_OPCODE(term->i_opcode))) {
+            assert(BB_HAS_FALLTHROUGH(block));
+            load_fast_push_block(&sp, block->b_next, refs.size);
+        }
+
+        // Mark instructions that produce values that are on the stack at the
+        // end of the basic block
+        for (Py_ssize_t i = 0; i < refs.size; i++) {
+            ref r = ref_stack_at(&refs, i);
+            if (r.instr != -1) {
+                instr_flags[r.instr] |= REF_UNCONSUMED;
+            }
+        }
+
+        // Optimize instructions
+        for (int i = 0; i < block->b_iused; i++) {
+            if (!instr_flags[i]) {
+                cfg_instr *instr = &block->b_instr[i];
+                switch (instr->i_opcode) {
+                    case LOAD_FAST:
+                        instr->i_opcode = LOAD_FAST_BORROW;
+                        break;
+                    case LOAD_FAST_LOAD_FAST:
+                        instr->i_opcode = LOAD_FAST_BORROW_LOAD_FAST_BORROW;
+                        break;
+                    default:
+                        break;
+                }
+            }
+        }
+    }
+
+    #undef PUSH_REF
+
+    status = SUCCESS;
+
+done:
+    ref_stack_fini(&refs);
+    PyMem_Free(instr_flags);
+    PyMem_Free(blocks);
+    return status;
+}
+
 // helper functions for add_checks_for_loads_of_unknown_variables
 static inline void
 maybe_push(basicblock *b, uint64_t unsafe_mask, basicblock ***sp)
@@ -3525,6 +3942,11 @@ _PyCfg_OptimizedCfgToInstructionSequence(cfg_builder *g,
     RETURN_IF_ERROR(normalize_jumps(g));
     assert(no_redundant_jumps(g));
 
+    /* Can't modify the bytecode after inserting instructions that produce
+     * borrowed references.
+     */
+    RETURN_IF_ERROR(optimize_load_fast(g));
+
     /* Can't modify the bytecode after computing jump offsets. */
     if (_PyCfg_ToInstructionSequence(g, seq) < 0) {
         return ERROR;
@@ -3608,6 +4030,15 @@ _PyCompile_OptimizeCfg(PyObject *seq, PyObject *consts, int nlocals)
                                 nparams, firstlineno) < 0) {
         goto error;
     }
+
+    if (calculate_stackdepth(g) == ERROR) {
+        goto error;
+    }
+
+    if (optimize_load_fast(g) != SUCCESS) {
+        goto error;
+    }
+
     res = cfg_to_instruction_sequence(g);
 error:
     Py_DECREF(const_cache);
diff --git a/Python/gc.c b/Python/gc.c
index e37d4b76456acc..dad088e09f872f 100644
--- a/Python/gc.c
+++ b/Python/gc.c
@@ -544,6 +544,12 @@ visit_decref(PyObject *op, void *parent)
 int
 _PyGC_VisitStackRef(_PyStackRef *ref, visitproc visit, void *arg)
 {
+    // This is a bit tricky! We want to ignore stackrefs with embedded
+    // refcounts when computing the incoming references, but otherwise treat
+    // them like normal.
+    if (!PyStackRef_RefcountOnObject(*ref) && (visit == visit_decref)) {
+        return 0;
+    }
     Py_VISIT(PyStackRef_AsPyObjectBorrow(*ref));
     return 0;
 }
@@ -554,7 +560,7 @@ _PyGC_VisitFrameStack(_PyInterpreterFrame *frame, visitproc visit, void *arg)
     _PyStackRef *ref = _PyFrame_GetLocalsArray(frame);
     /* locals and stack */
     for (; ref < frame->stackpointer; ref++) {
-        Py_VISIT(PyStackRef_AsPyObjectBorrow(*ref));
+        _Py_VISIT_STACKREF(*ref);
     }
     return 0;
 }
diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h
index 50db2c867c89e4..a4b5673c9e54b4 100644
--- a/Python/generated_cases.c.h
+++ b/Python/generated_cases.c.h
@@ -340,9 +340,8 @@
             // _BINARY_OP_INPLACE_ADD_UNICODE
             {
                 PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
-                PyObject *right_o = PyStackRef_AsPyObjectSteal(right);
                 assert(PyUnicode_CheckExact(left_o));
-                assert(PyUnicode_CheckExact(right_o));
+                assert(PyUnicode_CheckExact(PyStackRef_AsPyObjectBorrow(right)));
                 int next_oparg;
                 #if TIER_ONE
                 assert(next_instr->op.code == STORE_FAST);
@@ -369,9 +368,10 @@
                  * only the locals reference, so PyUnicode_Append knows
                  * that the string is safe to mutate.
                  */
-                assert(Py_REFCNT(left_o) >= 2);
+                assert(Py_REFCNT(left_o) >= 2 || !PyStackRef_IsHeapSafe(left));
                 PyStackRef_CLOSE_SPECIALIZED(left, _PyUnicode_ExactDealloc);
                 PyObject *temp = PyStackRef_AsPyObjectSteal(*target_local);
+                PyObject *right_o = PyStackRef_AsPyObjectSteal(right);
                 stack_pointer += -2;
                 assert(WITHIN_STACK_BOUNDS());
                 _PyFrame_SetStackPointer(frame, stack_pointer);
@@ -7291,8 +7291,7 @@
             {
                 retval = val;
                 assert(frame->owner != FRAME_OWNED_BY_INTERPRETER);
-                _PyStackRef temp = retval;
-                assert(PyStackRef_IsHeapSafe(temp));
+                _PyStackRef temp = PyStackRef_MakeHeapSafe(retval);
                 stack_pointer += -1;
                 assert(WITHIN_STACK_BOUNDS());
                 _PyFrame_SetStackPointer(frame, stack_pointer);
@@ -7376,7 +7375,7 @@
                 #endif
                 stack_pointer = _PyFrame_GetStackPointer(frame);
                 LOAD_IP(1 + INLINE_CACHE_ENTRIES_SEND);
-                value = temp;
+                value = PyStackRef_MakeHeapSafe(temp);
                 LLTRACE_RESUME_FRAME();
             }
             stack_pointer[0] = value;
@@ -8873,6 +8872,44 @@
             DISPATCH();
         }
 
+        TARGET(LOAD_FAST_BORROW) {
+            #if Py_TAIL_CALL_INTERP
+            int opcode = LOAD_FAST_BORROW;
+            (void)(opcode);
+            #endif
+            frame->instr_ptr = next_instr;
+            next_instr += 1;
+            INSTRUCTION_STATS(LOAD_FAST_BORROW);
+            _PyStackRef value;
+            assert(!PyStackRef_IsNull(GETLOCAL(oparg)));
+            value = PyStackRef_Borrow(GETLOCAL(oparg));
+            stack_pointer[0] = value;
+            stack_pointer += 1;
+            assert(WITHIN_STACK_BOUNDS());
+            DISPATCH();
+        }
+
+        TARGET(LOAD_FAST_BORROW_LOAD_FAST_BORROW) {
+            #if Py_TAIL_CALL_INTERP
+            int opcode = LOAD_FAST_BORROW_LOAD_FAST_BORROW;
+            (void)(opcode);
+            #endif
+            frame->instr_ptr = next_instr;
+            next_instr += 1;
+            INSTRUCTION_STATS(LOAD_FAST_BORROW_LOAD_FAST_BORROW);
+            _PyStackRef value1;
+            _PyStackRef value2;
+            uint32_t oparg1 = oparg >> 4;
+            uint32_t oparg2 = oparg & 15;
+            value1 = PyStackRef_Borrow(GETLOCAL(oparg1));
+            value2 = PyStackRef_Borrow(GETLOCAL(oparg2));
+            stack_pointer[0] = value1;
+            stack_pointer[1] = value2;
+            stack_pointer += 2;
+            assert(WITHIN_STACK_BOUNDS());
+            DISPATCH();
+        }
+
         TARGET(LOAD_FAST_CHECK) {
             #if Py_TAIL_CALL_INTERP
             int opcode = LOAD_FAST_CHECK;
@@ -10346,8 +10383,7 @@
             _PyStackRef res;
             retval = stack_pointer[-1];
             assert(frame->owner != FRAME_OWNED_BY_INTERPRETER);
-            _PyStackRef temp = retval;
-            assert(PyStackRef_IsHeapSafe(temp));
+            _PyStackRef temp = PyStackRef_MakeHeapSafe(retval);
             stack_pointer += -1;
             assert(WITHIN_STACK_BOUNDS());
             _PyFrame_SetStackPointer(frame, stack_pointer);
@@ -10411,7 +10447,7 @@
                     PyGenObject *gen = (PyGenObject *)receiver_o;
                     _PyInterpreterFrame *gen_frame = &gen->gi_iframe;
                     STACK_SHRINK(1);
-                    _PyFrame_StackPush(gen_frame, v);
+                    _PyFrame_StackPush(gen_frame, PyStackRef_MakeHeapSafe(v));
                     gen->gi_frame_state = FRAME_EXECUTING;
                     gen->gi_exc_state.previous_item = tstate->exc_info;
                     tstate->exc_info = &gen->gi_exc_state;
@@ -10512,7 +10548,7 @@
                 }
                 STAT_INC(SEND, hit);
                 gen_frame = &gen->gi_iframe;
-                _PyFrame_StackPush(gen_frame, v);
+                _PyFrame_StackPush(gen_frame, PyStackRef_MakeHeapSafe(v));
                 gen->gi_frame_state = FRAME_EXECUTING;
                 gen->gi_exc_state.previous_item = tstate->exc_info;
                 tstate->exc_info = &gen->gi_exc_state;
@@ -12033,7 +12069,7 @@
             #endif
             stack_pointer = _PyFrame_GetStackPointer(frame);
             LOAD_IP(1 + INLINE_CACHE_ENTRIES_SEND);
-            value = temp;
+            value = PyStackRef_MakeHeapSafe(temp);
             LLTRACE_RESUME_FRAME();
             stack_pointer[0] = value;
             stack_pointer += 1;
diff --git a/Python/opcode_targets.h b/Python/opcode_targets.h
index c0dac90aebd458..5b9fb794c6bddc 100644
--- a/Python/opcode_targets.h
+++ b/Python/opcode_targets.h
@@ -84,6 +84,8 @@ static void *opcode_targets[256] = {
     &&TARGET_LOAD_DEREF,
     &&TARGET_LOAD_FAST,
     &&TARGET_LOAD_FAST_AND_CLEAR,
+    &&TARGET_LOAD_FAST_BORROW,
+    &&TARGET_LOAD_FAST_BORROW_LOAD_FAST_BORROW,
     &&TARGET_LOAD_FAST_CHECK,
     &&TARGET_LOAD_FAST_LOAD_FAST,
     &&TARGET_LOAD_FROM_DICT_OR_DEREF,
@@ -126,8 +128,6 @@ static void *opcode_targets[256] = {
     &&_unknown_opcode,
     &&_unknown_opcode,
     &&_unknown_opcode,
-    &&_unknown_opcode,
-    &&_unknown_opcode,
     &&TARGET_RESUME,
     &&TARGET_BINARY_OP_ADD_FLOAT,
     &&TARGET_BINARY_OP_ADD_INT,
@@ -414,6 +414,8 @@ Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_LOAD_CONST_MORTAL(TAIL_CALL_PARA
 Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_LOAD_DEREF(TAIL_CALL_PARAMS);
 Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_LOAD_FAST(TAIL_CALL_PARAMS);
 Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_LOAD_FAST_AND_CLEAR(TAIL_CALL_PARAMS);
+Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_LOAD_FAST_BORROW(TAIL_CALL_PARAMS);
+Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_LOAD_FAST_BORROW_LOAD_FAST_BORROW(TAIL_CALL_PARAMS);
 Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_LOAD_FAST_CHECK(TAIL_CALL_PARAMS);
 Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_LOAD_FAST_LOAD_FAST(TAIL_CALL_PARAMS);
 Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_LOAD_FROM_DICT_OR_DEREF(TAIL_CALL_PARAMS);
@@ -648,6 +650,8 @@ static py_tail_call_funcptr INSTRUCTION_TABLE[256] = {
     [LOAD_DEREF] = _TAIL_CALL_LOAD_DEREF,
     [LOAD_FAST] = _TAIL_CALL_LOAD_FAST,
     [LOAD_FAST_AND_CLEAR] = _TAIL_CALL_LOAD_FAST_AND_CLEAR,
+    [LOAD_FAST_BORROW] = _TAIL_CALL_LOAD_FAST_BORROW,
+    [LOAD_FAST_BORROW_LOAD_FAST_BORROW] = _TAIL_CALL_LOAD_FAST_BORROW_LOAD_FAST_BORROW,
     [LOAD_FAST_CHECK] = _TAIL_CALL_LOAD_FAST_CHECK,
     [LOAD_FAST_LOAD_FAST] = _TAIL_CALL_LOAD_FAST_LOAD_FAST,
     [LOAD_FROM_DICT_OR_DEREF] = _TAIL_CALL_LOAD_FROM_DICT_OR_DEREF,
@@ -725,8 +729,6 @@ static py_tail_call_funcptr INSTRUCTION_TABLE[256] = {
     [UNPACK_SEQUENCE_TWO_TUPLE] = _TAIL_CALL_UNPACK_SEQUENCE_TWO_TUPLE,
     [WITH_EXCEPT_START] = _TAIL_CALL_WITH_EXCEPT_START,
     [YIELD_VALUE] = _TAIL_CALL_YIELD_VALUE,
-    [117] = _TAIL_CALL_UNKNOWN_OPCODE,
-    [118] = _TAIL_CALL_UNKNOWN_OPCODE,
     [119] = _TAIL_CALL_UNKNOWN_OPCODE,
     [120] = _TAIL_CALL_UNKNOWN_OPCODE,
     [121] = _TAIL_CALL_UNKNOWN_OPCODE,
diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c
index 017a2eeca0741e..afaf2e059e1b4a 100644
--- a/Python/optimizer_analysis.c
+++ b/Python/optimizer_analysis.c
@@ -573,6 +573,7 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size)
                 if (last->opcode == _LOAD_CONST_INLINE  ||
                     last->opcode == _LOAD_CONST_INLINE_BORROW ||
                     last->opcode == _LOAD_FAST ||
+                    last->opcode == _LOAD_FAST_BORROW ||
                     last->opcode == _COPY
                 ) {
                     last->opcode = _NOP;
diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c
index 8d9e94a2445182..163a8bfd2679db 100644
--- a/Python/optimizer_bytecodes.c
+++ b/Python/optimizer_bytecodes.c
@@ -86,6 +86,10 @@ dummy_func(void) {
         value = GETLOCAL(oparg);
     }
 
+    op(_LOAD_FAST_BORROW, (-- value)) {
+        value = GETLOCAL(oparg);
+    }
+
     op(_LOAD_FAST_AND_CLEAR, (-- value)) {
         value = GETLOCAL(oparg);
         JitOptSymbol *temp = sym_new_null(ctx);
diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h
index 6a8ac75b63eb0e..f1280ba1e40a7f 100644
--- a/Python/optimizer_cases.c.h
+++ b/Python/optimizer_cases.c.h
@@ -47,6 +47,15 @@
             break;
         }
 
+        case _LOAD_FAST_BORROW: {
+            JitOptSymbol *value;
+            value = GETLOCAL(oparg);
+            stack_pointer[0] = value;
+            stack_pointer += 1;
+            assert(WITHIN_STACK_BOUNDS());
+            break;
+        }
+
         case _LOAD_FAST_AND_CLEAR: {
             JitOptSymbol *value;
             value = GETLOCAL(oparg);
diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py
index 491b5d127cf0f5..5d75b9f036b524 100644
--- a/Tools/cases_generator/analyzer.py
+++ b/Tools/cases_generator/analyzer.py
@@ -580,6 +580,7 @@ def has_error_without_pop(op: parser.CodeDef) -> bool:
     "PyStackRef_AsPyObjectNew",
     "PyStackRef_FromPyObjectNewMortal",
     "PyStackRef_AsPyObjectSteal",
+    "PyStackRef_Borrow",
     "PyStackRef_CLEAR",
     "PyStackRef_CLOSE_SPECIALIZED",
     "PyStackRef_DUP",
@@ -595,6 +596,7 @@ def has_error_without_pop(op: parser.CodeDef) -> bool:
     "PyStackRef_IsTrue",
     "PyStackRef_IsFalse",
     "PyStackRef_IsNull",
+    "PyStackRef_MakeHeapSafe",
     "PyStackRef_None",
     "PyStackRef_TYPE",
     "PyStackRef_True",
diff --git a/Tools/cases_generator/opcode_metadata_generator.py b/Tools/cases_generator/opcode_metadata_generator.py
index 02283a0b647e84..620e4b6f1f4a69 100644
--- a/Tools/cases_generator/opcode_metadata_generator.py
+++ b/Tools/cases_generator/opcode_metadata_generator.py
@@ -227,9 +227,10 @@ def generate_expansion_table(analysis: Analysis, out: CWriter) -> None:
         expansions: list[tuple[str, str, int]] = []  # [(name, size, offset), ...]
         if inst.is_super():
             pieces = inst.name.split("_")
-            assert len(pieces) == 4, f"{inst.name} doesn't look like a super-instr"
-            name1 = "_".join(pieces[:2])
-            name2 = "_".join(pieces[2:])
+            assert len(pieces) % 2 == 0, f"{inst.name} doesn't look like a super-instr"
+            parts_per_piece = int(len(pieces) / 2)
+            name1 = "_".join(pieces[:parts_per_piece])
+            name2 = "_".join(pieces[parts_per_piece:])
             assert name1 in analysis.instructions, f"{name1} doesn't match any instr"
             assert name2 in analysis.instructions, f"{name2} doesn't match any instr"
             instr1 = analysis.instructions[name1]

_______________________________________________
Python-checkins mailing list -- python-checkins@python.org
To unsubscribe send an email to python-checkins-le...@python.org
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: arch...@mail-archive.com

Reply via email to