https://github.com/python/cpython/commit/de2f7da77d9e8dc6758430249e7179d37b3fee44
commit: de2f7da77d9e8dc6758430249e7179d37b3fee44
branch: main
author: T. Wouters <tho...@python.org>
committer: Yhg1s <tho...@python.org>
date: 2025-03-12T16:21:46+01:00
summary:

gh-115999: Add free-threaded specialization for FOR_ITER (#128798)

Add free-threaded versions of the existing FOR_ITER specializations (lists,
tuples, fast range iterators, and generators) without weakening their
existing thread-safety guarantees. (Iterating over shared lists/tuples/ranges
is fine, as before. Reusing iterators between threads is not fine, as before.
Sharing generators between threads is a recipe for serious crashes, as
before.)
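
Concretely (a hedged editor's sketch of that contract, not code from this
change): each thread that runs a for loop over a shared list creates its own
iterator, which stays safe; handing one iterator object to several threads
remains unsupported.

    import threading

    shared = list(range(1000))

    def worker():
        # Safe, as before: the for loop gives this thread its own
        # iterator over the shared list.
        total = 0
        for item in shared:
            total += item

    threads = [threading.Thread(target=worker) for _ in range(4)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()

    # Still NOT safe, as before: sharing a single iterator object
    # between threads (e.g. handing iter(shared) to several workers)
    # may skip or duplicate items.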

files:
M Include/internal/pycore_list.h
M Include/internal/pycore_opcode_metadata.h
M Include/internal/pycore_uop_ids.h
M Include/internal/pycore_uop_metadata.h
M Lib/test/test_opcache.py
M Objects/listobject.c
M Python/bytecodes.c
M Python/executor_cases.c.h
M Python/generated_cases.c.h
M Python/optimizer.c
M Python/optimizer_cases.c.h
M Python/specialize.c
M Tools/cases_generator/analyzer.py

diff --git a/Include/internal/pycore_list.h b/Include/internal/pycore_list.h
index 5d817891408481..5c66d76a0d6e7f 100644
--- a/Include/internal/pycore_list.h
+++ b/Include/internal/pycore_list.h
@@ -8,11 +8,19 @@ extern "C" {
 #  error "this header requires Py_BUILD_CORE define"
 #endif
 
+#ifdef Py_GIL_DISABLED
+#include "pycore_stackref.h"
+#endif
+
 PyAPI_FUNC(PyObject*) _PyList_Extend(PyListObject *, PyObject *);
 extern void _PyList_DebugMallocStats(FILE *out);
 // _PyList_GetItemRef should be used only when the object is known as a list
// because it doesn't raise TypeError when the object is not a list, whereas PyList_GetItemRef does.
 extern PyObject* _PyList_GetItemRef(PyListObject *, Py_ssize_t i);
+#ifdef Py_GIL_DISABLED
+// Returns -1 in case of races with other threads.
+extern int _PyList_GetItemRefNoLock(PyListObject *, Py_ssize_t, _PyStackRef *);
+#endif
 
 #define _PyList_ITEMS(op) _Py_RVALUE(_PyList_CAST(op)->ob_item)
 
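
A note on the helper declared above: its tri-state return is the load-bearing
part of the free-threaded fast path. As a rough Python-level analogy (the
real helper is C, writes into a _PyStackRef out-parameter, and appears later
in this diff in Objects/listobject.c), the caller distinguishes three
outcomes:

    # Hypothetical Python model of the tri-state protocol of
    # _PyList_GetItemRefNoLock; the real implementation is lock-free C.
    def get_item_no_lock(seq, i):
        if not (0 <= i < len(seq)):
            return 0, None    # 0: out of bounds -> iterator is exhausted
        try:
            # On a free-threaded build, another thread may shrink seq
            # between the bounds check and this fetch.
            return 1, seq[i]  # 1: success -> caller advances it_index
        except IndexError:
            return -1, None   # -1: lost a race -> deopt to generic FOR_ITER

    print(get_item_no_lock([1, 2, 3], 1))  # (1, 2)
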
diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h
index eda9b53629ce2d..152238d21e5da7 100644
--- a/Include/internal/pycore_opcode_metadata.h
+++ b/Include/internal/pycore_opcode_metadata.h
@@ -2094,7 +2094,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[266] = {
    [FORMAT_WITH_SPEC] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
    [FOR_ITER] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG },
    [FOR_ITER_GEN] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG },
-    [FOR_ITER_LIST] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG },
+    [FOR_ITER_LIST] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG },
    [FOR_ITER_RANGE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG },
    [FOR_ITER_TUPLE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG },
     [GET_AITER] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
@@ -2331,7 +2331,7 @@ _PyOpcode_macro_expansion[256] = {
    [FORMAT_WITH_SPEC] = { .nuops = 1, .uops = { { _FORMAT_WITH_SPEC, OPARG_SIMPLE, 0 } } },
    [FOR_ITER] = { .nuops = 1, .uops = { { _FOR_ITER, OPARG_REPLACED, 0 } } },
    [FOR_ITER_GEN] = { .nuops = 3, .uops = { { _CHECK_PEP_523, OPARG_SIMPLE, 1 }, { _FOR_ITER_GEN_FRAME, OPARG_SIMPLE, 1 }, { _PUSH_FRAME, OPARG_SIMPLE, 1 } } },
-    [FOR_ITER_LIST] = { .nuops = 3, .uops = { { _ITER_CHECK_LIST, OPARG_SIMPLE, 1 }, { _ITER_JUMP_LIST, OPARG_REPLACED, 1 }, { _ITER_NEXT_LIST, OPARG_SIMPLE, 1 } } },
+    [FOR_ITER_LIST] = { .nuops = 3, .uops = { { _ITER_CHECK_LIST, OPARG_SIMPLE, 1 }, { _ITER_JUMP_LIST, OPARG_REPLACED, 1 }, { _ITER_NEXT_LIST, OPARG_REPLACED, 1 } } },
    [FOR_ITER_RANGE] = { .nuops = 3, .uops = { { _ITER_CHECK_RANGE, OPARG_SIMPLE, 1 }, { _ITER_JUMP_RANGE, OPARG_REPLACED, 1 }, { _ITER_NEXT_RANGE, OPARG_SIMPLE, 1 } } },
    [FOR_ITER_TUPLE] = { .nuops = 3, .uops = { { _ITER_CHECK_TUPLE, OPARG_SIMPLE, 1 }, { _ITER_JUMP_TUPLE, OPARG_REPLACED, 1 }, { _ITER_NEXT_TUPLE, OPARG_SIMPLE, 1 } } },
     [GET_AITER] = { .nuops = 1, .uops = { { _GET_AITER, OPARG_SIMPLE, 0 } } },
diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h
index 5143b10def5396..095fd043090fbb 100644
--- a/Include/internal/pycore_uop_ids.h
+++ b/Include/internal/pycore_uop_ids.h
@@ -164,126 +164,127 @@ extern "C" {
 #define _ITER_JUMP_RANGE 398
 #define _ITER_JUMP_TUPLE 399
 #define _ITER_NEXT_LIST 400
-#define _ITER_NEXT_RANGE 401
-#define _ITER_NEXT_TUPLE 402
-#define _JUMP_TO_TOP 403
+#define _ITER_NEXT_LIST_TIER_TWO 401
+#define _ITER_NEXT_RANGE 402
+#define _ITER_NEXT_TUPLE 403
+#define _JUMP_TO_TOP 404
 #define _LIST_APPEND LIST_APPEND
 #define _LIST_EXTEND LIST_EXTEND
-#define _LOAD_ATTR 404
-#define _LOAD_ATTR_CLASS 405
+#define _LOAD_ATTR 405
+#define _LOAD_ATTR_CLASS 406
 #define _LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN
-#define _LOAD_ATTR_INSTANCE_VALUE 406
-#define _LOAD_ATTR_METHOD_LAZY_DICT 407
-#define _LOAD_ATTR_METHOD_NO_DICT 408
-#define _LOAD_ATTR_METHOD_WITH_VALUES 409
-#define _LOAD_ATTR_MODULE 410
-#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 411
-#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 412
-#define _LOAD_ATTR_PROPERTY_FRAME 413
-#define _LOAD_ATTR_SLOT 414
-#define _LOAD_ATTR_WITH_HINT 415
+#define _LOAD_ATTR_INSTANCE_VALUE 407
+#define _LOAD_ATTR_METHOD_LAZY_DICT 408
+#define _LOAD_ATTR_METHOD_NO_DICT 409
+#define _LOAD_ATTR_METHOD_WITH_VALUES 410
+#define _LOAD_ATTR_MODULE 411
+#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 412
+#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 413
+#define _LOAD_ATTR_PROPERTY_FRAME 414
+#define _LOAD_ATTR_SLOT 415
+#define _LOAD_ATTR_WITH_HINT 416
 #define _LOAD_BUILD_CLASS LOAD_BUILD_CLASS
-#define _LOAD_BYTECODE 416
+#define _LOAD_BYTECODE 417
 #define _LOAD_COMMON_CONSTANT LOAD_COMMON_CONSTANT
 #define _LOAD_CONST LOAD_CONST
 #define _LOAD_CONST_IMMORTAL LOAD_CONST_IMMORTAL
-#define _LOAD_CONST_INLINE 417
-#define _LOAD_CONST_INLINE_BORROW 418
+#define _LOAD_CONST_INLINE 418
+#define _LOAD_CONST_INLINE_BORROW 419
 #define _LOAD_CONST_MORTAL LOAD_CONST_MORTAL
 #define _LOAD_DEREF LOAD_DEREF
-#define _LOAD_FAST 419
-#define _LOAD_FAST_0 420
-#define _LOAD_FAST_1 421
-#define _LOAD_FAST_2 422
-#define _LOAD_FAST_3 423
-#define _LOAD_FAST_4 424
-#define _LOAD_FAST_5 425
-#define _LOAD_FAST_6 426
-#define _LOAD_FAST_7 427
+#define _LOAD_FAST 420
+#define _LOAD_FAST_0 421
+#define _LOAD_FAST_1 422
+#define _LOAD_FAST_2 423
+#define _LOAD_FAST_3 424
+#define _LOAD_FAST_4 425
+#define _LOAD_FAST_5 426
+#define _LOAD_FAST_6 427
+#define _LOAD_FAST_7 428
 #define _LOAD_FAST_AND_CLEAR LOAD_FAST_AND_CLEAR
 #define _LOAD_FAST_CHECK LOAD_FAST_CHECK
 #define _LOAD_FAST_LOAD_FAST LOAD_FAST_LOAD_FAST
 #define _LOAD_FROM_DICT_OR_DEREF LOAD_FROM_DICT_OR_DEREF
 #define _LOAD_FROM_DICT_OR_GLOBALS LOAD_FROM_DICT_OR_GLOBALS
-#define _LOAD_GLOBAL 428
-#define _LOAD_GLOBAL_BUILTINS 429
-#define _LOAD_GLOBAL_MODULE 430
+#define _LOAD_GLOBAL 429
+#define _LOAD_GLOBAL_BUILTINS 430
+#define _LOAD_GLOBAL_MODULE 431
 #define _LOAD_LOCALS LOAD_LOCALS
 #define _LOAD_NAME LOAD_NAME
-#define _LOAD_SMALL_INT 431
-#define _LOAD_SMALL_INT_0 432
-#define _LOAD_SMALL_INT_1 433
-#define _LOAD_SMALL_INT_2 434
-#define _LOAD_SMALL_INT_3 435
+#define _LOAD_SMALL_INT 432
+#define _LOAD_SMALL_INT_0 433
+#define _LOAD_SMALL_INT_1 434
+#define _LOAD_SMALL_INT_2 435
+#define _LOAD_SMALL_INT_3 436
 #define _LOAD_SPECIAL LOAD_SPECIAL
 #define _LOAD_SUPER_ATTR_ATTR LOAD_SUPER_ATTR_ATTR
 #define _LOAD_SUPER_ATTR_METHOD LOAD_SUPER_ATTR_METHOD
-#define _MAKE_CALLARGS_A_TUPLE 436
+#define _MAKE_CALLARGS_A_TUPLE 437
 #define _MAKE_CELL MAKE_CELL
 #define _MAKE_FUNCTION MAKE_FUNCTION
-#define _MAKE_WARM 437
+#define _MAKE_WARM 438
 #define _MAP_ADD MAP_ADD
 #define _MATCH_CLASS MATCH_CLASS
 #define _MATCH_KEYS MATCH_KEYS
 #define _MATCH_MAPPING MATCH_MAPPING
 #define _MATCH_SEQUENCE MATCH_SEQUENCE
-#define _MAYBE_EXPAND_METHOD 438
-#define _MAYBE_EXPAND_METHOD_KW 439
-#define _MONITOR_CALL 440
-#define _MONITOR_CALL_KW 441
-#define _MONITOR_JUMP_BACKWARD 442
-#define _MONITOR_RESUME 443
+#define _MAYBE_EXPAND_METHOD 439
+#define _MAYBE_EXPAND_METHOD_KW 440
+#define _MONITOR_CALL 441
+#define _MONITOR_CALL_KW 442
+#define _MONITOR_JUMP_BACKWARD 443
+#define _MONITOR_RESUME 444
 #define _NOP NOP
 #define _POP_EXCEPT POP_EXCEPT
-#define _POP_JUMP_IF_FALSE 444
-#define _POP_JUMP_IF_TRUE 445
+#define _POP_JUMP_IF_FALSE 445
+#define _POP_JUMP_IF_TRUE 446
 #define _POP_TOP POP_TOP
-#define _POP_TOP_LOAD_CONST_INLINE 446
-#define _POP_TOP_LOAD_CONST_INLINE_BORROW 447
+#define _POP_TOP_LOAD_CONST_INLINE 447
+#define _POP_TOP_LOAD_CONST_INLINE_BORROW 448
 #define _PUSH_EXC_INFO PUSH_EXC_INFO
-#define _PUSH_FRAME 448
+#define _PUSH_FRAME 449
 #define _PUSH_NULL PUSH_NULL
-#define _PUSH_NULL_CONDITIONAL 449
-#define _PY_FRAME_GENERAL 450
-#define _PY_FRAME_KW 451
-#define _QUICKEN_RESUME 452
-#define _REPLACE_WITH_TRUE 453
+#define _PUSH_NULL_CONDITIONAL 450
+#define _PY_FRAME_GENERAL 451
+#define _PY_FRAME_KW 452
+#define _QUICKEN_RESUME 453
+#define _REPLACE_WITH_TRUE 454
 #define _RESUME_CHECK RESUME_CHECK
 #define _RETURN_GENERATOR RETURN_GENERATOR
 #define _RETURN_VALUE RETURN_VALUE
-#define _SAVE_RETURN_OFFSET 454
-#define _SEND 455
-#define _SEND_GEN_FRAME 456
+#define _SAVE_RETURN_OFFSET 455
+#define _SEND 456
+#define _SEND_GEN_FRAME 457
 #define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS
 #define _SET_ADD SET_ADD
 #define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE
 #define _SET_UPDATE SET_UPDATE
-#define _START_EXECUTOR 457
-#define _STORE_ATTR 458
-#define _STORE_ATTR_INSTANCE_VALUE 459
-#define _STORE_ATTR_SLOT 460
-#define _STORE_ATTR_WITH_HINT 461
+#define _START_EXECUTOR 458
+#define _STORE_ATTR 459
+#define _STORE_ATTR_INSTANCE_VALUE 460
+#define _STORE_ATTR_SLOT 461
+#define _STORE_ATTR_WITH_HINT 462
 #define _STORE_DEREF STORE_DEREF
-#define _STORE_FAST 462
-#define _STORE_FAST_0 463
-#define _STORE_FAST_1 464
-#define _STORE_FAST_2 465
-#define _STORE_FAST_3 466
-#define _STORE_FAST_4 467
-#define _STORE_FAST_5 468
-#define _STORE_FAST_6 469
-#define _STORE_FAST_7 470
+#define _STORE_FAST 463
+#define _STORE_FAST_0 464
+#define _STORE_FAST_1 465
+#define _STORE_FAST_2 466
+#define _STORE_FAST_3 467
+#define _STORE_FAST_4 468
+#define _STORE_FAST_5 469
+#define _STORE_FAST_6 470
+#define _STORE_FAST_7 471
 #define _STORE_FAST_LOAD_FAST STORE_FAST_LOAD_FAST
 #define _STORE_FAST_STORE_FAST STORE_FAST_STORE_FAST
 #define _STORE_GLOBAL STORE_GLOBAL
 #define _STORE_NAME STORE_NAME
-#define _STORE_SLICE 471
-#define _STORE_SUBSCR 472
+#define _STORE_SLICE 472
+#define _STORE_SUBSCR 473
 #define _STORE_SUBSCR_DICT STORE_SUBSCR_DICT
 #define _STORE_SUBSCR_LIST_INT STORE_SUBSCR_LIST_INT
 #define _SWAP SWAP
-#define _TIER2_RESUME_CHECK 473
-#define _TO_BOOL 474
+#define _TIER2_RESUME_CHECK 474
+#define _TO_BOOL 475
 #define _TO_BOOL_BOOL TO_BOOL_BOOL
 #define _TO_BOOL_INT TO_BOOL_INT
 #define _TO_BOOL_LIST TO_BOOL_LIST
@@ -293,13 +294,13 @@ extern "C" {
 #define _UNARY_NEGATIVE UNARY_NEGATIVE
 #define _UNARY_NOT UNARY_NOT
 #define _UNPACK_EX UNPACK_EX
-#define _UNPACK_SEQUENCE 475
+#define _UNPACK_SEQUENCE 476
 #define _UNPACK_SEQUENCE_LIST UNPACK_SEQUENCE_LIST
 #define _UNPACK_SEQUENCE_TUPLE UNPACK_SEQUENCE_TUPLE
 #define _UNPACK_SEQUENCE_TWO_TUPLE UNPACK_SEQUENCE_TWO_TUPLE
 #define _WITH_EXCEPT_START WITH_EXCEPT_START
 #define _YIELD_VALUE YIELD_VALUE
-#define MAX_UOP_ID 475
+#define MAX_UOP_ID 476
 
 #ifdef __cplusplus
 }
diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h
index 883a07a408ca92..7f52352913a4ad 100644
--- a/Include/internal/pycore_uop_metadata.h
+++ b/Include/internal/pycore_uop_metadata.h
@@ -185,7 +185,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = {
    [_FOR_ITER_TIER_TWO] = HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG,
     [_ITER_CHECK_LIST] = HAS_EXIT_FLAG,
     [_GUARD_NOT_EXHAUSTED_LIST] = HAS_EXIT_FLAG,
-    [_ITER_NEXT_LIST] = 0,
+    [_ITER_NEXT_LIST_TIER_TWO] = HAS_EXIT_FLAG | HAS_ESCAPES_FLAG,
     [_ITER_CHECK_TUPLE] = HAS_EXIT_FLAG,
     [_GUARD_NOT_EXHAUSTED_TUPLE] = HAS_EXIT_FLAG,
     [_ITER_NEXT_TUPLE] = 0,
@@ -428,7 +428,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = {
     [_ITER_CHECK_LIST] = "_ITER_CHECK_LIST",
     [_ITER_CHECK_RANGE] = "_ITER_CHECK_RANGE",
     [_ITER_CHECK_TUPLE] = "_ITER_CHECK_TUPLE",
-    [_ITER_NEXT_LIST] = "_ITER_NEXT_LIST",
+    [_ITER_NEXT_LIST_TIER_TWO] = "_ITER_NEXT_LIST_TIER_TWO",
     [_ITER_NEXT_RANGE] = "_ITER_NEXT_RANGE",
     [_ITER_NEXT_TUPLE] = "_ITER_NEXT_TUPLE",
     [_JUMP_TO_TOP] = "_JUMP_TO_TOP",
@@ -889,7 +889,7 @@ int _PyUop_num_popped(int opcode, int oparg)
             return 0;
         case _GUARD_NOT_EXHAUSTED_LIST:
             return 0;
-        case _ITER_NEXT_LIST:
+        case _ITER_NEXT_LIST_TIER_TWO:
             return 0;
         case _ITER_CHECK_TUPLE:
             return 0;
diff --git a/Lib/test/test_opcache.py b/Lib/test/test_opcache.py
index e4224b843b2e00..946a4827fe7f4e 100644
--- a/Lib/test/test_opcache.py
+++ b/Lib/test/test_opcache.py
@@ -1802,5 +1802,44 @@ def unused(): pass
         self.assert_specialized(load_const, "LOAD_CONST_MORTAL")
         self.assert_no_opcode(load_const, "LOAD_CONST")
 
+    @cpython_only
+    @requires_specialization_ft
+    def test_for_iter(self):
+        L = list(range(10))
+        def for_iter_list():
+            for i in L:
+                self.assertIn(i, L)
+
+        for_iter_list()
+        self.assert_specialized(for_iter_list, "FOR_ITER_LIST")
+        self.assert_no_opcode(for_iter_list, "FOR_ITER")
+
+        t = tuple(range(10))
+        def for_iter_tuple():
+            for i in t:
+                self.assertIn(i, t)
+
+        for_iter_tuple()
+        self.assert_specialized(for_iter_tuple, "FOR_ITER_TUPLE")
+        self.assert_no_opcode(for_iter_tuple, "FOR_ITER")
+
+        r = range(10)
+        def for_iter_range():
+            for i in r:
+                self.assertIn(i, r)
+
+        for_iter_range()
+        self.assert_specialized(for_iter_range, "FOR_ITER_RANGE")
+        self.assert_no_opcode(for_iter_range, "FOR_ITER")
+
+        def for_iter_generator():
+            for i in (i for i in range(10)):
+                i + 1
+
+        for_iter_generator()
+        self.assert_specialized(for_iter_generator, "FOR_ITER_GEN")
+        self.assert_no_opcode(for_iter_generator, "FOR_ITER")
+
+
 if __name__ == "__main__":
     unittest.main()
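
These tests follow the file's existing warm-up pattern: run the loop once so
the adaptive counter triggers, then assert that the rewritten bytecode
contains the specialized opcode and no generic FOR_ITER. Outside the test
suite, the same rewriting can be observed with the public dis module (a
sketch; exact warm-up thresholds are an interpreter implementation detail):

    import dis

    def f():
        for i in [0] * 20:   # list iteration, candidate for FOR_ITER_LIST
            pass

    for _ in range(4):       # warm up the adaptive FOR_ITER counter
        f()
    # On a specializing build, the adaptive disassembly should now show
    # FOR_ITER_LIST where the quickened FOR_ITER used to be.
    dis.dis(f, adaptive=True)
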
diff --git a/Objects/listobject.c b/Objects/listobject.c
index 4ad2b032f64c62..85eb5e56853b8f 100644
--- a/Objects/listobject.c
+++ b/Objects/listobject.c
@@ -415,6 +415,32 @@ _PyList_GetItemRef(PyListObject *list, Py_ssize_t i)
     return list_get_item_ref(list, i);
 }
 
+#ifdef Py_GIL_DISABLED
+int
+_PyList_GetItemRefNoLock(PyListObject *list, Py_ssize_t i, _PyStackRef *result)
+{
+    assert(_Py_IsOwnedByCurrentThread((PyObject *)list) ||
+           _PyObject_GC_IS_SHARED(list));
+    if (!valid_index(i, PyList_GET_SIZE(list))) {
+        return 0;
+    }
+    PyObject **ob_item = _Py_atomic_load_ptr(&list->ob_item);
+    if (ob_item == NULL) {
+        return 0;
+    }
+    Py_ssize_t cap = list_capacity(ob_item);
+    assert(cap != -1);
+    if (!valid_index(i, cap)) {
+        return 0;
+    }
+    PyObject *obj = _Py_atomic_load_ptr(&ob_item[i]);
+    if (obj == NULL || !_Py_TryIncrefCompareStackRef(&ob_item[i], obj, result)) {
+        return -1;
+    }
+    return 1;
+}
+#endif
+
 int
 PyList_SetItem(PyObject *op, Py_ssize_t i,
                PyObject *newitem)
diff --git a/Python/bytecodes.c b/Python/bytecodes.c
index 17dc0c5bfb5c9f..aaa25161d0943f 100644
--- a/Python/bytecodes.c
+++ b/Python/bytecodes.c
@@ -3005,7 +3005,7 @@ dummy_func(
         };
 
         specializing op(_SPECIALIZE_FOR_ITER, (counter/1, iter -- iter)) {
-            #if ENABLE_SPECIALIZATION
+            #if ENABLE_SPECIALIZATION_FT
             if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
                 next_instr = this_instr;
                 _Py_Specialize_ForIter(iter, next_instr, oparg);
@@ -3013,7 +3013,7 @@ dummy_func(
             }
             OPCODE_DEFERRED_INC(FOR_ITER);
             ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
-            #endif  /* ENABLE_SPECIALIZATION */
+            #endif  /* ENABLE_SPECIALIZATION_FT */
         }
 
         replaced op(_FOR_ITER, (iter -- iter, next)) {
@@ -3091,31 +3091,46 @@ dummy_func(
 
 
         op(_ITER_CHECK_LIST, (iter -- iter)) {
-            EXIT_IF(Py_TYPE(PyStackRef_AsPyObjectBorrow(iter)) != &PyListIter_Type);
+            PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter);
+            EXIT_IF(Py_TYPE(iter_o) != &PyListIter_Type);
+#ifdef Py_GIL_DISABLED
+            EXIT_IF(!_PyObject_IsUniquelyReferenced(iter_o));
+            _PyListIterObject *it = (_PyListIterObject *)iter_o;
+            EXIT_IF(!_Py_IsOwnedByCurrentThread((PyObject *)it->it_seq) ||
+                    !_PyObject_GC_IS_SHARED(it->it_seq));
+#endif
         }
 
         replaced op(_ITER_JUMP_LIST, (iter -- iter)) {
             PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter);
-            _PyListIterObject *it = (_PyListIterObject *)iter_o;
             assert(Py_TYPE(iter_o) == &PyListIter_Type);
+// For free-threaded Python, the loop exit can happen at any point during
+// item retrieval, so it doesn't make much sense to check and jump
+// separately before item retrieval. Any length check we do here can be
+// invalid by the time we actually try to fetch the item.
+#ifdef Py_GIL_DISABLED
+            assert(_PyObject_IsUniquelyReferenced(iter_o));
+            (void)iter_o;
+#else
+            _PyListIterObject *it = (_PyListIterObject *)iter_o;
             STAT_INC(FOR_ITER, hit);
             PyListObject *seq = it->it_seq;
            if (seq == NULL || (size_t)it->it_index >= (size_t)PyList_GET_SIZE(seq)) {
                 it->it_index = -1;
-                #ifndef Py_GIL_DISABLED
                 if (seq != NULL) {
                     it->it_seq = NULL;
                     Py_DECREF(seq);
                 }
-                #endif
                /* Jump forward oparg, then skip following END_FOR instruction */
                 JUMPBY(oparg + 1);
                 DISPATCH();
             }
+#endif
         }
 
         // Only used by Tier 2
         op(_GUARD_NOT_EXHAUSTED_LIST, (iter -- iter)) {
+#ifndef Py_GIL_DISABLED
             PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter);
             _PyListIterObject *it = (_PyListIterObject *)iter_o;
             assert(Py_TYPE(iter_o) == &PyListIter_Type);
@@ -3125,16 +3140,62 @@ dummy_func(
                 it->it_index = -1;
                 EXIT_IF(1);
             }
+#endif
+        }
+
+        replaced op(_ITER_NEXT_LIST, (iter -- iter, next)) {
+            PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter);
+            _PyListIterObject *it = (_PyListIterObject *)iter_o;
+            assert(Py_TYPE(iter_o) == &PyListIter_Type);
+            PyListObject *seq = it->it_seq;
+            assert(seq);
+#ifdef Py_GIL_DISABLED
+            assert(_PyObject_IsUniquelyReferenced(iter_o));
+            assert(_Py_IsOwnedByCurrentThread((PyObject *)seq) ||
+                   _PyObject_GC_IS_SHARED(seq));
+            STAT_INC(FOR_ITER, hit);
+            int result = _PyList_GetItemRefNoLock(seq, it->it_index, &next);
+            // A negative result means we lost a race with another thread
+            // and we need to take the slow path.
+            DEOPT_IF(result < 0);
+            if (result == 0) {
+                it->it_index = -1;
+                /* Jump forward oparg, then skip following END_FOR instruction */
+                JUMPBY(oparg + 1);
+                DISPATCH();
+            }
+            it->it_index++;
+#else
+            assert(it->it_index < PyList_GET_SIZE(seq));
+            next = PyStackRef_FromPyObjectNew(PyList_GET_ITEM(seq, it->it_index++));
+#endif
         }
 
-        op(_ITER_NEXT_LIST, (iter -- iter, next)) {
+        // Only used by Tier 2
+        op(_ITER_NEXT_LIST_TIER_TWO, (iter -- iter, next)) {
             PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter);
             _PyListIterObject *it = (_PyListIterObject *)iter_o;
             assert(Py_TYPE(iter_o) == &PyListIter_Type);
             PyListObject *seq = it->it_seq;
             assert(seq);
+#ifdef Py_GIL_DISABLED
+            assert(_PyObject_IsUniquelyReferenced(iter_o));
+            assert(_Py_IsOwnedByCurrentThread((PyObject *)seq) ||
+                   _PyObject_GC_IS_SHARED(seq));
+            STAT_INC(FOR_ITER, hit);
+            int result = _PyList_GetItemRefNoLock(seq, it->it_index, &next);
+            // A negative result means we lost a race with another thread
+            // and we need to take the slow path.
+            EXIT_IF(result < 0);
+            if (result == 0) {
+                it->it_index = -1;
+                EXIT_IF(1);
+            }
+            it->it_index++;
+#else
             assert(it->it_index < PyList_GET_SIZE(seq));
            next = PyStackRef_FromPyObjectNew(PyList_GET_ITEM(seq, it->it_index++));
+#endif
         }
 
         macro(FOR_ITER_LIST) =
@@ -3144,20 +3205,30 @@ dummy_func(
             _ITER_NEXT_LIST;
 
         op(_ITER_CHECK_TUPLE, (iter -- iter)) {
-            EXIT_IF(Py_TYPE(PyStackRef_AsPyObjectBorrow(iter)) != &PyTupleIter_Type);
+            PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter);
+            EXIT_IF(Py_TYPE(iter_o) != &PyTupleIter_Type);
+#ifdef Py_GIL_DISABLED
+            EXIT_IF(!_PyObject_IsUniquelyReferenced(iter_o));
+#endif
         }
 
         replaced op(_ITER_JUMP_TUPLE, (iter -- iter)) {
             PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter);
-            _PyTupleIterObject *it = (_PyTupleIterObject *)iter_o;
+            (void)iter_o;
             assert(Py_TYPE(iter_o) == &PyTupleIter_Type);
+#ifdef Py_GIL_DISABLED
+            assert(_PyObject_IsUniquelyReferenced(iter_o));
+#endif
+            _PyTupleIterObject *it = (_PyTupleIterObject *)iter_o;
             STAT_INC(FOR_ITER, hit);
             PyTupleObject *seq = it->it_seq;
-            if (seq == NULL || it->it_index >= PyTuple_GET_SIZE(seq)) {
+            if (seq == NULL || (size_t)it->it_index >= (size_t)PyTuple_GET_SIZE(seq)) {
+#ifndef Py_GIL_DISABLED
                 if (seq != NULL) {
                     it->it_seq = NULL;
                     Py_DECREF(seq);
                 }
+#endif
                /* Jump forward oparg, then skip following END_FOR instruction */
                 JUMPBY(oparg + 1);
                 DISPATCH();
@@ -3169,6 +3240,9 @@ dummy_func(
             PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter);
             _PyTupleIterObject *it = (_PyTupleIterObject *)iter_o;
             assert(Py_TYPE(iter_o) == &PyTupleIter_Type);
+#ifdef Py_GIL_DISABLED
+            assert(_PyObject_IsUniquelyReferenced(iter_o));
+#endif
             PyTupleObject *seq = it->it_seq;
             EXIT_IF(seq == NULL);
             EXIT_IF(it->it_index >= PyTuple_GET_SIZE(seq));
@@ -3179,6 +3253,9 @@ dummy_func(
             _PyTupleIterObject *it = (_PyTupleIterObject *)iter_o;
             assert(Py_TYPE(iter_o) == &PyTupleIter_Type);
             PyTupleObject *seq = it->it_seq;
+#ifdef Py_GIL_DISABLED
+            assert(_PyObject_IsUniquelyReferenced(iter_o));
+#endif
             assert(seq);
             assert(it->it_index < PyTuple_GET_SIZE(seq));
            next = PyStackRef_FromPyObjectNew(PyTuple_GET_ITEM(seq, it->it_index++));
@@ -3193,11 +3270,17 @@ dummy_func(
         op(_ITER_CHECK_RANGE, (iter -- iter)) {
            _PyRangeIterObject *r = (_PyRangeIterObject *)PyStackRef_AsPyObjectBorrow(iter);
             EXIT_IF(Py_TYPE(r) != &PyRangeIter_Type);
+#ifdef Py_GIL_DISABLED
+            EXIT_IF(!_PyObject_IsUniquelyReferenced((PyObject *)r));
+#endif
         }
 
         replaced op(_ITER_JUMP_RANGE, (iter -- iter)) {
            _PyRangeIterObject *r = (_PyRangeIterObject *)PyStackRef_AsPyObjectBorrow(iter);
             assert(Py_TYPE(r) == &PyRangeIter_Type);
+#ifdef Py_GIL_DISABLED
+            assert(_PyObject_IsUniquelyReferenced((PyObject *)r));
+#endif
             STAT_INC(FOR_ITER, hit);
             if (r->len <= 0) {
                 // Jump over END_FOR instruction.
@@ -3216,6 +3299,9 @@ dummy_func(
         op(_ITER_NEXT_RANGE, (iter -- iter, next)) {
            _PyRangeIterObject *r = (_PyRangeIterObject *)PyStackRef_AsPyObjectBorrow(iter);
             assert(Py_TYPE(r) == &PyRangeIter_Type);
+#ifdef Py_GIL_DISABLED
+            assert(_PyObject_IsUniquelyReferenced((PyObject *)r));
+#endif
             assert(r->len > 0);
             long value = r->start;
             r->start = value + r->step;
@@ -3234,6 +3320,13 @@ dummy_func(
        op(_FOR_ITER_GEN_FRAME, (iter -- iter, gen_frame: _PyInterpreterFrame*)) {
            PyGenObject *gen = (PyGenObject *)PyStackRef_AsPyObjectBorrow(iter);
             DEOPT_IF(Py_TYPE(gen) != &PyGen_Type);
+#ifdef Py_GIL_DISABLED
+            // Since generators can't be used by multiple threads anyway we
+            // don't need to deopt here, but this lets us work on making
+            // generators thread-safe without necessarily having to
+            // specialize them thread-safely as well.
+            DEOPT_IF(!_PyObject_IsUniquelyReferenced((PyObject *)gen));
+#endif
             DEOPT_IF(gen->gi_frame_state >= FRAME_EXECUTING);
             STAT_INC(FOR_ITER, hit);
             gen_frame = &gen->gi_iframe;
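
The comments added to _ITER_JUMP_LIST above carry the key design point: under
free-threading a length check is stale the moment it completes, so the
exhaustion test moves into the fetch itself and a lost race deopts to the
generic path. A minimal Python stress sketch of the scenario being guarded
against (safe to run; how many items the reader sees is simply unspecified):

    import threading

    seq = list(range(100_000))

    def reader():
        # The for loop creates this thread's own iterator; on a
        # free-threaded build each fetch may race with writer()'s clear()
        # and must fall back to the slow path, never read out of bounds.
        n = 0
        for _ in seq:
            n += 1

    def writer():
        seq.clear()

    threads = [threading.Thread(target=reader),
               threading.Thread(target=writer)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()
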
diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h
index 8886564cded600..fb5b2240436f1c 100644
--- a/Python/executor_cases.c.h
+++ b/Python/executor_cases.c.h
@@ -4171,10 +4171,23 @@
         case _ITER_CHECK_LIST: {
             _PyStackRef iter;
             iter = stack_pointer[-1];
-            if (Py_TYPE(PyStackRef_AsPyObjectBorrow(iter)) != &PyListIter_Type) {
+            PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter);
+            if (Py_TYPE(iter_o) != &PyListIter_Type) {
                 UOP_STAT_INC(uopcode, miss);
                 JUMP_TO_JUMP_TARGET();
             }
+            #ifdef Py_GIL_DISABLED
+            if (!_PyObject_IsUniquelyReferenced(iter_o)) {
+                UOP_STAT_INC(uopcode, miss);
+                JUMP_TO_JUMP_TARGET();
+            }
+            _PyListIterObject *it = (_PyListIterObject *)iter_o;
+            if (!_Py_IsOwnedByCurrentThread((PyObject *)it->it_seq) ||
+                    !_PyObject_GC_IS_SHARED(it->it_seq)) {
+                UOP_STAT_INC(uopcode, miss);
+                JUMP_TO_JUMP_TARGET();
+            }
+            #endif
             break;
         }
 
@@ -4183,6 +4196,7 @@
         case _GUARD_NOT_EXHAUSTED_LIST: {
             _PyStackRef iter;
             iter = stack_pointer[-1];
+            #ifndef Py_GIL_DISABLED
             PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter);
             _PyListIterObject *it = (_PyListIterObject *)iter_o;
             assert(Py_TYPE(iter_o) == &PyListIter_Type);
@@ -4198,10 +4212,13 @@
                     JUMP_TO_JUMP_TARGET();
                 }
             }
+            #endif
             break;
         }
 
-        case _ITER_NEXT_LIST: {
+        /* _ITER_NEXT_LIST is not a viable micro-op for tier 2 because it is replaced */
+
+        case _ITER_NEXT_LIST_TIER_TWO: {
             _PyStackRef iter;
             _PyStackRef next;
             iter = stack_pointer[-1];
@@ -4210,8 +4227,32 @@
             assert(Py_TYPE(iter_o) == &PyListIter_Type);
             PyListObject *seq = it->it_seq;
             assert(seq);
+            #ifdef Py_GIL_DISABLED
+            assert(_PyObject_IsUniquelyReferenced(iter_o));
+            assert(_Py_IsOwnedByCurrentThread((PyObject *)seq) ||
+                   _PyObject_GC_IS_SHARED(seq));
+            STAT_INC(FOR_ITER, hit);
+            _PyFrame_SetStackPointer(frame, stack_pointer);
+            int result = _PyList_GetItemRefNoLock(seq, it->it_index, &next);
+            stack_pointer = _PyFrame_GetStackPointer(frame);
+            // A negative result means we lost a race with another thread
+            // and we need to take the slow path.
+            if (result < 0) {
+                UOP_STAT_INC(uopcode, miss);
+                JUMP_TO_JUMP_TARGET();
+            }
+            if (result == 0) {
+                it->it_index = -1;
+                if (1) {
+                    UOP_STAT_INC(uopcode, miss);
+                    JUMP_TO_JUMP_TARGET();
+                }
+            }
+            it->it_index++;
+            #else
             assert(it->it_index < PyList_GET_SIZE(seq));
            next = PyStackRef_FromPyObjectNew(PyList_GET_ITEM(seq, it->it_index++));
+            #endif
             stack_pointer[0] = next;
             stack_pointer += 1;
             assert(WITHIN_STACK_BOUNDS());
@@ -4221,10 +4262,17 @@
         case _ITER_CHECK_TUPLE: {
             _PyStackRef iter;
             iter = stack_pointer[-1];
-            if (Py_TYPE(PyStackRef_AsPyObjectBorrow(iter)) != &PyTupleIter_Type) {
+            PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter);
+            if (Py_TYPE(iter_o) != &PyTupleIter_Type) {
+                UOP_STAT_INC(uopcode, miss);
+                JUMP_TO_JUMP_TARGET();
+            }
+            #ifdef Py_GIL_DISABLED
+            if (!_PyObject_IsUniquelyReferenced(iter_o)) {
                 UOP_STAT_INC(uopcode, miss);
                 JUMP_TO_JUMP_TARGET();
             }
+            #endif
             break;
         }
 
@@ -4236,6 +4284,9 @@
             PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter);
             _PyTupleIterObject *it = (_PyTupleIterObject *)iter_o;
             assert(Py_TYPE(iter_o) == &PyTupleIter_Type);
+            #ifdef Py_GIL_DISABLED
+            assert(_PyObject_IsUniquelyReferenced(iter_o));
+            #endif
             PyTupleObject *seq = it->it_seq;
             if (seq == NULL) {
                 UOP_STAT_INC(uopcode, miss);
@@ -4256,6 +4307,9 @@
             _PyTupleIterObject *it = (_PyTupleIterObject *)iter_o;
             assert(Py_TYPE(iter_o) == &PyTupleIter_Type);
             PyTupleObject *seq = it->it_seq;
+            #ifdef Py_GIL_DISABLED
+            assert(_PyObject_IsUniquelyReferenced(iter_o));
+            #endif
             assert(seq);
             assert(it->it_index < PyTuple_GET_SIZE(seq));
            next = PyStackRef_FromPyObjectNew(PyTuple_GET_ITEM(seq, it->it_index++));
@@ -4273,6 +4327,12 @@
                 UOP_STAT_INC(uopcode, miss);
                 JUMP_TO_JUMP_TARGET();
             }
+            #ifdef Py_GIL_DISABLED
+            if (!_PyObject_IsUniquelyReferenced((PyObject *)r)) {
+                UOP_STAT_INC(uopcode, miss);
+                JUMP_TO_JUMP_TARGET();
+            }
+            #endif
             break;
         }
 
@@ -4296,6 +4356,9 @@
             iter = stack_pointer[-1];
            _PyRangeIterObject *r = (_PyRangeIterObject *)PyStackRef_AsPyObjectBorrow(iter);
             assert(Py_TYPE(r) == &PyRangeIter_Type);
+            #ifdef Py_GIL_DISABLED
+            assert(_PyObject_IsUniquelyReferenced((PyObject *)r));
+            #endif
             assert(r->len > 0);
             long value = r->start;
             r->start = value + r->step;
@@ -4321,6 +4384,16 @@
                 UOP_STAT_INC(uopcode, miss);
                 JUMP_TO_JUMP_TARGET();
             }
+            #ifdef Py_GIL_DISABLED
+            // Since generators can't be used by multiple threads anyway we
+            // don't need to deopt here, but this lets us work on making
+            // generators thread-safe without necessarily having to
+            // specialize them thread-safely as well.
+            if (!_PyObject_IsUniquelyReferenced((PyObject *)gen)) {
+                UOP_STAT_INC(uopcode, miss);
+                JUMP_TO_JUMP_TARGET();
+            }
+            #endif
             if (gen->gi_frame_state >= FRAME_EXECUTING) {
                 UOP_STAT_INC(uopcode, miss);
                 JUMP_TO_JUMP_TARGET();
diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h
index ad0e41188170fb..8cdbaf1a4d5761 100644
--- a/Python/generated_cases.c.h
+++ b/Python/generated_cases.c.h
@@ -5434,7 +5434,7 @@
                 iter = stack_pointer[-1];
                 uint16_t counter = read_u16(&this_instr[1].cache);
                 (void)counter;
-                #if ENABLE_SPECIALIZATION
+                #if ENABLE_SPECIALIZATION_FT
                 if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
                     next_instr = this_instr;
                     _PyFrame_SetStackPointer(frame, stack_pointer);
@@ -5444,7 +5444,7 @@
                 }
                 OPCODE_DEFERRED_INC(FOR_ITER);
                 ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
-                #endif  /* ENABLE_SPECIALIZATION */
+                #endif  /* ENABLE_SPECIALIZATION_FT */
             }
             // _FOR_ITER
             {
@@ -5514,6 +5514,17 @@
                     assert(_PyOpcode_Deopt[opcode] == (FOR_ITER));
                     JUMP_TO_PREDICTED(FOR_ITER);
                 }
+                #ifdef Py_GIL_DISABLED
+                // Since generators can't be used by multiple threads anyway we
+                // don't need to deopt here, but this lets us work on making
+                // generators thread-safe without necessarily having to
+                // specialize them thread-safely as well.
+                if (!_PyObject_IsUniquelyReferenced((PyObject *)gen)) {
+                    UPDATE_MISS_STATS(FOR_ITER);
+                    assert(_PyOpcode_Deopt[opcode] == (FOR_ITER));
+                    JUMP_TO_PREDICTED(FOR_ITER);
+                }
+                #endif
                 if (gen->gi_frame_state >= FRAME_EXECUTING) {
                     UPDATE_MISS_STATS(FOR_ITER);
                     assert(_PyOpcode_Deopt[opcode] == (FOR_ITER));
@@ -5565,33 +5576,55 @@
             // _ITER_CHECK_LIST
             {
                 iter = stack_pointer[-1];
-                if (Py_TYPE(PyStackRef_AsPyObjectBorrow(iter)) != &PyListIter_Type) {
+                PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter);
+                if (Py_TYPE(iter_o) != &PyListIter_Type) {
                     UPDATE_MISS_STATS(FOR_ITER);
                     assert(_PyOpcode_Deopt[opcode] == (FOR_ITER));
                     JUMP_TO_PREDICTED(FOR_ITER);
                 }
+                #ifdef Py_GIL_DISABLED
+                if (!_PyObject_IsUniquelyReferenced(iter_o)) {
+                    UPDATE_MISS_STATS(FOR_ITER);
+                    assert(_PyOpcode_Deopt[opcode] == (FOR_ITER));
+                    JUMP_TO_PREDICTED(FOR_ITER);
+                }
+                _PyListIterObject *it = (_PyListIterObject *)iter_o;
+                if (!_Py_IsOwnedByCurrentThread((PyObject *)it->it_seq) ||
+                    !_PyObject_GC_IS_SHARED(it->it_seq)) {
+                    UPDATE_MISS_STATS(FOR_ITER);
+                    assert(_PyOpcode_Deopt[opcode] == (FOR_ITER));
+                    JUMP_TO_PREDICTED(FOR_ITER);
+                }
+                #endif
             }
             // _ITER_JUMP_LIST
             {
                 PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter);
-                _PyListIterObject *it = (_PyListIterObject *)iter_o;
                 assert(Py_TYPE(iter_o) == &PyListIter_Type);
+                // For free-threaded Python, the loop exit can happen at any point during
+                // item retrieval, so it doesn't make much sense to check and jump
+                // separately before item retrieval. Any length check we do here can be
+                // invalid by the time we actually try to fetch the item.
+                #ifdef Py_GIL_DISABLED
+                assert(_PyObject_IsUniquelyReferenced(iter_o));
+                (void)iter_o;
+                #else
+                _PyListIterObject *it = (_PyListIterObject *)iter_o;
                 STAT_INC(FOR_ITER, hit);
                 PyListObject *seq = it->it_seq;
                if (seq == NULL || (size_t)it->it_index >= (size_t)PyList_GET_SIZE(seq)) {
                     it->it_index = -1;
-                    #ifndef Py_GIL_DISABLED
                     if (seq != NULL) {
                         it->it_seq = NULL;
                         _PyFrame_SetStackPointer(frame, stack_pointer);
                         Py_DECREF(seq);
                         stack_pointer = _PyFrame_GetStackPointer(frame);
                     }
-                    #endif
                    /* Jump forward oparg, then skip following END_FOR instruction */
                     JUMPBY(oparg + 1);
                     DISPATCH();
                 }
+                #endif
             }
             // _ITER_NEXT_LIST
             {
@@ -5600,8 +5633,32 @@
                 assert(Py_TYPE(iter_o) == &PyListIter_Type);
                 PyListObject *seq = it->it_seq;
                 assert(seq);
+                #ifdef Py_GIL_DISABLED
+                assert(_PyObject_IsUniquelyReferenced(iter_o));
+                assert(_Py_IsOwnedByCurrentThread((PyObject *)seq) ||
+                   _PyObject_GC_IS_SHARED(seq));
+                STAT_INC(FOR_ITER, hit);
+                _PyFrame_SetStackPointer(frame, stack_pointer);
+                int result = _PyList_GetItemRefNoLock(seq, it->it_index, &next);
+                stack_pointer = _PyFrame_GetStackPointer(frame);
+                // A negative result means we lost a race with another thread
+                // and we need to take the slow path.
+                if (result < 0) {
+                    UPDATE_MISS_STATS(FOR_ITER);
+                    assert(_PyOpcode_Deopt[opcode] == (FOR_ITER));
+                    JUMP_TO_PREDICTED(FOR_ITER);
+                }
+                if (result == 0) {
+                    it->it_index = -1;
+                    /* Jump forward oparg, then skip following END_FOR instruction */
+                    JUMPBY(oparg + 1);
+                    DISPATCH();
+                }
+                it->it_index++;
+                #else
                 assert(it->it_index < PyList_GET_SIZE(seq));
                next = PyStackRef_FromPyObjectNew(PyList_GET_ITEM(seq, it->it_index++));
+                #endif
             }
             stack_pointer[0] = next;
             stack_pointer += 1;
@@ -5632,11 +5689,21 @@
                     assert(_PyOpcode_Deopt[opcode] == (FOR_ITER));
                     JUMP_TO_PREDICTED(FOR_ITER);
                 }
+                #ifdef Py_GIL_DISABLED
+                if (!_PyObject_IsUniquelyReferenced((PyObject *)r)) {
+                    UPDATE_MISS_STATS(FOR_ITER);
+                    assert(_PyOpcode_Deopt[opcode] == (FOR_ITER));
+                    JUMP_TO_PREDICTED(FOR_ITER);
+                }
+                #endif
             }
             // _ITER_JUMP_RANGE
             {
                _PyRangeIterObject *r = (_PyRangeIterObject *)PyStackRef_AsPyObjectBorrow(iter);
                 assert(Py_TYPE(r) == &PyRangeIter_Type);
+                #ifdef Py_GIL_DISABLED
+                assert(_PyObject_IsUniquelyReferenced((PyObject *)r));
+                #endif
                 STAT_INC(FOR_ITER, hit);
                 if (r->len <= 0) {
                     // Jump over END_FOR instruction.
@@ -5648,6 +5715,9 @@
             {
                _PyRangeIterObject *r = (_PyRangeIterObject *)PyStackRef_AsPyObjectBorrow(iter);
                 assert(Py_TYPE(r) == &PyRangeIter_Type);
+                #ifdef Py_GIL_DISABLED
+                assert(_PyObject_IsUniquelyReferenced((PyObject *)r));
+                #endif
                 assert(r->len > 0);
                 long value = r->start;
                 r->start = value + r->step;
@@ -5681,26 +5751,40 @@
             // _ITER_CHECK_TUPLE
             {
                 iter = stack_pointer[-1];
-                if (Py_TYPE(PyStackRef_AsPyObjectBorrow(iter)) != &PyTupleIter_Type) {
+                PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter);
+                if (Py_TYPE(iter_o) != &PyTupleIter_Type) {
                     UPDATE_MISS_STATS(FOR_ITER);
                     assert(_PyOpcode_Deopt[opcode] == (FOR_ITER));
                     JUMP_TO_PREDICTED(FOR_ITER);
                 }
+                #ifdef Py_GIL_DISABLED
+                if (!_PyObject_IsUniquelyReferenced(iter_o)) {
+                    UPDATE_MISS_STATS(FOR_ITER);
+                    assert(_PyOpcode_Deopt[opcode] == (FOR_ITER));
+                    JUMP_TO_PREDICTED(FOR_ITER);
+                }
+                #endif
             }
             // _ITER_JUMP_TUPLE
             {
                 PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter);
-                _PyTupleIterObject *it = (_PyTupleIterObject *)iter_o;
+                (void)iter_o;
                 assert(Py_TYPE(iter_o) == &PyTupleIter_Type);
+                #ifdef Py_GIL_DISABLED
+                assert(_PyObject_IsUniquelyReferenced(iter_o));
+                #endif
+                _PyTupleIterObject *it = (_PyTupleIterObject *)iter_o;
                 STAT_INC(FOR_ITER, hit);
                 PyTupleObject *seq = it->it_seq;
-                if (seq == NULL || it->it_index >= PyTuple_GET_SIZE(seq)) {
+                if (seq == NULL || (size_t)it->it_index >= (size_t)PyTuple_GET_SIZE(seq)) {
+                    #ifndef Py_GIL_DISABLED
                     if (seq != NULL) {
                         it->it_seq = NULL;
                         _PyFrame_SetStackPointer(frame, stack_pointer);
                         Py_DECREF(seq);
                         stack_pointer = _PyFrame_GetStackPointer(frame);
                     }
+                    #endif
                    /* Jump forward oparg, then skip following END_FOR instruction */
                     JUMPBY(oparg + 1);
                     DISPATCH();
@@ -5712,6 +5796,9 @@
                 _PyTupleIterObject *it = (_PyTupleIterObject *)iter_o;
                 assert(Py_TYPE(iter_o) == &PyTupleIter_Type);
                 PyTupleObject *seq = it->it_seq;
+                #ifdef Py_GIL_DISABLED
+                assert(_PyObject_IsUniquelyReferenced(iter_o));
+                #endif
                 assert(seq);
                 assert(it->it_index < PyTuple_GET_SIZE(seq));
                next = PyStackRef_FromPyObjectNew(PyTuple_GET_ITEM(seq, it->it_index++));
diff --git a/Python/optimizer.c b/Python/optimizer.c
index e05523451da859..6fc5eabdf8b44e 100644
--- a/Python/optimizer.c
+++ b/Python/optimizer.c
@@ -363,6 +363,7 @@ _PyUOp_Replacements[MAX_UOP_ID + 1] = {
     [_ITER_JUMP_LIST] = _GUARD_NOT_EXHAUSTED_LIST,
     [_ITER_JUMP_TUPLE] = _GUARD_NOT_EXHAUSTED_TUPLE,
     [_FOR_ITER] = _FOR_ITER_TIER_TWO,
+    [_ITER_NEXT_LIST] = _ITER_NEXT_LIST_TIER_TWO,
 };
 
 static const uint8_t
diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h
index 9f3fc052494309..3f315901a5beb8 100644
--- a/Python/optimizer_cases.c.h
+++ b/Python/optimizer_cases.c.h
@@ -1458,7 +1458,9 @@
             break;
         }
 
-        case _ITER_NEXT_LIST: {
+        /* _ITER_NEXT_LIST is not a viable micro-op for tier 2 */
+
+        case _ITER_NEXT_LIST_TIER_TWO: {
             JitOptSymbol *next;
             next = sym_new_not_null(ctx);
             stack_pointer[0] = next;
diff --git a/Python/specialize.c b/Python/specialize.c
index c741c4f93f3138..0466b5bee90330 100644
--- a/Python/specialize.c
+++ b/Python/specialize.c
@@ -2826,45 +2826,56 @@ int
 void
 _Py_Specialize_ForIter(_PyStackRef iter, _Py_CODEUNIT *instr, int oparg)
 {
-    assert(ENABLE_SPECIALIZATION);
+    assert(ENABLE_SPECIALIZATION_FT);
     assert(_PyOpcode_Caches[FOR_ITER] == INLINE_CACHE_ENTRIES_FOR_ITER);
-    _PyForIterCache *cache = (_PyForIterCache *)(instr + 1);
     PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter);
     PyTypeObject *tp = Py_TYPE(iter_o);
+#ifdef Py_GIL_DISABLED
+    // Only specialize for uniquely referenced iterators, so that we know
+    // they're only referenced by this one thread. This is more limiting
+    // than we need (even `it = iter(mylist); for item in it:` won't get
+    // specialized) but we don't have a way to check whether we're the only
+    // _thread_ who has access to the object.
+    if (!_PyObject_IsUniquelyReferenced(iter_o))
+        goto failure;
+#endif
     if (tp == &PyListIter_Type) {
-        instr->op.code = FOR_ITER_LIST;
-        goto success;
+#ifdef Py_GIL_DISABLED
+        _PyListIterObject *it = (_PyListIterObject *)iter_o;
+        if (!_Py_IsOwnedByCurrentThread((PyObject *)it->it_seq) &&
+            !_PyObject_GC_IS_SHARED(it->it_seq)) {
+            // Maybe this should just set GC_IS_SHARED in a critical
+            // section, instead of leaving it to the first iteration?
+            goto failure;
+        }
+#endif
+        specialize(instr, FOR_ITER_LIST);
+        return;
     }
     else if (tp == &PyTupleIter_Type) {
-        instr->op.code = FOR_ITER_TUPLE;
-        goto success;
+        specialize(instr, FOR_ITER_TUPLE);
+        return;
     }
     else if (tp == &PyRangeIter_Type) {
-        instr->op.code = FOR_ITER_RANGE;
-        goto success;
+        specialize(instr, FOR_ITER_RANGE);
+        return;
     }
     else if (tp == &PyGen_Type && oparg <= SHRT_MAX) {
+        // Generators are very much not thread-safe, so don't worry about
+        // the specialization not being thread-safe.
        assert(instr[oparg + INLINE_CACHE_ENTRIES_FOR_ITER + 1].op.code == END_FOR  ||
            instr[oparg + INLINE_CACHE_ENTRIES_FOR_ITER + 1].op.code == INSTRUMENTED_END_FOR
         );
         /* Don't specialize if PEP 523 is active */
-        if (_PyInterpreterState_GET()->eval_frame) {
-            SPECIALIZATION_FAIL(FOR_ITER, SPEC_FAIL_OTHER);
+        if (_PyInterpreterState_GET()->eval_frame)
             goto failure;
-        }
-        instr->op.code = FOR_ITER_GEN;
-        goto success;
+        specialize(instr, FOR_ITER_GEN);
+        return;
     }
+failure:
     SPECIALIZATION_FAIL(FOR_ITER,
                         _PySpecialization_ClassifyIterator(iter_o));
-failure:
-    STAT_INC(FOR_ITER, failure);
-    instr->op.code = FOR_ITER;
-    cache->counter = adaptive_counter_backoff(cache->counter);
-    return;
-success:
-    STAT_INC(FOR_ITER, success);
-    cache->counter = adaptive_counter_cooldown();
+    unspecialize(instr);
 }
 
 void
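
The uniquely-referenced restriction in _Py_Specialize_ForIter above is
observable from Python: a loop whose iterator lives only on the evaluation
stack can specialize, while binding the iterator to a name first keeps a
second reference alive. A sketch for a free-threaded (Py_GIL_DISABLED) build;
on a default build both loops can specialize:

    import dis

    L = list(range(20))

    def direct():
        for i in L:      # iterator exists only on the frame's stack
            pass

    def named():
        it = iter(L)     # `it` is a second reference to the iterator,
        for i in it:     # so the free-threaded build refuses to specialize
            pass

    for _ in range(4):
        direct()
        named()
    dis.dis(direct, adaptive=True)  # expect FOR_ITER_LIST
    dis.dis(named, adaptive=True)   # expect generic FOR_ITER
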
diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py
index faf720e8fe67ee..664e247226c084 100644
--- a/Tools/cases_generator/analyzer.py
+++ b/Tools/cases_generator/analyzer.py
@@ -644,11 +644,13 @@ def has_error_without_pop(op: parser.CodeDef) -> bool:
     "_PyLong_IsNonNegativeCompact",
     "_PyLong_IsZero",
     "_PyManagedDictPointer_IsValues",
+    "_PyObject_GC_IS_SHARED",
     "_PyObject_GC_IS_TRACKED",
     "_PyObject_GC_MAY_BE_TRACKED",
     "_PyObject_GC_TRACK",
     "_PyObject_GetManagedDict",
     "_PyObject_InlineValues",
+    "_PyObject_IsUniquelyReferenced",
     "_PyObject_ManagedDictPointer",
     "_PyThreadState_HasStackSpace",
     "_PyTuple_FromStackRefStealOnSuccess",
@@ -661,6 +663,7 @@ def has_error_without_pop(op: parser.CodeDef) -> bool:
     "_Py_DECREF_NO_DEALLOC",
     "_Py_ID",
     "_Py_IsImmortal",
+    "_Py_IsOwnedByCurrentThread",
     "_Py_LeaveRecursiveCallPy",
     "_Py_LeaveRecursiveCallTstate",
     "_Py_NewRef",
