https://github.com/python/cpython/commit/dabcecfd6dadb9430733105ba36925b290343d31
commit: dabcecfd6dadb9430733105ba36925b290343d31
branch: main
author: mpage <mp...@meta.com>
committer: mpage <mp...@cs.stanford.edu>
date: 2024-12-03T11:20:20-08:00
summary:

gh-115999: Enable specialization of `CALL` instructions in free-threaded builds (#127123)

The CALL family of instructions was already mostly thread-safe and required
only a small number of changes, which are documented below.

A few changes were needed to make CALL_ALLOC_AND_ENTER_INIT thread-safe:

Added _PyType_LookupRefAndVersion, which returns the type version corresponding 
to the returned ref.
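
For illustration, a minimal sketch of the intended calling pattern (a
hypothetical caller with some PyTypeObject *tp; the real call site is in
Python/specialize.c below):

    unsigned int version = 0;
    PyObject *init = _PyType_LookupRefAndVersion(tp, &_Py_ID(__init__),
                                                 &version);
    if (init != NULL && version != 0) {
        /* `version` is the tp_version_tag observed during the lookup, so
         * comparing it against the type's current tag later detects any
         * intervening modification of the type. 0 is never a valid
         * version. */
    }
    Py_XDECREF(init);  /* the lookup returns a strong reference */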

Added _PyType_CacheInitForSpecialization, which takes an init method and the
corresponding type version and only populates the specialization cache if the
current type version matches the supplied version. This prevents caching a
stale value in free-threaded builds when we race with an update to __init__.
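
A condensed sketch of the resulting check-then-publish pattern (simplified
from the Objects/typeobject.c hunk below; `ht` stands for the
PyHeapTypeObject and `tp_version` for the version observed at lookup time):

    int can_cache;
    BEGIN_TYPE_LOCK();
    /* Cache only if the type is unchanged since the lookup ... */
    can_cache = ((PyTypeObject *)ht)->tp_version_tag == tp_version;
    if (can_cache) {
        /* ... and publish with release ordering so readers that see the
         * new pointer also see the fully constructed function object. */
        FT_ATOMIC_STORE_PTR_RELEASE(ht->_spec_cache.init, init);
    }
    END_TYPE_LOCK();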

Only cache __init__ functions that use deferred reference counting in
free-threaded builds. This ensures that the reference to __init__ stored in
the specialization cache remains valid as long as the type version guard in
_CHECK_AND_ALLOCATE_OBJECT passes.
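
Roughly how the consumer side relies on this pair of invariants (compare the
_CHECK_AND_ALLOCATE_OBJECT hunks below; the lifetime reasoning in the
comments is an explanatory gloss, not text from the commit):

    /* Deopt unless the type still has the version recorded in the inline
     * cache, then read the __init__ published under that version. */
    DEOPT_IF(FT_ATOMIC_LOAD_UINT32_RELAXED(tp->tp_version_tag) != type_version);
    PyFunctionObject *init_func =
        (PyFunctionObject *)FT_ATOMIC_LOAD_PTR_ACQUIRE(cls->_spec_cache.init);
    /* The cached pointer is borrowed, but only deferred objects are
     * cached, and deferred objects are freed only by the GC, which must
     * stop the world first. The borrow therefore cannot go stale while
     * this thread is between safepoints. */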

Fix a bug in _CREATE_INIT_FRAME where the frame was pushed to the stack on
failure.

A few other miscellaneous changes were also needed:

Use {LOCK,UNLOCK}_OBJECT in CALL_LIST_APPEND. This ensures that the list's
per-object lock is held while we are appending to it.
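
Roughly the shape of the fix (see the CALL_LIST_APPEND hunks below; in
free-threaded builds LOCK_OBJECT is a try-lock that fails rather than
blocks, so a contended list simply deoptimizes, while in default builds it
always succeeds):

    DEOPT_IF(!LOCK_OBJECT(self_o));  /* take the per-object lock or deopt */
    int err = _PyList_AppendTakeRef((PyListObject *)self_o,
                                    PyStackRef_AsPyObjectSteal(arg));
    UNLOCK_OBJECT(self_o);           /* release before the error check */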

Add the missing co_tlbc (thread-local bytecode) array for _Py_InitCleanup.

Stop/start the world around setting the eval frame hook. This allows us to read 
interp->eval_frame non-atomically and preserves the behavior of _CHECK_PEP_523 
documented below.
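
Conceptually (see the Python/pystate.c hunk below):

    /* Writers stop the world, so no other thread can be executing
     * bytecode (and thus reading interp->eval_frame) while the field
     * changes ... */
    _PyEval_StopTheWorld(interp);
    interp->eval_frame = eval_frame;
    _PyEval_StartTheWorld(interp);
    /* ... which lets hot paths such as _CHECK_PEP_523 keep using a
     * plain, non-atomic load of interp->eval_frame. */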

files:
M Include/internal/pycore_object.h
M Lib/test/test_monitoring.py
M Lib/test/test_opcache.py
M Lib/test/test_type_cache.py
M Objects/typeobject.c
M Python/bytecodes.c
M Python/executor_cases.c.h
M Python/generated_cases.c.h
M Python/perf_trampoline.c
M Python/pystate.c
M Python/specialize.c

diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h
index c52ed8f14707ba..ce876b093b2522 100644
--- a/Include/internal/pycore_object.h
+++ b/Include/internal/pycore_object.h
@@ -835,6 +835,20 @@ extern int _PyObject_StoreInstanceAttribute(PyObject *obj,
                                             PyObject *name, PyObject *value);
 extern bool _PyObject_TryGetInstanceAttribute(PyObject *obj, PyObject *name,
                                               PyObject **attr);
+extern PyObject *_PyType_LookupRefAndVersion(PyTypeObject *, PyObject *,
+                                             unsigned int *);
+
+// Cache the provided init method in the specialization cache of type if the
+// provided type version matches the current version of the type.
+//
+// The cached value is borrowed and is only valid if guarded by a type
+// version check. In free-threaded builds the init method must also use
+// deferred reference counting.
+//
+// Returns 1 if the value was cached or 0 otherwise.
+extern int _PyType_CacheInitForSpecialization(PyHeapTypeObject *type,
+                                              PyObject *init,
+                                              unsigned int tp_version);
 
 #ifdef Py_GIL_DISABLED
 #  define MANAGED_DICT_OFFSET    (((Py_ssize_t)sizeof(PyObject *))*-1)
diff --git a/Lib/test/test_monitoring.py b/Lib/test/test_monitoring.py
index b640aa08e4a812..5a4bcebedf19de 100644
--- a/Lib/test/test_monitoring.py
+++ b/Lib/test/test_monitoring.py
@@ -11,7 +11,7 @@
 import unittest
 
 import test.support
-from test.support import requires_specialization, script_helper
+from test.support import requires_specialization_ft, script_helper
 from test.support.import_helper import import_module
 
 _testcapi = test.support.import_helper.import_module("_testcapi")
@@ -850,6 +850,13 @@ def __init__(self, events):
     def __call__(self, code, offset, val):
         self.events.append(("return", code.co_name, val))
 
+# gh-127274: CALL_ALLOC_AND_ENTER_INIT will only cache __init__ methods that
+# are deferred. We only defer functions defined at the top-level.
+class ValueErrorRaiser:
+    def __init__(self):
+        raise ValueError()
+
+
 class ExceptionMonitoringTest(CheckEvents):
 
     exception_recorders = (
@@ -1045,16 +1052,12 @@ def func():
         )
         self.assertEqual(events[0], ("throw", IndexError))
 
-    @requires_specialization
+    @requires_specialization_ft
     def test_no_unwind_for_shim_frame(self):
 
-        class B:
-            def __init__(self):
-                raise ValueError()
-
         def f():
             try:
-                return B()
+                return ValueErrorRaiser()
             except ValueError:
                 pass
 
diff --git a/Lib/test/test_opcache.py b/Lib/test/test_opcache.py
index b7a18133ab8b8a..50b5f365165921 100644
--- a/Lib/test/test_opcache.py
+++ b/Lib/test/test_opcache.py
@@ -493,6 +493,18 @@ def f():
             self.assertFalse(f())
 
 
+# gh-127274: CALL_ALLOC_AND_ENTER_INIT will only cache __init__ methods that
+# are deferred. We only defer functions defined at the top-level.
+class MyClass:
+    def __init__(self):
+        pass
+
+
+class InitTakesArg:
+    def __init__(self, arg):
+        self.arg = arg
+
+
 class TestCallCache(TestBase):
     def test_too_many_defaults_0(self):
         def f():
@@ -522,12 +534,8 @@ def f(x, y):
             f()
 
     @disabling_optimizer
-    @requires_specialization
+    @requires_specialization_ft
     def test_assign_init_code(self):
-        class MyClass:
-            def __init__(self):
-                pass
-
         def instantiate():
             return MyClass()
 
@@ -544,6 +552,20 @@ def count_args(self, *args):
         MyClass.__init__.__code__ = count_args.__code__
         instantiate()
 
+    @disabling_optimizer
+    @requires_specialization_ft
+    def test_push_init_frame_fails(self):
+        def instantiate():
+            return InitTakesArg()
+
+        for _ in range(2):
+            with self.assertRaises(TypeError):
+                instantiate()
+        self.assert_specialized(instantiate, "CALL_ALLOC_AND_ENTER_INIT")
+
+        with self.assertRaises(TypeError):
+            instantiate()
+
 
 @threading_helper.requires_working_threading()
 class TestRacesDoNotCrash(TestBase):
diff --git a/Lib/test/test_type_cache.py b/Lib/test/test_type_cache.py
index 66abe73f8d766d..e109a65741309a 100644
--- a/Lib/test/test_type_cache.py
+++ b/Lib/test/test_type_cache.py
@@ -2,7 +2,7 @@
 import unittest
 import dis
 from test import support
-from test.support import import_helper, requires_specialization
+from test.support import import_helper, requires_specialization, requires_specialization_ft
 try:
     from sys import _clear_type_cache
 except ImportError:
@@ -110,7 +110,6 @@ class HolderSub(Holder):
             HolderSub.value
 
 @support.cpython_only
-@requires_specialization
 class TypeCacheWithSpecializationTests(unittest.TestCase):
     def tearDown(self):
         _clear_type_cache()
@@ -140,6 +139,7 @@ def _check_specialization(self, func, arg, opname, *, should_specialize):
         else:
             self.assertIn(opname, self._all_opnames(func))
 
+    @requires_specialization
     def test_class_load_attr_specialization_user_type(self):
         class A:
             def foo(self):
@@ -160,6 +160,7 @@ def load_foo_2(type_):
 
         self._check_specialization(load_foo_2, A, "LOAD_ATTR", should_specialize=False)
 
+    @requires_specialization
     def test_class_load_attr_specialization_static_type(self):
         self.assertNotEqual(type_get_version(str), 0)
         self.assertNotEqual(type_get_version(bytes), 0)
@@ -171,6 +172,7 @@ def get_capitalize_1(type_):
         self.assertEqual(get_capitalize_1(str)('hello'), 'Hello')
         self.assertEqual(get_capitalize_1(bytes)(b'hello'), b'Hello')
 
+    @requires_specialization
     def test_property_load_attr_specialization_user_type(self):
         class G:
             @property
@@ -192,6 +194,7 @@ def load_x_2(instance):
 
         self._check_specialization(load_x_2, G(), "LOAD_ATTR", should_specialize=False)
 
+    @requires_specialization
     def test_store_attr_specialization_user_type(self):
         class B:
             __slots__ = ("bar",)
@@ -211,6 +214,7 @@ def store_bar_2(type_):
 
         self._check_specialization(store_bar_2, B(), "STORE_ATTR", should_specialize=False)
 
+    @requires_specialization_ft
     def test_class_call_specialization_user_type(self):
         class F:
             def __init__(self):
@@ -231,6 +235,7 @@ def call_class_2(type_):
 
         self._check_specialization(call_class_2, F, "CALL", should_specialize=False)
 
+    @requires_specialization
     def test_to_bool_specialization_user_type(self):
         class H:
             pass
diff --git a/Objects/typeobject.c b/Objects/typeobject.c
index bf9049bce3adeb..2068d6aa9be52b 100644
--- a/Objects/typeobject.c
+++ b/Objects/typeobject.c
@@ -5528,9 +5528,12 @@ _PyTypes_AfterFork(void)
 }
 
 /* Internal API to look for a name through the MRO.
-   This returns a borrowed reference, and doesn't set an exception! */
+   This returns a strong reference, and doesn't set an exception!
+   If nonzero, version is set to the value of type->tp_version at the time of
+   the lookup.
+*/
 PyObject *
-_PyType_LookupRef(PyTypeObject *type, PyObject *name)
+_PyType_LookupRefAndVersion(PyTypeObject *type, PyObject *name, unsigned int *version)
 {
     PyObject *res;
     int error;
@@ -5553,6 +5556,9 @@ _PyType_LookupRef(PyTypeObject *type, PyObject *name)
             // If the sequence is still valid then we're done
             if (value == NULL || _Py_TryIncref(value)) {
                 if (_PySeqLock_EndRead(&entry->sequence, sequence)) {
+                    if (version != NULL) {
+                        *version = entry_version;
+                    }
                     return value;
                 }
                 Py_XDECREF(value);
@@ -5574,6 +5580,9 @@ _PyType_LookupRef(PyTypeObject *type, PyObject *name)
         OBJECT_STAT_INC_COND(type_cache_hits, !is_dunder_name(name));
         OBJECT_STAT_INC_COND(type_cache_dunder_hits, is_dunder_name(name));
         Py_XINCREF(entry->value);
+        if (version != NULL) {
+            *version = entry->version;
+        }
         return entry->value;
     }
 #endif
@@ -5587,12 +5596,12 @@ _PyType_LookupRef(PyTypeObject *type, PyObject *name)
     // anyone else can modify our mro or mutate the type.
 
     int has_version = 0;
-    int version = 0;
+    unsigned int assigned_version = 0;
     BEGIN_TYPE_LOCK();
     res = find_name_in_mro(type, name, &error);
     if (MCACHE_CACHEABLE_NAME(name)) {
         has_version = assign_version_tag(interp, type);
-        version = type->tp_version_tag;
+        assigned_version = type->tp_version_tag;
     }
     END_TYPE_LOCK();
 
@@ -5609,28 +5618,67 @@ _PyType_LookupRef(PyTypeObject *type, PyObject *name)
         if (error == -1) {
             PyErr_Clear();
         }
+        if (version != NULL) {
+            // 0 is not a valid version
+            *version = 0;
+        }
         return NULL;
     }
 
     if (has_version) {
 #if Py_GIL_DISABLED
-        update_cache_gil_disabled(entry, name, version, res);
+        update_cache_gil_disabled(entry, name, assigned_version, res);
 #else
-        PyObject *old_value = update_cache(entry, name, version, res);
+        PyObject *old_value = update_cache(entry, name, assigned_version, res);
         Py_DECREF(old_value);
 #endif
     }
+    if (version != NULL) {
+        // 0 is not a valid version
+        *version = has_version ? assigned_version : 0;
+    }
     return res;
 }
 
+/* Internal API to look for a name through the MRO.
+   This returns a strong reference, and doesn't set an exception!
+*/
+PyObject *
+_PyType_LookupRef(PyTypeObject *type, PyObject *name)
+{
+    return _PyType_LookupRefAndVersion(type, name, NULL);
+}
+
+/* Internal API to look for a name through the MRO.
+   This returns a borrowed reference, and doesn't set an exception! */
 PyObject *
 _PyType_Lookup(PyTypeObject *type, PyObject *name)
 {
-    PyObject *res = _PyType_LookupRef(type, name);
+    PyObject *res = _PyType_LookupRefAndVersion(type, name, NULL);
     Py_XDECREF(res);
     return res;
 }
 
+int
+_PyType_CacheInitForSpecialization(PyHeapTypeObject *type, PyObject *init,
+                                   unsigned int tp_version)
+{
+    if (!init || !tp_version) {
+        return 0;
+    }
+    int can_cache;
+    BEGIN_TYPE_LOCK();
+    can_cache = ((PyTypeObject*)type)->tp_version_tag == tp_version;
+    #ifdef Py_GIL_DISABLED
+    can_cache = can_cache && _PyObject_HasDeferredRefcount(init);
+    #endif
+    if (can_cache) {
+        FT_ATOMIC_STORE_PTR_RELEASE(type->_spec_cache.init, init);
+    }
+    END_TYPE_LOCK();
+    return can_cache;
+}
+
 static void
 set_flags(PyTypeObject *self, unsigned long mask, unsigned long flags)
 {
diff --git a/Python/bytecodes.c b/Python/bytecodes.c
index d6be3cebf80724..3d280941b35244 100644
--- a/Python/bytecodes.c
+++ b/Python/bytecodes.c
@@ -3329,7 +3329,7 @@ dummy_func(
         };
 
         specializing op(_SPECIALIZE_CALL, (counter/1, callable[1], self_or_null[1], args[oparg] -- callable[1], self_or_null[1], args[oparg])) {
-            #if ENABLE_SPECIALIZATION
+            #if ENABLE_SPECIALIZATION_FT
             if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
                 next_instr = this_instr;
                 _Py_Specialize_Call(callable[0], next_instr, oparg + !PyStackRef_IsNull(self_or_null[0]));
@@ -3337,7 +3337,7 @@ dummy_func(
             }
             OPCODE_DEFERRED_INC(CALL);
             ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
-            #endif  /* ENABLE_SPECIALIZATION */
+            #endif  /* ENABLE_SPECIALIZATION_FT */
         }
 
         op(_MAYBE_EXPAND_METHOD, (callable[1], self_or_null[1], args[oparg] -- func[1], maybe_self[1], args[oparg])) {
@@ -3722,10 +3722,10 @@ dummy_func(
             DEOPT_IF(!PyStackRef_IsNull(null[0]));
             DEOPT_IF(!PyType_Check(callable_o));
             PyTypeObject *tp = (PyTypeObject *)callable_o;
-            DEOPT_IF(tp->tp_version_tag != type_version);
+            DEOPT_IF(FT_ATOMIC_LOAD_UINT32_RELAXED(tp->tp_version_tag) != type_version);
             assert(tp->tp_flags & Py_TPFLAGS_INLINE_VALUES);
             PyHeapTypeObject *cls = (PyHeapTypeObject *)callable_o;
-            PyFunctionObject *init_func = (PyFunctionObject *)cls->_spec_cache.init;
+            PyFunctionObject *init_func = (PyFunctionObject *)FT_ATOMIC_LOAD_PTR_ACQUIRE(cls->_spec_cache.init);
             PyCodeObject *code = (PyCodeObject *)init_func->func_code;
             DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize + _Py_InitCleanup.co_framesize));
             STAT_INC(CALL, hit);
@@ -3743,17 +3743,19 @@ dummy_func(
             _PyInterpreterFrame *shim = _PyFrame_PushTrampolineUnchecked(
                 tstate, (PyCodeObject *)&_Py_InitCleanup, 1, frame);
             assert(_PyFrame_GetBytecode(shim)[0].op.code == EXIT_INIT_CHECK);
+            assert(_PyFrame_GetBytecode(shim)[1].op.code == RETURN_VALUE);
             /* Push self onto stack of shim */
             shim->localsplus[0] = PyStackRef_DUP(self[0]);
             DEAD(init);
             DEAD(self);
-            init_frame = _PyEvalFramePushAndInit(
+            _PyInterpreterFrame *temp = _PyEvalFramePushAndInit(
                 tstate, init[0], NULL, args-1, oparg+1, NULL, shim);
             SYNC_SP();
-            if (init_frame == NULL) {
+            if (temp == NULL) {
                 _PyEval_FrameClearAndPop(tstate, shim);
                 ERROR_NO_POP();
             }
+            init_frame = temp;
             frame->return_offset = 1 + INLINE_CACHE_ENTRIES_CALL;
             /* Account for pushing the extra frame.
              * We don't check recursion depth here,
@@ -4000,8 +4002,10 @@ dummy_func(
             DEOPT_IF(callable_o != interp->callable_cache.list_append);
             assert(self_o != NULL);
             DEOPT_IF(!PyList_Check(self_o));
+            DEOPT_IF(!LOCK_OBJECT(self_o));
             STAT_INC(CALL, hit);
             int err = _PyList_AppendTakeRef((PyListObject *)self_o, PyStackRef_AsPyObjectSteal(arg));
+            UNLOCK_OBJECT(self_o);
             PyStackRef_CLOSE(self);
             PyStackRef_CLOSE(callable);
             ERROR_IF(err, error);
diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h
index 580814657608db..987ff2e6419669 100644
--- a/Python/executor_cases.c.h
+++ b/Python/executor_cases.c.h
@@ -4500,13 +4500,13 @@
                 JUMP_TO_JUMP_TARGET();
             }
             PyTypeObject *tp = (PyTypeObject *)callable_o;
-            if (tp->tp_version_tag != type_version) {
+            if (FT_ATOMIC_LOAD_UINT32_RELAXED(tp->tp_version_tag) != type_version) {
                 UOP_STAT_INC(uopcode, miss);
                 JUMP_TO_JUMP_TARGET();
             }
             assert(tp->tp_flags & Py_TPFLAGS_INLINE_VALUES);
             PyHeapTypeObject *cls = (PyHeapTypeObject *)callable_o;
-            PyFunctionObject *init_func = (PyFunctionObject *)cls->_spec_cache.init;
+            PyFunctionObject *init_func = (PyFunctionObject *)FT_ATOMIC_LOAD_PTR_ACQUIRE(cls->_spec_cache.init);
             PyCodeObject *code = (PyCodeObject *)init_func->func_code;
             if (!_PyThreadState_HasStackSpace(tstate, code->co_framesize + _Py_InitCleanup.co_framesize)) {
                 UOP_STAT_INC(uopcode, miss);
@@ -4537,25 +4537,29 @@
             _PyInterpreterFrame *shim = _PyFrame_PushTrampolineUnchecked(
                 tstate, (PyCodeObject *)&_Py_InitCleanup, 1, frame);
             assert(_PyFrame_GetBytecode(shim)[0].op.code == EXIT_INIT_CHECK);
+            assert(_PyFrame_GetBytecode(shim)[1].op.code == RETURN_VALUE);
             stack_pointer = _PyFrame_GetStackPointer(frame);
             /* Push self onto stack of shim */
             shim->localsplus[0] = PyStackRef_DUP(self[0]);
             _PyFrame_SetStackPointer(frame, stack_pointer);
-            init_frame = _PyEvalFramePushAndInit(
+            _PyInterpreterFrame *temp = _PyEvalFramePushAndInit(
                 tstate, init[0], NULL, args-1, oparg+1, NULL, shim);
             stack_pointer = _PyFrame_GetStackPointer(frame);
-            stack_pointer[-2 - oparg].bits = (uintptr_t)init_frame;
-            stack_pointer += -1 - oparg;
+            stack_pointer += -2 - oparg;
             assert(WITHIN_STACK_BOUNDS());
-            if (init_frame == NULL) {
+            if (temp == NULL) {
                 _PyEval_FrameClearAndPop(tstate, shim);
                 JUMP_TO_ERROR();
             }
+            init_frame = temp;
             frame->return_offset = 1 + INLINE_CACHE_ENTRIES_CALL;
             /* Account for pushing the extra frame.
              * We don't check recursion depth here,
              * as it will be checked after start_frame */
             tstate->py_recursion_remaining--;
+            stack_pointer[0].bits = (uintptr_t)init_frame;
+            stack_pointer += 1;
+            assert(WITHIN_STACK_BOUNDS());
             break;
         }
 
@@ -4908,8 +4912,13 @@
                 UOP_STAT_INC(uopcode, miss);
                 JUMP_TO_JUMP_TARGET();
             }
+            if (!LOCK_OBJECT(self_o)) {
+                UOP_STAT_INC(uopcode, miss);
+                JUMP_TO_JUMP_TARGET();
+            }
             STAT_INC(CALL, hit);
             int err = _PyList_AppendTakeRef((PyListObject *)self_o, PyStackRef_AsPyObjectSteal(arg));
+            UNLOCK_OBJECT(self_o);
             PyStackRef_CLOSE(self);
             PyStackRef_CLOSE(callable);
             if (err) JUMP_TO_ERROR();
diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h
index ef191f6f697f24..33f32aba1e5145 100644
--- a/Python/generated_cases.c.h
+++ b/Python/generated_cases.c.h
@@ -880,7 +880,7 @@
                 callable = &stack_pointer[-2 - oparg];
                 uint16_t counter = read_u16(&this_instr[1].cache);
                 (void)counter;
-                #if ENABLE_SPECIALIZATION
+                #if ENABLE_SPECIALIZATION_FT
                 if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
                     next_instr = this_instr;
                     _PyFrame_SetStackPointer(frame, stack_pointer);
@@ -890,7 +890,7 @@
                 }
                 OPCODE_DEFERRED_INC(CALL);
                 ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
-                #endif  /* ENABLE_SPECIALIZATION */
+                #endif  /* ENABLE_SPECIALIZATION_FT */
             }
             /* Skip 2 cache entries */
             // _MAYBE_EXPAND_METHOD
@@ -1048,10 +1048,10 @@
                 DEOPT_IF(!PyStackRef_IsNull(null[0]), CALL);
                 DEOPT_IF(!PyType_Check(callable_o), CALL);
                 PyTypeObject *tp = (PyTypeObject *)callable_o;
-                DEOPT_IF(tp->tp_version_tag != type_version, CALL);
+                DEOPT_IF(FT_ATOMIC_LOAD_UINT32_RELAXED(tp->tp_version_tag) != type_version, CALL);
                 assert(tp->tp_flags & Py_TPFLAGS_INLINE_VALUES);
                 PyHeapTypeObject *cls = (PyHeapTypeObject *)callable_o;
-                PyFunctionObject *init_func = (PyFunctionObject *)cls->_spec_cache.init;
+                PyFunctionObject *init_func = (PyFunctionObject *)FT_ATOMIC_LOAD_PTR_ACQUIRE(cls->_spec_cache.init);
                 PyCodeObject *code = (PyCodeObject *)init_func->func_code;
                 DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize + _Py_InitCleanup.co_framesize), CALL);
                 STAT_INC(CALL, hit);
@@ -1073,20 +1073,21 @@
                 _PyInterpreterFrame *shim = _PyFrame_PushTrampolineUnchecked(
                     tstate, (PyCodeObject *)&_Py_InitCleanup, 1, frame);
                 assert(_PyFrame_GetBytecode(shim)[0].op.code == EXIT_INIT_CHECK);
+                assert(_PyFrame_GetBytecode(shim)[1].op.code == RETURN_VALUE);
                 stack_pointer = _PyFrame_GetStackPointer(frame);
                 /* Push self onto stack of shim */
                 shim->localsplus[0] = PyStackRef_DUP(self[0]);
                 _PyFrame_SetStackPointer(frame, stack_pointer);
-                init_frame = _PyEvalFramePushAndInit(
+                _PyInterpreterFrame *temp = _PyEvalFramePushAndInit(
                     tstate, init[0], NULL, args-1, oparg+1, NULL, shim);
                 stack_pointer = _PyFrame_GetStackPointer(frame);
-                stack_pointer[-2 - oparg].bits = (uintptr_t)init_frame;
-                stack_pointer += -1 - oparg;
+                stack_pointer += -2 - oparg;
                 assert(WITHIN_STACK_BOUNDS());
-                if (init_frame == NULL) {
+                if (temp == NULL) {
                     _PyEval_FrameClearAndPop(tstate, shim);
                     goto error;
                 }
+                init_frame = temp;
                 frame->return_offset = 1 + INLINE_CACHE_ENTRIES_CALL;
                 /* Account for pushing the extra frame.
                  * We don't check recursion depth here,
@@ -1100,8 +1101,6 @@
                 // Eventually this should be the only occurrence of this code.
                 assert(tstate->interp->eval_frame == NULL);
                 _PyInterpreterFrame *temp = new_frame;
-                stack_pointer += -1;
-                assert(WITHIN_STACK_BOUNDS());
                 _PyFrame_SetStackPointer(frame, stack_pointer);
                 assert(new_frame->previous == frame || new_frame->previous->previous == frame);
                 CALL_STAT_INC(inlined_py_calls);
@@ -2383,8 +2382,10 @@
             DEOPT_IF(callable_o != interp->callable_cache.list_append, CALL);
             assert(self_o != NULL);
             DEOPT_IF(!PyList_Check(self_o), CALL);
+            DEOPT_IF(!LOCK_OBJECT(self_o), CALL);
             STAT_INC(CALL, hit);
             int err = _PyList_AppendTakeRef((PyListObject *)self_o, PyStackRef_AsPyObjectSteal(arg));
+            UNLOCK_OBJECT(self_o);
             PyStackRef_CLOSE(self);
             PyStackRef_CLOSE(callable);
             if (err) goto pop_3_error;
diff --git a/Python/perf_trampoline.c b/Python/perf_trampoline.c
index f144f7d436fe68..ad077dc861b0a7 100644
--- a/Python/perf_trampoline.c
+++ b/Python/perf_trampoline.c
@@ -484,11 +484,11 @@ _PyPerfTrampoline_Init(int activate)
         return -1;
     }
     if (!activate) {
-        tstate->interp->eval_frame = NULL;
+        _PyInterpreterState_SetEvalFrameFunc(tstate->interp, NULL);
         perf_status = PERF_STATUS_NO_INIT;
     }
     else {
-        tstate->interp->eval_frame = py_trampoline_evaluator;
+        _PyInterpreterState_SetEvalFrameFunc(tstate->interp, py_trampoline_evaluator);
         if (new_code_arena() < 0) {
             return -1;
         }
@@ -514,7 +514,7 @@ _PyPerfTrampoline_Fini(void)
     }
     PyThreadState *tstate = _PyThreadState_GET();
     if (tstate->interp->eval_frame == py_trampoline_evaluator) {
-        tstate->interp->eval_frame = NULL;
+        _PyInterpreterState_SetEvalFrameFunc(tstate->interp, NULL);
     }
     if (perf_status == PERF_STATUS_OK) {
         trampoline_api.free_state(trampoline_api.state);
diff --git a/Python/pystate.c b/Python/pystate.c
index 3ceae229f75cd0..839413a65a42fb 100644
--- a/Python/pystate.c
+++ b/Python/pystate.c
@@ -2838,7 +2838,9 @@ _PyInterpreterState_SetEvalFrameFunc(PyInterpreterState *interp,
     }
 #endif
     RARE_EVENT_INC(set_eval_frame_func);
+    _PyEval_StopTheWorld(interp);
     interp->eval_frame = eval_frame;
+    _PyEval_StartTheWorld(interp);
 }
 
 
diff --git a/Python/specialize.c b/Python/specialize.c
index 8b2d1a14c107e0..ec2cd7025e5054 100644
--- a/Python/specialize.c
+++ b/Python/specialize.c
@@ -1911,38 +1911,38 @@ _Py_Specialize_StoreSubscr(_PyStackRef container_st, _PyStackRef sub_st, _Py_COD
     unspecialize(instr);
 }
 
-/* Returns a borrowed reference.
- * The reference is only valid if guarded by a type version check.
- */
-static PyFunctionObject *
-get_init_for_simple_managed_python_class(PyTypeObject *tp)
+/* Returns a strong reference. */
+static PyObject *
+get_init_for_simple_managed_python_class(PyTypeObject *tp, unsigned int *tp_version)
 {
     assert(tp->tp_new == PyBaseObject_Type.tp_new);
     if (tp->tp_alloc != PyType_GenericAlloc) {
         SPECIALIZATION_FAIL(CALL, SPEC_FAIL_OVERRIDDEN);
         return NULL;
     }
-    if ((tp->tp_flags & Py_TPFLAGS_INLINE_VALUES) == 0) {
+    unsigned long tp_flags = PyType_GetFlags(tp);
+    if ((tp_flags & Py_TPFLAGS_INLINE_VALUES) == 0) {
         SPECIALIZATION_FAIL(CALL, SPEC_FAIL_CALL_INIT_NOT_INLINE_VALUES);
         return NULL;
     }
-    if (!(tp->tp_flags & Py_TPFLAGS_HEAPTYPE)) {
+    if (!(tp_flags & Py_TPFLAGS_HEAPTYPE)) {
         /* Is this possible? */
         SPECIALIZATION_FAIL(CALL, SPEC_FAIL_EXPECTED_ERROR);
         return NULL;
     }
-    PyObject *init = _PyType_Lookup(tp, &_Py_ID(__init__));
+    PyObject *init = _PyType_LookupRefAndVersion(tp, &_Py_ID(__init__), tp_version);
     if (init == NULL || !PyFunction_Check(init)) {
         SPECIALIZATION_FAIL(CALL, SPEC_FAIL_CALL_INIT_NOT_PYTHON);
+        Py_XDECREF(init);
         return NULL;
     }
     int kind = function_kind((PyCodeObject *)PyFunction_GET_CODE(init));
     if (kind != SIMPLE_FUNCTION) {
         SPECIALIZATION_FAIL(CALL, SPEC_FAIL_CALL_INIT_NOT_SIMPLE);
+        Py_DECREF(init);
         return NULL;
     }
-    ((PyHeapTypeObject *)tp)->_spec_cache.init = init;
-    return (PyFunctionObject *)init;
+    return init;
 }
 
 static int
@@ -1954,20 +1954,20 @@ specialize_class_call(PyObject *callable, _Py_CODEUNIT *instr, int nargs)
         int oparg = instr->op.arg;
         if (nargs == 1 && oparg == 1) {
             if (tp == &PyUnicode_Type) {
-                instr->op.code = CALL_STR_1;
+                specialize(instr, CALL_STR_1);
                 return 0;
             }
             else if (tp == &PyType_Type) {
-                instr->op.code = CALL_TYPE_1;
+                specialize(instr, CALL_TYPE_1);
                 return 0;
             }
             else if (tp == &PyTuple_Type) {
-                instr->op.code = CALL_TUPLE_1;
+                specialize(instr, CALL_TUPLE_1);
                 return 0;
             }
         }
         if (tp->tp_vectorcall != NULL) {
-            instr->op.code = CALL_BUILTIN_CLASS;
+            specialize(instr, CALL_BUILTIN_CLASS);
             return 0;
         }
         goto generic;
@@ -1976,19 +1976,25 @@ specialize_class_call(PyObject *callable, _Py_CODEUNIT *instr, int nargs)
         goto generic;
     }
     if (tp->tp_new == PyBaseObject_Type.tp_new) {
-        PyFunctionObject *init = get_init_for_simple_managed_python_class(tp);
-        if (type_get_version(tp, CALL) == 0) {
+        unsigned int tp_version = 0;
+        PyObject *init = get_init_for_simple_managed_python_class(tp, &tp_version);
+        if (!tp_version) {
+            SPECIALIZATION_FAIL(CALL, SPEC_FAIL_OUT_OF_VERSIONS);
+            Py_XDECREF(init);
             return -1;
         }
-        if (init != NULL) {
+        if (init != NULL && _PyType_CacheInitForSpecialization(
+                                (PyHeapTypeObject *)tp, init, tp_version)) {
             _PyCallCache *cache = (_PyCallCache *)(instr + 1);
-            write_u32(cache->func_version, tp->tp_version_tag);
-            _Py_SET_OPCODE(*instr, CALL_ALLOC_AND_ENTER_INIT);
+            write_u32(cache->func_version, tp_version);
+            specialize(instr, CALL_ALLOC_AND_ENTER_INIT);
+            Py_DECREF(init);
             return 0;
         }
+        Py_XDECREF(init);
     }
 generic:
-    instr->op.code = CALL_NON_PY_GENERAL;
+    specialize(instr, CALL_NON_PY_GENERAL);
     return 0;
 }
 
@@ -2004,7 +2010,7 @@ specialize_method_descriptor(PyMethodDescrObject *descr, _Py_CODEUNIT *instr,
                 SPECIALIZATION_FAIL(CALL, SPEC_FAIL_WRONG_NUMBER_ARGUMENTS);
                 return -1;
             }
-            instr->op.code = CALL_METHOD_DESCRIPTOR_NOARGS;
+            specialize(instr, CALL_METHOD_DESCRIPTOR_NOARGS);
             return 0;
         }
         case METH_O: {
@@ -2018,22 +2024,22 @@ specialize_method_descriptor(PyMethodDescrObject *descr, _Py_CODEUNIT *instr,
             bool pop = (next.op.code == POP_TOP);
             int oparg = instr->op.arg;
             if ((PyObject *)descr == list_append && oparg == 1 && pop) {
-                instr->op.code = CALL_LIST_APPEND;
+                specialize(instr, CALL_LIST_APPEND);
                 return 0;
             }
-            instr->op.code = CALL_METHOD_DESCRIPTOR_O;
+            specialize(instr, CALL_METHOD_DESCRIPTOR_O);
             return 0;
         }
         case METH_FASTCALL: {
-            instr->op.code = CALL_METHOD_DESCRIPTOR_FAST;
+            specialize(instr, CALL_METHOD_DESCRIPTOR_FAST);
             return 0;
         }
         case METH_FASTCALL | METH_KEYWORDS: {
-            instr->op.code = CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS;
+            specialize(instr, CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS);
             return 0;
         }
     }
-    instr->op.code = CALL_NON_PY_GENERAL;
+    specialize(instr, CALL_NON_PY_GENERAL);
     return 0;
 }
 
@@ -2063,12 +2069,15 @@ specialize_py_call(PyFunctionObject *func, _Py_CODEUNIT *instr, int nargs,
         return -1;
     }
     write_u32(cache->func_version, version);
+    uint8_t opcode;
     if (argcount == nargs + bound_method) {
-        instr->op.code = bound_method ? CALL_BOUND_METHOD_EXACT_ARGS : CALL_PY_EXACT_ARGS;
+        opcode =
+            bound_method ? CALL_BOUND_METHOD_EXACT_ARGS : CALL_PY_EXACT_ARGS;
     }
     else {
-        instr->op.code = bound_method ? CALL_BOUND_METHOD_GENERAL : CALL_PY_GENERAL;
+        opcode = bound_method ? CALL_BOUND_METHOD_GENERAL : CALL_PY_GENERAL;
     }
+    specialize(instr, opcode);
     return 0;
 }
 
@@ -2117,10 +2126,10 @@ specialize_c_call(PyObject *callable, _Py_CODEUNIT *instr, int nargs)
             /* len(o) */
             PyInterpreterState *interp = _PyInterpreterState_GET();
             if (callable == interp->callable_cache.len) {
-                instr->op.code = CALL_LEN;
+                specialize(instr, CALL_LEN);
                 return 0;
             }
-            instr->op.code = CALL_BUILTIN_O;
+            specialize(instr, CALL_BUILTIN_O);
             return 0;
         }
         case METH_FASTCALL: {
@@ -2128,19 +2137,19 @@ specialize_c_call(PyObject *callable, _Py_CODEUNIT *instr, int nargs)
                 /* isinstance(o1, o2) */
                 PyInterpreterState *interp = _PyInterpreterState_GET();
                 if (callable == interp->callable_cache.isinstance) {
-                    instr->op.code = CALL_ISINSTANCE;
+                    specialize(instr, CALL_ISINSTANCE);
                     return 0;
                 }
             }
-            instr->op.code = CALL_BUILTIN_FAST;
+            specialize(instr, CALL_BUILTIN_FAST);
             return 0;
         }
         case METH_FASTCALL | METH_KEYWORDS: {
-            instr->op.code = CALL_BUILTIN_FAST_WITH_KEYWORDS;
+            specialize(instr, CALL_BUILTIN_FAST_WITH_KEYWORDS);
             return 0;
         }
         default:
-            instr->op.code = CALL_NON_PY_GENERAL;
+            specialize(instr, CALL_NON_PY_GENERAL);
             return 0;
     }
 }
@@ -2150,10 +2159,9 @@ _Py_Specialize_Call(_PyStackRef callable_st, _Py_CODEUNIT *instr, int nargs)
 {
     PyObject *callable = PyStackRef_AsPyObjectBorrow(callable_st);
 
-    assert(ENABLE_SPECIALIZATION);
+    assert(ENABLE_SPECIALIZATION_FT);
     assert(_PyOpcode_Caches[CALL] == INLINE_CACHE_ENTRIES_CALL);
     assert(_Py_OPCODE(*instr) != INSTRUMENTED_CALL);
-    _PyCallCache *cache = (_PyCallCache *)(instr + 1);
     int fail;
     if (PyCFunction_CheckExact(callable)) {
         fail = specialize_c_call(callable, instr, nargs);
@@ -2178,19 +2186,11 @@ _Py_Specialize_Call(_PyStackRef callable_st, _Py_CODEUNIT *instr, int nargs)
         }
     }
     else {
-        instr->op.code = CALL_NON_PY_GENERAL;
+        specialize(instr, CALL_NON_PY_GENERAL);
         fail = 0;
     }
     if (fail) {
-        STAT_INC(CALL, failure);
-        assert(!PyErr_Occurred());
-        instr->op.code = CALL;
-        cache->counter = adaptive_counter_backoff(cache->counter);
-    }
-    else {
-        STAT_INC(CALL, success);
-        assert(!PyErr_Occurred());
-        cache->counter = adaptive_counter_cooldown();
+        unspecialize(instr);
     }
 }
 
@@ -2793,6 +2793,16 @@ _Py_Specialize_ContainsOp(_PyStackRef value_st, _Py_CODEUNIT *instr)
  * Ends with a RESUME so that it is not traced.
  * This is used as a plain code object, not a function,
  * so must not access globals or builtins.
+ * There are a few other constraints imposed on the code
+ * by the free-threaded build:
+ *
+ * 1. The RESUME instruction must not be executed. Otherwise we may attempt to
+ *    free the statically allocated TLBC array.
+ * 2. It must contain no specializable instructions. Specializing multiple
+ *    copies of the same bytecode is not thread-safe in free-threaded builds.
+ *
+ * This should be dynamically allocated if either of those restrictions need to
+ * be lifted.
  */
 
 #define NO_LOC_4 (128 | (PY_CODE_LOCATION_INFO_NONE << 3) | 3)
@@ -2802,6 +2812,13 @@ static const PyBytesObject no_location = {
     .ob_sval = { NO_LOC_4 }
 };
 
+#ifdef Py_GIL_DISABLED
+static _PyCodeArray init_cleanup_tlbc = {
+    .size = 1,
+    .entries = {(char*) &_Py_InitCleanup.co_code_adaptive},
+};
+#endif
+
 const struct _PyCode8 _Py_InitCleanup = {
     _PyVarObject_HEAD_INIT(&PyCode_Type, 3),
     .co_consts = (PyObject *)&_Py_SINGLETON(tuple_empty),
@@ -2817,6 +2834,9 @@ const struct _PyCode8 _Py_InitCleanup = {
     ._co_firsttraceable = 4,
     .co_stacksize = 2,
     .co_framesize = 2 + FRAME_SPECIALS_SIZE,
+#ifdef Py_GIL_DISABLED
+    .co_tlbc = &init_cleanup_tlbc,
+#endif
     .co_code_adaptive = {
         EXIT_INIT_CHECK, 0,
         RETURN_VALUE, 0,
