https://github.com/python/cpython/commit/bb25f7280af30831fffa3345b4fc93798949c6c6
commit: bb25f7280af30831fffa3345b4fc93798949c6c6
branch: main
author: Neil Schemenauer <[email protected]>
committer: nascheme <[email protected]>
date: 2026-01-17T01:54:43Z
summary:

gh-132657: Add maybe_enable_deferred_ref_count() (gh-142843)

If we are specializing to `LOAD_GLOBAL_MODULE` or `LOAD_ATTR_MODULE`, try
to enable deferred reference counting for the value, if the object is owned by
a different thread.  This applies to the free-threaded build only and should
improve scaling of multi-threaded programs.

files:
A Misc/NEWS.d/next/Core_and_Builtins/2025-12-24-13-19-16.gh-issue-132657._P4DDb.rst
M Include/internal/pycore_dict.h
M Objects/dictobject.c
M Python/specialize.c
M Tools/ftscalingbench/ftscalingbench.py

diff --git a/Include/internal/pycore_dict.h b/Include/internal/pycore_dict.h
index a7005a3b8e2fab..950547cb002f4c 100644
--- a/Include/internal/pycore_dict.h
+++ b/Include/internal/pycore_dict.h
@@ -114,6 +114,7 @@ extern Py_ssize_t _Py_dict_lookup_threadsafe_stackref(PyDictObject *mp, PyObject
 
 extern int _PyDict_GetMethodStackRef(PyDictObject *dict, PyObject *name, _PyStackRef *method);
 
+extern Py_ssize_t _PyDict_LookupIndexAndValue(PyDictObject *, PyObject *, PyObject **);
 extern Py_ssize_t _PyDict_LookupIndex(PyDictObject *, PyObject *);
 extern Py_ssize_t _PyDictKeys_StringLookup(PyDictKeysObject* dictkeys, PyObject *key);
 
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-12-24-13-19-16.gh-issue-132657._P4DDb.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-12-24-13-19-16.gh-issue-132657._P4DDb.rst
new file mode 100644
index 00000000000000..bbc9611b748fde
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-12-24-13-19-16.gh-issue-132657._P4DDb.rst
@@ -0,0 +1,6 @@
+If we are specializing to ``LOAD_GLOBAL_MODULE`` or ``LOAD_ATTR_MODULE``, try
+to enable deferred reference counting for the value, if the object is owned by
+a different thread.  This applies to the free-threaded build only and should
+improve scaling of multi-threaded programs. Note that when deferred reference
+counting is enabled, the object will be deallocated by the GC, rather than by
+:c:func:`Py_DECREF`.
diff --git a/Objects/dictobject.c b/Objects/dictobject.c
index a4e2fd19cefb63..aea9ea84202b07 100644
--- a/Objects/dictobject.c
+++ b/Objects/dictobject.c
@@ -2349,10 +2349,9 @@ dict_unhashable_type(PyObject *key)
 }
 
 Py_ssize_t
-_PyDict_LookupIndex(PyDictObject *mp, PyObject *key)
+_PyDict_LookupIndexAndValue(PyDictObject *mp, PyObject *key, PyObject **value)
 {
     // TODO: Thread safety
-    PyObject *value;
     assert(PyDict_CheckExact((PyObject*)mp));
     assert(PyUnicode_CheckExact(key));
 
@@ -2362,7 +2361,14 @@ _PyDict_LookupIndex(PyDictObject *mp, PyObject *key)
         return -1;
     }
 
-    return _Py_dict_lookup(mp, key, hash, &value);
+    return _Py_dict_lookup(mp, key, hash, value);
+}
+
+Py_ssize_t
+_PyDict_LookupIndex(PyDictObject *mp, PyObject *key)
+{
+    PyObject *value; // discarded
+    return _PyDict_LookupIndexAndValue(mp, key, &value);
 }
 
 /* Same as PyDict_GetItemWithError() but with hash supplied by caller.
diff --git a/Python/specialize.c b/Python/specialize.c
index 2f82fb4ff4ef84..432053f85221a3 100644
--- a/Python/specialize.c
+++ b/Python/specialize.c
@@ -358,6 +358,21 @@ static int function_kind(PyCodeObject *code);
 static bool function_check_args(PyObject *o, int expected_argcount, int opcode);
 static uint32_t function_get_version(PyObject *o, int opcode);
 
+#ifdef Py_GIL_DISABLED
+static void
+maybe_enable_deferred_ref_count(PyObject *op)
+{
+    if (!_Py_IsOwnedByCurrentThread(op)) {
+        // For module level variables that are heavily used from multiple
+        // threads, deferred reference counting provides good scaling
+        // benefits.  The downside is that the object will only be deallocated
+        // by a GC run.
+        PyUnstable_Object_EnableDeferredRefcount(op);
+    }
+}
+#endif
+
+
 static int
 specialize_module_load_attr_lock_held(PyDictObject *dict, _Py_CODEUNIT *instr, PyObject *name)
 {
@@ -366,7 +381,8 @@ specialize_module_load_attr_lock_held(PyDictObject *dict, _Py_CODEUNIT *instr, P
         SPECIALIZATION_FAIL(LOAD_ATTR, SPEC_FAIL_ATTR_NON_STRING);
         return -1;
     }
-    Py_ssize_t index = _PyDict_LookupIndex(dict, name);
+    PyObject *value;
+    Py_ssize_t index = _PyDict_LookupIndexAndValue(dict, name, &value);
     assert(index != DKIX_ERROR);
     if (index != (uint16_t)index) {
         SPECIALIZATION_FAIL(LOAD_ATTR,
@@ -381,6 +397,9 @@ specialize_module_load_attr_lock_held(PyDictObject *dict, _Py_CODEUNIT *instr, P
         SPECIALIZATION_FAIL(LOAD_ATTR, SPEC_FAIL_OUT_OF_VERSIONS);
         return -1;
     }
+#ifdef Py_GIL_DISABLED
+    maybe_enable_deferred_ref_count(value);
+#endif
     write_u32(cache->version, keys_version);
     cache->index = (uint16_t)index;
     specialize(instr, LOAD_ATTR_MODULE);
@@ -1269,7 +1288,6 @@ specialize_attr_loadclassattr(PyObject *owner, _Py_CODEUNIT *instr,
     return 1;
 }
 
-
 static void
 specialize_load_global_lock_held(
     PyObject *globals, PyObject *builtins,
@@ -1289,7 +1307,12 @@ specialize_load_global_lock_held(
         SPECIALIZATION_FAIL(LOAD_GLOBAL, SPEC_FAIL_LOAD_GLOBAL_NON_STRING_OR_SPLIT);
         goto fail;
     }
+#ifdef Py_GIL_DISABLED
+    PyObject *value;
+    Py_ssize_t index = _PyDict_LookupIndexAndValue((PyDictObject *)globals, name, &value);
+#else
     Py_ssize_t index = _PyDictKeys_StringLookup(globals_keys, name);
+#endif
     if (index == DKIX_ERROR) {
         SPECIALIZATION_FAIL(LOAD_GLOBAL, SPEC_FAIL_EXPECTED_ERROR);
         goto fail;
@@ -1310,6 +1333,9 @@ specialize_load_global_lock_held(
             SPECIALIZATION_FAIL(LOAD_GLOBAL, SPEC_FAIL_OUT_OF_RANGE);
             goto fail;
         }
+#ifdef Py_GIL_DISABLED
+        maybe_enable_deferred_ref_count(value);
+#endif
         cache->index = (uint16_t)index;
         cache->module_keys_version = (uint16_t)keys_version;
         specialize(instr, LOAD_GLOBAL_MODULE);
diff --git a/Tools/ftscalingbench/ftscalingbench.py b/Tools/ftscalingbench/ftscalingbench.py
index 097a065f368f30..c2bd7c3880bc90 100644
--- a/Tools/ftscalingbench/ftscalingbench.py
+++ b/Tools/ftscalingbench/ftscalingbench.py
@@ -21,6 +21,7 @@
 # > echo "0" | sudo tee /sys/devices/system/cpu/cpufreq/boost
 #
 
+import copy
 import math
 import os
 import queue
@@ -214,6 +215,14 @@ def instantiate_dataclass():
     for _ in range(1000 * WORK_SCALE):
         obj = MyDataClass(x=1, y=2, z=3)
 
+
+@register_benchmark
+def deepcopy():
+    x = {'list': [1, 2], 'tuple': (1, None)}
+    for i in range(40 * WORK_SCALE):
+        copy.deepcopy(x)
+
+
 def bench_one_thread(func):
     t0 = time.perf_counter_ns()
     func()

_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: [email protected]

Reply via email to