https://github.com/python/cpython/commit/d66c08aa757f221c0e8893300edac105dfcde7e8
commit: d66c08aa757f221c0e8893300edac105dfcde7e8
branch: main
author: Sam Gross <colesb...@gmail.com>
committer: Yhg1s <tho...@python.org>
date: 2025-01-17T16:42:27+01:00
summary:

gh-128923: Use zero to indicate unassigned unique id (#128925)

In the free threading build, the per thread reference counting uses a
unique id for some objects to index into the local reference count
table. Use 0 instead of -1 to indicate that the id is not assigned. This
avoids bugs where zero-initialized heap type objects look like they have
a unique id assigned.
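
As an illustration, a minimal standalone C sketch (hypothetical struct and names, not CPython internals) of why 0 is a safer "unassigned" sentinel than -1 for zero-filled objects:

    /* Hypothetical sketch: a zero-filled object naturally reads as
     * "unassigned" when the sentinel is 0.  With a -1 sentinel, the same
     * zeroed field would look like a valid id and would index slot 0 of
     * the per-thread refcount table. */
    #include <assert.h>
    #include <stddef.h>
    #include <stdlib.h>

    typedef struct {
        ptrdiff_t unique_id;   /* 0 == no per-thread refcount slot assigned */
    } fake_heap_type;

    int main(void)
    {
        /* Heap type objects allocated via tp_alloc start out zero-filled. */
        fake_heap_type *ht = calloc(1, sizeof(*ht));
        if (ht == NULL) {
            return 1;
        }
        assert(ht->unique_id == 0);   /* correctly treated as "unassigned" */
        free(ht);
        return 0;
    }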

files:
M Include/internal/pycore_dict.h
M Include/internal/pycore_object.h
M Include/internal/pycore_uniqueid.h
M Lib/test/test_capi/test_type.py
M Modules/_testcapimodule.c
M Objects/codeobject.c
M Objects/dictobject.c
M Objects/typeobject.c
M Python/uniqueid.c

diff --git a/Include/internal/pycore_dict.h b/Include/internal/pycore_dict.h
index 74ac8f2148174c..f4c55ca6cf64d2 100644
--- a/Include/internal/pycore_dict.h
+++ b/Include/internal/pycore_dict.h
@@ -347,8 +347,7 @@ PyDictObject *_PyObject_MaterializeManagedDict_LockHeld(PyObject *);
 static inline Py_ssize_t
 _PyDict_UniqueId(PyDictObject *mp)
 {
-    // Offset by one so that _ma_watcher_tag=0 represents an unassigned id
-    return (Py_ssize_t)(mp->_ma_watcher_tag >> DICT_UNIQUE_ID_SHIFT) - 1;
+    return (Py_ssize_t)(mp->_ma_watcher_tag >> DICT_UNIQUE_ID_SHIFT);
 }
 
 static inline void
diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h
index 322305bc8c664a..19d657070ff221 100644
--- a/Include/internal/pycore_object.h
+++ b/Include/internal/pycore_object.h
@@ -336,20 +336,20 @@ _Py_THREAD_INCREF_OBJECT(PyObject *obj, Py_ssize_t unique_id)
 {
     _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
 
-    // Unsigned comparison so that `unique_id=-1`, which indicates that
-    // per-thread refcounting has been disabled on this object, is handled by
-    // the "else".
-    if ((size_t)unique_id < (size_t)tstate->refcounts.size) {
+    // The table index is `unique_id - 1` because 0 is not a valid unique id.
+    // Unsigned comparison so that `idx=-1` is handled by the "else".
+    size_t idx = (size_t)(unique_id - 1);
+    if (idx < (size_t)tstate->refcounts.size) {
 #  ifdef Py_REF_DEBUG
         _Py_INCREF_IncRefTotal();
 #  endif
         _Py_INCREF_STAT_INC();
-        tstate->refcounts.values[unique_id]++;
+        tstate->refcounts.values[idx]++;
     }
     else {
         // The slow path resizes the per-thread refcount array if necessary.
-        // It handles the unique_id=-1 case to keep the inlinable function smaller.
-        _PyObject_ThreadIncrefSlow(obj, unique_id);
+        // It handles the unique_id=0 case to keep the inlinable function smaller.
+        _PyObject_ThreadIncrefSlow(obj, idx);
     }
 }
 
@@ -386,15 +386,15 @@ _Py_THREAD_DECREF_OBJECT(PyObject *obj, Py_ssize_t unique_id)
 {
     _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
 
-    // Unsigned comparison so that `unique_id=-1`, which indicates that
-    // per-thread refcounting has been disabled on this object, is handled by
-    // the "else".
-    if ((size_t)unique_id < (size_t)tstate->refcounts.size) {
+    // The table index is `unique_id - 1` because 0 is not a valid unique id.
+    // Unsigned comparison so that `idx=-1` is handled by the "else".
+    size_t idx = (size_t)(unique_id - 1);
+    if (idx < (size_t)tstate->refcounts.size) {
 #  ifdef Py_REF_DEBUG
         _Py_DECREF_DecRefTotal();
 #  endif
         _Py_DECREF_STAT_INC();
-        tstate->refcounts.values[unique_id]--;
+        tstate->refcounts.values[idx]--;
     }
     else {
         // Directly decref the object if the id is not assigned or if
diff --git a/Include/internal/pycore_uniqueid.h b/Include/internal/pycore_uniqueid.h
index d3db49ddb78103..9d3c866a704894 100644
--- a/Include/internal/pycore_uniqueid.h
+++ b/Include/internal/pycore_uniqueid.h
@@ -16,7 +16,7 @@ extern "C" {
 // Per-thread reference counting is used along with deferred reference
 // counting to avoid scaling bottlenecks due to reference count contention.
 //
-// An id of -1 is used to indicate that an object doesn't use per-thread
+// An id of 0 is used to indicate that an object doesn't use per-thread
 // refcounting. This value is used when the object is finalized by the GC
 // and during interpreter shutdown to allow the object to be
 // deallocated promptly when the object's refcount reaches zero.
@@ -45,6 +45,8 @@ struct _Py_unique_id_pool {
     Py_ssize_t size;
 };
 
+#define _Py_INVALID_UNIQUE_ID 0
+
 // Assigns the next id from the pool of ids.
 extern Py_ssize_t _PyObject_AssignUniqueId(PyObject *obj);
 
@@ -65,7 +67,7 @@ extern void _PyObject_FinalizePerThreadRefcounts(_PyThreadStateImpl *tstate);
 extern void _PyObject_FinalizeUniqueIdPool(PyInterpreterState *interp);
 
 // Increfs the object, resizing the thread-local refcount array if necessary.
-PyAPI_FUNC(void) _PyObject_ThreadIncrefSlow(PyObject *obj, Py_ssize_t unique_id);
+PyAPI_FUNC(void) _PyObject_ThreadIncrefSlow(PyObject *obj, size_t idx);
 
 #endif   /* Py_GIL_DISABLED */
 
diff --git a/Lib/test/test_capi/test_type.py b/Lib/test/test_capi/test_type.py
index 92d056e802eeed..ffcaae73bca236 100644
--- a/Lib/test/test_capi/test_type.py
+++ b/Lib/test/test_capi/test_type.py
@@ -67,3 +67,10 @@ class FreezeThis(metaclass=Meta):
             Base.value = 3
         type_freeze(FreezeThis)
         self.assertEqual(FreezeThis.value, 2)
+
+    def test_manual_heap_type(self):
+        # gh-128923: test that a manually allocated and initialized heap type
+        # works correctly
+        ManualHeapType = _testcapi.ManualHeapType
+        for i in range(100):
+            self.assertIsInstance(ManualHeapType(), ManualHeapType)
diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c
index 7d304add5999d1..b657bb665bd5c5 100644
--- a/Modules/_testcapimodule.c
+++ b/Modules/_testcapimodule.c
@@ -4149,6 +4149,61 @@ static PyTypeObject ContainerNoGC_type = {
     .tp_new = ContainerNoGC_new,
 };
 
+/* Manually allocated heap type */
+
+typedef struct {
+    PyObject_HEAD
+    PyObject *dict;
+} ManualHeapType;
+
+static int
+ManualHeapType_traverse(PyObject *self, visitproc visit, void *arg)
+{
+    ManualHeapType *mht = (ManualHeapType *)self;
+    Py_VISIT(mht->dict);
+    return 0;
+}
+
+static void
+ManualHeapType_dealloc(PyObject *self)
+{
+    ManualHeapType *mht = (ManualHeapType *)self;
+    PyObject_GC_UnTrack(self);
+    Py_XDECREF(mht->dict);
+    PyTypeObject *type = Py_TYPE(self);
+    Py_TYPE(self)->tp_free(self);
+    Py_DECREF(type);
+}
+
+static PyObject *
+create_manual_heap_type(void)
+{
+    // gh-128923: Ensure that a heap type allocated through PyType_Type.tp_alloc
+    // with minimal initialization works correctly.
+    PyHeapTypeObject *heap_type = (PyHeapTypeObject *)PyType_Type.tp_alloc(&PyType_Type, 0);
+    if (heap_type == NULL) {
+        return NULL;
+    }
+    PyTypeObject* type = &heap_type->ht_type;
+    type->tp_basicsize = sizeof(ManualHeapType);
+    type->tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HEAPTYPE | Py_TPFLAGS_HAVE_GC;
+    type->tp_new = PyType_GenericNew;
+    type->tp_name = "ManualHeapType";
+    type->tp_dictoffset = offsetof(ManualHeapType, dict);
+    type->tp_traverse = ManualHeapType_traverse;
+    type->tp_dealloc = ManualHeapType_dealloc;
+    heap_type->ht_name = PyUnicode_FromString(type->tp_name);
+    if (!heap_type->ht_name) {
+        Py_DECREF(type);
+        return NULL;
+    }
+    heap_type->ht_qualname = Py_NewRef(heap_type->ht_name);
+    if (PyType_Ready(type) < 0) {
+        Py_DECREF(type);
+        return NULL;
+    }
+    return (PyObject *)type;
+}
 
 static struct PyModuleDef _testcapimodule = {
     PyModuleDef_HEAD_INIT,
@@ -4283,6 +4338,15 @@ PyInit__testcapi(void)
                            (PyObject *) &ContainerNoGC_type) < 0)
         return NULL;
 
+    PyObject *manual_heap_type = create_manual_heap_type();
+    if (manual_heap_type == NULL) {
+        return NULL;
+    }
+    if (PyModule_Add(m, "ManualHeapType", manual_heap_type) < 0) {
+        return NULL;
+    }
+
+
     /* Include tests from the _testcapi/ directory */
     if (_PyTestCapi_Init_Vectorcall(m) < 0) {
         return NULL;
diff --git a/Objects/codeobject.c b/Objects/codeobject.c
index 15b36a868a47df..539200c97c1206 100644
--- a/Objects/codeobject.c
+++ b/Objects/codeobject.c
@@ -1911,7 +1911,7 @@ code_dealloc(PyObject *self)
     Py_XDECREF(co->co_linetable);
     Py_XDECREF(co->co_exceptiontable);
 #ifdef Py_GIL_DISABLED
-    assert(co->_co_unique_id == -1);
+    assert(co->_co_unique_id == _Py_INVALID_UNIQUE_ID);
 #endif
     if (co->_co_cached != NULL) {
         Py_XDECREF(co->_co_cached->_co_code);
diff --git a/Objects/dictobject.c b/Objects/dictobject.c
index 82789d5e56f523..504e65b01ca959 100644
--- a/Objects/dictobject.c
+++ b/Objects/dictobject.c
@@ -1659,6 +1659,9 @@ _PyDict_EnablePerThreadRefcounting(PyObject *op)
     assert(PyDict_Check(op));
 #ifdef Py_GIL_DISABLED
     Py_ssize_t id = _PyObject_AssignUniqueId(op);
+    if (id == _Py_INVALID_UNIQUE_ID) {
+        return;
+    }
     if ((uint64_t)id >= (uint64_t)DICT_UNIQUE_ID_MAX) {
         _PyObject_ReleaseUniqueId(id);
         return;
@@ -1666,8 +1669,7 @@ _PyDict_EnablePerThreadRefcounting(PyObject *op)
 
     PyDictObject *mp = (PyDictObject *)op;
     assert((mp->_ma_watcher_tag >> DICT_UNIQUE_ID_SHIFT) == 0);
-    // Plus 1 so that _ma_watcher_tag=0 represents an unassigned id
-    mp->_ma_watcher_tag += ((uint64_t)id + 1) << DICT_UNIQUE_ID_SHIFT;
+    mp->_ma_watcher_tag += (uint64_t)id << DICT_UNIQUE_ID_SHIFT;
 #endif
 }
 
diff --git a/Objects/typeobject.c b/Objects/typeobject.c
index d8f5f6d9cb2366..93920341a179e8 100644
--- a/Objects/typeobject.c
+++ b/Objects/typeobject.c
@@ -6160,7 +6160,7 @@ type_dealloc(PyObject *self)
     Py_XDECREF(et->ht_module);
     PyMem_Free(et->_ht_tpname);
 #ifdef Py_GIL_DISABLED
-    assert(et->unique_id == -1);
+    assert(et->unique_id == _Py_INVALID_UNIQUE_ID);
 #endif
     et->ht_token = NULL;
     Py_TYPE(type)->tp_free((PyObject *)type);
diff --git a/Python/uniqueid.c b/Python/uniqueid.c
index b9f30713feeb57..64c3e6cfbbe825 100644
--- a/Python/uniqueid.c
+++ b/Python/uniqueid.c
@@ -86,7 +86,7 @@ _PyObject_AssignUniqueId(PyObject *obj)
     if (pool->freelist == NULL) {
         if (resize_interp_type_id_pool(pool) < 0) {
             UNLOCK_POOL(pool);
-            return -1;
+            return _Py_INVALID_UNIQUE_ID;
         }
     }
 
@@ -94,7 +94,9 @@ _PyObject_AssignUniqueId(PyObject *obj)
     pool->freelist = entry->next;
     entry->obj = obj;
     _PyObject_SetDeferredRefcount(obj);
-    Py_ssize_t unique_id = (entry - pool->table);
+    // The unique id is one plus the index of the entry in the table.
+    Py_ssize_t unique_id = (entry - pool->table) + 1;
+    assert(unique_id > 0);
     UNLOCK_POOL(pool);
     return unique_id;
 }
@@ -106,8 +108,9 @@ _PyObject_ReleaseUniqueId(Py_ssize_t unique_id)
     struct _Py_unique_id_pool *pool = &interp->unique_ids;
 
     LOCK_POOL(pool);
-    assert(unique_id >= 0 && unique_id < pool->size);
-    _Py_unique_id_entry *entry = &pool->table[unique_id];
+    assert(unique_id > 0 && unique_id <= pool->size);
+    Py_ssize_t idx = unique_id - 1;
+    _Py_unique_id_entry *entry = &pool->table[idx];
     entry->next = pool->freelist;
     pool->freelist = entry;
     UNLOCK_POOL(pool);
@@ -116,18 +119,18 @@ _PyObject_ReleaseUniqueId(Py_ssize_t unique_id)
 static Py_ssize_t
 clear_unique_id(PyObject *obj)
 {
-    Py_ssize_t id = -1;
+    Py_ssize_t id = _Py_INVALID_UNIQUE_ID;
     if (PyType_Check(obj)) {
         if (PyType_HasFeature((PyTypeObject *)obj, Py_TPFLAGS_HEAPTYPE)) {
             PyHeapTypeObject *ht = (PyHeapTypeObject *)obj;
             id = ht->unique_id;
-            ht->unique_id = -1;
+            ht->unique_id = _Py_INVALID_UNIQUE_ID;
         }
     }
     else if (PyCode_Check(obj)) {
         PyCodeObject *co = (PyCodeObject *)obj;
         id = co->_co_unique_id;
-        co->_co_unique_id = -1;
+        co->_co_unique_id = _Py_INVALID_UNIQUE_ID;
     }
     else if (PyDict_Check(obj)) {
         PyDictObject *mp = (PyDictObject *)obj;
@@ -141,23 +144,23 @@ void
 _PyObject_DisablePerThreadRefcounting(PyObject *obj)
 {
     Py_ssize_t id = clear_unique_id(obj);
-    if (id >= 0) {
+    if (id != _Py_INVALID_UNIQUE_ID) {
         _PyObject_ReleaseUniqueId(id);
     }
 }
 
 void
-_PyObject_ThreadIncrefSlow(PyObject *obj, Py_ssize_t unique_id)
+_PyObject_ThreadIncrefSlow(PyObject *obj, size_t idx)
 {
     _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
-    if (unique_id < 0 || resize_local_refcounts(tstate) < 0) {
+    if (((Py_ssize_t)idx) < 0 || resize_local_refcounts(tstate) < 0) {
         // just incref the object directly.
         Py_INCREF(obj);
         return;
     }
 
-    assert(unique_id < tstate->refcounts.size);
-    tstate->refcounts.values[unique_id]++;
+    assert(idx < (size_t)tstate->refcounts.size);
+    tstate->refcounts.values[idx]++;
 #ifdef Py_REF_DEBUG
     _Py_IncRefTotal((PyThreadState *)tstate);
 #endif
@@ -217,7 +220,7 @@ _PyObject_FinalizeUniqueIdPool(PyInterpreterState *interp)
         if (obj != NULL) {
             Py_ssize_t id = clear_unique_id(obj);
             (void)id;
-            assert(id == i);
+            assert(id == i + 1);
         }
     }
     PyMem_Free(pool->table);
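
As an illustration of the `unique_id - 1` unsigned-comparison indexing used in _Py_THREAD_INCREF_OBJECT above, here is a minimal standalone C sketch; the table, sizes, and function names are hypothetical, not CPython's:

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    #define TABLE_SIZE 8                      /* hypothetical per-thread table */

    static uint32_t local_refcounts[TABLE_SIZE];

    static void thread_incref(ptrdiff_t unique_id)
    {
        /* Valid ids are 1..TABLE_SIZE; 0 means "unassigned".  Subtracting 1
         * and comparing as unsigned makes id 0 wrap to SIZE_MAX, so both
         * "unassigned" and "out of range" take the else branch with a
         * single comparison. */
        size_t idx = (size_t)(unique_id - 1);
        if (idx < (size_t)TABLE_SIZE) {
            local_refcounts[idx]++;           /* fast per-thread path */
        }
        else {
            printf("id %td: fall back to a direct incref\n", unique_id);
        }
    }

    int main(void)
    {
        thread_incref(1);                     /* uses slot 0 */
        thread_incref(0);                     /* unassigned -> else branch */
        thread_incref(TABLE_SIZE + 5);        /* out of range -> else branch */
        printf("slot 0 count = %u\n", local_refcounts[0]);
        return 0;
    }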
