https://github.com/python/cpython/commit/b9d2ee687cfca6365e26e156b1e22824b16dabb8
commit: b9d2ee687cfca6365e26e156b1e22824b16dabb8
branch: main
author: Sam Gross <colesb...@gmail.com>
committer: encukou <encu...@gmail.com>
date: 2025-02-17T14:15:40+01:00
summary:

gh-129701: Fix a data race in `intern_common` in the free threaded build 
(GH-130089)

* gh-129701: Fix a data race in `intern_common` in the free threaded build

* Use a mutex to avoid potentially returning a non-immortalized string,
  because immortalization happens after the insertion into the interned
  dict.

* Use `Py_DECREF()` calls instead of `Py_SET_REFCNT(s, Py_REFCNT(s) - 2)`
  for thread-safety. This code path isn't performance sensistive, so
  just use `Py_DECREF()` unconditionally for simplicity.

files:
M Include/internal/pycore_global_objects.h
M Objects/unicodeobject.c

diff --git a/Include/internal/pycore_global_objects.h 
b/Include/internal/pycore_global_objects.h
index e3f7ac707f0c37..611f01d1c32710 100644
--- a/Include/internal/pycore_global_objects.h
+++ b/Include/internal/pycore_global_objects.h
@@ -64,6 +64,9 @@ struct _Py_static_objects {
     (interp)->cached_objects.NAME
 
 struct _Py_interp_cached_objects {
+#ifdef Py_GIL_DISABLED
+    PyMutex interned_mutex;
+#endif
     PyObject *interned_strings;
 
     /* object.__reduce__ */
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 371c358a4950c2..03f15b37598363 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -112,6 +112,14 @@ NOTE: In the interpreter's initialization phase, some 
globals are currently
 #  define _PyUnicode_CHECK(op) PyUnicode_Check(op)
 #endif
 
+#ifdef Py_GIL_DISABLED
+#  define LOCK_INTERNED(interp) PyMutex_Lock(&_Py_INTERP_CACHED_OBJECT(interp, 
interned_mutex))
+#  define UNLOCK_INTERNED(interp) 
PyMutex_Unlock(&_Py_INTERP_CACHED_OBJECT(interp, interned_mutex))
+#else
+#  define LOCK_INTERNED(interp)
+#  define UNLOCK_INTERNED(interp)
+#endif
+
 static inline char* _PyUnicode_UTF8(PyObject *op)
 {
     return FT_ATOMIC_LOAD_PTR_ACQUIRE(_PyCompactUnicodeObject_CAST(op)->utf8);
@@ -15814,11 +15822,13 @@ intern_common(PyInterpreterState *interp, PyObject *s 
/* stolen */,
     PyObject *interned = get_interned_dict(interp);
     assert(interned != NULL);
 
+    LOCK_INTERNED(interp);
     PyObject *t;
     {
         int res = PyDict_SetDefaultRef(interned, s, s, &t);
         if (res < 0) {
             PyErr_Clear();
+            UNLOCK_INTERNED(interp);
             return s;
         }
         else if (res == 1) {
@@ -15828,6 +15838,7 @@ intern_common(PyInterpreterState *interp, PyObject *s 
/* stolen */,
                     PyUnicode_CHECK_INTERNED(t) == SSTATE_INTERNED_MORTAL) {
                 immortalize_interned(t);
             }
+            UNLOCK_INTERNED(interp);
             return t;
         }
         else {
@@ -15844,12 +15855,8 @@ intern_common(PyInterpreterState *interp, PyObject *s 
/* stolen */,
     if (!_Py_IsImmortal(s)) {
         /* The two references in interned dict (key and value) are not counted.
         unicode_dealloc() and _PyUnicode_ClearInterned() take care of this. */
-        Py_SET_REFCNT(s, Py_REFCNT(s) - 2);
-#ifdef Py_REF_DEBUG
-        /* let's be pedantic with the ref total */
-        _Py_DecRefTotal(_PyThreadState_GET());
-        _Py_DecRefTotal(_PyThreadState_GET());
-#endif
+        Py_DECREF(s);
+        Py_DECREF(s);
     }
     FT_ATOMIC_STORE_UINT16_RELAXED(_PyUnicode_STATE(s).interned, 
SSTATE_INTERNED_MORTAL);
 
@@ -15864,6 +15871,7 @@ intern_common(PyInterpreterState *interp, PyObject *s 
/* stolen */,
         immortalize_interned(s);
     }
 
+    UNLOCK_INTERNED(interp);
     return s;
 }
 

_______________________________________________
Python-checkins mailing list -- python-checkins@python.org
To unsubscribe send an email to python-checkins-le...@python.org
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: arch...@mail-archive.com

Reply via email to