https://github.com/python/cpython/commit/b9d2ee687cfca6365e26e156b1e22824b16dabb8 commit: b9d2ee687cfca6365e26e156b1e22824b16dabb8 branch: main author: Sam Gross <colesb...@gmail.com> committer: encukou <encu...@gmail.com> date: 2025-02-17T14:15:40+01:00 summary:
gh-129701: Fix a data race in `intern_common` in the free threaded build (GH-130089) * gh-129701: Fix a data race in `intern_common` in the free threaded build * Use a mutex to avoid potentially returning a non-immortalized string, because immortalization happens after the insertion into the interned dict. * Use `Py_DECREF()` calls instead of `Py_SET_REFCNT(s, Py_REFCNT(s) - 2)` for thread-safety. This code path isn't performance sensistive, so just use `Py_DECREF()` unconditionally for simplicity. files: M Include/internal/pycore_global_objects.h M Objects/unicodeobject.c diff --git a/Include/internal/pycore_global_objects.h b/Include/internal/pycore_global_objects.h index e3f7ac707f0c37..611f01d1c32710 100644 --- a/Include/internal/pycore_global_objects.h +++ b/Include/internal/pycore_global_objects.h @@ -64,6 +64,9 @@ struct _Py_static_objects { (interp)->cached_objects.NAME struct _Py_interp_cached_objects { +#ifdef Py_GIL_DISABLED + PyMutex interned_mutex; +#endif PyObject *interned_strings; /* object.__reduce__ */ diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 371c358a4950c2..03f15b37598363 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -112,6 +112,14 @@ NOTE: In the interpreter's initialization phase, some globals are currently # define _PyUnicode_CHECK(op) PyUnicode_Check(op) #endif +#ifdef Py_GIL_DISABLED +# define LOCK_INTERNED(interp) PyMutex_Lock(&_Py_INTERP_CACHED_OBJECT(interp, interned_mutex)) +# define UNLOCK_INTERNED(interp) PyMutex_Unlock(&_Py_INTERP_CACHED_OBJECT(interp, interned_mutex)) +#else +# define LOCK_INTERNED(interp) +# define UNLOCK_INTERNED(interp) +#endif + static inline char* _PyUnicode_UTF8(PyObject *op) { return FT_ATOMIC_LOAD_PTR_ACQUIRE(_PyCompactUnicodeObject_CAST(op)->utf8); @@ -15814,11 +15822,13 @@ intern_common(PyInterpreterState *interp, PyObject *s /* stolen */, PyObject *interned = get_interned_dict(interp); assert(interned != NULL); + LOCK_INTERNED(interp); PyObject *t; { int res = PyDict_SetDefaultRef(interned, s, s, &t); if (res < 0) { PyErr_Clear(); + UNLOCK_INTERNED(interp); return s; } else if (res == 1) { @@ -15828,6 +15838,7 @@ intern_common(PyInterpreterState *interp, PyObject *s /* stolen */, PyUnicode_CHECK_INTERNED(t) == SSTATE_INTERNED_MORTAL) { immortalize_interned(t); } + UNLOCK_INTERNED(interp); return t; } else { @@ -15844,12 +15855,8 @@ intern_common(PyInterpreterState *interp, PyObject *s /* stolen */, if (!_Py_IsImmortal(s)) { /* The two references in interned dict (key and value) are not counted. unicode_dealloc() and _PyUnicode_ClearInterned() take care of this. */ - Py_SET_REFCNT(s, Py_REFCNT(s) - 2); -#ifdef Py_REF_DEBUG - /* let's be pedantic with the ref total */ - _Py_DecRefTotal(_PyThreadState_GET()); - _Py_DecRefTotal(_PyThreadState_GET()); -#endif + Py_DECREF(s); + Py_DECREF(s); } FT_ATOMIC_STORE_UINT16_RELAXED(_PyUnicode_STATE(s).interned, SSTATE_INTERNED_MORTAL); @@ -15864,6 +15871,7 @@ intern_common(PyInterpreterState *interp, PyObject *s /* stolen */, immortalize_interned(s); } + UNLOCK_INTERNED(interp); return s; } _______________________________________________ Python-checkins mailing list -- python-checkins@python.org To unsubscribe send an email to python-checkins-le...@python.org https://mail.python.org/mailman3/lists/python-checkins.python.org/ Member address: arch...@mail-archive.com