https://github.com/python/cpython/commit/98b2ed7e239c807f379cd2bf864f372d79064aac
commit: 98b2ed7e239c807f379cd2bf864f372d79064aac
branch: main
author: Neil Schemenauer <nas-git...@arctrix.com>
committer: nascheme <nas-git...@arctrix.com>
date: 2024-09-26T19:16:51-07:00
summary:

gh-116510: Fix crash due to shared immortal interned strings. (gh-124646)

files:
A 
Misc/NEWS.d/next/Core_and_Builtins/2024-09-26-18-21-06.gh-issue-116510.FacUWO.rst
M Objects/unicodeobject.c

diff --git 
a/Misc/NEWS.d/next/Core_and_Builtins/2024-09-26-18-21-06.gh-issue-116510.FacUWO.rst
 
b/Misc/NEWS.d/next/Core_and_Builtins/2024-09-26-18-21-06.gh-issue-116510.FacUWO.rst
new file mode 100644
index 00000000000000..e3741321006548
--- /dev/null
+++ 
b/Misc/NEWS.d/next/Core_and_Builtins/2024-09-26-18-21-06.gh-issue-116510.FacUWO.rst
@@ -0,0 +1,5 @@
+Fix a crash caused by immortal interned strings being shared between
+sub-interpreters that use basic single-phase init.  In that case, the string
+can be used by an interpreter that outlives the interpreter that created and
+interned it.  For interpreters that share obmalloc state, also share the
+interned dict with the main interpreter.
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index e9589cfe44f3bf..0f502ccdaf5767 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -282,13 +282,37 @@ hashtable_unicode_compare(const void *key1, const void 
*key2)
     }
 }
 
+/* Return true if this interpreter should share the main interpreter's
+   intern_dict.  That's important for interpreters which load basic
+   single-phase init extension modules (m_size == -1).  There could be interned
+   immortal strings that are shared between interpreters, due to the
+   PyDict_Update(mdict, m_copy) call in import_find_extension().
+
+   It's not safe to deallocate those strings until all interpreters that
+   potentially use them are freed.  By storing them in the main interpreter, we
+   ensure they get freed after all other interpreters are freed.
+*/
+static bool
+has_shared_intern_dict(PyInterpreterState *interp)
+{
+    PyInterpreterState *main_interp = _PyInterpreterState_Main();
+    return interp != main_interp  && interp->feature_flags & 
Py_RTFLAGS_USE_MAIN_OBMALLOC;
+}
+
 static int
 init_interned_dict(PyInterpreterState *interp)
 {
     assert(get_interned_dict(interp) == NULL);
-    PyObject *interned = interned = PyDict_New();
-    if (interned == NULL) {
-        return -1;
+    PyObject *interned;
+    if (has_shared_intern_dict(interp)) {
+        interned = get_interned_dict(_PyInterpreterState_Main());
+        Py_INCREF(interned);
+    }
+    else {
+        interned = PyDict_New();
+        if (interned == NULL) {
+            return -1;
+        }
     }
     _Py_INTERP_CACHED_OBJECT(interp, interned_strings) = interned;
     return 0;
@@ -299,7 +323,10 @@ clear_interned_dict(PyInterpreterState *interp)
 {
     PyObject *interned = get_interned_dict(interp);
     if (interned != NULL) {
-        PyDict_Clear(interned);
+        if (!has_shared_intern_dict(interp)) {
+            // only clear if the dict belongs to this interpreter
+            PyDict_Clear(interned);
+        }
         Py_DECREF(interned);
         _Py_INTERP_CACHED_OBJECT(interp, interned_strings) = NULL;
     }
@@ -15618,6 +15645,13 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp)
     }
     assert(PyDict_CheckExact(interned));
 
+    if (has_shared_intern_dict(interp)) {
+        // the dict doesn't belong to this interpreter, skip the debug
+        // checks on it and just clear the pointer to it
+        clear_interned_dict(interp);
+        return;
+    }
+
 #ifdef INTERNED_STATS
     fprintf(stderr, "releasing %zd interned strings\n",
             PyDict_GET_SIZE(interned));
@@ -16126,8 +16160,10 @@ _PyUnicode_Fini(PyInterpreterState *interp)
 {
     struct _Py_unicode_state *state = &interp->unicode;
 
-    // _PyUnicode_ClearInterned() must be called before _PyUnicode_Fini()
-    assert(get_interned_dict(interp) == NULL);
+    if (!has_shared_intern_dict(interp)) {
+        // _PyUnicode_ClearInterned() must be called before _PyUnicode_Fini()
+        assert(get_interned_dict(interp) == NULL);
+    }
 
     _PyUnicode_FiniEncodings(&state->fs_codec);
 

_______________________________________________
Python-checkins mailing list -- python-checkins@python.org
To unsubscribe send an email to python-checkins-le...@python.org
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: arch...@mail-archive.com

Reply via email to