https://github.com/python/cpython/commit/812ef66759f9fe27d68283d8e67d6cd3eb512be2
commit: 812ef66759f9fe27d68283d8e67d6cd3eb512be2
branch: main
author: Serhiy Storchaka <[email protected]>
committer: serhiy-storchaka <[email protected]>
date: 2026-02-26T11:30:08+02:00
summary:

gh-145202: Fix crash in unicodedata's GraphemeBreakIterator and Segment (GH-145216)

Remove the tp_clear slots and make Segment members read-only.

Also add tests for reference loops involving GraphemeBreakIterator
and Segment.

files:
M Lib/test/test_unicodedata.py
M Modules/unicodedata.c

diff --git a/Lib/test/test_unicodedata.py b/Lib/test/test_unicodedata.py
index 30a26751d3ac54..8ecb0df2f8e5dd 100644
--- a/Lib/test/test_unicodedata.py
+++ b/Lib/test/test_unicodedata.py
@@ -12,7 +12,9 @@
 import sys
 import unicodedata
 import unittest
+import weakref
 from test.support import (
+    gc_collect,
     open_urlresource,
     requires_resource,
     script_helper,
@@ -1338,6 +1340,28 @@ def run_grapheme_break_tests(self, testdata):
                     self.assertEqual([x.start for x in result], breaks[i:-1], comment)
                     self.assertEqual([x.end for x in result], breaks[i+1:], comment)
 
+    def test_reference_loops(self):
+        # Test that reference loops involving GraphemeBreakIterator or
+        # Segment can be broken by the garbage collector.
+        class S(str):
+            pass
+
+        s = S('abc')
+        s.ref = unicodedata.iter_graphemes(s)
+        wr = weakref.ref(s)
+        del s
+        self.assertIsNotNone(wr())
+        gc_collect()
+        self.assertIsNone(wr())
+
+        s = S('abc')
+        s.ref = next(unicodedata.iter_graphemes(s))
+        wr = weakref.ref(s)
+        del s
+        self.assertIsNotNone(wr())
+        gc_collect()
+        self.assertIsNone(wr())
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/Modules/unicodedata.c b/Modules/unicodedata.c
index 401f64e7416944..2c67c23d98ed81 100644
--- a/Modules/unicodedata.c
+++ b/Modules/unicodedata.c
@@ -1925,13 +1925,6 @@ Segment_traverse(PyObject *self, visitproc visit, void *arg)
     return 0;
 }
 
-static int
-Segment_clear(PyObject *self)
-{
-    Py_CLEAR(((SegmentObject *)self)->string);
-    return 0;
-}
-
 static PyObject *
 Segment_str(PyObject *self)
 {
@@ -1947,9 +1940,9 @@ Segment_repr(PyObject *self)
 }
 
 static PyMemberDef Segment_members[] = {
-    {"start", Py_T_PYSSIZET, offsetof(SegmentObject, start), 0,
+    {"start", Py_T_PYSSIZET, offsetof(SegmentObject, start), Py_READONLY,
         PyDoc_STR("grapheme start")},
-    {"end", Py_T_PYSSIZET, offsetof(SegmentObject, end), 0,
+    {"end", Py_T_PYSSIZET, offsetof(SegmentObject, end), Py_READONLY,
         PyDoc_STR("grapheme end")},
     {NULL}  /* Sentinel */
 };
@@ -1957,7 +1950,6 @@ static PyMemberDef Segment_members[] = {
 static PyType_Slot Segment_slots[] = {
     {Py_tp_dealloc, Segment_dealloc},
     {Py_tp_traverse, Segment_traverse},
-    {Py_tp_clear, Segment_clear},
     {Py_tp_str, Segment_str},
     {Py_tp_repr, Segment_repr},
     {Py_tp_members, Segment_members},
@@ -2001,13 +1993,6 @@ GBI_traverse(PyObject *self, visitproc visit, void *arg)
     return 0;
 }
 
-static int
-GBI_clear(PyObject *self)
-{
-    Py_CLEAR(((GraphemeBreakIterator *)self)->iter.str);
-    return 0;
-}
-
 static PyObject *
 GBI_iternext(PyObject *self)
 {
@@ -2038,7 +2023,6 @@ static PyType_Slot GraphemeBreakIterator_slots[] = {
     {Py_tp_iter, PyObject_SelfIter},
     {Py_tp_iternext, GBI_iternext},
     {Py_tp_traverse, GBI_traverse},
-    {Py_tp_clear, GBI_clear},
     {0, 0},
 };
 

_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: [email protected]

Reply via email to