https://github.com/python/cpython/commit/9718880ba9312f844902a684d97e3f0d77cd9f4b
commit: 9718880ba9312f844902a684d97e3f0d77cd9f4b
branch: 3.13
author: Miss Islington (bot) <31488909+miss-isling...@users.noreply.github.com>
committer: picnixz <10796600+picn...@users.noreply.github.com>
date: 2025-05-10T07:55:47Z
summary:

[3.13] gh-133009: fix UAF in `xml.etree.ElementTree.Element.__deepcopy__` 
(GH-133010) (#133806)

gh-133009: fix UAF in `xml.etree.ElementTree.Element.__deepcopy__` (GH-133010)
(cherry picked from commit 116a9f9b3775c904c98e390d896200e1641498aa)

Co-authored-by: Bénédikt Tran <10796600+picn...@users.noreply.github.com>

files:
A Misc/NEWS.d/next/Library/2025-04-26-15-50-12.gh-issue-133009.etBuz5.rst
M Lib/test/test_xml_etree.py
M Modules/_elementtree.c

diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py
index 3878e3a9cd5732..bd75c00e50322c 100644
--- a/Lib/test/test_xml_etree.py
+++ b/Lib/test/test_xml_etree.py
@@ -2960,6 +2960,50 @@ def element_factory(x, y):
         del b
         gc_collect()
 
+    def test_deepcopy_clear(self):
+        # Prevent crashes when __deepcopy__() clears the children list.
+        # See https://github.com/python/cpython/issues/133009.
+        class X(ET.Element):
+            def __deepcopy__(self, memo):
+                root.clear()
+                return self
+
+        root = ET.Element('a')
+        evil = X('x')
+        root.extend([evil, ET.Element('y')])
+        if is_python_implementation():
+            # Mutating a list over which we iterate raises an error.
+            self.assertRaises(RuntimeError, copy.deepcopy, root)
+        else:
+            c = copy.deepcopy(root)
+            # In the C implementation, we can still copy the evil element.
+            self.assertListEqual(list(c), [evil])
+
+    def test_deepcopy_grow(self):
+        # Prevent crashes when __deepcopy__() mutates the children list.
+        # See https://github.com/python/cpython/issues/133009.
+        a = ET.Element('a')
+        b = ET.Element('b')
+        c = ET.Element('c')
+
+        class X(ET.Element):
+            def __deepcopy__(self, memo):
+                root.append(a)
+                root.append(b)
+                return self
+
+        root = ET.Element('top')
+        evil1, evil2 = X('1'), X('2')
+        root.extend([evil1, c, evil2])
+        children = list(copy.deepcopy(root))
+        # mock deep copies
+        self.assertIs(children[0], evil1)
+        self.assertIs(children[2], evil2)
+        # true deep copies
+        self.assertEqual(children[1].tag, c.tag)
+        self.assertEqual([c.tag for c in children[3:]],
+                         [a.tag, b.tag, a.tag, b.tag])
+
 
 class MutationDeleteElementPath(str):
     def __new__(cls, elem, *args):
diff --git 
a/Misc/NEWS.d/next/Library/2025-04-26-15-50-12.gh-issue-133009.etBuz5.rst 
b/Misc/NEWS.d/next/Library/2025-04-26-15-50-12.gh-issue-133009.etBuz5.rst
new file mode 100644
index 00000000000000..1f7155c6a40d00
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-04-26-15-50-12.gh-issue-133009.etBuz5.rst
@@ -0,0 +1,3 @@
+:mod:`xml.etree.ElementTree`: Fix a crash in :meth:`Element.__deepcopy__
+<object.__deepcopy__>` when the element is concurrently mutated.
+Patch by Bénédikt Tran.
diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c
index 3dd60c2eace07e..47a601b270aff6 100644
--- a/Modules/_elementtree.c
+++ b/Modules/_elementtree.c
@@ -810,6 +810,8 @@ _elementtree_Element___deepcopy___impl(ElementObject *self, 
PyObject *memo)
 
     PyTypeObject *tp = Py_TYPE(self);
     elementtreestate *st = get_elementtree_state_by_type(tp);
+    // The deepcopy() helper takes care of incrementing the refcount
+    // of the object to copy so to avoid use-after-frees.
     tag = deepcopy(st, self->tag, memo);
     if (!tag)
         return NULL;
@@ -844,11 +846,13 @@ _elementtree_Element___deepcopy___impl(ElementObject 
*self, PyObject *memo)
 
     assert(!element->extra || !element->extra->length);
     if (self->extra) {
-        if (element_resize(element, self->extra->length) < 0)
+        Py_ssize_t expected_count = self->extra->length;
+        if (element_resize(element, expected_count) < 0) {
+            assert(!element->extra->length);
             goto error;
+        }
 
-        // TODO(picnixz): check for an evil child's __deepcopy__ on 'self'
-        for (i = 0; i < self->extra->length; i++) {
+        for (i = 0; self->extra && i < self->extra->length; i++) {
             PyObject* child = deepcopy(st, self->extra->children[i], memo);
             if (!child || !Element_Check(st, child)) {
                 if (child) {
@@ -858,11 +862,24 @@ _elementtree_Element___deepcopy___impl(ElementObject 
*self, PyObject *memo)
                 element->extra->length = i;
                 goto error;
             }
+            if (self->extra && expected_count != self->extra->length) {
+                // 'self->extra' got mutated and 'element' may not have
+                // sufficient space to hold the next iteration's item.
+                expected_count = self->extra->length;
+                if (element_resize(element, expected_count) < 0) {
+                    Py_DECREF(child);
+                    element->extra->length = i;
+                    goto error;
+                }
+            }
             element->extra->children[i] = child;
         }
 
         assert(!element->extra->length);
-        element->extra->length = self->extra->length;
+        // The original 'self->extra' may be gone at this point if deepcopy()
+        // has side-effects. However, 'i' is the number of copied items that
+        // we were able to successfully copy.
+        element->extra->length = i;
     }
 
     /* add object to memo dictionary (so deepcopy won't visit it again) */
@@ -905,13 +922,20 @@ deepcopy(elementtreestate *st, PyObject *object, PyObject 
*memo)
                     break;
                 }
             }
-            if (simple)
+            if (simple) {
                 return PyDict_Copy(object);
+            }
             /* Fall through to general case */
         }
         else if (Element_CheckExact(st, object)) {
-            return _elementtree_Element___deepcopy___impl(
+            // The __deepcopy__() call may call arbitrary code even if the
+            // object to copy is a built-in XML element (one of its children
+            // any of its parents in its own __deepcopy__() implementation).
+            Py_INCREF(object);
+            PyObject *res = _elementtree_Element___deepcopy___impl(
                 (ElementObject *)object, memo);
+            Py_DECREF(object);
+            return res;
         }
     }
 
@@ -922,8 +946,11 @@ deepcopy(elementtreestate *st, PyObject *object, PyObject 
*memo)
         return NULL;
     }
 
+    Py_INCREF(object);
     PyObject *args[2] = {object, memo};
-    return PyObject_Vectorcall(st->deepcopy_obj, args, 2, NULL);
+    PyObject *res = PyObject_Vectorcall(st->deepcopy_obj, args, 2, NULL);
+    Py_DECREF(object);
+    return res;
 }
 
 

_______________________________________________
Python-checkins mailing list -- python-checkins@python.org
To unsubscribe send an email to python-checkins-le...@python.org
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: arch...@mail-archive.com

Reply via email to