https://github.com/python/cpython/commit/588bb6ddf4388cefc926006e9e4752b7e62ea077
commit: 588bb6ddf4388cefc926006e9e4752b7e62ea077
branch: 3.13
author: Bénédikt Tran <10796600+picn...@users.noreply.github.com>
committer: picnixz <10796600+picn...@users.noreply.github.com>
date: 2025-03-31T14:48:42+02:00
summary:

[3.13] gh-126037: fix UAF in `xml.etree.ElementTree.Element.find*` when current 
mutations happen (#127964) (#131931)

gh-126037: fix UAF in `xml.etree.ElementTree.Element.find*` when concurrent 
mutations happen (#127964)

We fix a use-after-free in the `find`, `findtext` and `findall` methods of 
`xml.etree.ElementTree.Element`
objects that can be triggered when the tag to find implements an `__eq__` 
method that mutates the
element being queried.

(cherry picked from commit c57623c221d46daeaedfbf2b32d041fde0c882de)

files:
A Misc/NEWS.d/next/Library/2024-12-15-15-07-22.gh-issue-126037.OyA7JP.rst
M Lib/test/test_xml_etree.py
M Modules/_elementtree.c

diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py
index ebec9d8f18a2f6..235f4b54fac50b 100644
--- a/Lib/test/test_xml_etree.py
+++ b/Lib/test/test_xml_etree.py
@@ -2793,20 +2793,38 @@ def element_factory(x, y):
         gc_collect()
 
 
-class MutatingElementPath(str):
+class MutationDeleteElementPath(str):
     def __new__(cls, elem, *args):
         self = str.__new__(cls, *args)
         self.elem = elem
         return self
+
     def __eq__(self, o):
         del self.elem[:]
         return True
-MutatingElementPath.__hash__ = str.__hash__
+
+    __hash__ = str.__hash__
+
+
+class MutationClearElementPath(str):
+    def __new__(cls, elem, *args):
+        self = str.__new__(cls, *args)
+        self.elem = elem
+        return self
+
+    def __eq__(self, o):
+        self.elem.clear()
+        return True
+
+    __hash__ = str.__hash__
+
 
 class BadElementPath(str):
     def __eq__(self, o):
         raise 1/0
-BadElementPath.__hash__ = str.__hash__
+
+    __hash__ = str.__hash__
+
 
 class BadElementPathTest(ElementTestCase, unittest.TestCase):
     def setUp(self):
@@ -2821,9 +2839,11 @@ def tearDown(self):
         super().tearDown()
 
     def test_find_with_mutating(self):
-        e = ET.Element('foo')
-        e.extend([ET.Element('bar')])
-        e.find(MutatingElementPath(e, 'x'))
+        for cls in [MutationDeleteElementPath, MutationClearElementPath]:
+            with self.subTest(cls):
+                e = ET.Element('foo')
+                e.extend([ET.Element('bar')])
+                e.find(cls(e, 'x'))
 
     def test_find_with_error(self):
         e = ET.Element('foo')
@@ -2834,9 +2854,11 @@ def test_find_with_error(self):
             pass
 
     def test_findtext_with_mutating(self):
-        e = ET.Element('foo')
-        e.extend([ET.Element('bar')])
-        e.findtext(MutatingElementPath(e, 'x'))
+        for cls in [MutationDeleteElementPath, MutationClearElementPath]:
+            with self.subTest(cls):
+                e = ET.Element('foo')
+                e.extend([ET.Element('bar')])
+                e.findtext(cls(e, 'x'))
 
     def test_findtext_with_error(self):
         e = ET.Element('foo')
@@ -2861,9 +2883,11 @@ def test_findtext_with_none_text_attribute(self):
         self.assertEqual(root_elem.findtext('./bar'), '')
 
     def test_findall_with_mutating(self):
-        e = ET.Element('foo')
-        e.extend([ET.Element('bar')])
-        e.findall(MutatingElementPath(e, 'x'))
+        for cls in [MutationDeleteElementPath, MutationClearElementPath]:
+            with self.subTest(cls):
+                e = ET.Element('foo')
+                e.extend([ET.Element('bar')])
+                e.findall(cls(e, 'x'))
 
     def test_findall_with_error(self):
         e = ET.Element('foo')
diff --git 
a/Misc/NEWS.d/next/Library/2024-12-15-15-07-22.gh-issue-126037.OyA7JP.rst 
b/Misc/NEWS.d/next/Library/2024-12-15-15-07-22.gh-issue-126037.OyA7JP.rst
new file mode 100644
index 00000000000000..30098f6e13e986
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-12-15-15-07-22.gh-issue-126037.OyA7JP.rst
@@ -0,0 +1,5 @@
+:mod:`xml.etree.ElementTree`: Fix a crash in :meth:`Element.find 
<xml.etree.ElementTree.Element.find>`,
+:meth:`Element.findtext <xml.etree.ElementTree.Element.findtext>` and
+:meth:`Element.findall <xml.etree.ElementTree.Element.findall>` when the tag
+to find implements an :meth:`~object.__eq__` method mutating the element
+being queried. Patch by Bénédikt Tran.
diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c
index ec999582d2fb9d..b89f455cd8f1ca 100644
--- a/Modules/_elementtree.c
+++ b/Modules/_elementtree.c
@@ -1249,29 +1249,28 @@ _elementtree_Element_find_impl(ElementObject *self, 
PyTypeObject *cls,
                                PyObject *path, PyObject *namespaces)
 /*[clinic end generated code: output=18f77d393c9fef1b input=94df8a83f956acc6]*/
 {
-    Py_ssize_t i;
     elementtreestate *st = get_elementtree_state_by_cls(cls);
 
     if (checkpath(path) || namespaces != Py_None) {
         return PyObject_CallMethodObjArgs(
             st->elementpath_obj, st->str_find, self, path, namespaces, NULL
-            );
+        );
     }
 
-    if (!self->extra)
-        Py_RETURN_NONE;
-
-    for (i = 0; i < self->extra->length; i++) {
-        PyObject* item = self->extra->children[i];
-        int rc;
+    for (Py_ssize_t i = 0; self->extra && i < self->extra->length; i++) {
+        PyObject *item = self->extra->children[i];
         assert(Element_Check(st, item));
         Py_INCREF(item);
-        rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, 
Py_EQ);
-        if (rc > 0)
+        PyObject *tag = Py_NewRef(((ElementObject *)item)->tag);
+        int rc = PyObject_RichCompareBool(tag, path, Py_EQ);
+        Py_DECREF(tag);
+        if (rc > 0) {
             return item;
+        }
         Py_DECREF(item);
-        if (rc < 0)
+        if (rc < 0) {
             return NULL;
+        }
     }
 
     Py_RETURN_NONE;
@@ -1294,38 +1293,34 @@ _elementtree_Element_findtext_impl(ElementObject *self, 
PyTypeObject *cls,
                                    PyObject *namespaces)
 /*[clinic end generated code: output=6af7a2d96aac32cb input=32f252099f62a3d2]*/
 {
-    Py_ssize_t i;
     elementtreestate *st = get_elementtree_state_by_cls(cls);
 
     if (checkpath(path) || namespaces != Py_None)
         return PyObject_CallMethodObjArgs(
             st->elementpath_obj, st->str_findtext,
             self, path, default_value, namespaces, NULL
-            );
-
-    if (!self->extra) {
-        return Py_NewRef(default_value);
-    }
+        );
 
-    for (i = 0; i < self->extra->length; i++) {
+    for (Py_ssize_t i = 0; self->extra && i < self->extra->length; i++) {
         PyObject *item = self->extra->children[i];
-        int rc;
         assert(Element_Check(st, item));
         Py_INCREF(item);
-        rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, 
Py_EQ);
+        PyObject *tag = Py_NewRef(((ElementObject *)item)->tag);
+        int rc = PyObject_RichCompareBool(tag, path, Py_EQ);
+        Py_DECREF(tag);
         if (rc > 0) {
-            PyObject* text = element_get_text((ElementObject*)item);
+            PyObject *text = element_get_text((ElementObject *)item);
+            Py_DECREF(item);
             if (text == Py_None) {
-                Py_DECREF(item);
-                return PyUnicode_New(0, 0);
+                return Py_GetConstant(Py_CONSTANT_EMPTY_STR);
             }
             Py_XINCREF(text);
-            Py_DECREF(item);
             return text;
         }
         Py_DECREF(item);
-        if (rc < 0)
+        if (rc < 0) {
             return NULL;
+        }
     }
 
     return Py_NewRef(default_value);
@@ -1346,29 +1341,26 @@ _elementtree_Element_findall_impl(ElementObject *self, 
PyTypeObject *cls,
                                   PyObject *path, PyObject *namespaces)
 /*[clinic end generated code: output=65e39a1208f3b59e input=7aa0db45673fc9a5]*/
 {
-    Py_ssize_t i;
-    PyObject* out;
     elementtreestate *st = get_elementtree_state_by_cls(cls);
 
     if (checkpath(path) || namespaces != Py_None) {
         return PyObject_CallMethodObjArgs(
             st->elementpath_obj, st->str_findall, self, path, namespaces, NULL
-            );
+        );
     }
 
-    out = PyList_New(0);
-    if (!out)
+    PyObject *out = PyList_New(0);
+    if (out == NULL) {
         return NULL;
+    }
 
-    if (!self->extra)
-        return out;
-
-    for (i = 0; i < self->extra->length; i++) {
-        PyObject* item = self->extra->children[i];
-        int rc;
+    for (Py_ssize_t i = 0; self->extra && i < self->extra->length; i++) {
+        PyObject *item = self->extra->children[i];
         assert(Element_Check(st, item));
         Py_INCREF(item);
-        rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, 
Py_EQ);
+        PyObject *tag = Py_NewRef(((ElementObject *)item)->tag);
+        int rc = PyObject_RichCompareBool(tag, path, Py_EQ);
+        Py_DECREF(tag);
         if (rc != 0 && (rc < 0 || PyList_Append(out, item) < 0)) {
             Py_DECREF(item);
             Py_DECREF(out);

_______________________________________________
Python-checkins mailing list -- python-checkins@python.org
To unsubscribe send an email to python-checkins-le...@python.org
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: arch...@mail-archive.com

Reply via email to