https://github.com/python/cpython/commit/5d4e89141137d98a546f81210c0c449ab1d76937 commit: 5d4e89141137d98a546f81210c0c449ab1d76937 branch: 3.12 author: Miss Islington (bot) <31488909+miss-isling...@users.noreply.github.com> committer: picnixz <10796600+picn...@users.noreply.github.com> date: 2025-03-31T14:50:13+02:00 summary:
[3.12] gh-126033: fix UAF in `xml.etree.ElementTree.Element.remove` when concurrent mutations happen (GH-126124) (#131930) gh-126033: fix UAF in `xml.etree.ElementTree.Element.remove` when concurrent mutations happen (GH-126124) (cherry picked from commit bab1398a47f6d0cfc1be70497f306874c749ef7c) Co-authored-by: Bénédikt Tran <10796600+picn...@users.noreply.github.com> files: A Misc/NEWS.d/next/Library/2024-10-29-12-59-45.gh-issue-126033.sM3uCn.rst M Lib/test/test_xml_etree.py M Modules/_elementtree.c diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index 137e23867c5939..f1b420ed7944a9 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -18,9 +18,11 @@ import textwrap import types import unittest +import unittest.mock as mock import warnings import weakref +from contextlib import nullcontext from functools import partial from itertools import product, islice from test import support @@ -121,6 +123,21 @@ </foo> """ +def is_python_implementation(): + assert ET is not None, "ET must be initialized" + assert pyET is not None, "pyET must be initialized" + return ET is pyET + + +def equal_wrapper(cls): + """Mock cls.__eq__ to check whether it has been called or not. + + The behaviour of cls.__eq__ (side-effects included) is left as is. + """ + eq = cls.__eq__ + return mock.patch.object(cls, "__eq__", autospec=True, wraps=eq) + + def checkwarnings(*filters, quiet=False): def decorator(test): def newtest(*args, **kwargs): @@ -2562,6 +2579,7 @@ def test_pickle_issue18997(self): class BadElementTest(ElementTestCase, unittest.TestCase): + def test_extend_mutable_list(self): class X: @property @@ -2600,18 +2618,168 @@ class Y(X, ET.Element): e = ET.Element('foo') e.extend(L) - def test_remove_with_mutating(self): - class X(ET.Element): + def test_remove_with_clear_assume_missing(self): + # gh-126033: Check that a concurrent clear() for an assumed-to-be + # missing element does not make the interpreter crash. + self.do_test_remove_with_clear(raises=True) + + def test_remove_with_clear_assume_existing(self): + # gh-126033: Check that a concurrent clear() for an assumed-to-be + # existing element does not make the interpreter crash. + self.do_test_remove_with_clear(raises=False) + + def do_test_remove_with_clear(self, *, raises): + + # Until the discrepency between "del root[:]" and "root.clear()" is + # resolved, we need to keep two tests. Previously, using "del root[:]" + # did not crash with the reproducer of gh-126033 while "root.clear()" + # did. + + class E(ET.Element): + """Local class to be able to mock E.__eq__ for introspection.""" + + class X(E): def __eq__(self, o): - del e[:] - return False - e = ET.Element('foo') - e.extend([X('bar')]) - self.assertRaises(ValueError, e.remove, ET.Element('baz')) + del root[:] + return not raises - e = ET.Element('foo') - e.extend([ET.Element('bar')]) - self.assertRaises(ValueError, e.remove, X('baz')) + class Y(E): + def __eq__(self, o): + root.clear() + return not raises + + if raises: + get_checker_context = lambda: self.assertRaises(ValueError) + else: + get_checker_context = nullcontext + + self.assertIs(E.__eq__, object.__eq__) + + for Z, side_effect in [(X, 'del root[:]'), (Y, 'root.clear()')]: + self.enterContext(self.subTest(side_effect=side_effect)) + + # test removing R() from [U()] + for R, U, description in [ + (E, Z, "remove missing E() from [Z()]"), + (Z, E, "remove missing Z() from [E()]"), + (Z, Z, "remove missing Z() from [Z()]"), + ]: + with self.subTest(description): + root = E('top') + root.extend([U('one')]) + with get_checker_context(): + root.remove(R('missing')) + + # test removing R() from [U(), V()] + cases = self.cases_for_remove_missing_with_mutations(E, Z) + for R, U, V, description in cases: + with self.subTest(description): + root = E('top') + root.extend([U('one'), V('two')]) + with get_checker_context(): + root.remove(R('missing')) + + # Test removing root[0] from [Z()]. + # + # Since we call root.remove() with root[0], Z.__eq__() + # will not be called (we branch on the fast Py_EQ path). + with self.subTest("remove root[0] from [Z()]"): + root = E('top') + root.append(Z('rem')) + with equal_wrapper(E) as f, equal_wrapper(Z) as g: + root.remove(root[0]) + f.assert_not_called() + g.assert_not_called() + + # Test removing root[1] (of type R) from [U(), R()]. + is_special = is_python_implementation() and raises and Z is Y + if is_python_implementation() and raises and Z is Y: + # In pure Python, using root.clear() sets the children + # list to [] without calling list.clear(). + # + # For this reason, the call to root.remove() first + # checks root[0] and sets the children list to [] + # since either root[0] or root[1] is an evil element. + # + # Since checking root[1] still uses the old reference + # to the children list, PyObject_RichCompareBool() branches + # to the fast Py_EQ path and Y.__eq__() is called exactly + # once (when checking root[0]). + continue + else: + cases = self.cases_for_remove_existing_with_mutations(E, Z) + for R, U, description in cases: + with self.subTest(description): + root = E('top') + root.extend([U('one'), R('rem')]) + with get_checker_context(): + root.remove(root[1]) + + def test_remove_with_mutate_root_assume_missing(self): + # gh-126033: Check that a concurrent mutation for an assumed-to-be + # missing element does not make the interpreter crash. + self.do_test_remove_with_mutate_root(raises=True) + + def test_remove_with_mutate_root_assume_existing(self): + # gh-126033: Check that a concurrent mutation for an assumed-to-be + # existing element does not make the interpreter crash. + self.do_test_remove_with_mutate_root(raises=False) + + def do_test_remove_with_mutate_root(self, *, raises): + E = ET.Element + + class Z(E): + def __eq__(self, o): + del root[0] + return not raises + + if raises: + get_checker_context = lambda: self.assertRaises(ValueError) + else: + get_checker_context = nullcontext + + # test removing R() from [U(), V()] + cases = self.cases_for_remove_missing_with_mutations(E, Z) + for R, U, V, description in cases: + with self.subTest(description): + root = E('top') + root.extend([U('one'), V('two')]) + with get_checker_context(): + root.remove(R('missing')) + + # test removing root[1] (of type R) from [U(), R()] + cases = self.cases_for_remove_existing_with_mutations(E, Z) + for R, U, description in cases: + with self.subTest(description): + root = E('top') + root.extend([U('one'), R('rem')]) + with get_checker_context(): + root.remove(root[1]) + + def cases_for_remove_missing_with_mutations(self, E, Z): + # Cases for removing R() from [U(), V()]. + # The case U = V = R = E is not interesting as there is no mutation. + for U, V in [(E, Z), (Z, E), (Z, Z)]: + description = (f"remove missing {E.__name__}() from " + f"[{U.__name__}(), {V.__name__}()]") + yield E, U, V, description + + for U, V in [(E, E), (E, Z), (Z, E), (Z, Z)]: + description = (f"remove missing {Z.__name__}() from " + f"[{U.__name__}(), {V.__name__}()]") + yield Z, U, V, description + + def cases_for_remove_existing_with_mutations(self, E, Z): + # Cases for removing root[1] (of type R) from [U(), R()]. + # The case U = R = E is not interesting as there is no mutation. + for U, R, description in [ + (E, Z, "remove root[1] from [E(), Z()]"), + (Z, E, "remove root[1] from [Z(), E()]"), + (Z, Z, "remove root[1] from [Z(), Z()]"), + ]: + description = (f"remove root[1] (of type {R.__name__}) " + f"from [{U.__name__}(), {R.__name__}()]") + yield R, U, description @support.infinite_recursion(25) def test_recursive_repr(self): diff --git a/Misc/NEWS.d/next/Library/2024-10-29-12-59-45.gh-issue-126033.sM3uCn.rst b/Misc/NEWS.d/next/Library/2024-10-29-12-59-45.gh-issue-126033.sM3uCn.rst new file mode 100644 index 00000000000000..fa09c712aa0c4a --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-29-12-59-45.gh-issue-126033.sM3uCn.rst @@ -0,0 +1,3 @@ +:mod:`xml.etree.ElementTree`: Fix a crash in :meth:`Element.remove +<xml.etree.ElementTree.Element.remove>` when the element is +concurrently mutated. Patch by Bénédikt Tran. diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c index ad65eedcd6464f..56d1508af137da 100644 --- a/Modules/_elementtree.c +++ b/Modules/_elementtree.c @@ -845,6 +845,7 @@ _elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo) if (element_resize(element, self->extra->length) < 0) goto error; + // TODO(picnixz): check for an evil child's __deepcopy__ on 'self' for (i = 0; i < self->extra->length; i++) { PyObject* child = deepcopy(st, self->extra->children[i], memo); if (!child || !Element_Check(st, child)) { @@ -1627,42 +1628,47 @@ _elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement) /*[clinic end generated code: output=38fe6c07d6d87d1f input=6133e1d05597d5ee]*/ { Py_ssize_t i; - int rc; - PyObject *found; - - if (!self->extra) { - /* element has no children, so raise exception */ - PyErr_SetString( - PyExc_ValueError, - "list.remove(x): x not in list" - ); - return NULL; - } - - for (i = 0; i < self->extra->length; i++) { - if (self->extra->children[i] == subelement) - break; - rc = PyObject_RichCompareBool(self->extra->children[i], subelement, Py_EQ); - if (rc > 0) + // When iterating over the list of children, we need to check that the + // list is not cleared (self->extra != NULL) and that we are still within + // the correct bounds (i < self->extra->length). + // + // We deliberately avoid protecting against children lists that grow + // faster than the index since list objects do not protect against it. + int rc = 0; + for (i = 0; self->extra && i < self->extra->length; i++) { + if (self->extra->children[i] == subelement) { + rc = 1; break; - if (rc < 0) + } + PyObject *child = Py_NewRef(self->extra->children[i]); + rc = PyObject_RichCompareBool(child, subelement, Py_EQ); + Py_DECREF(child); + if (rc < 0) { return NULL; + } + else if (rc > 0) { + break; + } } - if (i >= self->extra->length) { - /* subelement is not in children, so raise exception */ - PyErr_SetString( - PyExc_ValueError, - "list.remove(x): x not in list" - ); + if (rc == 0) { + PyErr_SetString(PyExc_ValueError, "list.remove(x): x not in list"); return NULL; } - found = self->extra->children[i]; + // An extra check must be done if the mutation occurs at the very last + // step and removes or clears the 'extra' list (the condition on the + // length would not be satisfied any more). + if (self->extra == NULL || i >= self->extra->length) { + Py_RETURN_NONE; + } + + PyObject *found = self->extra->children[i]; self->extra->length--; - for (; i < self->extra->length; i++) + for (; i < self->extra->length; i++) { self->extra->children[i] = self->extra->children[i+1]; + } Py_DECREF(found); Py_RETURN_NONE; _______________________________________________ Python-checkins mailing list -- python-checkins@python.org To unsubscribe send an email to python-checkins-le...@python.org https://mail.python.org/mailman3/lists/python-checkins.python.org/ Member address: arch...@mail-archive.com