https://github.com/python/cpython/commit/ddc27f9c385f57db1c227b655ec84dcf097a8976
commit: ddc27f9c385f57db1c227b655ec84dcf097a8976
branch: main
author: Bénédikt Tran <10796600+picn...@users.noreply.github.com>
committer: picnixz <10796600+picn...@users.noreply.github.com>
date: 2025-03-01T12:01:20+01:00
summary:

gh-128974: Fix `UnicodeError.__str__` when custom attributes have side-effects (#128975)

Fix some crashes when (custom) attributes of `UnicodeError` objects implement 
`object.__str__` with side-effects.

files:
A Misc/NEWS.d/next/Core_and_Builtins/2025-01-18-10-50-04.gh-issue-128974.KltI-A.rst
M Lib/test/test_exceptions.py
M Objects/exceptions.c

diff --git a/Lib/test/test_exceptions.py b/Lib/test/test_exceptions.py
index bf0bc53b634022..20c617f8108d5f 100644
--- a/Lib/test/test_exceptions.py
+++ b/Lib/test/test_exceptions.py
@@ -1360,6 +1360,43 @@ def test_unicode_error_str_does_not_crash(self):
                 exc = UnicodeDecodeError('utf-8', encoded, start, end, '')
                 self.assertIsInstance(str(exc), str)
 
+    def test_unicode_error_evil_str_set_none_object(self):
+        def side_effect(exc):
+            exc.object = None
+        self.do_test_unicode_error_mutate(side_effect)
+
+    def test_unicode_error_evil_str_del_self_object(self):
+        def side_effect(exc):
+            del exc.object
+        self.do_test_unicode_error_mutate(side_effect)
+
+    def do_test_unicode_error_mutate(self, side_effect):
+        # Test that str(UnicodeError(...)) does not crash when
+        # side-effects mutate the underlying 'object' attribute.
+        # See https://github.com/python/cpython/issues/128974.
+
+        class Evil(str):
+            def __str__(self):
+                side_effect(exc)
+                return self
+
+        for reason, encoding in [
+            ("reason", Evil("utf-8")),
+            (Evil("reason"), "utf-8"),
+            (Evil("reason"), Evil("utf-8")),
+        ]:
+            with self.subTest(encoding=encoding, reason=reason):
+                with self.subTest(UnicodeEncodeError):
+                    exc = UnicodeEncodeError(encoding, "x", 0, 1, reason)
+                    self.assertRaises(TypeError, str, exc)
+                with self.subTest(UnicodeDecodeError):
+                    exc = UnicodeDecodeError(encoding, b"x", 0, 1, reason)
+                    self.assertRaises(TypeError, str, exc)
+
+        with self.subTest(UnicodeTranslateError):
+            exc = UnicodeTranslateError("x", 0, 1, Evil("reason"))
+            self.assertRaises(TypeError, str, exc)
+
     @no_tracing
     def test_badisinstance(self):
         # Bug #2542: if issubclass(e, MyException) raises an exception,
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-01-18-10-50-04.gh-issue-128974.KltI-A.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-01-18-10-50-04.gh-issue-128974.KltI-A.rst
new file mode 100644
index 00000000000000..fc4453ae3f2644
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-01-18-10-50-04.gh-issue-128974.KltI-A.rst
@@ -0,0 +1,3 @@
+Fix a crash in :meth:`UnicodeError.__str__ <object.__str__>` when custom
+attributes implement :meth:`~object.__str__` with side-effects.
+Patch by Bénédikt Tran.
diff --git a/Objects/exceptions.c b/Objects/exceptions.c
index 28c7fdbd47ba8d..e30fea0f37a925 100644
--- a/Objects/exceptions.c
+++ b/Objects/exceptions.c
@@ -2784,6 +2784,8 @@ SyntaxError_str(PyObject *op)
     if (!filename && !have_lineno)
         return PyObject_Str(self->msg ? self->msg : Py_None);
 
+    // Even if 'filename' can be an instance of a subclass of 'str',
+    // we only render its "true" content and do not use str(filename).
     if (filename && have_lineno)
         result = PyUnicode_FromFormat("%S (%U, line %ld)",
                    self->msg ? self->msg : Py_None,
@@ -2903,29 +2905,47 @@ SimpleExtendsException(PyExc_ValueError, UnicodeError,
 
 /*
  * Check the validity of 'attr' as a unicode or bytes object depending
- * on 'as_bytes' and return a new reference on it if it is the case.
+ * on 'as_bytes'.
  *
  * The 'name' is the attribute name and is only used for error reporting.
  *
- * On success, this returns a strong reference on 'attr'.
- * On failure, this sets a TypeError and returns NULL.
+ * On success, this returns 0.
+ * On failure, this sets a TypeError and returns -1.
  */
-static PyObject *
-as_unicode_error_attribute(PyObject *attr, const char *name, int as_bytes)
+static int
+check_unicode_error_attribute(PyObject *attr, const char *name, int as_bytes)
 {
     assert(as_bytes == 0 || as_bytes == 1);
     if (attr == NULL) {
-        PyErr_Format(PyExc_TypeError, "%s attribute not set", name);
-        return NULL;
+        PyErr_Format(PyExc_TypeError,
+                     "UnicodeError '%s' attribute is not set",
+                     name);
+        return -1;
     }
     if (!(as_bytes ? PyBytes_Check(attr) : PyUnicode_Check(attr))) {
         PyErr_Format(PyExc_TypeError,
-                     "%s attribute must be %s",
-                     name,
-                     as_bytes ? "bytes" : "unicode");
-        return NULL;
+                     "UnicodeError '%s' attribute must be a %s",
+                     name, as_bytes ? "bytes" : "string");
+        return -1;
     }
-    return Py_NewRef(attr);
+    return 0;
+}
+
+
+/*
+ * Check the validity of 'attr' as a unicode or bytes object depending
+ * on 'as_bytes' and return a new reference on it if it is the case.
+ *
+ * The 'name' is the attribute name and is only used for error reporting.
+ *
+ * On success, this returns a strong reference on 'attr'.
+ * On failure, this sets a TypeError and returns NULL.
+ */
+static PyObject *
+as_unicode_error_attribute(PyObject *attr, const char *name, int as_bytes)
+{
+    int rc = check_unicode_error_attribute(attr, name, as_bytes);
+    return rc < 0 ? NULL : Py_NewRef(attr);
 }
 
 
@@ -3591,7 +3611,10 @@ UnicodeEncodeError_str(PyObject *self)
     if (encoding_str == NULL) {
         goto done;
     }
-
+    // calls to PyObject_Str(...) above might mutate 'exc->object'
+    if (check_unicode_error_attribute(exc->object, "object", false) < 0) {
+        goto done;
+    }
     Py_ssize_t len = PyUnicode_GET_LENGTH(exc->object);
     Py_ssize_t start = exc->start, end = exc->end;
 
@@ -3711,7 +3734,10 @@ UnicodeDecodeError_str(PyObject *self)
     if (encoding_str == NULL) {
         goto done;
     }
-
+    // calls to PyObject_Str(...) above might mutate 'exc->object'
+    if (check_unicode_error_attribute(exc->object, "object", true) < 0) {
+        goto done;
+    }
     Py_ssize_t len = PyBytes_GET_SIZE(exc->object);
     Py_ssize_t start = exc->start, end = exc->end;
 
@@ -3807,7 +3833,10 @@ UnicodeTranslateError_str(PyObject *self)
     if (reason_str == NULL) {
         goto done;
     }
-
+    // call to PyObject_Str(...) above might mutate 'exc->object'
+    if (check_unicode_error_attribute(exc->object, "object", false) < 0) {
+        goto done;
+    }
     Py_ssize_t len = PyUnicode_GET_LENGTH(exc->object);
     Py_ssize_t start = exc->start, end = exc->end;
 

_______________________________________________
Python-checkins mailing list -- python-checkins@python.org
To unsubscribe send an email to python-checkins-le...@python.org
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: arch...@mail-archive.com

Reply via email to