https://github.com/python/cpython/commit/3146a25e97700374ec470361889f0adac6cedbec
commit: 3146a25e97700374ec470361889f0adac6cedbec
branch: main
author: Bénédikt Tran <10796600+picn...@users.noreply.github.com>
committer: picnixz <10796600+picn...@users.noreply.github.com>
date: 2025-03-03T13:58:15+01:00
summary:

gh-129173: refactor `PyCodec_BackslashReplaceErrors` into separate functions 
(#129895)

The logic of `PyCodec_BackslashReplaceErrors` is now split into separate
functions, each of which handles a specific exception type.

files:
M Python/codecs.c

diff --git a/Python/codecs.c b/Python/codecs.c
index d5d9a4a8bcabb7..8cdebfa1b611ea 100644
--- a/Python/codecs.c
+++ b/Python/codecs.c
@@ -956,49 +956,18 @@ PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc)
     return restuple;
 }
 
-PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
+
+// --- handler: 'backslashreplace' --------------------------------------------
+
+static PyObject *
+_PyCodec_BackslashReplaceUnicodeEncodeError(PyObject *exc)
 {
     PyObject *obj;
     Py_ssize_t objlen, start, end, slen;
-    if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeDecodeError)) {
-        if (_PyUnicodeError_GetParams(exc,
-                                      &obj, &objlen,
-                                      &start, &end, &slen, true) < 0)
-        {
-            return NULL;
-        }
-        PyObject *res = PyUnicode_New(4 * slen, 127);
-        if (res == NULL) {
-            Py_DECREF(obj);
-            return NULL;
-        }
-        Py_UCS1 *outp = PyUnicode_1BYTE_DATA(res);
-        const unsigned char *p = (const unsigned char *)PyBytes_AS_STRING(obj);
-        for (Py_ssize_t i = start; i < end; i++, outp += 4) {
-            const unsigned char ch = p[i];
-            outp[0] = '\\';
-            outp[1] = 'x';
-            outp[2] = Py_hexdigits[(ch >> 4) & 0xf];
-            outp[3] = Py_hexdigits[ch & 0xf];
-        }
-        assert(_PyUnicode_CheckConsistency(res, 1));
-        Py_DECREF(obj);
-        return Py_BuildValue("(Nn)", res, end);
-    }
-
-    if (
-        PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeEncodeError)
-        || PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeTranslateError)
-    ) {
-        if (_PyUnicodeError_GetParams(exc,
-                                      &obj, &objlen,
-                                      &start, &end, &slen, false) < 0)
-        {
-            return NULL;
-        }
-    }
-    else {
-        wrong_exception_type(exc);
+    if (_PyUnicodeError_GetParams(exc,
+                                  &obj, &objlen,
+                                  &start, &end, &slen, false) < 0)
+    {
         return NULL;
     }
 
@@ -1035,6 +1004,65 @@ PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
 }
 
 
+static PyObject *
+_PyCodec_BackslashReplaceUnicodeDecodeError(PyObject *exc)  // escapes bytes [start, end) as "\xNN"
+{
+    PyObject *obj;
+    Py_ssize_t objlen, start, end, slen;
+    if (_PyUnicodeError_GetParams(exc,
+                                  &obj, &objlen,
+                                  &start, &end, &slen, true) < 0)  // NOTE(review): 'true' presumably selects a bytes object; confirm
+    {
+        return NULL;
+    }
+
+    PyObject *res = PyUnicode_New(4 * slen, 127);  // 4 ASCII chars per byte; assumes slen == end - start
+    if (res == NULL) {
+        Py_DECREF(obj);  // drop the reference to obj before bailing out
+        return NULL;
+    }
+
+    Py_UCS1 *outp = PyUnicode_1BYTE_DATA(res);
+    const unsigned char *p = (const unsigned char *)PyBytes_AS_STRING(obj);
+    for (Py_ssize_t i = start; i < end; i++, outp += 4) {
+        const unsigned char ch = p[i];
+        outp[0] = '\\';
+        outp[1] = 'x';
+        outp[2] = Py_hexdigits[(ch >> 4) & 0xf];  // high nibble
+        outp[3] = Py_hexdigits[ch & 0xf];  // low nibble
+    }
+    assert(_PyUnicode_CheckConsistency(res, 1));
+    Py_DECREF(obj);
+    return Py_BuildValue("(Nn)", res, end);  // (replacement, end); "N" does not add a new reference to res
+}
+
+
+static inline PyObject *
+_PyCodec_BackslashReplaceUnicodeTranslateError(PyObject *exc)
+{
+    // UnicodeTranslateError shares the UnicodeEncodeError implementation, so just delegate.
+    return _PyCodec_BackslashReplaceUnicodeEncodeError(exc);
+}
+
+
+PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)  // dispatches on the exception type of exc
+{
+    if (_PyIsUnicodeEncodeError(exc)) {
+        return _PyCodec_BackslashReplaceUnicodeEncodeError(exc);
+    }
+    else if (_PyIsUnicodeDecodeError(exc)) {
+        return _PyCodec_BackslashReplaceUnicodeDecodeError(exc);
+    }
+    else if (_PyIsUnicodeTranslateError(exc)) {
+        return _PyCodec_BackslashReplaceUnicodeTranslateError(exc);
+    }
+    else {
+        wrong_exception_type(exc);  // NOTE(review): presumably sets the Python error (TypeError?); confirm
+        return NULL;  // none of the three supported exception types matched
+    }
+}
+
+
 // --- handler: 'namereplace' -------------------------------------------------
 
 PyObject *PyCodec_NameReplaceErrors(PyObject *exc)
@@ -1502,7 +1530,8 @@ xmlcharrefreplace_errors(PyObject *Py_UNUSED(self), PyObject *exc)
 }
 
 
-static PyObject *backslashreplace_errors(PyObject *self, PyObject *exc)
+static inline PyObject *
+backslashreplace_errors(PyObject *Py_UNUSED(self), PyObject *exc)  // self is unused; forwards exc to PyCodec_BackslashReplaceErrors
 {
     return PyCodec_BackslashReplaceErrors(exc);
 }

_______________________________________________
Python-checkins mailing list -- python-checkins@python.org
To unsubscribe send an email to python-checkins-le...@python.org
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: arch...@mail-archive.com

Reply via email to