https://github.com/python/cpython/commit/3146a25e97700374ec470361889f0adac6cedbec commit: 3146a25e97700374ec470361889f0adac6cedbec branch: main author: Bénédikt Tran <10796600+picn...@users.noreply.github.com> committer: picnixz <10796600+picn...@users.noreply.github.com> date: 2025-03-03T13:58:15+01:00 summary:
gh-129173: refactor `PyCodec_BackslashReplaceErrors` into separate functions (#129895) The logic of `PyCodec_BackslashReplaceErrors` is now split into separate functions, each of which handles a specific exception type. files: M Python/codecs.c diff --git a/Python/codecs.c b/Python/codecs.c index d5d9a4a8bcabb7..8cdebfa1b611ea 100644 --- a/Python/codecs.c +++ b/Python/codecs.c @@ -956,49 +956,18 @@ PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc) return restuple; } -PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc) + +// --- handler: 'backslashreplace' -------------------------------------------- + +static PyObject * +_PyCodec_BackslashReplaceUnicodeEncodeError(PyObject *exc) { PyObject *obj; Py_ssize_t objlen, start, end, slen; - if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeDecodeError)) { - if (_PyUnicodeError_GetParams(exc, - &obj, &objlen, - &start, &end, &slen, true) < 0) - { - return NULL; - } - PyObject *res = PyUnicode_New(4 * slen, 127); - if (res == NULL) { - Py_DECREF(obj); - return NULL; - } - Py_UCS1 *outp = PyUnicode_1BYTE_DATA(res); - const unsigned char *p = (const unsigned char *)PyBytes_AS_STRING(obj); - for (Py_ssize_t i = start; i < end; i++, outp += 4) { - const unsigned char ch = p[i]; - outp[0] = '\\'; - outp[1] = 'x'; - outp[2] = Py_hexdigits[(ch >> 4) & 0xf]; - outp[3] = Py_hexdigits[ch & 0xf]; - } - assert(_PyUnicode_CheckConsistency(res, 1)); - Py_DECREF(obj); - return Py_BuildValue("(Nn)", res, end); - } - - if ( - PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeEncodeError) - || PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeTranslateError) - ) { - if (_PyUnicodeError_GetParams(exc, - &obj, &objlen, - &start, &end, &slen, false) < 0) - { - return NULL; - } - } - else { - wrong_exception_type(exc); + if (_PyUnicodeError_GetParams(exc, + &obj, &objlen, + &start, &end, &slen, false) < 0) + { return NULL; } @@ -1035,6 +1004,65 @@ PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc) } +static PyObject 
* +_PyCodec_BackslashReplaceUnicodeDecodeError(PyObject *exc) +{ + PyObject *obj; + Py_ssize_t objlen, start, end, slen; + if (_PyUnicodeError_GetParams(exc, + &obj, &objlen, + &start, &end, &slen, true) < 0) + { + return NULL; + } + + PyObject *res = PyUnicode_New(4 * slen, 127); + if (res == NULL) { + Py_DECREF(obj); + return NULL; + } + + Py_UCS1 *outp = PyUnicode_1BYTE_DATA(res); + const unsigned char *p = (const unsigned char *)PyBytes_AS_STRING(obj); + for (Py_ssize_t i = start; i < end; i++, outp += 4) { + const unsigned char ch = p[i]; + outp[0] = '\\'; + outp[1] = 'x'; + outp[2] = Py_hexdigits[(ch >> 4) & 0xf]; + outp[3] = Py_hexdigits[ch & 0xf]; + } + assert(_PyUnicode_CheckConsistency(res, 1)); + Py_DECREF(obj); + return Py_BuildValue("(Nn)", res, end); +} + + +static inline PyObject * +_PyCodec_BackslashReplaceUnicodeTranslateError(PyObject *exc) +{ + // Same implementation as for UnicodeEncodeError objects. + return _PyCodec_BackslashReplaceUnicodeEncodeError(exc); +} + + +PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc) +{ + if (_PyIsUnicodeEncodeError(exc)) { + return _PyCodec_BackslashReplaceUnicodeEncodeError(exc); + } + else if (_PyIsUnicodeDecodeError(exc)) { + return _PyCodec_BackslashReplaceUnicodeDecodeError(exc); + } + else if (_PyIsUnicodeTranslateError(exc)) { + return _PyCodec_BackslashReplaceUnicodeTranslateError(exc); + } + else { + wrong_exception_type(exc); + return NULL; + } +} + + // --- handler: 'namereplace' ------------------------------------------------- PyObject *PyCodec_NameReplaceErrors(PyObject *exc) @@ -1502,7 +1530,8 @@ xmlcharrefreplace_errors(PyObject *Py_UNUSED(self), PyObject *exc) } -static PyObject *backslashreplace_errors(PyObject *self, PyObject *exc) +static inline PyObject * +backslashreplace_errors(PyObject *Py_UNUSED(self), PyObject *exc) { return PyCodec_BackslashReplaceErrors(exc); } _______________________________________________ Python-checkins mailing list -- python-checkins@python.org To 
unsubscribe send an email to python-checkins-le...@python.org https://mail.python.org/mailman3/lists/python-checkins.python.org/ Member address: arch...@mail-archive.com