https://github.com/python/cpython/commit/225296cd5b505c180d3f45c355b43d7e1d99d3d5
commit: 225296cd5b505c180d3f45c355b43d7e1d99d3d5
branch: main
author: Bénédikt Tran <[email protected]>
committer: picnixz <[email protected]>
date: 2025-01-23T11:44:18+01:00
summary:
gh-126004: Fix positions handling in `codecs.replace_errors` (#127674)
This fixes how `PyCodec_ReplaceErrors` handles the `start` and `end` attributes
of `UnicodeError` objects via the `_PyUnicodeError_GetParams` helper.
files:
A Misc/NEWS.d/next/Core_and_Builtins/2024-12-06-11-32-58.gh-issue-126004.CYAwTB.rst
M Lib/test/test_capi/test_codecs.py
M Python/codecs.c
diff --git a/Lib/test/test_capi/test_codecs.py b/Lib/test/test_capi/test_codecs.py
index 3e79dd2f7ca2fa..f57191ddcdbeb4 100644
--- a/Lib/test/test_capi/test_codecs.py
+++ b/Lib/test/test_capi/test_codecs.py
@@ -839,7 +839,8 @@ def test_codec_ignore_errors_handler(self):
def test_codec_replace_errors_handler(self):
handler = _testcapi.codec_replace_errors
- self.do_test_codec_errors_handler(handler, self.all_unicode_errors)
+ self.do_test_codec_errors_handler(handler, self.all_unicode_errors,
+ safe=True)
def test_codec_xmlcharrefreplace_errors_handler(self):
handler = _testcapi.codec_xmlcharrefreplace_errors
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-12-06-11-32-58.gh-issue-126004.CYAwTB.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-12-06-11-32-58.gh-issue-126004.CYAwTB.rst
new file mode 100644
index 00000000000000..de70c59ee48eec
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-12-06-11-32-58.gh-issue-126004.CYAwTB.rst
@@ -0,0 +1,3 @@
+Fix handling of :attr:`UnicodeError.start` and :attr:`UnicodeError.end`
+values in the :func:`codecs.replace_errors` error handler. Patch by Bénédikt
+Tran.
diff --git a/Python/codecs.c b/Python/codecs.c
index 11eaca175abf13..b657dd134a668e 100644
--- a/Python/codecs.c
+++ b/Python/codecs.c
@@ -702,48 +702,46 @@ PyObject *PyCodec_IgnoreErrors(PyObject *exc)
PyObject *PyCodec_ReplaceErrors(PyObject *exc)
{
- Py_ssize_t start, end, i, len;
+ Py_ssize_t start, end, slen;
if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeEncodeError)) {
- PyObject *res;
- Py_UCS1 *outp;
- if (PyUnicodeEncodeError_GetStart(exc, &start))
+ if (_PyUnicodeError_GetParams(exc, NULL, NULL,
+ &start, &end, &slen, false) < 0) {
return NULL;
- if (PyUnicodeEncodeError_GetEnd(exc, &end))
- return NULL;
- len = end - start;
- res = PyUnicode_New(len, '?');
- if (res == NULL)
+ }
+ PyObject *res = PyUnicode_New(slen, '?');
+ if (res == NULL) {
return NULL;
+ }
assert(PyUnicode_KIND(res) == PyUnicode_1BYTE_KIND);
- outp = PyUnicode_1BYTE_DATA(res);
- for (i = 0; i < len; ++i)
- outp[i] = '?';
+ Py_UCS1 *outp = PyUnicode_1BYTE_DATA(res);
+ memset(outp, '?', sizeof(Py_UCS1) * slen);
assert(_PyUnicode_CheckConsistency(res, 1));
return Py_BuildValue("(Nn)", res, end);
}
else if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeDecodeError)) {
- if (PyUnicodeDecodeError_GetEnd(exc, &end))
+ if (_PyUnicodeError_GetParams(exc, NULL, NULL,
+ NULL, &end, NULL, true) < 0) {
return NULL;
+ }
return Py_BuildValue("(Cn)",
(int)Py_UNICODE_REPLACEMENT_CHARACTER,
end);
}
else if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeTranslateError)) {
- PyObject *res;
- Py_UCS2 *outp;
- if (PyUnicodeTranslateError_GetStart(exc, &start))
- return NULL;
- if (PyUnicodeTranslateError_GetEnd(exc, &end))
+ if (_PyUnicodeError_GetParams(exc, NULL, NULL,
+ &start, &end, &slen, false) < 0) {
return NULL;
- len = end - start;
- res = PyUnicode_New(len, Py_UNICODE_REPLACEMENT_CHARACTER);
- if (res == NULL)
+ }
+ PyObject *res = PyUnicode_New(slen, Py_UNICODE_REPLACEMENT_CHARACTER);
+ if (res == NULL) {
return NULL;
- assert(PyUnicode_KIND(res) == PyUnicode_2BYTE_KIND);
- outp = PyUnicode_2BYTE_DATA(res);
- for (i = 0; i < len; i++)
+ }
+ assert(slen == 0 || PyUnicode_KIND(res) == PyUnicode_2BYTE_KIND);
+ Py_UCS2 *outp = PyUnicode_2BYTE_DATA(res);
+ for (Py_ssize_t i = 0; i < slen; ++i) {
outp[i] = Py_UNICODE_REPLACEMENT_CHARACTER;
+ }
assert(_PyUnicode_CheckConsistency(res, 1));
return Py_BuildValue("(Nn)", res, end);
}
_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: [email protected]