https://github.com/python/cpython/commit/962fb872ebd97ab6ab808a1b8e2034577e5501bf
commit: 962fb872ebd97ab6ab808a1b8e2034577e5501bf
branch: main
author: Serhiy Storchaka <[email protected]>
committer: serhiy-storchaka <[email protected]>
date: 2026-03-13T13:05:41+02:00
summary:
gh-145850: Change some implementation details in struct.Struct (GH-145851)
* calling it with a non-ASCII string format will now raise a ValueError
instead of UnicodeEncodeError
* calling it with a non-ASCII bytes format will now raise a ValueError
instead of struct.error
* getting the format attribute of an uninitialized object will now raise
an AttributeError instead of RuntimeError.
files:
A Misc/NEWS.d/next/Library/2026-03-12-12-17-39.gh-issue-145850.uW3stt.rst
M Lib/test/test_struct.py
M Modules/_struct.c
M Modules/_xxtestfuzz/fuzzer.c
diff --git a/Lib/test/test_struct.py b/Lib/test/test_struct.py
index e3e02097b1f550..c7dc69defded50 100644
--- a/Lib/test/test_struct.py
+++ b/Lib/test/test_struct.py
@@ -605,7 +605,7 @@ def test_Struct_reinitialization(self):
self.assertEqual(s.unpack(packed), (1, 2))
with self.assertWarnsRegex(FutureWarning, msg):
- with self.assertRaises(UnicodeEncodeError):
+ with self.assertRaises(ValueError):
s.__init__('\udc00')
self.assertEqual(s.format, '>hh')
self.assertEqual(s.pack(1, 2), packed)
@@ -872,10 +872,10 @@ def __init__(self, *args, **kwargs):
with self.assertWarnsRegex(DeprecationWarning, warnmsg + 'bad char'):
my_struct = MyStruct(format='$')
self.assertEqual(my_struct.pack(12345), b'\x30\x39')
- with self.assertWarnsRegex(DeprecationWarning, warnmsg + ".*can't encode"):
+ with self.assertWarnsRegex(DeprecationWarning, warnmsg + "non-ASCII"):
my_struct = MyStruct('\udc00')
self.assertEqual(my_struct.pack(12345), b'\x30\x39')
- with self.assertWarnsRegex(DeprecationWarning, warnmsg + ".*can't encode"):
+ with self.assertWarnsRegex(DeprecationWarning, warnmsg + "non-ASCII"):
my_struct = MyStruct(format='\udc00')
self.assertEqual(my_struct.pack(12345), b'\x30\x39')
@@ -928,11 +928,16 @@ def __init__(self, newargs, initargs):
with self.assertWarns(FutureWarning):
with self.assertRaises(struct.error):
MyStruct(('>h',), ('$',))
- with self.assertRaises(UnicodeEncodeError):
+ with self.assertRaises(ValueError):
MyStruct(('\udc00',), ('>h',))
+ with self.assertRaises(ValueError):
+ MyStruct((b'\xa4',), ('>h',))
with self.assertWarns(FutureWarning):
- with self.assertRaises(UnicodeEncodeError):
+ with self.assertRaises(ValueError):
MyStruct(('>h',), ('\udc00',))
+ with self.assertWarns(FutureWarning):
+ with self.assertRaises(ValueError):
+ MyStruct(('>h',), (b'\xa4',))
with self.assertWarns(FutureWarning):
my_struct = MyStruct(('>h',), ('<h',))
self.assertEqual(my_struct.format, '<h')
@@ -954,8 +959,10 @@ class MyStruct(struct.Struct):
MyStruct(42)
with self.assertRaises(struct.error):
MyStruct('$')
- with self.assertRaises(UnicodeEncodeError):
+ with self.assertRaises(ValueError):
MyStruct('\udc00')
+ with self.assertRaises(ValueError):
+ MyStruct(b'\xa4')
with self.assertRaises(TypeError):
MyStruct('>h', 42)
with self.assertRaises(TypeError):
@@ -1004,7 +1011,7 @@ def test_operations_on_half_initialized_Struct(self):
self.assertRaises(RuntimeError, S.pack_into, spam, 1)
self.assertRaises(RuntimeError, S.unpack, spam)
self.assertRaises(RuntimeError, S.unpack_from, spam)
- self.assertRaises(RuntimeError, getattr, S, 'format')
+ self.assertRaises(AttributeError, getattr, S, 'format')
self.assertRaises(RuntimeError, S.__sizeof__)
self.assertRaises(RuntimeError, repr, S)
self.assertEqual(S.size, -1)
diff --git a/Misc/NEWS.d/next/Library/2026-03-12-12-17-39.gh-issue-145850.uW3stt.rst b/Misc/NEWS.d/next/Library/2026-03-12-12-17-39.gh-issue-145850.uW3stt.rst
new file mode 100644
index 00000000000000..35ba57a95b0e7e
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2026-03-12-12-17-39.gh-issue-145850.uW3stt.rst
@@ -0,0 +1,6 @@
+Changed some implementation details in :class:`struct.Struct`: calling it
+with non-ASCII string format will now raise a :exc:`ValueError` instead of
+:exc:`UnicodeEncodeError`, calling it with non-ASCII bytes format will now
+raise a :exc:`ValueError` instead of :exc:`struct.error`, getting
+the :attr:`!format` attribute of uninitialized object will now raise an
+:exc:`AttributeError` instead of :exc:`RuntimeError`.
diff --git a/Modules/_struct.c b/Modules/_struct.c
index 7eddc9bdc38a89..2059218029ea34 100644
--- a/Modules/_struct.c
+++ b/Modules/_struct.c
@@ -1635,8 +1635,12 @@ prepare_s(PyStructObject *self, PyObject *format)
_structmodulestate *state = get_struct_state_structinst(self);
- fmt = PyBytes_AS_STRING(format);
- if (strlen(fmt) != (size_t)PyBytes_GET_SIZE(format)) {
+ if (!PyUnicode_IS_ASCII(format)) {
+ PyErr_SetString(PyExc_ValueError, "non-ASCII character in struct format");
+ return -1;
+ }
+ fmt = (const char *)PyUnicode_1BYTE_DATA(format);
+ if (strlen(fmt) != (size_t)PyUnicode_GET_LENGTH(format)) {
PyErr_SetString(state->StructError,
"embedded null character");
return -1;
@@ -1780,12 +1784,11 @@ static int
set_format(PyStructObject *self, PyObject *format)
{
if (PyUnicode_Check(format)) {
- format = PyUnicode_AsASCIIString(format);
- if (format == NULL)
- return -1;
+ format = PyUnicode_FromObject(format);
}
else if (PyBytes_Check(format)) {
- Py_INCREF(format);
+ format = PyUnicode_DecodeASCII(PyBytes_AS_STRING(format),
+ PyBytes_GET_SIZE(format), "surrogateescape");
}
else {
PyErr_Format(PyExc_TypeError,
@@ -1793,6 +1796,9 @@ set_format(PyStructObject *self, PyObject *format)
"not %T", format);
return -1;
}
+ if (format == NULL) {
+ return -1;
+ }
if (prepare_s(self, format)) {
Py_DECREF(format);
return -1;
@@ -1821,7 +1827,7 @@ Struct_impl(PyTypeObject *type, PyObject *format)
if (self == NULL) {
return NULL;
}
- self->s_format = Py_NewRef(Py_None);
+ self->s_format = NULL;
self->s_codes = NULL;
self->s_size = -1;
self->s_len = -1;
@@ -1878,7 +1884,7 @@ s_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
if (self == NULL) {
return NULL;
}
- self->s_format = Py_NewRef(Py_None);
+ self->s_format = NULL;
self->s_codes = NULL;
self->s_size = -1;
self->s_len = -1;
@@ -1892,7 +1898,7 @@ s_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
return NULL;
}
PyObject *exc = PyErr_GetRaisedException();
- Py_SETREF(self->s_format, Py_NewRef(Py_None));
+ Py_CLEAR(self->s_format);
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
"Invalid 'format' argument for Struct.__new__(): %S", exc))
{
@@ -1910,8 +1916,8 @@ s_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
static bool
same_format(PyStructObject *s, PyObject *format)
{
- Py_ssize_t size = PyBytes_GET_SIZE(s->s_format);
- const void *data = PyBytes_AS_STRING(s->s_format);
+ Py_ssize_t size = PyUnicode_GET_LENGTH(s->s_format);
+ const void *data = PyUnicode_1BYTE_DATA(s->s_format);
if (PyUnicode_Check(format) && PyUnicode_IS_ASCII(format)) {
return PyUnicode_GET_LENGTH(format) == size
&& memcmp(PyUnicode_1BYTE_DATA(format), data, size) == 0;
@@ -1938,7 +1944,7 @@ static int
Struct___init___impl(PyStructObject *self, PyObject *format)
/*[clinic end generated code: output=b8e80862444e92d0 input=1af78a5f57d82cec]*/
{
- if (self->s_format == Py_None) {
+ if (self->s_format == NULL) {
if (set_format(self, format) < 0) {
return -1;
}
@@ -1965,7 +1971,7 @@ s_init(PyObject *self, PyObject *args, PyObject *kwargs)
{
if (!((PyStructObject *)self)->init_called
&& Py_TYPE(self)->tp_init == s_init
- && ((PyStructObject *)self)->s_format != Py_None)
+ && ((PyStructObject *)self)->s_format != NULL)
{
/* Struct.__init__() was called implicitly.
* __new__() already did all the work. */
@@ -2508,22 +2514,6 @@ Struct_pack_into_impl(PyStructObject *self, Py_buffer *buffer,
Py_RETURN_NONE;
}
-static PyObject *
-s_get_format(PyObject *op, void *Py_UNUSED(closure))
-{
- PyStructObject *self = PyStructObject_CAST(op);
- ENSURE_STRUCT_IS_READY(self);
- return PyUnicode_FromStringAndSize(PyBytes_AS_STRING(self->s_format),
- PyBytes_GET_SIZE(self->s_format));
-}
-
-static PyObject *
-s_get_size(PyObject *op, void *Py_UNUSED(closure))
-{
- PyStructObject *self = PyStructObject_CAST(op);
- return PyLong_FromSsize_t(self->s_size);
-}
-
/*[clinic input]
Struct.__sizeof__
[clinic start generated code]*/
@@ -2545,14 +2535,7 @@ s_repr(PyObject *op)
{
PyStructObject *self = PyStructObject_CAST(op);
ENSURE_STRUCT_IS_READY(self);
- PyObject* fmt = PyUnicode_FromStringAndSize(
- PyBytes_AS_STRING(self->s_format), PyBytes_GET_SIZE(self->s_format));
- if (fmt == NULL) {
- return NULL;
- }
- PyObject* s = PyUnicode_FromFormat("%s(%R)", _PyType_Name(Py_TYPE(self)), fmt);
- Py_DECREF(fmt);
- return s;
+ return PyUnicode_FromFormat("%s(%R)", _PyType_Name(Py_TYPE(self)), self->s_format);
}
/* List of functions */
@@ -2569,15 +2552,13 @@ static struct PyMethodDef s_methods[] = {
static PyMemberDef s_members[] = {
{"__weaklistoffset__", Py_T_PYSSIZET, offsetof(PyStructObject, weakreflist), Py_READONLY},
+ {"format", Py_T_OBJECT_EX, offsetof(PyStructObject, s_format),
+ Py_READONLY, PyDoc_STR("struct format string")},
+ {"size", Py_T_PYSSIZET, offsetof(PyStructObject, s_size), Py_READONLY,
+ PyDoc_STR("struct size in bytes")},
{NULL} /* sentinel */
};
-static PyGetSetDef s_getsetlist[] = {
- {"format", s_get_format, NULL, PyDoc_STR("struct format string"), NULL},
- {"size", s_get_size, NULL, PyDoc_STR("struct size in bytes"), NULL},
- {NULL} /* sentinel */
-};
-
static PyType_Slot PyStructType_slots[] = {
{Py_tp_dealloc, s_dealloc},
{Py_tp_getattro, PyObject_GenericGetAttr},
@@ -2588,7 +2569,6 @@ static PyType_Slot PyStructType_slots[] = {
{Py_tp_clear, s_clear},
{Py_tp_methods, s_methods},
{Py_tp_members, s_members},
- {Py_tp_getset, s_getsetlist},
{Py_tp_init, s_init},
{Py_tp_new, s_new},
{0, 0},
diff --git a/Modules/_xxtestfuzz/fuzzer.c b/Modules/_xxtestfuzz/fuzzer.c
index f3a22f3f6a87cb..6cb11562476e40 100644
--- a/Modules/_xxtestfuzz/fuzzer.c
+++ b/Modules/_xxtestfuzz/fuzzer.c
@@ -133,6 +133,10 @@ static int fuzz_struct_unpack(const char* data, size_t size) {
if (unpacked == NULL && PyErr_ExceptionMatches(PyExc_SystemError)) {
PyErr_Clear();
}
+ /* Ignore any ValueError, these are triggered by non-ASCII format. */
+ if (unpacked == NULL && PyErr_ExceptionMatches(PyExc_ValueError)) {
+ PyErr_Clear();
+ }
/* Ignore any struct.error exceptions, these can be caused by invalid
formats or incomplete buffers both of which are common. */
if (unpacked == NULL && PyErr_ExceptionMatches(struct_error)) {
_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: [email protected]