https://github.com/python/cpython/commit/962fb872ebd97ab6ab808a1b8e2034577e5501bf
commit: 962fb872ebd97ab6ab808a1b8e2034577e5501bf
branch: main
author: Serhiy Storchaka <[email protected]>
committer: serhiy-storchaka <[email protected]>
date: 2026-03-13T13:05:41+02:00
summary:

gh-145850: Change some implementation details in struct.Struct (GH-145851)

* calling it with a non-ASCII string format will now raise a ValueError
  instead of a UnicodeEncodeError
* calling it with a non-ASCII bytes format will now raise a ValueError
  instead of a struct.error
* getting the format attribute of an uninitialized object will now raise
  an AttributeError instead of a RuntimeError.

files:
A Misc/NEWS.d/next/Library/2026-03-12-12-17-39.gh-issue-145850.uW3stt.rst
M Lib/test/test_struct.py
M Modules/_struct.c
M Modules/_xxtestfuzz/fuzzer.c

diff --git a/Lib/test/test_struct.py b/Lib/test/test_struct.py
index e3e02097b1f550..c7dc69defded50 100644
--- a/Lib/test/test_struct.py
+++ b/Lib/test/test_struct.py
@@ -605,7 +605,7 @@ def test_Struct_reinitialization(self):
         self.assertEqual(s.unpack(packed), (1, 2))
 
         with self.assertWarnsRegex(FutureWarning, msg):
-            with self.assertRaises(UnicodeEncodeError):
+            with self.assertRaises(ValueError):
                 s.__init__('\udc00')
         self.assertEqual(s.format, '>hh')
         self.assertEqual(s.pack(1, 2), packed)
@@ -872,10 +872,10 @@ def __init__(self, *args, **kwargs):
         with self.assertWarnsRegex(DeprecationWarning, warnmsg + 'bad char'):
             my_struct = MyStruct(format='$')
         self.assertEqual(my_struct.pack(12345), b'\x30\x39')
-        with self.assertWarnsRegex(DeprecationWarning, warnmsg + ".*can't encode"):
+        with self.assertWarnsRegex(DeprecationWarning, warnmsg + "non-ASCII"):
             my_struct = MyStruct('\udc00')
         self.assertEqual(my_struct.pack(12345), b'\x30\x39')
-        with self.assertWarnsRegex(DeprecationWarning, warnmsg + ".*can't encode"):
+        with self.assertWarnsRegex(DeprecationWarning, warnmsg + "non-ASCII"):
             my_struct = MyStruct(format='\udc00')
         self.assertEqual(my_struct.pack(12345), b'\x30\x39')
 
@@ -928,11 +928,16 @@ def __init__(self, newargs, initargs):
         with self.assertWarns(FutureWarning):
             with self.assertRaises(struct.error):
                 MyStruct(('>h',), ('$',))
-        with self.assertRaises(UnicodeEncodeError):
+        with self.assertRaises(ValueError):
             MyStruct(('\udc00',), ('>h',))
+        with self.assertRaises(ValueError):
+            MyStruct((b'\xa4',), ('>h',))
         with self.assertWarns(FutureWarning):
-            with self.assertRaises(UnicodeEncodeError):
+            with self.assertRaises(ValueError):
                 MyStruct(('>h',), ('\udc00',))
+        with self.assertWarns(FutureWarning):
+            with self.assertRaises(ValueError):
+                MyStruct(('>h',), (b'\xa4',))
         with self.assertWarns(FutureWarning):
             my_struct = MyStruct(('>h',), ('<h',))
         self.assertEqual(my_struct.format, '<h')
@@ -954,8 +959,10 @@ class MyStruct(struct.Struct):
             MyStruct(42)
         with self.assertRaises(struct.error):
             MyStruct('$')
-        with self.assertRaises(UnicodeEncodeError):
+        with self.assertRaises(ValueError):
             MyStruct('\udc00')
+        with self.assertRaises(ValueError):
+            MyStruct(b'\xa4')
         with self.assertRaises(TypeError):
             MyStruct('>h', 42)
         with self.assertRaises(TypeError):
@@ -1004,7 +1011,7 @@ def test_operations_on_half_initialized_Struct(self):
         self.assertRaises(RuntimeError, S.pack_into, spam, 1)
         self.assertRaises(RuntimeError, S.unpack, spam)
         self.assertRaises(RuntimeError, S.unpack_from, spam)
-        self.assertRaises(RuntimeError, getattr, S, 'format')
+        self.assertRaises(AttributeError, getattr, S, 'format')
         self.assertRaises(RuntimeError, S.__sizeof__)
         self.assertRaises(RuntimeError, repr, S)
         self.assertEqual(S.size, -1)
diff --git a/Misc/NEWS.d/next/Library/2026-03-12-12-17-39.gh-issue-145850.uW3stt.rst b/Misc/NEWS.d/next/Library/2026-03-12-12-17-39.gh-issue-145850.uW3stt.rst
new file mode 100644
index 00000000000000..35ba57a95b0e7e
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2026-03-12-12-17-39.gh-issue-145850.uW3stt.rst
@@ -0,0 +1,6 @@
+Changed some implementation details in :class:`struct.Struct`: calling it
+with non-ASCII string format will now raise a :exc:`ValueError` instead of
+:exc:`UnicodeEncodeError`, calling it with non-ASCII bytes format will now
+raise a :exc:`ValueError` instead of :exc:`struct.error`, getting
+the :attr:`!format` attribute of uninitialized object will now raise an
+:exc:`AttributeError` instead of :exc:`RuntimeError`.
diff --git a/Modules/_struct.c b/Modules/_struct.c
index 7eddc9bdc38a89..2059218029ea34 100644
--- a/Modules/_struct.c
+++ b/Modules/_struct.c
@@ -1635,8 +1635,12 @@ prepare_s(PyStructObject *self, PyObject *format)
 
     _structmodulestate *state = get_struct_state_structinst(self);
 
-    fmt = PyBytes_AS_STRING(format);
-    if (strlen(fmt) != (size_t)PyBytes_GET_SIZE(format)) {
+    if (!PyUnicode_IS_ASCII(format)) {
+        PyErr_SetString(PyExc_ValueError, "non-ASCII character in struct format");
+        return -1;
+    }
+    fmt = (const char *)PyUnicode_1BYTE_DATA(format);
+    if (strlen(fmt) != (size_t)PyUnicode_GET_LENGTH(format)) {
         PyErr_SetString(state->StructError,
                         "embedded null character");
         return -1;
@@ -1780,12 +1784,11 @@ static int
 set_format(PyStructObject *self, PyObject *format)
 {
     if (PyUnicode_Check(format)) {
-        format = PyUnicode_AsASCIIString(format);
-        if (format == NULL)
-            return -1;
+        format = PyUnicode_FromObject(format);
     }
     else if (PyBytes_Check(format)) {
-        Py_INCREF(format);
+        format = PyUnicode_DecodeASCII(PyBytes_AS_STRING(format),
+                                       PyBytes_GET_SIZE(format), "surrogateescape");
     }
     else  {
         PyErr_Format(PyExc_TypeError,
@@ -1793,6 +1796,9 @@ set_format(PyStructObject *self, PyObject *format)
                      "not %T", format);
         return -1;
     }
+    if (format == NULL) {
+        return -1;
+    }
     if (prepare_s(self, format)) {
         Py_DECREF(format);
         return -1;
@@ -1821,7 +1827,7 @@ Struct_impl(PyTypeObject *type, PyObject *format)
     if (self == NULL) {
         return NULL;
     }
-    self->s_format = Py_NewRef(Py_None);
+    self->s_format = NULL;
     self->s_codes = NULL;
     self->s_size = -1;
     self->s_len = -1;
@@ -1878,7 +1884,7 @@ s_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
     if (self == NULL) {
         return NULL;
     }
-    self->s_format = Py_NewRef(Py_None);
+    self->s_format = NULL;
     self->s_codes = NULL;
     self->s_size = -1;
     self->s_len = -1;
@@ -1892,7 +1898,7 @@ s_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
             return NULL;
         }
         PyObject *exc = PyErr_GetRaisedException();
-        Py_SETREF(self->s_format, Py_NewRef(Py_None));
+        Py_CLEAR(self->s_format);
         if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
             "Invalid 'format' argument for Struct.__new__(): %S", exc))
         {
@@ -1910,8 +1916,8 @@ s_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
 static bool
 same_format(PyStructObject *s, PyObject *format)
 {
-    Py_ssize_t size = PyBytes_GET_SIZE(s->s_format);
-    const void *data = PyBytes_AS_STRING(s->s_format);
+    Py_ssize_t size = PyUnicode_GET_LENGTH(s->s_format);
+    const void *data = PyUnicode_1BYTE_DATA(s->s_format);
     if (PyUnicode_Check(format) && PyUnicode_IS_ASCII(format)) {
         return PyUnicode_GET_LENGTH(format) == size
             && memcmp(PyUnicode_1BYTE_DATA(format), data, size) == 0;
@@ -1938,7 +1944,7 @@ static int
 Struct___init___impl(PyStructObject *self, PyObject *format)
 /*[clinic end generated code: output=b8e80862444e92d0 input=1af78a5f57d82cec]*/
 {
-    if (self->s_format == Py_None) {
+    if (self->s_format == NULL) {
         if (set_format(self, format) < 0) {
             return -1;
         }
@@ -1965,7 +1971,7 @@ s_init(PyObject *self, PyObject *args, PyObject *kwargs)
 {
     if (!((PyStructObject *)self)->init_called
             && Py_TYPE(self)->tp_init == s_init
-            && ((PyStructObject *)self)->s_format != Py_None)
+            && ((PyStructObject *)self)->s_format != NULL)
     {
         /* Struct.__init__() was called implicitly.
          * __new__() already did all the work. */
@@ -2508,22 +2514,6 @@ Struct_pack_into_impl(PyStructObject *self, Py_buffer *buffer,
     Py_RETURN_NONE;
 }
 
-static PyObject *
-s_get_format(PyObject *op, void *Py_UNUSED(closure))
-{
-    PyStructObject *self = PyStructObject_CAST(op);
-    ENSURE_STRUCT_IS_READY(self);
-    return PyUnicode_FromStringAndSize(PyBytes_AS_STRING(self->s_format),
-                                       PyBytes_GET_SIZE(self->s_format));
-}
-
-static PyObject *
-s_get_size(PyObject *op, void *Py_UNUSED(closure))
-{
-    PyStructObject *self = PyStructObject_CAST(op);
-    return PyLong_FromSsize_t(self->s_size);
-}
-
 /*[clinic input]
 Struct.__sizeof__
 [clinic start generated code]*/
@@ -2545,14 +2535,7 @@ s_repr(PyObject *op)
 {
     PyStructObject *self = PyStructObject_CAST(op);
     ENSURE_STRUCT_IS_READY(self);
-    PyObject* fmt = PyUnicode_FromStringAndSize(
-        PyBytes_AS_STRING(self->s_format), PyBytes_GET_SIZE(self->s_format));
-    if (fmt == NULL) {
-        return NULL;
-    }
-    PyObject* s = PyUnicode_FromFormat("%s(%R)", _PyType_Name(Py_TYPE(self)), fmt);
-    Py_DECREF(fmt);
-    return s;
+    return PyUnicode_FromFormat("%s(%R)", _PyType_Name(Py_TYPE(self)), self->s_format);
 }
 
 /* List of functions */
@@ -2569,15 +2552,13 @@ static struct PyMethodDef s_methods[] = {
 
 static PyMemberDef s_members[] = {
     {"__weaklistoffset__", Py_T_PYSSIZET, offsetof(PyStructObject, weakreflist), Py_READONLY},
+    {"format", Py_T_OBJECT_EX, offsetof(PyStructObject, s_format),
+        Py_READONLY, PyDoc_STR("struct format string")},
+    {"size", Py_T_PYSSIZET, offsetof(PyStructObject, s_size), Py_READONLY,
+        PyDoc_STR("struct size in bytes")},
     {NULL}  /* sentinel */
 };
 
-static PyGetSetDef s_getsetlist[] = {
-    {"format", s_get_format, NULL, PyDoc_STR("struct format string"), NULL},
-    {"size", s_get_size, NULL, PyDoc_STR("struct size in bytes"), NULL},
-    {NULL} /* sentinel */
-};
-
 static PyType_Slot PyStructType_slots[] = {
     {Py_tp_dealloc, s_dealloc},
     {Py_tp_getattro, PyObject_GenericGetAttr},
@@ -2588,7 +2569,6 @@ static PyType_Slot PyStructType_slots[] = {
     {Py_tp_clear, s_clear},
     {Py_tp_methods, s_methods},
     {Py_tp_members, s_members},
-    {Py_tp_getset, s_getsetlist},
     {Py_tp_init, s_init},
     {Py_tp_new, s_new},
     {0, 0},
diff --git a/Modules/_xxtestfuzz/fuzzer.c b/Modules/_xxtestfuzz/fuzzer.c
index f3a22f3f6a87cb..6cb11562476e40 100644
--- a/Modules/_xxtestfuzz/fuzzer.c
+++ b/Modules/_xxtestfuzz/fuzzer.c
@@ -133,6 +133,10 @@ static int fuzz_struct_unpack(const char* data, size_t size) {
     if (unpacked == NULL && PyErr_ExceptionMatches(PyExc_SystemError)) {
         PyErr_Clear();
     }
+    /* Ignore any ValueError, these are triggered by non-ASCII format. */
+    if (unpacked == NULL && PyErr_ExceptionMatches(PyExc_ValueError)) {
+        PyErr_Clear();
+    }
     /* Ignore any struct.error exceptions, these can be caused by invalid
        formats or incomplete buffers both of which are common. */
     if (unpacked == NULL && PyErr_ExceptionMatches(struct_error)) {

_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: [email protected]

Reply via email to