https://github.com/python/cpython/commit/c070abadb2e2ddb9a032ee5ded67098e9a64ea3d
commit: c070abadb2e2ddb9a032ee5ded67098e9a64ea3d
branch: main
author: Bénédikt Tran <10796600+picn...@users.noreply.github.com>
committer: encukou <encu...@gmail.com>
date: 2025-02-24T11:56:32+01:00
summary:

gh-111178: fix UBSan failures in `Modules/pyexpat.c` (GH-129789)

Fix UBSan failures for `xmlparseobject`, XML handler setters
Suppress unused return values

Rewrite `RC_HANDLER` with better semantics

files:
M Modules/pyexpat.c

diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c
index 3290706f143b9a..1ba644d3396f7f 100644
--- a/Modules/pyexpat.c
+++ b/Modules/pyexpat.c
@@ -92,12 +92,14 @@ typedef struct {
     PyObject **handlers;
 } xmlparseobject;
 
+#define xmlparseobject_CAST(op) ((xmlparseobject *)(op))
+
 #include "clinic/pyexpat.c.h"
 
 #define CHARACTER_DATA_BUFFER_SIZE 8192
 
-typedef void (*xmlhandlersetter)(XML_Parser self, void *meth);
-typedef void* xmlhandler;
+typedef const void *xmlhandler;
+typedef void (*xmlhandlersetter)(XML_Parser self, xmlhandler handler);
 
 struct HandlerInfo {
     const char *name;
@@ -108,6 +110,12 @@ struct HandlerInfo {
 
 static struct HandlerInfo handler_info[64];
 
+#define CALL_XML_HANDLER_SETTER(HANDLER_INFO, XML_PARSER, XML_HANDLER)      \
+    do {                                                                    \
+        xmlhandlersetter setter = (xmlhandlersetter)(HANDLER_INFO).setter;  \
+        setter((XML_PARSER), (XML_HANDLER));                                \
+    } while (0)
+
 /* Set an integer attribute on the error object; return true on success,
  * false on an exception.
  */
@@ -312,7 +320,7 @@ flush_character_buffer(xmlparseobject *self)
 static void
 my_CharacterDataHandler(void *userData, const XML_Char *data, int len)
 {
-    xmlparseobject *self = (xmlparseobject *) userData;
+    xmlparseobject *self = xmlparseobject_CAST(userData);
 
     if (PyErr_Occurred())
         return;
@@ -345,7 +353,7 @@ static void
 my_StartElementHandler(void *userData,
                        const XML_Char *name, const XML_Char *atts[])
 {
-    xmlparseobject *self = (xmlparseobject *)userData;
+    xmlparseobject *self = xmlparseobject_CAST(userData);
 
     if (have_handler(self, StartElement)) {
         PyObject *container, *rv, *args;
@@ -430,45 +438,57 @@ my_StartElementHandler(void *userData,
     }
 }
 
-#define RC_HANDLER(RC, NAME, PARAMS, INIT, PARAM_FORMAT, CONVERSION, \
-                RETURN, GETUSERDATA) \
-static RC \
-my_##NAME##Handler PARAMS {\
-    xmlparseobject *self = GETUSERDATA ; \
-    PyObject *args = NULL; \
-    PyObject *rv = NULL; \
-    INIT \
-\
-    if (have_handler(self, NAME)) { \
-        if (PyErr_Occurred()) \
-            return RETURN; \
-        if (flush_character_buffer(self) < 0) \
-            return RETURN; \
-        args = Py_BuildValue PARAM_FORMAT ;\
-        if (!args) { flag_error(self); return RETURN;} \
-        self->in_callback = 1; \
-        rv = call_with_frame(#NAME,__LINE__, \
-                             self->handlers[NAME], args, self); \
-        self->in_callback = 0; \
-        Py_DECREF(args); \
-        if (rv == NULL) { \
-            flag_error(self); \
-            return RETURN; \
-        } \
-        CONVERSION \
-        Py_DECREF(rv); \
-    } \
-    return RETURN; \
+#define RC_HANDLER(RETURN_TYPE, NAME, PARAMS,       \
+                   INIT, PARSE_FORMAT, CONVERSION,  \
+                   RETURN_VARIABLE, GETUSERDATA)    \
+static RETURN_TYPE                                  \
+my_ ## NAME ## Handler PARAMS {                     \
+    xmlparseobject *self = GETUSERDATA;             \
+    INIT                                            \
+    if (!have_handler(self, NAME)) {                \
+        return RETURN_VARIABLE;                     \
+    }                                               \
+    if (PyErr_Occurred()) {                         \
+        return RETURN_VARIABLE;                     \
+    }                                               \
+    if (flush_character_buffer(self) < 0) {         \
+        return RETURN_VARIABLE;                     \
+    }                                               \
+    PyObject *args = Py_BuildValue PARSE_FORMAT;    \
+    if (args == NULL) {                             \
+        flag_error(self);                           \
+        return RETURN_VARIABLE;                     \
+    }                                               \
+    self->in_callback = 1;                          \
+    PyObject *rv = call_with_frame(                 \
+        #NAME, __LINE__,                            \
+        self->handlers[NAME], args, self);          \
+    self->in_callback = 0;                          \
+    Py_DECREF(args);                                \
+    if (rv == NULL) {                               \
+        flag_error(self);                           \
+        return RETURN_VARIABLE;                     \
+    }                                               \
+    CONVERSION                                      \
+    Py_DECREF(rv);                                  \
+    return RETURN_VARIABLE;                         \
 }
 
-#define VOID_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
-        RC_HANDLER(void, NAME, PARAMS, ;, PARAM_FORMAT, ;, ;,\
-        (xmlparseobject *)userData)
-
-#define INT_HANDLER(NAME, PARAMS, PARAM_FORMAT)\
-        RC_HANDLER(int, NAME, PARAMS, int rc=0;, PARAM_FORMAT, \
-                        rc = PyLong_AsLong(rv);, rc, \
-        (xmlparseobject *)userData)
+#define VOID_HANDLER(NAME, PARAMS, PARAM_FORMAT)    \
+    RC_HANDLER(                                     \
+        void, NAME, PARAMS,                         \
+        /* no init */, PARAM_FORMAT,                \
+        /* no conversion */, /* no return var */,   \
+        xmlparseobject_CAST(userData)               \
+    )
+
+#define INT_HANDLER(NAME, PARAMS, PARAM_FORMAT)     \
+    RC_HANDLER(                                     \
+        int, NAME, PARAMS,                          \
+        int rc = 0;, PARAM_FORMAT,                  \
+        rc = PyLong_AsLong(rv);, rc,                \
+        xmlparseobject_CAST(userData)               \
+    )
 
 VOID_HANDLER(EndElement,
              (void *userData, const XML_Char *name),
@@ -549,7 +569,7 @@ my_ElementDeclHandler(void *userData,
                       const XML_Char *name,
                       XML_Content *model)
 {
-    xmlparseobject *self = (xmlparseobject *)userData;
+    xmlparseobject *self = xmlparseobject_CAST(userData);
     PyObject *args = NULL;
 
     if (have_handler(self, ElementDecl)) {
@@ -979,8 +999,6 @@ 
pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
 /*[clinic end generated code: output=01d4472b49cb3f92 input=ec70c6b9e6e9619a]*/
 {
     xmlparseobject *new_parser;
-    int i;
-
     pyexpat_state *state = PyType_GetModuleState(cls);
 
     new_parser = PyObject_GC_New(xmlparseobject, state->xml_parse_type);
@@ -1015,6 +1033,7 @@ 
pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
     XML_SetUserData(new_parser->itself, (void *)new_parser);
 
     /* allocate and clear handlers first */
+    size_t i;
     for (i = 0; handler_info[i].name != NULL; i++)
         /* do nothing */;
 
@@ -1026,12 +1045,12 @@ 
pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
     clear_handlers(new_parser, 1);
 
     /* then copy handlers from self */
-    for (i = 0; handler_info[i].name != NULL; i++) {
+    for (size_t i = 0; handler_info[i].name != NULL; i++) {
         PyObject *handler = self->handlers[i];
         if (handler != NULL) {
             new_parser->handlers[i] = Py_NewRef(handler);
-            handler_info[i].setter(new_parser->itself,
-                                   handler_info[i].handler);
+            struct HandlerInfo info = handler_info[i];
+            CALL_XML_HANDLER_SETTER(info, new_parser->itself, info.handler);
         }
     }
 
@@ -1240,30 +1259,34 @@ newxmlparseobject(pyexpat_state *state, const char 
*encoding,
 }
 
 static int
-xmlparse_traverse(xmlparseobject *op, visitproc visit, void *arg)
+xmlparse_traverse(PyObject *op, visitproc visit, void *arg)
 {
-    for (int i = 0; handler_info[i].name != NULL; i++) {
-        Py_VISIT(op->handlers[i]);
+    xmlparseobject *self = xmlparseobject_CAST(op);
+    for (size_t i = 0; handler_info[i].name != NULL; i++) {
+        Py_VISIT(self->handlers[i]);
     }
     Py_VISIT(Py_TYPE(op));
     return 0;
 }
 
 static int
-xmlparse_clear(xmlparseobject *op)
+xmlparse_clear(PyObject *op)
 {
-    clear_handlers(op, 0);
-    Py_CLEAR(op->intern);
+    xmlparseobject *self = xmlparseobject_CAST(op);
+    clear_handlers(self, 0);
+    Py_CLEAR(self->intern);
     return 0;
 }
 
 static void
-xmlparse_dealloc(xmlparseobject *self)
+xmlparse_dealloc(PyObject *op)
 {
+    xmlparseobject *self = xmlparseobject_CAST(op);
     PyObject_GC_UnTrack(self);
-    (void)xmlparse_clear(self);
-    if (self->itself != NULL)
+    (void)xmlparse_clear(op);
+    if (self->itself != NULL) {
         XML_ParserFree(self->itself);
+    }
     self->itself = NULL;
 
     if (self->handlers != NULL) {
@@ -1281,19 +1304,24 @@ xmlparse_dealloc(xmlparseobject *self)
 
 
 static PyObject *
-xmlparse_handler_getter(xmlparseobject *self, struct HandlerInfo *hi)
+xmlparse_handler_getter(PyObject *op, void *closure)
 {
+    xmlparseobject *self = xmlparseobject_CAST(op);
+    struct HandlerInfo *hi = (struct HandlerInfo *)closure;
     assert((hi - handler_info) < (Py_ssize_t)Py_ARRAY_LENGTH(handler_info));
     int handlernum = (int)(hi - handler_info);
     PyObject *result = self->handlers[handlernum];
-    if (result == NULL)
+    if (result == NULL) {
         result = Py_None;
+    }
     return Py_NewRef(result);
 }
 
 static int
-xmlparse_handler_setter(xmlparseobject *self, PyObject *v, struct HandlerInfo 
*hi)
+xmlparse_handler_setter(PyObject *op, PyObject *v, void *closure)
 {
+    xmlparseobject *self = xmlparseobject_CAST(op);
+    struct HandlerInfo *hi = (struct HandlerInfo *)closure;
     assert((hi - handler_info) < (Py_ssize_t)Py_ARRAY_LENGTH(handler_info));
     int handlernum = (int)(hi - handler_info);
     if (v == NULL) {
@@ -1323,8 +1351,9 @@ xmlparse_handler_setter(xmlparseobject *self, PyObject 
*v, struct HandlerInfo *h
            otherwise would, but that's really an odd case.  A more
            elaborate system of handlers and state could remove the
            C handler more effectively. */
-        if (handlernum == CharacterData && self->in_callback)
+        if (handlernum == CharacterData && self->in_callback) {
             c_handler = noop_character_data_handler;
+        }
         v = NULL;
     }
     else if (v != NULL) {
@@ -1332,15 +1361,16 @@ xmlparse_handler_setter(xmlparseobject *self, PyObject 
*v, struct HandlerInfo *h
         c_handler = handler_info[handlernum].handler;
     }
     Py_XSETREF(self->handlers[handlernum], v);
-    handler_info[handlernum].setter(self->itself, c_handler);
+    CALL_XML_HANDLER_SETTER(handler_info[handlernum], self->itself, c_handler);
     return 0;
 }
 
-#define INT_GETTER(name) \
-    static PyObject * \
-    xmlparse_##name##_getter(xmlparseobject *self, void *closure) \
-    { \
-        return PyLong_FromLong((long) XML_Get##name(self->itself)); \
+#define INT_GETTER(name)                                                \
+    static PyObject *                                                   \
+    xmlparse_##name##_getter(PyObject *op, void *Py_UNUSED(closure))    \
+    {                                                                   \
+        xmlparseobject *self = xmlparseobject_CAST(op);                 \
+        return PyLong_FromLong((long)XML_Get##name(self->itself));      \
     }
 INT_GETTER(ErrorCode)
 INT_GETTER(ErrorLineNumber)
@@ -1353,21 +1383,24 @@ INT_GETTER(CurrentByteIndex)
 #undef INT_GETTER
 
 static PyObject *
-xmlparse_buffer_text_getter(xmlparseobject *self, void *closure)
+xmlparse_buffer_text_getter(PyObject *op, void *Py_UNUSED(closure))
 {
+    xmlparseobject *self = xmlparseobject_CAST(op);
     return PyBool_FromLong(self->buffer != NULL);
 }
 
 static int
-xmlparse_buffer_text_setter(xmlparseobject *self, PyObject *v, void *closure)
+xmlparse_buffer_text_setter(PyObject *op, PyObject *v, void 
*Py_UNUSED(closure))
 {
+    xmlparseobject *self = xmlparseobject_CAST(op);
     if (v == NULL) {
         PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
         return -1;
     }
     int b = PyObject_IsTrue(v);
-    if (b < 0)
+    if (b < 0) {
         return -1;
+    }
     if (b) {
         if (self->buffer == NULL) {
             self->buffer = PyMem_Malloc(self->buffer_size);
@@ -1379,8 +1412,9 @@ xmlparse_buffer_text_setter(xmlparseobject *self, 
PyObject *v, void *closure)
         }
     }
     else if (self->buffer != NULL) {
-        if (flush_character_buffer(self) < 0)
+        if (flush_character_buffer(self) < 0) {
             return -1;
+        }
         PyMem_Free(self->buffer);
         self->buffer = NULL;
     }
@@ -1388,14 +1422,16 @@ xmlparse_buffer_text_setter(xmlparseobject *self, 
PyObject *v, void *closure)
 }
 
 static PyObject *
-xmlparse_buffer_size_getter(xmlparseobject *self, void *closure)
+xmlparse_buffer_size_getter(PyObject *op, void *Py_UNUSED(closure))
 {
-    return PyLong_FromLong((long) self->buffer_size);
+    xmlparseobject *self = xmlparseobject_CAST(op);
+    return PyLong_FromLong(self->buffer_size);
 }
 
 static int
-xmlparse_buffer_size_setter(xmlparseobject *self, PyObject *v, void *closure)
+xmlparse_buffer_size_setter(PyObject *op, PyObject *v, void 
*Py_UNUSED(closure))
 {
+    xmlparseobject *self = xmlparseobject_CAST(op);
     if (v == NULL) {
         PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
         return -1;
@@ -1408,8 +1444,9 @@ xmlparse_buffer_size_setter(xmlparseobject *self, 
PyObject *v, void *closure)
 
     new_buffer_size = PyLong_AsLong(v);
     if (new_buffer_size <= 0) {
-        if (!PyErr_Occurred())
+        if (!PyErr_Occurred()) {
             PyErr_SetString(PyExc_ValueError, "buffer_size must be greater 
than zero");
+        }
         return -1;
     }
 
@@ -1444,68 +1481,78 @@ xmlparse_buffer_size_setter(xmlparseobject *self, 
PyObject *v, void *closure)
 }
 
 static PyObject *
-xmlparse_buffer_used_getter(xmlparseobject *self, void *closure)
+xmlparse_buffer_used_getter(PyObject *op, void *Py_UNUSED(closure))
 {
-    return PyLong_FromLong((long) self->buffer_used);
+    xmlparseobject *self = xmlparseobject_CAST(op);
+    return PyLong_FromLong(self->buffer_used);
 }
 
 static PyObject *
-xmlparse_namespace_prefixes_getter(xmlparseobject *self, void *closure)
+xmlparse_namespace_prefixes_getter(PyObject *op, void *Py_UNUSED(closure))
 {
+    xmlparseobject *self = xmlparseobject_CAST(op);
     return PyBool_FromLong(self->ns_prefixes);
 }
 
 static int
-xmlparse_namespace_prefixes_setter(xmlparseobject *self, PyObject *v, void 
*closure)
+xmlparse_namespace_prefixes_setter(PyObject *op, PyObject *v, void 
*Py_UNUSED(closure))
 {
+    xmlparseobject *self = xmlparseobject_CAST(op);
     if (v == NULL) {
         PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
         return -1;
     }
     int b = PyObject_IsTrue(v);
-    if (b < 0)
+    if (b < 0) {
         return -1;
+    }
     self->ns_prefixes = b;
     XML_SetReturnNSTriplet(self->itself, self->ns_prefixes);
     return 0;
 }
 
 static PyObject *
-xmlparse_ordered_attributes_getter(xmlparseobject *self, void *closure)
+xmlparse_ordered_attributes_getter(PyObject *op, void *Py_UNUSED(closure))
 {
+    xmlparseobject *self = xmlparseobject_CAST(op);
     return PyBool_FromLong(self->ordered_attributes);
 }
 
 static int
-xmlparse_ordered_attributes_setter(xmlparseobject *self, PyObject *v, void 
*closure)
+xmlparse_ordered_attributes_setter(PyObject *op, PyObject *v, void 
*Py_UNUSED(closure))
 {
+    xmlparseobject *self = xmlparseobject_CAST(op);
     if (v == NULL) {
         PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
         return -1;
     }
     int b = PyObject_IsTrue(v);
-    if (b < 0)
+    if (b < 0) {
         return -1;
+    }
     self->ordered_attributes = b;
     return 0;
 }
 
 static PyObject *
-xmlparse_specified_attributes_getter(xmlparseobject *self, void *closure)
+xmlparse_specified_attributes_getter(PyObject *op, void *Py_UNUSED(closure))
 {
-    return PyBool_FromLong((long) self->specified_attributes);
+    xmlparseobject *self = xmlparseobject_CAST(op);
+    return PyBool_FromLong(self->specified_attributes);
 }
 
 static int
-xmlparse_specified_attributes_setter(xmlparseobject *self, PyObject *v, void 
*closure)
+xmlparse_specified_attributes_setter(PyObject *op, PyObject *v, void 
*Py_UNUSED(closure))
 {
+    xmlparseobject *self = xmlparseobject_CAST(op);
     if (v == NULL) {
         PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
         return -1;
     }
     int b = PyObject_IsTrue(v);
-    if (b < 0)
+    if (b < 0) {
         return -1;
+    }
     self->specified_attributes = b;
     return 0;
 }
@@ -1516,10 +1563,10 @@ static PyMemberDef xmlparse_members[] = {
 };
 
 #define XMLPARSE_GETTER_DEF(name) \
-    {#name, (getter)xmlparse_##name##_getter, NULL, NULL},
+    {#name, xmlparse_##name##_getter, NULL, NULL},
 #define XMLPARSE_GETTER_SETTER_DEF(name) \
-    {#name, (getter)xmlparse_##name##_getter, \
-            (setter)xmlparse_##name##_setter, NULL},
+    {#name, xmlparse_##name##_getter, \
+            xmlparse_##name##_setter, NULL},
 
 static PyGetSetDef xmlparse_getsetlist[] = {
     XMLPARSE_GETTER_DEF(ErrorCode)
@@ -1655,8 +1702,8 @@ static int init_handler_descrs(pyexpat_state *state)
     for (i = 0; handler_info[i].name != NULL; i++) {
         struct HandlerInfo *hi = &handler_info[i];
         hi->getset.name = hi->name;
-        hi->getset.get = (getter)xmlparse_handler_getter;
-        hi->getset.set = (setter)xmlparse_handler_setter;
+        hi->getset.get = xmlparse_handler_getter;
+        hi->getset.set = xmlparse_handler_setter;
         hi->getset.closure = &handler_info[i];
 
         PyObject *descr = PyDescr_NewGetSet(state->xml_parse_type, 
&hi->getset);
@@ -2120,7 +2167,7 @@ pyexpat_clear(PyObject *module)
 static void
 pyexpat_free(void *module)
 {
-    pyexpat_clear((PyObject *)module);
+    (void)pyexpat_clear((PyObject *)module);
 }
 
 static PyModuleDef_Slot pyexpat_slots[] = {
@@ -2151,22 +2198,24 @@ PyInit_pyexpat(void)
 static void
 clear_handlers(xmlparseobject *self, int initial)
 {
-    int i = 0;
-
-    for (; handler_info[i].name != NULL; i++) {
-        if (initial)
+    for (size_t i = 0; handler_info[i].name != NULL; i++) {
+        if (initial) {
             self->handlers[i] = NULL;
+        }
         else {
             Py_CLEAR(self->handlers[i]);
-            handler_info[i].setter(self->itself, NULL);
+            CALL_XML_HANDLER_SETTER(handler_info[i], self->itself, NULL);
         }
     }
 }
 
 static struct HandlerInfo handler_info[] = {
 
+    // The cast to `xmlhandlersetter` is needed as the signature of XML
+    // handler functions is not compatible with `xmlhandlersetter` since
+    // their second parameter is narrower than a `const void *`.
 #define HANDLER_INFO(name) \
-    {#name, (xmlhandlersetter)XML_Set##name, (xmlhandler)my_##name},
+    {#name, (xmlhandlersetter)XML_Set##name, my_##name},
 
     HANDLER_INFO(StartElementHandler)
     HANDLER_INFO(EndElementHandler)

_______________________________________________
Python-checkins mailing list -- python-checkins@python.org
To unsubscribe send an email to python-checkins-le...@python.org
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: arch...@mail-archive.com

Reply via email to