I wrote a patch for the GCC trunk to add an
__attribute__((format(PyArg_ParseTuple, 2, 3)))
declaration to functions (this specific declaration
should go to PyArg_ParseTuple only).

With that patch, the argument types are compared with the format string
(if that's a string literal), and errors are reported if there is a type
mismatch (provided -Wformat is given).

I'll post more about this patch in the near future, and commit
fixes for some bugs I found with it, but here is the patch, in
a publish-early fashion.

There is little chance that this can go into GCC (as it is too
specific), so it likely needs to be maintained separately.
It was written for the current trunk, but hopefully applies
to most recent releases.

Regards,
Martin

Index: c-format.c
===================================================================
--- c-format.c  (revision 113267)
+++ c-format.c  (working copy)
@@ -62,7 +62,11 @@
                   gcc_cdiag_format_type,
                   gcc_cxxdiag_format_type, gcc_gfc_format_type,
                   scanf_format_type, strftime_format_type,
-                  strfmon_format_type, format_type_error = -1};
+                  strfmon_format_type, 
+#define python_first PyArg_ParseTuple_type
+#define python_last _PyArg_ParseTuple_SizeT_type
+                  PyArg_ParseTuple_type, _PyArg_ParseTuple_SizeT_type,
+                  format_type_error = -1};
 
 typedef struct function_format_info
 {
@@ -759,6 +763,12 @@
     strfmon_flag_specs, strfmon_flag_pairs,
     FMT_FLAG_ARG_CONVERT, 'w', '#', 'p', 0, 'L',
     NULL, NULL
+  },
+  { "PyArg_ParseTuple", NULL, NULL, NULL, NULL, NULL, NULL, 
+    0, 0, 0, 0, 0, 0, NULL, NULL
+  },
+  { "_PyArg_ParseTuple_SizeT", NULL, NULL, NULL, NULL, NULL, NULL, 
+    0, 0, 0, 0, 0, 0, NULL, NULL
   }
 };
 
@@ -813,6 +823,10 @@
                                    const char *, int, tree,
                                    unsigned HOST_WIDE_INT);
 
+static void check_format_info_python (format_check_results *,
+                                     function_format_info *,
+                                     const char *, int, tree, int);
+
 static void init_dollar_format_checking (int, tree);
 static int maybe_read_dollar_number (const char **, int,
                                     tree, tree *, const format_kind_info *);
@@ -1414,8 +1428,12 @@
      will decrement it if it finds there are extra arguments, but this way
      need not adjust it for every return.  */
   res->number_other++;
-  check_format_info_main (res, info, format_chars, format_length,
-                         params, arg_num);
+  if (info->format_type >= python_first && info->format_type <= python_last)
+    check_format_info_python (res, info, format_chars, format_length,
+                             params, arg_num);
+  else
+    check_format_info_main (res, info, format_chars, format_length,
+                           params, arg_num);
 }
 
 
@@ -2102,7 +2120,309 @@
     }
 }
 
+/* Look up IDENT as a global type name and return the type it denotes
+   (the typedef's original type when one is recorded, otherwise the
+   declaration's own type).  Reports an error and returns NULL if IDENT
+   is not declared as a type.  */
+static tree
+lookup_type (const char *ident)
+{
+  tree id = maybe_get_identifier (ident);
+  tree decl = id ? identifier_global_value (id) : NULL;
+  if (!decl || TREE_CODE (decl) != TYPE_DECL)
+    {
+      error ("%s is not defined as a type", ident);
+      return NULL;
+    }
+  /* DECL_ORIGINAL_TYPE may be unset; use the declared type then
+     rather than asserting, which would abort the compilation.  */
+  if (DECL_ORIGINAL_TYPE (decl))
+    return DECL_ORIGINAL_TYPE (decl);
+  return TREE_TYPE (decl);
+}
 
+/* Return nonzero if TYPE, after stripping INDIRECTIONS pointer levels,
+   is PyObject or a struct whose first field is named like the start
+   of the PyObject header (_ob_refcnt or _ob_next).  */
+static int
+is_object (tree type, int indirections)
+{
+  static tree PyObject = NULL;
+  static tree ob_refcnt = NULL;
+  static tree ob_next = NULL;
+  tree fields;
+
+  if (!PyObject)
+    {
+      ob_refcnt = get_identifier ("_ob_refcnt");
+      ob_next = get_identifier ("_ob_next");
+      PyObject = lookup_type ("PyObject");
+      if (!PyObject) return 0;
+    }
+
+  for (; indirections > 0; indirections--, type = TREE_TYPE (type))
+    if (TREE_CODE (type) != POINTER_TYPE)
+      return 0;
+
+  /* type should be PyObject */
+  if (lang_hooks.types_compatible_p (type, PyObject))
+    return 1;
+  /* Might be a "derived" PyObject.  A record without any fields has
+     a NULL field chain, so guard before looking at the first field.  */
+  if (TREE_CODE (type) != RECORD_TYPE || !(fields = TYPE_FIELDS (type)))
+    return 0;
+  return DECL_NAME (fields) == ob_refcnt || DECL_NAME (fields) == ob_next;
+}
+
+/* Check the arguments PARAMS against the PyArg_ParseTuple-style format
+   string FORMAT_CHARS of FORMAT_LENGTH characters.  INFO selects whether
+   '#' lengths are int or Py_ssize_t; ARG_NUM is the number of the
+   argument preceding PARAMS.  Mismatches are reported as diagnostics.  */
+static void
+check_format_info_python (format_check_results *ARG_UNUSED(res),
+                         function_format_info *info,
+                         const char *format_chars,
+                         int format_length, tree params,
+                         int arg_num)
+{
+  static tree PyTypeObject_ptr = NULL;
+  static tree Py_ssize_t = NULL;
+  static tree Py_UNICODE = NULL;
+  static tree Py_complex = NULL;
+  int parens = 0;
+  tree type = NULL;
+  tree cur_param, cur_type;
+  int is_writing = 1;
+  int advance_fmt;
+  int first = 1;
+  /* If the wanted type is a pointer type, we need
+     to strip off all indirections, or else
+     char const* will not compare as compatible with
+     char*. */
+  int indirections;
+
+  if (!PyTypeObject_ptr)
+    {
+      PyTypeObject_ptr = lookup_type ("PyTypeObject");
+      if (!PyTypeObject_ptr) return;
+      PyTypeObject_ptr = build_pointer_type (PyTypeObject_ptr);
+    }
+
+  if (!Py_ssize_t && !(Py_ssize_t = lookup_type ("Py_ssize_t")))
+    return;
+  if (!Py_UNICODE && !(Py_UNICODE = lookup_type ("Py_UNICODE")))
+    return;
+  if (!Py_complex && !(Py_complex = lookup_type ("Py_complex")))
+    return;
+
+  for (;format_length;
+       format_chars += advance_fmt, format_length -= advance_fmt, first = 0)
+    {
+      advance_fmt = 1;
+      indirections = is_writing;
+      /* Character codes without argument */
+      switch(*format_chars)
+       {
+       case ':': case ';':
+         /* End of argument list; function name or message follows.  */
+         goto exit;
+       case '|':
+         /* Marker for optional arguments; ignore */
+         continue;
+       case '(':
+         parens++;
+         continue;
+       case ')':
+         parens--;
+         continue;
+       }
+
+      /* All other character codes require at least one argument */
+      if (!params)
+       {
+         error("No argument for code '%c'", *format_chars);
+         return;
+       }
+      cur_param = TREE_VALUE (params);
+      params = TREE_CHAIN (params);
+      arg_num++;
+      cur_type = TREE_TYPE (cur_param);
+      cur_type = TYPE_MAIN_VARIANT (cur_type);
+
+      switch (*format_chars)
+       {
+       case 'b': case 'B': case 'c': type = char_type_node; break;
+       case 'h': type = short_integer_type_node; break;
+       case 'H': type = short_unsigned_type_node; break;
+       case 'i': type = integer_type_node; break;
+       case 'I': type = unsigned_type_node; break;
+       case 'n': type = Py_ssize_t; break;
+       case 'l': type = long_integer_type_node; break;
+       case 'k': type = long_unsigned_type_node; break;
+       case 'L': type = long_long_integer_type_node; break;
+       case 'K': type = long_long_unsigned_type_node; break;
+       case 'f': type = float_type_node; break;
+       case 'd': type = double_type_node; break;
+       case 'D': type = Py_complex; break;
+       case 'e':
+         /* Process 'e' on its own; the next round will
+            bring us to 's' or 't'. */
+         if (format_chars[1] != 's' && format_chars[1] != 't')
+           {
+             error ("'e' is not followed by either 's' or 't'");
+             return;
+           }
+         type = char_type_node;
+         break;
+       case 's': case 'z': case 't':
+         type = char_type_node;
+         indirections++;
+         break;
+       case 'u':
+         type = Py_UNICODE;
+         indirections++;
+         break;
+       case 'w':
+         if (TREE_CODE (cur_type) != POINTER_TYPE
+             || TREE_CODE (TREE_TYPE (cur_type)) != POINTER_TYPE)
+           {
+             error ("'w' must be associated with a pointer");
+             return;
+           }
+         cur_type = TREE_TYPE (TREE_TYPE (cur_type));
+         if (cur_type != void_type_node && cur_type != char_type_node)
+           {
+             error ("'w' must be associated with a void** or char**");
+             return;
+           }
+         goto next_arg;
+       case '#':
+         if (first || (format_chars[-1] != 's' &&
+                       format_chars[-1] != 't' &&
+                       format_chars[-1] != 'u' &&
+                       format_chars[-1] != 'w' &&
+                       format_chars[-1] != 'z'))
+           {
+             error ("'#' not preceded by s, t, u, w, or z");
+             return;
+           }
+         switch (info->format_type)
+           {
+           case PyArg_ParseTuple_type:
+             type = integer_type_node;
+             break;
+           case _PyArg_ParseTuple_SizeT_type:
+             type = Py_ssize_t;
+             break;
+           default:
+             internal_error ("unsupported info->format_type");
+             return;
+           }
+         break;
+       case 'S': case 'O': case 'U':
+         {
+           if (format_chars[1] == '!')
+             {
+               advance_fmt++;
+               if (!lang_hooks.types_compatible_p (PyTypeObject_ptr,
+                                                   cur_type))
+                 {
+                   error("O! argument %d not filled with PyTypeObject*",
+                         arg_num);
+                   return;
+                 }
+               /* The object pointer to fill in must follow.  */
+               if (!params)
+                 {
+                   error("No argument for code '%c'", *format_chars);
+                   return;
+                 }
+               cur_param = TREE_VALUE (params);
+               params = TREE_CHAIN (params);
+               arg_num++;
+               cur_type = TREE_TYPE (cur_param);
+               cur_type = TYPE_MAIN_VARIANT (cur_type);
+             }
+           if (format_chars[1] == '&')
+             {
+               advance_fmt++;
+               if (TREE_CODE (cur_type) != POINTER_TYPE
+                   || TREE_CODE (TREE_TYPE (cur_type)) != FUNCTION_TYPE)
+                 {
+                   error("O& argument not filled with function pointer");
+                   return;
+                 }
+               /* The converter's target must follow; it might be of
+                  arbitrary type, so only its presence is checked.  */
+               if (!params)
+                 {
+                   error("No argument for code '%c'", *format_chars);
+                   return;
+                 }
+               params = TREE_CHAIN (params);
+               arg_num++;
+               goto next_arg;
+             }
+           /* If indirections is 0, we expect PyObject*;
+              if indirections is 1, we expect PyObject**. */
+           if (!is_object (cur_type, indirections + 1))
+             {
+               error ("Invalid argument %d for 'O'", arg_num);
+               return;
+             }
+           goto next_arg;
+         }
+       default:
+         error("Unsupported format code '%c'", *format_chars);
+         return;
+       }
+
+      while (indirections--)
+       {
+         if (TREE_CODE (cur_type) != POINTER_TYPE
+             && TREE_CODE (cur_type) != ARRAY_TYPE)
+           {
+             error ("Argument %d for '%c' has not enough indirections",
+                    arg_num, *format_chars);
+             return;
+           }
+         cur_type = TYPE_MAIN_VARIANT (TREE_TYPE (cur_type));
+         /* Allow void* everywhere */
+         if (cur_type == void_type_node)
+           goto next_arg;
+       }
+      if (lang_hooks.types_compatible_p (type, cur_type))
+       goto next_arg;
+      /* Allow signed/unsigned char when char is expected */
+      if (type == char_type_node &&
+         (cur_type == unsigned_char_type_node
+          || cur_type == signed_char_type_node))
+       goto next_arg;
+      if (TREE_CODE (type) == TREE_CODE (cur_type))
+       {
+         if (TREE_CODE (type) == INTEGER_TYPE
+             && TYPE_PRECISION (type) == TYPE_PRECISION (cur_type))
+           {
+             warning (OPT_Wformat,
+                      "Mixing signed and unsigned in argument %d",
+                      arg_num);
+             goto next_arg;
+           }
+       }
+      error ("Invalid type for '%c' argument %d",
+            *format_chars, arg_num);
+      return;
+    next_arg:
+      ;
+    }
+
+ exit:
+  if (parens)
+    error ("Unbalanced number of parens");
+
+  if (params)
+    error ("Too many arguments");
+}
+
 /* Check the argument types from a single format conversion (possibly
    including width and precision arguments).  */
 static void
_______________________________________________
Python-Dev mailing list
Python-Dev@python.org
http://mail.python.org/mailman/listinfo/python-dev
Unsubscribe: 
http://mail.python.org/mailman/options/python-dev/archive%40mail-archive.com

Reply via email to