[Python-Dev] GCC patch for catching errors in PyArg_ParseTuple

2006-09-22 Thread Martin v. Löwis
I wrote a patch for the GCC trunk to add an
__attribute__((format(PyArg_ParseTuple, 2, 3)))
declaration to functions (this specific declaration
should go to PyArg_ParseTuple only).

With that patch, parameter types are compared with the string parameter
(if that's a literal), and errors are reported if there is a type
mismatch (provided -Wformat is given).

I'll post more about this patch in the near future, and commit
some bug fixes I found with it, but here is the patch, in
a publish-early fashion.

There is little chance that this can go into GCC (as it is too
specific), so it likely needs to be maintained separately.
It was written for the current trunk, but hopefully applies
to most recent releases.

Regards,
Martin

Index: c-format.c
===
--- c-format.c  (revision 113267)
+++ c-format.c  (working copy)
@@ -62,7 +62,11 @@
   gcc_cdiag_format_type,
   gcc_cxxdiag_format_type, gcc_gfc_format_type,
   scanf_format_type, strftime_format_type,
-  strfmon_format_type, format_type_error = -1};
+  strfmon_format_type, 
+#define python_first PyArg_ParseTuple_type
+#define python_last _PyArg_ParseTuple_SizeT_type
+  PyArg_ParseTuple_type, _PyArg_ParseTuple_SizeT_type,
+  format_type_error = -1};
 
 typedef struct function_format_info
 {
@@ -759,6 +763,12 @@
 strfmon_flag_specs, strfmon_flag_pairs,
 FMT_FLAG_ARG_CONVERT, 'w', '#', 'p', 0, 'L',
 NULL, NULL
+  },
+  { "PyArg_ParseTuple", NULL, NULL, NULL, NULL, NULL, NULL, 
+0, 0, 0, 0, 0, 0, NULL, NULL
+  },
+  { "_PyArg_ParseTuple_SizeT", NULL, NULL, NULL, NULL, NULL, NULL, 
+0, 0, 0, 0, 0, 0, NULL, NULL
   }
 };
 
@@ -813,6 +823,10 @@
const char *, int, tree,
unsigned HOST_WIDE_INT);
 
+static void check_format_info_python (format_check_results *,
+ function_format_info *,
+ const char *, int, tree, int);
+
 static void init_dollar_format_checking (int, tree);
 static int maybe_read_dollar_number (const char **, int,
 tree, tree *, const format_kind_info *);
@@ -1414,8 +1428,12 @@
  will decrement it if it finds there are extra arguments, but this way
  need not adjust it for every return.  */
   res->number_other++;
-  check_format_info_main (res, info, format_chars, format_length,
- params, arg_num);
+  if (info->format_type >= python_first && info->format_type <= python_last)
+check_format_info_python (res, info, format_chars, format_length,
+ params, arg_num);
+  else
+check_format_info_main (res, info, format_chars, format_length,
+   params, arg_num);
 }
 
 
@@ -2102,7 +2120,309 @@
 }
 }
 
+/* Look up IDENT with maybe_get_identifier and identifier_global_value
+   and return the DECL_ORIGINAL_TYPE of the TYPE_DECL it names.
+   If IDENT is not a known identifier, or its global value is not a
+   TYPE_DECL, report an error and return NULL.  */
+static tree
+lookup_type (const char* ident)
+{
+  tree result = maybe_get_identifier (ident);
+  if (!result)
+    {
+      error ("%s is not defined as a type", ident);
+      return NULL;
+    }
+  result = identifier_global_value (result);
+  if (!result || TREE_CODE (result) != TYPE_DECL)
+    {
+      error ("%s is not defined as a type", ident);
+      return NULL;
+    }
+  /* Hand back the type the typedef was declared from, not the
+     TYPE_DECL itself.  */
+  result = DECL_ORIGINAL_TYPE (result);
+  gcc_assert (result);
+  return result;
+}
 
+/* Return nonzero if TYPE, after stripping INDIRECTIONS levels of
+   pointer, is compatible with PyObject or is a record whose first
+   field carries one of the expected PyObject header field names.
+   Return 0 if TYPE has fewer pointer levels than INDIRECTIONS, or if
+   the PyObject type cannot be looked up.  */
+static int
+is_object (tree type, int indirections)
+{
+  /* Cached across calls.  If the PyObject lookup fails, it is retried
+     (and the error reported again) on the next call.  */
+  static tree PyObject = NULL;
+  static tree ob_refcnt = NULL;
+  static tree ob_next = NULL;
+  tree name;
+
+  if (!PyObject)
+    {
+      /* NOTE(review): CPython's object header appears to name the
+         reference-count field "ob_refcnt" (no leading underscore);
+         confirm this identifier against Include/object.h.  */
+      ob_refcnt = get_identifier ("_ob_refcnt");
+      ob_next = get_identifier ("_ob_next");
+      PyObject = lookup_type ("PyObject");
+      if (!PyObject) return 0;
+    }
+
+  while (indirections--)
+    {
+      if (TREE_CODE (type) != POINTER_TYPE)
+	return 0;
+      type = TREE_TYPE (type);
+    }
+
+  /* type should be PyObject */
+  if (lang_hooks.types_compatible_p (type, PyObject))
+    return 1;
+  /* might be a derived PyObject: recognize it by the name of its
+     first field */
+  if (TREE_CODE (type) != RECORD_TYPE)
+    return 0;
+  /* An empty or incomplete struct has no first field; DECL_NAME must
+     not be applied to a null tree.  */
+  if (!TYPE_FIELDS (type))
+    return 0;
+  name = DECL_NAME (TYPE_FIELDS (type));
+  return name == ob_refcnt || name == ob_next;
+}
+
+static void
+check_format_info_python (format_check_results *ARG_UNUSED(res),
+ function_format_info *info, 
+ const char *format_chars,
+ int format_length, tree params,
+ int arg_num)
+{
+  static tree PyTypeObject_ptr = NULL;
+  static tree Py_ssize_t = NULL;
+  static tree Py_UNICODE = NULL;
+  static tree Py_complex = NULL;
+  int parens = 0;
+  tree type = NULL;
+  tree cur_param, cur_type;
+  int is_writing = 1;
+  int advance_fmt;
+  int first = 1;
+  /* If the wanted type is a pointer type, we need
+ to strip off all indirections, or else
+ char const* will not compare as compatible with
+ char*. */
+  int indirections;
+
+  if (!PyTypeObject_ptr)
+{
+  

Re: [Python-Dev] GCC patch for catching errors in PyArg_ParseTuple

2006-09-22 Thread Giovanni Bajo
Martin v. Löwis wrote:

> I'll post more about this patch in the near future, and commit
> some bug fixes I found with it, but here is the patch, in
> a publish-early fashion.
>
> There is little chance that this can go into GCC (as it is too
> specific), so it likely needs to be maintained separately.
> It was written for the current trunk, but hopefully applies
> to most recent releases.

A way not to maintain this patch forever would be to devise a way to make
format syntax pluggable / scriptable. There have been previous discussions
on the GCC mailing lists.

Giovanni Bajo

___
Python-Dev mailing list
Python-Dev@python.org
http://mail.python.org/mailman/listinfo/python-dev
Unsubscribe: 
http://mail.python.org/mailman/options/python-dev/archive%40mail-archive.com


Re: [Python-Dev] GCC patch for catching errors in PyArg_ParseTuple

2006-09-22 Thread Martin v. Löwis
Giovanni Bajo schrieb:
> A way not to maintain this patch forever would be to devise a way to make
> format syntax pluggable / scriptable. There have been previous discussions
> on the GCC mailing lists.

Perhaps. I very much doubt that this can or will be done, in a way that
would support PyArg_ParseTuple. It's probably easier to replace
PyArg_ParseTuple with something that can be statically checked by any
compiler.

Regards,
Martin
___
Python-Dev mailing list
Python-Dev@python.org
http://mail.python.org/mailman/listinfo/python-dev
Unsubscribe: 
http://mail.python.org/mailman/options/python-dev/archive%40mail-archive.com