[pypy-commit] cffi default: Found out how to properly generalize the "pass a Python string as

arigo Mon, 30 Jul 2012 06:05:31 -0700

Author: Armin Rigo <ar...@tunes.org>
Branch: 
Changeset: r741:2ac4cc98111f
Date: 2012-07-30 15:05 +0200
http://bitbucket.org/cffi/cffi/changeset/2ac4cc98111f/


Log:    Found out how to properly generalize the "pass a Python string as a
        'char *' argument to a function call". It has the nice effect that
        the documented example can be simplified.

diff --git a/c/_cffi_backend.c b/c/_cffi_backend.c
--- a/c/_cffi_backend.c
+++ b/c/_cffi_backend.c
@@ -64,7 +64,8 @@
 
     Py_ssize_t ct_size;     /* size of instances, or -1 if unknown */
     Py_ssize_t ct_length;   /* length of arrays, or -1 if unknown;
-                               or alignment of primitive and struct types */
+                               or alignment of primitive and struct types;
+                               always -1 for pointers */
     int ct_flags;           /* CT_xxx flags */
 
     int ct_name_position;   /* index in ct_name of where to put a var name */
@@ -735,78 +736,91 @@
 }
 
 static int
+convert_array_from_object(char *data, CTypeDescrObject *ct, PyObject *init)
+{
+    /* used by convert_from_object(), and also to decode lists/tuples/unicodes
+       passed as function arguments.  'ct' is an CT_ARRAY in the first case
+       and a CT_POINTER in the second case. */
+    const char *expected;
+    CTypeDescrObject *ctitem = ct->ct_itemdescr;
+
+    if (PyList_Check(init) || PyTuple_Check(init)) {
+        PyObject **items;
+        Py_ssize_t i, n;
+        n = PySequence_Fast_GET_SIZE(init);
+        if (ct->ct_length >= 0 && n > ct->ct_length) {
+            PyErr_Format(PyExc_IndexError,
+                         "too many initializers for '%s' (got %zd)",
+                         ct->ct_name, n);
+            return -1;
+        }
+        items = PySequence_Fast_ITEMS(init);
+        for (i=0; i<n; i++) {
+            if (convert_from_object(data, ctitem, items[i]) < 0)
+                return -1;
+            data += ctitem->ct_size;
+        }
+        return 0;
+    }
+    else if (ctitem->ct_flags & CT_PRIMITIVE_CHAR) {
+        if (ctitem->ct_size == sizeof(char)) {
+            char *srcdata;
+            Py_ssize_t n;
+            if (!PyString_Check(init)) {
+                expected = "str or list or tuple";
+                goto cannot_convert;
+            }
+            n = PyString_GET_SIZE(init);
+            if (ct->ct_length >= 0 && n > ct->ct_length) {
+                PyErr_Format(PyExc_IndexError,
+                             "initializer string is too long for '%s' "
+                             "(got %zd characters)", ct->ct_name, n);
+                return -1;
+            }
+            if (n != ct->ct_length)
+                n++;
+            srcdata = PyString_AS_STRING(init);
+            memcpy(data, srcdata, n);
+            return 0;
+        }
+#ifdef HAVE_WCHAR_H
+        else {
+            Py_ssize_t n;
+            if (!PyUnicode_Check(init)) {
+                expected = "unicode or list or tuple";
+                goto cannot_convert;
+            }
+            n = _my_PyUnicode_SizeAsWideChar(init);
+            if (ct->ct_length >= 0 && n > ct->ct_length) {
+                PyErr_Format(PyExc_IndexError,
+                             "initializer unicode is too long for '%s' "
+                             "(got %zd characters)", ct->ct_name, n);
+                return -1;
+            }
+            if (n != ct->ct_length)
+                n++;
+            _my_PyUnicode_AsWideChar(init, (wchar_t *)data, n);
+            return 0;
+        }
+#endif
+    }
+    else {
+        expected = "list or tuple";
+        goto cannot_convert;
+    }
+
+ cannot_convert:
+    return _convert_error(init, ct->ct_name, expected);
+}
+
+static int
 convert_from_object(char *data, CTypeDescrObject *ct, PyObject *init)
 {
     const char *expected;
     char buf[sizeof(PY_LONG_LONG)];
 
     if (ct->ct_flags & CT_ARRAY) {
-        CTypeDescrObject *ctitem = ct->ct_itemdescr;
-
-        if (PyList_Check(init) || PyTuple_Check(init)) {
-            PyObject **items;
-            Py_ssize_t i, n;
-            n = PySequence_Fast_GET_SIZE(init);
-            if (ct->ct_length >= 0 && n > ct->ct_length) {
-                PyErr_Format(PyExc_IndexError,
-                             "too many initializers for '%s' (got %zd)",
-                             ct->ct_name, n);
-                return -1;
-            }
-            items = PySequence_Fast_ITEMS(init);
-            for (i=0; i<n; i++) {
-                if (convert_from_object(data, ctitem, items[i]) < 0)
-                    return -1;
-                data += ctitem->ct_size;
-            }
-            return 0;
-        }
-        else if (ctitem->ct_flags & CT_PRIMITIVE_CHAR) {
-            if (ctitem->ct_size == sizeof(char)) {
-                char *srcdata;
-                Py_ssize_t n;
-                if (!PyString_Check(init)) {
-                    expected = "str or list or tuple";
-                    goto cannot_convert;
-                }
-                n = PyString_GET_SIZE(init);
-                if (ct->ct_length >= 0 && n > ct->ct_length) {
-                    PyErr_Format(PyExc_IndexError,
-                                 "initializer string is too long for '%s' "
-                                 "(got %zd characters)", ct->ct_name, n);
-                    return -1;
-                }
-                if (n != ct->ct_length)
-                    n++;
-                srcdata = PyString_AS_STRING(init);
-                memcpy(data, srcdata, n);
-                return 0;
-            }
-#ifdef HAVE_WCHAR_H
-            else {
-                Py_ssize_t n;
-                if (!PyUnicode_Check(init)) {
-                    expected = "unicode or list or tuple";
-                    goto cannot_convert;
-                }
-                n = _my_PyUnicode_SizeAsWideChar(init);
-                if (ct->ct_length >= 0 && n > ct->ct_length) {
-                    PyErr_Format(PyExc_IndexError,
-                                 "initializer unicode is too long for '%s' "
-                                 "(got %zd characters)", ct->ct_name, n);
-                    return -1;
-                }
-                if (n != ct->ct_length)
-                    n++;
-                _my_PyUnicode_AsWideChar(init, (wchar_t *)data, n);
-                return 0;
-            }
-#endif
-        }
-        else {
-            expected = "list or tuple";
-            goto cannot_convert;
-        }
+        return convert_array_from_object(data, ct, init);
     }
     if (ct->ct_flags & (CT_POINTER|CT_FUNCTIONPTR)) {
         char *ptrdata;
@@ -1599,14 +1613,72 @@
     return ct_int;
 }
 
+static PyObject *
+_prepare_pointer_call_argument(CTypeDescrObject *ctptr, PyObject *init)
+{
+    /* 'ctptr' is here a pointer type 'ITEM *'.  Accept as argument an
+       initializer for an array 'ITEM[]'.  This includes the case of
+       passing a Python string to a 'char *' argument. */
+    Py_ssize_t length, datasize;
+    CTypeDescrObject *ctitem = ctptr->ct_itemdescr;
+    PyObject *result;
+    char *data;
+
+    /* XXX some code duplication, how to avoid it? */
+    if (PyString_Check(init)) {
+        /* from a string: just returning the string here is fine.
+           We assume that the C code won't modify the 'char *' data. */
+        if ((ctitem->ct_flags & CT_PRIMITIVE_CHAR) &&
+                (ctitem->ct_size == sizeof(char))) {
+            Py_INCREF(init);
+            return init;
+        }
+        else
+            return Py_None;
+    }
+    else if (PyList_Check(init) || PyTuple_Check(init)) {
+        length = PySequence_Fast_GET_SIZE(init);
+    }
+    else if (PyUnicode_Check(init)) {
+        /* from a unicode, we add the null terminator */
+        length = _my_PyUnicode_SizeAsWideChar(init) + 1;
+    }
+    else {
+        /* refuse to receive just an integer (and interpret it
+           as the array size) */
+        return Py_None;
+    }
+
+    if (ctitem->ct_size <= 0)
+        return Py_None;
+    datasize = length * ctitem->ct_size;
+    if ((datasize / ctitem->ct_size) != length) {
+        PyErr_SetString(PyExc_OverflowError,
+                        "array size would overflow a Py_ssize_t");
+        return NULL;
+    }
+
+    result = PyString_FromStringAndSize(NULL, datasize);
+    if (result == NULL)
+        return NULL;
+
+    data = PyString_AS_STRING(result);
+    memset(data, 0, datasize);
+    if (convert_array_from_object(data, ctptr, init) < 0) {
+        Py_DECREF(result);
+        return NULL;
+    }
+    return result;
+}
+
 static PyObject*
 cdata_call(CDataObject *cd, PyObject *args, PyObject *kwds)
 {
     char *buffer;
     void** buffer_array;
     cif_description_t *cif_descr;
-    Py_ssize_t i, nargs, nargs_declared;
-    PyObject *signature, *res, *fvarargs;
+    Py_ssize_t i, nargs, nargs_declared, free_me_until = 0;
+    PyObject *signature, *res = NULL, *fvarargs;
     CTypeDescrObject *fresult;
     char *resultdata;
     char *errormsg;
@@ -1636,7 +1708,10 @@
         /* regular case: this function does not take '...' arguments */
         if (nargs != nargs_declared) {
             errormsg = "'%s' expects %zd arguments, got %zd";
-            goto bad_number_of_arguments;
+          bad_number_of_arguments:
+            PyErr_Format(PyExc_TypeError, errormsg,
+                         cd->c_type->ct_name, nargs_declared, nargs);
+            goto error;
         }
     }
     else {
@@ -1708,26 +1783,21 @@
         else
             argtype = (CTypeDescrObject *)PyTuple_GET_ITEM(fvarargs, i);
 
-        if ((argtype->ct_flags & CT_POINTER) &&
-            (argtype->ct_itemdescr->ct_flags & CT_PRIMITIVE_CHAR)) {
-            if (argtype->ct_itemdescr->ct_size == sizeof(char)) {
-                if (PyString_Check(obj)) {
-                    /* special case: Python string -> cdata 'char *' */
-                    *(char **)data = PyString_AS_STRING(obj);
+        if (argtype->ct_flags & CT_POINTER) {
+            PyObject *string;
+            if (!CData_Check(obj)) {
+                string = _prepare_pointer_call_argument(argtype, obj);
+                if (string != Py_None) {
+                    if (string == NULL)
+                        goto error;
+                    ((char **)data)[0] = PyString_AS_STRING(string);
+                    ((char **)data)[1] = (char *)string;
+                    assert(i < nargs_declared); /* otherwise, obj is a CData */
+                    free_me_until = i + 1;
                     continue;
                 }
             }
-#ifdef HAVE_WCHAR_H
-            else {
-                if (PyUnicode_Check(obj)) {
-                    /* Python Unicode string -> cdata 'wchar_t *':
-                       not supported yet */
-                    PyErr_SetString(PyExc_NotImplementedError,
-                        "automatic unicode-to-'wchar_t *' conversion");
-                    goto error;
-                }
-            }
-#endif
+            ((char **)data)[1] = NULL;
         }
         if (convert_from_object(data, argtype, obj) < 0)
             goto error;
@@ -1761,23 +1831,26 @@
     else {
         res = convert_to_object(resultdata, fresult);
     }
-    PyObject_Free(buffer);
- done:
+    /* fall-through */
+
+ error:
+    for (i=0; i<free_me_until; i++) {
+        CTypeDescrObject *argtype;
+        argtype = (CTypeDescrObject *)PyTuple_GET_ITEM(signature, 2 + i);
+        if (argtype->ct_flags & CT_POINTER) {
+            char *data = buffer + cif_descr->exchange_offset_arg[1 + i];
+            PyObject *string_or_null = (PyObject *)(((char **)data)[1]);
+            Py_XDECREF(string_or_null);
+        }
+    }
+    if (buffer)
+        PyObject_Free(buffer);
     if (fvarargs != NULL) {
         Py_DECREF(fvarargs);
         if (cif_descr != NULL)  /* but only if fvarargs != NULL, if variadic */
             PyObject_Free(cif_descr);
     }
     return res;
-
- bad_number_of_arguments:
-    PyErr_Format(PyExc_TypeError, errormsg,
-                 cd->c_type->ct_name, nargs_declared, nargs);
- error:
-    if (buffer)
-        PyObject_Free(buffer);
-    res = NULL;
-    goto done;
 }
 
 static PyObject *cdata_iter(CDataObject *);
@@ -2619,6 +2692,7 @@
         return NULL;
 
     td->ct_size = sizeof(void *);
+    td->ct_length = -1;
     td->ct_flags = CT_POINTER;
     if (ctitem->ct_flags & (CT_STRUCT|CT_UNION))
         td->ct_flags |= CT_IS_PTR_TO_OWNED;
@@ -3131,6 +3205,15 @@
             exchange_offset = ALIGN_ARG(exchange_offset);
             cif_descr->exchange_offset_arg[1 + i] = exchange_offset;
             exchange_offset += atype->size;
+            /* if 'farg' is a pointer type 'ITEM *', then we might receive
+               as argument to the function call what is an initializer
+               for an array 'ITEM[]'.  This includes the case of passing a
+               Python string to a 'char *' argument.  In this case, we
+               convert the initializer to a cdata 'ITEM[]' that gets
+               temporarily stored here: */
+            if (farg->ct_flags & CT_POINTER) {
+                exchange_offset += sizeof(PyObject *);
+            }
         }
     }
 
@@ -3898,6 +3981,11 @@
     return result;
 }
 
+static int _testfunc18(struct _testfunc17_s *ptr)
+{
+    return ptr->a1 + (int)ptr->a2;
+}
+
 static PyObject *b__testfunc(PyObject *self, PyObject *args)
 {
     /* for testing only */
@@ -3924,6 +4012,7 @@
     case 15: f = &_testfunc15; break;
     case 16: f = &_testfunc16; break;
     case 17: f = &_testfunc17; break;
+    case 18: f = &_testfunc18; break;
     default:
         PyErr_SetNone(PyExc_ValueError);
         return NULL;
diff --git a/c/test_c.py b/c/test_c.py
--- a/c/test_c.py
+++ b/c/test_c.py
@@ -763,12 +763,22 @@
     BFunc6bis = new_function_type((BIntArray,), BIntPtr, False)
     f = cast(BFunc6bis, _testfunc(6))
     #
-    py.test.raises(TypeError, f, [142])
+    res = f([142])
+    assert typeof(res) is BIntPtr
+    assert res[0] == 142 - 1000
+    #
+    res = f((143,))
+    assert typeof(res) is BIntPtr
+    assert res[0] == 143 - 1000
     #
     x = newp(BIntArray, [242])
     res = f(x)
     assert typeof(res) is BIntPtr
     assert res[0] == 242 - 1000
+    #
+    py.test.raises(TypeError, f, 123456)
+    py.test.raises(TypeError, f, "foo")
+    py.test.raises(TypeError, f, u"bar")
 
 def test_call_function_7():
     BChar = new_primitive_type("char")
@@ -1332,6 +1342,14 @@
     assert repr(s) == "<cdata 'struct test17' owning 8 bytes>"
     assert s.a1 == 40
     assert s.a2 == 40.0 * 40.0
+    #
+    BStruct17Ptr = new_pointer_type(BStruct17)
+    BFunc18 = new_function_type((BStruct17Ptr,), BInt)
+    f = cast(BFunc18, _testfunc(18))
+    x = f([[40, 2.5]])
+    assert x == 42
+    x = f([{'a2': 43.1}])
+    assert x == 43
 
 def test_cast_with_functionptr():
     BFunc = new_function_type((), new_void_type())
@@ -1458,8 +1476,7 @@
         return len(unicode(p))
     BFunc = new_function_type((BWCharP,), BInt, False)
     f = callback(BFunc, cb, -42)
-    #assert f(u'a\u1234b') == 3    -- not implemented
-    py.test.raises(NotImplementedError, f, u'a\u1234b')
+    assert f(u'a\u1234b') == 3
     #
     if wchar4 and not pyuni4:
         # try out-of-range wchar_t values
diff --git a/doc/source/index.rst b/doc/source/index.rst
--- a/doc/source/index.rst
+++ b/doc/source/index.rst
@@ -658,6 +658,7 @@
    ffi.cdef("""
       int main_like(int argv, char *argv[]);
    """)
+   lib = ffi.dlopen("some_library.so")
 
 Now, everything is simple, except, how do we create the ``char**`` argument
 here?
@@ -665,20 +666,34 @@
 
 .. code-block:: python
 
-   argv = ffi.new("char *[]", ["arg0", "arg1"])
+   lib.main_like(2, ["arg0", "arg1"])
 
-Does not work, because the initializer receives python ``str`` instead of
-``char*``. Now, the following would almost work:
+does not work, because the initializer receives two Python ``str`` objects
+where it was expecting ``<cdata 'char *'>`` objects.  You need to use
+``ffi.new()`` explicitly to make these objects:
 
 .. code-block:: python
 
+   lib.main_like(2, [ffi.new("char[]", "arg0"),
+                     ffi.new("char[]", "arg1")])
+
+Note that the two ``<cdata 'char[]'>`` objects are kept alive for the
+duration of the call: they are only freed when the list itself is freed,
+and the list is only freed when the call returns.
+
+If you want instead to build an "argv" variable that you want to reuse,
+then more care is needed:
+
+.. code-block:: python
+
+   # DOES NOT WORK!
    argv = ffi.new("char *[]", [ffi.new("char[]", "arg0"),
                                ffi.new("char[]", "arg1")])
 
-However, the two ``char[]`` objects will not be automatically kept alive.
-To keep them alive, one solution is to make sure that the list is stored
-somewhere for long enough.
-For example:
+In the above example, the inner "arg0" string is deallocated as soon
+as "argv" is built.  You have to make sure that you keep a reference
+to the inner "char[]" objects, either directly or by keeping the list
+alive like this:
 
 .. code-block:: python
 
@@ -686,7 +701,6 @@
                      ffi.new("char[]", "arg1")]
    argv = ffi.new("char *[]", argv_keepalive)
 
-will work.
 
 Function calls
 --------------
@@ -895,23 +909,21 @@
 |               | any pointer or array   |                  |                |
 |               | type                   |                  |                |
 +---------------+------------------------+                  +----------------+
-|  ``char *``   | another <cdata> with   |                  | ``[]``,        |
-|               | any pointer or array   |                  | ``+``, ``-``,  |
-|               | type, or               |                  | str()          |
-|               | a Python string when   |                  |                |
-|               | passed as func argument|                  |                |
+|  ``char *``   | same as pointers (*)   |                  | ``[]``,        |
+|               |                        |                  | ``+``, ``-``,  |
+|               |                        |                  | str()          |
 +---------------+------------------------+                  +----------------+
-| ``wchar_t *`` | same as pointers       |                  | ``[]``,        |
-|               | (passing a unicode as  |                  | ``+``, ``-``,  |
-|               | func argument is not   |                  | unicode()      |
-|               | implemented)           |                  |                |
+| ``wchar_t *`` | same as pointers (*)   |                  | ``[]``,        |
+|               |                        |                  | ``+``, ``-``,  |
+|               |                        |                  | unicode()      |
+|               |                        |                  |                |
 +---------------+------------------------+                  +----------------+
-|  pointers to  | same as pointers       |                  | ``[]``,        |
+|  pointers to  | same as pointers (*)   |                  | ``[]``,        |
 |  structure or |                        |                  | ``+``, ``-``,  |
 |  union        |                        |                  | and read/write |
 |               |                        |                  | struct fields  |
-+---------------+                        |                  +----------------+
-| function      |                        |                  | call           |
++---------------+------------------------+                  +----------------+
+| function      | same as pointers       |                  | call           |
 | pointers      |                        |                  |                |
 +---------------+------------------------+------------------+----------------+
 |  arrays       | a list or tuple of     | a <cdata>        | len(), iter(), |
@@ -941,6 +953,19 @@
 |               |                        | if out of range  |                |
 +---------------+------------------------+------------------+----------------+
 
+(*) Note that when calling a function, as per C, a ``item *`` argument
+is identical to a ``item[]`` argument.  So you can pass an argument that
+is accepted by either C type, like for example passing a Python string
+to a ``char *`` argument or a list of integers to a ``int *`` argument.
+Note that even if you want to pass a single ``item``, you need to specify
+it in a list of length 1; for example, a ``struct foo *`` argument might
+be passed as ``[[field1, field2...]]``.
+
+As an optimization, the CPython version of CFFI assumes that a function
+with a ``char *`` argument to which you pass a Python string will not
+actually modify the array of characters passed in, and so passes directly
+a pointer inside the Python string object.
+
 
 Reference: verifier
 -------------------
diff --git a/testing/backend_tests.py b/testing/backend_tests.py
--- a/testing/backend_tests.py
+++ b/testing/backend_tests.py
@@ -802,6 +802,28 @@
         res = a(1)    # and the error reported to stderr
         assert res == 42
 
+    def test_structptr_argument(self):
+        ffi = FFI(backend=self.Backend())
+        ffi.cdef("struct foo_s { int a, b; };")
+        def cb(p):
+            return p[0].a * 1000 + p[0].b * 100 + p[1].a * 10 + p[1].b
+        a = ffi.callback("int(*)(struct foo_s[])", cb)
+        res = a([[5, 6], {'a': 7, 'b': 8}])
+        assert res == 5678
+        res = a([[5], {'b': 8}])
+        assert res == 5008
+
+    def test_array_argument_as_list(self):
+        ffi = FFI(backend=self.Backend())
+        ffi.cdef("struct foo_s { int a, b; };")
+        seen = []
+        def cb(argv):
+            seen.append(str(argv[0]))
+            seen.append(str(argv[1]))
+        a = ffi.callback("void(*)(char *[])", cb)
+        a([ffi.new("char[]", "foobar"), ffi.new("char[]", "baz")])
+        assert seen == ["foobar", "baz"]
+
     def test_cast_float(self):
         ffi = FFI(backend=self.Backend())
         a = ffi.cast("float", 12)
diff --git a/testing/test_ctypes.py b/testing/test_ctypes.py
--- a/testing/test_ctypes.py
+++ b/testing/test_ctypes.py
@@ -13,3 +13,11 @@
     def test_array_of_func_ptr(self):
         py.test.skip("ctypes backend: not supported: "
                      "initializers for function pointers")
+
+    def test_structptr_argument(self):
+        py.test.skip("ctypes backend: not supported: passing a list "
+                     "for a pointer argument")
+
+    def test_array_argument_as_list(self):
+        py.test.skip("ctypes backend: not supported: passing a list "
+                     "for a pointer argument")
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
http://mail.python.org/mailman/listinfo/pypy-commit

[pypy-commit] cffi default: Found out how to properly generalize the "pass a Python string as

Reply via email to