[pypy-commit] cffi default: hg merge char16_char32_t

arigo Fri, 02 Jun 2017 00:20:07 -0700

Author: Armin Rigo <ar...@tunes.org>
Branch: 
Changeset: r2962:719b689e9e4b
Date: 2017-06-02 08:50 +0200
http://bitbucket.org/cffi/cffi/changeset/719b689e9e4b/


Log:    hg merge char16_char32_t

        Issue #315: add 'char16_t' and 'char32_t', which are explicitly-
        sized versions of 'wchar_t'. Like the latter, they convert to unicode
        characters, and arrays of them convert to unicode strings.

diff --git a/c/_cffi_backend.c b/c/_cffi_backend.c
--- a/c/_cffi_backend.c
+++ b/c/_cffi_backend.c
@@ -118,7 +118,7 @@
 /* base type flag: exactly one of the following: */
 #define CT_PRIMITIVE_SIGNED   0x001   /* signed integer */
 #define CT_PRIMITIVE_UNSIGNED 0x002   /* unsigned integer */
-#define CT_PRIMITIVE_CHAR     0x004   /* char, wchar_t */
+#define CT_PRIMITIVE_CHAR     0x004   /* char, wchar_t, charN_t */
 #define CT_PRIMITIVE_FLOAT    0x008   /* float, double, long double */
 #define CT_POINTER            0x010   /* pointer, excluding ptr-to-func */
 #define CT_ARRAY              0x020   /* array */
@@ -285,9 +285,7 @@
 # include "file_emulator.h"
 #endif
 
-#ifdef HAVE_WCHAR_H
-# include "wchar_helper.h"
-#endif
+#include "wchar_helper.h"
 
 typedef struct _cffi_allocator_s {
     PyObject *ca_alloc, *ca_free;
@@ -1049,12 +1047,14 @@
     }
     else if (ct->ct_flags & CT_PRIMITIVE_CHAR) {
         /*READ(data, ct->ct_size)*/
-        if (ct->ct_size == sizeof(char))
+        switch (ct->ct_size) {
+        case sizeof(char):
             return PyBytes_FromStringAndSize(data, 1);
-#ifdef HAVE_WCHAR_H
-        else
-            return _my_PyUnicode_FromWideChar((wchar_t *)data, 1);
-#endif
+        case 2:
+            return _my_PyUnicode_FromChar16((cffi_char16_t *)data, 1);
+        case 4:
+            return _my_PyUnicode_FromChar32((cffi_char32_t *)data, 1);
+        }
     }
     else if (ct->ct_flags & CT_PRIMITIVE_COMPLEX) {
         Py_complex value = read_raw_complex_data(data, ct->ct_size);
@@ -1133,27 +1133,53 @@
     return -1;
 }
 
-#ifdef HAVE_WCHAR_H
-static wchar_t _convert_to_wchar_t(PyObject *init)
-{
+static cffi_char16_t _convert_to_char16_t(PyObject *init)
+{
+    char err_got[80];
+    err_got[0] = 0;
+
     if (PyUnicode_Check(init)) {
-        wchar_t ordinal;
-        if (_my_PyUnicode_AsSingleWideChar(init, &ordinal) == 0)
+        cffi_char16_t ordinal;
+        if (_my_PyUnicode_AsSingleChar16(init, &ordinal, err_got) == 0)
             return ordinal;
     }
     if (CData_Check(init) &&
            (((CDataObject *)init)->c_type->ct_flags & CT_PRIMITIVE_CHAR) &&
-           (((CDataObject *)init)->c_type->ct_size == sizeof(wchar_t))) {
+           (((CDataObject *)init)->c_type->ct_size == 2)) {
         char *data = ((CDataObject *)init)->c_data;
-        /*READ(data, sizeof(wchar_t))*/
-        return *(wchar_t *)data;
+        /*READ(data, 2)*/
+        return *(cffi_char16_t *)data;
     }
     PyErr_Format(PyExc_TypeError,
-                 "initializer for ctype 'wchar_t' must be a unicode string "
-                 "of length 1, not %.200s", Py_TYPE(init)->tp_name);
-    return (wchar_t)-1;
-}
-#endif
+                 "initializer for ctype 'char16_t' must be a unicode string "
+                 "of length 1, not %.200s",
+                 err_got[0] == 0 ? Py_TYPE(init)->tp_name : err_got);
+    return (cffi_char16_t)-1;
+}
+
+static cffi_char32_t _convert_to_char32_t(PyObject *init)
+{
+    char err_got[80];
+    err_got[0] = 0;
+
+    if (PyUnicode_Check(init)) {
+        cffi_char32_t ordinal;
+        if (_my_PyUnicode_AsSingleChar32(init, &ordinal, err_got) == 0)
+            return ordinal;
+    }
+    if (CData_Check(init) &&
+           (((CDataObject *)init)->c_type->ct_flags & CT_PRIMITIVE_CHAR) &&
+           (((CDataObject *)init)->c_type->ct_size == 4)) {
+        char *data = ((CDataObject *)init)->c_data;
+        /*READ(data, 4)*/
+        return *(cffi_char32_t *)data;
+    }
+    PyErr_Format(PyExc_TypeError,
+                 "initializer for ctype 'char32_t' must be a unicode string "
+                 "of length 1, not %.200s",
+                 err_got[0] == 0 ? Py_TYPE(init)->tp_name : err_got);
+    return (cffi_char32_t)-1;
+}
 
 static int _convert_error(PyObject *init, const char *ct_name,
                           const char *expected)
@@ -1191,7 +1217,7 @@
 convert_from_object_bitfield(char *data, CFieldObject *cf, PyObject *init);
 
 static Py_ssize_t
-get_new_array_length(PyObject **pvalue)
+get_new_array_length(CTypeDescrObject *ctitem, PyObject **pvalue)
 {
     PyObject *value = *pvalue;
 
@@ -1204,7 +1230,12 @@
     }
     else if (PyUnicode_Check(value)) {
         /* from a unicode, we add the null terminator */
-        return _my_PyUnicode_SizeAsWideChar(value) + 1;
+        int length;
+        if (ctitem->ct_size == 2)
+            length = _my_PyUnicode_SizeAsChar16(value);
+        else
+            length = _my_PyUnicode_SizeAsChar32(value);
+        return length + 1;
     }
     else {
         Py_ssize_t explicitlength;
@@ -1235,7 +1266,8 @@
 {
     /* a special case for var-sized C99 arrays */
     if ((cf->cf_type->ct_flags & CT_ARRAY) && cf->cf_type->ct_size < 0) {
-        Py_ssize_t varsizelength = get_new_array_length(&value);
+        Py_ssize_t varsizelength = get_new_array_length(
+                                      cf->cf_type->ct_itemdescr, &value);
         if (varsizelength < 0)
             return -1;
         if (optvarsize != NULL) {
@@ -1336,14 +1368,18 @@
             memcpy(data, srcdata, n);
             return 0;
         }
-#ifdef HAVE_WCHAR_H
         else {
             Py_ssize_t n;
             if (!PyUnicode_Check(init)) {
                 expected = "unicode or list or tuple";
                 goto cannot_convert;
             }
-            n = _my_PyUnicode_SizeAsWideChar(init);
+
+            if (ctitem->ct_size == 4)
+                n = _my_PyUnicode_SizeAsChar32(init);
+            else
+                n = _my_PyUnicode_SizeAsChar16(init);
+
             if (ct->ct_length >= 0 && n > ct->ct_length) {
                 PyErr_Format(PyExc_IndexError,
                              "initializer unicode is too long for '%s' "
@@ -1352,10 +1388,12 @@
             }
             if (n != ct->ct_length)
                 n++;
-            _my_PyUnicode_AsWideChar(init, (wchar_t *)data, n);
+            if (ctitem->ct_size == 4)
+                _my_PyUnicode_AsChar32(init, (cffi_char32_t *)data, n);
+            else
+                _my_PyUnicode_AsChar16(init, (cffi_char16_t *)data, n);
             return 0;
         }
-#endif
     }
     else {
         expected = "list or tuple";
@@ -1537,22 +1575,29 @@
         return 0;
     }
     if (ct->ct_flags & CT_PRIMITIVE_CHAR) {
-        if (ct->ct_size == sizeof(char)) {
+        switch (ct->ct_size) {
+        case sizeof(char): {
             int res = _convert_to_char(init);
             if (res < 0)
                 return -1;
             data[0] = res;
             return 0;
         }
-#ifdef HAVE_WCHAR_H
-        else {
-            wchar_t res = _convert_to_wchar_t(init);
-            if (res == (wchar_t)-1 && PyErr_Occurred())
+        case 2: {
+            cffi_char16_t res = _convert_to_char16_t(init);
+            if (res == (cffi_char16_t)-1 && PyErr_Occurred())
                 return -1;
-            *(wchar_t *)data = res;
+            *(cffi_char16_t *)data = res;
             return 0;
         }
-#endif
+        case 4: {
+            int res = _convert_to_char32_t(init);
+            if (res == -1 && PyErr_Occurred())
+                return -1;
+            *(cffi_char32_t *)data = res;
+            return 0;
+        }
+        }
     }
     if (ct->ct_flags & (CT_STRUCT|CT_UNION)) {
 
@@ -2033,12 +2078,16 @@
     }
     else if (cd->c_type->ct_flags & CT_PRIMITIVE_CHAR) {
         /*READ(cd->c_data, cd->c_type->ct_size)*/
-        if (cd->c_type->ct_size == sizeof(char))
+        switch (cd->c_type->ct_size) {
+        case sizeof(char):
             return PyInt_FromLong((unsigned char)cd->c_data[0]);
-#ifdef HAVE_WCHAR_H
-        else
-            return PyInt_FromLong((long)*(wchar_t *)cd->c_data);
-#endif
+        case 2:
+            return PyInt_FromLong((long)*(cffi_char16_t *)cd->c_data);
+        case 4:
+            /* NB. cast via int32_t instead of cffi_char32_t, so that
+               we expose a signed result to the user */
+            return PyInt_FromLong((long)*(int32_t *)cd->c_data);
+        }
     }
     else if (cd->c_type->ct_flags & CT_PRIMITIVE_FLOAT) {
         PyObject *o = cdata_float(cd);
@@ -2730,7 +2779,11 @@
     }
     else if (PyUnicode_Check(init)) {
         /* from a unicode, we add the null terminator */
-        length = _my_PyUnicode_SizeAsWideChar(init) + 1;
+        if (ctitem->ct_size == 2)
+            length = _my_PyUnicode_SizeAsChar16(init);
+        else
+            length = _my_PyUnicode_SizeAsChar32(init);
+        length += 1;
     }
     else if ((ctitem->ct_flags & CT_IS_FILE) && PyFile_Check(init)) {
         *output_data = (char *)PyFile_AsFile(init);
@@ -3456,7 +3509,7 @@
         dataoffset = offsetof(CDataObject_own_nolength, alignment);
         datasize = ct->ct_size;
         if (datasize < 0) {
-            explicitlength = get_new_array_length(&init);
+            explicitlength = get_new_array_length(ct->ct_itemdescr, &init);
             if (explicitlength < 0)
                 return NULL;
             ctitem = ct->ct_itemdescr;
@@ -3626,18 +3679,17 @@
         value = (unsigned char)PyString_AS_STRING(ob)[0];
     }
 #endif
-#ifdef HAVE_WCHAR_H
     else if (PyUnicode_Check(ob)) {
-        wchar_t ordinal;
-        if (_my_PyUnicode_AsSingleWideChar(ob, &ordinal) < 0) {
+        char err_buf[80];
+        cffi_char32_t ordinal;
+        if (_my_PyUnicode_AsSingleChar32(ob, &ordinal, err_buf) < 0) {
             PyErr_Format(PyExc_TypeError,
-                      "cannot cast unicode string of length %zd to ctype '%s'",
-                         PyUnicode_GET_SIZE(ob), ct->ct_name);
+                         "cannot cast %s to ctype '%s'", err_buf, ct->ct_name);
             return NULL;
         }
-        value = (long)ordinal;
-    }
-#endif
+        /* the user sees char32_t being signed, but not char16_t */
+        value = (int32_t)ordinal;
+    }
     else if (PyBytes_Check(ob)) {
         int res = _convert_to_char(ob);
         if (res < 0)
@@ -3674,17 +3726,16 @@
         *out_value = (unsigned char)PyBytes_AS_STRING(io)[0];
         return 1;
     }
-#if HAVE_WCHAR_H
     else if (PyUnicode_Check(io)) {
-        wchar_t ordinal;
-        if (_my_PyUnicode_AsSingleWideChar(io, &ordinal) < 0) {
+        char ignored[80];
+        cffi_char32_t ordinal;
+        if (_my_PyUnicode_AsSingleChar32(io, &ordinal, ignored) < 0) {
             Py_DECREF(io);
             return -1;
         }
-        *out_value = (long)ordinal;
+        *out_value = (int32_t)ordinal;
         return 1;
     }
-#endif
     return 0;
 }
 
@@ -4106,6 +4157,8 @@
        EPTYPE2(fc, "float _Complex", cffi_float_complex_t, CT_PRIMITIVE_COMPLEX ) \
        EPTYPE2(dc, "double _Complex", cffi_double_complex_t, CT_PRIMITIVE_COMPLEX ) \
        ENUM_PRIMITIVE_TYPES_WCHAR                               \
+       EPTYPE2(c16, "char16_t", cffi_char16_t, CT_PRIMITIVE_CHAR ) \
+       EPTYPE2(c32, "char32_t", cffi_char32_t, CT_PRIMITIVE_CHAR ) \
        EPTYPE(b, _Bool, CT_PRIMITIVE_UNSIGNED | CT_IS_BOOL )    \
      /* the following types are not primitive in the C sense */ \
        EPTYPE(i8, int8_t, CT_PRIMITIVE_SIGNED)                  \
@@ -6036,27 +6089,46 @@
             }
             return PyBytes_FromStringAndSize(start, length);
         }
-#ifdef HAVE_WCHAR_H
         else if (cd->c_type->ct_itemdescr->ct_flags & CT_PRIMITIVE_CHAR) {
-            const wchar_t *start = (wchar_t *)cd->c_data;
-            assert(cd->c_type->ct_itemdescr->ct_size == sizeof(wchar_t));
-            if (length < 0) {
-                /*READ(start, sizeof(wchar_t))*/
-                length = 0;
-                while (start[length])
-                    length++;
-                /*READ(start, sizeof(wchar_t) * length)*/
+            switch (cd->c_type->ct_itemdescr->ct_size) {
+            case 2: {
+                const cffi_char16_t *start = (cffi_char16_t *)cd->c_data;
+                if (length < 0) {
+                    /*READ(start, 2)*/
+                    length = 0;
+                    while (start[length])
+                        length++;
+                    /*READ(start, 2 * length)*/
+                }
+                else {
+                    /*READ(start, 2 * length)*/
+                    maxlen = length;
+                    length = 0;
+                    while (length < maxlen && start[length])
+                        length++;
+                }
+                return _my_PyUnicode_FromChar16(start, length);
             }
-            else {
-                /*READ(start, sizeof(wchar_t) * length)*/
-                maxlen = length;
-                length = 0;
-                while (length < maxlen && start[length])
-                    length++;
+            case 4: {
+                const cffi_char32_t *start = (cffi_char32_t *)cd->c_data;
+                if (length < 0) {
+                    /*READ(start, 4)*/
+                    length = 0;
+                    while (start[length])
+                        length++;
+                    /*READ(start, 4 * length)*/
+                }
+                else {
+                    /*READ(start, 4 * length)*/
+                    maxlen = length;
+                    length = 0;
+                    while (length < maxlen && start[length])
+                        length++;
+                }
+                return _my_PyUnicode_FromChar32(start, length);
             }
-            return _my_PyUnicode_FromWideChar(start, length);
-        }
-#endif
+            }
+        }
     }
     else if (cd->c_type->ct_flags & CT_IS_ENUM) {
         return convert_cdata_to_enum_string(cd, 0);
@@ -6070,12 +6142,14 @@
         /*READ(cd->c_data, cd->c_type->ct_size)*/
         if (cd->c_type->ct_size == sizeof(char))
             return PyBytes_FromStringAndSize(cd->c_data, 1);
-#ifdef HAVE_WCHAR_H
         else if (cd->c_type->ct_flags & CT_PRIMITIVE_CHAR) {
-            assert(cd->c_type->ct_size == sizeof(wchar_t));
-            return _my_PyUnicode_FromWideChar((wchar_t *)cd->c_data, 1);
-        }
-#endif
+            switch (cd->c_type->ct_size) {
+            case 2:
+                return _my_PyUnicode_FromChar16((cffi_char16_t *)cd->c_data, 1);
+            case 4:
+                return _my_PyUnicode_FromChar32((cffi_char32_t *)cd->c_data, 1);
+            }
+        }
     }
     PyErr_Format(PyExc_TypeError, "string(): unexpected cdata '%s' argument",
                  cd->c_type->ct_name);
@@ -6120,12 +6194,14 @@
     /* byte- and unicode strings */
     ctitem = cd->c_type->ct_itemdescr;
     if (ctitem->ct_flags & CT_PRIMITIVE_CHAR) {
-        if (ctitem->ct_size == sizeof(char))
+        switch (ctitem->ct_size) {
+        case sizeof(char):
             return PyBytes_FromStringAndSize(cd->c_data, length);
-#ifdef HAVE_WCHAR_H
-        else if (ctitem->ct_size == sizeof(wchar_t))
-            return _my_PyUnicode_FromWideChar((wchar_t *)cd->c_data, length);
-#endif
+        case 2:
+            return _my_PyUnicode_FromChar16((cffi_char16_t *)cd->c_data,length);
+        case 4:
+            return _my_PyUnicode_FromChar32((cffi_char32_t *)cd->c_data,length);
+        }
     }
 
     /* else, the result is a list.  This implementation should be
@@ -6992,12 +7068,51 @@
     return PyBytes_FromStringAndSize(&x, 1);
 }
 
+/* backward-compatibility hack: instead of _cffi_to_c_char16_t() and
+ * _cffi_to_c_char32_t(), we have _cffi_to_c_wchar_t() handling whatever
+ * size is wchar_t, and _cffi_to_c_wchar3216_t() handling the opposite.
+ */
 #ifdef HAVE_WCHAR_H
-static PyObject *_cffi_from_c_wchar_t(wchar_t x) {
-    return _my_PyUnicode_FromWideChar(&x, 1);
-}
+typedef wchar_t cffi_wchar_t;
+#else
+typedef uint16_t cffi_wchar_t;   /* random pick... */
 #endif
 
+static cffi_wchar_t _cffi_to_c_wchar_t(PyObject *init)
+{
+    if (sizeof(cffi_wchar_t) == 2)
+        return (cffi_wchar_t)_convert_to_char16_t(init);
+    else
+        return (cffi_wchar_t)_convert_to_char32_t(init);
+}
+static PyObject *_cffi_from_c_wchar_t(cffi_wchar_t x) {
+    if (sizeof(cffi_wchar_t) == 2) {
+        cffi_char16_t input = x;
+        return _my_PyUnicode_FromChar16(&input, 1);
+    }
+    else {
+        cffi_char32_t input = x;
+        return _my_PyUnicode_FromChar32(&input, 1);
+    }
+}
+static int _cffi_to_c_wchar3216_t(PyObject *init)
+{
+    if (sizeof(cffi_wchar_t) == 4)
+        return (int)_convert_to_char16_t(init);
+    else
+        return (int)_convert_to_char32_t(init);
+}
+static PyObject *_cffi_from_c_wchar3216_t(int x) {
+    if (sizeof(cffi_wchar_t) == 4) {
+        cffi_char16_t input = x;
+        return _my_PyUnicode_FromChar16(&input, 1);
+    }
+    else {
+        cffi_char32_t input = x;
+        return _my_PyUnicode_FromChar32(&input, 1);
+    }
+}
+
 struct _cffi_externpy_s;      /* forward declaration */
 static void cffi_call_python(struct _cffi_externpy_s *, char *args);
 
@@ -7021,18 +7136,15 @@
     convert_to_object,
     convert_from_object,
     convert_struct_to_owning_object,
-#ifdef HAVE_WCHAR_H
-    _convert_to_wchar_t,
+    _cffi_to_c_wchar_t,
     _cffi_from_c_wchar_t,
-#else
-    0,
-    0,
-#endif
     _cffi_to_c_long_double,
     _cffi_to_c__Bool,
     _prepare_pointer_call_argument,
     convert_array_from_object,
     cffi_call_python,
+    _cffi_to_c_wchar3216_t,
+    _cffi_from_c_wchar3216_t,
 };
 
 static struct { const char *name; int value; } all_dlopen_flags[] = {
diff --git a/c/cffi1_module.c b/c/cffi1_module.c
--- a/c/cffi1_module.c
+++ b/c/cffi1_module.c
@@ -2,8 +2,9 @@
 #include "parse_c_type.c"
 #include "realize_c_type.c"
 
-#define CFFI_VERSION_MIN    0x2601
-#define CFFI_VERSION_MAX    0x27FF
+#define CFFI_VERSION_MIN            0x2601
+#define CFFI_VERSION_CHAR16CHAR32   0x2801
+#define CFFI_VERSION_MAX            0x28FF
 
 typedef struct FFIObject_s FFIObject;
 typedef struct LibObject_s LibObject;
@@ -183,6 +184,8 @@
     num_exports = 25;
     if (ctx->flags & 1)    /* set to mean that 'extern "Python"' is used */
         num_exports = 26;
+    if (version >= CFFI_VERSION_CHAR16CHAR32)
+        num_exports = 28;
     memcpy(exports, (char *)cffi_exports, num_exports * sizeof(void *));
 
     /* make the module object */
diff --git a/c/parse_c_type.c b/c/parse_c_type.c
--- a/c/parse_c_type.c
+++ b/c/parse_c_type.c
@@ -493,6 +493,7 @@
 
     case '1':
         if (size == 8 && !memcmp(p, "uint16", 6)) return _CFFI_PRIM_UINT16;
+        if (size == 8 && !memcmp(p, "char16", 6)) return _CFFI_PRIM_CHAR16;
         break;
 
     case '2':
@@ -501,6 +502,7 @@
 
     case '3':
         if (size == 8 && !memcmp(p, "uint32", 6)) return _CFFI_PRIM_UINT32;
+        if (size == 8 && !memcmp(p, "char32", 6)) return _CFFI_PRIM_CHAR32;
         break;
 
     case '4':
diff --git a/c/realize_c_type.c b/c/realize_c_type.c
--- a/c/realize_c_type.c
+++ b/c/realize_c_type.c
@@ -153,6 +153,8 @@
         "uintmax_t",
         "float _Complex",
         "double _Complex",
+        "char16_t",
+        "char32_t",
     };
     PyObject *x;
 
diff --git a/c/test_c.py b/c/test_c.py
--- a/c/test_c.py
+++ b/c/test_c.py
@@ -2098,22 +2098,36 @@
     py.test.raises(TypeError, newp, BStructPtr, [cast(BFunc2, 0)])
 
 def test_wchar():
-    BWChar = new_primitive_type("wchar_t")
+    _test_wchar_variant("wchar_t")
+
+def test_char16():
+    assert sizeof(new_primitive_type("char16_t")) == 2
+    _test_wchar_variant("char16_t")
+
+def test_char32():
+    assert sizeof(new_primitive_type("char32_t")) == 4
+    _test_wchar_variant("char32_t")
+
+def _test_wchar_variant(typename):
+    BWChar = new_primitive_type(typename)
     BInt = new_primitive_type("int")
     pyuni4 = {1: True, 2: False}[len(u+'\U00012345')]
     wchar4 = {2: False, 4: True}[sizeof(BWChar)]
-    assert str(cast(BWChar, 0x45)) == "<cdata 'wchar_t' %s'E'>" % (
-        mandatory_u_prefix,)
-    assert str(cast(BWChar, 0x1234)) == "<cdata 'wchar_t' %s'\u1234'>" % (
-        mandatory_u_prefix,)
-    if wchar4:
-        if not _hacked_pypy_uni4():
+    assert str(cast(BWChar, 0x45)) == "<cdata '%s' %s'E'>" % (
+        typename, mandatory_u_prefix)
+    assert str(cast(BWChar, 0x1234)) == "<cdata '%s' %s'\u1234'>" % (
+        typename, mandatory_u_prefix)
+    if not _hacked_pypy_uni4():
+        if wchar4:
             x = cast(BWChar, 0x12345)
-            assert str(x) == "<cdata 'wchar_t' %s'\U00012345'>" % (
-                mandatory_u_prefix,)
+            assert str(x) == "<cdata '%s' %s'\U00012345'>" % (
+                typename, mandatory_u_prefix)
             assert int(x) == 0x12345
-    else:
-        assert not pyuni4
+        else:
+            x = cast(BWChar, 0x18345)
+            assert str(x) == "<cdata '%s' %s'\u8345'>" % (
+                typename, mandatory_u_prefix)
+            assert int(x) == 0x8345
     #
     BWCharP = new_pointer_type(BWChar)
     BStruct = new_struct_type("struct foo_s")
@@ -2128,9 +2142,9 @@
     s.a1 = u+'\u1234'
     assert s.a1 == u+'\u1234'
     if pyuni4:
-        assert wchar4
-        s.a1 = u+'\U00012345'
-        assert s.a1 == u+'\U00012345'
+        if wchar4:
+            s.a1 = u+'\U00012345'
+            assert s.a1 == u+'\U00012345'
     elif wchar4:
         if not _hacked_pypy_uni4():
             s.a1 = cast(BWChar, 0x12345)
@@ -2165,17 +2179,17 @@
         py.test.raises(IndexError, 'a[4]')
     #
     w = cast(BWChar, 'a')
-    assert repr(w) == "<cdata 'wchar_t' %s'a'>" % mandatory_u_prefix
+    assert repr(w) == "<cdata '%s' %s'a'>" % (typename, mandatory_u_prefix)
     assert str(w) == repr(w)
     assert string(w) == u+'a'
     assert int(w) == ord('a')
     w = cast(BWChar, 0x1234)
-    assert repr(w) == "<cdata 'wchar_t' %s'\u1234'>" % mandatory_u_prefix
+    assert repr(w) == "<cdata '%s' %s'\u1234'>" % (typename, mandatory_u_prefix)
     assert str(w) == repr(w)
     assert string(w) == u+'\u1234'
     assert int(w) == 0x1234
     w = cast(BWChar, u+'\u8234')
-    assert repr(w) == "<cdata 'wchar_t' %s'\u8234'>" % mandatory_u_prefix
+    assert repr(w) == "<cdata '%s' %s'\u8234'>" % (typename, mandatory_u_prefix)
     assert str(w) == repr(w)
     assert string(w) == u+'\u8234'
     assert int(w) == 0x8234
@@ -2183,8 +2197,8 @@
     assert repr(w) == "<cdata 'int' 4660>"
     if wchar4 and not _hacked_pypy_uni4():
         w = cast(BWChar, u+'\U00012345')
-        assert repr(w) == "<cdata 'wchar_t' %s'\U00012345'>" % (
-            mandatory_u_prefix,)
+        assert repr(w) == "<cdata '%s' %s'\U00012345'>" % (
+            typename, mandatory_u_prefix)
         assert str(w) == repr(w)
         assert string(w) == u+'\U00012345'
         assert int(w) == 0x12345
@@ -2211,7 +2225,7 @@
     py.test.raises(RuntimeError, string, q)
     #
     def cb(p):
-        assert repr(p).startswith("<cdata 'wchar_t *' 0x")
+        assert repr(p).startswith("<cdata '%s *' 0x" % typename)
         return len(string(p))
     BFunc = new_function_type((BWCharP,), BInt, False)
     f = callback(BFunc, cb, -42)
diff --git a/c/wchar_helper.h b/c/wchar_helper.h
--- a/c/wchar_helper.h
+++ b/c/wchar_helper.h
@@ -2,31 +2,28 @@
  * wchar_t helpers
  */
 
-#if (Py_UNICODE_SIZE == 2) && (SIZEOF_WCHAR_T == 4)
-# define CONVERT_WCHAR_TO_SURROGATES
-#endif
+typedef uint16_t cffi_char16_t;
+typedef uint32_t cffi_char32_t;
+/* NB. cffi_char32_t is unsigned to make the logic here a bit easier */
 
 
-#ifdef CONVERT_WCHAR_TO_SURROGATES
+#if Py_UNICODE_SIZE == 2
 
 /* Before Python 2.7, PyUnicode_FromWideChar is not able to convert
    wchar_t values greater than 65535 into two-unicode-characters surrogates.
    But even the Python 2.7 version doesn't detect wchar_t values that are
    out of range(1114112), and just returns nonsense.
+
+   From cffi 1.11 we can't use it anyway, because we need a version
+   with char32_t input types.
 */
 static PyObject *
-_my_PyUnicode_FromWideChar(register const wchar_t *w,
-                           Py_ssize_t size)
+_my_PyUnicode_FromChar32(const cffi_char32_t *w, Py_ssize_t size)
 {
     PyObject *unicode;
     register Py_ssize_t i;
     Py_ssize_t alloc;
-    const wchar_t *orig_w;
-
-    if (w == NULL) {
-        PyErr_BadInternalCall();
-        return NULL;
-    }
+    const cffi_char32_t *orig_w;
 
     alloc = size;
     orig_w = w;
@@ -45,11 +42,11 @@
         register Py_UNICODE *u;
         u = PyUnicode_AS_UNICODE(unicode);
         for (i = size; i > 0; i--) {
-            if (((unsigned int)*w) > 0xFFFF) {
-                wchar_t ordinal;
-                if (((unsigned int)*w) > 0x10FFFF) {
+            if (*w > 0xFFFF) {
+                cffi_char32_t ordinal;
+                if (*w > 0x10FFFF) {
                     PyErr_Format(PyExc_ValueError,
-                                 "wchar_t out of range for "
+                                 "char32_t out of range for "
                                  "conversion to unicode: 0x%x", (int)*w);
                     Py_DECREF(unicode);
                     return NULL;
@@ -66,9 +63,55 @@
     return unicode;
 }
 
-#else
+static PyObject *
+_my_PyUnicode_FromChar16(const cffi_char16_t *w, Py_ssize_t size)
+{
+    return PyUnicode_FromUnicode((const Py_UNICODE *)w, size);
+}
 
-# define _my_PyUnicode_FromWideChar PyUnicode_FromWideChar
+#else   /* Py_UNICODE_SIZE == 4 */
+
+static PyObject *
+_my_PyUnicode_FromChar32(const cffi_char32_t *w, Py_ssize_t size)
+{
+    return PyUnicode_FromUnicode((const Py_UNICODE *)w, size);
+}
+
+static PyObject *
+_my_PyUnicode_FromChar16(const cffi_char16_t *w, Py_ssize_t size)
+{
+    /* 'size' is the length of the 'w' array */
+    PyObject *result = PyUnicode_FromUnicode(NULL, size);
+
+    if (result != NULL) {
+        Py_UNICODE *u_base = PyUnicode_AS_UNICODE(result);
+        Py_UNICODE *u = u_base;
+
+        if (size == 1) {      /* performance only */
+            *u = (cffi_char32_t)*w;
+        }
+        else {
+            while (size > 0) {
+                cffi_char32_t ch = *w++;
+                size--;
+                if (0xD800 <= ch && ch <= 0xDBFF && size > 0) {
+                    cffi_char32_t ch2 = *w;
+                    if (0xDC00 <= ch2 && ch2 <= 0xDFFF) {
+                        ch = (((ch & 0x3FF)<<10) | (ch2 & 0x3FF)) + 0x10000;
+                        w++;
+                        size--;
+                    }
+                }
+                *u++ = ch;
+            }
+            if (PyUnicode_Resize(&result, u - u_base) < 0) {
+                Py_DECREF(result);
+                return NULL;
+            }
+        }
+    }
+    return result;
+}
 
 #endif
 
@@ -78,28 +121,70 @@
 #define AS_SURROGATE(u)   (0x10000 + (((u)[0] - 0xD800) << 10) +     \
                                      ((u)[1] - 0xDC00))
 
-static int _my_PyUnicode_AsSingleWideChar(PyObject *unicode, wchar_t *result)
+static int
+_my_PyUnicode_AsSingleChar16(PyObject *unicode, cffi_char16_t *result,
+                             char *err_got)
+{
+    Py_UNICODE *u = PyUnicode_AS_UNICODE(unicode);
+    if (PyUnicode_GET_SIZE(unicode) != 1) {
+        sprintf(err_got, "unicode string of length %zd",
+                PyUnicode_GET_SIZE(unicode));
+        return -1;
+    }
+#if Py_UNICODE_SIZE == 4
+    if (((unsigned int)u[0]) > 0xFFFF)
+    {
+        sprintf(err_got, "larger-than-0xFFFF character");
+        return -1;
+    }
+#endif
+    *result = (cffi_char16_t)u[0];
+    return 0;
+}
+
+static int
+_my_PyUnicode_AsSingleChar32(PyObject *unicode, cffi_char32_t *result,
+                             char *err_got)
 {
     Py_UNICODE *u = PyUnicode_AS_UNICODE(unicode);
     if (PyUnicode_GET_SIZE(unicode) == 1) {
-        *result = (wchar_t)(u[0]);
+        *result = (cffi_char32_t)u[0];
         return 0;
     }
-#ifdef CONVERT_WCHAR_TO_SURROGATES
+#if Py_UNICODE_SIZE == 2
     if (PyUnicode_GET_SIZE(unicode) == 2 && IS_SURROGATE(u)) {
         *result = AS_SURROGATE(u);
         return 0;
     }
 #endif
+    sprintf(err_got, "unicode string of length %zd",
+            PyUnicode_GET_SIZE(unicode));
     return -1;
 }
 
-static Py_ssize_t _my_PyUnicode_SizeAsWideChar(PyObject *unicode)
+static Py_ssize_t _my_PyUnicode_SizeAsChar16(PyObject *unicode)
 {
     Py_ssize_t length = PyUnicode_GET_SIZE(unicode);
     Py_ssize_t result = length;
 
-#ifdef CONVERT_WCHAR_TO_SURROGATES
+#if Py_UNICODE_SIZE == 4
+    Py_UNICODE *u = PyUnicode_AS_UNICODE(unicode);
+    Py_ssize_t i;
+
+    for (i=0; i<length; i++) {
+        if (u[i] > 0xFFFF)
+            result++;
+    }
+#endif
+    return result;
+}
+
+static Py_ssize_t _my_PyUnicode_SizeAsChar32(PyObject *unicode)
+{
+    Py_ssize_t length = PyUnicode_GET_SIZE(unicode);
+    Py_ssize_t result = length;
+
+#if Py_UNICODE_SIZE == 2
     Py_UNICODE *u = PyUnicode_AS_UNICODE(unicode);
     Py_ssize_t i;
 
@@ -111,15 +196,41 @@
     return result;
 }
 
-static void _my_PyUnicode_AsWideChar(PyObject *unicode,
-                                     wchar_t *result,
-                                     Py_ssize_t resultlen)
+static void _my_PyUnicode_AsChar16(PyObject *unicode,
+                                   cffi_char16_t *result,
+                                   Py_ssize_t resultlen)
+{
+    Py_ssize_t len = PyUnicode_GET_SIZE(unicode);
+    Py_UNICODE *u = PyUnicode_AS_UNICODE(unicode);
+    Py_ssize_t i;
+    for (i=0; i<len; i++) {
+#if Py_UNICODE_SIZE == 2
+        cffi_char16_t ordinal = u[i];
+#else
+        cffi_char32_t ordinal = u[i];
+        if (ordinal > 0xFFFF) {
+            /* NB. like CPython, ignore the problem of unicode string objects
+             * containing characters greater than sys.maxunicode.  It is
+             * easier to not add exception handling here */
+            ordinal -= 0x10000;
+            *result++ = 0xD800 | (ordinal >> 10);
+            *result++ = 0xDC00 | (ordinal & 0x3FF);
+            continue;
+        }
+#endif
+        *result++ = ordinal;
+    }
+}
+
+static void _my_PyUnicode_AsChar32(PyObject *unicode,
+                                   cffi_char32_t *result,
+                                   Py_ssize_t resultlen)
 {
     Py_UNICODE *u = PyUnicode_AS_UNICODE(unicode);
     Py_ssize_t i;
     for (i=0; i<resultlen; i++) {
-        wchar_t ordinal = *u;
-#ifdef CONVERT_WCHAR_TO_SURROGATES
+        cffi_char32_t ordinal = *u;
+#if Py_UNICODE_SIZE == 2
         if (IS_SURROGATE(u)) {
             ordinal = AS_SURROGATE(u);
             u++;
diff --git a/cffi/_cffi_include.h b/cffi/_cffi_include.h
--- a/cffi/_cffi_include.h
+++ b/cffi/_cffi_include.h
@@ -62,11 +62,16 @@
     typedef unsigned char _Bool;
 #  endif
 # endif
+# if _MSC_VER < 1900 || !defined(__cplusplus)   /* MSVC < 2015, or plain C */
+    typedef uint16_t char16_t;
+    typedef int32_t char32_t;
+# endif
 #else
 # include <stdint.h>
 # if (defined (__SVR4) && defined (__sun)) || defined(_AIX) || defined(__hpux)
 #  include <alloca.h>
 # endif
+# include <uchar.h>
 #endif
 
 #ifdef __GNUC__
@@ -159,9 +164,9 @@
 #define _cffi_from_c_struct                                              \
     ((PyObject *(*)(char *, struct _cffi_ctypedescr *))_cffi_exports[18])
 #define _cffi_to_c_wchar_t                                               \
-    ((wchar_t(*)(PyObject *))_cffi_exports[19])
+    ((_cffi_wchar_t(*)(PyObject *))_cffi_exports[19])
 #define _cffi_from_c_wchar_t                                             \
-    ((PyObject *(*)(wchar_t))_cffi_exports[20])
+    ((PyObject *(*)(_cffi_wchar_t))_cffi_exports[20])
 #define _cffi_to_c_long_double                                           \
     ((long double(*)(PyObject *))_cffi_exports[21])
 #define _cffi_to_c__Bool                                                 \
@@ -174,7 +179,11 @@
 #define _CFFI_CPIDX  25
 #define _cffi_call_python                                                \
     ((void(*)(struct _cffi_externpy_s *, char *))_cffi_exports[_CFFI_CPIDX])
-#define _CFFI_NUM_EXPORTS 26
+#define _cffi_to_c_wchar3216_t                                           \
+    ((int(*)(PyObject *))_cffi_exports[26])
+#define _cffi_from_c_wchar3216_t                                         \
+    ((PyObject *(*)(int))_cffi_exports[27])
+#define _CFFI_NUM_EXPORTS 28
 
 struct _cffi_ctypedescr;
 
@@ -215,6 +224,46 @@
     return NULL;
 }
 
+
+#ifdef HAVE_WCHAR_H
+typedef wchar_t _cffi_wchar_t;
+#else
+typedef uint16_t _cffi_wchar_t;   /* same random pick as _cffi_backend.c */
+#endif
+
+_CFFI_UNUSED_FN static uint16_t _cffi_to_c_char16_t(PyObject *o)
+{
+    if (sizeof(_cffi_wchar_t) == 2)
+        return (uint16_t)_cffi_to_c_wchar_t(o);
+    else
+        return (uint16_t)_cffi_to_c_wchar3216_t(o);
+}
+
+_CFFI_UNUSED_FN static PyObject *_cffi_from_c_char16_t(uint16_t x)
+{
+    if (sizeof(_cffi_wchar_t) == 2)
+        return _cffi_from_c_wchar_t(x);
+    else
+        return _cffi_from_c_wchar3216_t(x);
+}
+
+_CFFI_UNUSED_FN static int _cffi_to_c_char32_t(PyObject *o)
+{
+    if (sizeof(_cffi_wchar_t) == 4)
+        return (int)_cffi_to_c_wchar_t(o);
+    else
+        return (int)_cffi_to_c_wchar3216_t(o);
+}
+
+_CFFI_UNUSED_FN static PyObject *_cffi_from_c_char32_t(int x)
+{
+    if (sizeof(_cffi_wchar_t) == 4)
+        return _cffi_from_c_wchar_t(x);
+    else
+        return _cffi_from_c_wchar3216_t(x);
+}
+
+
 /**********  end CPython-specific section  **********/
 #else
 _CFFI_UNUSED_FN
diff --git a/cffi/cffi_opcode.py b/cffi/cffi_opcode.py
--- a/cffi/cffi_opcode.py
+++ b/cffi/cffi_opcode.py
@@ -107,9 +107,10 @@
 PRIM_UINTMAX       = 47
 PRIM_FLOATCOMPLEX  = 48
 PRIM_DOUBLECOMPLEX = 49
+PRIM_CHAR16        = 50
+PRIM_CHAR32        = 51
 
-
-_NUM_PRIM          = 50
+_NUM_PRIM          = 52
 _UNKNOWN_PRIM          = -1
 _UNKNOWN_FLOAT_PRIM    = -2
 _UNKNOWN_LONG_DOUBLE   = -3
@@ -135,6 +136,8 @@
     'double _Complex':    PRIM_DOUBLECOMPLEX,
     '_Bool':              PRIM_BOOL,
     'wchar_t':            PRIM_WCHAR,
+    'char16_t':           PRIM_CHAR16,
+    'char32_t':           PRIM_CHAR32,
     'int8_t':             PRIM_INT8,
     'uint8_t':            PRIM_UINT8,
     'int16_t':            PRIM_INT16,
diff --git a/cffi/model.py b/cffi/model.py
--- a/cffi/model.py
+++ b/cffi/model.py
@@ -122,6 +122,8 @@
         '_Bool':              'i',
         # the following types are not primitive in the C sense
         'wchar_t':            'c',
+        'char16_t':           'c',
+        'char32_t':           'c',
         'int8_t':             'i',
         'uint8_t':            'i',
         'int16_t':            'i',
diff --git a/cffi/parse_c_type.h b/cffi/parse_c_type.h
--- a/cffi/parse_c_type.h
+++ b/cffi/parse_c_type.h
@@ -81,8 +81,10 @@
 #define _CFFI_PRIM_UINTMAX      47
 #define _CFFI_PRIM_FLOATCOMPLEX 48
 #define _CFFI_PRIM_DOUBLECOMPLEX 49
+#define _CFFI_PRIM_CHAR16       50
+#define _CFFI_PRIM_CHAR32       51
 
-#define _CFFI__NUM_PRIM         50
+#define _CFFI__NUM_PRIM         52
 #define _CFFI__UNKNOWN_PRIM           (-1)
 #define _CFFI__UNKNOWN_FLOAT_PRIM     (-2)
 #define _CFFI__UNKNOWN_LONG_DOUBLE    (-3)
diff --git a/cffi/recompiler.py b/cffi/recompiler.py
--- a/cffi/recompiler.py
+++ b/cffi/recompiler.py
@@ -3,8 +3,9 @@
 from .error import VerificationError
 from .cffi_opcode import *
 
-VERSION = "0x2601"
-VERSION_EMBEDDED = "0x2701"
+VERSION_BASE = 0x2601
+VERSION_EMBEDDED = 0x2701
+VERSION_CHAR16CHAR32 = 0x2801
 
 
 class GlobalExpr:
@@ -126,6 +127,10 @@
         self.ffi = ffi
         self.module_name = module_name
         self.target_is_python = target_is_python
+        self._version = VERSION_BASE
+
+    def needs_version(self, ver):
+        self._version = max(self._version, ver)
 
     def collect_type_table(self):
         self._typesdict = {}
@@ -304,9 +309,7 @@
             prnt('#endif')
             lines = self._rel_readlines('_embedding.h')
             prnt(''.join(lines))
-            version = VERSION_EMBEDDED
-        else:
-            version = VERSION
+            self.needs_version(VERSION_EMBEDDED)
         #
         # then paste the C source given by the user, verbatim.
         prnt('/************************************************************/')
@@ -405,7 +408,7 @@
             prnt('        _cffi_call_python_org = '
                  '(void(*)(struct _cffi_externpy_s *, char *))p[1];')
             prnt('    }')
-        prnt('    p[0] = (const void *)%s;' % version)
+        prnt('    p[0] = (const void *)0x%x;' % self._version)
         prnt('    p[1] = &_cffi_type_context;')
         prnt('}')
         # on Windows, distutils insists on putting init_cffi_xyz in
@@ -423,21 +426,22 @@
         prnt('PyMODINIT_FUNC')
         prnt('PyInit_%s(void)' % (base_module_name,))
         prnt('{')
-        prnt('  return _cffi_init("%s", %s, &_cffi_type_context);' % (
-            self.module_name, version))
+        prnt('  return _cffi_init("%s", 0x%x, &_cffi_type_context);' % (
+            self.module_name, self._version))
         prnt('}')
         prnt('#else')
         prnt('PyMODINIT_FUNC')
         prnt('init%s(void)' % (base_module_name,))
         prnt('{')
-        prnt('  _cffi_init("%s", %s, &_cffi_type_context);' % (
-            self.module_name, version))
+        prnt('  _cffi_init("%s", 0x%x, &_cffi_type_context);' % (
+            self.module_name, self._version))
         prnt('}')
         prnt('#endif')
         prnt()
         prnt('#ifdef __GNUC__')
         prnt('#  pragma GCC visibility pop')
         prnt('#endif')
+        self._version = None
 
     def _to_py(self, x):
         if isinstance(x, str):
@@ -476,7 +480,8 @@
             prnt('from %s import ffi as _ffi%d' % (included_module_name, i))
         prnt()
         prnt("ffi = _cffi_backend.FFI('%s'," % (self.module_name,))
-        prnt("    _version = %s," % (VERSION,))
+        prnt("    _version = 0x%x," % (self._version,))
+        self._version = None
         #
         # the '_types' keyword argument
         self.cffi_types = tuple(self.cffi_types)    # don't change any more
@@ -515,8 +520,11 @@
                 # double' here, and _cffi_to_c_double would loose precision
                 converter = '(%s)_cffi_to_c_double' % (tp.get_c_name(''),)
             else:
-                converter = '(%s)_cffi_to_c_%s' % (tp.get_c_name(''),
+                cname = tp.get_c_name('')
+                converter = '(%s)_cffi_to_c_%s' % (cname,
                                                    tp.name.replace(' ', '_'))
+                if cname in ('char16_t', 'char32_t'):
+                    self.needs_version(VERSION_CHAR16CHAR32)
             errvalue = '-1'
         #
         elif isinstance(tp, model.PointerType):
@@ -573,7 +581,10 @@
             elif isinstance(tp, model.UnknownFloatType):
                 return '_cffi_from_c_double(%s)' % (var,)
             elif tp.name != 'long double' and not tp.is_complex_type():
-                return '_cffi_from_c_%s(%s)' % (tp.name.replace(' ', '_'), var)
+                cname = tp.name.replace(' ', '_')
+                if cname in ('char16_t', 'char32_t'):
+                    self.needs_version(VERSION_CHAR16CHAR32)
+                return '_cffi_from_c_%s(%s)' % (cname, var)
             else:
                 return '_cffi_from_c_deref((char *)&%s, _cffi_type(%d))' % (
                     var, self._gettypenum(tp))
diff --git a/testing/cffi0/test_ffi_backend.py b/testing/cffi0/test_ffi_backend.py
--- a/testing/cffi0/test_ffi_backend.py
+++ b/testing/cffi0/test_ffi_backend.py
@@ -1,6 +1,7 @@
 import py, sys, platform
 import pytest
 from testing.cffi0 import backend_tests, test_function, test_ownlib
+from testing.support import u
 from cffi import FFI
 import _cffi_backend
 
@@ -397,6 +398,8 @@
             "double",
             "long double",
             "wchar_t",
+            "char16_t",
+            "char32_t",
             "_Bool",
             "int8_t",
             "uint8_t",
@@ -508,3 +511,43 @@
             py.test.raises(TypeError, cd)
             py.test.raises(TypeError, cd, ffi.NULL)
             py.test.raises(TypeError, cd, ffi.typeof("void *"))
+
+    def test_explicitly_defined_char16_t(self):
+        ffi = FFI()
+        ffi.cdef("typedef uint16_t char16_t;")
+        x = ffi.cast("char16_t", 1234)
+        assert ffi.typeof(x) is ffi.typeof("uint16_t")
+
+    def test_char16_t(self):
+        ffi = FFI()
+        x = ffi.new("char16_t[]", 5)
+        assert len(x) == 5 and ffi.sizeof(x) == 10
+        x[2] = u+'\u1324'
+        assert x[2] == u+'\u1324'
+        y = ffi.new("char16_t[]", u+'\u1234\u5678')
+        assert len(y) == 3
+        assert list(y) == [u+'\u1234', u+'\u5678', u+'\x00']
+        assert ffi.string(y) == u+'\u1234\u5678'
+        z = ffi.new("char16_t[]", u+'\U00012345')
+        assert len(z) == 3
+        assert list(z) == [u+'\ud808', u+'\udf45', u+'\x00']
+        assert ffi.string(z) == u+'\U00012345'
+
+    def test_char32_t(self):
+        ffi = FFI()
+        x = ffi.new("char32_t[]", 5)
+        assert len(x) == 5 and ffi.sizeof(x) == 20
+        x[3] = u+'\U00013245'
+        assert x[3] == u+'\U00013245'
+        y = ffi.new("char32_t[]", u+'\u1234\u5678')
+        assert len(y) == 3
+        assert list(y) == [u+'\u1234', u+'\u5678', u+'\x00']
+        py_uni = u+'\U00012345'
+        z = ffi.new("char32_t[]", py_uni)
+        assert len(z) == 2
+        assert list(z) == [py_uni, u+'\x00']    # maybe a 2-unichars string
+        assert ffi.string(z) == py_uni
+        if len(py_uni) == 1:    # 4-bytes unicodes in Python
+            s = ffi.new("char32_t[]", u+'\ud808\udf00')
+            assert len(s) == 3
+            assert list(s) == [u+'\ud808', u+'\udf00', u+'\x00']
diff --git a/testing/cffi0/test_ownlib.py b/testing/cffi0/test_ownlib.py
--- a/testing/cffi0/test_ownlib.py
+++ b/testing/cffi0/test_ownlib.py
@@ -2,6 +2,7 @@
 import subprocess, weakref
 from cffi import FFI
 from cffi.backend_ctypes import CTypesBackend
+from testing.support import u
 
 
 SOURCE = """\
@@ -92,6 +93,15 @@
 }
 
 EXPORT int my_array[7] = {0, 1, 2, 3, 4, 5, 6};
+
+EXPORT unsigned short foo_2bytes(unsigned short a)
+{
+    return (unsigned short)(a + 42);
+}
+EXPORT unsigned int foo_4bytes(unsigned int a)
+{
+    return (unsigned int)(a + 42);
+}
 """
 
 class TestOwnLib(object):
@@ -300,3 +310,18 @@
         pfn = ffi.addressof(lib, "test_getting_errno")
         assert ffi.typeof(pfn) == ffi.typeof("int(*)(void)")
         assert pfn == lib.test_getting_errno
+
+    def test_char16_char32_t(self):
+        if self.module is None:
+            py.test.skip("fix the auto-generation of the tiny test lib")
+        if self.Backend is CTypesBackend:
+            py.test.skip("not implemented with the ctypes backend")
+        ffi = FFI(backend=self.Backend())
+        ffi.cdef("""
+            char16_t foo_2bytes(char16_t);
+            char32_t foo_4bytes(char32_t);
+        """)
+        lib = ffi.dlopen(self.module)
+        assert lib.foo_2bytes(u+'\u1234') == u+'\u125e'
+        assert lib.foo_4bytes(u+'\u1234') == u+'\u125e'
+        assert lib.foo_4bytes(u+'\U00012345') == u+'\U0001236f'
diff --git a/testing/cffi0/test_verify.py b/testing/cffi0/test_verify.py
--- a/testing/cffi0/test_verify.py
+++ b/testing/cffi0/test_verify.py
@@ -241,7 +241,7 @@
         F = tp.is_float_type()
         X = tp.is_complex_type()
         I = tp.is_integer_type()
-        assert C == (typename in ('char', 'wchar_t'))
+        assert C == (typename in ('char', 'wchar_t', 'char16_t', 'char32_t'))
         assert F == (typename in ('float', 'double', 'long double'))
         assert X == (typename in ('float _Complex', 'double _Complex'))
         assert I + F + C + X == 1      # one and only one of them is true
@@ -384,6 +384,10 @@
     lib = ffi.verify("wchar_t foo(wchar_t x) { return x+1; }")
     assert lib.foo(uniexample1) == uniexample2
 
+def test_char16_char32_type():
+    py.test.skip("XXX test or fully prevent char16_t and char32_t from "
+                 "working in ffi.verify() mode")
+
 def test_no_argument():
     ffi = FFI()
     ffi.cdef("int foo(void);")
diff --git a/testing/cffi1/test_new_ffi_1.py b/testing/cffi1/test_new_ffi_1.py
--- a/testing/cffi1/test_new_ffi_1.py
+++ b/testing/cffi1/test_new_ffi_1.py
@@ -1672,6 +1672,8 @@
             "double",
             "long double",
             "wchar_t",
+            "char16_t",
+            "char32_t",
             "_Bool",
             "int8_t",
             "uint8_t",
@@ -1742,3 +1744,30 @@
         exec("from _test_import_from_lib import *", d)
         assert (sorted([x for x in d.keys() if not x.startswith('__')]) ==
                 ['ffi', 'lib'])
+
+    def test_char16_t(self):
+        x = ffi.new("char16_t[]", 5)
+        assert len(x) == 5 and ffi.sizeof(x) == 10
+        x[2] = u+'\u1324'
+        assert x[2] == u+'\u1324'
+        y = ffi.new("char16_t[]", u+'\u1234\u5678')
+        assert len(y) == 3
+        assert list(y) == [u+'\u1234', u+'\u5678', u+'\x00']
+        assert ffi.string(y) == u+'\u1234\u5678'
+        z = ffi.new("char16_t[]", u+'\U00012345')
+        assert len(z) == 3
+        assert list(z) == [u+'\ud808', u+'\udf45', u+'\x00']
+        assert ffi.string(z) == u+'\U00012345'
+
+    def test_char32_t(self):
+        x = ffi.new("char32_t[]", 5)
+        assert len(x) == 5 and ffi.sizeof(x) == 20
+        x[3] = u+'\U00013245'
+        assert x[3] == u+'\U00013245'
+        y = ffi.new("char32_t[]", u+'\u1234\u5678')
+        assert len(y) == 3
+        assert list(y) == [u+'\u1234', u+'\u5678', u+'\x00']
+        z = ffi.new("char32_t[]", u+'\U00012345')
+        assert len(z) == 2
+        assert list(z) == [u+'\U00012345', u+'\x00'] # maybe a 2-unichars strin
+        assert ffi.string(z) == u+'\U00012345'
diff --git a/testing/cffi1/test_recompiler.py b/testing/cffi1/test_recompiler.py
--- a/testing/cffi1/test_recompiler.py
+++ b/testing/cffi1/test_recompiler.py
@@ -24,10 +24,11 @@
     assert ''.join(map(str, recomp.cffi_types)) == expected_output
 
 def verify(ffi, module_name, source, *args, **kwds):
+    no_cpp = kwds.pop('no_cpp', False)
     kwds.setdefault('undef_macros', ['NDEBUG'])
     module_name = '_CFFI_' + module_name
     ffi.set_source(module_name, source)
-    if not os.environ.get('NO_CPP'):     # test the .cpp mode too
+    if not os.environ.get('NO_CPP') and not no_cpp:   # test the .cpp mode too
         kwds.setdefault('source_extension', '.cpp')
         source = 'extern "C" {\n%s\n}' % (source,)
     else:
@@ -2250,3 +2251,29 @@
     int f(int a) { return a + 40; }
     """, extra_compile_args=['-fvisibility=hidden'])
     assert lib.f(2) == 42
+
+def test_override_default_definition():
+    ffi = FFI()
+    ffi.cdef("typedef long int16_t, char16_t;")
+    lib = verify(ffi, "test_override_default_definition", "")
+    assert ffi.typeof("int16_t") is ffi.typeof("char16_t") is ffi.typeof("long")
+
+def test_char16_char32_type(no_cpp=False):
+    ffi = FFI()
+    ffi.cdef("""
+        char16_t foo_2bytes(char16_t);
+        char32_t foo_4bytes(char32_t);
+    """)
+    lib = verify(ffi, "test_char16_char32_type" + no_cpp * "_nocpp", """
+    char16_t foo_2bytes(char16_t a) { return (char16_t)(a + 42); }
+    char32_t foo_4bytes(char32_t a) { return (char32_t)(a + 42); }
+    """, no_cpp=no_cpp)
+    assert lib.foo_2bytes(u+'\u1234') == u+'\u125e'
+    assert lib.foo_4bytes(u+'\u1234') == u+'\u125e'
+    assert lib.foo_4bytes(u+'\U00012345') == u+'\U0001236f'
+    py.test.raises(TypeError, lib.foo_2bytes, u+'\U00012345')
+    py.test.raises(TypeError, lib.foo_2bytes, 1234)
+    py.test.raises(TypeError, lib.foo_4bytes, 1234)
+
+def test_char16_char32_plain_c():
+    test_char16_char32_type(no_cpp=True)
diff --git a/testing/cffi1/test_verify1.py b/testing/cffi1/test_verify1.py
--- a/testing/cffi1/test_verify1.py
+++ b/testing/cffi1/test_verify1.py
@@ -221,7 +221,7 @@
         F = tp.is_float_type()
         X = tp.is_complex_type()
         I = tp.is_integer_type()
-        assert C == (typename in ('char', 'wchar_t'))
+        assert C == (typename in ('char', 'wchar_t', 'char16_t', 'char32_t'))
         assert F == (typename in ('float', 'double', 'long double'))
         assert X == (typename in ('float _Complex', 'double _Complex'))
         assert I + F + C + X == 1      # one and only one of them is true
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit

[pypy-commit] cffi default: hg merge char16_char32_t

Reply via email to