Author: Armin Rigo <[email protected]>
Branch:
Changeset: r613:b0c29bd26001
Date: 2012-07-09 17:13 +0200
http://bitbucket.org/cffi/cffi/changeset/b0c29bd26001/
Log: Merge the 'wchar_t' branch, adding support for wchar_t.
diff --git a/c/_cffi_backend.c b/c/_cffi_backend.c
--- a/c/_cffi_backend.c
+++ b/c/_cffi_backend.c
@@ -27,7 +27,7 @@
/* base type flag: exactly one of the following: */
#define CT_PRIMITIVE_SIGNED 1 /* signed integer */
#define CT_PRIMITIVE_UNSIGNED 2 /* unsigned integer */
-#define CT_PRIMITIVE_CHAR 4 /* char (and, later, wchar_t) */
+#define CT_PRIMITIVE_CHAR 4 /* char, wchar_t */
#define CT_PRIMITIVE_FLOAT 8 /* float, double */
#define CT_POINTER 16 /* pointer, excluding ptr-to-func */
#define CT_ARRAY 32 /* array */
@@ -157,6 +157,10 @@
# endif
#endif
+#ifdef HAVE_WCHAR_H
+# include "wchar_helper.h"
+#endif
+
/************************************************************/
static CTypeDescrObject *
@@ -602,7 +606,12 @@
return PyFloat_FromDouble(value);
}
else if (ct->ct_flags & CT_PRIMITIVE_CHAR) {
- return PyString_FromStringAndSize(data, 1);
+ if (ct->ct_size == sizeof(char))
+ return PyString_FromStringAndSize(data, 1);
+#ifdef HAVE_WCHAR_H
+ else
+ return _my_PyUnicode_FromWideChar((wchar_t *)data, 1);
+#endif
}
PyErr_Format(PyExc_SystemError,
@@ -664,8 +673,9 @@
return (unsigned char)(PyString_AS_STRING(init)[0]);
}
if (CData_Check(init) &&
- (((CDataObject *)init)->c_type->ct_flags & CT_PRIMITIVE_CHAR)) {
- return (unsigned char)(((CDataObject *)init)->c_data[0]);
+ (((CDataObject *)init)->c_type->ct_flags & CT_PRIMITIVE_CHAR) &&
+ (((CDataObject *)init)->c_type->ct_size == sizeof(char))) {
+ return *(unsigned char *)((CDataObject *)init)->c_data;
}
PyErr_Format(PyExc_TypeError,
"initializer for ctype 'char' must be a string of length 1, "
@@ -673,6 +683,26 @@
return -1;
}
+#ifdef HAVE_WCHAR_H
+/* Convert a Python object to one wchar_t.
+ *
+ * Accepted inputs are a unicode object that _my_PyUnicode_AsSingleWideChar()
+ * can reduce to a single wchar_t, or a cdata whose type is a primitive char
+ * type of size sizeof(wchar_t).
+ *
+ * On failure a TypeError is set and (wchar_t)-1 is returned.  That value can
+ * also be a legitimate result, so callers must additionally check
+ * PyErr_Occurred().
+ */
+static wchar_t _convert_to_wchar_t(PyObject *init)
+{
+    wchar_t single;
+
+    if (PyUnicode_Check(init) &&
+            _my_PyUnicode_AsSingleWideChar(init, &single) == 0) {
+        return single;
+    }
+    if (CData_Check(init)) {
+        CTypeDescrObject *srctype = ((CDataObject *)init)->c_type;
+
+        if ((srctype->ct_flags & CT_PRIMITIVE_CHAR) &&
+                srctype->ct_size == sizeof(wchar_t)) {
+            return *(wchar_t *)((CDataObject *)init)->c_data;
+        }
+    }
+    PyErr_Format(PyExc_TypeError,
+                 "initializer for ctype 'wchar_t' must be a unicode string "
+                 "of length 1, not %.200s", Py_TYPE(init)->tp_name);
+    return (wchar_t)-1;
+}
+#endif
+
static int _convert_error(PyObject *init, const char *ct_name,
const char *expected)
{
@@ -732,24 +762,46 @@
return 0;
}
else if (ctitem->ct_flags & CT_PRIMITIVE_CHAR) {
- char *srcdata;
- Py_ssize_t n;
- if (!PyString_Check(init)) {
- expected = "str or list or tuple";
- goto cannot_convert;
+ if (ctitem->ct_size == sizeof(char)) {
+ char *srcdata;
+ Py_ssize_t n;
+ if (!PyString_Check(init)) {
+ expected = "str or list or tuple";
+ goto cannot_convert;
+ }
+ n = PyString_GET_SIZE(init);
+ if (ct->ct_length >= 0 && n > ct->ct_length) {
+ PyErr_Format(PyExc_IndexError,
+ "initializer string is too long for '%s' "
+ "(got %zd characters)", ct->ct_name, n);
+ return -1;
+ }
+ if (n != ct->ct_length)
+ n++;
+ srcdata = PyString_AS_STRING(init);
+ memcpy(data, srcdata, n);
+ return 0;
}
- n = PyString_GET_SIZE(init);
- if (ct->ct_length >= 0 && n > ct->ct_length) {
- PyErr_Format(PyExc_IndexError,
- "initializer string is too long for '%s' "
- "(got %zd characters)", ct->ct_name, n);
- return -1;
+#ifdef HAVE_WCHAR_H
+ else {
+ Py_ssize_t n;
+ if (!PyUnicode_Check(init)) {
+ expected = "unicode or list or tuple";
+ goto cannot_convert;
+ }
+ n = _my_PyUnicode_SizeAsWideChar(init);
+ if (ct->ct_length >= 0 && n > ct->ct_length) {
+ PyErr_Format(PyExc_IndexError,
+ "initializer unicode is too long for '%s' "
+ "(got %zd characters)", ct->ct_name, n);
+ return -1;
+ }
+ if (n != ct->ct_length)
+ n++;
+ _my_PyUnicode_AsWideChar(init, (wchar_t *)data, n);
+ return 0;
}
- if (n != ct->ct_length)
- n++;
- srcdata = PyString_AS_STRING(init);
- memcpy(data, srcdata, n);
- return 0;
+#endif
}
else {
expected = "list or tuple";
@@ -829,11 +881,22 @@
return 0;
}
if (ct->ct_flags & CT_PRIMITIVE_CHAR) {
- int res = _convert_to_char(init);
- if (res < 0)
- return -1;
- data[0] = res;
- return 0;
+ if (ct->ct_size == sizeof(char)) {
+ int res = _convert_to_char(init);
+ if (res < 0)
+ return -1;
+ data[0] = res;
+ return 0;
+ }
+#ifdef HAVE_WCHAR_H
+ else {
+ wchar_t res = _convert_to_wchar_t(init);
+ if (res == (wchar_t)-1 && PyErr_Occurred())
+ return -1;
+ *(wchar_t *)data = res;
+ return 0;
+ }
+#endif
}
if (ct->ct_flags & (CT_STRUCT|CT_UNION)) {
@@ -1064,11 +1127,13 @@
static PyObject *cdata_str(CDataObject *cd)
{
- if (cd->c_type->ct_flags & CT_PRIMITIVE_CHAR) {
+ if (cd->c_type->ct_flags & CT_PRIMITIVE_CHAR &&
+ cd->c_type->ct_size == sizeof(char)) {
return PyString_FromStringAndSize(cd->c_data, 1);
}
else if (cd->c_type->ct_itemdescr != NULL &&
- cd->c_type->ct_itemdescr->ct_flags & CT_PRIMITIVE_CHAR) {
+ cd->c_type->ct_itemdescr->ct_flags & CT_PRIMITIVE_CHAR &&
+ cd->c_type->ct_itemdescr->ct_size == sizeof(char)) {
Py_ssize_t length;
if (cd->c_type->ct_flags & CT_ARRAY) {
@@ -1101,6 +1166,48 @@
return Py_TYPE(cd)->tp_repr((PyObject *)cd);
}
+#ifdef HAVE_WCHAR_H
+/* Implementation of unicode(cdata).
+ *
+ *  - cdata of a primitive char type of size sizeof(wchar_t): a unicode
+ *    object built from that single character;
+ *  - cdata whose item type is such a char type: the characters up to (not
+ *    including) the first null; for arrays the scan also stops at the array
+ *    length.  A NULL data pointer (non-array case) raises RuntimeError;
+ *  - anything else: the same result as cdata_repr().
+ */
+static PyObject *cdata_unicode(CDataObject *cd)
+{
+    CTypeDescrObject *ct = cd->c_type;
+    CTypeDescrObject *ctitem;
+    const wchar_t *text = (wchar_t *)cd->c_data;
+    Py_ssize_t nchars = 0;
+
+    /* a single wchar_t */
+    if ((ct->ct_flags & CT_PRIMITIVE_CHAR) && ct->ct_size == sizeof(wchar_t))
+        return _my_PyUnicode_FromWideChar((wchar_t *)cd->c_data, 1);
+
+    /* not a pointer-to / array-of wchar_t: fall back to the repr */
+    ctitem = ct->ct_itemdescr;
+    if (ctitem == NULL ||
+            !(ctitem->ct_flags & CT_PRIMITIVE_CHAR) ||
+            ctitem->ct_size != sizeof(wchar_t))
+        return cdata_repr(cd);
+
+    if (ct->ct_flags & CT_ARRAY) {
+        /* bounded scan: never read past the end of the array */
+        const Py_ssize_t limit = get_array_length(cd);
+        while (nchars < limit && text[nchars] != 0)
+            nchars++;
+    }
+    else if (cd->c_data == NULL) {
+        PyObject *r = cdata_repr(cd);
+        if (r != NULL) {
+            PyErr_Format(PyExc_RuntimeError,
+                         "cannot use unicode() on %s",
+                         PyString_AS_STRING(r));
+            Py_DECREF(r);
+        }
+        return NULL;
+    }
+    else {
+        /* plain pointer: scan up to the terminating null */
+        while (text[nchars] != 0)
+            nchars++;
+    }
+
+    return _my_PyUnicode_FromWideChar((wchar_t *)cd->c_data, nchars);
+}
+#endif
+
static PyObject *cdataowning_repr(CDataObject *cd)
{
Py_ssize_t size;
@@ -1152,7 +1259,12 @@
return convert_to_object(cd->c_data, cd->c_type);
}
else if (cd->c_type->ct_flags & CT_PRIMITIVE_CHAR) {
- return PyInt_FromLong((unsigned char)cd->c_data[0]);
+ if (cd->c_type->ct_size == sizeof(char))
+ return PyInt_FromLong((unsigned char)cd->c_data[0]);
+#ifdef HAVE_WCHAR_H
+ else
+ return PyInt_FromLong((long)*(wchar_t *)cd->c_data);
+#endif
}
else if (cd->c_type->ct_flags & CT_PRIMITIVE_FLOAT) {
PyObject *o = convert_to_object(cd->c_data, cd->c_type);
@@ -1552,12 +1664,27 @@
argtype = (CTypeDescrObject *)PyTuple_GET_ITEM(fvarargs, i);
if ((argtype->ct_flags & CT_POINTER) &&
- (argtype->ct_itemdescr->ct_flags & CT_PRIMITIVE_CHAR) &&
- PyString_Check(obj)) {
- /* special case: Python string -> cdata 'char *' */
- *(char **)data = PyString_AS_STRING(obj);
+ (argtype->ct_itemdescr->ct_flags & CT_PRIMITIVE_CHAR)) {
+ if (argtype->ct_itemdescr->ct_size == sizeof(char)) {
+ if (PyString_Check(obj)) {
+ /* special case: Python string -> cdata 'char *' */
+ *(char **)data = PyString_AS_STRING(obj);
+ continue;
+ }
+ }
+#ifdef HAVE_WCHAR_H
+ else {
+ if (PyUnicode_Check(obj)) {
+ /* Python Unicode string -> cdata 'wchar_t *':
+ not supported yet */
+ PyErr_SetString(PyExc_NotImplementedError,
+ "automatic unicode-to-'wchar_t *' conversion");
+ goto error;
+ }
+ }
+#endif
}
- else if (convert_from_object(data, argtype, obj) < 0)
+ if (convert_from_object(data, argtype, obj) < 0)
goto error;
}
@@ -1645,6 +1772,11 @@
(objobjargproc)cdata_ass_sub, /*mp_ass_subscript*/
};
+/* Method table of CData objects.  cdata_unicode() is only defined when the
+   backend is compiled with HAVE_WCHAR_H, so its entry is guarded by the same
+   macro; without the guard a build lacking wchar support does not compile. */
+static PyMethodDef CData_methods[] = {
+#ifdef HAVE_WCHAR_H
+    {"__unicode__", (PyCFunction)cdata_unicode, METH_NOARGS},
+#endif
+    {NULL,          NULL}           /* sentinel */
+};
+
static PyTypeObject CData_Type = {
PyVarObject_HEAD_INIT(NULL, 0)
"_cffi_backend.CData",
@@ -1672,6 +1804,8 @@
cdata_richcompare, /* tp_richcompare */
0, /* tp_weaklistoffset */
(getiterfunc)cdata_iter, /* tp_iter */
+ 0, /* tp_iternext */
+ CData_methods, /* tp_methods */
};
static PyTypeObject CDataOwning_Type = {
@@ -1848,7 +1982,7 @@
return NULL;
}
if (ctitem->ct_flags & CT_PRIMITIVE_CHAR)
- datasize += sizeof(char); /* forcefully add a null character */
+ datasize *= 2; /* forcefully add another character: a null */
}
else if (ct->ct_flags & CT_ARRAY) {
dataoffset = offsetof(CDataObject_own_nolength, alignment);
@@ -1861,6 +1995,10 @@
/* from a string, we add the null terminator */
explicitlength = PyString_GET_SIZE(init) + 1;
}
+ else if (PyUnicode_Check(init)) {
+ /* from a unicode, we add the null terminator */
+ explicitlength = _my_PyUnicode_SizeAsWideChar(init) + 1;
+ }
else {
explicitlength = PyNumber_AsSsize_t(init, PyExc_OverflowError);
if (explicitlength < 0) {
@@ -1973,6 +2111,18 @@
value = (unsigned char)PyString_AS_STRING(ob)[0];
}
}
+#ifdef HAVE_WCHAR_H
+ else if (PyUnicode_Check(ob)) {
+ wchar_t ordinal;
+ if (_my_PyUnicode_AsSingleWideChar(ob, &ordinal) < 0) {
+ PyErr_Format(PyExc_TypeError,
+ "cannot cast unicode of length %zd to ctype '%s'",
+ PyUnicode_GET_SIZE(ob), ct->ct_name);
+ return NULL;
+ }
+ value = (long)ordinal;
+ }
+#endif
else {
value = _my_PyLong_AsUnsignedLongLong(ob, 0);
if (value == (unsigned PY_LONG_LONG)-1 && PyErr_Occurred())
@@ -2240,7 +2390,6 @@
{ "ptrdiff_t", sizeof(ptrdiff_t) },
{ "size_t", sizeof(size_t) | UNSIGNED },
{ "ssize_t", sizeof(ssize_t) },
- /*{ "wchar_t", sizeof(wchar_t) | UNSIGNED },*/
{ NULL }
};
#undef UNSIGNED
@@ -2284,10 +2433,17 @@
EPTYPE(ull, unsigned long long, CT_PRIMITIVE_UNSIGNED ) \
EPTYPE(f, float, CT_PRIMITIVE_FLOAT ) \
EPTYPE(d, double, CT_PRIMITIVE_FLOAT )
+#ifdef HAVE_WCHAR_H
+# define ENUM_PRIMITIVE_TYPES_WCHAR \
+ EPTYPE(wc, wchar_t, CT_PRIMITIVE_CHAR )
+#else
+# define ENUM_PRIMITIVE_TYPES_WCHAR /* nothing */
+#endif
#define EPTYPE(code, typename, flags) \
struct aligncheck_##code { char x; typename y; };
ENUM_PRIMITIVE_TYPES
+ ENUM_PRIMITIVE_TYPES_WCHAR
#undef EPTYPE
CTypeDescrObject *td;
@@ -2301,7 +2457,9 @@
flags \
},
ENUM_PRIMITIVE_TYPES
+ ENUM_PRIMITIVE_TYPES_WCHAR
#undef EPTYPE
+#undef ENUM_PRIMITIVE_TYPES_WCHAR
#undef ENUM_PRIMITIVE_TYPES
{ NULL }
};
@@ -2314,6 +2472,11 @@
for (ptypes=types; ; ptypes++) {
if (ptypes->name == NULL) {
+#ifndef HAVE_WCHAR_H
+            /* "wchar_t" is a recognized type name that this build merely
+               lacks support for: report NotImplementedError for it, and
+               KeyError only for names that are really unknown.  strcmp()
+               returns 0 on equality, hence the explicit comparison. */
+            if (strcmp(name, "wchar_t") == 0)
+                PyErr_SetString(PyExc_NotImplementedError, name);
+            else
+#endif
PyErr_SetString(PyExc_KeyError, name);
return NULL;
}
@@ -2358,11 +2521,11 @@
td->ct_length = ptypes->align;
td->ct_extra = ffitype;
td->ct_flags = ptypes->flags;
- if (td->ct_flags & CT_PRIMITIVE_SIGNED) {
+ if (td->ct_flags & (CT_PRIMITIVE_SIGNED | CT_PRIMITIVE_CHAR)) {
if (td->ct_size <= sizeof(long))
td->ct_flags |= CT_PRIMITIVE_FITS_LONG;
}
- else if (td->ct_flags & (CT_PRIMITIVE_UNSIGNED | CT_PRIMITIVE_CHAR)) {
+ else if (td->ct_flags & CT_PRIMITIVE_UNSIGNED) {
if (td->ct_size < sizeof(long))
td->ct_flags |= CT_PRIMITIVE_FITS_LONG;
}
@@ -2592,6 +2755,10 @@
if (!(ftype->ct_flags & (CT_PRIMITIVE_SIGNED |
CT_PRIMITIVE_UNSIGNED |
CT_PRIMITIVE_CHAR)) ||
+#ifdef HAVE_WCHAR_H
+ ((ftype->ct_flags & CT_PRIMITIVE_CHAR)
+ && ftype->ct_size == sizeof(wchar_t)) ||
+#endif
fbitsize == 0 ||
fbitsize > 8 * ftype->ct_size) {
PyErr_Format(PyExc_TypeError, "invalid bit field '%s'",
@@ -3763,6 +3930,12 @@
return PyString_FromStringAndSize(&x, 1);
}
+#ifdef HAVE_WCHAR_H
+/* Build a Python unicode object from the single wchar_t 'x'. */
+static PyObject *
+_cffi_from_c_wchar_t(wchar_t x)
+{
+    const Py_ssize_t one_char = 1;
+
+    return _my_PyUnicode_FromWideChar(&x, one_char);
+}
+#endif
+
static void *cffi_exports[] = {
_cffi_to_c_char_p,
_cffi_to_c_signed_char,
@@ -3788,6 +3961,13 @@
convert_to_object,
convert_from_object,
convert_struct_to_owning_object,
+#ifdef HAVE_WCHAR_H
+ _convert_to_wchar_t,
+ _cffi_from_c_wchar_t,
+#else
+ 0,
+ 0,
+#endif
};
/************************************************************/
diff --git a/c/test_c.py b/c/test_c.py
--- a/c/test_c.py
+++ b/c/test_c.py
@@ -1279,6 +1279,121 @@
py.test.raises(TypeError, newp, BStructPtr, [cast(BIntP, 0)])
py.test.raises(TypeError, newp, BStructPtr, [cast(BFunc2, 0)])
+def test_wchar():
+    # Exercises wchar_t support of the C backend: casts, struct fields,
+    # arrays, pointers, unicode()/str()/repr()/int() and callbacks.
+    BWChar = new_primitive_type("wchar_t")
+    BInt = new_primitive_type("int")
+    # pyuni4: one code unit per non-BMP character in Python unicode objects;
+    # wchar4: the C wchar_t is 4 bytes wide.
+    pyuni4 = {1: True, 2: False}[len(u'\U00012345')]
+    wchar4 = {2: False, 4: True}[sizeof(BWChar)]
+    assert str(cast(BWChar, 0x45)) == "<cdata 'wchar_t' u'E'>"
+    assert str(cast(BWChar, 0x1234)) == "<cdata 'wchar_t' u'\u1234'>"
+    if wchar4:
+        x = cast(BWChar, 0x12345)
+        assert str(x) == "<cdata 'wchar_t' u'\U00012345'>"
+        assert unicode(x) == u'\U00012345'
+    else:
+        assert not pyuni4
+    #
+    BWCharP = new_pointer_type(BWChar)
+    BStruct = new_struct_type("foo_s")
+    BStructPtr = new_pointer_type(BStruct)
+    complete_struct_or_union(BStruct, [('a1', BWChar, -1),
+                                       ('a2', BWCharP, -1)])
+    s = newp(BStructPtr)
+    s.a1 = u'\x00'
+    assert s.a1 == u'\x00'
+    py.test.raises(TypeError, "s.a1 = 'a'")
+    py.test.raises(TypeError, "s.a1 = '\xFF'")
+    s.a1 = u'\u1234'
+    assert s.a1 == u'\u1234'
+    if pyuni4:
+        assert wchar4
+        s.a1 = u'\U00012345'
+        assert s.a1 == u'\U00012345'
+    elif wchar4:
+        s.a1 = cast(BWChar, 0x12345)
+        assert s.a1 == u'\ud808\udf45'
+        s.a1 = u'\ud807\udf44'
+        assert s.a1 == u'\U00011f44'
+    else:
+        # 2-byte wchar_t: the two-unit string cannot be reduced to a single
+        # wchar_t, and _convert_to_wchar_t() reports that as a TypeError.
+        py.test.raises(TypeError, "s.a1 = u'\U00012345'")
+    #
+    BWCharArray = new_array_type(BWCharP, None)
+    a = newp(BWCharArray, u'hello \u1234 world')
+    assert len(a) == 14   # including the final null
+    assert unicode(a) == u'hello \u1234 world'
+    a[13] = u'!'
+    assert unicode(a) == u'hello \u1234 world!'
+    assert str(a) == repr(a)
+    assert a[6] == u'\u1234'
+    a[6] = u'-'
+    assert unicode(a) == 'hello - world!'
+    assert str(a) == repr(a)
+    #
+    if wchar4:
+        u = u'\U00012345\U00012346\U00012347'
+        a = newp(BWCharArray, u)
+        assert len(a) == 4
+        assert unicode(a) == u
+        assert len(list(a)) == 4
+        expected = [u'\U00012345', u'\U00012346', u'\U00012347', unichr(0)]
+        assert list(a) == expected
+        got = [a[i] for i in range(4)]
+        assert got == expected
+        py.test.raises(IndexError, 'a[4]')
+    #
+    w = cast(BWChar, 'a')
+    assert repr(w) == "<cdata 'wchar_t' u'a'>"
+    assert str(w) == repr(w)
+    assert unicode(w) == u'a'
+    assert int(w) == ord('a')
+    w = cast(BWChar, 0x1234)
+    assert repr(w) == "<cdata 'wchar_t' u'\u1234'>"
+    assert str(w) == repr(w)
+    assert unicode(w) == u'\u1234'
+    assert int(w) == 0x1234
+    w = cast(BWChar, u'\u1234')
+    assert repr(w) == "<cdata 'wchar_t' u'\u1234'>"
+    assert str(w) == repr(w)
+    assert unicode(w) == u'\u1234'
+    assert int(w) == 0x1234
+    w = cast(BInt, u'\u1234')
+    assert repr(w) == "<cdata 'int' 4660>"
+    if wchar4:
+        w = cast(BWChar, u'\U00012345')
+        assert repr(w) == "<cdata 'wchar_t' u'\U00012345'>"
+        assert str(w) == repr(w)
+        assert unicode(w) == u'\U00012345'
+        assert int(w) == 0x12345
+        w = cast(BInt, u'\U00012345')
+        assert repr(w) == "<cdata 'int' 74565>"
+    py.test.raises(TypeError, cast, BInt, u'')
+    py.test.raises(TypeError, cast, BInt, u'XX')
+    assert int(cast(BInt, u'a')) == ord('a')
+    #
+    a = newp(BWCharArray, u'hello - world')
+    p = cast(BWCharP, a)
+    assert unicode(p) == u'hello - world'
+    p[6] = u'\u2345'
+    assert unicode(p) == u'hello \u2345 world'
+    #
+    s = newp(BStructPtr, [u'\u1234', p])
+    assert s.a1 == u'\u1234'
+    assert s.a2 == p
+    assert str(s.a2) == repr(s.a2)
+    assert unicode(s.a2) == u'hello \u2345 world'
+    #
+    q = cast(BWCharP, 0)
+    assert str(q) == repr(q)
+    py.test.raises(RuntimeError, unicode, q)
+    #
+    def cb(p):
+        assert repr(p).startswith("<cdata 'wchar_t *' 0x")
+        return len(unicode(p))
+    BFunc = new_function_type((BWCharP,), BInt, False)
+    f = callback(BFunc, cb, -42)
+    #assert f(u'a\u1234b') == 3    -- not implemented
+    py.test.raises(NotImplementedError, f, u'a\u1234b')
+
def test_keepalive_struct():
# exception to the no-keepalive rule: p=newp(BStructPtr) returns a
# pointer owning the memory, and p[0] returns a pointer to the
diff --git a/c/wchar_helper.h b/c/wchar_helper.h
new file mode 100644
--- /dev/null
+++ b/c/wchar_helper.h
@@ -0,0 +1,121 @@
+/*
+ * wchar_t helpers
+ */
+
+/* Defined when Py_UNICODE is 2 bytes wide while wchar_t is 4 bytes wide.
+   In that configuration the helpers below translate between one wchar_t
+   above 0xFFFF and a pair of Py_UNICODE surrogates. */
+#if (Py_UNICODE_SIZE == 2) && (SIZEOF_WCHAR_T == 4)
+# define CONVERT_WCHAR_TO_SURROGATES
+#endif
+
+
+#if PY_VERSION_HEX < 0x02070000 && defined(CONVERT_WCHAR_TO_SURROGATES)
+
+/* Replacement for PyUnicode_FromWideChar(), which before Python 2.7 cannot
+   turn a wchar_t value greater than 65535 into a two-character surrogate
+   pair.  Returns a new unicode object built from the 'size' wide characters
+   at 'w', or NULL with an exception set.
+*/
+static PyObject *
+_my_PyUnicode_FromWideChar(register const wchar_t *w,
+                           Py_ssize_t size)
+{
+    PyObject *unicode;
+    Py_UNICODE *out;
+    Py_ssize_t pos;
+    Py_ssize_t nunits;
+
+    if (w == NULL) {
+        PyErr_BadInternalCall();
+        return NULL;
+    }
+
+    /* First pass: each value above 0xFFFF needs one extra Py_UNICODE */
+    nunits = size;
+    for (pos = 0; pos < size; pos++) {
+        if (w[pos] > 0xFFFF)
+            nunits++;
+    }
+
+    unicode = PyUnicode_FromUnicode(NULL, nunits);
+    if (!unicode)
+        return NULL;
+
+    /* Second pass: copy the wchar_t data, splitting where necessary */
+    out = PyUnicode_AS_UNICODE(unicode);
+    for (pos = 0; pos < size; pos++) {
+        wchar_t ordinal = w[pos];
+
+        if (ordinal > 0xFFFF) {
+            ordinal -= 0x10000;
+            *out++ = 0xD800 | (ordinal >> 10);
+            *out++ = 0xDC00 | (ordinal & 0x3FF);
+        }
+        else {
+            *out++ = ordinal;
+        }
+    }
+    return unicode;
+}
+
+#else
+
+/* The stock function is used as-is in every other configuration */
+# define _my_PyUnicode_FromWideChar PyUnicode_FromWideChar
+
+#endif
+
+
+/* IS_SURROGATE(u): true when u[0] lies in 0xD800..0xDBFF and u[1] lies in
+   0xDC00..0xDFFF, i.e. it tests the two units starting at 'u' as a pair,
+   not a single unit.  It may read u[1] and evaluates 'u' more than once.
+   AS_SURROGATE(u): the ordinal (0x10000 and up) encoded by such a pair. */
+#define IS_SURROGATE(u) (0xD800 <= (u)[0] && (u)[0] <= 0xDBFF && \
+ 0xDC00 <= (u)[1] && (u)[1] <= 0xDFFF)
+#define AS_SURROGATE(u) (0x10000 + (((u)[0] - 0xD800) << 10) + \
+ ((u)[1] - 0xDC00))
+
+/* Store in '*result' the single wchar_t represented by 'unicode'.
+   Succeeds (returns 0) for a unicode object of size 1 and, when
+   CONVERT_WCHAR_TO_SURROGATES is defined, for one of size 2 holding a
+   surrogate pair.  Returns -1 otherwise; no exception is set here. */
+static int _my_PyUnicode_AsSingleWideChar(PyObject *unicode, wchar_t *result)
+{
+    Py_UNICODE *units = PyUnicode_AS_UNICODE(unicode);
+    Py_ssize_t nunits = PyUnicode_GET_SIZE(unicode);
+
+    if (nunits == 1) {
+        *result = (wchar_t)(units[0]);
+        return 0;
+    }
+#ifdef CONVERT_WCHAR_TO_SURROGATES
+    if (nunits == 2 && IS_SURROGATE(units)) {
+        *result = AS_SURROGATE(units);
+        return 0;
+    }
+#endif
+    return -1;
+}
+
+/* Number of wchar_t needed to hold 'unicode' (terminating null excluded):
+   its size in Py_UNICODE units, minus one for every surrogate pair when
+   CONVERT_WCHAR_TO_SURROGATES is defined. */
+static Py_ssize_t _my_PyUnicode_SizeAsWideChar(PyObject *unicode)
+{
+    Py_ssize_t nunits = PyUnicode_GET_SIZE(unicode);
+#ifdef CONVERT_WCHAR_TO_SURROGATES
+    Py_UNICODE *units = PyUnicode_AS_UNICODE(unicode);
+    Py_ssize_t npairs = 0;
+    Py_ssize_t pos;
+
+    /* the last unit cannot start a pair, so stop one short of the end */
+    for (pos = 0; pos < nunits - 1; pos++) {
+        if (IS_SURROGATE(units + pos))
+            npairs++;
+    }
+    return nunits - npairs;
+#else
+    return nunits;
+#endif
+}
+
+/* Fill result[0..resultlen-1] from the Py_UNICODE data of 'unicode'.
+   When CONVERT_WCHAR_TO_SURROGATES is defined, a surrogate pair in the
+   input is merged into one output wchar_t.  The function does no bounds
+   checking on the input: 'resultlen' drives the loop. */
+static void _my_PyUnicode_AsWideChar(PyObject *unicode,
+                                     wchar_t *result,
+                                     Py_ssize_t resultlen)
+{
+    Py_UNICODE *src = PyUnicode_AS_UNICODE(unicode);
+    Py_ssize_t filled = 0;
+
+    while (filled < resultlen) {
+#ifdef CONVERT_WCHAR_TO_SURROGATES
+        if (IS_SURROGATE(src)) {
+            /* two input units produce a single output character */
+            result[filled++] = AS_SURROGATE(src);
+            src += 2;
+            continue;
+        }
+#endif
+        result[filled++] = *src++;
+    }
+}
diff --git a/cffi/backend_ctypes.py b/cffi/backend_ctypes.py
--- a/cffi/backend_ctypes.py
+++ b/cffi/backend_ctypes.py
@@ -267,8 +267,6 @@
if size == ctypes.sizeof(ctypes.c_size_t):
result['size_t'] = size | UNSIGNED
result['ssize_t'] = size
- #if size == ctypes.sizeof(ctypes.c_wchar):
- # result['wchar_t'] = size | UNSIGNED
return result
def load_library(self, path):
@@ -292,6 +290,8 @@
return CTypesVoid
def new_primitive_type(self, name):
+ if name == 'wchar_t':
+ raise NotImplementedError(name)
ctype = self.PRIMITIVE_TYPES[name]
if name == 'char':
kind = 'char'
diff --git a/cffi/cparser.py b/cffi/cparser.py
--- a/cffi/cparser.py
+++ b/cffi/cparser.py
@@ -53,7 +53,7 @@
# internals of CParser... the following registers the
# typedefs, because their presence or absence influences the
# parsing itself (but what they are typedef'ed to plays no role)
- csourcelines = []
+ csourcelines = ['typedef int wchar_t;']
for name in sorted(self._declarations):
if name.startswith('typedef '):
csourcelines.append('typedef int %s;' % (name[8:],))
diff --git a/cffi/model.py b/cffi/model.py
--- a/cffi/model.py
+++ b/cffi/model.py
@@ -53,7 +53,7 @@
return self.name + replace_with
def is_char_type(self):
- return self.name == 'char'
+ return self.name in ('char', 'wchar_t')
def is_signed_type(self):
return self.is_integer_type() and not self.is_unsigned_type()
def is_unsigned_type(self):
diff --git a/cffi/verifier.py b/cffi/verifier.py
--- a/cffi/verifier.py
+++ b/cffi/verifier.py
@@ -619,7 +619,11 @@
((int(*)(char *, CTypeDescrObject *, PyObject *))_cffi_exports[17])
#define _cffi_from_c_struct \
((PyObject *(*)(char *, CTypeDescrObject *))_cffi_exports[18])
-#define _CFFI_NUM_EXPORTS 19
+#define _cffi_to_c_wchar_t \
+ ((wchar_t(*)(PyObject *))_cffi_exports[19])
+#define _cffi_from_c_wchar_t \
+ ((PyObject *(*)(wchar_t))_cffi_exports[20])
+#define _CFFI_NUM_EXPORTS 21
#if SIZEOF_LONG < SIZEOF_LONG_LONG
# define _cffi_to_c_long_long PyLong_AsLongLong
diff --git a/testing/backend_tests.py b/testing/backend_tests.py
--- a/testing/backend_tests.py
+++ b/testing/backend_tests.py
@@ -6,7 +6,7 @@
SIZE_OF_LONG = ctypes.sizeof(ctypes.c_long)
SIZE_OF_SHORT = ctypes.sizeof(ctypes.c_short)
SIZE_OF_PTR = ctypes.sizeof(ctypes.c_void_p)
-#SIZE_OF_WCHAR = ctypes.sizeof(ctypes.c_wchar)
+SIZE_OF_WCHAR = ctypes.sizeof(ctypes.c_wchar)
class BackendTests:
@@ -41,7 +41,6 @@
self._test_int_type(ffi, 'ptrdiff_t', SIZE_OF_PTR, False)
self._test_int_type(ffi, 'size_t', SIZE_OF_PTR, True)
self._test_int_type(ffi, 'ssize_t', SIZE_OF_PTR, False)
- #self._test_int_type(ffi, 'wchar_t', SIZE_OF_WCHAR, True)
def _test_int_type(self, ffi, c_decl, size, unsigned):
if unsigned:
@@ -274,8 +273,9 @@
assert ffi.new("char", "\xff")[0] == '\xff'
assert ffi.new("char")[0] == '\x00'
assert int(ffi.cast("char", 300)) == 300 - 256
- assert bool(ffi.new("char"))
+ assert bool(ffi.cast("char", 0))
py.test.raises(TypeError, ffi.new, "char", 32)
+ py.test.raises(TypeError, ffi.new, "char", u"x")
py.test.raises(TypeError, ffi.new, "char", "foo")
#
p = ffi.new("char[]", ['a', 'b', '\x9c'])
@@ -297,6 +297,63 @@
assert [p[i] for i in range(2)] == ['a', 'b']
py.test.raises(IndexError, ffi.new, "char[2]", "abc")
+ def check_wchar_t(self, ffi):
+ # Helper: skip the calling test when casting to 'wchar_t' raises
+ # NotImplementedError with the backend under test.
+ try:
+ ffi.cast("wchar_t", 0)
+ except NotImplementedError:
+ py.test.skip("NotImplementedError: wchar_t")
+
+ def test_wchar_t(self):
+ # Checks wchar_t values and wchar_t arrays created with ffi.new():
+ # initialization from unicode strings and lists, item access and
+ # assignment, and the added null terminator.
+ ffi = FFI(backend=self.Backend())
+ self.check_wchar_t(ffi)
+ assert ffi.new("wchar_t", u'x')[0] == u'x'
+ assert ffi.new("wchar_t", unichr(1234))[0] == unichr(1234)
+ if SIZE_OF_WCHAR > 2:
+ assert ffi.new("wchar_t", u'\U00012345')[0] == u'\U00012345'
+ else:
+ py.test.raises(TypeError, ffi.new, "wchar_t", u'\U00012345')
+ assert ffi.new("wchar_t")[0] == u'\x00'
+ assert int(ffi.cast("wchar_t", 300)) == 300
+ assert bool(ffi.cast("wchar_t", 0))
+ py.test.raises(TypeError, ffi.new, "wchar_t", 32)
+ py.test.raises(TypeError, ffi.new, "wchar_t", "foo")
+ #
+ p = ffi.new("wchar_t[]", [u'a', u'b', unichr(1234)])
+ assert len(p) == 3
+ assert p[0] == u'a'
+ assert p[1] == u'b' and type(p[1]) is unicode
+ assert p[2] == unichr(1234)
+ p[0] = u'x'
+ assert p[0] == u'x' and type(p[0]) is unicode
+ p[1] = unichr(1357)
+ assert p[1] == unichr(1357)
+ p = ffi.new("wchar_t[]", u"abcd")
+ assert len(p) == 5
+ assert p[4] == u'\x00'
+ p = ffi.new("wchar_t[]", u"a\u1234b")
+ assert len(p) == 4
+ assert p[1] == unichr(0x1234)
+ #
+ # a non-BMP character takes two items when wchar_t is 2 bytes wide
+ p = ffi.new("wchar_t[]", u'\U00023456')
+ if SIZE_OF_WCHAR == 2:
+ assert sys.maxunicode == 0xffff
+ assert len(p) == 3
+ assert p[0] == u'\ud84d'
+ assert p[1] == u'\udc56'
+ assert p[2] == u'\x00'
+ else:
+ assert len(p) == 2
+ assert p[0] == u'\U00023456'
+ assert p[1] == u'\x00'
+ #
+ p = ffi.new("wchar_t[4]", u"ab")
+ assert len(p) == 4
+ assert [p[i] for i in range(4)] == [u'a', u'b', u'\x00', u'\x00']
+ p = ffi.new("wchar_t[2]", u"ab")
+ assert len(p) == 2
+ assert [p[i] for i in range(2)] == [u'a', u'b']
+ py.test.raises(IndexError, ffi.new, "wchar_t[2]", u"abc")
+
def test_none_as_null_doesnt_work(self):
ffi = FFI(backend=self.Backend())
p = ffi.new("int*[1]")
@@ -492,6 +549,14 @@
assert str(ffi.new("char", "x")) == "x"
assert str(ffi.new("char", "\x00")) == ""
+ def test_unicode_from_wchar_pointer(self):
+ # unicode() of the object returned by ffi.new("wchar_t", ...) gives the
+ # text up to the first null; str() of it is the same as its repr().
+ ffi = FFI(backend=self.Backend())
+ self.check_wchar_t(ffi)
+ assert unicode(ffi.new("wchar_t", u"x")) == u"x"
+ assert unicode(ffi.new("wchar_t", u"\x00")) == u""
+ x = ffi.new("wchar_t", u"\x00")
+ assert str(x) == repr(x)
+
def test_string_from_char_array(self):
ffi = FFI(backend=self.Backend())
assert str(ffi.cast("char", "x")) == "x"
@@ -509,6 +574,28 @@
p = ffi.cast("char *", a)
assert str(p) == 'hello'
+ def test_string_from_wchar_array(self):
+ # unicode() on wchar_t values, arrays and pointers: the result stops at
+ # the first null character, or at the end of the array if there is none.
+ ffi = FFI(backend=self.Backend())
+ self.check_wchar_t(ffi)
+ assert unicode(ffi.cast("wchar_t", "x")) == u"x"
+ assert unicode(ffi.cast("wchar_t", u"x")) == u"x"
+ x = ffi.cast("wchar_t", "x")
+ assert str(x) == repr(x)
+ #
+ p = ffi.new("wchar_t[]", u"hello.")
+ p[5] = u'!'
+ assert unicode(p) == u"hello!"
+ # overwrite the terminating null: the whole array is then returned
+ p[6] = unichr(1234)
+ assert unicode(p) == u"hello!\u04d2"
+ p[3] = u'\x00'
+ assert unicode(p) == u"hel"
+ py.test.raises(IndexError, "p[7] = u'X'")
+ #
+ a = ffi.new("wchar_t[]", u"hello\x00world")
+ assert len(a) == 12
+ p = ffi.cast("wchar_t *", a)
+ assert unicode(p) == u'hello'
+
def test_fetch_const_char_p_field(self):
# 'const' is ignored so far
ffi = FFI(backend=self.Backend())
@@ -521,6 +608,18 @@
s.name = ffi.NULL
assert s.name == ffi.NULL
+ def test_fetch_const_wchar_p_field(self):
+ # Reading a 'const wchar_t *' struct field gives an object that is
+ # neither str nor unicode; unicode() of it gives the text pointed to.
+ # 'const' is ignored so far
+ ffi = FFI(backend=self.Backend())
+ self.check_wchar_t(ffi)
+ ffi.cdef("struct foo { const wchar_t *name; };")
+ t = ffi.new("const wchar_t[]", u"testing")
+ s = ffi.new("struct foo", [t])
+ assert type(s.name) not in (str, unicode)
+ assert unicode(s.name) == u"testing"
+ s.name = ffi.NULL
+ assert s.name == ffi.NULL
+
def test_voidp(self):
ffi = FFI(backend=self.Backend())
py.test.raises(TypeError, ffi.new, "void")
@@ -630,6 +729,19 @@
p = ffi.cast("int", "\x81")
assert int(p) == 0x81
+ def test_wchar_cast(self):
+ # Casting a wchar_t cdata, or a one-character unicode string, to an
+ # integer type gives the character's ordinal.  The result for
+ # wchar_t(-1) differs between the 2-byte and the 4-byte case.
+ ffi = FFI(backend=self.Backend())
+ self.check_wchar_t(ffi)
+ p = ffi.cast("int", ffi.cast("wchar_t", unichr(1234)))
+ assert int(p) == 1234
+ p = ffi.cast("long long", ffi.cast("wchar_t", -1))
+ if SIZE_OF_WCHAR == 2: # 2 bytes, unsigned
+ assert int(p) == 0xffff
+ else: # 4 bytes, signed
+ assert int(p) == -1
+ p = ffi.cast("int", unichr(1234))
+ assert int(p) == 1234
+
def test_cast_array_to_charp(self):
ffi = FFI(backend=self.Backend())
a = ffi.new("short int[]", [0x1234, 0x5678])
diff --git a/testing/test_verify.py b/testing/test_verify.py
--- a/testing/test_verify.py
+++ b/testing/test_verify.py
@@ -68,9 +68,9 @@
all_float_types = ['float', 'double']
def test_primitive_category():
- for typename in all_integer_types + all_float_types + ['char']:
+ for typename in all_integer_types + all_float_types + ['char', 'wchar_t']:
tp = model.PrimitiveType(typename)
- assert tp.is_char_type() == (typename == 'char')
+ assert tp.is_char_type() == (typename in ('char', 'wchar_t'))
assert tp.is_signed_type() == (typename in all_signed_integer_types)
assert tp.is_unsigned_type()== (typename in all_unsigned_integer_types)
assert tp.is_integer_type() == (typename in all_integer_types)
@@ -104,6 +104,19 @@
assert lib.foo("A") == "B"
py.test.raises(TypeError, lib.foo, "bar")
+def test_wchar_type():
+ # A verified C function taking and returning wchar_t: foo(x) returns x+1,
+ # so the character following the argument is expected back.  The example
+ # characters are chosen according to sizeof(wchar_t).
+ ffi = FFI()
+ if ffi.sizeof('wchar_t') == 2:
+ uniexample1 = u'\u1234'
+ uniexample2 = u'\u1235'
+ else:
+ uniexample1 = u'\U00012345'
+ uniexample2 = u'\U00012346'
+ #
+ ffi.cdef("wchar_t foo(wchar_t);")
+ lib = ffi.verify("wchar_t foo(wchar_t x) { return x+1; }")
+ assert lib.foo(uniexample1) == uniexample2
+
def test_no_argument():
ffi = FFI()
ffi.cdef("int foo(void);")
_______________________________________________
pypy-commit mailing list
[email protected]
http://mail.python.org/mailman/listinfo/pypy-commit