Author: Armin Rigo <ar...@tunes.org>
Branch:
Changeset: r613:b0c29bd26001
Date: 2012-07-09 17:13 +0200
http://bitbucket.org/cffi/cffi/changeset/b0c29bd26001/
Log: Merge the 'wchar_t' branch, adding support for wchar_t. diff --git a/c/_cffi_backend.c b/c/_cffi_backend.c --- a/c/_cffi_backend.c +++ b/c/_cffi_backend.c @@ -27,7 +27,7 @@ /* base type flag: exactly one of the following: */ #define CT_PRIMITIVE_SIGNED 1 /* signed integer */ #define CT_PRIMITIVE_UNSIGNED 2 /* unsigned integer */ -#define CT_PRIMITIVE_CHAR 4 /* char (and, later, wchar_t) */ +#define CT_PRIMITIVE_CHAR 4 /* char, wchar_t */ #define CT_PRIMITIVE_FLOAT 8 /* float, double */ #define CT_POINTER 16 /* pointer, excluding ptr-to-func */ #define CT_ARRAY 32 /* array */ @@ -157,6 +157,10 @@ # endif #endif +#ifdef HAVE_WCHAR_H +# include "wchar_helper.h" +#endif + /************************************************************/ static CTypeDescrObject * @@ -602,7 +606,12 @@ return PyFloat_FromDouble(value); } else if (ct->ct_flags & CT_PRIMITIVE_CHAR) { - return PyString_FromStringAndSize(data, 1); + if (ct->ct_size == sizeof(char)) + return PyString_FromStringAndSize(data, 1); +#ifdef HAVE_WCHAR_H + else + return _my_PyUnicode_FromWideChar((wchar_t *)data, 1); +#endif } PyErr_Format(PyExc_SystemError, @@ -664,8 +673,9 @@ return (unsigned char)(PyString_AS_STRING(init)[0]); } if (CData_Check(init) && - (((CDataObject *)init)->c_type->ct_flags & CT_PRIMITIVE_CHAR)) { - return (unsigned char)(((CDataObject *)init)->c_data[0]); + (((CDataObject *)init)->c_type->ct_flags & CT_PRIMITIVE_CHAR) && + (((CDataObject *)init)->c_type->ct_size == sizeof(char))) { + return *(unsigned char *)((CDataObject *)init)->c_data; } PyErr_Format(PyExc_TypeError, "initializer for ctype 'char' must be a string of length 1, " @@ -673,6 +683,26 @@ return -1; } +#ifdef HAVE_WCHAR_H +static wchar_t _convert_to_wchar_t(PyObject *init) +{ + if (PyUnicode_Check(init)) { + wchar_t ordinal; + if (_my_PyUnicode_AsSingleWideChar(init, &ordinal) == 0) + return ordinal; + } + if (CData_Check(init) && + (((CDataObject *)init)->c_type->ct_flags & CT_PRIMITIVE_CHAR) && + (((CDataObject 
*)init)->c_type->ct_size == sizeof(wchar_t))) { + return *(wchar_t *)((CDataObject *)init)->c_data; + } + PyErr_Format(PyExc_TypeError, + "initializer for ctype 'wchar_t' must be a unicode string " + "of length 1, not %.200s", Py_TYPE(init)->tp_name); + return (wchar_t)-1; +} +#endif + static int _convert_error(PyObject *init, const char *ct_name, const char *expected) { @@ -732,24 +762,46 @@ return 0; } else if (ctitem->ct_flags & CT_PRIMITIVE_CHAR) { - char *srcdata; - Py_ssize_t n; - if (!PyString_Check(init)) { - expected = "str or list or tuple"; - goto cannot_convert; + if (ctitem->ct_size == sizeof(char)) { + char *srcdata; + Py_ssize_t n; + if (!PyString_Check(init)) { + expected = "str or list or tuple"; + goto cannot_convert; + } + n = PyString_GET_SIZE(init); + if (ct->ct_length >= 0 && n > ct->ct_length) { + PyErr_Format(PyExc_IndexError, + "initializer string is too long for '%s' " + "(got %zd characters)", ct->ct_name, n); + return -1; + } + if (n != ct->ct_length) + n++; + srcdata = PyString_AS_STRING(init); + memcpy(data, srcdata, n); + return 0; } - n = PyString_GET_SIZE(init); - if (ct->ct_length >= 0 && n > ct->ct_length) { - PyErr_Format(PyExc_IndexError, - "initializer string is too long for '%s' " - "(got %zd characters)", ct->ct_name, n); - return -1; +#ifdef HAVE_WCHAR_H + else { + Py_ssize_t n; + if (!PyUnicode_Check(init)) { + expected = "unicode or list or tuple"; + goto cannot_convert; + } + n = _my_PyUnicode_SizeAsWideChar(init); + if (ct->ct_length >= 0 && n > ct->ct_length) { + PyErr_Format(PyExc_IndexError, + "initializer unicode is too long for '%s' " + "(got %zd characters)", ct->ct_name, n); + return -1; + } + if (n != ct->ct_length) + n++; + _my_PyUnicode_AsWideChar(init, (wchar_t *)data, n); + return 0; } - if (n != ct->ct_length) - n++; - srcdata = PyString_AS_STRING(init); - memcpy(data, srcdata, n); - return 0; +#endif } else { expected = "list or tuple"; @@ -829,11 +881,22 @@ return 0; } if (ct->ct_flags & CT_PRIMITIVE_CHAR) 
{ - int res = _convert_to_char(init); - if (res < 0) - return -1; - data[0] = res; - return 0; + if (ct->ct_size == sizeof(char)) { + int res = _convert_to_char(init); + if (res < 0) + return -1; + data[0] = res; + return 0; + } +#ifdef HAVE_WCHAR_H + else { + wchar_t res = _convert_to_wchar_t(init); + if (res == (wchar_t)-1 && PyErr_Occurred()) + return -1; + *(wchar_t *)data = res; + return 0; + } +#endif } if (ct->ct_flags & (CT_STRUCT|CT_UNION)) { @@ -1064,11 +1127,13 @@ static PyObject *cdata_str(CDataObject *cd) { - if (cd->c_type->ct_flags & CT_PRIMITIVE_CHAR) { + if (cd->c_type->ct_flags & CT_PRIMITIVE_CHAR && + cd->c_type->ct_size == sizeof(char)) { return PyString_FromStringAndSize(cd->c_data, 1); } else if (cd->c_type->ct_itemdescr != NULL && - cd->c_type->ct_itemdescr->ct_flags & CT_PRIMITIVE_CHAR) { + cd->c_type->ct_itemdescr->ct_flags & CT_PRIMITIVE_CHAR && + cd->c_type->ct_itemdescr->ct_size == sizeof(char)) { Py_ssize_t length; if (cd->c_type->ct_flags & CT_ARRAY) { @@ -1101,6 +1166,48 @@ return Py_TYPE(cd)->tp_repr((PyObject *)cd); } +#ifdef HAVE_WCHAR_H +static PyObject *cdata_unicode(CDataObject *cd) +{ + if (cd->c_type->ct_flags & CT_PRIMITIVE_CHAR && + cd->c_type->ct_size == sizeof(wchar_t)) { + return _my_PyUnicode_FromWideChar((wchar_t *)cd->c_data, 1); + } + else if (cd->c_type->ct_itemdescr != NULL && + cd->c_type->ct_itemdescr->ct_flags & CT_PRIMITIVE_CHAR && + cd->c_type->ct_itemdescr->ct_size == sizeof(wchar_t)) { + Py_ssize_t length; + const wchar_t *start = (wchar_t *)cd->c_data; + + if (cd->c_type->ct_flags & CT_ARRAY) { + const Py_ssize_t lenmax = get_array_length(cd); + length = 0; + while (length < lenmax && start[length]) + length++; + } + else { + if (cd->c_data == NULL) { + PyObject *s = cdata_repr(cd); + if (s != NULL) { + PyErr_Format(PyExc_RuntimeError, + "cannot use unicode() on %s", + PyString_AS_STRING(s)); + Py_DECREF(s); + } + return NULL; + } + length = 0; + while (start[length]) + length++; + } + + return 
_my_PyUnicode_FromWideChar((wchar_t *)cd->c_data, length); + } + else + return cdata_repr(cd); +} +#endif + static PyObject *cdataowning_repr(CDataObject *cd) { Py_ssize_t size; @@ -1152,7 +1259,12 @@ return convert_to_object(cd->c_data, cd->c_type); } else if (cd->c_type->ct_flags & CT_PRIMITIVE_CHAR) { - return PyInt_FromLong((unsigned char)cd->c_data[0]); + if (cd->c_type->ct_size == sizeof(char)) + return PyInt_FromLong((unsigned char)cd->c_data[0]); +#ifdef HAVE_WCHAR_H + else + return PyInt_FromLong((long)*(wchar_t *)cd->c_data); +#endif } else if (cd->c_type->ct_flags & CT_PRIMITIVE_FLOAT) { PyObject *o = convert_to_object(cd->c_data, cd->c_type); @@ -1552,12 +1664,27 @@ argtype = (CTypeDescrObject *)PyTuple_GET_ITEM(fvarargs, i); if ((argtype->ct_flags & CT_POINTER) && - (argtype->ct_itemdescr->ct_flags & CT_PRIMITIVE_CHAR) && - PyString_Check(obj)) { - /* special case: Python string -> cdata 'char *' */ - *(char **)data = PyString_AS_STRING(obj); + (argtype->ct_itemdescr->ct_flags & CT_PRIMITIVE_CHAR)) { + if (argtype->ct_itemdescr->ct_size == sizeof(char)) { + if (PyString_Check(obj)) { + /* special case: Python string -> cdata 'char *' */ + *(char **)data = PyString_AS_STRING(obj); + continue; + } + } +#ifdef HAVE_WCHAR_H + else { + if (PyUnicode_Check(obj)) { + /* Python Unicode string -> cdata 'wchar_t *': + not supported yet */ + PyErr_SetString(PyExc_NotImplementedError, + "automatic unicode-to-'wchar_t *' conversion"); + goto error; + } + } +#endif } - else if (convert_from_object(data, argtype, obj) < 0) + if (convert_from_object(data, argtype, obj) < 0) goto error; } @@ -1645,6 +1772,11 @@ (objobjargproc)cdata_ass_sub, /*mp_ass_subscript*/ }; +static PyMethodDef CData_methods[] = { + {"__unicode__", (PyCFunction)cdata_unicode, METH_NOARGS}, + {NULL, NULL} /* sentinel */ +}; + static PyTypeObject CData_Type = { PyVarObject_HEAD_INIT(NULL, 0) "_cffi_backend.CData", @@ -1672,6 +1804,8 @@ cdata_richcompare, /* tp_richcompare */ 0, /* tp_weaklistoffset 
*/ (getiterfunc)cdata_iter, /* tp_iter */ + 0, /* tp_iternext */ + CData_methods, /* tp_methods */ }; static PyTypeObject CDataOwning_Type = { @@ -1848,7 +1982,7 @@ return NULL; } if (ctitem->ct_flags & CT_PRIMITIVE_CHAR) - datasize += sizeof(char); /* forcefully add a null character */ + datasize *= 2; /* forcefully add another character: a null */ } else if (ct->ct_flags & CT_ARRAY) { dataoffset = offsetof(CDataObject_own_nolength, alignment); @@ -1861,6 +1995,10 @@ /* from a string, we add the null terminator */ explicitlength = PyString_GET_SIZE(init) + 1; } + else if (PyUnicode_Check(init)) { + /* from a unicode, we add the null terminator */ + explicitlength = _my_PyUnicode_SizeAsWideChar(init) + 1; + } else { explicitlength = PyNumber_AsSsize_t(init, PyExc_OverflowError); if (explicitlength < 0) { @@ -1973,6 +2111,18 @@ value = (unsigned char)PyString_AS_STRING(ob)[0]; } } +#ifdef HAVE_WCHAR_H + else if (PyUnicode_Check(ob)) { + wchar_t ordinal; + if (_my_PyUnicode_AsSingleWideChar(ob, &ordinal) < 0) { + PyErr_Format(PyExc_TypeError, + "cannot cast unicode of length %zd to ctype '%s'", + PyUnicode_GET_SIZE(ob), ct->ct_name); + return NULL; + } + value = (long)ordinal; + } +#endif else { value = _my_PyLong_AsUnsignedLongLong(ob, 0); if (value == (unsigned PY_LONG_LONG)-1 && PyErr_Occurred()) @@ -2240,7 +2390,6 @@ { "ptrdiff_t", sizeof(ptrdiff_t) }, { "size_t", sizeof(size_t) | UNSIGNED }, { "ssize_t", sizeof(ssize_t) }, - /*{ "wchar_t", sizeof(wchar_t) | UNSIGNED },*/ { NULL } }; #undef UNSIGNED @@ -2284,10 +2433,17 @@ EPTYPE(ull, unsigned long long, CT_PRIMITIVE_UNSIGNED ) \ EPTYPE(f, float, CT_PRIMITIVE_FLOAT ) \ EPTYPE(d, double, CT_PRIMITIVE_FLOAT ) +#ifdef HAVE_WCHAR_H +# define ENUM_PRIMITIVE_TYPES_WCHAR \ + EPTYPE(wc, wchar_t, CT_PRIMITIVE_CHAR ) +#else +# define ENUM_PRIMITIVE_TYPES_WCHAR /* nothing */ +#endif #define EPTYPE(code, typename, flags) \ struct aligncheck_##code { char x; typename y; }; ENUM_PRIMITIVE_TYPES + ENUM_PRIMITIVE_TYPES_WCHAR 
#undef EPTYPE CTypeDescrObject *td; @@ -2301,7 +2457,9 @@ flags \ }, ENUM_PRIMITIVE_TYPES + ENUM_PRIMITIVE_TYPES_WCHAR #undef EPTYPE +#undef ENUM_PRIMITIVE_TYPES_WCHAR #undef ENUM_PRIMITIVE_TYPES { NULL } }; @@ -2314,6 +2472,11 @@ for (ptypes=types; ; ptypes++) { if (ptypes->name == NULL) { +#ifndef HAVE_WCHAR_H + if (strcmp(name, "wchar_t")) + PyErr_SetString(PyExc_NotImplementedError, name); + else +#endif PyErr_SetString(PyExc_KeyError, name); return NULL; } @@ -2358,11 +2521,11 @@ td->ct_length = ptypes->align; td->ct_extra = ffitype; td->ct_flags = ptypes->flags; - if (td->ct_flags & CT_PRIMITIVE_SIGNED) { + if (td->ct_flags & (CT_PRIMITIVE_SIGNED | CT_PRIMITIVE_CHAR)) { if (td->ct_size <= sizeof(long)) td->ct_flags |= CT_PRIMITIVE_FITS_LONG; } - else if (td->ct_flags & (CT_PRIMITIVE_UNSIGNED | CT_PRIMITIVE_CHAR)) { + else if (td->ct_flags & CT_PRIMITIVE_UNSIGNED) { if (td->ct_size < sizeof(long)) td->ct_flags |= CT_PRIMITIVE_FITS_LONG; } @@ -2592,6 +2755,10 @@ if (!(ftype->ct_flags & (CT_PRIMITIVE_SIGNED | CT_PRIMITIVE_UNSIGNED | CT_PRIMITIVE_CHAR)) || +#ifdef HAVE_WCHAR_H + ((ftype->ct_flags & CT_PRIMITIVE_CHAR) + && ftype->ct_size == sizeof(wchar_t)) || +#endif fbitsize == 0 || fbitsize > 8 * ftype->ct_size) { PyErr_Format(PyExc_TypeError, "invalid bit field '%s'", @@ -3763,6 +3930,12 @@ return PyString_FromStringAndSize(&x, 1); } +#ifdef HAVE_WCHAR_H +static PyObject *_cffi_from_c_wchar_t(wchar_t x) { + return _my_PyUnicode_FromWideChar(&x, 1); +} +#endif + static void *cffi_exports[] = { _cffi_to_c_char_p, _cffi_to_c_signed_char, @@ -3788,6 +3961,13 @@ convert_to_object, convert_from_object, convert_struct_to_owning_object, +#ifdef HAVE_WCHAR_H + _convert_to_wchar_t, + _cffi_from_c_wchar_t, +#else + 0, + 0, +#endif }; /************************************************************/ diff --git a/c/test_c.py b/c/test_c.py --- a/c/test_c.py +++ b/c/test_c.py @@ -1279,6 +1279,121 @@ py.test.raises(TypeError, newp, BStructPtr, [cast(BIntP, 0)]) 
py.test.raises(TypeError, newp, BStructPtr, [cast(BFunc2, 0)]) +def test_wchar(): + BWChar = new_primitive_type("wchar_t") + BInt = new_primitive_type("int") + pyuni4 = {1: True, 2: False}[len(u'\U00012345')] + wchar4 = {2: False, 4: True}[sizeof(BWChar)] + assert str(cast(BWChar, 0x45)) == "<cdata 'wchar_t' u'E'>" + assert str(cast(BWChar, 0x1234)) == "<cdata 'wchar_t' u'\u1234'>" + if wchar4: + x = cast(BWChar, 0x12345) + assert str(x) == "<cdata 'wchar_t' u'\U00012345'>" + assert unicode(x) == u'\U00012345' + else: + assert not pyuni4 + # + BWCharP = new_pointer_type(BWChar) + BStruct = new_struct_type("foo_s") + BStructPtr = new_pointer_type(BStruct) + complete_struct_or_union(BStruct, [('a1', BWChar, -1), + ('a2', BWCharP, -1)]) + s = newp(BStructPtr) + s.a1 = u'\x00' + assert s.a1 == u'\x00' + py.test.raises(TypeError, "s.a1 = 'a'") + py.test.raises(TypeError, "s.a1 = '\xFF'") + s.a1 = u'\u1234' + assert s.a1 == u'\u1234' + if pyuni4: + assert wchar4 + s.a1 = u'\U00012345' + assert s.a1 == u'\U00012345' + elif wchar4: + s.a1 = cast(BWChar, 0x12345) + assert s.a1 == u'\ud808\udf45' + s.a1 = u'\ud807\udf44' + assert s.a1 == u'\U00011f44' + else: + py.test.raises(ValueError, "s.a1 = u'\U00012345'") + # + BWCharArray = new_array_type(BWCharP, None) + a = newp(BWCharArray, u'hello \u1234 world') + assert len(a) == 14 # including the final null + assert unicode(a) == u'hello \u1234 world' + a[13] = u'!' + assert unicode(a) == u'hello \u1234 world!' + assert str(a) == repr(a) + assert a[6] == u'\u1234' + a[6] = u'-' + assert unicode(a) == 'hello - world!' 
+ assert str(a) == repr(a) + # + if wchar4: + u = u'\U00012345\U00012346\U00012347' + a = newp(BWCharArray, u) + assert len(a) == 4 + assert unicode(a) == u + assert len(list(a)) == 4 + expected = [u'\U00012345', u'\U00012346', u'\U00012347', unichr(0)] + assert list(a) == expected + got = [a[i] for i in range(4)] + assert got == expected + py.test.raises(IndexError, 'a[4]') + # + w = cast(BWChar, 'a') + assert repr(w) == "<cdata 'wchar_t' u'a'>" + assert str(w) == repr(w) + assert unicode(w) == u'a' + assert int(w) == ord('a') + w = cast(BWChar, 0x1234) + assert repr(w) == "<cdata 'wchar_t' u'\u1234'>" + assert str(w) == repr(w) + assert unicode(w) == u'\u1234' + assert int(w) == 0x1234 + w = cast(BWChar, u'\u1234') + assert repr(w) == "<cdata 'wchar_t' u'\u1234'>" + assert str(w) == repr(w) + assert unicode(w) == u'\u1234' + assert int(w) == 0x1234 + w = cast(BInt, u'\u1234') + assert repr(w) == "<cdata 'int' 4660>" + if wchar4: + w = cast(BWChar, u'\U00012345') + assert repr(w) == "<cdata 'wchar_t' u'\U00012345'>" + assert str(w) == repr(w) + assert unicode(w) == u'\U00012345' + assert int(w) == 0x12345 + w = cast(BInt, u'\U00012345') + assert repr(w) == "<cdata 'int' 74565>" + py.test.raises(TypeError, cast, BInt, u'') + py.test.raises(TypeError, cast, BInt, u'XX') + assert int(cast(BInt, u'a')) == ord('a') + # + a = newp(BWCharArray, u'hello - world') + p = cast(BWCharP, a) + assert unicode(p) == u'hello - world' + p[6] = u'\u2345' + assert unicode(p) == u'hello \u2345 world' + # + s = newp(BStructPtr, [u'\u1234', p]) + assert s.a1 == u'\u1234' + assert s.a2 == p + assert str(s.a2) == repr(s.a2) + assert unicode(s.a2) == u'hello \u2345 world' + # + q = cast(BWCharP, 0) + assert str(q) == repr(q) + py.test.raises(RuntimeError, unicode, q) + # + def cb(p): + assert repr(p).startswith("<cdata 'wchar_t *' 0x") + return len(unicode(p)) + BFunc = new_function_type((BWCharP,), BInt, False) + f = callback(BFunc, cb, -42) + #assert f(u'a\u1234b') == 3 -- not 
implemented + py.test.raises(NotImplementedError, f, u'a\u1234b') + def test_keepalive_struct(): # exception to the no-keepalive rule: p=newp(BStructPtr) returns a # pointer owning the memory, and p[0] returns a pointer to the diff --git a/c/wchar_helper.h b/c/wchar_helper.h new file mode 100644 --- /dev/null +++ b/c/wchar_helper.h @@ -0,0 +1,121 @@ +/* + * wchar_t helpers + */ + +#if (Py_UNICODE_SIZE == 2) && (SIZEOF_WCHAR_T == 4) +# define CONVERT_WCHAR_TO_SURROGATES +#endif + + +#if PY_VERSION_HEX < 0x02070000 && defined(CONVERT_WCHAR_TO_SURROGATES) + +/* Before Python 2.7, PyUnicode_FromWideChar is not able to convert + wchar_t values greater than 65535 into two-unicode-characters surrogates. +*/ +static PyObject * +_my_PyUnicode_FromWideChar(register const wchar_t *w, + Py_ssize_t size) +{ + PyObject *unicode; + register Py_ssize_t i; + Py_ssize_t alloc; + const wchar_t *orig_w; + + if (w == NULL) { + PyErr_BadInternalCall(); + return NULL; + } + + alloc = size; + orig_w = w; + for (i = size; i > 0; i--) { + if (*w > 0xFFFF) + alloc++; + w++; + } + w = orig_w; + unicode = PyUnicode_FromUnicode(NULL, alloc); + if (!unicode) + return NULL; + + /* Copy the wchar_t data into the new object */ + { + register Py_UNICODE *u; + u = PyUnicode_AS_UNICODE(unicode); + for (i = size; i > 0; i--) { + if (*w > 0xFFFF) { + wchar_t ordinal = *w++; + ordinal -= 0x10000; + *u++ = 0xD800 | (ordinal >> 10); + *u++ = 0xDC00 | (ordinal & 0x3FF); + } + else + *u++ = *w++; + } + } + return unicode; +} + +#else + +# define _my_PyUnicode_FromWideChar PyUnicode_FromWideChar + +#endif + + +#define IS_SURROGATE(u) (0xD800 <= (u)[0] && (u)[0] <= 0xDBFF && \ + 0xDC00 <= (u)[1] && (u)[1] <= 0xDFFF) +#define AS_SURROGATE(u) (0x10000 + (((u)[0] - 0xD800) << 10) + \ + ((u)[1] - 0xDC00)) + +static int _my_PyUnicode_AsSingleWideChar(PyObject *unicode, wchar_t *result) +{ + Py_UNICODE *u = PyUnicode_AS_UNICODE(unicode); + if (PyUnicode_GET_SIZE(unicode) == 1) { + *result = (wchar_t)(u[0]); + return 
0; + } +#ifdef CONVERT_WCHAR_TO_SURROGATES + if (PyUnicode_GET_SIZE(unicode) == 2 && IS_SURROGATE(u)) { + *result = AS_SURROGATE(u); + return 0; + } +#endif + return -1; +} + +static Py_ssize_t _my_PyUnicode_SizeAsWideChar(PyObject *unicode) +{ + Py_ssize_t length = PyUnicode_GET_SIZE(unicode); + Py_ssize_t result = length; + +#ifdef CONVERT_WCHAR_TO_SURROGATES + Py_UNICODE *u = PyUnicode_AS_UNICODE(unicode); + Py_ssize_t i; + + for (i=0; i<length-1; i++) { + if (IS_SURROGATE(u+i)) + result--; + } +#endif + return result; +} + +static void _my_PyUnicode_AsWideChar(PyObject *unicode, + wchar_t *result, + Py_ssize_t resultlen) +{ + Py_UNICODE *u = PyUnicode_AS_UNICODE(unicode); + Py_ssize_t i; + for (i=0; i<resultlen; i++) { + wchar_t ordinal = *u; +#ifdef CONVERT_WCHAR_TO_SURROGATES + if (IS_SURROGATE(u)) { + ordinal = AS_SURROGATE(u); + u++; + } +#endif + result[i] = ordinal; + u++; + } +} diff --git a/cffi/backend_ctypes.py b/cffi/backend_ctypes.py --- a/cffi/backend_ctypes.py +++ b/cffi/backend_ctypes.py @@ -267,8 +267,6 @@ if size == ctypes.sizeof(ctypes.c_size_t): result['size_t'] = size | UNSIGNED result['ssize_t'] = size - #if size == ctypes.sizeof(ctypes.c_wchar): - # result['wchar_t'] = size | UNSIGNED return result def load_library(self, path): @@ -292,6 +290,8 @@ return CTypesVoid def new_primitive_type(self, name): + if name == 'wchar_t': + raise NotImplementedError(name) ctype = self.PRIMITIVE_TYPES[name] if name == 'char': kind = 'char' diff --git a/cffi/cparser.py b/cffi/cparser.py --- a/cffi/cparser.py +++ b/cffi/cparser.py @@ -53,7 +53,7 @@ # internals of CParser... 
the following registers the # typedefs, because their presence or absence influences the # parsing itself (but what they are typedef'ed to plays no role) - csourcelines = [] + csourcelines = ['typedef int wchar_t;'] for name in sorted(self._declarations): if name.startswith('typedef '): csourcelines.append('typedef int %s;' % (name[8:],)) diff --git a/cffi/model.py b/cffi/model.py --- a/cffi/model.py +++ b/cffi/model.py @@ -53,7 +53,7 @@ return self.name + replace_with def is_char_type(self): - return self.name == 'char' + return self.name in ('char', 'wchar_t') def is_signed_type(self): return self.is_integer_type() and not self.is_unsigned_type() def is_unsigned_type(self): diff --git a/cffi/verifier.py b/cffi/verifier.py --- a/cffi/verifier.py +++ b/cffi/verifier.py @@ -619,7 +619,11 @@ ((int(*)(char *, CTypeDescrObject *, PyObject *))_cffi_exports[17]) #define _cffi_from_c_struct \ ((PyObject *(*)(char *, CTypeDescrObject *))_cffi_exports[18]) -#define _CFFI_NUM_EXPORTS 19 +#define _cffi_to_c_wchar_t \ + ((wchar_t(*)(PyObject *))_cffi_exports[19]) +#define _cffi_from_c_wchar_t \ + ((PyObject *(*)(wchar_t))_cffi_exports[20]) +#define _CFFI_NUM_EXPORTS 21 #if SIZEOF_LONG < SIZEOF_LONG_LONG # define _cffi_to_c_long_long PyLong_AsLongLong diff --git a/testing/backend_tests.py b/testing/backend_tests.py --- a/testing/backend_tests.py +++ b/testing/backend_tests.py @@ -6,7 +6,7 @@ SIZE_OF_LONG = ctypes.sizeof(ctypes.c_long) SIZE_OF_SHORT = ctypes.sizeof(ctypes.c_short) SIZE_OF_PTR = ctypes.sizeof(ctypes.c_void_p) -#SIZE_OF_WCHAR = ctypes.sizeof(ctypes.c_wchar) +SIZE_OF_WCHAR = ctypes.sizeof(ctypes.c_wchar) class BackendTests: @@ -41,7 +41,6 @@ self._test_int_type(ffi, 'ptrdiff_t', SIZE_OF_PTR, False) self._test_int_type(ffi, 'size_t', SIZE_OF_PTR, True) self._test_int_type(ffi, 'ssize_t', SIZE_OF_PTR, False) - #self._test_int_type(ffi, 'wchar_t', SIZE_OF_WCHAR, True) def _test_int_type(self, ffi, c_decl, size, unsigned): if unsigned: @@ -274,8 +273,9 @@ assert 
ffi.new("char", "\xff")[0] == '\xff' assert ffi.new("char")[0] == '\x00' assert int(ffi.cast("char", 300)) == 300 - 256 - assert bool(ffi.new("char")) + assert bool(ffi.cast("char", 0)) py.test.raises(TypeError, ffi.new, "char", 32) + py.test.raises(TypeError, ffi.new, "char", u"x") py.test.raises(TypeError, ffi.new, "char", "foo") # p = ffi.new("char[]", ['a', 'b', '\x9c']) @@ -297,6 +297,63 @@ assert [p[i] for i in range(2)] == ['a', 'b'] py.test.raises(IndexError, ffi.new, "char[2]", "abc") + def check_wchar_t(self, ffi): + try: + ffi.cast("wchar_t", 0) + except NotImplementedError: + py.test.skip("NotImplementedError: wchar_t") + + def test_wchar_t(self): + ffi = FFI(backend=self.Backend()) + self.check_wchar_t(ffi) + assert ffi.new("wchar_t", u'x')[0] == u'x' + assert ffi.new("wchar_t", unichr(1234))[0] == unichr(1234) + if SIZE_OF_WCHAR > 2: + assert ffi.new("wchar_t", u'\U00012345')[0] == u'\U00012345' + else: + py.test.raises(TypeError, ffi.new, "wchar_t", u'\U00012345') + assert ffi.new("wchar_t")[0] == u'\x00' + assert int(ffi.cast("wchar_t", 300)) == 300 + assert bool(ffi.cast("wchar_t", 0)) + py.test.raises(TypeError, ffi.new, "wchar_t", 32) + py.test.raises(TypeError, ffi.new, "wchar_t", "foo") + # + p = ffi.new("wchar_t[]", [u'a', u'b', unichr(1234)]) + assert len(p) == 3 + assert p[0] == u'a' + assert p[1] == u'b' and type(p[1]) is unicode + assert p[2] == unichr(1234) + p[0] = u'x' + assert p[0] == u'x' and type(p[0]) is unicode + p[1] = unichr(1357) + assert p[1] == unichr(1357) + p = ffi.new("wchar_t[]", u"abcd") + assert len(p) == 5 + assert p[4] == u'\x00' + p = ffi.new("wchar_t[]", u"a\u1234b") + assert len(p) == 4 + assert p[1] == unichr(0x1234) + # + p = ffi.new("wchar_t[]", u'\U00023456') + if SIZE_OF_WCHAR == 2: + assert sys.maxunicode == 0xffff + assert len(p) == 3 + assert p[0] == u'\ud84d' + assert p[1] == u'\udc56' + assert p[2] == u'\x00' + else: + assert len(p) == 2 + assert p[0] == u'\U00023456' + assert p[1] == u'\x00' + # + p = 
ffi.new("wchar_t[4]", u"ab") + assert len(p) == 4 + assert [p[i] for i in range(4)] == [u'a', u'b', u'\x00', u'\x00'] + p = ffi.new("wchar_t[2]", u"ab") + assert len(p) == 2 + assert [p[i] for i in range(2)] == [u'a', u'b'] + py.test.raises(IndexError, ffi.new, "wchar_t[2]", u"abc") + def test_none_as_null_doesnt_work(self): ffi = FFI(backend=self.Backend()) p = ffi.new("int*[1]") @@ -492,6 +549,14 @@ assert str(ffi.new("char", "x")) == "x" assert str(ffi.new("char", "\x00")) == "" + def test_unicode_from_wchar_pointer(self): + ffi = FFI(backend=self.Backend()) + self.check_wchar_t(ffi) + assert unicode(ffi.new("wchar_t", u"x")) == u"x" + assert unicode(ffi.new("wchar_t", u"\x00")) == u"" + x = ffi.new("wchar_t", u"\x00") + assert str(x) == repr(x) + def test_string_from_char_array(self): ffi = FFI(backend=self.Backend()) assert str(ffi.cast("char", "x")) == "x" @@ -509,6 +574,28 @@ p = ffi.cast("char *", a) assert str(p) == 'hello' + def test_string_from_wchar_array(self): + ffi = FFI(backend=self.Backend()) + self.check_wchar_t(ffi) + assert unicode(ffi.cast("wchar_t", "x")) == u"x" + assert unicode(ffi.cast("wchar_t", u"x")) == u"x" + x = ffi.cast("wchar_t", "x") + assert str(x) == repr(x) + # + p = ffi.new("wchar_t[]", u"hello.") + p[5] = u'!' + assert unicode(p) == u"hello!" 
+ p[6] = unichr(1234) + assert unicode(p) == u"hello!\u04d2" + p[3] = u'\x00' + assert unicode(p) == u"hel" + py.test.raises(IndexError, "p[7] = u'X'") + # + a = ffi.new("wchar_t[]", u"hello\x00world") + assert len(a) == 12 + p = ffi.cast("wchar_t *", a) + assert unicode(p) == u'hello' + def test_fetch_const_char_p_field(self): # 'const' is ignored so far ffi = FFI(backend=self.Backend()) @@ -521,6 +608,18 @@ s.name = ffi.NULL assert s.name == ffi.NULL + def test_fetch_const_wchar_p_field(self): + # 'const' is ignored so far + ffi = FFI(backend=self.Backend()) + self.check_wchar_t(ffi) + ffi.cdef("struct foo { const wchar_t *name; };") + t = ffi.new("const wchar_t[]", u"testing") + s = ffi.new("struct foo", [t]) + assert type(s.name) not in (str, unicode) + assert unicode(s.name) == u"testing" + s.name = ffi.NULL + assert s.name == ffi.NULL + def test_voidp(self): ffi = FFI(backend=self.Backend()) py.test.raises(TypeError, ffi.new, "void") @@ -630,6 +729,19 @@ p = ffi.cast("int", "\x81") assert int(p) == 0x81 + def test_wchar_cast(self): + ffi = FFI(backend=self.Backend()) + self.check_wchar_t(ffi) + p = ffi.cast("int", ffi.cast("wchar_t", unichr(1234))) + assert int(p) == 1234 + p = ffi.cast("long long", ffi.cast("wchar_t", -1)) + if SIZE_OF_WCHAR == 2: # 2 bytes, unsigned + assert int(p) == 0xffff + else: # 4 bytes, signed + assert int(p) == -1 + p = ffi.cast("int", unichr(1234)) + assert int(p) == 1234 + def test_cast_array_to_charp(self): ffi = FFI(backend=self.Backend()) a = ffi.new("short int[]", [0x1234, 0x5678]) diff --git a/testing/test_verify.py b/testing/test_verify.py --- a/testing/test_verify.py +++ b/testing/test_verify.py @@ -68,9 +68,9 @@ all_float_types = ['float', 'double'] def test_primitive_category(): - for typename in all_integer_types + all_float_types + ['char']: + for typename in all_integer_types + all_float_types + ['char', 'wchar_t']: tp = model.PrimitiveType(typename) - assert tp.is_char_type() == (typename == 'char') + assert 
tp.is_char_type() == (typename in ('char', 'wchar_t')) assert tp.is_signed_type() == (typename in all_signed_integer_types) assert tp.is_unsigned_type()== (typename in all_unsigned_integer_types) assert tp.is_integer_type() == (typename in all_integer_types) @@ -104,6 +104,19 @@ assert lib.foo("A") == "B" py.test.raises(TypeError, lib.foo, "bar") +def test_wchar_type(): + ffi = FFI() + if ffi.sizeof('wchar_t') == 2: + uniexample1 = u'\u1234' + uniexample2 = u'\u1235' + else: + uniexample1 = u'\U00012345' + uniexample2 = u'\U00012346' + # + ffi.cdef("wchar_t foo(wchar_t);") + lib = ffi.verify("wchar_t foo(wchar_t x) { return x+1; }") + assert lib.foo(uniexample1) == uniexample2 + def test_no_argument(): ffi = FFI() ffi.cdef("int foo(void);") _______________________________________________ pypy-commit mailing list pypy-commit@python.org http://mail.python.org/mailman/listinfo/pypy-commit