Author: Armin Rigo <ar...@tunes.org> Branch: wchar_t Changeset: r605:a0c1585fe7d5 Date: 2012-07-09 16:03 +0200 http://bitbucket.org/cffi/cffi/changeset/a0c1585fe7d5/
Log: in-progress diff --git a/c/_cffi_backend.c b/c/_cffi_backend.c --- a/c/_cffi_backend.c +++ b/c/_cffi_backend.c @@ -762,24 +762,46 @@ return 0; } else if (ctitem->ct_flags & CT_PRIMITIVE_CHAR) { - char *srcdata; - Py_ssize_t n; - if (!PyString_Check(init)) { - expected = "str or list or tuple"; - goto cannot_convert; + if (ctitem->ct_size == sizeof(char)) { + char *srcdata; + Py_ssize_t n; + if (!PyString_Check(init)) { + expected = "str or list or tuple"; + goto cannot_convert; + } + n = PyString_GET_SIZE(init); + if (ct->ct_length >= 0 && n > ct->ct_length) { + PyErr_Format(PyExc_IndexError, + "initializer string is too long for '%s' " + "(got %zd characters)", ct->ct_name, n); + return -1; + } + if (n != ct->ct_length) + n++; + srcdata = PyString_AS_STRING(init); + memcpy(data, srcdata, n); + return 0; } - n = PyString_GET_SIZE(init); - if (ct->ct_length >= 0 && n > ct->ct_length) { - PyErr_Format(PyExc_IndexError, - "initializer string is too long for '%s' " - "(got %zd characters)", ct->ct_name, n); - return -1; +#ifdef HAVE_WCHAR_H + else { + Py_ssize_t n; + if (!PyUnicode_Check(init)) { + expected = "unicode or list or tuple"; + goto cannot_convert; + } + n = _my_PyUnicode_SizeAsWideChar(init); + if (ct->ct_length >= 0 && n > ct->ct_length) { + PyErr_Format(PyExc_IndexError, + "initializer unicode is too long for '%s' " + "(got %zd characters)", ct->ct_name, n); + return -1; + } + if (n != ct->ct_length) + n++; + _my_PyUnicode_AsWideChar(init, (wchar_t *)data, n); + return 0; } - if (n != ct->ct_length) - n++; - srcdata = PyString_AS_STRING(init); - memcpy(data, srcdata, n); - return 0; +#endif } else { expected = "list or tuple"; @@ -1153,18 +1175,17 @@ else if (cd->c_type->ct_itemdescr != NULL && cd->c_type->ct_itemdescr->ct_flags & CT_PRIMITIVE_CHAR && cd->c_type->ct_itemdescr->ct_size > sizeof(char)) { - abort(); Py_ssize_t length; if (cd->c_type->ct_flags & CT_ARRAY) { - const char *start = cd->c_data; - const char *end; - length = get_array_length(cd); - end = (const char *)memchr(start, 0, length); - if (end != NULL) - length = end - start; + const wchar_t *start = (wchar_t *)cd->c_data; + const Py_ssize_t lenmax = get_array_length(cd); + length = 0; + while (length < lenmax && start[length]) + length++; } else { + abort(); if (cd->c_data == NULL) { PyObject *s = cdata_repr(cd); if (s != NULL) { @@ -1178,7 +1199,7 @@ length = strlen(cd->c_data); } - return PyString_FromStringAndSize(cd->c_data, length); + return _my_PyUnicode_FromWideChar((wchar_t *)cd->c_data, length); } else return cdata_repr(cd); @@ -1949,6 +1970,10 @@ /* from a string, we add the null terminator */ explicitlength = PyString_GET_SIZE(init) + 1; } + else if (PyUnicode_Check(init)) { + /* from a unicode, we add the null terminator */ + explicitlength = PyUnicode_GET_SIZE(init) + 1; + } else { explicitlength = PyNumber_AsSsize_t(init, PyExc_OverflowError); if (explicitlength < 0) { diff --git a/c/test_c.py b/c/test_c.py --- a/c/test_c.py +++ b/c/test_c.py @@ -1311,21 +1311,36 @@ else: py.test.raises(ValueError, "s.a1 = u'\U00012345'") # - a = new_array_type(BWCharP, u'hello \u1234 world') + BWCharArray = new_array_type(BWCharP, None) + a = newp(BWCharArray, u'hello \u1234 world') assert len(a) == 14 # including the final null assert unicode(a) == u'hello \u1234 world' - py.test.raises(UnicodeEncodeError, str, a) + a[13] = u'!' + assert unicode(a) == u'hello \u1234 world!' + assert str(a) == repr(a) assert a[6] == u'\u1234' a[6] = '-' assert str(a) == 'hello - world' # + if wchar4: + u = u'\U00012345\U00012346\U00012347' + a = newp(BWCharArray, u) + assert len(a) == 4 + assert unicode(a) == u + assert len(list(a)) == 4 + expected = [u'\U00012345', u'\U00012346', u'\U00012347', unichr(0)] + assert list(a) == expected + got = [a[i] for i in range(4)] + assert got == expected + py.test.raises(IndexError, 'a[4]') + # w = cast(BWChar, 'a') assert repr(w) == "<cdata 'wchar_t' u'a'>" assert str(w) == 'a' assert unicode(w) == u'a' w = cast(BWChar, 0x1234) assert repr(w) == "<cdata 'wchar_t' u'\u1234'>" - py.test.raises(UnicodeEncodeError, str, w) + py.test.raises(xxUnicodeEncodeError, str, w) assert unicode(w) == u'\u1234' assert int(w) == 0x1234 # @@ -1333,13 +1348,13 @@ assert str(p) == 'hello - world' assert unicode(p) == u'hello - world' p[6] = u'\u2345' - py.test.raises(UnicodeEncodeError, str, p) + py.test.raises(xxUnicodeEncodeError, str, p) assert unicode(p) == u'hello \u2345 world' # s = newp(BStructPtr, [u'\u1234', p]) assert s.a1 == u'\u1234' assert s.a2 == p - py.test.raises(UnicodeEncodeError, str, s.a2) + py.test.raises(xxUnicodeEncodeError, str, s.a2) assert unicode(s.a2) == u'hello \u2345 world' # q = cast(BWCharP, 0) diff --git a/c/wchar_helper.h b/c/wchar_helper.h --- a/c/wchar_helper.h +++ b/c/wchar_helper.h @@ -63,6 +63,11 @@ #endif +#define IS_SURROGATE(u) (0xD800 <= (u)[0] && (u)[0] <= 0xDBFF && \ + 0xDC00 <= (u)[1] && (u)[1] <= 0xDFFF) +#define AS_SURROGATE(u) (0x10000 + (((u)[0] - 0xD800) << 10) + \ + ((u)[1] - 0xDC00)) + static int _my_PyUnicode_AsSingleWideChar(PyObject *unicode, wchar_t *result) { Py_UNICODE *u = PyUnicode_AS_UNICODE(unicode); @@ -71,12 +76,46 @@ return 0; } #ifdef CONVERT_WCHAR_TO_SURROGATES - if (PyUnicode_GET_SIZE(unicode) == 2 && - 0xD800 <= u[0] && u[0] <= 0xDBFF && - 0xDC00 <= u[1] && u[1] <= 0xDFFF) { - *result = 0x10000 + ((u[0] - 0xD800) << 10) + (u[1] - 0xDC00); + if (PyUnicode_GET_SIZE(unicode) == 2 && IS_SURROGATE(u)) { + *result = AS_SURROGATE(u); return 0; } #endif return -1; } + +static Py_ssize_t _my_PyUnicode_SizeAsWideChar(PyObject *unicode) +{ + Py_ssize_t length = PyUnicode_GET_SIZE(unicode); + Py_ssize_t result = length; + +#ifdef CONVERT_WCHAR_TO_SURROGATES + Py_UNICODE *u = PyUnicode_AS_UNICODE(unicode); + Py_ssize_t i; + + for (i=0; i<length-1; i++) { + if (IS_SURROGATE(u+i)) + result--; + } +#endif + return result; +} + +static void _my_PyUnicode_AsWideChar(PyObject *unicode, + wchar_t *result, + Py_ssize_t resultlen) +{ + Py_UNICODE *u = PyUnicode_AS_UNICODE(unicode); + Py_ssize_t i; + for (i=0; i<resultlen; i++) { + wchar_t ordinal = *u; +#ifdef CONVERT_WCHAR_TO_SURROGATES + if (IS_SURROGATE(u)) { + ordinal = AS_SURROGATE(u); + u++; + } +#endif + result[i] = ordinal; + u++; + } +} _______________________________________________ pypy-commit mailing list pypy-commit@python.org http://mail.python.org/mailman/listinfo/pypy-commit