Author: Armin Rigo <ar...@tunes.org> Branch: cffi-char16-char32 Changeset: r91486:e1a00d29e987 Date: 2017-06-02 11:36 +0200 http://bitbucket.org/pypy/pypy/changeset/e1a00d29e987/
Log: in-progress diff --git a/pypy/module/_cffi_backend/ctypeprim.py b/pypy/module/_cffi_backend/ctypeprim.py --- a/pypy/module/_cffi_backend/ctypeprim.py +++ b/pypy/module/_cffi_backend/ctypeprim.py @@ -10,7 +10,7 @@ from rpython.rtyper.tool import rfficache from pypy.interpreter.error import oefmt -from pypy.module._cffi_backend import cdataobj, misc +from pypy.module._cffi_backend import cdataobj, misc, wchar_helper from pypy.module._cffi_backend.ctypeobj import W_CType @@ -148,50 +148,66 @@ return self.space.newbytes(s) -# XXX explicitly use an integer type instead of lltype.UniChar here, -# because for now the latter is defined as unsigned by RPython (even -# though it may be signed when 'wchar_t' is written to C). -WCHAR_INT = {(2, False): rffi.USHORT, - (4, False): rffi.UINT, - (4, True): rffi.INT}[rffi.sizeof(lltype.UniChar), - rfficache.signof_c_type('wchar_t')] -WCHAR_INTP = rffi.CArrayPtr(WCHAR_INT) +class W_CTypePrimitiveUniChar(W_CTypePrimitiveCharOrUniChar): + _attrs_ = ['is_signed'] + _immutable_fields_ = ['is_signed'] -class W_CTypePrimitiveUniChar(W_CTypePrimitiveCharOrUniChar): - _attrs_ = [] + _wchar_is_signed = rfficache.signof_c_type('wchar_t') - if rffi.r_wchar_t.SIGN: - def write_raw_integer_data(self, w_cdata, value): - w_cdata.write_raw_signed_data(value) + def __init__(self, space, size, name, name_position, align): + W_CTypePrimitiveUniChar.__init__(self, space, size, name, + name_position, align) + self.is_signed = self._wchar_is_signed and (name == "wchar_t") + # "char16_t" and "char32_t" are always unsigned def cast_to_int(self, cdata): - unichardata = rffi.cast(WCHAR_INTP, cdata) - return self.space.newint(unichardata[0]) + if self.is_signed_wchar: + value = misc.read_raw_long_data(cdata, self.size) + return self.space.newint(value) + else: + value = misc.read_raw_ulong_data(cdata, self.size) + if self.size < rffi.sizeof(lltype.Signed): + return self.space.newint(intmask(value)) + else: + return self.space.newint(value) # r_uint => 'long' object def convert_to_object(self, cdata): - unichardata = rffi.cast(rffi.CWCHARP, cdata) - return self.space.newunicode(unichardata[0]) + if self.is_signed_wchar: + unichardata = rffi.cast(rffi.CWCHARP, cdata) + return self.space.newunicode(unichardata[0]) + else: + value = misc.read_raw_ulong_data(cdata, self.size) # r_uint + u = wchar_helper.ordinal_to_unicode(value) + return self.space.newunicode(u) def string(self, cdataobj, maxlen): with cdataobj as ptr: w_res = self.convert_to_object(ptr) return w_res - def _convert_to_unichar(self, w_ob): + def _convert_to_charN_t(self, w_ob, size): + # returns a r_uint. If size == 2, it is smaller than 0x10000 space = self.space if space.isinstance_w(w_ob, space.w_unicode): - s = space.unicode_w(w_ob) - if len(s) == 1: - return s[0] - if (isinstance(w_ob, cdataobj.W_CData) and - isinstance(w_ob.ctype, W_CTypePrimitiveUniChar)): + u = space.unicode_w(w_ob) + if len(u) == 1: + u = ord(u[0]) + if size == 2 and u > 0xffff: + raise self._convert_error("single character <= 0xFFFF", + w_ob) + return r_uint(u) + elif size == 4 and len(u) == 2 and ... + + elif (isinstance(w_ob, cdataobj.W_CData) and + isinstance(w_ob.ctype, W_CTypePrimitiveUniChar) and + w_ob.ctype.size == 2): with w_ob as ptr: - return rffi.cast(rffi.CWCHARP, ptr)[0] + return misc.read_raw_ulong_data(ptr, 2) raise self._convert_error("unicode string of length 1", w_ob) def convert_from_object(self, cdata, w_ob): - value = self._convert_to_unichar(w_ob) - rffi.cast(rffi.CWCHARP, cdata)[0] = value + ordinal = self._convert_to_char16(w_ob, self.size) + misc.write_raw_unsigned_data(cdata, ordinal, self.size) def unpack_ptr(self, w_ctypeptr, ptr, length): u = rffi.wcharpsize2unicode(rffi.cast(rffi.CWCHARP, ptr), length) diff --git a/pypy/module/_cffi_backend/newtype.py b/pypy/module/_cffi_backend/newtype.py --- a/pypy/module/_cffi_backend/newtype.py +++ b/pypy/module/_cffi_backend/newtype.py @@ -111,6 +111,9 @@ eptype("size_t", rffi.SIZE_T, ctypeprim.W_CTypePrimitiveUnsigned) eptype("ssize_t", rffi.SSIZE_T, ctypeprim.W_CTypePrimitiveSigned) +eptypesize("char16_t", 2, ctypeprim.W_CTypePrimitiveUniChar) +eptypesize("char32_t", 4, ctypeprim.W_CTypePrimitiveUniChar) + _WCTSigned = ctypeprim.W_CTypePrimitiveSigned _WCTUnsign = ctypeprim.W_CTypePrimitiveUnsigned diff --git a/pypy/module/_cffi_backend/test/_backend_test_c.py b/pypy/module/_cffi_backend/test/_backend_test_c.py --- a/pypy/module/_cffi_backend/test/_backend_test_c.py +++ b/pypy/module/_cffi_backend/test/_backend_test_c.py @@ -2087,22 +2087,40 @@ py.test.raises(TypeError, newp, BStructPtr, [cast(BFunc2, 0)]) def test_wchar(): - BWChar = new_primitive_type("wchar_t") + _test_wchar_variant("wchar_t") + +def test_char16(): + BChar16 = new_primitive_type("char16_t") + assert sizeof(BChar16) == 2 + _test_wchar_variant("char16_t") + assert int(cast(BChar16, -1)) == 0xffff # always unsigned + +def test_char32(): + BChar32 = new_primitive_type("char32_t") + assert sizeof(BChar32) == 4 + _test_wchar_variant("char32_t") + assert int(cast(BChar32, -1)) == 0xffffffff # always unsigned + +def _test_wchar_variant(typename): + BWChar = new_primitive_type(typename) BInt = new_primitive_type("int") pyuni4 = {1: True, 2: False}[len(u+'\U00012345')] wchar4 = {2: False, 4: True}[sizeof(BWChar)] - assert str(cast(BWChar, 0x45)) == "<cdata 'wchar_t' %s'E'>" % ( - mandatory_u_prefix,) - assert str(cast(BWChar, 0x1234)) == "<cdata 'wchar_t' %s'\u1234'>" % ( - mandatory_u_prefix,) - if wchar4: - if not _hacked_pypy_uni4(): + assert str(cast(BWChar, 0x45)) == "<cdata '%s' %s'E'>" % ( + typename, mandatory_u_prefix) + assert str(cast(BWChar, 0x1234)) == "<cdata '%s' %s'\u1234'>" % ( + typename, mandatory_u_prefix) + if not _hacked_pypy_uni4(): + if wchar4: x = cast(BWChar, 0x12345) - assert str(x) == "<cdata 'wchar_t' %s'\U00012345'>" % ( - mandatory_u_prefix,) + assert str(x) == "<cdata '%s' %s'\U00012345'>" % ( + typename, mandatory_u_prefix) assert int(x) == 0x12345 - else: - assert not pyuni4 + else: + x = cast(BWChar, 0x18345) + assert str(x) == "<cdata '%s' %s'\u8345'>" % ( + typename, mandatory_u_prefix) + assert int(x) == 0x8345 # BWCharP = new_pointer_type(BWChar) BStruct = new_struct_type("struct foo_s") @@ -2117,9 +2135,9 @@ s.a1 = u+'\u1234' assert s.a1 == u+'\u1234' if pyuni4: - assert wchar4 - s.a1 = u+'\U00012345' - assert s.a1 == u+'\U00012345' + if wchar4: + s.a1 = u+'\U00012345' + assert s.a1 == u+'\U00012345' elif wchar4: if not _hacked_pypy_uni4(): s.a1 = cast(BWChar, 0x12345) @@ -2154,17 +2172,17 @@ py.test.raises(IndexError, 'a[4]') # w = cast(BWChar, 'a') - assert repr(w) == "<cdata 'wchar_t' %s'a'>" % mandatory_u_prefix + assert repr(w) == "<cdata '%s' %s'a'>" % (typename, mandatory_u_prefix) assert str(w) == repr(w) assert string(w) == u+'a' assert int(w) == ord('a') w = cast(BWChar, 0x1234) - assert repr(w) == "<cdata 'wchar_t' %s'\u1234'>" % mandatory_u_prefix + assert repr(w) == "<cdata '%s' %s'\u1234'>" % (typename, mandatory_u_prefix) assert str(w) == repr(w) assert string(w) == u+'\u1234' assert int(w) == 0x1234 w = cast(BWChar, u+'\u8234') - assert repr(w) == "<cdata 'wchar_t' %s'\u8234'>" % mandatory_u_prefix + assert repr(w) == "<cdata '%s' %s'\u8234'>" % (typename, mandatory_u_prefix) assert str(w) == repr(w) assert string(w) == u+'\u8234' assert int(w) == 0x8234 @@ -2172,8 +2190,8 @@ assert repr(w) == "<cdata 'int' 4660>" if wchar4 and not _hacked_pypy_uni4(): w = cast(BWChar, u+'\U00012345') - assert repr(w) == "<cdata 'wchar_t' %s'\U00012345'>" % ( - mandatory_u_prefix,) + assert repr(w) == "<cdata '%s' %s'\U00012345'>" % ( + typename, mandatory_u_prefix) assert str(w) == repr(w) assert string(w) == u+'\U00012345' assert int(w) == 0x12345 @@ -2200,7 +2218,7 @@ py.test.raises(RuntimeError, string, q) # def cb(p): - assert repr(p).startswith("<cdata 'wchar_t *' 0x") + assert repr(p).startswith("<cdata '%s *' 0x" % typename) return len(string(p)) BFunc = new_function_type((BWCharP,), BInt, False) f = callback(BFunc, cb, -42) diff --git a/pypy/module/_cffi_backend/wchar_helper.py b/pypy/module/_cffi_backend/wchar_helper.py new file mode 100644 --- /dev/null +++ b/pypy/module/_cffi_backend/wchar_helper.py @@ -0,0 +1,17 @@ +from rpython.rlib.rarithmetic import r_uint, r_ulonglong, intmask +from rpython.rtyper.lltypesystem import lltype, rffi + +SIZE_UNICHAR = rffi.sizeof(lltype.UniChar) + + +if SIZE_UNICODE == 4: + def ordinal_to_unicode(ordinal): # 'ordinal' is a r_uint + return unichr(intmask(ordinal)) +else: + def ordinal_to_unicode(ordinal): # 'ordinal' is a r_uint + if ordinal <= 0xffff: + return unichr(intmask(ordinal)) + else: + ordinal = intmask(ordinal - 0x10000) + return (unichr(0xD800 | (ordinal >> 10)) + + unichr(0xDC00 | (ordinal & 0x3FF))) _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit