[pypy-commit] pypy cffi-char16-char32: in-progress

arigo Fri, 02 Jun 2017 02:47:12 -0700

Author: Armin Rigo <ar...@tunes.org>
Branch: cffi-char16-char32
Changeset: r91486:e1a00d29e987
Date: 2017-06-02 11:36 +0200
http://bitbucket.org/pypy/pypy/changeset/e1a00d29e987/


Log:    in-progress

diff --git a/pypy/module/_cffi_backend/ctypeprim.py 
b/pypy/module/_cffi_backend/ctypeprim.py
--- a/pypy/module/_cffi_backend/ctypeprim.py
+++ b/pypy/module/_cffi_backend/ctypeprim.py
@@ -10,7 +10,7 @@
 from rpython.rtyper.tool import rfficache
 
 from pypy.interpreter.error import oefmt
-from pypy.module._cffi_backend import cdataobj, misc
+from pypy.module._cffi_backend import cdataobj, misc, wchar_helper
 from pypy.module._cffi_backend.ctypeobj import W_CType
 
 
@@ -148,50 +148,66 @@
         return self.space.newbytes(s)
 
 
-# XXX explicitly use an integer type instead of lltype.UniChar here,
-# because for now the latter is defined as unsigned by RPython (even
-# though it may be signed when 'wchar_t' is written to C).
-WCHAR_INT = {(2, False): rffi.USHORT,
-             (4, False): rffi.UINT,
-             (4, True): rffi.INT}[rffi.sizeof(lltype.UniChar),
-                                  rfficache.signof_c_type('wchar_t')]
-WCHAR_INTP = rffi.CArrayPtr(WCHAR_INT)
+class W_CTypePrimitiveUniChar(W_CTypePrimitiveCharOrUniChar):
+    _attrs_            = ['is_signed']
+    _immutable_fields_ = ['is_signed']
 
-class W_CTypePrimitiveUniChar(W_CTypePrimitiveCharOrUniChar):
-    _attrs_ = []
+    _wchar_is_signed = rfficache.signof_c_type('wchar_t')
 
-    if rffi.r_wchar_t.SIGN:
-        def write_raw_integer_data(self, w_cdata, value):
-            w_cdata.write_raw_signed_data(value)
+    def __init__(self, space, size, name, name_position, align):
+        W_CTypePrimitiveUniChar.__init__(self, space, size, name,
+                                         name_position, align)
+        self.is_signed = self._wchar_is_signed and (name == "wchar_t")
+        # "char16_t" and "char32_t" are always unsigned
 
     def cast_to_int(self, cdata):
-        unichardata = rffi.cast(WCHAR_INTP, cdata)
-        return self.space.newint(unichardata[0])
+        if self.is_signed_wchar:
+            value = misc.read_raw_long_data(cdata, self.size)
+            return self.space.newint(value)
+        else:
+            value = misc.read_raw_ulong_data(cdata, self.size)
+            if self.size < rffi.sizeof(lltype.Signed):
+                return self.space.newint(intmask(value))
+            else:
+                return self.space.newint(value)    # r_uint => 'long' object
 
     def convert_to_object(self, cdata):
-        unichardata = rffi.cast(rffi.CWCHARP, cdata)
-        return self.space.newunicode(unichardata[0])
+        if self.is_signed_wchar:
+            unichardata = rffi.cast(rffi.CWCHARP, cdata)
+            return self.space.newunicode(unichardata[0])
+        else:
+            value = misc.read_raw_ulong_data(cdata, self.size)   # r_uint
+            u = wchar_helper.ordinal_to_unicode(value)
+            return self.space.newunicode(u)
 
     def string(self, cdataobj, maxlen):
         with cdataobj as ptr:
             w_res = self.convert_to_object(ptr)
         return w_res
 
-    def _convert_to_unichar(self, w_ob):
+    def _convert_to_charN_t(self, w_ob, size):
+        # returns a r_uint.  If size == 2, it is smaller than 0x10000
         space = self.space
         if space.isinstance_w(w_ob, space.w_unicode):
-            s = space.unicode_w(w_ob)
-            if len(s) == 1:
-                return s[0]
-        if (isinstance(w_ob, cdataobj.W_CData) and
-               isinstance(w_ob.ctype, W_CTypePrimitiveUniChar)):
+            u = space.unicode_w(w_ob)
+            if len(u) == 1:
+                u = ord(u[0])
+                if size == 2 and u > 0xffff:
+                    raise self._convert_error("single character <= 0xFFFF",
+                                              w_ob)
+                return r_uint(u)
+            elif size == 4 and len(u) == 2 and ...
+
+        elif (isinstance(w_ob, cdataobj.W_CData) and
+               isinstance(w_ob.ctype, W_CTypePrimitiveUniChar) and
+               w_ob.ctype.size == 2):
             with w_ob as ptr:
-                return rffi.cast(rffi.CWCHARP, ptr)[0]
+                return misc.read_raw_ulong_data(ptr, 2)
         raise self._convert_error("unicode string of length 1", w_ob)
 
     def convert_from_object(self, cdata, w_ob):
-        value = self._convert_to_unichar(w_ob)
-        rffi.cast(rffi.CWCHARP, cdata)[0] = value
+        ordinal = self._convert_to_char16(w_ob, self.size)
+        misc.write_raw_unsigned_data(cdata, ordinal, self.size)
 
     def unpack_ptr(self, w_ctypeptr, ptr, length):
         u = rffi.wcharpsize2unicode(rffi.cast(rffi.CWCHARP, ptr), length)
diff --git a/pypy/module/_cffi_backend/newtype.py 
b/pypy/module/_cffi_backend/newtype.py
--- a/pypy/module/_cffi_backend/newtype.py
+++ b/pypy/module/_cffi_backend/newtype.py
@@ -111,6 +111,9 @@
 eptype("size_t",    rffi.SIZE_T,    ctypeprim.W_CTypePrimitiveUnsigned)
 eptype("ssize_t",   rffi.SSIZE_T,   ctypeprim.W_CTypePrimitiveSigned)
 
+eptypesize("char16_t", 2, ctypeprim.W_CTypePrimitiveUniChar)
+eptypesize("char32_t", 4, ctypeprim.W_CTypePrimitiveUniChar)
+
 _WCTSigned = ctypeprim.W_CTypePrimitiveSigned
 _WCTUnsign = ctypeprim.W_CTypePrimitiveUnsigned
 
diff --git a/pypy/module/_cffi_backend/test/_backend_test_c.py 
b/pypy/module/_cffi_backend/test/_backend_test_c.py
--- a/pypy/module/_cffi_backend/test/_backend_test_c.py
+++ b/pypy/module/_cffi_backend/test/_backend_test_c.py
@@ -2087,22 +2087,40 @@
     py.test.raises(TypeError, newp, BStructPtr, [cast(BFunc2, 0)])
 
 def test_wchar():
-    BWChar = new_primitive_type("wchar_t")
+    _test_wchar_variant("wchar_t")
+
+def test_char16():
+    BChar16 = new_primitive_type("char16_t")
+    assert sizeof(BChar16) == 2
+    _test_wchar_variant("char16_t")
+    assert int(cast(BChar16, -1)) == 0xffff       # always unsigned
+
+def test_char32():
+    BChar32 = new_primitive_type("char32_t")
+    assert sizeof(BChar32) == 4
+    _test_wchar_variant("char32_t")
+    assert int(cast(BChar32, -1)) == 0xffffffff   # always unsigned
+
+def _test_wchar_variant(typename):
+    BWChar = new_primitive_type(typename)
     BInt = new_primitive_type("int")
     pyuni4 = {1: True, 2: False}[len(u+'\U00012345')]
     wchar4 = {2: False, 4: True}[sizeof(BWChar)]
-    assert str(cast(BWChar, 0x45)) == "<cdata 'wchar_t' %s'E'>" % (
-        mandatory_u_prefix,)
-    assert str(cast(BWChar, 0x1234)) == "<cdata 'wchar_t' %s'\u1234'>" % (
-        mandatory_u_prefix,)
-    if wchar4:
-        if not _hacked_pypy_uni4():
+    assert str(cast(BWChar, 0x45)) == "<cdata '%s' %s'E'>" % (
+        typename, mandatory_u_prefix)
+    assert str(cast(BWChar, 0x1234)) == "<cdata '%s' %s'\u1234'>" % (
+        typename, mandatory_u_prefix)
+    if not _hacked_pypy_uni4():
+        if wchar4:
             x = cast(BWChar, 0x12345)
-            assert str(x) == "<cdata 'wchar_t' %s'\U00012345'>" % (
-                mandatory_u_prefix,)
+            assert str(x) == "<cdata '%s' %s'\U00012345'>" % (
+                typename, mandatory_u_prefix)
             assert int(x) == 0x12345
-    else:
-        assert not pyuni4
+        else:
+            x = cast(BWChar, 0x18345)
+            assert str(x) == "<cdata '%s' %s'\u8345'>" % (
+                typename, mandatory_u_prefix)
+            assert int(x) == 0x8345
     #
     BWCharP = new_pointer_type(BWChar)
     BStruct = new_struct_type("struct foo_s")
@@ -2117,9 +2135,9 @@
     s.a1 = u+'\u1234'
     assert s.a1 == u+'\u1234'
     if pyuni4:
-        assert wchar4
-        s.a1 = u+'\U00012345'
-        assert s.a1 == u+'\U00012345'
+        if wchar4:
+            s.a1 = u+'\U00012345'
+            assert s.a1 == u+'\U00012345'
     elif wchar4:
         if not _hacked_pypy_uni4():
             s.a1 = cast(BWChar, 0x12345)
@@ -2154,17 +2172,17 @@
         py.test.raises(IndexError, 'a[4]')
     #
     w = cast(BWChar, 'a')
-    assert repr(w) == "<cdata 'wchar_t' %s'a'>" % mandatory_u_prefix
+    assert repr(w) == "<cdata '%s' %s'a'>" % (typename, mandatory_u_prefix)
     assert str(w) == repr(w)
     assert string(w) == u+'a'
     assert int(w) == ord('a')
     w = cast(BWChar, 0x1234)
-    assert repr(w) == "<cdata 'wchar_t' %s'\u1234'>" % mandatory_u_prefix
+    assert repr(w) == "<cdata '%s' %s'\u1234'>" % (typename, 
mandatory_u_prefix)
     assert str(w) == repr(w)
     assert string(w) == u+'\u1234'
     assert int(w) == 0x1234
     w = cast(BWChar, u+'\u8234')
-    assert repr(w) == "<cdata 'wchar_t' %s'\u8234'>" % mandatory_u_prefix
+    assert repr(w) == "<cdata '%s' %s'\u8234'>" % (typename, 
mandatory_u_prefix)
     assert str(w) == repr(w)
     assert string(w) == u+'\u8234'
     assert int(w) == 0x8234
@@ -2172,8 +2190,8 @@
     assert repr(w) == "<cdata 'int' 4660>"
     if wchar4 and not _hacked_pypy_uni4():
         w = cast(BWChar, u+'\U00012345')
-        assert repr(w) == "<cdata 'wchar_t' %s'\U00012345'>" % (
-            mandatory_u_prefix,)
+        assert repr(w) == "<cdata '%s' %s'\U00012345'>" % (
+            typename, mandatory_u_prefix)
         assert str(w) == repr(w)
         assert string(w) == u+'\U00012345'
         assert int(w) == 0x12345
@@ -2200,7 +2218,7 @@
     py.test.raises(RuntimeError, string, q)
     #
     def cb(p):
-        assert repr(p).startswith("<cdata 'wchar_t *' 0x")
+        assert repr(p).startswith("<cdata '%s *' 0x" % typename)
         return len(string(p))
     BFunc = new_function_type((BWCharP,), BInt, False)
     f = callback(BFunc, cb, -42)
diff --git a/pypy/module/_cffi_backend/wchar_helper.py 
b/pypy/module/_cffi_backend/wchar_helper.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/_cffi_backend/wchar_helper.py
@@ -0,0 +1,17 @@
+from rpython.rlib.rarithmetic import r_uint, r_ulonglong, intmask
+from rpython.rtyper.lltypesystem import lltype, rffi
+
+SIZE_UNICHAR = rffi.sizeof(lltype.UniChar)
+
+
+if SIZE_UNICODE == 4:
+    def ordinal_to_unicode(ordinal):    # 'ordinal' is a r_uint
+        return unichr(intmask(ordinal))
+else:
+    def ordinal_to_unicode(ordinal):    # 'ordinal' is a r_uint
+        if ordinal <= 0xffff:
+            return unichr(intmask(ordinal))
+        else:
+            ordinal = intmask(ordinal - 0x10000)
+            return (unichr(0xD800 | (ordinal >> 10)) +
+                    unichr(0xDC00 | (ordinal & 0x3FF)))
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit

[pypy-commit] pypy cffi-char16-char32: in-progress

Reply via email to