Author: Armin Rigo <[email protected]>
Branch: cffi-char16-char32
Changeset: r91486:e1a00d29e987
Date: 2017-06-02 11:36 +0200
http://bitbucket.org/pypy/pypy/changeset/e1a00d29e987/
Log: in-progress
diff --git a/pypy/module/_cffi_backend/ctypeprim.py b/pypy/module/_cffi_backend/ctypeprim.py
--- a/pypy/module/_cffi_backend/ctypeprim.py
+++ b/pypy/module/_cffi_backend/ctypeprim.py
@@ -10,7 +10,7 @@
from rpython.rtyper.tool import rfficache
from pypy.interpreter.error import oefmt
-from pypy.module._cffi_backend import cdataobj, misc
+from pypy.module._cffi_backend import cdataobj, misc, wchar_helper
from pypy.module._cffi_backend.ctypeobj import W_CType
@@ -148,50 +148,66 @@
return self.space.newbytes(s)
-# XXX explicitly use an integer type instead of lltype.UniChar here,
-# because for now the latter is defined as unsigned by RPython (even
-# though it may be signed when 'wchar_t' is written to C).
-WCHAR_INT = {(2, False): rffi.USHORT,
- (4, False): rffi.UINT,
- (4, True): rffi.INT}[rffi.sizeof(lltype.UniChar),
- rfficache.signof_c_type('wchar_t')]
-WCHAR_INTP = rffi.CArrayPtr(WCHAR_INT)
+class W_CTypePrimitiveUniChar(W_CTypePrimitiveCharOrUniChar):
+    # Primitive type used for 'wchar_t', 'char16_t' and 'char32_t'.
+    # 'is_signed_wchar' is the per-instance flag that cast_to_int() and
+    # convert_to_object() read.
+    _attrs_ = ['is_signed_wchar']
+    _immutable_fields_ = ['is_signed_wchar']
-class W_CTypePrimitiveUniChar(W_CTypePrimitiveCharOrUniChar):
-    _attrs_ = []
+    # result of rfficache.signof_c_type() for the C type 'wchar_t',
+    # computed once when the class body is executed
+    _wchar_is_signed = rfficache.signof_c_type('wchar_t')
-    if rffi.r_wchar_t.SIGN:
-        def write_raw_integer_data(self, w_cdata, value):
-            w_cdata.write_raw_signed_data(value)
+    def __init__(self, space, size, name, name_position, align):
+        # delegate to the parent class; naming this class here instead
+        # would make __init__ call itself without end
+        W_CTypePrimitiveCharOrUniChar.__init__(self, space, size, name,
+                                               name_position, align)
+        self.is_signed_wchar = self._wchar_is_signed and (name == "wchar_t")
+        # "char16_t" and "char32_t" are always unsigned
def cast_to_int(self, cdata):
- unichardata = rffi.cast(WCHAR_INTP, cdata)
- return self.space.newint(unichardata[0])
+ if self.is_signed_wchar:
+ value = misc.read_raw_long_data(cdata, self.size)
+ return self.space.newint(value)
+ else:
+ value = misc.read_raw_ulong_data(cdata, self.size)
+ if self.size < rffi.sizeof(lltype.Signed):
+ return self.space.newint(intmask(value))
+ else:
+ return self.space.newint(value) # r_uint => 'long' object
def convert_to_object(self, cdata):
- unichardata = rffi.cast(rffi.CWCHARP, cdata)
- return self.space.newunicode(unichardata[0])
+ if self.is_signed_wchar:
+ unichardata = rffi.cast(rffi.CWCHARP, cdata)
+ return self.space.newunicode(unichardata[0])
+ else:
+ value = misc.read_raw_ulong_data(cdata, self.size) # r_uint
+ u = wchar_helper.ordinal_to_unicode(value)
+ return self.space.newunicode(u)
def string(self, cdataobj, maxlen):
with cdataobj as ptr:
w_res = self.convert_to_object(ptr)
return w_res
-    def _convert_to_unichar(self, w_ob):
+    def _convert_to_charN_t(self, w_ob, size):
+        # Convert 'w_ob' to the ordinal to store in a 'size'-byte character
+        # (the caller passes self.size).  Returns a r_uint; if size == 2,
+        # it is smaller than 0x10000.  Raises the error built by
+        # self._convert_error() when 'w_ob' cannot be converted.
         space = self.space
         if space.isinstance_w(w_ob, space.w_unicode):
-            s = space.unicode_w(w_ob)
-            if len(s) == 1:
-                return s[0]
-        if (isinstance(w_ob, cdataobj.W_CData) and
-               isinstance(w_ob.ctype, W_CTypePrimitiveUniChar)):
+            u = space.unicode_w(w_ob)
+            if len(u) == 1:
+                # use a separate name so that 'u' stays a unicode string
+                ordinal = ord(u[0])
+                if size == 2 and ordinal > 0xffff:
+                    raise self._convert_error("single character <= 0xFFFF",
+                                              w_ob)
+                return r_uint(ordinal)
+            elif size == 4 and len(u) == 2:
+                # two units forming a surrogate pair: recombine them, the
+                # inverse of the split in wchar_helper.ordinal_to_unicode()
+                high = ord(u[0])
+                low = ord(u[1])
+                if 0xD800 <= high <= 0xDBFF and 0xDC00 <= low <= 0xDFFF:
+                    return r_uint(0x10000 + (((high - 0xD800) << 10) |
+                                             (low - 0xDC00)))
+        elif (isinstance(w_ob, cdataobj.W_CData) and
+              isinstance(w_ob.ctype, W_CTypePrimitiveUniChar) and
+              w_ob.ctype.size == size):
+            # only a character cdata of the same width is accepted, and
+            # exactly that many bytes are read from it
             with w_ob as ptr:
-                return rffi.cast(rffi.CWCHARP, ptr)[0]
+                return misc.read_raw_ulong_data(ptr, size)
         raise self._convert_error("unicode string of length 1", w_ob)
def convert_from_object(self, cdata, w_ob):
-        value = self._convert_to_unichar(w_ob)
-        rffi.cast(rffi.CWCHARP, cdata)[0] = value
+        # Convert 'w_ob' to an ordinal and store it at 'cdata' as an
+        # unsigned value of self.size bytes.
+        ordinal = self._convert_to_charN_t(w_ob, self.size)
+        misc.write_raw_unsigned_data(cdata, ordinal, self.size)
def unpack_ptr(self, w_ctypeptr, ptr, length):
u = rffi.wcharpsize2unicode(rffi.cast(rffi.CWCHARP, ptr), length)
diff --git a/pypy/module/_cffi_backend/newtype.py b/pypy/module/_cffi_backend/newtype.py
--- a/pypy/module/_cffi_backend/newtype.py
+++ b/pypy/module/_cffi_backend/newtype.py
@@ -111,6 +111,9 @@
eptype("size_t", rffi.SIZE_T, ctypeprim.W_CTypePrimitiveUnsigned)
eptype("ssize_t", rffi.SSIZE_T, ctypeprim.W_CTypePrimitiveSigned)
+eptypesize("char16_t", 2, ctypeprim.W_CTypePrimitiveUniChar)
+eptypesize("char32_t", 4, ctypeprim.W_CTypePrimitiveUniChar)
+
_WCTSigned = ctypeprim.W_CTypePrimitiveSigned
_WCTUnsign = ctypeprim.W_CTypePrimitiveUnsigned
diff --git a/pypy/module/_cffi_backend/test/_backend_test_c.py b/pypy/module/_cffi_backend/test/_backend_test_c.py
--- a/pypy/module/_cffi_backend/test/_backend_test_c.py
+++ b/pypy/module/_cffi_backend/test/_backend_test_c.py
@@ -2087,22 +2087,40 @@
py.test.raises(TypeError, newp, BStructPtr, [cast(BFunc2, 0)])
def test_wchar():
- BWChar = new_primitive_type("wchar_t")
+ _test_wchar_variant("wchar_t")
+
+def test_char16():
+ BChar16 = new_primitive_type("char16_t")
+ assert sizeof(BChar16) == 2
+ _test_wchar_variant("char16_t")
+ assert int(cast(BChar16, -1)) == 0xffff # always unsigned
+
+def test_char32():
+ BChar32 = new_primitive_type("char32_t")
+ assert sizeof(BChar32) == 4
+ _test_wchar_variant("char32_t")
+ assert int(cast(BChar32, -1)) == 0xffffffff # always unsigned
+
+def _test_wchar_variant(typename):
+ BWChar = new_primitive_type(typename)
BInt = new_primitive_type("int")
pyuni4 = {1: True, 2: False}[len(u+'\U00012345')]
wchar4 = {2: False, 4: True}[sizeof(BWChar)]
- assert str(cast(BWChar, 0x45)) == "<cdata 'wchar_t' %s'E'>" % (
- mandatory_u_prefix,)
- assert str(cast(BWChar, 0x1234)) == "<cdata 'wchar_t' %s'\u1234'>" % (
- mandatory_u_prefix,)
- if wchar4:
- if not _hacked_pypy_uni4():
+ assert str(cast(BWChar, 0x45)) == "<cdata '%s' %s'E'>" % (
+ typename, mandatory_u_prefix)
+ assert str(cast(BWChar, 0x1234)) == "<cdata '%s' %s'\u1234'>" % (
+ typename, mandatory_u_prefix)
+ if not _hacked_pypy_uni4():
+ if wchar4:
x = cast(BWChar, 0x12345)
- assert str(x) == "<cdata 'wchar_t' %s'\U00012345'>" % (
- mandatory_u_prefix,)
+ assert str(x) == "<cdata '%s' %s'\U00012345'>" % (
+ typename, mandatory_u_prefix)
assert int(x) == 0x12345
- else:
- assert not pyuni4
+ else:
+ x = cast(BWChar, 0x18345)
+ assert str(x) == "<cdata '%s' %s'\u8345'>" % (
+ typename, mandatory_u_prefix)
+ assert int(x) == 0x8345
#
BWCharP = new_pointer_type(BWChar)
BStruct = new_struct_type("struct foo_s")
@@ -2117,9 +2135,9 @@
s.a1 = u+'\u1234'
assert s.a1 == u+'\u1234'
if pyuni4:
- assert wchar4
- s.a1 = u+'\U00012345'
- assert s.a1 == u+'\U00012345'
+ if wchar4:
+ s.a1 = u+'\U00012345'
+ assert s.a1 == u+'\U00012345'
elif wchar4:
if not _hacked_pypy_uni4():
s.a1 = cast(BWChar, 0x12345)
@@ -2154,17 +2172,17 @@
py.test.raises(IndexError, 'a[4]')
#
w = cast(BWChar, 'a')
- assert repr(w) == "<cdata 'wchar_t' %s'a'>" % mandatory_u_prefix
+ assert repr(w) == "<cdata '%s' %s'a'>" % (typename, mandatory_u_prefix)
assert str(w) == repr(w)
assert string(w) == u+'a'
assert int(w) == ord('a')
w = cast(BWChar, 0x1234)
- assert repr(w) == "<cdata 'wchar_t' %s'\u1234'>" % mandatory_u_prefix
+ assert repr(w) == "<cdata '%s' %s'\u1234'>" % (typename, mandatory_u_prefix)
assert str(w) == repr(w)
assert string(w) == u+'\u1234'
assert int(w) == 0x1234
w = cast(BWChar, u+'\u8234')
- assert repr(w) == "<cdata 'wchar_t' %s'\u8234'>" % mandatory_u_prefix
+ assert repr(w) == "<cdata '%s' %s'\u8234'>" % (typename, mandatory_u_prefix)
assert str(w) == repr(w)
assert string(w) == u+'\u8234'
assert int(w) == 0x8234
@@ -2172,8 +2190,8 @@
assert repr(w) == "<cdata 'int' 4660>"
if wchar4 and not _hacked_pypy_uni4():
w = cast(BWChar, u+'\U00012345')
- assert repr(w) == "<cdata 'wchar_t' %s'\U00012345'>" % (
- mandatory_u_prefix,)
+ assert repr(w) == "<cdata '%s' %s'\U00012345'>" % (
+ typename, mandatory_u_prefix)
assert str(w) == repr(w)
assert string(w) == u+'\U00012345'
assert int(w) == 0x12345
@@ -2200,7 +2218,7 @@
py.test.raises(RuntimeError, string, q)
#
def cb(p):
- assert repr(p).startswith("<cdata 'wchar_t *' 0x")
+ assert repr(p).startswith("<cdata '%s *' 0x" % typename)
return len(string(p))
BFunc = new_function_type((BWCharP,), BInt, False)
f = callback(BFunc, cb, -42)
diff --git a/pypy/module/_cffi_backend/wchar_helper.py b/pypy/module/_cffi_backend/wchar_helper.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/_cffi_backend/wchar_helper.py
@@ -0,0 +1,22 @@
+from rpython.rlib.rarithmetic import r_uint, r_ulonglong, intmask
+from rpython.rtyper.lltypesystem import lltype, rffi
+
+# size in bytes of one RPython unicode character
+SIZE_UNICHAR = rffi.sizeof(lltype.UniChar)
+
+
+# ordinal_to_unicode(ordinal): 'ordinal' is a r_uint; returns the unicode
+# string for it.  The variant is selected once, when the module is imported.
+if SIZE_UNICHAR == 4:
+    def ordinal_to_unicode(ordinal):
+        # one unicode character is produced for any ordinal
+        return unichr(intmask(ordinal))
+else:
+    def ordinal_to_unicode(ordinal):
+        if ordinal <= 0xffff:
+            return unichr(intmask(ordinal))
+        else:
+            # above 0xffff: emit two units, 0xD800-based then 0xDC00-based
+            ordinal = intmask(ordinal - 0x10000)
+            return (unichr(0xD800 | (ordinal >> 10)) +
+                    unichr(0xDC00 | (ordinal & 0x3FF)))
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit