Author: Armin Rigo <[email protected]>
Branch:
Changeset: r79866:ea08d9ade6c4
Date: 2015-09-27 16:35 +0100
http://bitbucket.org/pypy/pypy/changeset/ea08d9ade6c4/
Log: Confusion: RPython assumes everywhere (including in the JIT) that
lltype.UniChar is unsigned. But that's wrong: the C backend
translates it to 'wchar_t', which is sometimes a signed 32-bit
integer. Notable result: when RPython code starts to run with the
JIT, the sign of UniChars suddenly appears to change.
It should be fixed everywhere in RPython. For now, this is a simple
workaround that fixes one case.
diff --git a/pypy/module/_cffi_backend/ctypeprim.py
b/pypy/module/_cffi_backend/ctypeprim.py
--- a/pypy/module/_cffi_backend/ctypeprim.py
+++ b/pypy/module/_cffi_backend/ctypeprim.py
@@ -125,12 +125,24 @@
cdata[0] = value
+# XXX explicitly use an integer type instead of lltype.UniChar here,
+# because for now the latter is defined as unsigned by RPython (even
+# though it may be signed when 'wchar_t' is written to C).
+WCHAR_INT = {(2, False): rffi.USHORT,
+ (4, False): rffi.UINT,
+ (4, True): rffi.INT}[rffi.sizeof(lltype.UniChar),
rffi.r_wchar_t.SIGN]
+WCHAR_INTP = rffi.CArrayPtr(WCHAR_INT)
+
class W_CTypePrimitiveUniChar(W_CTypePrimitiveCharOrUniChar):
_attrs_ = []
+ if rffi.r_wchar_t.SIGN:
+ def write_raw_integer_data(self, w_cdata, value):
+ w_cdata.write_raw_signed_data(value)
+
def cast_to_int(self, cdata):
- unichardata = rffi.cast(rffi.CWCHARP, cdata)
- return self.space.wrap(ord(unichardata[0]))
+ unichardata = rffi.cast(WCHAR_INTP, cdata)
+ return self.space.wrap(unichardata[0])
def convert_to_object(self, cdata):
unichardata = rffi.cast(rffi.CWCHARP, cdata)
diff --git a/rpython/jit/metainterp/test/test_ajit.py
b/rpython/jit/metainterp/test/test_ajit.py
--- a/rpython/jit/metainterp/test/test_ajit.py
+++ b/rpython/jit/metainterp/test/test_ajit.py
@@ -4319,3 +4319,15 @@
self.meta_interp(allfuncs, [9, 2000])
+
+ def test_unichar_might_be_signed(self):
+ py.test.skip("wchar_t is sometimes a signed 32-bit integer type, "
+ "but RPython inteprets it as unsigned (but still "
+ "translates to wchar_t, so can create confusion)")
+ def f(x):
+ return rffi.cast(lltype.Signed, rffi.cast(lltype.UniChar, x))
+ res = self.interp_operations(f, [-1])
+ if rffi.r_wchar_t.SIGN:
+ assert res == -1
+ else:
+ assert res == 2 ** 16 - 1 or res == 2 ** 32 - 1
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit