Author: Armin Rigo <ar...@tunes.org> Branch: Changeset: r79866:ea08d9ade6c4 Date: 2015-09-27 16:35 +0100 http://bitbucket.org/pypy/pypy/changeset/ea08d9ade6c4/
Log: Confusion: RPython assumes everywhere (including in the JIT) that lltype.UniChar is unsigned. But that's wrong: the C backend translates it to 'wchar_t', which is sometimes a signed 32-bit integer. Notable result: when RPython code starts to run with the JIT, the sign of UniChars suddenly appears to change. It should be fixed everywhere in RPython. For now, a simple workaround to fix one case. diff --git a/pypy/module/_cffi_backend/ctypeprim.py b/pypy/module/_cffi_backend/ctypeprim.py --- a/pypy/module/_cffi_backend/ctypeprim.py +++ b/pypy/module/_cffi_backend/ctypeprim.py @@ -125,12 +125,24 @@ cdata[0] = value +# XXX explicitly use an integer type instead of lltype.UniChar here, +# because for now the latter is defined as unsigned by RPython (even +# though it may be signed when 'wchar_t' is written to C). +WCHAR_INT = {(2, False): rffi.USHORT, + (4, False): rffi.UINT, + (4, True): rffi.INT}[rffi.sizeof(lltype.UniChar), rffi.r_wchar_t.SIGN] +WCHAR_INTP = rffi.CArrayPtr(WCHAR_INT) + class W_CTypePrimitiveUniChar(W_CTypePrimitiveCharOrUniChar): _attrs_ = [] + if rffi.r_wchar_t.SIGN: + def write_raw_integer_data(self, w_cdata, value): + w_cdata.write_raw_signed_data(value) + def cast_to_int(self, cdata): - unichardata = rffi.cast(rffi.CWCHARP, cdata) - return self.space.wrap(ord(unichardata[0])) + unichardata = rffi.cast(WCHAR_INTP, cdata) + return self.space.wrap(unichardata[0]) def convert_to_object(self, cdata): unichardata = rffi.cast(rffi.CWCHARP, cdata) diff --git a/rpython/jit/metainterp/test/test_ajit.py b/rpython/jit/metainterp/test/test_ajit.py --- a/rpython/jit/metainterp/test/test_ajit.py +++ b/rpython/jit/metainterp/test/test_ajit.py @@ -4319,3 +4319,15 @@ self.meta_interp(allfuncs, [9, 2000]) + + def test_unichar_might_be_signed(self): + py.test.skip("wchar_t is sometimes a signed 32-bit integer type, " + "but RPython inteprets it as unsigned (but still " + "translates to wchar_t, so can create confusion)") + def f(x): + return rffi.cast(lltype.Signed, rffi.cast(lltype.UniChar, x)) + res = self.interp_operations(f, [-1]) + if rffi.r_wchar_t.SIGN: + assert res == -1 + else: + assert res == 2 ** 16 - 1 or res == 2 ** 32 - 1 _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit