Author: Armin Rigo <ar...@tunes.org>
Branch: 
Changeset: r79866:ea08d9ade6c4
Date: 2015-09-27 16:35 +0100
http://bitbucket.org/pypy/pypy/changeset/ea08d9ade6c4/

Log:    Confusion: RPython assumes everywhere (including in the JIT) that
        lltype.UniChar is unsigned. But that's wrong: the C backend
        translates it to 'wchar_t', which is sometimes a signed 32-bit
        integer. Notable result: when RPython code starts to run with the
        JIT, the sign of UniChars suddenly appears to change.

        It should be fixed everywhere in RPython. For now, a simple
        workaround to fix one case.

diff --git a/pypy/module/_cffi_backend/ctypeprim.py b/pypy/module/_cffi_backend/ctypeprim.py
--- a/pypy/module/_cffi_backend/ctypeprim.py
+++ b/pypy/module/_cffi_backend/ctypeprim.py
@@ -125,12 +125,24 @@
         cdata[0] = value
 
 
+# XXX explicitly use an integer type instead of lltype.UniChar here,
+# because for now the latter is defined as unsigned by RPython (even
+# though it may be signed when 'wchar_t' is written to C).
+WCHAR_INT = {(2, False): rffi.USHORT,
+             (4, False): rffi.UINT,
+             (4, True): rffi.INT}[rffi.sizeof(lltype.UniChar), rffi.r_wchar_t.SIGN]
+WCHAR_INTP = rffi.CArrayPtr(WCHAR_INT)
+
 class W_CTypePrimitiveUniChar(W_CTypePrimitiveCharOrUniChar):
     _attrs_ = []
 
+    if rffi.r_wchar_t.SIGN:
+        def write_raw_integer_data(self, w_cdata, value):
+            w_cdata.write_raw_signed_data(value)
+
     def cast_to_int(self, cdata):
-        unichardata = rffi.cast(rffi.CWCHARP, cdata)
-        return self.space.wrap(ord(unichardata[0]))
+        unichardata = rffi.cast(WCHAR_INTP, cdata)
+        return self.space.wrap(unichardata[0])
 
     def convert_to_object(self, cdata):
         unichardata = rffi.cast(rffi.CWCHARP, cdata)
diff --git a/rpython/jit/metainterp/test/test_ajit.py b/rpython/jit/metainterp/test/test_ajit.py
--- a/rpython/jit/metainterp/test/test_ajit.py
+++ b/rpython/jit/metainterp/test/test_ajit.py
@@ -4319,3 +4319,15 @@
  
         
         self.meta_interp(allfuncs, [9, 2000])
+
+    def test_unichar_might_be_signed(self):
+        py.test.skip("wchar_t is sometimes a signed 32-bit integer type, "
+                     "but RPython inteprets it as unsigned (but still "
+                     "translates to wchar_t, so can create confusion)")
+        def f(x):
+            return rffi.cast(lltype.Signed, rffi.cast(lltype.UniChar, x))
+        res = self.interp_operations(f, [-1])
+        if rffi.r_wchar_t.SIGN:
+            assert res == -1
+        else:
+            assert res == 2 ** 16 - 1 or res == 2 ** 32 - 1
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to