Author: fijal
Branch: unicode-utf8
Changeset: r93338:93560a4f1a42
Date: 2017-12-09 21:35 +0200
http://bitbucket.org/pypy/pypy/changeset/93560a4f1a42/

Log:    fix _rawffi and add a todo item

diff --git a/TODO b/TODO
--- a/TODO
+++ b/TODO
@@ -12,3 +12,4 @@
 * improve performance of splitlines
 
 * fix _pypyjson to not use a wrapped dict when decoding an object
+* make sure we review all the places that call ord(unichr) to check for ValueErrors
\ No newline at end of file
diff --git a/pypy/module/_locale/interp_locale.py b/pypy/module/_locale/interp_locale.py
--- a/pypy/module/_locale/interp_locale.py
+++ b/pypy/module/_locale/interp_locale.py
@@ -133,10 +133,11 @@
             rffi.free_charp(s1_c)
             rffi.free_charp(s2_c)
 
-    s1, s2 = space.unicode_w(w_s1), space.unicode_w(w_s2)
+    s1, l1 = space.utf8_len_w(w_s1)
+    s2, l2 = space.utf8_len_w(w_s2)
 
-    s1_c = rffi.unicode2wcharp(s1)
-    s2_c = rffi.unicode2wcharp(s2)
+    s1_c = rffi.utf82wcharp(s1, l1)
+    s2_c = rffi.utf82wcharp(s2, l2)
     try:
         result = _wcscoll(s1_c, s2_c)
     finally:
diff --git a/pypy/module/_rawffi/alt/type_converter.py b/pypy/module/_rawffi/alt/type_converter.py
--- a/pypy/module/_rawffi/alt/type_converter.py
+++ b/pypy/module/_rawffi/alt/type_converter.py
@@ -227,8 +227,8 @@
             ucharval = self.get_char(w_ffitype)
             return space.newbytes(chr(ucharval))
         elif w_ffitype.is_unichar():
-            wcharval = self.get_unichar(w_ffitype)
-            return space.newutf8(rutf8.unichr_as_utf8(r_uint(wcharval)), 1)
+            wcharval = r_uint(self.get_unichar(w_ffitype))
+            return space.newutf8(rutf8.unichr_as_utf8(wcharval), 1)
         elif w_ffitype.is_double():
             return self._float(w_ffitype)
         elif w_ffitype.is_singlefloat():
diff --git a/pypy/module/_rawffi/interp_rawffi.py b/pypy/module/_rawffi/interp_rawffi.py
--- a/pypy/module/_rawffi/interp_rawffi.py
+++ b/pypy/module/_rawffi/interp_rawffi.py
@@ -448,7 +448,8 @@
             elif c == 'c':
                 return space.newbytes(func(add_arg, argdesc, ll_type))
             elif c == 'u':
-                return space.newunicode(func(add_arg, argdesc, ll_type))
+                return space.newutf8(rutf8.unichr_as_utf8(
+                    ord(func(add_arg, argdesc, ll_type))), 1)
             elif c == 'f' or c == 'd' or c == 'g':
                 return space.newfloat(float(func(add_arg, argdesc, ll_type)))
             else:
@@ -596,10 +597,10 @@
         return space.w_None
     wcharp_addr = rffi.cast(rffi.CWCHARP, address)
     if maxlength == -1:
-        s = rffi.wcharp2unicode(wcharp_addr)
+        s, lgt = rffi.wcharp2utf8(wcharp_addr)
     else:
-        s = rffi.wcharp2unicoden(wcharp_addr, maxlength)
-    return space.newunicode(s)
+        s, lgt = rffi.wcharp2utf8n(wcharp_addr, maxlength)
+    return space.newutf8(s, lgt)
 
 @unwrap_spec(address=r_uint, maxlength=int)
 def charp2rawstring(space, address, maxlength=-1):
@@ -612,8 +613,8 @@
 def wcharp2rawunicode(space, address, maxlength=-1):
     if maxlength == -1:
         return wcharp2unicode(space, address)
-    s = rffi.wcharpsize2unicode(rffi.cast(rffi.CWCHARP, address), maxlength)
-    return space.newunicode(s)
+    s = rffi.wcharpsize2utf8(rffi.cast(rffi.CWCHARP, address), maxlength)
+    return space.newutf8(s, maxlength)
 
 @unwrap_spec(address=r_uint, newcontent='bufferstr')
 def rawstring2charp(space, address, newcontent):
diff --git a/rpython/annotator/unaryop.py b/rpython/annotator/unaryop.py
--- a/rpython/annotator/unaryop.py
+++ b/rpython/annotator/unaryop.py
@@ -792,7 +792,7 @@
     def ord(self):
         # warning, on 32-bit with 32-bit unichars, this might return
         # negative numbers
-        return SomeInteger()
+        return SomeInteger(nonneg=True)
 
 class __extend__(SomeIterator):
 
diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py
--- a/rpython/rlib/rutf8.py
+++ b/rpython/rlib/rutf8.py
@@ -19,7 +19,7 @@
 from rpython.rlib.objectmodel import enforceargs, we_are_translated, specialize
 from rpython.rlib.objectmodel import always_inline, dont_inline, try_inline
 from rpython.rlib.rstring import StringBuilder
-from rpython.rlib import jit
+from rpython.rlib import jit, types
 from rpython.rlib.signature import signature
 from rpython.rlib.types import char, none
 from rpython.rlib.rarithmetic import r_uint
@@ -27,6 +27,8 @@
 from rpython.rtyper.lltypesystem import lltype, rffi
 
 
+# we need a way to accept both r_uint and int(nonneg=True)
+#@signature(types.int_nonneg(), types.bool(), returns=types.str())
 def unichr_as_utf8(code, allow_surrogates=False):
     """Encode code (numeric value) as utf8 encoded string
     """
diff --git a/rpython/rlib/types.py b/rpython/rlib/types.py
--- a/rpython/rlib/types.py
+++ b/rpython/rlib/types.py
@@ -26,6 +26,8 @@
 def int():
     return model.SomeInteger()
 
+def int_nonneg():
+    return model.SomeInteger(nonneg=True)  # like int(), but flagged nonneg
 
 def bool():
     return model.SomeBool()
diff --git a/rpython/rtyper/lltypesystem/rffi.py b/rpython/rtyper/lltypesystem/rffi.py
--- a/rpython/rtyper/lltypesystem/rffi.py
+++ b/rpython/rtyper/lltypesystem/rffi.py
@@ -1019,7 +1019,27 @@
     s = StringBuilder(size)
     for i in range(size):
         rutf8.unichr_as_utf8_append(s, ord(w[i]))
-    return s.build()    
+    return s.build()
+
+def wcharp2utf8(w):
+    from rpython.rlib import rutf8
+    # Build a utf8 string from the wide chars of w, up to the first NUL.
+    s = rutf8.Utf8StringBuilder()
+    i = 0
+    while ord(w[i]):  # a code of 0 ends the scan
+        s.append_code(ord(w[i]))
+        i += 1
+    return s.build(), i  # (built string, number of codes appended)
+
+def wcharp2utf8n(w, maxlen):
+    """Like wcharp2utf8, but read at most maxlen wide chars from w."""
+    from rpython.rlib import rutf8
+    s = rutf8.Utf8StringBuilder(maxlen)
+    i = 0
+    while i < maxlen and ord(w[i]):  # ord(): a bare NUL char is not falsy
+        s.append_code(ord(w[i]))
+        i += 1
+    return s.build(), i  # (built string, number of codes appended)
 
 def utf82wcharp(utf8, utf8len):
     from rpython.rlib import rutf8
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to