Author: fijal
Branch: unicode-utf8
Changeset: r93295:1be16a6d5f37
Date: 2017-12-07 16:52 +0200
http://bitbucket.org/pypy/pypy/changeset/1be16a6d5f37/

Log:    changes to array module before changing the world

diff --git a/pypy/module/array/interp_array.py 
b/pypy/module/array/interp_array.py
--- a/pypy/module/array/interp_array.py
+++ b/pypy/module/array/interp_array.py
@@ -1,4 +1,4 @@
-from rpython.rlib import jit, rgc
+from rpython.rlib import jit, rgc, rutf8
 from rpython.rlib.buffer import RawBuffer
 from rpython.rlib.objectmodel import keepalive_until_here
 from rpython.rlib.rarithmetic import ovfcheck, widen
@@ -451,7 +451,7 @@
         """
         if self.typecode == 'u':
             buf = rffi.cast(UNICODE_ARRAY, self._buffer_as_unsigned())
-            return space.newunicode(rffi.wcharpsize2unicode(buf, self.len))
+            return space.newutf8(rffi.wcharpsize2unicode(buf, self.len))
         else:
             raise oefmt(space.w_ValueError,
                         "tounicode() may only be called on type 'u' arrays")
@@ -797,7 +797,7 @@
          TypeCode(rffi.UINT,          'int_w', True)
 types = {
     'c': TypeCode(lltype.Char,        'bytes_w', method=''),
-    'u': TypeCode(lltype.UniChar,     'unicode_w', method=''),
+    'u': TypeCode(lltype.UniChar,     'utf8_len_w', method=''),
     'b': TypeCode(rffi.SIGNEDCHAR,    'int_w', True, True),
     'B': TypeCode(rffi.UCHAR,         'int_w', True),
     'h': TypeCode(rffi.SHORT,         'int_w', True, True),
@@ -895,11 +895,17 @@
                                 "unsigned %d-byte integer out of range",
                                 mytype.bytes)
                 return rffi.cast(mytype.itemtype, item)
-            if mytype.unwrap == 'bytes_w' or mytype.unwrap == 'unicode_w':
+            if mytype.unwrap == 'bytes_w':
                 if len(item) != 1:
                     raise oefmt(space.w_TypeError, "array item must be char")
                 item = item[0]
                 return rffi.cast(mytype.itemtype, item)
+            if mytype.unwrap == 'utf8_len_w':
+                utf8, lgt = item
+                if lgt != 1:
+                    raise oefmt(space.w_TypeError, "array item must be char")
+                uchar = rutf8.codepoint_at_pos(utf8, 0)
+                return rffi.cast(mytype.itemtype, uchar)
             #
             # "regular" case: it fits in an rpython integer (lltype.Signed)
             # or it is a float
@@ -1007,7 +1013,9 @@
             elif mytype.typecode == 'c':
                 return space.newbytes(item)
             elif mytype.typecode == 'u':
-                return space.newunicode(item)
+                code = ord(item)
+                return space.newutf8(rutf8.unichr_as_utf8(code), 1,
+                          rutf8.get_flag_from_code(code))
             assert 0, "unreachable"
 
         # interface
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to