Author: fijal
Branch: unicode-utf8
Changeset: r93295:1be16a6d5f37
Date: 2017-12-07 16:52 +0200
http://bitbucket.org/pypy/pypy/changeset/1be16a6d5f37/
Log: changes to array module before changing the world diff --git a/pypy/module/array/interp_array.py b/pypy/module/array/interp_array.py --- a/pypy/module/array/interp_array.py +++ b/pypy/module/array/interp_array.py @@ -1,4 +1,4 @@ -from rpython.rlib import jit, rgc +from rpython.rlib import jit, rgc, rutf8 from rpython.rlib.buffer import RawBuffer from rpython.rlib.objectmodel import keepalive_until_here from rpython.rlib.rarithmetic import ovfcheck, widen @@ -451,7 +451,7 @@ """ if self.typecode == 'u': buf = rffi.cast(UNICODE_ARRAY, self._buffer_as_unsigned()) - return space.newunicode(rffi.wcharpsize2unicode(buf, self.len)) + return space.newutf8(rffi.wcharpsize2unicode(buf, self.len)) else: raise oefmt(space.w_ValueError, "tounicode() may only be called on type 'u' arrays") @@ -797,7 +797,7 @@ TypeCode(rffi.UINT, 'int_w', True) types = { 'c': TypeCode(lltype.Char, 'bytes_w', method=''), - 'u': TypeCode(lltype.UniChar, 'unicode_w', method=''), + 'u': TypeCode(lltype.UniChar, 'utf8_len_w', method=''), 'b': TypeCode(rffi.SIGNEDCHAR, 'int_w', True, True), 'B': TypeCode(rffi.UCHAR, 'int_w', True), 'h': TypeCode(rffi.SHORT, 'int_w', True, True), @@ -895,11 +895,17 @@ "unsigned %d-byte integer out of range", mytype.bytes) return rffi.cast(mytype.itemtype, item) - if mytype.unwrap == 'bytes_w' or mytype.unwrap == 'unicode_w': + if mytype.unwrap == 'bytes_w': if len(item) != 1: raise oefmt(space.w_TypeError, "array item must be char") item = item[0] return rffi.cast(mytype.itemtype, item) + if mytype.unwrap == 'utf8_len_w': + utf8, lgt = item + if lgt != 1: + raise oefmt(space.w_TypeError, "array item must be char") + uchar = rutf8.codepoint_at_pos(utf8, 0) + return rffi.cast(mytype.itemtype, uchar) # # "regular" case: it fits in an rpython integer (lltype.Signed) # or it is a float @@ -1007,7 +1013,9 @@ elif mytype.typecode == 'c': return space.newbytes(item) elif mytype.typecode == 'u': - return space.newunicode(item) + code = ord(item) + return 
space.newutf8(rutf8.unichr_as_utf8(code), 1, + rutf8.get_flag_from_code(code)) assert 0, "unreachable" # interface _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit