Author: fijal Branch: unicode-utf8 Changeset: r93329:0d84c39f767e Date: 2017-12-09 16:01 +0200 http://bitbucket.org/pypy/pypy/changeset/0d84c39f767e/
Log: fix struct module diff --git a/pypy/module/struct/formatiterator.py b/pypy/module/struct/formatiterator.py --- a/pypy/module/struct/formatiterator.py +++ b/pypy/module/struct/formatiterator.py @@ -1,6 +1,6 @@ from rpython.rlib.rarithmetic import (r_uint, r_ulonglong, r_longlong, maxint, intmask) -from rpython.rlib import jit +from rpython.rlib import jit, rutf8 from rpython.rlib.objectmodel import specialize from rpython.rlib.rstruct.error import StructError from rpython.rlib.rstruct.formatiterator import FormatIterator @@ -107,7 +107,7 @@ def accept_unicode_arg(self): w_obj = self.accept_obj_arg() - return self.space.unicode_w(w_obj) + return self.space.utf8_len_w(w_obj) def accept_float_arg(self): w_obj = self.accept_obj_arg() @@ -191,6 +191,9 @@ assert 0, "unreachable" self.result_w.append(w_value) + def append_utf8(self, value): + self.result_w.append(self.space.newutf8(rutf8.unichr_as_utf8(value), 1)) + def get_pos(self): return self.pos diff --git a/rpython/rlib/rstruct/nativefmttable.py b/rpython/rlib/rstruct/nativefmttable.py --- a/rpython/rlib/rstruct/nativefmttable.py +++ b/rpython/rlib/rstruct/nativefmttable.py @@ -4,7 +4,7 @@ """ import struct -from rpython.rlib import jit, longlong2float +from rpython.rlib import rutf8, longlong2float from rpython.rlib.objectmodel import specialize from rpython.rlib.rarithmetic import r_singlefloat, widen, intmask from rpython.rlib.rstruct import standardfmttable as std @@ -139,17 +139,17 @@ from rpython.rlib.rstruct import unichar def pack_unichar(fmtiter): - unistr = fmtiter.accept_unicode_arg() - if len(unistr) != 1: + utf8, lgt = fmtiter.accept_unicode_arg() + if lgt != 1: raise StructError("expected a unicode string of length 1") - c = unistr[0] # string->char conversion for the annotator - unichar.pack_unichar(c, fmtiter.wbuf, fmtiter.pos) + uchr = rutf8.codepoint_at_pos(utf8, 0) + unichar.pack_codepoint(uchr, fmtiter.wbuf, fmtiter.pos) fmtiter.advance(unichar.UNICODE_SIZE) @specialize.argtype(0) def unpack_unichar(fmtiter): data = fmtiter.read(unichar.UNICODE_SIZE) - fmtiter.appendobj(unichar.unpack_unichar(data)) + fmtiter.append_utf8(unichar.unpack_codepoint(data)) native_fmttable['u'] = {'size': unichar.UNICODE_SIZE, 'alignment': unichar.UNICODE_SIZE, diff --git a/rpython/rlib/rstruct/unichar.py b/rpython/rlib/rstruct/unichar.py --- a/rpython/rlib/rstruct/unichar.py +++ b/rpython/rlib/rstruct/unichar.py @@ -3,12 +3,8 @@ """ import sys -from rpython.rlib.runicode import MAXUNICODE -if MAXUNICODE <= 65535: - UNICODE_SIZE = 2 -else: - UNICODE_SIZE = 4 +UNICODE_SIZE = 4 BIGENDIAN = sys.byteorder == "big" def pack_unichar(unich, buf, pos): @@ -34,7 +30,7 @@ buf.setitem(pos+2, chr((unich >> 16) & 0xFF)) buf.setitem(pos+3, chr(unich >> 24)) -def unpack_unichar(rawstring): +def unpack_codepoint(rawstring): assert len(rawstring) == UNICODE_SIZE if UNICODE_SIZE == 2: if BIGENDIAN: @@ -54,4 +50,7 @@ ord(rawstring[1]) << 8 | ord(rawstring[2]) << 16 | ord(rawstring[3]) << 24) - return unichr(n) + return n + +def unpack_unichar(rawstring): + return unichr(unpack_codepoint(rawstring)) _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit