Author: fijal
Branch: unicode-utf8
Changeset: r92890:f7f1771c68d3
Date: 2017-10-31 18:43 +0100
http://bitbucket.org/pypy/pypy/changeset/f7f1771c68d3/
Log: fix readbuf_w
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -87,13 +87,15 @@
def readbuf_w(self, space):
# XXX for now
- from rpython.rlib.rstruct.unichar import pack_unichar, UNICODE_SIZE
- v = self._utf8.decode("utf8")
- builder = MutableStringBuffer(len(v) * UNICODE_SIZE)
+ from rpython.rlib.rstruct.unichar import pack_codepoint, UNICODE_SIZE
+ builder = MutableStringBuffer(self._len() * UNICODE_SIZE)
pos = 0
- for unich in v:
- pack_unichar(unich, builder, pos)
+ i = 0
+ while i < len(self._utf8):
+ unich = rutf8.codepoint_at_pos(self._utf8, i)
+ pack_codepoint(unich, builder, pos)
pos += UNICODE_SIZE
+ i = rutf8.next_codepoint_pos(self._utf8, i)
return StringBuffer(builder.finish())
def writebuf_w(self, space):
diff --git a/rpython/rlib/rstruct/unichar.py b/rpython/rlib/rstruct/unichar.py
--- a/rpython/rlib/rstruct/unichar.py
+++ b/rpython/rlib/rstruct/unichar.py
@@ -12,24 +12,27 @@
BIGENDIAN = sys.byteorder == "big"
def pack_unichar(unich, buf, pos):
+ pack_codepoint(ord(unich), buf, pos)
+
+def pack_codepoint(unich, buf, pos):
if UNICODE_SIZE == 2:
if BIGENDIAN:
- buf.setitem(pos, chr(ord(unich) >> 8))
- buf.setitem(pos+1, chr(ord(unich) & 0xFF))
+ buf.setitem(pos, chr(unich >> 8))
+ buf.setitem(pos+1, chr(unich & 0xFF))
else:
- buf.setitem(pos, chr(ord(unich) & 0xFF))
- buf.setitem(pos+1, chr(ord(unich) >> 8))
+ buf.setitem(pos, chr(unich & 0xFF))
+ buf.setitem(pos+1, chr(unich >> 8))
else:
if BIGENDIAN:
- buf.setitem(pos, chr(ord(unich) >> 24))
- buf.setitem(pos+1, chr((ord(unich) >> 16) & 0xFF))
- buf.setitem(pos+2, chr((ord(unich) >> 8) & 0xFF))
- buf.setitem(pos+3, chr(ord(unich) & 0xFF))
+ buf.setitem(pos, chr(unich >> 24))
+ buf.setitem(pos+1, chr((unich >> 16) & 0xFF))
+ buf.setitem(pos+2, chr((unich >> 8) & 0xFF))
+ buf.setitem(pos+3, chr(unich & 0xFF))
else:
- buf.setitem(pos, chr(ord(unich) & 0xFF))
- buf.setitem(pos+1, chr((ord(unich) >> 8) & 0xFF))
- buf.setitem(pos+2, chr((ord(unich) >> 16) & 0xFF))
- buf.setitem(pos+3, chr(ord(unich) >> 24))
+ buf.setitem(pos, chr(unich & 0xFF))
+ buf.setitem(pos+1, chr((unich >> 8) & 0xFF))
+ buf.setitem(pos+2, chr((unich >> 16) & 0xFF))
+ buf.setitem(pos+3, chr(unich >> 24))
def unpack_unichar(rawstring):
assert len(rawstring) == UNICODE_SIZE
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit