Author: fijal
Branch: unicode-utf8
Changeset: r92890:f7f1771c68d3
Date: 2017-10-31 18:43 +0100
http://bitbucket.org/pypy/pypy/changeset/f7f1771c68d3/

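Log:    fix readbuf_w: pack code points straight from the utf8 string
        (via rutf8) instead of decoding it to unicode first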

diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -87,13 +87,15 @@
 
     def readbuf_w(self, space):
         # XXX for now
-        from rpython.rlib.rstruct.unichar import pack_unichar, UNICODE_SIZE
-        v = self._utf8.decode("utf8")
-        builder = MutableStringBuffer(len(v) * UNICODE_SIZE)
+        from rpython.rlib.rstruct.unichar import pack_codepoint, UNICODE_SIZE
+        builder = MutableStringBuffer(self._len() * UNICODE_SIZE)
         pos = 0
-        for unich in v:
-            pack_unichar(unich, builder, pos)
+        i = 0
+        while i < len(self._utf8):
+            unich = rutf8.codepoint_at_pos(self._utf8, i)
+            pack_codepoint(unich, builder, pos)
             pos += UNICODE_SIZE
+            i = rutf8.next_codepoint_pos(self._utf8, i)
         return StringBuffer(builder.finish())
 
     def writebuf_w(self, space):
diff --git a/rpython/rlib/rstruct/unichar.py b/rpython/rlib/rstruct/unichar.py
--- a/rpython/rlib/rstruct/unichar.py
+++ b/rpython/rlib/rstruct/unichar.py
@@ -12,24 +12,27 @@
 BIGENDIAN = sys.byteorder == "big"
 
 def pack_unichar(unich, buf, pos):
+    pack_codepoint(ord(unich), buf, pos)
+
+def pack_codepoint(unich, buf, pos):
     if UNICODE_SIZE == 2:
         if BIGENDIAN:
-            buf.setitem(pos,   chr(ord(unich) >> 8))
-            buf.setitem(pos+1, chr(ord(unich) & 0xFF))
+            buf.setitem(pos,   chr(unich >> 8))
+            buf.setitem(pos+1, chr(unich & 0xFF))
         else:
-            buf.setitem(pos,   chr(ord(unich) & 0xFF))
-            buf.setitem(pos+1, chr(ord(unich) >> 8))
+            buf.setitem(pos,   chr(unich & 0xFF))
+            buf.setitem(pos+1, chr(unich >> 8))
     else:
         if BIGENDIAN:
-            buf.setitem(pos,   chr(ord(unich) >> 24))
-            buf.setitem(pos+1, chr((ord(unich) >> 16) & 0xFF))
-            buf.setitem(pos+2, chr((ord(unich) >> 8) & 0xFF))
-            buf.setitem(pos+3, chr(ord(unich) & 0xFF))
+            buf.setitem(pos,   chr(unich >> 24))
+            buf.setitem(pos+1, chr((unich >> 16) & 0xFF))
+            buf.setitem(pos+2, chr((unich >> 8) & 0xFF))
+            buf.setitem(pos+3, chr(unich & 0xFF))
         else:
-            buf.setitem(pos,   chr(ord(unich) & 0xFF))
-            buf.setitem(pos+1, chr((ord(unich) >> 8) & 0xFF))
-            buf.setitem(pos+2, chr((ord(unich) >> 16) & 0xFF))
-            buf.setitem(pos+3, chr(ord(unich) >> 24))
+            buf.setitem(pos,   chr(unich & 0xFF))
+            buf.setitem(pos+1, chr((unich >> 8) & 0xFF))
+            buf.setitem(pos+2, chr((unich >> 16) & 0xFF))
+            buf.setitem(pos+3, chr(unich >> 24))
 
 def unpack_unichar(rawstring):
     assert len(rawstring) == UNICODE_SIZE
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to