Author: Matti Picus <[email protected]>
Branch: unicode-utf8-py3
Changeset: r95660:a041db80ea09
Date: 2019-01-17 00:43 +0200
http://bitbucket.org/pypy/pypy/changeset/a041db80ea09/

Log:    merge unicode-utf8 into branch

diff --git a/pypy/module/_io/interp_stringio.py b/pypy/module/_io/interp_stringio.py
--- a/pypy/module/_io/interp_stringio.py
+++ b/pypy/module/_io/interp_stringio.py
@@ -1,4 +1,4 @@
-from rpython.rlib.rutf8 import get_utf8_length
+from rpython.rlib.rutf8 import get_utf8_length, next_codepoint_pos
 
 from pypy.interpreter.error import OperationError, oefmt
 from pypy.interpreter.typedef import (
@@ -11,8 +11,16 @@
 class UnicodeIO(object):
     def __init__(self, data=None, pos=0):
         if data is None:
-            data = []
-        self.data = data
+            data = ''
+        self.data = []
+        self.pos = 0
+        # break the data into unicode codepoints
+        _pos = 0
+        while _pos < pos:
+            _pos = next_codepoint_pos(data, _pos)
+            if _pos >= len(data):
+                break
+        self.write(data[_pos:])
         self.pos = pos
 
     def resize(self, newlength):
@@ -90,12 +98,14 @@
         return result
 
     def write(self, string):
-        length = len(string)
+        length = get_utf8_length(string)
         if self.pos + length > len(self.data):
             self.resize(self.pos + length)
-
+        pos = 0
         for i in range(length):
-            self.data[self.pos + i] = string[i]
+            nextpos = next_codepoint_pos(string, pos)
+            self.data[self.pos + i] = string[pos:nextpos]
+            pos = nextpos
         self.pos += length
 
     def seek(self, pos):
@@ -192,7 +202,7 @@
         if pos < 0:
             raise oefmt(space.w_ValueError,
                         "position value cannot be negative")
-        self.buf = UnicodeIO(list(initval), pos)
+        self.buf = UnicodeIO(initval, pos)
         if not space.is_w(w_dict, space.w_None):
             if not space.isinstance_w(w_dict, space.w_dict):
                 raise oefmt(
diff --git a/pypy/module/_io/test/test_stringio.py b/pypy/module/_io/test/test_stringio.py
--- a/pypy/module/_io/test/test_stringio.py
+++ b/pypy/module/_io/test/test_stringio.py
@@ -42,6 +42,17 @@
         assert buf[5:] == sio.read(900)
         assert "" == sio.read()
 
+    def test_read_binary(self):
+        # data is from a test_imghdr test for a GIF file
+        import io
+        buf_in = (u'\x47\x49\x46\x38\x39\x61\x10\x00\x10\x00\xf6\x64\x00\xeb'
+                  u'\xbb\x18\xeb\xbe\x21\xf3\xc1\x1a\xfa\xc7\x19\xfd\xcb\x1b'
+                  u'\xff\xcc\x1c\xeb')
+        assert len(buf_in) == 32
+        sio = io.StringIO(buf_in)
+        buf_out = sio.read(32)
+        assert buf_in == buf_out
+
     def test_readline(self):
         import io
         sio = io.StringIO('123\n456')
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to