Author: Matti Picus <[email protected]>
Branch: unicode-utf8-py3
Changeset: r95660:a041db80ea09
Date: 2019-01-17 00:43 +0200
http://bitbucket.org/pypy/pypy/changeset/a041db80ea09/
Log: merge unicode-utf8 into branch
diff --git a/pypy/module/_io/interp_stringio.py b/pypy/module/_io/interp_stringio.py
--- a/pypy/module/_io/interp_stringio.py
+++ b/pypy/module/_io/interp_stringio.py
@@ -1,4 +1,4 @@
-from rpython.rlib.rutf8 import get_utf8_length
+from rpython.rlib.rutf8 import get_utf8_length, next_codepoint_pos
from pypy.interpreter.error import OperationError, oefmt
from pypy.interpreter.typedef import (
@@ -11,8 +11,16 @@
class UnicodeIO(object):
def __init__(self, data=None, pos=0):
if data is None:
- data = []
- self.data = data
+ data = ''
+ self.data = []
+ self.pos = 0
+ # break the data into unicode codepoints
+ _pos = 0
+ while _pos < pos:
+ _pos = next_codepoint_pos(data, _pos)
+ if _pos >= len(data):
+ break
+ self.write(data[_pos:])
self.pos = pos
def resize(self, newlength):
@@ -90,12 +98,14 @@
return result
def write(self, string):
- length = len(string)
+ length = get_utf8_length(string)
if self.pos + length > len(self.data):
self.resize(self.pos + length)
-
+ pos = 0
for i in range(length):
- self.data[self.pos + i] = string[i]
+ nextpos = next_codepoint_pos(string, pos)
+ self.data[self.pos + i] = string[pos:nextpos]
+ pos = nextpos
self.pos += length
def seek(self, pos):
@@ -192,7 +202,7 @@
if pos < 0:
raise oefmt(space.w_ValueError,
"position value cannot be negative")
- self.buf = UnicodeIO(list(initval), pos)
+ self.buf = UnicodeIO(initval, pos)
if not space.is_w(w_dict, space.w_None):
if not space.isinstance_w(w_dict, space.w_dict):
raise oefmt(
diff --git a/pypy/module/_io/test/test_stringio.py b/pypy/module/_io/test/test_stringio.py
--- a/pypy/module/_io/test/test_stringio.py
+++ b/pypy/module/_io/test/test_stringio.py
@@ -42,6 +42,17 @@
assert buf[5:] == sio.read(900)
assert "" == sio.read()
+ def test_read_binary(self):
+ # data is from a test_imghdr test for a GIF file
+ import io
+ buf_in = (u'\x47\x49\x46\x38\x39\x61\x10\x00\x10\x00\xf6\x64\x00\xeb'
+ u'\xbb\x18\xeb\xbe\x21\xf3\xc1\x1a\xfa\xc7\x19\xfd\xcb\x1b'
+ u'\xff\xcc\x1c\xeb')
+ assert len(buf_in) == 32
+ sio = io.StringIO(buf_in)
+ buf_out = sio.read(32)
+ assert buf_in == buf_out
+
def test_readline(self):
import io
sio = io.StringIO('123\n456')
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit