Author: Matti Picus <matti.pi...@gmail.com>
Branch: unicode-utf8-py3
Changeset: r95574:2cb8cf7a5047
Date: 2019-01-03 07:55 +0200
http://bitbucket.org/pypy/pypy/changeset/2cb8cf7a5047/

Log:    take tests from py3.5, code from unicode-utf8

diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py
--- a/pypy/module/_io/interp_textio.py
+++ b/pypy/module/_io/interp_textio.py
@@ -317,35 +317,44 @@
     def __init__(self, text=None):
         self.text = text
         self.pos = 0
+        self.upos = 0
 
     def set(self, space, w_decoded):
         check_decoded(space, w_decoded)
         self.text = space.utf8_w(w_decoded)
         self.pos = 0
+        self.upos = 0
 
     def reset(self):
         self.text = None
         self.pos = 0
+        self.upos = 0
 
     def get_chars(self, size):
-        if self.text is None:
+        if self.text is None or size == 0:
             return ""
 
-        available = len(self.text) - self.pos
+        lgt = codepoints_in_utf8(self.text)
+        available = lgt - self.upos
         if size < 0 or size > available:
             size = available
         assert size >= 0
 
         if self.pos > 0 or size < available:
             start = self.pos
-            end = self.pos + size
+            pos = start
+            for  i in range(size):
+                pos = next_codepoint_pos(self.text, pos)
+                self.upos += 1
             assert start >= 0
-            assert end >= 0
-            chars = self.text[start:end]
+            assert pos >= 0
+            chars = self.text[start:pos]
+            self.pos = pos
         else:
             chars = self.text
+            self.pos = len(self.text)
+            self.upos = lgt
 
-        self.pos += size
         return chars
 
     def has_data(self):
@@ -357,16 +366,24 @@
     def next_char(self):
         if self.exhausted():
             raise StopIteration
-        ch = self.text[self.pos]
-        self.pos = next_codepoint_pos(self.text, self.pos)
+        newpos = next_codepoint_pos(self.text, self.pos)
+        pos = self.pos
+        assert pos >= 0
+        assert newpos >= 0
+        ch = self.text[pos:newpos]
+        self.pos = newpos
+        self.upos += 1
         return ch
 
     def peek_char(self):
         # like next_char, but doesn't advance pos
         if self.exhausted():
             raise StopIteration
-        ch = self.text[self.pos]
-        return ch
+        newpos = next_codepoint_pos(self.text, self.pos)
+        pos = self.pos
+        assert pos >= 0
+        assert newpos >= 0
+        return self.text[pos:newpos]
 
     def find_newline_universal(self, limit):
         # Universal newline search. Find any of \r, \r\n, \n
@@ -416,6 +433,7 @@
                 except StopIteration:
                     # This is the tricky case: we found a \r right at the end
                     self.pos -= 1
+                    self.upos -= 1
                     return False
         return False
 
diff --git a/pypy/module/_io/test/test_interp_textio.py b/pypy/module/_io/test/test_interp_textio.py
--- a/pypy/module/_io/test/test_interp_textio.py
+++ b/pypy/module/_io/test/test_interp_textio.py
@@ -58,28 +58,34 @@
 
 @given(st.text())
 def test_read_buffer(text):
-    buf = DecodeBuffer(text.encode('utf-8'))
-    assert buf.get_chars(-1) == text.encode('utf-8')
+    buf = DecodeBuffer(text)
+    assert buf.get_chars(-1) == text
     assert buf.exhausted()
 
 @given(st.text(), st.lists(st.integers(min_value=0)))
 @example(u'\x80', [1])
 def test_readn_buffer(text, sizes):
-    buf = DecodeBuffer(text.encode('utf-8'))
+    buf = DecodeBuffer(text)
     strings = []
     for n in sizes:
         s = buf.get_chars(n)
         if not buf.exhausted():
-            assert len(s.decode('utf-8')) == n
+            assert len(s) == n
         else:
-            assert len(s.decode('utf-8')) <= n
+            assert len(s) <= n
         strings.append(s)
-    assert ''.join(strings) == text[:sum(sizes)].encode('utf-8')
+    assert ''.join(strings) == text[:sum(sizes)]
 
 @given(st.text())
+@example(u'\x800')
 def test_next_char(text):
-    buf = DecodeBuffer(text.encode('utf-8'))
-    for i in range(len(text)):
-        ch = buf.next_char()
-        assert ch == text[i].encode('utf-8')
+    buf = DecodeBuffer(text)
+    chars = []
+    try:
+        while True:
+            ch = buf.next_char()
+            chars.append(ch)
+    except StopIteration:
+        pass
     assert buf.exhausted()
+    assert u''.join(chars) == text
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to