Author: Matti Picus <matti.pi...@gmail.com>
Branch: unicode-utf8-py3
Changeset: r95574:2cb8cf7a5047
Date: 2019-01-03 07:55 +0200
http://bitbucket.org/pypy/pypy/changeset/2cb8cf7a5047/
Log: take tests from py3.5, code from unicode-utf8

diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py
--- a/pypy/module/_io/interp_textio.py
+++ b/pypy/module/_io/interp_textio.py
@@ -317,35 +317,44 @@
     def __init__(self, text=None):
         self.text = text
         self.pos = 0
+        self.upos = 0
 
     def set(self, space, w_decoded):
         check_decoded(space, w_decoded)
         self.text = space.utf8_w(w_decoded)
         self.pos = 0
+        self.upos = 0
 
     def reset(self):
         self.text = None
         self.pos = 0
+        self.upos = 0
 
     def get_chars(self, size):
-        if self.text is None:
+        if self.text is None or size == 0:
             return ""
 
-        available = len(self.text) - self.pos
+        lgt = codepoints_in_utf8(self.text)
+        available = lgt - self.upos
         if size < 0 or size > available:
             size = available
         assert size >= 0
 
         if self.pos > 0 or size < available:
             start = self.pos
-            end = self.pos + size
+            pos = start
+            for i in range(size):
+                pos = next_codepoint_pos(self.text, pos)
+                self.upos += 1
             assert start >= 0
-            assert end >= 0
-            chars = self.text[start:end]
+            assert pos >= 0
+            chars = self.text[start:pos]
+            self.pos = pos
         else:
             chars = self.text
+            self.pos = len(self.text)
+            self.upos = lgt
 
-        self.pos += size
         return chars
 
     def has_data(self):
@@ -357,16 +366,24 @@
     def next_char(self):
         if self.exhausted():
             raise StopIteration
-        ch = self.text[self.pos]
-        self.pos = next_codepoint_pos(self.text, self.pos)
+        newpos = next_codepoint_pos(self.text, self.pos)
+        pos = self.pos
+        assert pos >= 0
+        assert newpos >= 0
+        ch = self.text[pos:newpos]
+        self.pos = newpos
+        self.upos += 1
         return ch
 
     def peek_char(self):
         # like next_char, but doesn't advance pos
         if self.exhausted():
             raise StopIteration
-        ch = self.text[self.pos]
-        return ch
+        newpos = next_codepoint_pos(self.text, self.pos)
+        pos = self.pos
+        assert pos >= 0
+        assert newpos >= 0
+        return self.text[pos:newpos]
 
     def find_newline_universal(self, limit):
         # Universal newline search. Find any of \r, \r\n, \n
@@ -416,6 +433,7 @@
                 except StopIteration:
                     # This is the tricky case: we found a \r right at the end
                     self.pos -= 1
+                    self.upos -= 1
                     return False
         return False
 
diff --git a/pypy/module/_io/test/test_interp_textio.py b/pypy/module/_io/test/test_interp_textio.py
--- a/pypy/module/_io/test/test_interp_textio.py
+++ b/pypy/module/_io/test/test_interp_textio.py
@@ -58,28 +58,34 @@
 
 @given(st.text())
 def test_read_buffer(text):
-    buf = DecodeBuffer(text.encode('utf-8'))
-    assert buf.get_chars(-1) == text.encode('utf-8')
+    buf = DecodeBuffer(text)
+    assert buf.get_chars(-1) == text
     assert buf.exhausted()
 
 @given(st.text(), st.lists(st.integers(min_value=0)))
 @example(u'\x80', [1])
 def test_readn_buffer(text, sizes):
-    buf = DecodeBuffer(text.encode('utf-8'))
+    buf = DecodeBuffer(text)
     strings = []
     for n in sizes:
         s = buf.get_chars(n)
         if not buf.exhausted():
-            assert len(s.decode('utf-8')) == n
+            assert len(s) == n
         else:
-            assert len(s.decode('utf-8')) <= n
+            assert len(s) <= n
         strings.append(s)
-    assert ''.join(strings) == text[:sum(sizes)].encode('utf-8')
+    assert ''.join(strings) == text[:sum(sizes)]
 
 @given(st.text())
+@example(u'\x800')
 def test_next_char(text):
-    buf = DecodeBuffer(text.encode('utf-8'))
-    for i in range(len(text)):
-        ch = buf.next_char()
-        assert ch == text[i].encode('utf-8')
+    buf = DecodeBuffer(text)
+    chars = []
+    try:
+        while True:
+            ch = buf.next_char()
+            chars.append(ch)
+    except StopIteration:
+        pass
     assert buf.exhausted()
+    assert u''.join(chars) == text
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit