Author: Ronan Lamy <[email protected]> Branch: unicode-utf8 Changeset: r93170:f9a1926628b2 Date: 2017-11-24 20:22 +0000 http://bitbucket.org/pypy/pypy/changeset/f9a1926628b2/
Log: hg merge default diff --git a/extra_tests/test_textio.py b/extra_tests/test_textio.py new file mode 100644 --- /dev/null +++ b/extra_tests/test_textio.py @@ -0,0 +1,27 @@ +from hypothesis import given, strategies as st + +from io import BytesIO, TextIOWrapper + +LINESEP = ['', '\r', '\n', '\r\n'] + +@st.composite +def text_with_newlines(draw): + sep = draw(st.sampled_from(LINESEP)) + lines = draw(st.lists(st.text(max_size=10), max_size=10)) + return sep.join(lines) + +@given(txt=text_with_newlines(), + mode=st.sampled_from(['\r', '\n', '\r\n', '']), + limit=st.integers(min_value=-1)) +def test_readline(txt, mode, limit): + textio = TextIOWrapper(BytesIO(txt.encode('utf-8')), newline=mode) + lines = [] + while True: + line = textio.readline(limit) + if limit > 0: + assert len(line) < limit + if line: + lines.append(line) + else: + break + assert u''.join(lines) == txt diff --git a/pypy/module/_continuation/test/conftest.py b/pypy/module/_continuation/test/conftest.py new file mode 100644 --- /dev/null +++ b/pypy/module/_continuation/test/conftest.py @@ -0,0 +1,7 @@ +import pytest +import sys + +def pytest_configure(config): + if sys.platform.startswith('linux'): + from rpython.rlib.rvmprof.cintf import configure_libbacktrace_linux + configure_libbacktrace_linux() diff --git a/pypy/module/_io/interp_stringio.py b/pypy/module/_io/interp_stringio.py --- a/pypy/module/_io/interp_stringio.py +++ b/pypy/module/_io/interp_stringio.py @@ -174,18 +174,16 @@ start = self.pos if limit < 0 or limit > len(self.buf) - self.pos: limit = len(self.buf) - self.pos + assert limit >= 0 - assert limit >= 0 - end = start + limit - - endpos, consumed = self._find_line_ending( + endpos, found = self._find_line_ending( # XXX: super inefficient, makes a copy of the entire contents. 
u"".join(self.buf), start, - end + limit ) - if endpos < 0: - endpos = end + if not found: + endpos = start + limit assert endpos >= 0 self.pos = endpos return space.newunicode(u"".join(self.buf[start:endpos])) diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py --- a/pypy/module/_io/interp_textio.py +++ b/pypy/module/_io/interp_textio.py @@ -221,44 +221,49 @@ def newlines_get_w(self, space): return space.w_None - def _find_line_ending(self, line, start, end): - size = end - start + def _find_newline_universal(self, line, start, limit): + # Universal newline search. Find any of \r, \r\n, \n + # The decoder ensures that \r\n are not split in two pieces + limit = min(limit, len(line) - start) + end = start + limit + i = start + while i < end: + ch = line[i] + i += 1 + if ch == '\n': + return i, True + if ch == '\r': + if i >= end: + break + if line[i] == '\n': + return i + 1, True + else: + return i, True + return end, False + + def _find_marker(self, marker, line, start, limit): + limit = min(limit, len(line) - start) + end = start + limit + for i in range(start, end - len(marker) + 1): + ch = line[i] + if ch == marker[0]: + for j in range(1, len(marker)): + if line[i + j] != marker[j]: + break # from inner loop + else: + return i + len(marker), True + return end - len(marker) + 1, False + + def _find_line_ending(self, line, start, limit): if self.readuniversal: - # Universal newline search. Find any of \r, \r\n, \n - # The decoder ensures that \r\n are not split in two pieces - i = start - while True: - # Fast path for non-control chars. - while i < end and line[i] > '\r': - i += 1 - if i >= end: - return -1, size - ch = line[i] - i += 1 - if ch == '\n': - return i, 0 - if ch == '\r': - if line[i] == '\n': - return i + 1, 0 - else: - return i, 0 + return self._find_newline_universal(line, start, limit) if self.readtranslate: # Newlines are already translated, only search for \n newline = '\n' else: # Non-universal mode. 
newline = self.readnl - end_scan = end - len(newline) + 1 - for i in range(start, end_scan): - ch = line[i] - if ch == newline[0]: - for j in range(1, len(newline)): - if line[i + j] != newline[j]: - break - else: - return i + len(newline), 0 - return -1, end_scan - + return self._find_marker(newline, line, start, limit) W_TextIOBase.typedef = TypeDef( '_io._TextIOBase', W_IOBase.typedef, @@ -661,7 +666,7 @@ limit = convert_size(space, w_limit) line = None - remaining = None + remnant = None builder = StringBuilder() while True: @@ -669,61 +674,60 @@ has_data = self._ensure_data(space) if not has_data: # end of file - start = endpos = offset_to_buffer = 0 + start = end_scan = 0 break - if not remaining: - line = self.decoded_chars - start = self.decoded_chars_used - offset_to_buffer = 0 + if remnant: + assert not self.readtranslate and self.readnl == '\r\n' + assert self.decoded_chars_used == 0 + if remnant == '\r' and self.decoded_chars[0] == '\n': + builder.append('\r\n') + self.decoded_chars_used = 1 + line = remnant = None + start = end_scan = 0 + break + else: + builder.append(remnant) + remnant = None + continue + + line = self.decoded_chars + start = self.decoded_chars_used + if limit > 0: + remaining = limit - builder.getlength() + assert remaining >= 0 else: - assert self.decoded_chars_used == 0 - line = remaining + self.decoded_chars - start = 0 - offset_to_buffer = len(remaining) - remaining = None + remaining = sys.maxint + end_scan, found = self._find_line_ending(line, start, remaining) + assert end_scan >= 0 + if found: + break - line_len = len(line) - endpos, consumed = self._find_line_ending(line, start, line_len) - chunked = builder.getlength() - if endpos >= 0: - if limit >= 0 and endpos >= start + limit - chunked: - endpos = start + limit - chunked - assert endpos >= 0 - break - assert consumed >= 0 - - # We can put aside up to `endpos` - endpos = consumed + start - if limit >= 0 and endpos >= start + limit - chunked: + if limit >= 0 and end_scan 
- start >= remaining: # Didn't find line ending, but reached length limit - endpos = start + limit - chunked - assert endpos >= 0 break # No line ending seen yet - put aside current data - if endpos > start: - s = line[start:endpos] + if end_scan > start: + s = line[start:end_scan] builder.append(s) - # There may be some remaining bytes we'll have to prepend to the + # There may be some remaining chars we'll have to prepend to the # next chunk of data - if endpos < line_len: - remaining = line[endpos:] + if end_scan < len(line): + remnant = line[end_scan:] line = None # We have consumed the buffer self._unset_decoded() if line: # Our line ends in the current buffer - decoded_chars_used = endpos - offset_to_buffer - assert decoded_chars_used >= 0 - self.decoded_chars_used = decoded_chars_used - if start > 0 or endpos < len(line): - line = line[start:endpos] + self.decoded_chars_used = end_scan + if start > 0 or end_scan < len(line): + line = line[start:end_scan] builder.append(line) - elif remaining: - builder.append(remaining) + elif remnant: + builder.append(remnant) result = builder.build() return space.new_from_utf8(result) diff --git a/pypy/module/_io/test/test_interp_textio.py b/pypy/module/_io/test/test_interp_textio.py new file mode 100644 --- /dev/null +++ b/pypy/module/_io/test/test_interp_textio.py @@ -0,0 +1,33 @@ +from hypothesis import given, strategies as st, assume +from pypy.module._io.interp_bytesio import W_BytesIO +from pypy.module._io.interp_textio import W_TextIOWrapper + +LINESEP = ['', '\r', '\n', '\r\n'] + +@st.composite +def text_with_newlines(draw): + sep = draw(st.sampled_from(LINESEP)) + lines = draw(st.lists(st.text(max_size=10), max_size=10)) + return sep.join(lines) + +@given(txt=text_with_newlines(), + mode=st.sampled_from(['\r', '\n', '\r\n', '']), + limit=st.integers(min_value=-1)) +def test_readline(space, txt, mode, limit): + assume(limit != 0) + w_stream = W_BytesIO(space) + w_stream.descr_init(space, 
space.newbytes(txt.encode('utf-8'))) + w_textio = W_TextIOWrapper(space) + w_textio.descr_init( + space, w_stream, encoding='utf-8', + w_newline=space.newtext(mode)) + lines = [] + while True: + line = space.unicode_w(w_textio.readline_w(space, space.newint(limit))) + if limit > 0: + assert len(line) <= limit + if line: + lines.append(line) + else: + break + assert u''.join(lines) == txt diff --git a/requirements.txt b/requirements.txt --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,7 @@ cffi>=1.4.0 -vmprof>=0.4.10 # required to parse log files in rvmprof tests + +# parse log files in rvmprof tests +vmprof>=0.4.10; 'x86' in platform.machine #skip arm, s390x # hypothesis is used for test generation on untranslated tests hypothesis diff --git a/rpython/rlib/rvmprof/cintf.py b/rpython/rlib/rvmprof/cintf.py --- a/rpython/rlib/rvmprof/cintf.py +++ b/rpython/rlib/rvmprof/cintf.py @@ -9,6 +9,7 @@ from rpython.rtyper.tool import rffi_platform as platform from rpython.rlib import rthread, jit from rpython.rlib.objectmodel import we_are_translated +from rpython.config.translationoption import get_translation_config class VMProfPlatformUnsupported(Exception): pass @@ -133,11 +134,17 @@ #endif """]) +if get_translation_config() is None: + # tests need the full eci here + _eci = global_eci +else: + _eci = auto_eci + vmprof_stop_sampling = rffi.llexternal("vmprof_stop_sampling", [], - rffi.INT, compilation_info=auto_eci, + rffi.INT, compilation_info=_eci, _nowrapper=True) vmprof_start_sampling = rffi.llexternal("vmprof_start_sampling", [], - lltype.Void, compilation_info=auto_eci, + lltype.Void, compilation_info=_eci, _nowrapper=True) _______________________________________________ pypy-commit mailing list [email protected] https://mail.python.org/mailman/listinfo/pypy-commit
