Author: fijal Branch: unicode-utf8 Changeset: r93156:8fac293591e9 Date: 2017-11-24 10:04 +0100 http://bitbucket.org/pypy/pypy/changeset/8fac293591e9/
Log: merge diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py --- a/pypy/module/_io/interp_textio.py +++ b/pypy/module/_io/interp_textio.py @@ -223,14 +223,7 @@ def _find_line_ending(self, line, start, end): size = end - start - if self.readtranslate: - # Newlines are already translated, only search for \n - pos = line.find('\n', start, end) - if pos >= 0: - return pos + 1, 0 - else: - return -1, size - elif self.readuniversal: + if self.readuniversal: # Universal newline search. Find any of \r, \r\n, \n # The decoder ensures that \r\n are not split in two pieces i = start @@ -249,16 +242,22 @@ return i + 1, 0 else: return i, 0 + if self.readtranslate: + # Newlines are already translated, only search for \n + newline = '\n' else: # Non-universal mode. - pos = line.find(self.readnl, start, end) - if pos >= 0: - return pos + len(self.readnl), 0 - else: - pos = line.find(self.readnl[0], start, end) - if pos >= 0: - return -1, pos - start - return -1, size + newline = self.readnl + end_scan = end - len(newline) + 1 + for i in range(start, end_scan): + ch = line[i] + if ch == newline[0]: + for j in range(1, len(newline)): + if line[i + j] != newline[j]: + break + else: + return i + len(newline), 0 + return -1, end_scan W_TextIOBase.typedef = TypeDef( @@ -548,6 +547,10 @@ self.decoded_chars_used += size return chars + def _has_data(self): + return (self.decoded_chars is not None and + self.decoded_chars_used < len(self.decoded_chars)) + def _read_chunk(self, space): """Read and decode the next chunk of data from the BufferedReader. The return value is True unless EOF was reached. The decoded string @@ -595,6 +598,19 @@ return not eof + def _ensure_data(self, space): + while not self._has_data(): + try: + if not self._read_chunk(space): + self._unset_decoded() + self.snapshot = None + return False + except OperationError as e: + if trap_eintr(space, e): + continue + raise + return True + def next_w(self, space): self._check_attached(space) self.telling = False @@ -628,23 +644,13 @@ builder = StringBuilder(size) # Keep reading chunks until we have n characters to return - while True: + while remaining > 0: + if not self._ensure_data(space): + break data = self._get_decoded_chars(remaining) builder.append(data) remaining -= len(data) - if remaining <= 0: # Done - break - - try: - if not self._read_chunk(space): - # EOF - break - except OperationError as e: - if trap_eintr(space, e): - continue - raise - return space.new_from_utf8(builder.build()) def readline_w(self, space, w_limit=None): @@ -660,20 +666,9 @@ while True: # First, get some data if necessary - has_data = True - while not self.decoded_chars: - try: - if not self._read_chunk(space): - has_data = False - break - except OperationError as e: - if trap_eintr(space, e): - continue - raise + has_data = self._ensure_data(space) if not has_data: # end of file - self._unset_decoded() - self.snapshot = None start = endpos = offset_to_buffer = 0 break _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit