Author: Ronan Lamy <ronan.l...@gmail.com> Branch: Changeset: r93172:e1dbf4f46c45 Date: 2017-11-25 01:15 +0000 http://bitbucket.org/pypy/pypy/changeset/e1dbf4f46c45/
Log: Extract DecodeBuffer object from W_TextIOWrapper diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py --- a/pypy/module/_io/interp_textio.py +++ b/pypy/module/_io/interp_textio.py @@ -333,6 +333,45 @@ self.input = input +class DecodeBuffer(object): + def __init__(self): + self.text = None + self.pos = 0 + + def set(self, space, w_decoded): + check_decoded(space, w_decoded) + self.text = space.unicode_w(w_decoded) + self.pos = 0 + + def reset(self): + self.text = None + self.pos = 0 + + def get_chars(self, size): + if self.text is None: + return u"" + + available = len(self.text) - self.pos + if size < 0 or size > available: + size = available + assert size >= 0 + + if self.pos > 0 or size < available: + start = self.pos + end = self.pos + size + assert start >= 0 + assert end >= 0 + chars = self.text[start:end] + else: + chars = self.text + + self.pos += size + return chars + + def has_data(self): + return (self.text is not None and self.pos < len(self.text)) + + def check_decoded(space, w_decoded): if not space.isinstance_w(w_decoded, space.w_unicode): msg = "decoder should return a string result, not '%T'" @@ -346,8 +385,7 @@ self.w_encoder = None self.w_decoder = None - self.decoded_chars = None # buffer for text returned from decoder - self.decoded_chars_used = 0 # offset into _decoded_chars for read() + self.decoded = DecodeBuffer() self.pending_bytes = None # list of bytes objects waiting to be # written, or NULL self.chunk_size = 8192 @@ -515,44 +553,10 @@ # _____________________________________________________________ # read methods - def _unset_decoded(self): - self.decoded_chars = None - self.decoded_chars_used = 0 - - def _set_decoded(self, space, w_decoded): - check_decoded(space, w_decoded) - self.decoded_chars = space.unicode_w(w_decoded) - self.decoded_chars_used = 0 - - def _get_decoded_chars(self, size): - if self.decoded_chars is None: - return u"" - - available = len(self.decoded_chars) - self.decoded_chars_used - if size < 0 or size > available: - size = available - assert size >= 0 - - if self.decoded_chars_used > 0 or size < available: - start = self.decoded_chars_used - end = self.decoded_chars_used + size - assert start >= 0 - assert end >= 0 - chars = self.decoded_chars[start:end] - else: - chars = self.decoded_chars - - self.decoded_chars_used += size - return chars - - def _has_data(self): - return (self.decoded_chars is not None and - self.decoded_chars_used < len(self.decoded_chars)) - def _read_chunk(self, space): """Read and decode the next chunk of data from the BufferedReader. The return value is True unless EOF was reached. The decoded string - is placed in self._decoded_chars (replacing its previous value). + is placed in self.decoded (replacing its previous value). The entire input chunk is sent to the decoder, though some of it may remain buffered in the decoder, yet to be converted.""" @@ -572,7 +576,7 @@ dec_buffer = None dec_flags = 0 - # Read a chunk, decode it, and put the result in self._decoded_chars + # Read a chunk, decode it, and put the result in self.decoded w_input = space.call_method(self.w_buffer, "read1", space.newint(self.chunk_size)) @@ -584,7 +588,7 @@ eof = space.len_w(w_input) == 0 w_decoded = space.call_method(self.w_decoder, "decode", w_input, space.newbool(eof)) - self._set_decoded(space, w_decoded) + self.decoded.set(space, w_decoded) if space.len_w(w_decoded) > 0: eof = False @@ -597,10 +601,10 @@ return not eof def _ensure_data(self, space): - while not self._has_data(): + while not self.decoded.has_data(): try: if not self._read_chunk(space): - self._unset_decoded() + self.decoded.reset() self.snapshot = None return False except OperationError as e: @@ -633,7 +637,7 @@ w_bytes = space.call_method(self.w_buffer, "read") w_decoded = space.call_method(self.w_decoder, "decode", w_bytes, space.w_True) check_decoded(space, w_decoded) - w_result = space.newunicode(self._get_decoded_chars(-1)) + w_result = space.newunicode(self.decoded.get_chars(-1)) w_final = space.add(w_result, w_decoded) self.snapshot = None return w_final @@ -645,7 +649,7 @@ while remaining > 0: if not self._ensure_data(space): break - data = self._get_decoded_chars(remaining) + data = self.decoded.get_chars(remaining) builder.append(data) remaining -= len(data) @@ -672,10 +676,10 @@ if remnant: assert not self.readtranslate and self.readnl == u'\r\n' - assert self.decoded_chars_used == 0 - if remnant == u'\r' and self.decoded_chars[0] == u'\n': + assert self.decoded.pos == 0 + if remnant == u'\r' and self.decoded.text[0] == u'\n': builder.append(u'\r\n') - self.decoded_chars_used = 1 + self.decoded.pos = 1 line = remnant = None start = end_scan = 0 break @@ -684,8 +688,8 @@ remnant = None continue - line = self.decoded_chars - start = self.decoded_chars_used + line = self.decoded.text + start = self.decoded.pos if limit > 0: remaining = limit - builder.getlength() assert remaining >= 0 @@ -711,11 +715,11 @@ remnant = line[end_scan:] line = None # We have consumed the buffer - self._unset_decoded() + self.decoded.reset() if line: # Our line ends in the current buffer - self.decoded_chars_used = end_scan + self.decoded.pos = end_scan if start > 0 or end_scan < len(line): line = line[start:end_scan] builder.append(line) @@ -855,7 +859,7 @@ raise oefmt(space.w_IOError, "can't do nonzero end-relative seeks") space.call_method(self, "flush") - self._unset_decoded() + self.decoded.reset() self.snapshot = None if self.w_decoder: space.call_method(self.w_decoder, "reset") @@ -880,7 +884,7 @@ # Seek back to the safe start point space.call_method(self.w_buffer, "seek", space.newint(cookie.start_pos)) - self._unset_decoded() + self.decoded.reset() self.snapshot = None # Restore the decoder to its state from the safe start point. @@ -901,13 +905,13 @@ w_decoded = space.call_method(self.w_decoder, "decode", w_chunk, space.newbool(bool(cookie.need_eof))) - self._set_decoded(space, w_decoded) + self.decoded.set(space, w_decoded) # Skip chars_to_skip of the decoded characters - if len(self.decoded_chars) < cookie.chars_to_skip: + if len(self.decoded.text) < cookie.chars_to_skip: raise oefmt(space.w_IOError, "can't restore logical file position") - self.decoded_chars_used = cookie.chars_to_skip + self.decoded.pos = cookie.chars_to_skip else: self.snapshot = PositionSnapshot(cookie.dec_flags, "") @@ -933,7 +937,7 @@ w_pos = space.call_method(self.w_buffer, "tell") if self.w_decoder is None or self.snapshot is None: - assert not self.decoded_chars + assert not self.decoded.text return w_pos cookie = PositionCookie(space.bigint_w(w_pos)) @@ -944,11 +948,11 @@ cookie.start_pos -= len(input) # How many decoded characters have been used up since the snapshot? - if not self.decoded_chars_used: + if not self.decoded.pos: # We haven't moved from the snapshot point. return space.newlong_from_rbigint(cookie.pack()) - chars_to_skip = self.decoded_chars_used + chars_to_skip = self.decoded.pos # Starting from the snapshot position, we will walk the decoder # forward until it gives us enough decoded characters. _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit