Author: Armin Rigo <ar...@tunes.org>
Branch:
Changeset: r44690:69b1b4b69b35
Date: 2011-06-04 09:52 +0200
http://bitbucket.org/pypy/pypy/changeset/69b1b4b69b35/
Log: merge heads diff --git a/pypy/rlib/streamio.py b/pypy/rlib/streamio.py --- a/pypy/rlib/streamio.py +++ b/pypy/rlib/streamio.py @@ -141,7 +141,8 @@ def construct_stream_tower(stream, buffering, universal, reading, writing, binary): if buffering == 0: # no buffering - pass + if reading: # force some minimal buffering for readline() + stream = ReadlineInputStream(stream) elif buffering == 1: # line-buffering if writing: stream = LineBufferingOutputStream(stream) @@ -749,6 +750,113 @@ flush_buffers=False) +class ReadlineInputStream(Stream): + + """Minimal buffering input stream. + + Only does buffering for readline(). The other kinds of reads, and + all writes, are not buffered at all. + """ + + bufsize = 2**13 # 8 K + + def __init__(self, base, bufsize=-1): + self.base = base + self.do_read = base.read # function to fill buffer some more + self.do_seek = base.seek # seek to a byte offset + if bufsize == -1: # Get default from the class + bufsize = self.bufsize + self.bufsize = bufsize # buffer size (hint only) + self.buf = None # raw data (may contain "\n") + self.bufstart = 0 + + def flush_buffers(self): + if self.buf is not None: + try: + self.do_seek(self.bufstart-len(self.buf), 1) + except MyNotImplementedError: + pass + else: + self.buf = None + self.bufstart = 0 + + def readline(self): + if self.buf is not None: + i = self.buf.find('\n', self.bufstart) + else: + self.buf = '' + i = -1 + # + if i < 0: + self.buf = self.buf[self.bufstart:] + self.bufstart = 0 + while True: + bufsize = max(self.bufsize, len(self.buf) >> 2) + data = self.do_read(bufsize) + if not data: + result = self.buf # end-of-file reached + self.buf = None + return result + startsearch = len(self.buf) # there is no '\n' in buf so far + self.buf += data + i = self.buf.find('\n', startsearch) + if i >= 0: + break + # + i += 1 + result = self.buf[self.bufstart:i] + self.bufstart = i + return result + + def peek(self): + if self.buf is None: + return '' + if self.bufstart > 0: + self.buf = 
self.buf[self.bufstart:] + self.bufstart = 0 + return self.buf + + def tell(self): + pos = self.base.tell() + if self.buf is not None: + pos -= (len(self.buf) - self.bufstart) + return pos + + def readall(self): + result = self.base.readall() + if self.buf is not None: + result = self.buf[self.bufstart:] + result + self.buf = None + self.bufstart = 0 + return result + + def read(self, n): + if self.buf is None: + return self.do_read(n) + else: + m = n - (len(self.buf) - self.bufstart) + start = self.bufstart + if m > 0: + result = self.buf[start:] + self.do_read(m) + self.buf = None + self.bufstart = 0 + return result + elif n >= 0: + self.bufstart = start + n + return self.buf[start : self.bufstart] + else: + return '' + + seek = PassThrough("seek", flush_buffers=True) + write = PassThrough("write", flush_buffers=True) + truncate = PassThrough("truncate", flush_buffers=True) + flush = PassThrough("flush", flush_buffers=True) + flushable = PassThrough("flushable", flush_buffers=False) + close = PassThrough("close", flush_buffers=False) + try_to_find_file_descriptor = PassThrough("try_to_find_file_descriptor", + flush_buffers=False) + + class BufferingOutputStream(Stream): """Standard buffering output stream. diff --git a/pypy/rlib/test/test_streamio.py b/pypy/rlib/test/test_streamio.py --- a/pypy/rlib/test/test_streamio.py +++ b/pypy/rlib/test/test_streamio.py @@ -1008,6 +1008,75 @@ assert base.buf == data +class TestReadlineInputStream: + + packets = ["a", "b", "\n", "def", "\nxy\npq\nuv", "wx"] + lines = ["ab\n", "def\n", "xy\n", "pq\n", "uvwx"] + + def makeStream(self, seek=False, tell=False, bufsize=-1): + base = TSource(self.packets) + self.source = base + def f(*args): + if seek is False: + raise NotImplementedError # a bug! + if seek is None: + raise streamio.MyNotImplementedError # can be caught + raise ValueError(seek) # uh? 
+ if not tell: + base.tell = f + if not seek: + base.seek = f + return streamio.ReadlineInputStream(base, bufsize) + + def test_readline(self): + for file in [self.makeStream(), self.makeStream(bufsize=2)]: + i = 0 + while 1: + r = file.readline() + if r == "": + break + assert self.lines[i] == r + i += 1 + assert i == len(self.lines) + + def test_readline_and_read_interleaved(self): + for file in [self.makeStream(seek=True), + self.makeStream(seek=True, bufsize=2)]: + i = 0 + while 1: + firstchar = file.read(1) + if firstchar == "": + break + r = file.readline() + assert r != "" + assert self.lines[i] == firstchar + r + i += 1 + assert i == len(self.lines) + + def test_readline_and_read_interleaved_no_seek(self): + for file in [self.makeStream(seek=None), + self.makeStream(seek=None, bufsize=2)]: + i = 0 + while 1: + firstchar = file.read(1) + if firstchar == "": + break + r = file.readline() + assert r != "" + assert self.lines[i] == firstchar + r + i += 1 + assert i == len(self.lines) + + def test_readline_and_readall(self): + file = self.makeStream(seek=True, tell=True, bufsize=2) + r = file.readline() + assert r == 'ab\n' + assert file.tell() == 3 + r = file.readall() + assert r == 'def\nxy\npq\nuvwx' + r = file.readall() + assert r == '' + # Speed test _______________________________________________ pypy-commit mailing list pypy-commit@python.org http://mail.python.org/mailman/listinfo/pypy-commit