Author: Richard Plangger <[email protected]>
Branch: py3.5-bz2-lzma
Changeset: r87432:7e400cfdf59e
Date: 2016-09-28 11:43 +0200
http://bitbucket.org/pypy/pypy/changeset/7e400cfdf59e/
Log: progress on the lzma changes to allow max_length parameter
diff --git a/lib_pypy/_lzma.py b/lib_pypy/_lzma.py
--- a/lib_pypy/_lzma.py
+++ b/lib_pypy/_lzma.py
@@ -445,6 +445,7 @@
self.eof = False
self.lzs = _new_lzma_stream()
self._bufsiz = max(8192, io.DEFAULT_BUFFER_SIZE)
+ self.needs_input = True
if format == FORMAT_AUTO:
catch_lzma_error(m.lzma_auto_decoder, self.lzs, memlimit,
decoder_flags)
@@ -473,9 +474,9 @@
else:
raise ValueError("invalid...")
- def decompress(self, data):
+ def decompress(self, data, max_length=-1):
"""
- decompress(data) -> bytes
+ decompress(data, max_length=-1) -> bytes
Provide data to the decompressor object. Returns a chunk of
decompressed data if possible, or b"" otherwise.
@@ -487,18 +488,71 @@
with self.lock:
if self.eof:
raise EOFError("Already...")
- return self._decompress(data)
+ lzs = self.lzs
+ data = to_bytes(data)
+ buf = ffi.new('char[]', data)
+ buf_size = len(data)
+ if lzs.next_in:
+ # in this case there is data left that needs to be processed before the first
+ # argument can be processed
+ addr_input_buffer = int(ffi.cast('uintptr_t',
self.input_buffer))
+ addr_next_in = int(ffi.cast('uintptr_t', lzs.next_in))
+ avail_now = (addr_input_buffer + self.input_buffer_size) - \
+ (addr_next_in + lzs.avail_in)
+ avail_total = self.input_buffer_size - lzs.avail_in
+ if avail_total < len:
+ # resize the buffer, it is too small!
+ pass
+ elif avail_now < len:
+ # move all data to the front
+ ffi.memmove(self.input_buffer, lzs.next_in, lzs.avail_in)
+ ffi.memmove(lzs.next_in+lzs.avail_in, buf, buf_size)
+ lzn.avail_in += buf_size
+ used_input_buffer = True
+ else:
+ lzn.avail_in = buf_size
+ lzn.next_in = buf
+ used_input_buffer = False
- def _decompress(self, data):
+ result = self._decompress(buf, buf_len, max_length)
+
+ if self.eof:
+ self.needs_input = False
+ if lzs.avail_in > 0:
+ self.unused_data = ffi.buffer(lzs.next_in, lzs.avail_in)[:]
+ return result
+ elif lzs.avail_in == 0:
+ # completed successfully!
+ self.needs_input = True
+ lzs.next_in = None
+ else:
+ self.needs_input = False
+ if not used_input_buffer:
+ # free the buffer, it is too small
+ if self.input_buffer is ffi.NULL and \
+ self.input_buffer_size < lzs.avail_in:
+ m.free(self.input_buffer)
+ self.input_buffer = None
+
+ # allocate if necessary
+ if self.input_buffer is None:
+ self.input_buffer = m.malloc(lzs.avail_in)
+ self.input_buffer_size = lzs.avail_in
+
+ ffi.memmove(self.input_buffer, lzs.next_in, lzs.avail_in)
+ lzs.next_in = self.input_buffer
+
+ return result
+
+ def _decompress(self, buf, buf_len, max_length):
lzs = self.lzs
- # we need in_ so that lzs.next_in doesn't get garbage collected until
- # in_ goes out of scope
- data = to_bytes(data)
- lzs.next_in = in_ = ffi.new('char[]', data)
- lzs.avail_in = len(data)
+ lzs.next_in = buf
+ lzs.avail_in = buf_len
bufsiz = self._bufsiz
+ if not (max_length < 0 or max_length > io.DEFAULT_BUFFER_SIZE):
+ bufsiz = max_length
lzs.next_out = orig_out = m.malloc(bufsiz)
if orig_out == ffi.NULL:
@@ -519,13 +573,13 @@
if ret == m.LZMA_STREAM_END:
self.eof = True
- if lzs.avail_in > 0:
- self.unused_data = ffi.buffer(lzs.next_in,
lzs.avail_in)[:]
break
elif lzs.avail_in == 0:
# it ate everything
break
elif lzs.avail_out == 0:
+ if data_size == max_length:
+ break
# ran out of space in the output buffer, let's grow it
bufsiz += (bufsiz >> 3) + 6
next_out = m.realloc(orig_out, bufsiz)
diff --git a/pypy/module/bz2/interp_bz2.py b/pypy/module/bz2/interp_bz2.py
--- a/pypy/module/bz2/interp_bz2.py
+++ b/pypy/module/bz2/interp_bz2.py
@@ -386,7 +386,7 @@
self.running = False
self.unused_data = ""
self.needs_input = True
- self.input_buffer = None
+ self.input_buffer = ""
self.left_to_process = 0
self._init_bz2decomp()
@@ -456,6 +456,7 @@
if self.left_to_process != 0:
end = len(data)
start = end - self.left_to_process
+ assert start > 0
self.unused_data = data[start:]
res = out.make_result_string()
return self.space.newbytes(res)
@@ -476,7 +477,7 @@
if data == '':
return self.space.newbytes('')
datalen = len(data)
- if self.input_buffer:
+ if len(self.input_buffer) > 0:
input_buffer_in_use = True
result = self._decompress_buf(self.input_buffer, max_length)
else:
@@ -485,12 +486,14 @@
result = self._decompress_buf(data, max_length)
if self.left_to_process == 0:
- self.input_buffer = None
+ self.input_buffer = ""
self.needs_input = True
else:
self.needs_input = False
if not input_buffer_in_use:
- self.input_buffer = data[datalen-self.left_to_process-1:]
+ start = datalen-self.left_to_process-1
+ assert start > 0
+ self.input_buffer = data[start:]
return result
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit