New issue 3090: lzma sometimes fails to decompress a file https://bitbucket.org/pypy/pypy/issues/3090/lzma-sometimes-fails-to-decompress-a-file
Hiroshi Miura: lzma.LZMADecompressor.decompress\(\) sometimes returns incomplete data. Python 3.6, Python 3.7 and pypy3 have the same issue, and Python 3.8rc1 fixes it. There is a CPython issue, issue21872 [https://bugs.python.org/issue21872](https://bugs.python.org/issue21872), and the fix is [https://github.com/python/cpython/pull/14048](https://github.com/python/cpython/pull/14048). Here is a test script to reproduce the problem; the test data is attached. ```python #! /usr/bin/env python3 import functools import lzma def test_lzma_return_no_last_byte(): filters = [{'id': 4}, {'id': 33, 'dict_size': 16777216}] target_size = 4302365 read_blocksize = 32248 chunk_list = [51832, 255096, 16114160, 62584, 4021328, 46712, 1847592, 45688, 1864968, 45176, 1883112, 43128, 1826448, 51832] default_max_length = 32248 expected_length = 2042728 skip_size = functools.reduce(lambda x, y: x+y, chunk_list) decompressor = lzma.LZMADecompressor(format=lzma.FORMAT_RAW, filters=filters) with open('testdata', 'rb') as f: consumed_size = 0 # skip to target chunk for clen in chunk_list: out_remaining = clen outdata = b'' while out_remaining > 0: max_length = min(out_remaining, default_max_length) if decompressor.needs_input: read_size = min(read_blocksize, skip_size - consumed_size) indata = f.read(read_size) consumed_size += len(indata) decompdata = decompressor.decompress(indata, max_length) if len(decompdata) == 0: break else: decompdata = decompressor.decompress(b'', max_length) out_remaining -= len(decompdata) outdata += decompdata assert out_remaining == 0 assert len(outdata) == clen # start decompression out_remaining = expected_length consumed_size = 0 outdata = b'' while out_remaining > 0: if not decompressor.eof: max_length = min(out_remaining, default_max_length) if decompressor.needs_input: read_size = min(read_blocksize, target_size - consumed_size) indata = f.read(read_size) consumed_size += len(indata) decompdata = decompressor.decompress(indata, max_length) if len(decompdata) == 0: # FIXME: 
should not come here. break else: decompdata = decompressor.decompress(b'', max_length) out_remaining -= len(decompdata) outdata += decompdata print("expected out_remaining is 0 but {}".format(out_remaining)) # assert target decompression size assert len(outdata) == expected_length # FIXME: last one byte? ``` _______________________________________________ pypy-issue mailing list pypy-issue@python.org https://mail.python.org/mailman/listinfo/pypy-issue