Serhiy Storchaka <storch...@gmail.com> added the comment:

I understand you, Martin. It took me some time to read the specification
and work out where the checks belong in the module. The patch has been
updated: the list of compression methods has been extended (in the future
it can be used for more detailed output in printdir()), the strong
encryption flag is checked directly, and an encrypted or compressed
central directory simply will not be found (BadZipFile will be raised).

----------
Added file: http://bugs.python.org/file25528/lzma_in_zip_3.patch

_______________________________________
Python tracker <rep...@bugs.python.org>
<http://bugs.python.org/issue14366>
_______________________________________
diff -r e08c3791f035 Doc/library/zipfile.rst
--- a/Doc/library/zipfile.rst   Thu May 10 16:36:02 2012 +0200
+++ b/Doc/library/zipfile.rst   Thu May 10 23:17:48 2012 +0300
@@ -97,12 +97,20 @@
 
    .. versionadded:: 3.3
 
+.. data:: ZIP_LZMA
+
+   The numeric constant for the LZMA compression method.  This requires the
+   lzma module.
+
+   .. versionadded:: 3.3
+
    .. note::
 
       The ZIP file format specification has included support for bzip2 
compression
-      since 2001. However, some tools (including older Python releases) do not
-      support it, and may either refuse to process the ZIP file altogether, or
-      fail to extract individual files.
+      since 2001, and for LZMA compression since 2006. However, some tools
+      (including older Python releases) do not support these compression
+      methods, and may either refuse to process the ZIP file altogether,
+      or fail to extract individual files.
 
 
 .. seealso::
@@ -133,11 +141,11 @@
    adding a ZIP archive to another file (such as :file:`python.exe`).  If
    *mode* is ``a`` and the file does not exist at all, it is created.
    *compression* is the ZIP compression method to use when writing the archive,
-   and should be :const:`ZIP_STORED`, :const:`ZIP_DEFLATED`; or
-   :const:`ZIP_DEFLATED`; unrecognized
-   values will cause :exc:`RuntimeError` to be raised.  If 
:const:`ZIP_DEFLATED` or
-   :const:`ZIP_BZIP2` is specified but the corresponded module
-   (:mod:`zlib` or :mod:`bz2`) is not available, :exc:`RuntimeError`
+   and should be :const:`ZIP_STORED`, :const:`ZIP_DEFLATED`,
+   :const:`ZIP_BZIP2` or :const:`ZIP_LZMA`; unrecognized
+   values will cause :exc:`RuntimeError` to be raised.  If :const:`ZIP_DEFLATED`,
+   :const:`ZIP_BZIP2` or :const:`ZIP_LZMA` is specified but the corresponding module
+   (:mod:`zlib`, :mod:`bz2` or :mod:`lzma`) is not available, :exc:`RuntimeError`
    is also raised. The default is :const:`ZIP_STORED`.  If *allowZip64* is
    ``True`` zipfile will create ZIP files that use the ZIP64 extensions when
    the zipfile is larger than 2 GB. If it is  false (the default) 
:mod:`zipfile`
@@ -161,7 +169,7 @@
       Added the ability to use :class:`ZipFile` as a context manager.
 
    .. versionchanged:: 3.3
-      Added support for :mod:`bzip2` compression.
+      Added support for :mod:`bz2` and :mod:`lzma` compression.
 
 
 .. method:: ZipFile.close()
diff -r e08c3791f035 Lib/test/support.py
--- a/Lib/test/support.py       Thu May 10 16:36:02 2012 +0200
+++ b/Lib/test/support.py       Thu May 10 23:17:48 2012 +0300
@@ -45,6 +45,11 @@
 except ImportError:
     bz2 = None
 
+try:
+    import lzma
+except ImportError:
+    lzma = None
+
 __all__ = [
     "Error", "TestFailed", "ResourceDenied", "import_module",
     "verbose", "use_resources", "max_memuse", "record_original_stdout",
@@ -62,7 +67,7 @@
     "get_attribute", "swap_item", "swap_attr", "requires_IEEE_754",
     "TestHandler", "Matcher", "can_symlink", "skip_unless_symlink",
     "import_fresh_module", "requires_zlib", "PIPE_MAX_SIZE", "failfast",
-    "anticipate_failure", "run_with_tz", "requires_bz2"
+    "anticipate_failure", "run_with_tz", "requires_bz2", "requires_lzma"
     ]
 
 class Error(Exception):
@@ -513,6 +518,8 @@
 
 requires_bz2 = unittest.skipUnless(bz2, 'requires bz2')
 
+requires_lzma = unittest.skipUnless(lzma, 'requires lzma')
+
 is_jython = sys.platform.startswith('java')
 
 # Filename used for testing
diff -r e08c3791f035 Lib/test/test_zipfile.py
--- a/Lib/test/test_zipfile.py  Thu May 10 16:36:02 2012 +0200
+++ b/Lib/test/test_zipfile.py  Thu May 10 23:17:48 2012 +0300
@@ -13,7 +13,7 @@
 from random import randint, random
 from unittest import skipUnless
 
-from test.support import TESTFN, run_unittest, findfile, unlink, 
requires_zlib, requires_bz2
+from test.support import TESTFN, run_unittest, findfile, unlink, 
requires_zlib, requires_bz2, requires_lzma
 
 TESTFN2 = TESTFN + "2"
 TESTFNDIR = TESTFN + "d"
@@ -361,6 +361,55 @@
                 self.assertEqual(openobj.read(1), b'1')
                 self.assertEqual(openobj.read(1), b'2')
 
+    @requires_lzma
+    def test_lzma(self):
+        for f in (TESTFN2, TemporaryFile(), io.BytesIO()):
+            self.zip_test(f, zipfile.ZIP_LZMA)
+
+    @requires_lzma
+    def test_open_lzma(self):
+        for f in (TESTFN2, TemporaryFile(), io.BytesIO()):
+            self.zip_open_test(f, zipfile.ZIP_LZMA)
+
+    @requires_lzma
+    def test_random_open_lzma(self):
+        for f in (TESTFN2, TemporaryFile(), io.BytesIO()):
+            self.zip_random_open_test(f, zipfile.ZIP_LZMA)
+
+    @requires_lzma
+    def test_readline_read_lzma(self):
+        # Issue #7610: calls to readline() interleaved with calls to read().
+        for f in (TESTFN2, TemporaryFile(), io.BytesIO()):
+            self.zip_readline_read_test(f, zipfile.ZIP_LZMA)
+
+    @requires_lzma
+    def test_readline_lzma(self):
+        for f in (TESTFN2, TemporaryFile(), io.BytesIO()):
+            self.zip_readline_test(f, zipfile.ZIP_LZMA)
+
+    @requires_lzma
+    def test_readlines_lzma(self):
+        for f in (TESTFN2, TemporaryFile(), io.BytesIO()):
+            self.zip_readlines_test(f, zipfile.ZIP_LZMA)
+
+    @requires_lzma
+    def test_iterlines_lzma(self):
+        for f in (TESTFN2, TemporaryFile(), io.BytesIO()):
+            self.zip_iterlines_test(f, zipfile.ZIP_LZMA)
+
+    @requires_lzma
+    def test_low_compression_lzma(self):
+        """Check for cases where compressed data is larger than original."""
+        # Create the ZIP archive
+        with zipfile.ZipFile(TESTFN2, "w", zipfile.ZIP_LZMA) as zipfp:
+            zipfp.writestr("strfile", '12')
+
+        # Get an open object for strfile
+        with zipfile.ZipFile(TESTFN2, "r", zipfile.ZIP_LZMA) as zipfp:
+            with zipfp.open("strfile") as openobj:
+                self.assertEqual(openobj.read(1), b'1')
+                self.assertEqual(openobj.read(1), b'2')
+
     def test_absolute_arcnames(self):
         with zipfile.ZipFile(TESTFN2, "w", zipfile.ZIP_STORED) as zipfp:
             zipfp.write(TESTFN, "/absolute")
@@ -508,6 +557,13 @@
         info = zipfp.getinfo('b.txt')
         self.assertEqual(info.compress_type, zipfile.ZIP_BZIP2)
 
+    @requires_lzma
+    def test_writestr_compression_lzma(self):
+        zipfp = zipfile.ZipFile(TESTFN2, "w")
+        zipfp.writestr("b.txt", "hello world", compress_type=zipfile.ZIP_LZMA)
+        info = zipfp.getinfo('b.txt')
+        self.assertEqual(info.compress_type, zipfile.ZIP_LZMA)
+
     def zip_test_writestr_permissions(self, f, compression):
         # Make sure that writestr creates files with mode 0600,
         # when it is passed a name rather than a ZipInfo instance.
@@ -686,6 +742,11 @@
         for f in (TESTFN2, TemporaryFile(), io.BytesIO()):
             self.zip_test(f, zipfile.ZIP_BZIP2)
 
+    @requires_lzma
+    def test_lzma(self):
+        for f in (TESTFN2, TemporaryFile(), io.BytesIO()):
+            self.zip_test(f, zipfile.ZIP_LZMA)
+
     def test_absolute_arcnames(self):
         with zipfile.ZipFile(TESTFN2, "w", zipfile.ZIP_STORED,
                              allowZip64=True) as zipfp:
@@ -826,6 +887,16 @@
             b'\x00 \x80\x80\x81\x00\x00\x00\x00afilePK'
             b'\x05\x06\x00\x00\x00\x00\x01\x00\x01\x003\x00\x00\x00[\x00'
             b'\x00\x00\x00\x00'),
+        zipfile.ZIP_LZMA: (
+            b'PK\x03\x04\x14\x03\x00\x00\x0e\x00nu\x0c=FA'
+            b'KE\x1b\x00\x00\x00n\x00\x00\x00\x05\x00\x00\x00af'
+            b'ile\t\x04\x05\x00]\x00\x00\x00\x04\x004\x19I'
+            b'\xee\x8d\xe9\x17\x89:3`\tq!.8\x00PK'
+            b'\x01\x02\x14\x03\x14\x03\x00\x00\x0e\x00nu\x0c=FA'
+            b'KE\x1b\x00\x00\x00n\x00\x00\x00\x05\x00\x00\x00\x00\x00'
+            b'\x00\x00\x00\x00 \x80\x80\x81\x00\x00\x00\x00afil'
+            b'ePK\x05\x06\x00\x00\x00\x00\x01\x00\x01\x003\x00\x00'
+            b'\x00>\x00\x00\x00\x00\x00'),
     }
 
     def test_unsupported_version(self):
@@ -1104,6 +1175,10 @@
     def test_testzip_with_bad_crc_bzip2(self):
         self.check_testzip_with_bad_crc(zipfile.ZIP_BZIP2)
 
+    @requires_lzma
+    def test_testzip_with_bad_crc_lzma(self):
+        self.check_testzip_with_bad_crc(zipfile.ZIP_LZMA)
+
     def check_read_with_bad_crc(self, compression):
         """Tests that files with bad CRCs raise a BadZipFile exception when 
read."""
         zipdata = self.zips_with_bad_crc[compression]
@@ -1136,6 +1211,10 @@
     def test_read_with_bad_crc_bzip2(self):
         self.check_read_with_bad_crc(zipfile.ZIP_BZIP2)
 
+    @requires_lzma
+    def test_read_with_bad_crc_lzma(self):
+        self.check_read_with_bad_crc(zipfile.ZIP_LZMA)
+
     def check_read_return_size(self, compression):
         # Issue #9837: ZipExtFile.read() shouldn't return more bytes
         # than requested.
@@ -1160,6 +1239,10 @@
     def test_read_return_size_bzip2(self):
         self.check_read_return_size(zipfile.ZIP_BZIP2)
 
+    @requires_lzma
+    def test_read_return_size_lzma(self):
+        self.check_read_return_size(zipfile.ZIP_LZMA)
+
     def test_empty_zipfile(self):
         # Check that creating a file in 'w' or 'a' mode and closing without
         # adding any files to the archives creates a valid empty ZIP file
@@ -1306,6 +1389,11 @@
         for f in (TESTFN2, TemporaryFile(), io.BytesIO()):
             self.zip_test(f, zipfile.ZIP_BZIP2)
 
+    @requires_lzma
+    def test_lzma(self):
+        for f in (TESTFN2, TemporaryFile(), io.BytesIO()):
+            self.zip_test(f, zipfile.ZIP_LZMA)
+
     def zip_open_test(self, f, compression):
         self.make_test_archive(f, compression)
 
@@ -1351,6 +1439,11 @@
         for f in (TESTFN2, TemporaryFile(), io.BytesIO()):
             self.zip_open_test(f, zipfile.ZIP_BZIP2)
 
+    @requires_lzma
+    def test_open_lzma(self):
+        for f in (TESTFN2, TemporaryFile(), io.BytesIO()):
+            self.zip_open_test(f, zipfile.ZIP_LZMA)
+
     def zip_random_open_test(self, f, compression):
         self.make_test_archive(f, compression)
 
@@ -1384,6 +1477,11 @@
         for f in (TESTFN2, TemporaryFile(), io.BytesIO()):
             self.zip_random_open_test(f, zipfile.ZIP_BZIP2)
 
+    @requires_lzma
+    def test_random_open_lzma(self):
+        for f in (TESTFN2, TemporaryFile(), io.BytesIO()):
+            self.zip_random_open_test(f, zipfile.ZIP_LZMA)
+
 
 @requires_zlib
 class TestsWithMultipleOpens(unittest.TestCase):
@@ -1628,6 +1726,31 @@
         for f in (TESTFN2, TemporaryFile(), io.BytesIO()):
             self.iterlines_test(f, zipfile.ZIP_BZIP2)
 
+    @requires_lzma
+    def test_read_lzma(self):
+        for f in (TESTFN2, TemporaryFile(), io.BytesIO()):
+            self.read_test(f, zipfile.ZIP_LZMA)
+
+    @requires_lzma
+    def test_readline_read_lzma(self):
+        for f in (TESTFN2, TemporaryFile(), io.BytesIO()):
+            self.readline_read_test(f, zipfile.ZIP_LZMA)
+
+    @requires_lzma
+    def test_readline_lzma(self):
+        for f in (TESTFN2, TemporaryFile(), io.BytesIO()):
+            self.readline_test(f, zipfile.ZIP_LZMA)
+
+    @requires_lzma
+    def test_readlines_lzma(self):
+        for f in (TESTFN2, TemporaryFile(), io.BytesIO()):
+            self.readlines_test(f, zipfile.ZIP_LZMA)
+
+    @requires_lzma
+    def test_iterlines_lzma(self):
+        for f in (TESTFN2, TemporaryFile(), io.BytesIO()):
+            self.iterlines_test(f, zipfile.ZIP_LZMA)
+
     def tearDown(self):
         for sep, fn in self.arcfiles.items():
             os.remove(fn)
diff -r e08c3791f035 Lib/zipfile.py
--- a/Lib/zipfile.py    Thu May 10 16:36:02 2012 +0200
+++ b/Lib/zipfile.py    Thu May 10 23:17:48 2012 +0300
@@ -27,8 +27,13 @@
 except ImportError:
     bz2 = None
 
+try:
+    import lzma # We may need its compression method
+except ImportError:
+    lzma = None
+
 __all__ = ["BadZipFile", "BadZipfile", "error",
-           "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2",
+           "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA",
            "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile"]
 
 class BadZipFile(Exception):
@@ -52,13 +57,15 @@
 ZIP_STORED = 0
 ZIP_DEFLATED = 8
 ZIP_BZIP2 = 12
+ZIP_LZMA = 14
 # Other ZIP compression methods not supported
 
 DEFAULT_VERSION = 20
 ZIP64_VERSION = 45
 BZIP2_VERSION = 46
+LZMA_VERSION = 63
 # we recognize (but not necessarily support) all features up to that version
-MAX_EXTRACT_VERSION = 46
+MAX_EXTRACT_VERSION = 63
 
 # Below are some formats and associated data for reading/writing headers using
 # the struct module.  The names and structures of headers/records are those 
used
@@ -367,6 +374,8 @@
 
         if self.compress_type == ZIP_BZIP2:
             min_version = max(BZIP2_VERSION, min_version)
+        elif self.compress_type == ZIP_LZMA:
+            min_version = max(LZMA_VERSION, min_version)
 
         self.extract_version = max(min_version, self.extract_version)
         self.create_version = max(min_version, self.create_version)
@@ -480,6 +489,77 @@
         return c
 
 
+class LZMACompressor:
+
+    def __init__(self):
+        self._comp = None
+
+    def _init(self):
+        props = lzma.encode_filter_properties({'id': lzma.FILTER_LZMA1})
+        self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW, filters=[
+                lzma.decode_filter_properties(lzma.FILTER_LZMA1, props)
+        ])
+        return struct.pack('<BBH', 9, 4, len(props)) + props
+
+    def compress(self, data):
+        if self._comp is None:
+            return self._init() + self._comp.compress(data)
+        return self._comp.compress(data)
+
+    def flush(self):
+        if self._comp is None:
+            return self._init() + self._comp.flush()
+        return self._comp.flush()
+
+
+class LZMADecompressor:
+
+    def __init__(self):
+        self._decomp = None
+        self._unconsumed = b''
+        self.eof = False
+
+    def decompress(self, data):
+        if self._decomp is None:
+            self._unconsumed += data
+            if len(self._unconsumed) <= 4:
+                return b''
+            psize, = struct.unpack('<H', self._unconsumed[2:4])
+            if len(self._unconsumed) <= 4 + psize:
+                return b''
+
+            self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW, filters=[
+                    lzma.decode_filter_properties(lzma.FILTER_LZMA1,
+                            self._unconsumed[4:4 + psize])
+            ])
+            data = self._unconsumed[4 + psize:]
+            del self._unconsumed
+
+        result = self._decomp.decompress(data)
+        self.eof = self._decomp.eof
+        return result
+
+
+compressor_names = {
+    0: 'store',
+    1: 'shrink',
+    2: 'reduce',
+    3: 'reduce',
+    4: 'reduce',
+    5: 'reduce',
+    6: 'implode',
+    7: 'tokenize',
+    8: 'deflate',
+    9: 'deflate64',
+    10: 'implode',
+    12: 'bzip2',
+    14: 'lzma',
+    18: 'terse',
+    19: 'lz77',
+    97: 'wavpack',
+    98: 'ppmd',
+}
+
 def _check_compression(compression):
     if compression == ZIP_STORED:
         pass
@@ -491,6 +571,10 @@
         if not bz2:
             raise RuntimeError(
                     "Compression requires the (missing) bz2 module")
+    elif compression == ZIP_LZMA:
+        if not lzma:
+            raise RuntimeError(
+                    "Compression requires the (missing) lzma module")
     else:
         raise RuntimeError("That compression method is not supported")
 
@@ -501,6 +585,8 @@
              zlib.DEFLATED, -15)
     elif compress_type == ZIP_BZIP2:
         return bz2.BZ2Compressor()
+    elif compress_type == ZIP_LZMA:
+        return LZMACompressor()
     else:
         return None
 
@@ -512,19 +598,10 @@
         return zlib.decompressobj(-15)
     elif compress_type == ZIP_BZIP2:
         return bz2.BZ2Decompressor()
+    elif compress_type == ZIP_LZMA:
+        return LZMADecompressor()
     else:
-        unknown_compressors = {
-            1: 'shrink',
-            2: 'reduce',
-            3: 'reduce',
-            4: 'reduce',
-            5: 'reduce',
-            6: 'implode',
-            9: 'enhanced deflate',
-            10: 'implode',
-            14: 'lzma',
-            }
-        descr = unknown_compressors.get(compress_type)
+        descr = compressor_names.get(compress_type)
         if descr:
             raise NotImplementedError("compression type %d (%s)" % 
(compress_type, descr))
         else:
@@ -781,8 +858,8 @@
     file: Either the path to the file, or a file-like object.
           If it is a path, the file will be opened and closed by ZipFile.
     mode: The mode can be either read "r", write "w" or append "a".
-    compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib) or
-                 ZIP_BZIP2 (requires bz2).
+    compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
+                 ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma).
     allowZip64: if True ZipFile will create files with ZIP64 extensions when
                 needed, otherwise it will raise an exception when this would
                 be necessary.
@@ -1062,6 +1139,10 @@
             # Zip 2.7: compressed patched data
             raise NotImplementedError("compressed patched data (flag bit 5)")
 
+        if zinfo.flag_bits & 0x40:
+            # strong encryption
+            raise NotImplementedError("strong encryption (flag bit 6)")
+
         if zinfo.flag_bits & 0x800:
             # UTF-8 filename
             fname_str = fname.decode("utf-8")
@@ -1220,6 +1301,9 @@
         zinfo.file_size = st.st_size
         zinfo.flag_bits = 0x00
         zinfo.header_offset = self.fp.tell()    # Start of header bytes
+        if zinfo.compress_type == ZIP_LZMA:
+            # Compressed data includes an end-of-stream (EOS) marker
+            zinfo.flag_bits |= 0x02
 
         self._writecheck(zinfo)
         self._didModify = True
@@ -1292,6 +1376,9 @@
         zinfo.header_offset = self.fp.tell()    # Start of header data
         if compress_type is not None:
             zinfo.compress_type = compress_type
+        if zinfo.compress_type == ZIP_LZMA:
+            # Compressed data includes an end-of-stream (EOS) marker
+            zinfo.flag_bits |= 0x02
 
         self._writecheck(zinfo)
         self._didModify = True
@@ -1360,6 +1447,8 @@
 
                 if zinfo.compress_type == ZIP_BZIP2:
                     min_version = max(BZIP2_VERSION, min_version)
+                elif zinfo.compress_type == ZIP_LZMA:
+                    min_version = max(LZMA_VERSION, min_version)
 
                 extract_version = max(min_version, zinfo.extract_version)
                 create_version = max(min_version, zinfo.create_version)
_______________________________________________
Python-bugs-list mailing list
Unsubscribe: 
http://mail.python.org/mailman/options/python-bugs-list/archive%40mail-archive.com

Reply via email to