jenkins-bot has submitted this change and it was merged. ( 
https://gerrit.wikimedia.org/r/481321 )

Change subject: tools: Support LZMA and XZ formats
......................................................................

tools: Support LZMA and XZ formats

The module supporting these formats, lzma, was added in Python 3.3 [1].
These formats have been shown to achieve better compression ratios at
the cost of longer compression times.

[1] https://bugs.python.org/issue6715

Change-Id: I821a48beb755d284576df8c6c9a8f6e8595cb086
---
M pywikibot/tools/__init__.py
A tests/data/xml/article-pyrus.xml.lzma
A tests/data/xml/article-pyrus.xml.xz
M tests/tools_tests.py
4 files changed, 84 insertions(+), 13 deletions(-)

Approvals:
  Xqt: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/pywikibot/tools/__init__.py b/pywikibot/tools/__init__.py
index 76e0f7a..6888440 100644
--- a/pywikibot/tools/__init__.py
+++ b/pywikibot/tools/__init__.py
@@ -56,6 +56,11 @@
         warn('package bz2 and bz2file were not found', ImportWarning)
         bz2 = bz2_import_error

+try:
+    import lzma
+except ImportError as lzma_import_error:
+    lzma = lzma_import_error
+

 if PYTHON_VERSION < (3, 5):
     # although deprecated in 3 completely no message was emitted until 3.5
@@ -1249,10 +1254,11 @@
     """
     Open a file and uncompress it if needed.

-    This function supports bzip2, gzip and 7zip as compression containers. It
-    uses the packages available in the standard library for bzip2 and gzip so
-    they are always available. 7zip is only available when a 7za program is
-    available and only supports reading from it.
+    This function supports bzip2, gzip, 7zip, lzma, and xz as compression
+    containers. It uses the packages available in the standard library for
+    bzip2, gzip, lzma, and xz so they are always available. 7zip is only
+    available when a 7za program is available and only supports reading
+    from it.

     The compression is either selected via the magic number or file ending.

@@ -1274,6 +1280,11 @@
     @raises OSError: When it's not a 7z archive but the file extension is 7z.
         It is also raised by bz2 when its content is invalid. gzip does not
         immediately raise that error but only on reading it.
+    @raises lzma.LZMAError: When an error occurs during compression or
+        decompression or when initializing the state with lzma or xz.
+    @raises ImportError: When file is compressed with bz2 but neither bz2 nor
+        bz2file is importable, or when file is compressed with lzma or xz but
+        lzma is not importable.
     @return: A file-like object returning the uncompressed data in binary mode.
     @rtype: file-like object
     """
@@ -1297,6 +1308,9 @@
             extension = 'gz'
         elif magic_number.startswith(b"7z\xBC\xAF'\x1C"):
             extension = '7z'
+        # Unfortunately, legacy LZMA container format has no magic number
+        elif magic_number.startswith(b'\xFD7zXZ\x00'):
+            extension = 'xz'
         else:
             extension = ''

@@ -1304,9 +1318,9 @@
         if isinstance(bz2, ImportError):
             raise bz2
         return bz2.BZ2File(filename, mode)
-    elif extension == 'gz':
+    if extension == 'gz':
         return gzip.open(filename, mode)
-    elif extension == '7z':
+    if extension == '7z':
         if mode != 'rb':
             raise NotImplementedError('It is not possible to write a 7z file.')

@@ -1327,9 +1341,16 @@
                     'Unexpected STDERR output from 7za {0}'.format(stderr))
             else:
                 return process.stdout
-    else:
-        # assume it's an uncompressed file
-        return open(filename, 'rb')
+    if extension == 'lzma':
+        if isinstance(lzma, ImportError):
+            raise lzma
+        return lzma.open(filename, mode, format=lzma.FORMAT_ALONE)
+    if extension == 'xz':
+        if isinstance(lzma, ImportError):
+            raise lzma
+        return lzma.open(filename, mode, format=lzma.FORMAT_XZ)
+    # assume it's an uncompressed file
+    return open(filename, 'rb')


 def merge_unique_dicts(*args, **kwargs):
diff --git a/tests/data/xml/article-pyrus.xml.lzma 
b/tests/data/xml/article-pyrus.xml.lzma
new file mode 100644
index 0000000..816634d
--- /dev/null
+++ b/tests/data/xml/article-pyrus.xml.lzma
Binary files differ
diff --git a/tests/data/xml/article-pyrus.xml.xz 
b/tests/data/xml/article-pyrus.xml.xz
new file mode 100644
index 0000000..1d3b79e
--- /dev/null
+++ b/tests/data/xml/article-pyrus.xml.xz
Binary files differ
diff --git a/tests/tools_tests.py b/tests/tools_tests.py
index 20ff4f1..ce8722b 100644
--- a/tests/tools_tests.py
+++ b/tests/tools_tests.py
@@ -138,7 +138,7 @@
         """Test open_archive when bz2 and bz2file are not available."""
         old_bz2 = tools.bz2
         bz2_import_error = ('This is a fake exception message that is '
-                            'used when bz2 and bz2file is not importable')
+                            'used when bz2 and bz2file are not importable')
         try:
             tools.bz2 = ImportError(bz2_import_error)
             self.assertRaisesRegex(ImportError,
@@ -167,6 +167,38 @@
                                self.base_file + '_invalid.7z',
                                use_extension=True)

+    def test_open_archive_lzma(self):
+        """Test open_archive with lzma compressor in the standard library."""
+        if isinstance(tools.lzma, ImportError):
+            raise unittest.SkipTest('lzma not importable')
+        self.assertEqual(
+            self._get_content(self.base_file + '.lzma'), self.original_content)
+        # Legacy LZMA container format has no magic number, skipping
+        # use_extension=False test here
+        self.assertEqual(
+            self._get_content(self.base_file + '.xz'), self.original_content)
+        self.assertEqual(
+            self._get_content(self.base_file + '.xz', use_extension=False),
+            self.original_content)
+
+    def test_open_archive_without_lzma(self):
+        """Test open_archive when lzma is not available."""
+        old_lzma = tools.lzma
+        lzma_import_error = ('This is a fake exception message that is '
+                             'used when lzma is not importable')
+        try:
+            tools.lzma = ImportError(lzma_import_error)
+            self.assertRaisesRegex(ImportError,
+                                   lzma_import_error,
+                                   self._get_content,
+                                   self.base_file + '.lzma')
+            self.assertRaisesRegex(ImportError,
+                                   lzma_import_error,
+                                   self._get_content,
+                                   self.base_file + '.xz')
+        finally:
+            tools.lzma = old_lzma
+

 class OpenCompressedTestCase(OpenArchiveTestCase, DeprecationTestCase):

@@ -176,9 +208,10 @@

     def _get_content(self, *args, **kwargs):
         """Use open_compressed and return content using a with-statement."""
-        # open_archive default is True, so if it's False it's not the default
-        # so use the non-default of open_compressed (which is True)
-        if kwargs.get('use_extension') is False:
+        # open_archive default is True, but open_compressed default is False.
+        # The test cases assume a default of True and we need to make
+        # open_compressed acknowledge that.
+        if 'use_extension' not in kwargs:
             kwargs['use_extension'] = True

         with tools.open_compressed(*args, **kwargs) as f:
@@ -259,6 +292,23 @@
                                '/dev/null.7z',
                                mode='wb')

+    def test_write_archive_lzma(self):
+        """Test writing a lzma archive."""
+        if isinstance(tools.lzma, ImportError):
+            raise unittest.SkipTest('lzma not importable')
+
+        content = self._write_content('.lzma')
+        with open(self.base_file + '.lzma', 'rb') as f:
+            self.assertEqual(content, f.read())
+
+    def test_write_archive_xz(self):
+        """Test writing a xz archive."""
+        if isinstance(tools.lzma, ImportError):
+            raise unittest.SkipTest('lzma not importable')
+
+        content = self._write_content('.xz')
+        self.assertEqual(content[:6], b'\xFD7zXZ\x00')
+

 class MergeUniqueDicts(TestCase):


--
To view, visit https://gerrit.wikimedia.org/r/481321
To unsubscribe, or for help writing mail filters, visit 
https://gerrit.wikimedia.org/r/settings

Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-MessageType: merged
Gerrit-Change-Id: I821a48beb755d284576df8c6c9a8f6e8595cb086
Gerrit-Change-Number: 481321
Gerrit-PatchSet: 4
Gerrit-Owner: Zhuyifei1999 <[email protected]>
Gerrit-Reviewer: John Vandenberg <[email protected]>
Gerrit-Reviewer: Mpaa <[email protected]>
Gerrit-Reviewer: XZise <[email protected]>
Gerrit-Reviewer: Xqt <[email protected]>
Gerrit-Reviewer: Zhuyifei1999 <[email protected]>
Gerrit-Reviewer: jenkins-bot (75)
_______________________________________________
Pywikibot-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/pywikibot-commits

Reply via email to