https://github.com/python/cpython/commit/08d09cf5ba041c9c5c3860200b56bab66fd44a23
commit: 08d09cf5ba041c9c5c3860200b56bab66fd44a23
branch: main
author: Ruben Vorderman <[email protected]>
committer: gpshead <[email protected]>
date: 2024-06-15T18:46:39Z
summary:

gh-112346: Always set OS byte to 255, simpler gzip.compress function. 
(GH-120486)

This matches the output behavior in 3.10 and earlier; the optimization in 3.11
allowed the zlib library's "os" value to be filled in instead in the
circumstance when mtime was 0.  This keeps things consistent.

files:
A Misc/NEWS.d/next/Library/2024-06-12-10-00-31.gh-issue-90425.5CfkKG.rst
M Doc/library/gzip.rst
M Lib/gzip.py
M Lib/test/test_gzip.py

diff --git a/Doc/library/gzip.rst b/Doc/library/gzip.rst
index 965da5981f6dbc..152cba4f653cb4 100644
--- a/Doc/library/gzip.rst
+++ b/Doc/library/gzip.rst
@@ -188,9 +188,7 @@ The module defines the following items:
 
    Compress the *data*, returning a :class:`bytes` object containing
    the compressed data.  *compresslevel* and *mtime* have the same meaning as in
-   the :class:`GzipFile` constructor above. When *mtime* is set to ``0``, this
-   function is equivalent to :func:`zlib.compress` with *wbits* set to ``31``.
-   The zlib function is faster.
+   the :class:`GzipFile` constructor above.
 
    .. versionadded:: 3.2
    .. versionchanged:: 3.8
@@ -200,6 +198,10 @@ The module defines the following items:
       streamed fashion. Calls with *mtime* set to ``0`` are delegated to
       :func:`zlib.compress` for better speed.
 
+   .. versionchanged:: 3.13
+      The gzip header OS byte is guaranteed to be set to 255 when this function
+      is used as was the case in 3.10 and earlier.
+
 .. function:: decompress(data)
 
    Decompress the *data*, returning a :class:`bytes` object containing the
diff --git a/Lib/gzip.py b/Lib/gzip.py
index 0d19c84c59cfa7..ba753ce3050dd8 100644
--- a/Lib/gzip.py
+++ b/Lib/gzip.py
@@ -580,27 +580,6 @@ def _rewind(self):
         self._new_member = True
 
 
-def _create_simple_gzip_header(compresslevel: int,
-                               mtime = None) -> bytes:
-    """
-    Write a simple gzip header with no extra fields.
-    :param compresslevel: Compresslevel used to determine the xfl bytes.
-    :param mtime: The mtime (must support conversion to a 32-bit integer).
-    :return: A bytes object representing the gzip header.
-    """
-    if mtime is None:
-        mtime = time.time()
-    if compresslevel == _COMPRESS_LEVEL_BEST:
-        xfl = 2
-    elif compresslevel == _COMPRESS_LEVEL_FAST:
-        xfl = 4
-    else:
-        xfl = 0
-    # Pack ID1 and ID2 magic bytes, method (8=deflate), header flags (no extra
-    # fields added to header), mtime, xfl and os (255 for unknown OS).
-    return struct.pack("<BBBBLBB", 0x1f, 0x8b, 8, 0, int(mtime), xfl, 255)
-
-
 def compress(data, compresslevel=_COMPRESS_LEVEL_BEST, *, mtime=None):
     """Compress data in one shot and return the compressed string.
 
@@ -608,15 +587,14 @@ def compress(data, compresslevel=_COMPRESS_LEVEL_BEST, *, mtime=None):
     mtime can be used to set the modification time. The modification time is
     set to the current time by default.
     """
-    if mtime == 0:
-        # Use zlib as it creates the header with 0 mtime by default.
-        # This is faster and with less overhead.
-        return zlib.compress(data, level=compresslevel, wbits=31)
-    header = _create_simple_gzip_header(compresslevel, mtime)
-    trailer = struct.pack("<LL", zlib.crc32(data), (len(data) & 0xffffffff))
-    # Wbits=-15 creates a raw deflate block.
-    return (header + zlib.compress(data, level=compresslevel, wbits=-15) +
-            trailer)
+    # Wbits=31 automatically includes a gzip header and trailer.
+    gzip_data = zlib.compress(data, level=compresslevel, wbits=31)
+    if mtime is None:
+        mtime = time.time()
+    # Reuse gzip header created by zlib, replace mtime and OS byte for
+    # consistency.
+    header = struct.pack("<4sLBB", gzip_data, int(mtime), gzip_data[8], 255)
+    return header + gzip_data[10:]
 
 
 def decompress(data):
diff --git a/Lib/test/test_gzip.py b/Lib/test/test_gzip.py
index cf801278da9e9b..ae384c3849d49e 100644
--- a/Lib/test/test_gzip.py
+++ b/Lib/test/test_gzip.py
@@ -714,7 +714,6 @@ def test_compress_mtime(self):
                         self.assertEqual(f.mtime, mtime)
 
     def test_compress_correct_level(self):
-        # gzip.compress calls with mtime == 0 take a different code path.
         for mtime in (0, 42):
             with self.subTest(mtime=mtime):
                 nocompress = gzip.compress(data1, compresslevel=0, mtime=mtime)
@@ -722,6 +721,17 @@ def test_compress_correct_level(self):
                 self.assertIn(data1, nocompress)
                 self.assertNotIn(data1, yescompress)
 
+    def test_issue112346(self):
+        # The OS byte should be 255, this should not change between Python versions.
+        for mtime in (0, 42):
+            with self.subTest(mtime=mtime):
+                compress = gzip.compress(data1, compresslevel=1, mtime=mtime)
+                self.assertEqual(
+                    struct.unpack("<IxB", compress[4:10]),
+                    (mtime, 255),
+                    "Gzip header does not properly set either mtime or OS byte."
+                )
+
     def test_decompress(self):
         for data in (data1, data2):
             buf = io.BytesIO()
diff --git a/Misc/NEWS.d/next/Library/2024-06-12-10-00-31.gh-issue-90425.5CfkKG.rst b/Misc/NEWS.d/next/Library/2024-06-12-10-00-31.gh-issue-90425.5CfkKG.rst
new file mode 100644
index 00000000000000..d152af49287a0b
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-06-12-10-00-31.gh-issue-90425.5CfkKG.rst
@@ -0,0 +1,2 @@
+The OS byte in gzip headers is now always set to 255 when using
+:func:`gzip.compress`.

_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: [email protected]

Reply via email to