https://github.com/python/cpython/commit/3227857de8ad895fdef7c3d18a9e031f29980029
commit: 3227857de8ad895fdef7c3d18a9e031f29980029
branch: 3.15
author: Miss Islington (bot) <[email protected]>
committer: serhiy-storchaka <[email protected]>
date: 2026-05-19T20:38:47Z
summary:

[3.15] gh-149945: Fix potential OOM for gzip with large header (GH-149979) 
(GH-150093)

Do not read the whole filename and comment into memory to calculate the CRC.
(cherry picked from commit 51a5715df9c56f616944cf1b39323bd6ae009143)

Co-authored-by: Serhiy Storchaka <[email protected]>

files:
M Lib/gzip.py

diff --git a/Lib/gzip.py b/Lib/gzip.py
index a89ebf806c8572..1e05f43c0c9e24 100644
--- a/Lib/gzip.py
+++ b/Lib/gzip.py
@@ -484,14 +484,22 @@ def _read_exact(fp, n):
     return data
 
 
-def _read_until_null(fp, append_to):
+def _read_until_null(fp, crc=None):
     '''Read until the first encountered null byte in fp.
-       Append to given byte array object'''
-    while True:
-        s = fp.read(1)
-        append_to += s
-        if not s or s == b'\000':
-            break
+    If crc is not None, update and return the CRC.
+    '''
+    if crc is None:
+        while True:
+            s = fp.read(1)
+            if not s or s == b'\000':
+                break
+    else:
+        while True:
+            s = fp.read(1)
+            crc = zlib.crc32(s, crc)
+            if not s or s == b'\000':
+                break
+    return crc
 
 
 def _read_gzip_header(fp):
@@ -517,30 +525,32 @@ def _read_gzip_header(fp):
         return last_mtime
     if flag == FNAME:
         # Read and discard a null-terminated string containing the filename
-        while True:
-            s = fp.read(1)
-            if not s or s==b'\000':
-                break
+        _read_until_null(fp)
         return last_mtime
 
     # Processing for more complex flags. Save header parts for FHCRC checking.
-    header = bytearray(magic + base_header)
+    if flag & FHCRC:
+        crc = zlib.crc32(magic + base_header)
+    else:
+        crc = None
     if flag & FEXTRA:
         extra_len_bytes = _read_exact(fp, 2)
         extra_len, = struct.unpack("<H", extra_len_bytes)
-        header += extra_len_bytes
-        header += _read_exact(fp, extra_len)
+        extra = _read_exact(fp, extra_len)
+        if crc is not None:
+            crc = zlib.crc32(extra_len_bytes, crc)
+            crc = zlib.crc32(extra, crc)
     if flag & FNAME:
-        _read_until_null(fp, append_to=header)
+        crc = _read_until_null(fp, crc)
     if flag & FCOMMENT:
-        _read_until_null(fp, append_to=header)
-    if flag & FHCRC:
+        crc = _read_until_null(fp, crc)
+    if crc is not None:
         # Header CRC is the last 16 bits of a crc32.
         header_crc, = struct.unpack("<H", _read_exact(fp, 2))
-        true_crc = zlib.crc32(header) & 0xFFFF
-        if header_crc != true_crc:
+        crc = crc & 0xFFFF
+        if header_crc != crc:
             raise BadGzipFile(f"Corrupted gzip header. Checksums do not "
-                               f"match: {true_crc:04x} != {header_crc:04x}")
+                              f"match: {crc:04x} != {header_crc:04x}")
     return last_mtime
 
 

_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: [email protected]

Reply via email to