https://github.com/python/cpython/commit/3227857de8ad895fdef7c3d18a9e031f29980029
commit: 3227857de8ad895fdef7c3d18a9e031f29980029
branch: 3.15
author: Miss Islington (bot) <[email protected]>
committer: serhiy-storchaka <[email protected]>
date: 2026-05-19T20:38:47Z
summary:

[3.15] gh-149945: Fix potential OOM for gzip with large header (GH-149979) (GH-150093)

Do not read the whole filename and comment to memory for calculating the CRC.
(cherry picked from commit 51a5715df9c56f616944cf1b39323bd6ae009143)

Co-authored-by: Serhiy Storchaka <[email protected]>

files:
M Lib/gzip.py

diff --git a/Lib/gzip.py b/Lib/gzip.py
index a89ebf806c8572..1e05f43c0c9e24 100644
--- a/Lib/gzip.py
+++ b/Lib/gzip.py
@@ -484,14 +484,22 @@ def _read_exact(fp, n):
     return data
 
 
-def _read_until_null(fp, append_to):
+def _read_until_null(fp, crc=None):
     '''Read until the first encountered null byte in fp.
-    Append to given byte array object'''
-    while True:
-        s = fp.read(1)
-        append_to += s
-        if not s or s == b'\000':
-            break
+    If crc is not None, update and return the CRC.
+    '''
+    if crc is None:
+        while True:
+            s = fp.read(1)
+            if not s or s == b'\000':
+                break
+    else:
+        while True:
+            s = fp.read(1)
+            crc = zlib.crc32(s, crc)
+            if not s or s == b'\000':
+                break
+    return crc
 
 
 def _read_gzip_header(fp):
@@ -517,30 +525,32 @@ def _read_gzip_header(fp):
         return last_mtime
     if flag == FNAME:
         # Read and discard a null-terminated string containing the filename
-        while True:
-            s = fp.read(1)
-            if not s or s==b'\000':
-                break
+        _read_until_null(fp)
         return last_mtime
 
     # Processing for more complex flags. Save header parts for FHCRC checking.
-    header = bytearray(magic + base_header)
+    if flag & FHCRC:
+        crc = zlib.crc32(magic + base_header)
+    else:
+        crc = None
     if flag & FEXTRA:
         extra_len_bytes = _read_exact(fp, 2)
         extra_len, = struct.unpack("<H", extra_len_bytes)
-        header += extra_len_bytes
-        header += _read_exact(fp, extra_len)
+        extra = _read_exact(fp, extra_len)
+        if crc is not None:
+            crc = zlib.crc32(extra_len_bytes, crc)
+            crc = zlib.crc32(extra, crc)
     if flag & FNAME:
-        _read_until_null(fp, append_to=header)
+        crc = _read_until_null(fp, crc)
     if flag & FCOMMENT:
-        _read_until_null(fp, append_to=header)
-    if flag & FHCRC:
+        crc = _read_until_null(fp, crc)
+    if crc is not None:
         # Header CRC is the last 16 bits of a crc32.
         header_crc, = struct.unpack("<H", _read_exact(fp, 2))
-        true_crc = zlib.crc32(header) & 0xFFFF
-        if header_crc != true_crc:
+        crc = crc & 0xFFFF
+        if header_crc != crc:
             raise BadGzipFile(f"Corrupted gzip header. Checksums do not "
-                              f"match: {true_crc:04x} != {header_crc:04x}")
+                              f"match: {crc:04x} != {header_crc:04x}")
     return last_mtime
 
 

_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: [email protected]
