https://github.com/python/cpython/commit/0f04f2456a2ff996cc670342a287928ab5f9b706
commit: 0f04f2456a2ff996cc670342a287928ab5f9b706
branch: main
author: Serhiy Storchaka <[email protected]>
committer: serhiy-storchaka <[email protected]>
date: 2025-04-08T13:56:42+03:00
summary:
gh-117779: Fix reading duplicated entries in zipfile by name (GH-129254)
files:
A Misc/NEWS.d/next/Library/2025-01-24-12-30-38.gh-issue-117779.gADGXI.rst
M Lib/test/test_zipfile/test_core.py
M Lib/zipfile/__init__.py
diff --git a/Lib/test/test_zipfile/test_core.py b/Lib/test/test_zipfile/test_core.py
index 2a4e1acf2195ca..7c8a82d821a020 100644
--- a/Lib/test/test_zipfile/test_core.py
+++ b/Lib/test/test_zipfile/test_core.py
@@ -2415,7 +2415,36 @@ def test_decompress_without_3rd_party_library(self):
self.assertRaises(RuntimeError, zf.extract, 'a.txt')
@requires_zlib()
- def test_full_overlap(self):
+ def test_full_overlap_different_names(self):
+ data = (
+ b'PK\x03\x04\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2\x1e'
+ b'8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00b\xed'
+ b'\xc0\x81\x08\x00\x00\x00\xc00\xd6\xfbK\\d\x0b`P'
+ b'K\x01\x02\x14\x00\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2'
+ b'\x1e8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00\x00'
+ b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00aPK'
+ b'\x01\x02\x14\x00\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2\x1e'
+ b'8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00\x00\x00'
+ b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00bPK\x05'
+ b'\x06\x00\x00\x00\x00\x02\x00\x02\x00^\x00\x00\x00/\x00\x00'
+ b'\x00\x00\x00'
+ )
+ with zipfile.ZipFile(io.BytesIO(data), 'r') as zipf:
+ self.assertEqual(zipf.namelist(), ['a', 'b'])
+ zi = zipf.getinfo('a')
+ self.assertEqual(zi.header_offset, 0)
+ self.assertEqual(zi.compress_size, 16)
+ self.assertEqual(zi.file_size, 1033)
+ zi = zipf.getinfo('b')
+ self.assertEqual(zi.header_offset, 0)
+ self.assertEqual(zi.compress_size, 16)
+ self.assertEqual(zi.file_size, 1033)
+ self.assertEqual(len(zipf.read('b')), 1033)
+ with self.assertRaisesRegex(zipfile.BadZipFile, 'File name.*differ'):
+ zipf.read('a')
+
+ @requires_zlib()
+ def test_full_overlap_different_names2(self):
data = (
b'PK\x03\x04\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2\x1e'
b'8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00a\xed'
@@ -2439,9 +2468,43 @@ def test_full_overlap(self):
self.assertEqual(zi.header_offset, 0)
self.assertEqual(zi.compress_size, 16)
self.assertEqual(zi.file_size, 1033)
- self.assertEqual(len(zipf.read('a')), 1033)
with self.assertRaisesRegex(zipfile.BadZipFile, 'File name.*differ'):
zipf.read('b')
+ with self.assertWarnsRegex(UserWarning, 'Overlapped entries') as cm:
+ self.assertEqual(len(zipf.read('a')), 1033)
+ self.assertEqual(cm.filename, __file__)
+
+ @requires_zlib()
+ def test_full_overlap_same_name(self):
+ data = (
+ b'PK\x03\x04\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2\x1e'
+ b'8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00a\xed'
+ b'\xc0\x81\x08\x00\x00\x00\xc00\xd6\xfbK\\d\x0b`P'
+ b'K\x01\x02\x14\x00\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2'
+ b'\x1e8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00\x00'
+ b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00aPK'
+ b'\x01\x02\x14\x00\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2\x1e'
+ b'8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00\x00\x00'
+ b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00aPK\x05'
+ b'\x06\x00\x00\x00\x00\x02\x00\x02\x00^\x00\x00\x00/\x00\x00'
+ b'\x00\x00\x00'
+ )
+ with zipfile.ZipFile(io.BytesIO(data), 'r') as zipf:
+ self.assertEqual(zipf.namelist(), ['a', 'a'])
+ self.assertEqual(len(zipf.infolist()), 2)
+ zi = zipf.getinfo('a')
+ self.assertEqual(zi.header_offset, 0)
+ self.assertEqual(zi.compress_size, 16)
+ self.assertEqual(zi.file_size, 1033)
+ self.assertEqual(len(zipf.read('a')), 1033)
+ self.assertEqual(len(zipf.read(zi)), 1033)
+ self.assertEqual(len(zipf.read(zipf.infolist()[1])), 1033)
+ with self.assertWarnsRegex(UserWarning, 'Overlapped entries') as cm:
+ self.assertEqual(len(zipf.read(zipf.infolist()[0])), 1033)
+ self.assertEqual(cm.filename, __file__)
+ with self.assertWarnsRegex(UserWarning, 'Overlapped entries') as cm:
+ zipf.open(zipf.infolist()[0]).close()
+ self.assertEqual(cm.filename, __file__)
@requires_zlib()
def test_quoted_overlap(self):
@@ -2474,6 +2537,47 @@ def test_quoted_overlap(self):
zipf.read('a')
self.assertEqual(len(zipf.read('b')), 1033)
+ @requires_zlib()
+ def test_overlap_with_central_dir(self):
+ data = (
+ b'PK\x01\x02\x14\x03\x14\x00\x00\x00\x08\x00G_|Z'
+ b'\xe2\x1e8\xbb\x0b\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00'
+ b'\x00\x00\x00\x00\x00\x00\x00\x00\xb4\x81\x00\x00\x00\x00aP'
+ b'K\x05\x06\x00\x00\x00\x00\x01\x00\x01\x00/\x00\x00\x00\x00'
+ b'\x00\x00\x00\x00\x00'
+ )
+ with zipfile.ZipFile(io.BytesIO(data), 'r') as zipf:
+ self.assertEqual(zipf.namelist(), ['a'])
+ self.assertEqual(len(zipf.infolist()), 1)
+ zi = zipf.getinfo('a')
+ self.assertEqual(zi.header_offset, 0)
+ self.assertEqual(zi.compress_size, 11)
+ self.assertEqual(zi.file_size, 1033)
+ with self.assertRaisesRegex(zipfile.BadZipFile, 'Bad magic number'):
+ zipf.read('a')
+
+ @requires_zlib()
+ def test_overlap_with_archive_comment(self):
+ data = (
+ b'PK\x01\x02\x14\x03\x14\x00\x00\x00\x08\x00G_|Z'
+ b'\xe2\x1e8\xbb\x0b\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00'
+ b'\x00\x00\x00\x00\x00\x00\x00\x00\xb4\x81E\x00\x00\x00aP'
+ b'K\x05\x06\x00\x00\x00\x00\x01\x00\x01\x00/\x00\x00\x00\x00'
+ b'\x00\x00\x00*\x00'
+ b'PK\x03\x04\x14\x00\x00\x00\x08\x00G_|Z\xe2\x1e'
+ b'8\xbb\x0b\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00aK'
+ b'L\x1c\x05\xa3`\x14\x8cx\x00\x00'
+ )
+ with zipfile.ZipFile(io.BytesIO(data), 'r') as zipf:
+ self.assertEqual(zipf.namelist(), ['a'])
+ self.assertEqual(len(zipf.infolist()), 1)
+ zi = zipf.getinfo('a')
+ self.assertEqual(zi.header_offset, 69)
+ self.assertEqual(zi.compress_size, 11)
+ self.assertEqual(zi.file_size, 1033)
+ with self.assertRaisesRegex(zipfile.BadZipFile, 'Overlapped entries'):
+ zipf.read('a')
+
def tearDown(self):
unlink(TESTFN)
unlink(TESTFN2)
diff --git a/Lib/zipfile/__init__.py b/Lib/zipfile/__init__.py
index e3a94215bd6700..b7840d0f945a66 100644
--- a/Lib/zipfile/__init__.py
+++ b/Lib/zipfile/__init__.py
@@ -1552,9 +1552,8 @@ def _RealGetContents(self):
print("total", total)
end_offset = self.start_dir
- for zinfo in sorted(self.filelist,
- key=lambda zinfo: zinfo.header_offset,
- reverse=True):
+ for zinfo in reversed(sorted(self.filelist,
+ key=lambda zinfo: zinfo.header_offset)):
zinfo._end_offset = end_offset
end_offset = zinfo.header_offset
@@ -1722,7 +1721,16 @@ def open(self, name, mode="r", pwd=None, *, force_zip64=False):
if (zinfo._end_offset is not None and
zef_file.tell() + zinfo.compress_size > zinfo._end_offset):
- raise BadZipFile(f"Overlapped entries: {zinfo.orig_filename!r} (possible zip bomb)")
+ if zinfo._end_offset == zinfo.header_offset:
+ import warnings
+ warnings.warn(
+ f"Overlapped entries: {zinfo.orig_filename!r} "
+ f"(possible zip bomb)",
+ skip_file_prefixes=(os.path.dirname(__file__),))
+ else:
+ raise BadZipFile(
+ f"Overlapped entries: {zinfo.orig_filename!r} "
+ f"(possible zip bomb)")
# check for encrypted flag & handle password
is_encrypted = zinfo.flag_bits & _MASK_ENCRYPTED
diff --git a/Misc/NEWS.d/next/Library/2025-01-24-12-30-38.gh-issue-117779.gADGXI.rst b/Misc/NEWS.d/next/Library/2025-01-24-12-30-38.gh-issue-117779.gADGXI.rst
new file mode 100644
index 00000000000000..115362cfc83284
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-01-24-12-30-38.gh-issue-117779.gADGXI.rst
@@ -0,0 +1,3 @@
+Fix reading duplicated entries in :mod:`zipfile` by name.
+Reading duplicated entries (except the last one) by ``ZipInfo``
+now emits a warning instead of raising an exception.
_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: [email protected]