https://github.com/python/cpython/commit/0788948dcb980c7648b29ca363390b696d7f188f
commit: 0788948dcb980c7648b29ca363390b696d7f188f
branch: main
author: Emma Smith <e...@emmatyping.dev>
committer: gpshead <g...@krypto.org>
date: 2025-04-06T13:51:42-07:00
summary:
gh-84481: Add ZipFile.data_offset attribute (#132165) * Add ZipFile.data_offset attribute This attribute provides the offset to zip data from the start of the file, when available. * Add blurb-it * Try fixing class ref in NEWS files: A Misc/NEWS.d/next/Library/2025-04-06-09-55-43.gh-issue-84481.cX4yTn.rst M Doc/library/zipfile.rst M Lib/test/test_zipfile/test_core.py M Lib/zipfile/__init__.py diff --git a/Doc/library/zipfile.rst b/Doc/library/zipfile.rst index 0e6439f75334e9..6a4fa67332e179 100644 --- a/Doc/library/zipfile.rst +++ b/Doc/library/zipfile.rst @@ -538,6 +538,14 @@ The following data attributes are also available: it should be no longer than 65535 bytes. Comments longer than this will be truncated. +.. attribute:: ZipFile.data_offset + + The offset to the start of ZIP data from the beginning of the file. When the + :class:`ZipFile` is opened in either mode ``'w'`` or ``'x'`` and the + underlying file does not support ``tell()``, the value will be ``None`` + instead. + + .. versionadded:: 3.14 .. 
_path-objects: diff --git a/Lib/test/test_zipfile/test_core.py b/Lib/test/test_zipfile/test_core.py index 44e8190ac6710b..94c0a44f3758d2 100644 --- a/Lib/test/test_zipfile/test_core.py +++ b/Lib/test/test_zipfile/test_core.py @@ -3312,6 +3312,54 @@ def test_execute_zip64(self): self.assertIn(b'number in executable: 5', output) +class TestDataOffsetPrependedZip(unittest.TestCase): + """Test .data_offset on reading zip files with an executable prepended.""" + + def setUp(self): + self.exe_zip = findfile('exe_with_zip', subdir='archivetestdata') + self.exe_zip64 = findfile('exe_with_z64', subdir='archivetestdata') + + def _test_data_offset(self, name): + with zipfile.ZipFile(name) as zipfp: + self.assertEqual(zipfp.data_offset, 713) + + def test_data_offset_with_exe_prepended(self): + self._test_data_offset(self.exe_zip) + + def test_data_offset_with_exe_prepended_zip64(self): + self._test_data_offset(self.exe_zip64) + +class TestDataOffsetZipWrite(unittest.TestCase): + """Test .data_offset for ZipFile opened in write mode.""" + + def setUp(self): + os.mkdir(TESTFNDIR) + self.addCleanup(rmtree, TESTFNDIR) + self.test_path = os.path.join(TESTFNDIR, 'testoffset.zip') + + def test_data_offset_write_no_prefix(self): + with io.BytesIO() as fp: + with zipfile.ZipFile(fp, "w") as zipfp: + self.assertEqual(zipfp.data_offset, 0) + + def test_data_offset_write_with_prefix(self): + with io.BytesIO() as fp: + fp.write(b"this is a prefix") + with zipfile.ZipFile(fp, "w") as zipfp: + self.assertEqual(zipfp.data_offset, 16) + + def test_data_offset_write_no_tell(self): + # The initializer in ZipFile checks if tell raises AttributeError or + # OSError when creating a file in write mode when deducing the offset + # of the beginning of zip data + class NoTellBytesIO(io.BytesIO): + def tell(self): + raise OSError("Unimplemented!") + with NoTellBytesIO() as fp: + with zipfile.ZipFile(fp, "w") as zipfp: + self.assertIs(zipfp.data_offset, None) + + class 
EncodedMetadataTests(unittest.TestCase): file_names = ['\u4e00', '\u4e8c', '\u4e09'] # Han 'one', 'two', 'three' file_content = [ diff --git a/Lib/zipfile/__init__.py b/Lib/zipfile/__init__.py index b8b496ad9471f4..b061691ac6f8b9 100644 --- a/Lib/zipfile/__init__.py +++ b/Lib/zipfile/__init__.py @@ -1413,10 +1413,12 @@ def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True, self._didModify = True try: self.start_dir = self.fp.tell() + self._data_offset = self.start_dir except (AttributeError, OSError): self.fp = _Tellable(self.fp) self.start_dir = 0 self._seekable = False + self._data_offset = None else: # Some file-like objects can provide tell() but not seek() try: @@ -1486,6 +1488,10 @@ def _RealGetContents(self): # If Zip64 extension structures are present, account for them concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator) + # store the offset to the beginning of data for the + # .data_offset property + self._data_offset = concat + if self.debug > 2: inferred = concat + offset_cd print("given, inferred, offset", offset_cd, inferred, concat) @@ -1551,6 +1557,12 @@ def _RealGetContents(self): zinfo._end_offset = end_offset end_offset = zinfo.header_offset + @property + def data_offset(self): + """The offset to the start of zip data in the file or None if + unavailable.""" + return self._data_offset + def namelist(self): """Return a list of file names in the archive.""" return [data.filename for data in self.filelist] diff --git a/Misc/NEWS.d/next/Library/2025-04-06-09-55-43.gh-issue-84481.cX4yTn.rst b/Misc/NEWS.d/next/Library/2025-04-06-09-55-43.gh-issue-84481.cX4yTn.rst new file mode 100644 index 00000000000000..548f4eeb0f75d0 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-04-06-09-55-43.gh-issue-84481.cX4yTn.rst @@ -0,0 +1,5 @@ +Add the :attr:`zipfile.ZipFile.data_offset` attribute, which stores the +offset to the beginning of ZIP data in a file when available. 
When the
+:class:`zipfile.ZipFile` is opened in either mode ``'w'`` or ``'x'`` and the
+underlying file does not support ``tell()``, the value will be ``None``
+instead.

_______________________________________________
Python-checkins mailing list -- python-checkins@python.org
To unsubscribe send an email to python-checkins-le...@python.org
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: arch...@mail-archive.com