https://github.com/python/cpython/commit/0788948dcb980c7648b29ca363390b696d7f188f
commit: 0788948dcb980c7648b29ca363390b696d7f188f
branch: main
author: Emma Smith <[email protected]>
committer: gpshead <[email protected]>
date: 2025-04-06T13:51:42-07:00
summary:
gh-84481: Add ZipFile.data_offset attribute (#132165)
* Add ZipFile.data_offset attribute
This attribute provides the offset to zip data from the start of the file, when
available.
* Add blurb-it
* Try fixing class ref in NEWS
files:
A Misc/NEWS.d/next/Library/2025-04-06-09-55-43.gh-issue-84481.cX4yTn.rst
M Doc/library/zipfile.rst
M Lib/test/test_zipfile/test_core.py
M Lib/zipfile/__init__.py
diff --git a/Doc/library/zipfile.rst b/Doc/library/zipfile.rst
index 0e6439f75334e9..6a4fa67332e179 100644
--- a/Doc/library/zipfile.rst
+++ b/Doc/library/zipfile.rst
@@ -538,6 +538,14 @@ The following data attributes are also available:
it should be no longer than 65535 bytes. Comments longer than this will be
truncated.
+.. attribute:: ZipFile.data_offset
+
+ The offset to the start of ZIP data from the beginning of the file. When the
+ :class:`ZipFile` is opened in either mode ``'w'`` or ``'x'`` and the
+ underlying file does not support ``tell()``, the value will be ``None``
+ instead.
+
+ .. versionadded:: 3.14
.. _path-objects:
diff --git a/Lib/test/test_zipfile/test_core.py b/Lib/test/test_zipfile/test_core.py
index 44e8190ac6710b..94c0a44f3758d2 100644
--- a/Lib/test/test_zipfile/test_core.py
+++ b/Lib/test/test_zipfile/test_core.py
@@ -3312,6 +3312,54 @@ def test_execute_zip64(self):
self.assertIn(b'number in executable: 5', output)
+class TestDataOffsetPrependedZip(unittest.TestCase):
+ """Test .data_offset on reading zip files with an executable prepended."""
+
+ def setUp(self):
+ self.exe_zip = findfile('exe_with_zip', subdir='archivetestdata')
+ self.exe_zip64 = findfile('exe_with_z64', subdir='archivetestdata')
+
+ def _test_data_offset(self, name):
+ with zipfile.ZipFile(name) as zipfp:
+ self.assertEqual(zipfp.data_offset, 713)
+
+ def test_data_offset_with_exe_prepended(self):
+ self._test_data_offset(self.exe_zip)
+
+ def test_data_offset_with_exe_prepended_zip64(self):
+ self._test_data_offset(self.exe_zip64)
+
+class TestDataOffsetZipWrite(unittest.TestCase):
+ """Test .data_offset for ZipFile opened in write mode."""
+
+ def setUp(self):
+ os.mkdir(TESTFNDIR)
+ self.addCleanup(rmtree, TESTFNDIR)
+ self.test_path = os.path.join(TESTFNDIR, 'testoffset.zip')
+
+ def test_data_offset_write_no_prefix(self):
+ with io.BytesIO() as fp:
+ with zipfile.ZipFile(fp, "w") as zipfp:
+ self.assertEqual(zipfp.data_offset, 0)
+
+ def test_data_offset_write_with_prefix(self):
+ with io.BytesIO() as fp:
+ fp.write(b"this is a prefix")
+ with zipfile.ZipFile(fp, "w") as zipfp:
+ self.assertEqual(zipfp.data_offset, 16)
+
+ def test_data_offset_write_no_tell(self):
+ # The initializer in ZipFile checks if tell raises AttributeError or
+ # OSError when creating a file in write mode when deducing the offset
+ # of the beginning of zip data
+ class NoTellBytesIO(io.BytesIO):
+ def tell(self):
+ raise OSError("Unimplemented!")
+ with NoTellBytesIO() as fp:
+ with zipfile.ZipFile(fp, "w") as zipfp:
+ self.assertIs(zipfp.data_offset, None)
+
+
class EncodedMetadataTests(unittest.TestCase):
file_names = ['\u4e00', '\u4e8c', '\u4e09'] # Han 'one', 'two', 'three'
file_content = [
diff --git a/Lib/zipfile/__init__.py b/Lib/zipfile/__init__.py
index b8b496ad9471f4..b061691ac6f8b9 100644
--- a/Lib/zipfile/__init__.py
+++ b/Lib/zipfile/__init__.py
@@ -1413,10 +1413,12 @@ def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True,
self._didModify = True
try:
self.start_dir = self.fp.tell()
+ self._data_offset = self.start_dir
except (AttributeError, OSError):
self.fp = _Tellable(self.fp)
self.start_dir = 0
self._seekable = False
+ self._data_offset = None
else:
# Some file-like objects can provide tell() but not seek()
try:
@@ -1486,6 +1488,10 @@ def _RealGetContents(self):
# If Zip64 extension structures are present, account for them
concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
+ # store the offset to the beginning of data for the
+ # .data_offset property
+ self._data_offset = concat
+
if self.debug > 2:
inferred = concat + offset_cd
print("given, inferred, offset", offset_cd, inferred, concat)
@@ -1551,6 +1557,12 @@ def _RealGetContents(self):
zinfo._end_offset = end_offset
end_offset = zinfo.header_offset
+ @property
+ def data_offset(self):
+ """The offset to the start of zip data in the file or None if
+ unavailable."""
+ return self._data_offset
+
def namelist(self):
"""Return a list of file names in the archive."""
return [data.filename for data in self.filelist]
diff --git a/Misc/NEWS.d/next/Library/2025-04-06-09-55-43.gh-issue-84481.cX4yTn.rst b/Misc/NEWS.d/next/Library/2025-04-06-09-55-43.gh-issue-84481.cX4yTn.rst
new file mode 100644
index 00000000000000..548f4eeb0f75d0
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-04-06-09-55-43.gh-issue-84481.cX4yTn.rst
@@ -0,0 +1,5 @@
+Add the :attr:`zipfile.ZipFile.data_offset` attribute, which stores the
+offset to the beginning of ZIP data in a file when available. When the
+:class:`zipfile.ZipFile` is opened in either mode ``'w'`` or ``'x'`` and the
+underlying file does not support ``tell()``, the value will be ``None``
+instead.
_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: [email protected]