https://github.com/python/cpython/commit/bb911a2319365a4155e7398b4b7978589d8bed49 commit: bb911a2319365a4155e7398b4b7978589d8bed49 branch: main author: Amp Tell <[email protected]> committer: ethanfurman <[email protected]> date: 2026-05-01T15:39:58-07:00 summary:
gh-75707: tarfile: Add optional open() argument "mtime" (GH-138117) This makes it possible to set the gzip header mtime field without overriding time.time(), making it useful when creating reproducible archives. * 📜🤖 Added by blurb_it. --------- Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com> Co-authored-by: Ethan Furman <[email protected]> files: A Misc/NEWS.d/next/Library/2025-08-24-15-09-30.gh-issue-75707.GOWZrC.rst M Doc/library/tarfile.rst M Lib/tarfile.py M Lib/test/test_tarfile.py diff --git a/Doc/library/tarfile.rst b/Doc/library/tarfile.rst index a86469bb9ad704..6f1e01cf5aa6ee 100644 --- a/Doc/library/tarfile.rst +++ b/Doc/library/tarfile.rst @@ -142,6 +142,10 @@ Some facts and figures: a Zstandard dictionary used to improve compression of smaller amounts of data. + For modes ``'w:gz'`` and ``'w|gz'``, :func:`tarfile.open` accepts the + keyword argument *mtime* to create a gzip archive header with that mtime. By + default, the mtime is set to the time of creation of the archive. + For special purposes, there is a second format for *mode*: ``'filemode|[compression]'``. :func:`tarfile.open` will return a :class:`TarFile` object that processes its data as a stream of blocks. No random seeking will diff --git a/Lib/tarfile.py b/Lib/tarfile.py index 7f0b0b3c632573..4f47aaab9028d0 100644 --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -337,7 +337,7 @@ class _Stream: """ def __init__(self, name, mode, comptype, fileobj, bufsize, - compresslevel, preset): + compresslevel, preset, mtime): """Construct a _Stream object. """ self._extfileobj = True @@ -372,7 +372,7 @@ def __init__(self, name, mode, comptype, fileobj, bufsize, self.exception = zlib.error self._init_read_gz() else: - self._init_write_gz(compresslevel) + self._init_write_gz(compresslevel, mtime) elif comptype == "bz2": try: @@ -421,7 +421,7 @@ def __del__(self): if hasattr(self, "closed") and not self.closed: self.close() - def _init_write_gz(self, compresslevel): + def _init_write_gz(self, compresslevel, mtime): """Initialize for writing with gzip compression. """ self.cmp = self.zlib.compressobj(compresslevel, @@ -429,7 +429,9 @@ def _init_write_gz(self, compresslevel): -self.zlib.MAX_WBITS, self.zlib.DEF_MEM_LEVEL, 0) - timestamp = struct.pack("<L", int(time.time())) + if mtime is None: + mtime = int(time.time()) + timestamp = struct.pack("<L", mtime) self.__write(b"\037\213\010\010" + timestamp + b"\002\377") if self.name.endswith(".gz"): self.name = self.name[:-3] @@ -1745,7 +1747,7 @@ class TarFile(object): def __init__(self, name=None, mode="r", fileobj=None, format=None, tarinfo=None, dereference=None, ignore_zeros=None, encoding=None, errors="surrogateescape", pax_headers=None, debug=None, - errorlevel=None, copybufsize=None, stream=False): + errorlevel=None, copybufsize=None, stream=False, mtime=None): """Open an (uncompressed) tar archive 'name'. 'mode' is either 'r' to read from an existing archive, 'a' to append data to an existing file or 'w' to create a new file overwriting an existing one. 'mode' @@ -1951,8 +1953,9 @@ def not_compressed(comptype): compresslevel = kwargs.pop("compresslevel", 6) preset = kwargs.pop("preset", None) + mtime = kwargs.pop("mtime", None) stream = _Stream(name, filemode, comptype, fileobj, bufsize, - compresslevel, preset) + compresslevel, preset, mtime) try: t = cls(name, filemode, stream, **kwargs) except: @@ -1988,7 +1991,8 @@ def gzopen(cls, name, mode="r", fileobj=None, compresslevel=6, **kwargs): raise CompressionError("gzip module is not available") from None try: - fileobj = GzipFile(name, mode + "b", compresslevel, fileobj) + mtime = kwargs.pop("mtime", None) + fileobj = GzipFile(name, mode + "b", compresslevel, fileobj, mtime=mtime) except OSError as e: if fileobj is not None and mode == 'r': raise ReadError("not a gzip file") from e diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py index f2babaacc27d96..e270cbb22e2d1a 100644 --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -10,6 +10,7 @@ import re import warnings import stat +import time import unittest import unittest.mock @@ -1828,6 +1829,19 @@ def test_source_directory_not_leaked(self): payload = pathlib.Path(tmpname).read_text(encoding='latin-1') assert os.path.dirname(tmpname) not in payload + def test_create_with_mtime(self): + tarfile.open(tmpname, self.mode, mtime=0).close() + with self.open(tmpname, 'r') as fobj: + fobj.read() + self.assertEqual(fobj.mtime, 0) + + def test_create_without_mtime(self): + before = int(time.time()) + tarfile.open(tmpname, self.mode).close() + after = int(time.time()) + with self.open(tmpname, 'r') as fobj: + fobj.read() + self.assertTrue(before <= fobj.mtime <= after) class Bz2StreamWriteTest(Bz2Test, StreamWriteTest): decompressor = bz2.BZ2Decompressor if bz2 else None @@ -2134,6 +2148,19 @@ def test_create_with_compresslevel(self): with tarfile.open(tmpname, 'r:gz', compresslevel=1) as tobj: pass + def test_create_with_mtime(self): + tarfile.open(tmpname, self.mode, mtime=0).close() + with self.open(tmpname, 'rb') as fobj: + fobj.read() + self.assertEqual(fobj.mtime, 0) + + def test_create_without_mtime(self): + before = int(time.time()) + tarfile.open(tmpname, self.mode).close() + after = int(time.time()) + with self.open(tmpname, 'r') as fobj: + fobj.read() + self.assertTrue(before <= fobj.mtime <= after) class Bz2CreateTest(Bz2Test, CreateTest): diff --git a/Misc/NEWS.d/next/Library/2025-08-24-15-09-30.gh-issue-75707.GOWZrC.rst b/Misc/NEWS.d/next/Library/2025-08-24-15-09-30.gh-issue-75707.GOWZrC.rst new file mode 100644 index 00000000000000..b2ff8a0cdf6f72 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-08-24-15-09-30.gh-issue-75707.GOWZrC.rst @@ -0,0 +1 @@ +Add optional ``mtime`` argument to :func:`tarfile.open`, for setting the ``mtime`` header field in ``.tar.gz`` archives. _______________________________________________ Python-checkins mailing list -- [email protected] To unsubscribe send an email to [email protected] https://mail.python.org/mailman3//lists/python-checkins.python.org Member address: [email protected]
