https://github.com/python/cpython/commit/481d5b54556e97fed4cf1f48a2ccbc7b4f7aaa42
commit: 481d5b54556e97fed4cf1f48a2ccbc7b4f7aaa42
branch: main
author: Alexander Urieles <[email protected]>
committer: encukou <[email protected]>
date: 2025-08-06T14:59:22+02:00
summary:
gh-75989: TarFile.extractall and TarFile.extract now overwrite symlinks when
extracting hardlinks (GH-137316)
files:
A Misc/NEWS.d/next/Library/2025-08-01-23-52-49.gh-issue-75989.5aYXNJ.rst
M Lib/tarfile.py
M Lib/test/test_tarfile.py
diff --git a/Lib/tarfile.py b/Lib/tarfile.py
index 4bcca096651569..c603ba019ab481 100644
--- a/Lib/tarfile.py
+++ b/Lib/tarfile.py
@@ -2723,6 +2723,9 @@ def makelink_with_filter(self, tarinfo, targetpath,
return
else:
if os.path.exists(tarinfo._link_target):
+ if os.path.lexists(targetpath):
+ # Avoid FileExistsError on following os.link.
+ os.unlink(targetpath)
os.link(tarinfo._link_target, targetpath)
return
except symlink_exception:
diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py
index 624468a222a200..860413b88eb6b5 100644
--- a/Lib/test/test_tarfile.py
+++ b/Lib/test/test_tarfile.py
@@ -841,6 +841,57 @@ def test_next_on_empty_tarfile(self):
with tarfile.open(fileobj=fd, mode="r") as tf:
self.assertEqual(tf.next(), None)
+ def _setup_symlink_to_target(self, temp_dirpath):
+ target_filepath = os.path.join(temp_dirpath, "target")
+ ustar_dirpath = os.path.join(temp_dirpath, "ustar")
+ hardlink_filepath = os.path.join(ustar_dirpath, "lnktype")
+ with open(target_filepath, "wb") as f:
+ f.write(b"target")
+ os.makedirs(ustar_dirpath)
+ os.symlink(target_filepath, hardlink_filepath)
+ return target_filepath, hardlink_filepath
+
+ def _assert_on_file_content(self, filepath, digest):
+ with open(filepath, "rb") as f:
+ data = f.read()
+ self.assertEqual(sha256sum(data), digest)
+
+ @unittest.skipUnless(
+ hasattr(os, "link"), "Missing hardlink implementation"
+ )
+ @os_helper.skip_unless_symlink
+ def test_extract_hardlink_on_symlink(self):
+ """
+ This test verifies that extracting a hardlink will not follow an
+ existing symlink after a FileExistsError on os.link.
+ """
+ with os_helper.temp_dir() as DIR:
+ target_filepath, hardlink_filepath = self._setup_symlink_to_target(DIR)
+ with tarfile.open(tarname, encoding="iso8859-1") as tar:
+ tar.extract("ustar/regtype", DIR, filter="data")
+ tar.extract("ustar/lnktype", DIR, filter="data")
+ self._assert_on_file_content(target_filepath, sha256sum(b"target"))
+ self._assert_on_file_content(hardlink_filepath, sha256_regtype)
+
+ @unittest.skipUnless(
+ hasattr(os, "link"), "Missing hardlink implementation"
+ )
+ @os_helper.skip_unless_symlink
+ def test_extractall_hardlink_on_symlink(self):
+ """
+ This test verifies that extracting a hardlink will not follow an
+ existing symlink after a FileExistsError on os.link.
+ """
+ with os_helper.temp_dir() as DIR:
+ target_filepath, hardlink_filepath = self._setup_symlink_to_target(DIR)
+ with tarfile.open(tarname, encoding="iso8859-1") as tar:
+ tar.extractall(
+ DIR, members=["ustar/regtype", "ustar/lnktype"], filter="data",
+ )
+ self._assert_on_file_content(target_filepath, sha256sum(b"target"))
+ self._assert_on_file_content(hardlink_filepath, sha256_regtype)
+
+
class MiscReadTest(MiscReadTestBase, unittest.TestCase):
test_fail_comp = None
diff --git a/Misc/NEWS.d/next/Library/2025-08-01-23-52-49.gh-issue-75989.5aYXNJ.rst b/Misc/NEWS.d/next/Library/2025-08-01-23-52-49.gh-issue-75989.5aYXNJ.rst
new file mode 100644
index 00000000000000..00b15503b50ba3
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-08-01-23-52-49.gh-issue-75989.5aYXNJ.rst
@@ -0,0 +1,3 @@
+:func:`tarfile.TarFile.extractall` and :func:`tarfile.TarFile.extract` now
+overwrite symlinks when extracting hardlinks.
+(Contributed by Alexander Enrique Urieles Nieto in :gh:`75989`.)
_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: [email protected]