https://github.com/python/cpython/commit/ae99fe3a33b43e303a05f012815cef60b611a9c7
commit: ae99fe3a33b43e303a05f012815cef60b611a9c7
branch: 3.13
author: Miss Islington (bot) <[email protected]>
committer: encukou <[email protected]>
date: 2026-03-17T10:51:43+01:00
summary:

[3.13] gh-141707: Skip TarInfo DIRTYPE normalization during GNU long name handling (GH-145818)

(cherry picked from commit 42d754e34c06e57ad6b8e7f92f32af679912d8ab)

Co-authored-by: Seth Michael Larson <[email protected]>
Co-authored-by: Eashwar Ranganathan <[email protected]>

files:
A Misc/NEWS.d/next/Library/2025-11-18-06-35-53.gh-issue-141707.DBmQIy.rst
M Lib/tarfile.py
M Lib/test/test_tarfile.py
M Misc/ACKS

diff --git a/Lib/tarfile.py b/Lib/tarfile.py
index 26aa429fed0f27..533c0cc87364d0 100755
--- a/Lib/tarfile.py
+++ b/Lib/tarfile.py
@@ -1267,6 +1267,20 @@ def _create_pax_generic_header(cls, pax_headers, type, encoding):
     @classmethod
     def frombuf(cls, buf, encoding, errors):
         """Construct a TarInfo object from a 512 byte bytes object.
+
+        To support the old v7 tar format AREGTYPE headers are
+        transformed to DIRTYPE headers if their name ends in '/'.
+        """
+        return cls._frombuf(buf, encoding, errors)
+
+    @classmethod
+    def _frombuf(cls, buf, encoding, errors, *, dircheck=True):
+        """Construct a TarInfo object from a 512 byte bytes object.
+
+        If ``dircheck`` is set to ``True`` then ``AREGTYPE`` headers will
+        be normalized to ``DIRTYPE`` if the name ends in a trailing slash.
+        ``dircheck`` must be set to ``False`` if this function is called
+        on a follow-up header such as ``GNUTYPE_LONGNAME``.
         """
         if len(buf) == 0:
             raise EmptyHeaderError("empty header")
@@ -1297,7 +1311,7 @@ def frombuf(cls, buf, encoding, errors):
 
         # Old V7 tar format represents a directory as a regular
         # file with a trailing slash.
-        if obj.type == AREGTYPE and obj.name.endswith("/"):
+        if dircheck and obj.type == AREGTYPE and obj.name.endswith("/"):
             obj.type = DIRTYPE
 
         # The old GNU sparse format occupies some of the unused
@@ -1332,8 +1346,15 @@ def fromtarfile(cls, tarfile):
         """Return the next TarInfo object from TarFile object
            tarfile.
         """
+        return cls._fromtarfile(tarfile)
+
+    @classmethod
+    def _fromtarfile(cls, tarfile, *, dircheck=True):
+        """
+        See dircheck documentation in _frombuf().
+        """
         buf = tarfile.fileobj.read(BLOCKSIZE)
-        obj = cls.frombuf(buf, tarfile.encoding, tarfile.errors)
+        obj = cls._frombuf(buf, tarfile.encoding, tarfile.errors, dircheck=dircheck)
         obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
         return obj._proc_member(tarfile)
 
@@ -1391,7 +1412,7 @@ def _proc_gnulong(self, tarfile):
 
         # Fetch the next header and process it.
         try:
-            next = self.fromtarfile(tarfile)
+            next = self._fromtarfile(tarfile, dircheck=False)
         except HeaderError as e:
             raise SubsequentHeaderError(str(e)) from None
 
@@ -1526,7 +1547,7 @@ def _proc_pax(self, tarfile):
 
         # Fetch the next header.
         try:
-            next = self.fromtarfile(tarfile)
+            next = self._fromtarfile(tarfile, dircheck=False)
         except HeaderError as e:
             raise SubsequentHeaderError(str(e)) from None
 
diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py
index 21c97e90e36bcc..f1f99bb84bf9a7 100644
--- a/Lib/test/test_tarfile.py
+++ b/Lib/test/test_tarfile.py
@@ -1216,6 +1216,25 @@ def test_longname_directory(self):
                 self.assertIsNotNone(tar.getmember(longdir))
                 self.assertIsNotNone(tar.getmember(longdir.removesuffix('/')))
 
+    def test_longname_file_not_directory(self):
+        # Test reading a longname file and ensure it is not handled as a directory
+        # Issue #141707
+        buf = io.BytesIO()
+        with tarfile.open(mode='w', fileobj=buf, format=self.format) as tar:
+            ti = tarfile.TarInfo()
+            ti.type = tarfile.AREGTYPE
+            ti.name = ('a' * 99) + '/' + ('b' * 3)
+            tar.addfile(ti)
+
+            expected = {t.name: t.type for t in tar.getmembers()}
+
+        buf.seek(0)
+        with tarfile.open(mode='r', fileobj=buf) as tar:
+            actual = {t.name: t.type for t in tar.getmembers()}
+
+        self.assertEqual(expected, actual)
+
+
 class GNUReadTest(LongnameTest, ReadTest, unittest.TestCase):
 
     subdir = "gnu"
diff --git a/Misc/ACKS b/Misc/ACKS
index d4f0e5f9f80625..d7eb6f44300c04 100644
--- a/Misc/ACKS
+++ b/Misc/ACKS
@@ -1512,6 +1512,7 @@ Ashwin Ramaswami
 Jeff Ramnani
 Grant Ramsay
 Bayard Randel
+Eashwar Ranganathan
 Varpu Rantala
 Brodie Rao
 Rémi Rampin
diff --git a/Misc/NEWS.d/next/Library/2025-11-18-06-35-53.gh-issue-141707.DBmQIy.rst b/Misc/NEWS.d/next/Library/2025-11-18-06-35-53.gh-issue-141707.DBmQIy.rst
new file mode 100644
index 00000000000000..1f5b8ed90b8a90
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-11-18-06-35-53.gh-issue-141707.DBmQIy.rst
@@ -0,0 +1,2 @@
+Don't change :class:`tarfile.TarInfo` type from ``AREGTYPE`` to ``DIRTYPE`` when parsing
+GNU long name or link headers.

_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3/lists/python-checkins.python.org
Member address: [email protected]

Reply via email to