https://github.com/python/cpython/commit/f216c8963139bf858415e2d09cf938d3e29558df
commit: f216c8963139bf858415e2d09cf938d3e29558df
branch: 3.15
author: Miss Islington (bot) <[email protected]>
committer: serhiy-storchaka <[email protected]>
date: 2026-05-27T21:47:03Z
summary:

[3.15] gh-84353: Preserve non-UTF-8 filenames when appending to ZipFile (GH-150091) (GH-150527)

Preserve non-UTF-8 filenames when appending to a ZipFile.

---------
(cherry picked from commit 24c6bbc92b6dd0ce9b7ff799049498299f70f97d)

Co-authored-by: Serhiy Storchaka <[email protected]>
Co-authored-by: Gregory P. Smith <[email protected]>

files:
A Misc/NEWS.d/next/Library/2026-05-19-19-00-49.gh-issue-84353.ZU5zaQ.rst
M Lib/test/test_zipfile/test_core.py
M Lib/zipfile/__init__.py

diff --git a/Lib/test/test_zipfile/test_core.py b/Lib/test/test_zipfile/test_core.py
index 30550263ad50aab..ffed328b171fda2 100644
--- a/Lib/test/test_zipfile/test_core.py
+++ b/Lib/test/test_zipfile/test_core.py
@@ -3640,29 +3640,23 @@ def test_read_with_unsuitable_metadata_encoding(self):
 
     def test_read_after_append(self):
         newname = '\u56db'  # Han 'four'
-        expected_names = [name.encode('shift_jis').decode('cp437')
-                          for name in self.file_names[:2]] + self.file_names[2:]
-        expected_names.append(newname)
-        expected_content = (*self.file_content, b"newcontent")
+        newname2 = 'fünf'  # representable in cp437, but still stored as UTF-8
+        expected_names = [*self.file_names, newname, newname2]
+        mojibake_expected_names = [name.encode('shift_jis').decode('cp437')
+                                   if i < 2 else name
+                                   for i, name in enumerate(expected_names)]
+        expected_content = (*self.file_content, b"newcontent", b"newcontent2")
 
         with zipfile.ZipFile(TESTFN, "a") as zipfp:
             zipfp.writestr(newname, "newcontent")
-            self.assertEqual(sorted(zipfp.namelist()), sorted(expected_names))
+            zipfp.writestr(newname2, "newcontent2")
+            self.assertEqual(sorted(zipfp.namelist()), sorted(mojibake_expected_names))
 
         with zipfile.ZipFile(TESTFN, "r") as zipfp:
-            self._test_read(zipfp, expected_names, expected_content)
+            self._test_read(zipfp, mojibake_expected_names, expected_content)
 
         with zipfile.ZipFile(TESTFN, "r", metadata_encoding='shift_jis') as zipfp:
-            self.assertEqual(sorted(zipfp.namelist()), sorted(expected_names))
-            for i, (name, content) in enumerate(zip(expected_names, expected_content)):
-                info = zipfp.getinfo(name)
-                self.assertEqual(info.filename, name)
-                self.assertEqual(info.file_size, len(content))
-                if i < 2:
-                    with self.assertRaises(zipfile.BadZipFile):
-                        zipfp.read(name)
-                else:
-                    self.assertEqual(zipfp.read(name), content)
+            self._test_read(zipfp, expected_names, expected_content)
 
     def test_write_with_metadata_encoding(self):
         ZF = zipfile.ZipFile
@@ -3671,6 +3665,20 @@ def test_write_with_metadata_encoding(self):
                                         "^metadata_encoding is only"):
                 ZF("nonesuch.zip", mode, metadata_encoding="shift_jis")
 
+    def test_add_comment(self):
+        with zipfile.ZipFile(TESTFN, "r") as zipfp:
+            mojibake_expected_names = zipfp.namelist()
+
+        with zipfile.ZipFile(TESTFN, "a") as zipfp:
+            zipfp.comment = b'comment'
+            self.assertEqual(zipfp.namelist(), mojibake_expected_names)
+
+        with zipfile.ZipFile(TESTFN, "r") as zipfp:
+            self._test_read(zipfp, mojibake_expected_names, self.file_content)
+
+        with zipfile.ZipFile(TESTFN, "r", metadata_encoding='shift_jis') as zipfp:
+            self._test_read(zipfp, self.file_names, self.file_content)
+
     def test_cli_with_metadata_encoding(self):
         errmsg = "Non-conforming encodings not supported with -c."
         args = ["--metadata-encoding=shift_jis", "-c", "nonesuch", "nonesuch"]
diff --git a/Lib/zipfile/__init__.py b/Lib/zipfile/__init__.py
index d91cb509a6ff4ff..71e4dd4f6f625ce 100644
--- a/Lib/zipfile/__init__.py
+++ b/Lib/zipfile/__init__.py
@@ -566,8 +566,12 @@ def FileHeader(self, zip64=None):
         return header + filename + extra
 
     def _encodeFilenameFlags(self):
+        if self.flag_bits & _MASK_UTF_FILENAME:
+            encoding = 'ascii'
+        else:
+            encoding = 'cp437'
         try:
-            return self.filename.encode('ascii'), self.flag_bits
+            return self.filename.encode(encoding), self.flag_bits & ~_MASK_UTF_FILENAME
         except UnicodeEncodeError:
             return self.filename.encode('utf-8'), self.flag_bits | _MASK_UTF_FILENAME
 
@@ -1812,7 +1816,7 @@ def _open_to_write(self, zinfo, force_zip64=False):
         zinfo.compress_size = 0
         zinfo.CRC = 0
 
-        zinfo.flag_bits = 0x00
+        zinfo.flag_bits = _MASK_UTF_FILENAME
         if zinfo.compress_type == ZIP_LZMA:
             # Compressed data includes an end-of-stream (EOS) marker
             zinfo.flag_bits |= _MASK_COMPRESS_OPTION_1
diff --git a/Misc/NEWS.d/next/Library/2026-05-19-19-00-49.gh-issue-84353.ZU5zaQ.rst b/Misc/NEWS.d/next/Library/2026-05-19-19-00-49.gh-issue-84353.ZU5zaQ.rst
new file mode 100644
index 000000000000000..84fb12e2abd81a0
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2026-05-19-19-00-49.gh-issue-84353.ZU5zaQ.rst
@@ -0,0 +1,5 @@
+Preserve non-UTF-8 encoded filenames when appending to a
+:class:`zipfile.ZipFile`.  Previously, non-ASCII names stored in a legacy
+encoding (without the UTF-8 flag bit set) could be corrupted when the
+central directory was rewritten: they were decoded as cp437 and then
+re-stored as UTF-8.

_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: [email protected]

Reply via email to