https://github.com/python/cpython/commit/c75df4b13a1ec6317699142b29380bc500f52dcf
commit: c75df4b13a1ec6317699142b29380bc500f52dcf
branch: 3.11
author: Miss Islington (bot) <[email protected]>
committer: serhiy-storchaka <[email protected]>
date: 2024-03-05T17:32:30Z
summary:

[3.11] gh-76511: Fix email.Message.as_string() for non-ASCII message with ASCII charset (GH-116125) (GH-116365)

(cherry picked from commit f97f25ef5dfcdfec0d9a359fd970abd139cf3428)

Co-authored-by: Serhiy Storchaka <[email protected]>

files:
A Misc/NEWS.d/next/Library/2024-02-29-17-06-54.gh-issue-76511.WqjRLP.rst
M Lib/email/generator.py
M Lib/email/message.py
M Lib/test/test_email/test_email.py

diff --git a/Lib/email/generator.py b/Lib/email/generator.py
index b8c10917a5d98c..eb597de76d42ef 100644
--- a/Lib/email/generator.py
+++ b/Lib/email/generator.py
@@ -243,7 +243,7 @@ def _handle_text(self, msg):
                 # existing message.
                 msg = deepcopy(msg)
                 del msg['content-transfer-encoding']
-                msg.set_payload(payload, charset)
+                msg.set_payload(msg._payload, charset)
                 payload = msg.get_payload()
                 self._munge_cte = (msg['content-transfer-encoding'],
                                    msg['content-type'])
diff --git a/Lib/email/message.py b/Lib/email/message.py
index 4e9536b8563e35..492a6b9a4309fa 100644
--- a/Lib/email/message.py
+++ b/Lib/email/message.py
@@ -340,7 +340,7 @@ def set_payload(self, payload, charset=None):
                 return
             if not isinstance(charset, Charset):
                 charset = Charset(charset)
-            payload = payload.encode(charset.output_charset)
+            payload = payload.encode(charset.output_charset, 'surrogateescape')
         if hasattr(payload, 'decode'):
             self._payload = payload.decode('ascii', 'surrogateescape')
         else:
diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py
index 677f2094b835f3..785696e5c541fb 100644
--- a/Lib/test/test_email/test_email.py
+++ b/Lib/test/test_email/test_email.py
@@ -337,6 +337,21 @@ def test_nonascii_as_string_without_cte(self):
         msg = email.message_from_bytes(source)
         self.assertEqual(msg.as_string(), expected)
 
+    def test_nonascii_as_string_with_ascii_charset(self):
+        m = textwrap.dedent("""\
+            MIME-Version: 1.0
+            Content-type: text/plain; charset="us-ascii"
+            Content-Transfer-Encoding: 8bit
+
+            Test if non-ascii messages with no Content-Transfer-Encoding set
+            can be as_string'd:
+            Föö bär
+            """)
+        source = m.encode('iso-8859-1')
+        expected = source.decode('ascii', 'replace')
+        msg = email.message_from_bytes(source)
+        self.assertEqual(msg.as_string(), expected)
+
     def test_nonascii_as_string_without_content_type_and_cte(self):
         m = textwrap.dedent("""\
             MIME-Version: 1.0
diff --git a/Misc/NEWS.d/next/Library/2024-02-29-17-06-54.gh-issue-76511.WqjRLP.rst b/Misc/NEWS.d/next/Library/2024-02-29-17-06-54.gh-issue-76511.WqjRLP.rst
new file mode 100644
index 00000000000000..da62f8a2450711
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-02-29-17-06-54.gh-issue-76511.WqjRLP.rst
@@ -0,0 +1,4 @@
+Fix UnicodeEncodeError in :meth:`email.Message.as_string` that results when
+a message that claims to be in the ascii character set actually has non-ascii
+characters. Non-ascii characters are now replaced with the U+FFFD replacement
+character, like in the ``replace`` error handler.

_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: [email protected]

Reply via email to