https://github.com/python/cpython/commit/0777a58d8012bbdd0d72654b56f9112686ae6ff0
commit: 0777a58d8012bbdd0d72654b56f9112686ae6ff0
branch: main
author: dev <[email protected]>
committer: serhiy-storchaka <[email protected]>
date: 2026-06-16T14:49:54+03:00
summary:
gh-150771: Fix email serialization for shift_jis and euc-jp (GH-151120)
Encode the payload with output_charset instead of input_charset.
files:
A Misc/NEWS.d/next/Library/2026-06-09-12-00-00.gh-issue-150771.K7mNx2.rst
M Lib/email/contentmanager.py
M Lib/test/test_email/test_contentmanager.py
diff --git a/Lib/email/contentmanager.py b/Lib/email/contentmanager.py
index faf2626bccce651..c0090af716575d7 100644
--- a/Lib/email/contentmanager.py
+++ b/Lib/email/contentmanager.py
@@ -174,7 +174,8 @@ def set_text_content(msg, string, subtype="plain",
charset='utf-8', cte=None,
params=None, headers=None):
_prepare_set(msg, 'text', subtype, headers)
- charset = email.charset.Charset(charset).input_charset
+ cs = email.charset.Charset(charset)
+ charset = cs.output_charset
cte, payload = _encode_text(string, charset, cte, msg.policy)
msg.set_payload(payload)
msg.set_param('charset', charset, replace=True)
diff --git a/Lib/test/test_email/test_contentmanager.py b/Lib/test/test_email/test_contentmanager.py
index 0b1b6e89f8c9922..3115941f8703194 100644
--- a/Lib/test/test_email/test_contentmanager.py
+++ b/Lib/test/test_email/test_contentmanager.py
@@ -362,6 +362,46 @@ def test_set_text_charset_cp949(self):
self.assertEqual(m.get_payload(decode=True),
content.encode('ks_c_5601-1987'))
self.assertEqual(m.get_content(), content)
+ def test_set_text_charset_shift_jis(self):
+ m = self._make_message()
+ content = "\u65e5\u672c\u8a9e\n"
+ raw_data_manager.set_content(m, content, charset='shift_jis')
+ self.assertEqual(m['Content-Type'], 'text/plain; charset="iso-2022-jp"')
+ self.assertEqual(m.get_payload(decode=True), content.encode('iso-2022-jp'))
+ self.assertEqual(m.get_content(), content)
+ self.assertEqual(str(m), textwrap.dedent("""\
+ Content-Type: text/plain; charset="iso-2022-jp"
+ Content-Transfer-Encoding: 7bit
+
+ \x1b$BF|K\\8l\x1b(B
+ """))
+ self.assertEqual(bytes(m), textwrap.dedent("""\
+ Content-Type: text/plain; charset="iso-2022-jp"
+ Content-Transfer-Encoding: 7bit
+
+ \u65e5\u672c\u8a9e
+ """).encode('iso-2022-jp'))
+
+ def test_set_text_charset_euc_jp(self):
+ m = self._make_message()
+ content = "\u65e5\u672c\u8a9e\n"
+ raw_data_manager.set_content(m, content, charset='euc-jp')
+ self.assertEqual(m['Content-Type'], 'text/plain; charset="iso-2022-jp"')
+ self.assertEqual(m.get_payload(decode=True), content.encode('iso-2022-jp'))
+ self.assertEqual(m.get_content(), content)
+ self.assertEqual(str(m), textwrap.dedent("""\
+ Content-Type: text/plain; charset="iso-2022-jp"
+ Content-Transfer-Encoding: 7bit
+
+ \x1b$BF|K\\8l\x1b(B
+ """))
+ self.assertEqual(bytes(m), textwrap.dedent("""\
+ Content-Type: text/plain; charset="iso-2022-jp"
+ Content-Transfer-Encoding: 7bit
+
+ \u65e5\u672c\u8a9e
+ """).encode('iso-2022-jp'))
+
def test_set_text_plain_long_line_heuristics(self):
m = self._make_message()
content = ("Simple but long message that is over 78 characters"
diff --git a/Misc/NEWS.d/next/Library/2026-06-09-12-00-00.gh-issue-150771.K7mNx2.rst b/Misc/NEWS.d/next/Library/2026-06-09-12-00-00.gh-issue-150771.K7mNx2.rst
new file mode 100644
index 000000000000000..6535e5c48bf0360
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2026-06-09-12-00-00.gh-issue-150771.K7mNx2.rst
@@ -0,0 +1,4 @@
+Fix :mod:`email` messages created with ``shift_jis`` or ``euc-jp`` charsets.
+``set_content()`` now stores the payload using the output charset
+(``iso-2022-jp``) so printing the message no longer raises
+:exc:`UnicodeEncodeError`.
_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: [email protected]