https://github.com/python/cpython/commit/ad3bbb6b0df9a83e5e9fc26344468747e856d0b6
commit: ad3bbb6b0df9a83e5e9fc26344468747e856d0b6
branch: 3.13
author: Miss Islington (bot) <[email protected]>
committer: bitdancer <[email protected]>
date: 2025-01-07T12:43:04-05:00
summary:

[3.13] gh-98188: Fix EmailMessage.get_payload to decode data when CTE value has 
extra text (GH-127547) (#128528)

gh-98188: Fix EmailMessage.get_payload to decode data when CTE value has extra 
text (GH-127547)

Up to this point message handling has been very strict with regards to content 
encoding values: mixed case was accepted, but trailing blanks or other text 
would cause decoding failure, even if the first token was a valid encoding.  By 
Postel's Rule we should go ahead and decode as long as we can recognize that 
first token.  We have not thought of any security or backward compatibility 
concerns with this fix.

This fix does introduce a new technique/pattern to the Message code: we look to 
see if the header has a 'cte' attribute, and if so we use that.  This 
effectively promotes the header API exposed by HeaderRegistry to an API that 
any header parser "should" support.  This seems like a reasonable thing to do.  
It is not, however, a requirement, as the string value of the header is still 
used if there is no cte attribute.

The full fix (ignore any trailing blanks or blank-separated trailing text) 
applies only to the non-compat32 API.  compat32 is only fixed to the extent 
that it now ignores trailing spaces.  Note that the HeaderRegistry parsing 
still records a HeaderDefect if there is extra text.

(cherry picked from commit a62ba52f1439c1f878a3ff9b8544caf9aeef9b90)

Co-authored-by: RanKKI <[email protected]>
Co-authored-by: Bénédikt Tran <[email protected]>

files:
A Misc/NEWS.d/next/Library/2024-12-03-14-45-16.gh-issue-98188.GX9i2b.rst
M Lib/email/message.py
M Lib/test/test_email/test_email.py
M Lib/test/test_email/test_headerregistry.py
M Misc/ACKS

diff --git a/Lib/email/message.py b/Lib/email/message.py
index 46bb8c21942af8..6b7c3a2377765a 100644
--- a/Lib/email/message.py
+++ b/Lib/email/message.py
@@ -286,8 +286,12 @@ def get_payload(self, i=None, decode=False):
         if i is not None and not isinstance(self._payload, list):
             raise TypeError('Expected list, got %s' % type(self._payload))
         payload = self._payload
-        # cte might be a Header, so for now stringify it.
-        cte = str(self.get('content-transfer-encoding', '')).lower()
+        cte = self.get('content-transfer-encoding', '')
+        if hasattr(cte, 'cte'):
+            cte = cte.cte
+        else:
+            # cte might be a Header, so for now stringify it.
+            cte = str(cte).strip().lower()
         # payload may be bytes here.
         if not decode:
             if isinstance(payload, str) and utils._has_surrogates(payload):
diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py
index 65ddbabcaa1997..925a638572d79c 100644
--- a/Lib/test/test_email/test_email.py
+++ b/Lib/test/test_email/test_email.py
@@ -810,6 +810,16 @@ def test_unicode_body_defaults_to_utf8_encoding(self):
             w4kgdGVzdGFiYwo=
             """))
 
+    def test_string_payload_with_base64_cte(self):
+        msg = email.message_from_string(textwrap.dedent("""\
+        Content-Transfer-Encoding: base64
+
+        SGVsbG8uIFRlc3Rpbmc=
+        """), policy=email.policy.default)
+        self.assertEqual(msg.get_payload(decode=True), b"Hello. Testing")
+        self.assertDefectsEqual(msg['content-transfer-encoding'].defects, [])
+
+
 
 # Test the email.encoders module
 class TestEncoders(unittest.TestCase):
@@ -2352,6 +2362,40 @@ def test_missing_header_body_separator(self):
         self.assertDefectsEqual(msg.defects,
                                 [errors.MissingHeaderBodySeparatorDefect])
 
+    def test_string_payload_with_extra_space_after_cte(self):
+        # https://github.com/python/cpython/issues/98188
+        cte = "base64 "
+        msg = email.message_from_string(textwrap.dedent(f"""\
+        Content-Transfer-Encoding: {cte}
+
+        SGVsbG8uIFRlc3Rpbmc=
+        """), policy=email.policy.default)
+        self.assertEqual(msg.get_payload(decode=True), b"Hello. Testing")
+        self.assertDefectsEqual(msg['content-transfer-encoding'].defects, [])
+
+    def test_string_payload_with_extra_text_after_cte(self):
+        msg = email.message_from_string(textwrap.dedent("""\
+        Content-Transfer-Encoding: base64 some text
+
+        SGVsbG8uIFRlc3Rpbmc=
+        """), policy=email.policy.default)
+        self.assertEqual(msg.get_payload(decode=True), b"Hello. Testing")
+        cte = msg['content-transfer-encoding']
+        self.assertDefectsEqual(cte.defects, [email.errors.InvalidHeaderDefect])
+
+    def test_string_payload_with_extra_space_after_cte_compat32(self):
+        cte = "base64 "
+        msg = email.message_from_string(textwrap.dedent(f"""\
+        Content-Transfer-Encoding: {cte}
+
+        SGVsbG8uIFRlc3Rpbmc=
+        """), policy=email.policy.compat32)
+        pasted_cte = msg['content-transfer-encoding']
+        self.assertEqual(pasted_cte, cte)
+        self.assertEqual(msg.get_payload(decode=True), b"Hello. Testing")
+        self.assertDefectsEqual(msg.defects, [])
+
+
 
 # Test RFC 2047 header encoding and decoding
 class TestRFC2047(TestEmailBase):
diff --git a/Lib/test/test_email/test_headerregistry.py b/Lib/test/test_email/test_headerregistry.py
index 5a608a033c7e54..a579f9eeb7f4bf 100644
--- a/Lib/test/test_email/test_headerregistry.py
+++ b/Lib/test/test_email/test_headerregistry.py
@@ -838,6 +838,11 @@ def cte_as_value(self,
             '7bit',
             [errors.InvalidHeaderDefect]),
 
+        'extra_space_after_cte': (
+            'base64 ',
+            'base64',
+            []),
+
     }
 
 
diff --git a/Misc/ACKS b/Misc/ACKS
index bed3e028d0a18d..206c9b849cf0ea 100644
--- a/Misc/ACKS
+++ b/Misc/ACKS
@@ -1122,6 +1122,7 @@ Gregor Lingl
 Everett Lipman
 Mirko Liss
 Alexander Liu
+Hui Liu
 Yuan Liu
 Nick Lockwood
 Stephanie Lockwood
diff --git a/Misc/NEWS.d/next/Library/2024-12-03-14-45-16.gh-issue-98188.GX9i2b.rst b/Misc/NEWS.d/next/Library/2024-12-03-14-45-16.gh-issue-98188.GX9i2b.rst
new file mode 100644
index 00000000000000..30ab8cfc3f0bc6
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-12-03-14-45-16.gh-issue-98188.GX9i2b.rst
@@ -0,0 +1,3 @@
+Fix an issue in :meth:`email.message.Message.get_payload` where data
+cannot be decoded if the Content Transfer Encoding mechanism contains
+trailing whitespaces or additional junk text. Patch by Hui Liu.

_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: [email protected]

Reply via email to