https://github.com/python/cpython/commit/a4ef689ce670684ec132204b1cd03720c8e0a03d
commit: a4ef689ce670684ec132204b1cd03720c8e0a03d
branch: 3.10
author: R. David Murray <[email protected]>
committer: ambv <[email protected]>
date: 2025-04-03T18:58:22+02:00
summary:
[3.10] gh-80222: Fix email address header folding with long quoted-string
(GH-122753) (GH-129111)
Email generators using email.policy.default could incorrectly omit the
quote ('"') characters from a quoted-string during header refolding,
leading to invalid address headers and enabling header spoofing. This
change restores the quote characters on a bare-quoted-string as the
header is refolded, and escapes backslash and quote chars in the string.
(cherry picked from commit 5aaf4168583)
Co-authored-by: Mike Edmunds <[email protected]>
Co-authored-by: Łukasz Langa <[email protected]>
files:
A Misc/NEWS.d/next/Security/2024-08-06-11-43-08.gh-issue-80222.wfR4BU.rst
M Lib/email/_header_value_parser.py
M Lib/test/test_email/test__header_value_parser.py
diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py
index 613a5f28cc139c..dbc0bd8196af52 100644
--- a/Lib/email/_header_value_parser.py
+++ b/Lib/email/_header_value_parser.py
@@ -95,8 +95,16 @@
NLSET = {'\n', '\r'}
SPECIALSNL = SPECIALS | NLSET
+
+def make_quoted_pairs(value):
+ """Escape dquote and backslash for use within a quoted-string."""
+ return str(value).replace('\\', '\\\\').replace('"', '\\"')
+
+
def quote_string(value):
- return '"'+str(value).replace('\\', '\\\\').replace('"', r'\"')+'"'
+ escaped = make_quoted_pairs(value)
+ return f'"{escaped}"'
+
# Match a RFC 2047 word, looks like =?utf-8?q?someword?=
rfc2047_matcher = re.compile(r'''
@@ -2848,6 +2856,15 @@ def _refold_parse_tree(parse_tree, *, policy):
if not hasattr(part, 'encode'):
# It's not a terminal, try folding the subparts.
newparts = list(part)
+ if part.token_type == 'bare-quoted-string':
+ # To fold a quoted string we need to create a list of terminal
+ # tokens that will render the leading and trailing quotes
+ # and use quoted pairs in the value as appropriate.
+ newparts = (
+ [ValueTerminal('"', 'ptext')] +
+ [ValueTerminal(make_quoted_pairs(p), 'ptext')
+ for p in newparts] +
+ [ValueTerminal('"', 'ptext')])
if not part.as_ew_allowed:
wrap_as_ew_blocked += 1
newparts.append(end_ew_not_allowed)
diff --git a/Lib/test/test_email/test__header_value_parser.py b/Lib/test/test_email/test__header_value_parser.py
index c9c5dbe9416b43..6a4ecafd68b4ab 100644
--- a/Lib/test/test_email/test__header_value_parser.py
+++ b/Lib/test/test_email/test__header_value_parser.py
@@ -2946,6 +2946,33 @@ def test_address_list_with_unicode_names_in_quotes(self):
'=?utf-8?q?H=C3=BCbsch?= Kaktus <[email protected]>,\n'
' =?utf-8?q?bei=C3=9Ft_bei=C3=9Ft?= <[email protected]>\n')
+ def test_address_list_with_specials_in_long_quoted_string(self):
+ # Regression for gh-80222.
+ policy = self.policy.clone(max_line_length=40)
+ cases = [
+ # (to, folded)
+ ('"Exfiltrator <[email protected]> (unclosed comment?" <[email protected]>',
+ '"Exfiltrator <[email protected]> (unclosed\n'
+ ' comment?" <[email protected]>\n'),
+ ('"Escaped \\" chars \\\\ in quoted-string stay escaped" <[email protected]>',
+ '"Escaped \\" chars \\\\ in quoted-string\n'
+ ' stay escaped" <[email protected]>\n'),
+ ('This long display name does not need quotes <[email protected]>',
+ 'This long display name does not need\n'
+ ' quotes <[email protected]>\n'),
+ ('"Quotes are not required but are retained here" <[email protected]>',
+ '"Quotes are not required but are\n'
+ ' retained here" <[email protected]>\n'),
+ ('"A quoted-string, it can be a valid local-part"@example.com',
+ '"A quoted-string, it can be a valid\n'
+ ' local-part"@example.com\n'),
+ ('"[email protected]"@example.com',
+ '"[email protected]"@example.com\n'),
+ ]
+ for (to, folded) in cases:
+ with self.subTest(to=to):
+ self._test(parser.get_address_list(to)[0], folded, policy=policy)
+
def test_address_list_with_specials_in_encoded_word(self):
# An encoded-word parsed from a structured header must remain
# encoded when it contains specials. Regression for gh-121284.
diff --git a/Misc/NEWS.d/next/Security/2024-08-06-11-43-08.gh-issue-80222.wfR4BU.rst b/Misc/NEWS.d/next/Security/2024-08-06-11-43-08.gh-issue-80222.wfR4BU.rst
new file mode 100644
index 00000000000000..0f0661d0b1cf4a
--- /dev/null
+++ b/Misc/NEWS.d/next/Security/2024-08-06-11-43-08.gh-issue-80222.wfR4BU.rst
@@ -0,0 +1,6 @@
+Fix bug in the folding of quoted strings when flattening an email message using
+a modern email policy. Previously when a quoted string was folded so that
+it spanned more than one line, the surrounding quotes and internal escapes
+would be omitted. This could theoretically be used to spoof header lines
+using a carefully constructed quoted string if the resulting rendered email
+was transmitted or re-parsed.
_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: [email protected]