https://github.com/python/cpython/commit/09fab93c3d857496c0bd162797fab816c311ee48
commit: 09fab93c3d857496c0bd162797fab816c311ee48
branch: main
author: Thomas Weißschuh <[email protected]>
committer: serhiy-storchaka <[email protected]>
date: 2024-02-17T10:13:46Z
summary:
gh-100884: email/_header_value_parser: don't encode list separators (GH-100885)
ListSeparator should not be encoded. This could happen when a long line
pushed its separator onto the next line, which was then encoded.
files:
A Misc/NEWS.d/next/Library/2023-01-09-14-08-02.gh-issue-100884.DcmdLl.rst
M Lib/email/_header_value_parser.py
M Lib/test/test_email/test__header_value_parser.py
diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py
index 5b653f66c18554..e4a342d446f6a3 100644
--- a/Lib/email/_header_value_parser.py
+++ b/Lib/email/_header_value_parser.py
@@ -949,6 +949,7 @@ class _InvalidEwError(errors.HeaderParseError):
# up other parse trees. Maybe should have tests for that, too.
DOT = ValueTerminal('.', 'dot')
ListSeparator = ValueTerminal(',', 'list-separator')
+ListSeparator.as_ew_allowed = False
RouteComponentMarker = ValueTerminal('@', 'route-component-marker')
#
@@ -2022,7 +2023,7 @@ def get_address_list(value):
address_list.defects.append(errors.InvalidHeaderDefect(
"invalid address in address-list"))
if value: # Must be a , at this point.
- address_list.append(ValueTerminal(',', 'list-separator'))
+ address_list.append(ListSeparator)
value = value[1:]
return address_list, value
diff --git a/Lib/test/test_email/test__header_value_parser.py b/Lib/test/test_email/test__header_value_parser.py
index bdb0e55f21069f..f7e80749c456f8 100644
--- a/Lib/test/test_email/test__header_value_parser.py
+++ b/Lib/test/test_email/test__header_value_parser.py
@@ -2985,6 +2985,11 @@ def test_address_list_with_unicode_names_in_quotes(self):
'=?utf-8?q?H=C3=BCbsch?= Kaktus <[email protected]>,\n'
' =?utf-8?q?bei=C3=9Ft_bei=C3=9Ft?= <[email protected]>\n')
+ def test_address_list_with_list_separator_after_fold(self):
+ to = '0123456789' * 8 + '@foo, ä <foo@bar>'
+ self._test(parser.get_address_list(to)[0],
+ '0123456789' * 8 + '@foo,\n =?utf-8?q?=C3=A4?= <foo@bar>\n')
+
# XXX Need tests with comments on various sides of a unicode token,
# and with unicode tokens in the comments. Spaces inside the quotes
# currently don't do the right thing.
diff --git a/Misc/NEWS.d/next/Library/2023-01-09-14-08-02.gh-issue-100884.DcmdLl.rst b/Misc/NEWS.d/next/Library/2023-01-09-14-08-02.gh-issue-100884.DcmdLl.rst
new file mode 100644
index 00000000000000..2a388178810835
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2023-01-09-14-08-02.gh-issue-100884.DcmdLl.rst
@@ -0,0 +1,2 @@
+email: fix misfolding of comma in address-lists over multiple lines in
+combination with unicode encoding.
_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: [email protected]