https://github.com/python/cpython/commit/aec1dac4efe36a7db51f08385ddcce978814dbe3
commit: aec1dac4efe36a7db51f08385ddcce978814dbe3
branch: main
author: Serhiy Storchaka <[email protected]>
committer: serhiy-storchaka <[email protected]>
date: 2024-04-17T13:00:25+03:00
summary:

gh-117313: Fix re-folding email messages containing non-standard line separators (GH-117369)

Only treat '\n', '\r' and '\r\n' as line separators in re-folding the email
messages.  Preserve control characters '\v', '\f', '\x1c', '\x1d' and '\x1e'
and Unicode line separators '\x85', '\u2028' and '\u2029' as is.

files:
A Misc/NEWS.d/next/Library/2024-03-29-15-14-51.gh-issue-117313.ks_ONu.rst
M Lib/email/policy.py
M Lib/test/test_email/test_generator.py

diff --git a/Lib/email/policy.py b/Lib/email/policy.py
index 8816c84ed175a7..46b7de5bb6d8ae 100644
--- a/Lib/email/policy.py
+++ b/Lib/email/policy.py
@@ -21,7 +21,7 @@
     'HTTP',
     ]
 
-linesep_splitter = re.compile(r'\n|\r')
+linesep_splitter = re.compile(r'\n|\r\n?')
 
 @_extend_docstrings
 class EmailPolicy(Policy):
@@ -205,7 +205,8 @@ def _fold(self, name, value, refold_binary=False):
         if hasattr(value, 'name'):
             return value.fold(policy=self)
         maxlen = self.max_line_length if self.max_line_length else sys.maxsize
-        lines = value.splitlines()
+        # We can't use splitlines here because it splits on more than \r and \n.
+        lines = linesep_splitter.split(value)
         refold = (self.refold_source == 'all' or
                   self.refold_source == 'long' and
                     (lines and len(lines[0])+len(name)+2 > maxlen or
diff --git a/Lib/test/test_email/test_generator.py b/Lib/test/test_email/test_generator.py
index 89e7edeb63a892..3ebcb684d006d0 100644
--- a/Lib/test/test_email/test_generator.py
+++ b/Lib/test/test_email/test_generator.py
@@ -140,6 +140,39 @@ def test_flatten_linesep_overrides_policy(self):
         g.flatten(msg, linesep='\n')
         self.assertEqual(s.getvalue(), self.typ(expected))
 
+    def test_flatten_linesep(self):
+        source = 'Subject: one\n two\r three\r\n four\r\n\r\ntest body\r\n'
+        msg = self.msgmaker(self.typ(source))
+        self.assertEqual(msg['Subject'], 'one two three four')
+
+        expected = 'Subject: one\n two\n three\n four\n\ntest body\n'
+        s = self.ioclass()
+        g = self.genclass(s)
+        g.flatten(msg)
+        self.assertEqual(s.getvalue(), self.typ(expected))
+
+        expected = 'Subject: one two three four\n\ntest body\n'
+        s = self.ioclass()
+        g = self.genclass(s, policy=self.policy.clone(refold_source='all'))
+        g.flatten(msg)
+        self.assertEqual(s.getvalue(), self.typ(expected))
+
+    def test_flatten_control_linesep(self):
+        source = 'Subject: one\v two\f three\x1c four\x1d five\x1e six\r\n\r\ntest body\r\n'
+        msg = self.msgmaker(self.typ(source))
+        self.assertEqual(msg['Subject'], 'one\v two\f three\x1c four\x1d five\x1e six')
+
+        expected = 'Subject: one\v two\f three\x1c four\x1d five\x1e six\n\ntest body\n'
+        s = self.ioclass()
+        g = self.genclass(s)
+        g.flatten(msg)
+        self.assertEqual(s.getvalue(), self.typ(expected))
+
+        s = self.ioclass()
+        g = self.genclass(s, policy=self.policy.clone(refold_source='all'))
+        g.flatten(msg)
+        self.assertEqual(s.getvalue(), self.typ(expected))
+
     def test_set_mangle_from_via_policy(self):
         source = textwrap.dedent("""\
             Subject: test that
@@ -224,6 +257,22 @@ class TestGenerator(TestGeneratorBase, TestEmailBase):
     ioclass = io.StringIO
     typ = str
 
+    def test_flatten_unicode_linesep(self):
+        source = 'Subject: one\x85 two\u2028 three\u2029 four\r\n\r\ntest body\r\n'
+        msg = self.msgmaker(self.typ(source))
+        self.assertEqual(msg['Subject'], 'one\x85 two\u2028 three\u2029 four')
+
+        expected = 'Subject: =?utf-8?b?b25lwoUgdHdv4oCoIHRocmVl4oCp?= four\n\ntest body\n'
+        s = self.ioclass()
+        g = self.genclass(s)
+        g.flatten(msg)
+        self.assertEqual(s.getvalue(), self.typ(expected))
+
+        s = self.ioclass()
+        g = self.genclass(s, policy=self.policy.clone(refold_source='all'))
+        g.flatten(msg)
+        self.assertEqual(s.getvalue(), self.typ(expected))
+
 
 class TestBytesGenerator(TestGeneratorBase, TestEmailBase):
 
diff --git a/Misc/NEWS.d/next/Library/2024-03-29-15-14-51.gh-issue-117313.ks_ONu.rst b/Misc/NEWS.d/next/Library/2024-03-29-15-14-51.gh-issue-117313.ks_ONu.rst
new file mode 100644
index 00000000000000..e67576ee574f92
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-03-29-15-14-51.gh-issue-117313.ks_ONu.rst
@@ -0,0 +1,4 @@
+Only treat ``'\n'``, ``'\r'`` and ``'\r\n'`` as line separators in
+re-folding the :mod:`email` messages. Preserve control characters ``'\v'``,
+``'\f'``, ``'\x1c'``, ``'\x1d'`` and ``'\x1e'`` and Unicode line separators
+``'\x85'``, ``'\u2028'`` and ``'\u2029'`` as is.

_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: [email protected]

Reply via email to