Mark Sapiro pushed to branch master at GNU Mailman / Mailman Core
Commits:
e5f3f118 by Mark Sapiro at 2020-12-04T09:52:50+00:00
Fix issue converting non-ascii html to plain text.
- - - - -
45edbcb8 by Mark Sapiro at 2020-12-04T09:52:50+00:00
Merge branch 'mimedel' into 'master'
Fix issue converting non-ascii html to plain text.
Closes #798
See merge request mailman/mailman!742
- - - - -
4 changed files:
- src/mailman/docs/NEWS.rst
- src/mailman/handlers/mime_delete.py
- + src/mailman/handlers/tests/data/html_to_plain.eml
- src/mailman/handlers/tests/test_mimedel.py
Changes:
=====================================
src/mailman/docs/NEWS.rst
=====================================
@@ -23,8 +23,10 @@ Bugs
* Implemented a ``scrubber`` for plain text digests. (Closes #473)
* The ``mailman gatenews`` command now adds ``original_size`` as a message
attribute. (Extends fix for #762)
-* Handle FileNotFoundError when creating digest.mmdf file without
- parent directory present (Closes #699)
+* Handle FileNotFoundError when creating digest.mmdf file without a
+ parent directory present. (Closes #699)
+* Fixed an issue where content filtering could throw UnicodeEncodeError when
+ converting HTML to plain text. (Closes #798)
New Features
------------
=====================================
src/mailman/handlers/mime_delete.py
=====================================
@@ -277,9 +277,9 @@ def to_plaintext(msg):
resources.callback(shutil.rmtree, tempdir)
for subpart in typed_subpart_iterator(msg, 'text', 'html'):
filename = os.path.join(tempdir, '{}.html'.format(next(counter)))
- ctype = msg.get_content_charset('us-ascii')
+ cset = subpart.get_content_charset('us-ascii')
with open(filename, 'w', encoding='utf-8') as fp:
- fp.write(subpart.get_payload(decode=True).decode(ctype,
+ fp.write(subpart.get_payload(decode=True).decode(cset,
errors='replace'))
template = Template(config.mailman.html_to_plain_text_command)
command = template.safe_substitute(filename=filename).split()
@@ -291,7 +291,7 @@ def to_plaintext(msg):
# Replace the payload of the subpart with the converted text
# and tweak the content type.
del subpart['content-transfer-encoding']
- subpart.set_payload(stdout, charset=ctype)
+ subpart.set_payload(stdout, charset=cset)
subpart.set_type('text/plain')
changedp += 1
return changedp
=====================================
src/mailman/handlers/tests/data/html_to_plain.eml
=====================================
@@ -0,0 +1,25 @@
+To: [email protected]
+From: [email protected]
+Subject: Test Message
+Message-ID: <[email protected]>
+Date: Thu, 3 Dec 2020 15:18:27 +0100
+MIME-Version: 1.0
+Content-Type: multipart/mixed;
+ boundary="------------04218E0A720FDBFA6DB11AF1"
+
+--------------04218E0A720FDBFA6DB11AF1
+Content-Type: text/plain; charset=utf-8; format=flowed
+Content-Transfer-Encoding: quoted-printable
+
+This is a plain text body
+
+--------------04218E0A720FDBFA6DB11AF1
+Content-Type: text/html; charset=UTF-8;
+ name="junk.html"
+Content-Disposition: attachment;
+ filename="junk.html"
+Content-Transfer-Encoding: base64
+
+VW0gZnLDvGhlcmUgTmFjaHJpY2h0ZW4K
+
+--------------04218E0A720FDBFA6DB11AF1--
=====================================
src/mailman/handlers/tests/test_mimedel.py
=====================================
@@ -351,6 +351,22 @@ MIME-Version: 1.0
payload_lines = msg.get_payload().splitlines()
self.assertEqual(payload_lines[0], '<html><head></head>')
+ def test_html_part_with_non_ascii(self):
+ # Ensure we can convert HTML to plain text in an HTML sub-part which
+ # contains non-ASCII characters.
+ with resource_open(
+ 'mailman.handlers.tests.data',
+ 'html_to_plain.eml') as fp:
+ msg = email.message_from_binary_file(fp)
+ process = config.handlers['mime-delete'].process
+ with dummy_script():
+ process(self._mlist, msg, {})
+ part = msg.get_payload(1)
+ cset = part.get_content_charset('us-ascii')
+ text = part.get_payload(decode=True).decode(cset).splitlines()
+ self.assertEqual(text[0], 'Converted text/html to text/plain')
+ self.assertEqual(text[2], 'Um frühere Nachrichten')
+
class TestMiscellaneous(unittest.TestCase):
"""Test various miscellaneous filtering actions."""
View it on GitLab:
https://gitlab.com/mailman/mailman/-/compare/9176bf6ee3fa8c707e8a60aeca495e799c07e216...45edbcb884ac9198d42f06833ea381585a72b68d
--
View it on GitLab:
https://gitlab.com/mailman/mailman/-/compare/9176bf6ee3fa8c707e8a60aeca495e799c07e216...45edbcb884ac9198d42f06833ea381585a72b68d
You're receiving this email because of your account on gitlab.com.
_______________________________________________
Mailman-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3/lists/mailman-checkins.python.org/
Member address: [email protected]