Aurélien Bompard has proposed merging lp:~abompard/mailman/bug-1060951 into 
lp:mailman.

Requested reviews:
  Mailman Coders (mailman-coders)
Related bugs:
  Bug #1060951 in GNU Mailman: "Bug getting i18n'ed attachment filenames 
(RFC2231)"
  https://bugs.launchpad.net/mailman/+bug/1060951

For more details, see:
https://code.launchpad.net/~abompard/mailman/bug-1060951/+merge/243401

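This branch solves bug #1060951 by removing the auto-conversion of headers to 
unicode in the Message class (the __getitem__, get and get_all overrides, 
along with the get_filename override).  Call sites that still need unicode 
values now decode the header value explicitly as ASCII.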
-- 
Your team Mailman Coders is requested to review the proposed merge of 
lp:~abompard/mailman/bug-1060951 into lp:mailman.
=== modified file 'src/mailman/app/bounces.py'
--- src/mailman/app/bounces.py	2014-01-07 03:43:59 +0000
+++ src/mailman/app/bounces.py	2014-12-02 14:26:47 +0000
@@ -200,10 +200,13 @@
                 optionsurl=member.options_url,
                 owneraddr=mlist.owner_address,
                 )
+    message_id = msg['message-id']
+    if not isinstance(message_id, unicode):
+        message_id = message_id.decode("ascii")
     pendable = _ProbePendable(
         # We can only pend unicodes.
         member_id=member.member_id.hex,
-        message_id=msg['message-id'],
+        message_id=message_id,
         )
     token = getUtility(IPendings).add(pendable)
     mailbox, domain_parts = split_email(mlist.bounces_address)

=== modified file 'src/mailman/app/moderator.py'
--- src/mailman/app/moderator.py	2014-04-28 15:23:35 +0000
+++ src/mailman/app/moderator.py	2014-12-02 14:26:47 +0000
@@ -87,8 +87,8 @@
     message_id = msg.get('message-id')
     if message_id is None:
         msg['Message-ID'] = message_id = unicode(make_msgid())
-    assert isinstance(message_id, unicode), (
-        'Message-ID is not a unicode: %s' % message_id)
+    if not isinstance(message_id, unicode):
+        message_id = message_id.decode("ascii")
     getUtility(IMessageStore).add(msg)
     # Prepare the message metadata with some extra information needed only by
     # the moderation interface.

=== modified file 'src/mailman/archiving/mailarchive.py'
--- src/mailman/archiving/mailarchive.py	2014-01-01 14:59:42 +0000
+++ src/mailman/archiving/mailarchive.py	2014-12-02 14:26:47 +0000
@@ -68,6 +68,8 @@
         message_id_hash = msg.get('x-message-id-hash')
         if message_id_hash is None:
             return None
+        if not isinstance(message_id_hash, unicode):
+            message_id_hash = message_id_hash.decode("ascii")
         return urljoin(self.base_url, message_id_hash)
 
     def archive_message(self, mlist, msg):

=== modified file 'src/mailman/archiving/mhonarc.py'
--- src/mailman/archiving/mhonarc.py	2014-01-01 14:59:42 +0000
+++ src/mailman/archiving/mhonarc.py	2014-12-02 14:26:47 +0000
@@ -73,6 +73,8 @@
         message_id_hash = msg.get('x-message-id-hash')
         if message_id_hash is None:
             return None
+        if not isinstance(message_id_hash, unicode):
+            message_id_hash = message_id_hash.decode("ascii")
         return urljoin(self.list_url(mlist), message_id_hash)
 
     def archive_message(self, mlist, msg):

=== modified file 'src/mailman/archiving/prototype.py'
--- src/mailman/archiving/prototype.py	2014-03-02 20:59:30 +0000
+++ src/mailman/archiving/prototype.py	2014-12-02 14:26:47 +0000
@@ -68,6 +68,8 @@
         message_id_hash = msg.get('x-message-id-hash')
         if message_id_hash is None:
             return None
+        if not isinstance(message_id_hash, unicode):
+            message_id_hash = message_id_hash.decode("ascii")
         return urljoin(Prototype.list_url(mlist), message_id_hash)
 
     @staticmethod

=== modified file 'src/mailman/commands/docs/unshunt.rst'
--- src/mailman/commands/docs/unshunt.rst	2014-04-28 15:23:35 +0000
+++ src/mailman/commands/docs/unshunt.rst	2014-12-02 14:26:47 +0000
@@ -83,7 +83,7 @@
     2
 
     >>> sorted(item.msg['message-id'] for item in items)
-    [u'<badgers>', u'<crow>']
+    ['<badgers>', '<crow>']
 
 
 Return to the original queue

=== modified file 'src/mailman/commands/eml_membership.py'
--- src/mailman/commands/eml_membership.py	2014-01-01 14:59:42 +0000
+++ src/mailman/commands/eml_membership.py	2014-12-02 14:26:47 +0000
@@ -72,6 +72,8 @@
             print(_('$self.name: No valid address found to subscribe'),
                   file=results)
             return ContinueProcessing.no
+        if not isinstance(address, unicode):
+            address = address.decode("ascii")
         # Have we already seen one join request from this user during the
         # processing of this email?
         joins = getattr(results, 'joins', set())

=== modified file 'src/mailman/email/message.py'
--- src/mailman/email/message.py	2014-04-28 15:23:35 +0000
+++ src/mailman/email/message.py	2014-12-02 14:26:47 +0000
@@ -53,29 +53,6 @@
         self.__version__ = VERSION
         email.message.Message.__init__(self)
 
-    def __getitem__(self, key):
-        # Ensure that header values are unicodes.
-        value = email.message.Message.__getitem__(self, key)
-        if isinstance(value, str):
-            return unicode(value, 'ascii')
-        return value
-
-    def get(self, name, failobj=None):
-        # Ensure that header values are unicodes.
-        value = email.message.Message.get(self, name, failobj)
-        if isinstance(value, str):
-            return unicode(value, 'ascii')
-        return value
-
-    def get_all(self, name, failobj=None):
-        # Ensure all header values are unicodes.
-        missing = object()
-        all_values = email.message.Message.get_all(self, name, missing)
-        if all_values is missing:
-            return failobj
-        return [(unicode(value, 'ascii') if isinstance(value, str) else value)
-                for value in all_values]
-
     # BAW: For debugging w/ bin/dumpdb.  Apparently pprint uses repr.
     def __repr__(self):
         return self.__str__()
@@ -144,18 +121,15 @@
                 field_values = self.get_all(header, [])
                 senders.extend(address.lower() for (display_name, address)
                                in email.utils.getaddresses(field_values))
-        # Filter out None and the empty string.
-        return [sender for sender in senders if sender]
-
-    def get_filename(self, failobj=None):
-        """Some MUA have bugs in RFC2231 filename encoding and cause
-        Mailman to stop delivery in Scrubber.py (called from ToDigest.py).
-        """
-        try:
-            filename = email.message.Message.get_filename(self, failobj)
-            return filename
-        except (UnicodeError, LookupError, ValueError):
-            return failobj
+        # Filter out None and the empty string, and convert to unicode.
+        clean_senders = []
+        for sender in senders:
+            if not sender:
+                continue
+            if not isinstance(sender, unicode):
+                sender = sender.decode("ascii")
+            clean_senders.append(sender)
+        return clean_senders
 
 
 

=== modified file 'src/mailman/email/tests/test_message.py'
--- src/mailman/email/tests/test_message.py	2014-01-01 14:59:42 +0000
+++ src/mailman/email/tests/test_message.py	2014-12-02 14:26:47 +0000
@@ -26,9 +26,10 @@
 
 
 import unittest
+from email.parser import FeedParser
 
 from mailman.app.lifecycle import create_list
-from mailman.email.message import UserNotification
+from mailman.email.message import UserNotification, Message
 from mailman.testing.helpers import get_queue_messages
 from mailman.testing.layers import ConfigLayer
 
@@ -58,3 +59,35 @@
         self.assertEqual(len(messages), 1)
         self.assertEqual(messages[0].msg.get_all('precedence'), 
                          ['omg wtf bbq'])
+
+
+
+class TestMessageSubclass(unittest.TestCase):
+    """Tests for the mailman.email.message.Message subclass."""
+
+    def test_i18n_filenames(self):
+        """An RFC 2231 encoded attachment filename is returned decoded."""
+        parser = FeedParser(_factory=Message)
+        parser.feed(b"""Message-ID: <b...@example.com>
Content-Type: multipart/mixed; boundary="------------050607040206050605060208"

This is a multi-part message in MIME format.
--------------050607040206050605060208
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: quoted-printable

Test message containing an attachment with an accented filename

--------------050607040206050605060208
Content-Disposition: attachment;
	filename*=UTF-8''d%C3%A9jeuner.txt

Test content
--------------050607040206050605060208--
""")
+        msg = parser.close()
+        attachment = msg.get_payload()[1]
+        # Let any exception propagate: the previous try/except TypeError only
+        # re-raised, so its self.fail(e) call was unreachable.
+        filename = attachment.get_filename()
+        self.assertEqual(filename, u"d\xe9jeuner.txt")

=== modified file 'src/mailman/model/bounce.py'
--- src/mailman/model/bounce.py	2014-09-22 18:47:02 +0000
+++ src/mailman/model/bounce.py	2014-12-02 14:26:47 +0000
@@ -57,7 +57,10 @@
         self.list_id = list_id
         self.email = email
         self.timestamp = now()
-        self.message_id = msg['message-id']
+        msgid = msg['message-id']
+        if not isinstance(msgid, unicode):
+            msgid = msgid.decode("ascii")
+        self.message_id = msgid
         self.context = (BounceContext.normal if context is None else context)
         self.processed = False
 

=== modified file 'src/mailman/model/messagestore.py'
--- src/mailman/model/messagestore.py	2014-09-28 00:17:05 +0000
+++ src/mailman/model/messagestore.py	2014-12-02 14:26:47 +0000
@@ -58,6 +58,8 @@
             raise ValueError('Exactly one Message-ID header required')
         # Calculate and insert the X-Message-ID-Hash.
         message_id = message_ids[0]
+        if not isinstance(message_id, unicode):
+            message_id = message_id.decode("ascii")
         # Complain if the Message-ID already exists in the storage.
         existing = store.query(Message).filter(
             Message.message_id == message_id).first()

=== modified file 'src/mailman/rules/implicit_dest.py'
--- src/mailman/rules/implicit_dest.py	2014-01-01 14:59:42 +0000
+++ src/mailman/rules/implicit_dest.py	2014-12-02 14:26:47 +0000
@@ -73,6 +73,8 @@
         recipients = set()
         for header in ('to', 'cc', 'resent-to', 'resent-cc'):
             for fullname, address in getaddresses(msg.get_all(header, [])):
+                if not isinstance(address, unicode):
+                    address = address.decode("ascii")
                 address = address.lower()
                 if address in aliases:
                     return False

=== modified file 'src/mailman/utilities/email.py'
--- src/mailman/utilities/email.py	2014-04-28 15:23:35 +0000
+++ src/mailman/utilities/email.py	2014-12-02 14:26:47 +0000
@@ -62,6 +62,8 @@
     message_id = msg.get('message-id')
     if message_id is None:
         return
+    if not isinstance(message_id, unicode):
+        message_id = message_id.decode("ascii")
     # The angle brackets are not part of the Message-ID.  See RFC 2822
     # and http://wiki.list.org/display/DEV/Stable+URLs
     if message_id.startswith('<') and message_id.endswith('>'):

_______________________________________________
Mailman-coders mailing list
Mailman-coders@python.org
https://mail.python.org/mailman/listinfo/mailman-coders

Reply via email to