Attached is a patch that should fix all the problems I've reported. It
bypasses the Python email library's mboxo-escaping and does
mboxrd-escaping manually. This is also applied to multipart messages,
so it also fixes #633735. Although #633780 turned out to be a
non-issue, I left in the line that deletes the Content-Length header
field. It doesn't hurt to make sure.
diff -ur getmail4-4.20.3/getmailcore/message.py getmail4-4.20.3.new/getmailcore/message.py
--- getmail4-4.20.3/getmailcore/message.py 2011-07-14 00:32:06.000000000 +0300
+++ getmail4-4.20.3.new/getmailcore/message.py 2011-07-14 00:48:32.503572633 +0300
@@ -9,6 +9,7 @@
import os
import time
import cStringIO
+import re
import email
import email.Errors
import email.Utils
@@ -28,6 +29,9 @@
'recipient'
)
+# Lines that should be escaped in mboxrd mailboxes
+from_re = re.compile("^(>*From )", re.MULTILINE)
+
#######################################
def corrupt_message(why, fromlines=None, fromstring=None):
log = getmailcore.logging.Logger()
@@ -151,7 +155,18 @@
content += '; ' + time.strftime('%d %b %Y %H:%M:%S -0000',
time.gmtime())
f.write(format_header('Received', content))
- gen = Generator(f, mangle_from, 0)
+ # Since we reformat the message with escaping and EOL
+ # conversion, Content-Length is probably no longer accurate. The
+ # header isn't needed for mboxrd anyway.
+ del self.__msg['Content-Length']
+ # We escape the payload by ourselves, since the generator would
+ # produce mboxo.
+ gen = Generator(f, False, 0)
+ if mangle_from:
+ payload = self.__msg.get_payload()
+ # The payload is always a string since it is produced by
+ # HeaderParser.
+ self.__msg.set_payload(from_re.sub(">\\1", payload))
# From_ handled above, always tell the generator not to include it
try:
gen.flatten(self.__msg, False)