reopen 320185
tags 320185 + patch
thanks

On August 20, 2005 at 1:22PM +0900,
tats (at vega.ocn.ne.jp) wrote:

> > > I actually encountered raw non-ASCII bytes in From field.

> >   http://nedko.arnaudov.name/soft/rss2email-2.55-folding.patch
> >
> > This patch uses email/Header.py instead of mimify.py, and it fixes
> > both the newline bug and the raw non-ASCII bug.
>
> Oops, the above patch is not fine in `From:' and `To:'.  After
> applying the patch, `From:' has encoded text between `"' and `"',
> and `To:' encodes the email addresses incorrectly.
>
> email/Header.py seems to be better than mimify.py.  However, to use
> email/Header.py, we should have more modification in rss2email.py.

To fix this bug, Nedko Arnaudov revised his patch, and I then cleaned it
up and revised it further.

I can now recommend the attached patch.

* feedparser.py (_sync_author_detail): Replace '<>' with ''.
* rss2email.py (header7bit): Use email.Header instead of mimify.
* rss2email.py (header7bit_ifnonatom): New function.
* rss2email.py (run): Encode `From:' with header7bit_ifnonatom(), and don't
  encode `To:'.
* rss2email.py (run): Insert `Mime-Version:' and `Content-Transfer-Encoding:'.

--
Tatsuya Kinoshita
--- rss2email-2.55-1/feedparser.py
+++ rss2email-2.55/feedparser.py
@@ -811,6 +811,7 @@
             # probably a better way to do the following, but it passes all the tests
             author = author.replace(email, '')
             author = author.replace('()', '')
+            author = author.replace('<>', '')
             author = author.strip()
             if author and (author[0] == '('):
                 author = author[1:]
--- rss2email-2.55-1/rss2email.py
+++ rss2email-2.55/rss2email.py
@@ -107,6 +107,8 @@
 for e in ['error', 'gaierror']:
        if hasattr(socket, e): socket_errors.append(getattr(socket, e))
 import mimify; from StringIO import StringIO as SIO; mimify.CHARSET = 'utf-8'
+from email.Header import Header
+import re
 if SMTP_SEND: import smtplib; smtpserver = smtplib.SMTP(SMTP_SERVER)
 else: smtpserver = None

@@ -135,13 +137,24 @@
        """Quote names in email according to RFC822."""
        return '"' + unu(s).replace("\\", "\\\\").replace('"', '\\"') + '"'

+nonascii = re.compile('[^\000-\177]')
+nonatom = re.compile('[^a-zA-Z0-9\012\015\040\!\#\$\%\&\'\*\+\-\/\=\?\^\_\`\{\|\}\~]') # ref. RFC2822, atom.  comment is not supported
+
 def header7bit(s):
        """QP_CORRUPT headers."""
-       #return mimify.mime_encode_header(s + ' ')[:-1]
-       # XXX due to mime_encode_header bug
-       import re
-       p = re.compile('=\n([^ \t])');
-       return p.sub(r'\1', mimify.mime_encode_header(s + ' ')[:-1])
+       charset = 'us-ascii'
+       if nonascii.search(s):
+               charset = 'utf-8'
+       h = Header(s, charset, 50)
+       return h.encode()
+
+def header7bit_ifnonatom(s):
+       """QP_CORRUPT headers if non-atom character exists."""
+       charset = 'us-ascii'
+       if nonatom.search(s):
+               charset = 'utf-8'
+       h = Header(s, charset, 50)
+       return h.encode()

 ### Parsing Utilities ###

@@ -405,12 +418,14 @@
                                        from_addr = unu(getEmail(r.feed, entry))

                                        message = (
-                                       "From: " + quote822(header7bit(getName(r, entry))) + " <"+from_addr+">" +
-                                       "\nTo: " + header7bit(unu(f.to or default_to)) + # set a default email!
+                                       "From: " + header7bit_ifnonatom(unu(getName(r, entry))) + " <"+from_addr+">" +
+                                       "\nTo: " + unu(f.to or default_to) + # set a default email!
                                        "\nSubject: " + header7bit(title) +
                                        "\nDate: " + time.strftime("%a, %d %b %Y %H:%M:%S -0000", datetime) +
                                        "\nUser-Agent: rss2email" + # really should be X-Mailer
                                        BONUS_HEADER +
+                                       "\nMime-Version: 1.0" +
+                                       "\nContent-Transfer-Encoding: 8bit" +
                                        "\nContent-Type: ")         # but backwards-compatibility

                                        if ishtml(content):

Attachment: pgpWu494XVbjr.pgp
Description: PGP signature

Reply via email to