reopen 320185
tags 320185 + patch
thanks

On August 20, 2005 at 1:22PM +0900, tats (at vega.ocn.ne.jp) wrote:

> > > I actually encountered raw non-ASCII bytes in From field.
> > http://nedko.arnaudov.name/soft/rss2email-2.55-folding.patch
> >
> > This patch uses email/Header.py instead of mimify.py, and it fixes
> > both the newline bug and the raw non-ASCII bug.
>
> Oops, the above patch is not fine in `From:' and `To:'. After
> applying the patch, `From:' has encoded text between `"' and `"',
> and `To:' encodes the email addresses incorrectly.
>
> email/Header.py seems to be better than mimify.py. However, to use
> email/Header.py, we should have more modification in rss2email.py.

To fix this bug, Nedko Arnaudov revised the patch, and I sorted out
and revised it. I can now recommend the attached patch.

* feedparser.py (_sync_author_detail): Replace '<>' with ''.
* rss2email.py (header7bit): Use email.Header instead of mimify.
* rss2email.py (header7bit_ifnonatom): New function.
* rss2email.py (run): Encode `From:' with header7bit_ifnonatom(),
  and don't encode `To:'.
* rss2email.py (run): Insert `Mime-Version:' and
  `Content-Transfer-Encoding:'.

--
Tatsuya Kinoshita
--- rss2email-2.55-1/feedparser.py
+++ rss2email-2.55/feedparser.py
@@ -811,6 +811,7 @@
# probably a better way to do the following, but it passes all the tests
author = author.replace(email, '')
author = author.replace('()', '')
+ author = author.replace('<>', '')
author = author.strip()
if author and (author[0] == '('):
author = author[1:]
--- rss2email-2.55-1/rss2email.py
+++ rss2email-2.55/rss2email.py
@@ -107,6 +107,8 @@
for e in ['error', 'gaierror']:
if hasattr(socket, e): socket_errors.append(getattr(socket, e))
import mimify; from StringIO import StringIO as SIO; mimify.CHARSET = 'utf-8'
+from email.Header import Header
+import re
if SMTP_SEND: import smtplib; smtpserver = smtplib.SMTP(SMTP_SERVER)
else: smtpserver = None
@@ -135,13 +137,24 @@
"""Quote names in email according to RFC822."""
return '"' + unu(s).replace("\\", "\\\\").replace('"', '\\"') + '"'
+nonascii = re.compile('[^\000-\177]')
+nonatom = re.compile('[^a-zA-Z0-9\012\015\040\!\#\$\%\&\'\*\+\-\/\=\?\^\_\`\{\|\}\~]') # ref. RFC2822, atom. comment is not supported
+
def header7bit(s):
"""QP_CORRUPT headers."""
- #return mimify.mime_encode_header(s + ' ')[:-1]
- # XXX due to mime_encode_header bug
- import re
- p = re.compile('=\n([^ \t])');
- return p.sub(r'\1', mimify.mime_encode_header(s + ' ')[:-1])
+ charset = 'us-ascii'
+ if nonascii.search(s):
+ charset = 'utf-8'
+ h = Header(s, charset, 50)
+ return h.encode()
+
+def header7bit_ifnonatom(s):
+ """QP_CORRUPT headers if non-atom character exists."""
+ charset = 'us-ascii'
+ if nonatom.search(s):
+ charset = 'utf-8'
+ h = Header(s, charset, 50)
+ return h.encode()
### Parsing Utilities ###
@@ -405,12 +418,14 @@
from_addr = unu(getEmail(r.feed, entry))
message = (
- "From: " + quote822(header7bit(getName(r, entry))) + " <"+from_addr+">" +
- "\nTo: " + header7bit(unu(f.to or default_to)) + # set a default email!
+ "From: " + header7bit_ifnonatom(unu(getName(r, entry))) + " <"+from_addr+">" +
+ "\nTo: " + unu(f.to or default_to) + # set a default email!
"\nSubject: " + header7bit(title) +
"\nDate: " + time.strftime("%a, %d %b %Y %H:%M:%S -0000", datetime) +
"\nUser-Agent: rss2email" + # really should be X-Mailer
BONUS_HEADER +
+ "\nMime-Version: 1.0" +
+ "\nContent-Transfer-Encoding: 8bit" +
"\nContent-Type: ") # but backwards-compatibility
if ishtml(content):
pgpWu494XVbjr.pgp
Description: PGP signature

