Update of /cvsroot/spambayes/spambayes/utilities
In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv30085/utilities
Modified Files:
extractmessages.py
Log Message:
Add simple parts of [ 824651 ] Multibyte (CJK etc.) message support
(Lets extractmessages and scoremsg work with charsets other than us-ascii, and
lets Outlook plug-in handle tokens that aren't in the right encoding).
Index: extractmessages.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/utilities/extractmessages.py,v
retrieving revision 1.3
retrieving revision 1.4
diff -C2 -d -r1.3 -r1.4
*** extractmessages.py 15 Jan 2004 03:05:22 -0000 1.3
--- extractmessages.py 10 Jun 2006 04:57:11 -0000 1.4
***************
*** 25,28 ****
--- 25,30 ----
import re
import cPickle as pickle
+ import locale
+ from email.Header import make_header, decode_header
from spambayes.mboxutils import getmbox
***************
*** 86,89 ****
--- 88,95 ----
return 1
+ charset = locale.getdefaultlocale()[1]
+ if not charset:
+ charset = 'us-ascii'
+
mapfile = spamfile = hamfile = None
features = set()
***************
*** 99,103 ****
spamfile = arg
elif opt in ("-f", "--feature"):
! features.add(arg)
if hamfile is None and spamfile is None:
--- 105,109 ----
spamfile = arg
elif opt in ("-f", "--feature"):
! features.add(unicode(arg, charset))
if hamfile is None and spamfile is None:
***************
*** 126,132 ****
evidence = msg.get("X-Spambayes-Evidence", "")
evidence = re.sub(r"\s+", " ", evidence)
! features = [e.rsplit(": ", 1)[0]
! for e in evidence.split("; ")[2:]]
! features = set([eval(f) for f in features])
if not features:
usage("No X-Spambayes-Evidence headers found")
--- 132,143 ----
evidence = msg.get("X-Spambayes-Evidence", "")
evidence = re.sub(r"\s+", " ", evidence)
! l = [e.rsplit(": ", 1)[0]
! for e in evidence.split("; ")[2:]]
! for s in l:
! try:
! s = make_header(decode_header(s)).__unicode__()
! except:
! s = unicode(s, 'us-ascii', 'replace')
! features.add(s)
if not features:
usage("No X-Spambayes-Evidence headers found")
_______________________________________________
Spambayes-checkins mailing list
[email protected]
http://mail.python.org/mailman/listinfo/spambayes-checkins