Update of /cvsroot/spambayes/spambayes/utilities
In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv30085/utilities

Modified Files:
        extractmessages.py 
Log Message:
Add simple parts of [ 824651 ] Multibyte (CJK etc.) message support

(Lets extractmessages and scoremsg work with charsets other than us-ascii, and 
lets Outlook plug-in handle tokens that aren't in the right encoding).

Index: extractmessages.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/utilities/extractmessages.py,v
retrieving revision 1.3
retrieving revision 1.4
diff -C2 -d -r1.3 -r1.4
*** extractmessages.py  15 Jan 2004 03:05:22 -0000      1.3
--- extractmessages.py  10 Jun 2006 04:57:11 -0000      1.4
***************
*** 25,28 ****
--- 25,30 ----
  import re
  import cPickle as pickle
+ import locale
+ from email.Header import make_header, decode_header
  
  from spambayes.mboxutils import getmbox
***************
*** 86,89 ****
--- 88,95 ----
          return 1
  
+     charset = locale.getdefaultlocale()[1]
+     if not charset:
+         charset = 'us-ascii'
+ 
      mapfile = spamfile = hamfile = None
      features = set()
***************
*** 99,103 ****
              spamfile = arg
          elif opt in ("-f", "--feature"):
!             features.add(arg)
  
      if hamfile is None and spamfile is None:
--- 105,109 ----
              spamfile = arg
          elif opt in ("-f", "--feature"):
!             features.add(unicode(arg, charset))
  
      if hamfile is None and spamfile is None:
***************
*** 126,132 ****
                  evidence = msg.get("X-Spambayes-Evidence", "")
                  evidence = re.sub(r"\s+", " ", evidence)
!                 features = [e.rsplit(": ", 1)[0]
!                               for e in evidence.split("; ")[2:]]
!                 features = set([eval(f) for f in features])
          if not features:
              usage("No X-Spambayes-Evidence headers found")
--- 132,143 ----
                  evidence = msg.get("X-Spambayes-Evidence", "")
                  evidence = re.sub(r"\s+", " ", evidence)
!                 l = [e.rsplit(": ", 1)[0]
!                      for e in evidence.split("; ")[2:]]
!                 for s in l:
!                     try:
!                         s = make_header(decode_header(s)).__unicode__()
!                     except:
!                         s = unicode(s, 'us-ascii', 'replace')
!                     features.add(s)
          if not features:
              usage("No X-Spambayes-Evidence headers found")

_______________________________________________
Spambayes-checkins mailing list
[email protected]
http://mail.python.org/mailman/listinfo/spambayes-checkins

Reply via email to