contrib

montanaro Mon, 24 Nov 2008 19:50:45 -0800

Revision: 3212
          http://spambayes.svn.sourceforge.net/spambayes/?rev=3212&view=rev
Author:   montanaro
Date:     2008-11-25 03:50:08 +0000 (Tue, 25 Nov 2008)


Log Message:
-----------
pylint nits, use safepickle functions

Modified Paths:
--------------
    trunk/spambayes/contrib/SmarterHTTPServer.py
    trunk/spambayes/contrib/bulkgraph.py
    trunk/spambayes/contrib/findbest.py
    trunk/spambayes/contrib/mod_spambayes.py
    trunk/spambayes/contrib/nway.py
    trunk/spambayes/contrib/pycksum.py
    trunk/spambayes/contrib/sb_culler.py
    trunk/spambayes/contrib/spamcounts.py
    trunk/spambayes/contrib/tte.py

Modified: trunk/spambayes/contrib/SmarterHTTPServer.py
===================================================================
--- trunk/spambayes/contrib/SmarterHTTPServer.py        2008-11-25 03:45:39 UTC (rev 3211)
+++ trunk/spambayes/contrib/SmarterHTTPServer.py        2008-11-25 03:50:08 UTC (rev 3212)
@@ -20,7 +20,6 @@
 import SimpleHTTPServer
 import urllib
 import cgi
-import shutil
 import mimetypes
 import re
 try:

Modified: trunk/spambayes/contrib/bulkgraph.py
===================================================================
--- trunk/spambayes/contrib/bulkgraph.py        2008-11-25 03:45:39 UTC (rev 3211)
+++ trunk/spambayes/contrib/bulkgraph.py        2008-11-25 03:50:08 UTC (rev 3212)
@@ -32,15 +32,15 @@
         quiet mode; no output
 """
 
-import mboxutils
 import getopt
-import hammie
 import sys
 import os
 import re
 import time
 import filecmp
 
+from spambayes import mboxutils, hammie
+
 program = sys.argv[0]
 loud = True
 day = 24 * 60 * 60

Modified: trunk/spambayes/contrib/findbest.py
===================================================================
--- trunk/spambayes/contrib/findbest.py 2008-11-25 03:45:39 UTC (rev 3211)
+++ trunk/spambayes/contrib/findbest.py 2008-11-25 03:50:08 UTC (rev 3212)
@@ -66,7 +66,6 @@
 
 import sys
 import os
-import cPickle as pickle
 import getopt
 import math
 
@@ -75,6 +74,8 @@
 from spambayes.hammie import Hammie
 from spambayes.tokenizer import tokenize
 from spambayes.Options import options
+from spambayes import storage
+from spambayes.safepickle import pickle_read, pickle_write
 
 cls = Classifier()
 h = Hammie(cls)
@@ -98,7 +99,6 @@
 def score(unsure, h, cls, scores, msgids=None, skipspam=False):
     """See what effect on others each msg in unsure has"""
 
-    ham_cutoff = options["Categorization", "ham_cutoff"]
     spam_cutoff = options["Categorization", "spam_cutoff"]
 
     # compute a base - number of messages in unsure already in the
@@ -223,7 +223,7 @@
     print "scoring"
 
     if best:
-        last_scores = pickle.load(file(bestfile))
+        last_scores = pickle_read(bestfile)
         last_scores = last_scores.items()
         last_scores.sort()
         msgids = set()
@@ -240,7 +240,7 @@
         pass
 
     if not best:
-        pickle.dump(scores, file(bestfile, 'w'))
+        pickle_write(bestfile, scores)
 
     return 0
 

Modified: trunk/spambayes/contrib/mod_spambayes.py
===================================================================
--- trunk/spambayes/contrib/mod_spambayes.py    2008-11-25 03:45:39 UTC (rev 3211)
+++ trunk/spambayes/contrib/mod_spambayes.py    2008-11-25 03:50:08 UTC (rev 3212)
@@ -5,26 +5,24 @@
 ## Author: Skip Montanaro <[EMAIL PROTECTED]>
 ##
 
-import os
-
 from proxy3_filter import *
 import proxy3_options
 
-from spambayes import hammie, Options, mboxutils
+from spambayes import hammie, Options
 dbf = Options.get_pathname_option("Storage", "persistent_storage_file")
 
 class SpambayesFilter(BufferAllFilter):
-    hammie = hammie.open(dbf, 1, 'r')
+    checker = hammie.open(dbf, 1, 'r')
 
     def filter(self, s):
         if self.reply.split()[1] == '200':
-            prob = self.hammie.score("%s\r\n%s" % (self.serverheaders, s))
+            prob = self.checker.score("%s\r\n%s" % (self.serverheaders, s))
             print "|  prob: %.5f" % prob
             if prob >= Options.options["Categorization", "spam_cutoff"]:
                 print self.serverheaders
                 print "text:", s[0:40], "...", s[-40:]
                 return "not authorized"
-    return s
+        return s
 
 from proxy3_util import *
 

Modified: trunk/spambayes/contrib/nway.py
===================================================================
--- trunk/spambayes/contrib/nway.py     2008-11-25 03:45:39 UTC (rev 3211)
+++ trunk/spambayes/contrib/nway.py     2008-11-25 03:50:08 UTC (rev 3212)
@@ -70,7 +70,7 @@
 
 prog = os.path.basename(sys.argv[0])
 
-def help():
+def usage():
     print >> sys.stderr, __doc__ % globals()
 
 def main(args):
@@ -78,10 +78,9 @@
 
     for opt, arg in opts:
         if opt == '-h':
-            help()
+            usage()
             return 0
 
-    tagdb_list = []
     msg = mboxutils.get_message(sys.stdin)
     try:
         del msg["X-Spambayes-Classification"]

Modified: trunk/spambayes/contrib/pycksum.py
===================================================================
--- trunk/spambayes/contrib/pycksum.py  2008-11-25 03:45:39 UTC (rev 3211)
+++ trunk/spambayes/contrib/pycksum.py  2008-11-25 03:50:08 UTC (rev 3212)
@@ -39,7 +39,10 @@
 import sys
 import email.Parser
 import email.generator
-import md5
+try:
+    from hashlib import md5
+except ImportError:
+    from md5 import new as md5
 import anydbm
 import re
 import time
@@ -97,12 +100,12 @@
     body = text.split("\n\n", 1)[1]
     lines = clean(body).split("\n")
     chunksize = len(lines)//4+1
-    sum = []
+    digest = []
     for i in range(4):
         chunk = "\n".join(lines[i*chunksize:(i+1)*chunksize])
-        sum.append(md5.new(chunk).hexdigest())
+        digest.append(md5(chunk).hexdigest())
 
-    return ".".join(sum)
+    return ".".join(digest)
 
 def save_checksum(cksum, f):
     pieces = cksum.split('.')
@@ -118,12 +121,12 @@
         if not db.has_key(subsum):
             db[subsum] = str(time.time())
             if len(db) > maxdblen:
-                items = [(float(db[k]),k) for k in db.keys()]
+                items = [(float(db[k]), k) for k in db.keys()]
                 items.sort()
                 # the -20 brings us down a bit below the max so we aren't
                 # constantly running this chunk of code
                 items = items[:-(maxdblen-20)]
-                for v,k in items:
+                for v, k in items:
                     del db[k]
         else:
             result = 0

Modified: trunk/spambayes/contrib/sb_culler.py
===================================================================
--- trunk/spambayes/contrib/sb_culler.py        2008-11-25 03:45:39 UTC (rev 3211)
+++ trunk/spambayes/contrib/sb_culler.py        2008-11-25 03:50:08 UTC (rev 3212)
@@ -30,20 +30,23 @@
 This program requires Python 2.3 or newer.
 """
 
-import sets, traceback, md5, os
+import socket
+socket.setdefaulttimeout(10)
+
+import traceback, md5, os
 import poplib
 import posixpath
+
+import sets
 from email import Header, Utils
 from spambayes import mboxutils, hammie
+from spambayes.Options import options
 
-import socket
-socket.setdefaulttimeout(10)
-
 DO_ACTIONS = 1
 VERBOSE_LEVEL = 1
 
 APPEND_TO_FILE = "append_to_file"
-DELETE = "delete"
+DELETE_FROM_MAILBOX = "delete"
 KEEP_IN_MAILBOX = "keep in mailbox"
 SPAM = "spam"
 VIRUS = "virus"
@@ -108,7 +111,7 @@
 
 def DELETE(mi, log):
     """Action: delete message from mailbox"""
-    log.do_action(DELETE)
+    log.do_action(DELETE_FROM_MAILBOX)
     if not DO_ACTIONS:
         return
     mi.mailbox.dele(mi.i)

Modified: trunk/spambayes/contrib/spamcounts.py
===================================================================
--- trunk/spambayes/contrib/spamcounts.py       2008-11-25 03:45:39 UTC (rev 3211)
+++ trunk/spambayes/contrib/spamcounts.py       2008-11-25 03:50:08 UTC (rev 3212)
@@ -19,13 +19,11 @@
 import getopt
 import re
 import sets
-import os
-import shelve
 import csv
 
-from spambayes.Options import options, get_pathname_option
+from spambayes.Options import options
 from spambayes.tokenizer import tokenize
-from spambayes.storage import STATE_KEY, database_type, open_storage
+from spambayes.storage import database_type, open_storage
 
 prog = sys.argv[0]
 

Modified: trunk/spambayes/contrib/tte.py
===================================================================
--- trunk/spambayes/contrib/tte.py      2008-11-25 03:45:39 UTC (rev 3211)
+++ trunk/spambayes/contrib/tte.py      2008-11-25 03:50:08 UTC (rev 3212)
@@ -100,7 +100,7 @@
 
 def train(store, hambox, spambox, maxmsgs, maxrounds, tdict, reverse, verbose,
           ratio):
-    smisses = hmisses = round = 0
+    round = 0
     ham_cutoff = Options.options["Categorization", "ham_cutoff"]
     spam_cutoff = Options.options["Categorization", "spam_cutoff"]
 
@@ -114,19 +114,19 @@
         hambone_ = list(reversed(hambone_))
         spamcan_ = list(reversed(spamcan_))
     
-    nspam,nham = len(spamcan_),len(hambone_)
+    nspam, nham = len(spamcan_), len(hambone_)
     if ratio:
-        rspam,rham = ratio
+        rspam, rham = ratio
         # If the actual ratio of spam to ham in the database is better than
         # what was asked for, use that better ratio.
         if (rspam > rham) == (rspam * nham > rham * nspam):
-            rspam,rham = nspam,nham
+            rspam, rham = nspam, nham
 
     # define some indexing constants
     ham = 0
     spam = 1
     name = ('ham','spam')
-    misses = [0,0]
+    misses = [0, 0]
 
     misclassified = lambda is_spam, score: (
         is_spam and score < spam_cutoff or not is_spam and score > ham_cutoff)
@@ -140,9 +140,9 @@
         hambone = iter(hambone_)
         spamcan = iter(spamcan_)
 
-        i = [0,0]
+        i = [0, 0]
         msgs_processed = 0
-        misses = [0,0]
+        misses = [0, 0]
         training_sets = [hambone, spamcan]
 
         while not maxmsgs or msgs_processed < maxmsgs:
@@ -153,7 +153,7 @@
             try:
                 train_msg = training_sets[train_spam].next()
             except StopIteration:
-                break;
+                break
 
             i[train_spam] += 1
             msgs_processed += 1
@@ -164,7 +164,7 @@
             score = store.spamprob(tokens)
             selector = train_msg["message-id"] or train_msg["subject"]
 
-            if misclassified(train_spam,score) and selector is not None:
+            if misclassified(train_spam, score) and selector is not None:
                 if verbose:
                     print >> sys.stderr, "\tmiss %s: %.6f %s" % (
                         name[train_spam], score, selector)
@@ -179,24 +179,25 @@
         print "\rround: %2d, msgs: %4d, ham misses: %3d, spam misses: %3d, %.1fs" % \
               (round, msgs_processed, misses[0], misses[1], seconds)
 
-    training_sets = [hambone,spamcan]
+    training_sets = [hambone, spamcan]
     
     # We count all untrained messages so the user knows what was skipped.
     # We also tag them for saving so we don't lose messages which might have
     # value in a future run
-    for is_spam in ham,spam:
+    for is_spam in ham, spam:
         nleft = 0
         try:
             while True:
                 msg = training_sets[is_spam].next()
                 score = store.spamprob(tokenize(msg))
                 
-                if misclassified(is_spam,score):
+                if misclassified(is_spam, score):
                     tdict[msg["message-id"]] = True
                     nleft += 1
                     
         except StopIteration:
-            if nleft: print nleft, "untrained %ss" % name[is_spam]
+            if nleft:
+                print nleft, "untrained %ss" % name[is_spam]
 
 def cull(mbox_name, cullext, designation, tdict):
     print "writing new %s mbox..." % designation


This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.
_______________________________________________
Spambayes-checkins mailing list
[email protected]
http://mail.python.org/mailman/listinfo/spambayes-checkins

[Spambayes-checkins] SF.net SVN: spambayes:[3212] trunk/spambayes/contrib

Reply via email to