Revision: 3212
http://spambayes.svn.sourceforge.net/spambayes/?rev=3212&view=rev
Author: montanaro
Date: 2008-11-25 03:50:08 +0000 (Tue, 25 Nov 2008)
Log Message:
-----------
pylint nits, use safepickle functions
Modified Paths:
--------------
trunk/spambayes/contrib/SmarterHTTPServer.py
trunk/spambayes/contrib/bulkgraph.py
trunk/spambayes/contrib/findbest.py
trunk/spambayes/contrib/mod_spambayes.py
trunk/spambayes/contrib/nway.py
trunk/spambayes/contrib/pycksum.py
trunk/spambayes/contrib/sb_culler.py
trunk/spambayes/contrib/spamcounts.py
trunk/spambayes/contrib/tte.py
Modified: trunk/spambayes/contrib/SmarterHTTPServer.py
===================================================================
--- trunk/spambayes/contrib/SmarterHTTPServer.py 2008-11-25 03:45:39 UTC (rev 3211)
+++ trunk/spambayes/contrib/SmarterHTTPServer.py 2008-11-25 03:50:08 UTC (rev 3212)
@@ -20,7 +20,6 @@
import SimpleHTTPServer
import urllib
import cgi
-import shutil
import mimetypes
import re
try:
Modified: trunk/spambayes/contrib/bulkgraph.py
===================================================================
--- trunk/spambayes/contrib/bulkgraph.py 2008-11-25 03:45:39 UTC (rev 3211)
+++ trunk/spambayes/contrib/bulkgraph.py 2008-11-25 03:50:08 UTC (rev 3212)
@@ -32,15 +32,15 @@
quiet mode; no output
"""
-import mboxutils
import getopt
-import hammie
import sys
import os
import re
import time
import filecmp
+from spambayes import mboxutils, hammie
+
program = sys.argv[0]
loud = True
day = 24 * 60 * 60
Modified: trunk/spambayes/contrib/findbest.py
===================================================================
--- trunk/spambayes/contrib/findbest.py 2008-11-25 03:45:39 UTC (rev 3211)
+++ trunk/spambayes/contrib/findbest.py 2008-11-25 03:50:08 UTC (rev 3212)
@@ -66,7 +66,6 @@
import sys
import os
-import cPickle as pickle
import getopt
import math
@@ -75,6 +74,8 @@
from spambayes.hammie import Hammie
from spambayes.tokenizer import tokenize
from spambayes.Options import options
+from spambayes import storage
+from spambayes.safepickle import pickle_read, pickle_write
cls = Classifier()
h = Hammie(cls)
@@ -98,7 +99,6 @@
def score(unsure, h, cls, scores, msgids=None, skipspam=False):
"""See what effect on others each msg in unsure has"""
- ham_cutoff = options["Categorization", "ham_cutoff"]
spam_cutoff = options["Categorization", "spam_cutoff"]
# compute a base - number of messages in unsure already in the
@@ -223,7 +223,7 @@
print "scoring"
if best:
- last_scores = pickle.load(file(bestfile))
+ last_scores = pickle_read(bestfile)
last_scores = last_scores.items()
last_scores.sort()
msgids = set()
@@ -240,7 +240,7 @@
pass
if not best:
- pickle.dump(scores, file(bestfile, 'w'))
+ pickle_write(bestfile, scores)
return 0
Modified: trunk/spambayes/contrib/mod_spambayes.py
===================================================================
--- trunk/spambayes/contrib/mod_spambayes.py 2008-11-25 03:45:39 UTC (rev 3211)
+++ trunk/spambayes/contrib/mod_spambayes.py 2008-11-25 03:50:08 UTC (rev 3212)
@@ -5,26 +5,24 @@
## Author: Skip Montanaro <[EMAIL PROTECTED]>
##
-import os
-
from proxy3_filter import *
import proxy3_options
-from spambayes import hammie, Options, mboxutils
+from spambayes import hammie, Options
dbf = Options.get_pathname_option("Storage", "persistent_storage_file")
class SpambayesFilter(BufferAllFilter):
- hammie = hammie.open(dbf, 1, 'r')
+ checker = hammie.open(dbf, 1, 'r')
def filter(self, s):
if self.reply.split()[1] == '200':
- prob = self.hammie.score("%s\r\n%s" % (self.serverheaders, s))
+ prob = self.checker.score("%s\r\n%s" % (self.serverheaders, s))
print "| prob: %.5f" % prob
if prob >= Options.options["Categorization", "spam_cutoff"]:
print self.serverheaders
print "text:", s[0:40], "...", s[-40:]
return "not authorized"
- return s
+ return s
from proxy3_util import *
Modified: trunk/spambayes/contrib/nway.py
===================================================================
--- trunk/spambayes/contrib/nway.py 2008-11-25 03:45:39 UTC (rev 3211)
+++ trunk/spambayes/contrib/nway.py 2008-11-25 03:50:08 UTC (rev 3212)
@@ -70,7 +70,7 @@
prog = os.path.basename(sys.argv[0])
-def help():
+def usage():
print >> sys.stderr, __doc__ % globals()
def main(args):
@@ -78,10 +78,9 @@
for opt, arg in opts:
if opt == '-h':
- help()
+ usage()
return 0
- tagdb_list = []
msg = mboxutils.get_message(sys.stdin)
try:
del msg["X-Spambayes-Classification"]
Modified: trunk/spambayes/contrib/pycksum.py
===================================================================
--- trunk/spambayes/contrib/pycksum.py 2008-11-25 03:45:39 UTC (rev 3211)
+++ trunk/spambayes/contrib/pycksum.py 2008-11-25 03:50:08 UTC (rev 3212)
@@ -39,7 +39,10 @@
import sys
import email.Parser
import email.generator
-import md5
+try:
+ from hashlib import md5
+except ImportError:
+ from md5 import new as md5
import anydbm
import re
import time
@@ -97,12 +100,12 @@
body = text.split("\n\n", 1)[1]
lines = clean(body).split("\n")
chunksize = len(lines)//4+1
- sum = []
+ digest = []
for i in range(4):
chunk = "\n".join(lines[i*chunksize:(i+1)*chunksize])
- sum.append(md5.new(chunk).hexdigest())
+ digest.append(md5(chunk).hexdigest())
- return ".".join(sum)
+ return ".".join(digest)
def save_checksum(cksum, f):
pieces = cksum.split('.')
@@ -118,12 +121,12 @@
if not db.has_key(subsum):
db[subsum] = str(time.time())
if len(db) > maxdblen:
- items = [(float(db[k]),k) for k in db.keys()]
+ items = [(float(db[k]), k) for k in db.keys()]
items.sort()
# the -20 brings us down a bit below the max so we aren't
# constantly running this chunk of code
items = items[:-(maxdblen-20)]
- for v,k in items:
+ for v, k in items:
del db[k]
else:
result = 0
Modified: trunk/spambayes/contrib/sb_culler.py
===================================================================
--- trunk/spambayes/contrib/sb_culler.py 2008-11-25 03:45:39 UTC (rev 3211)
+++ trunk/spambayes/contrib/sb_culler.py 2008-11-25 03:50:08 UTC (rev 3212)
@@ -30,20 +30,23 @@
This program requires Python 2.3 or newer.
"""
-import sets, traceback, md5, os
+import socket
+socket.setdefaulttimeout(10)
+
+import traceback, md5, os
import poplib
import posixpath
+
+import sets
from email import Header, Utils
from spambayes import mboxutils, hammie
+from spambayes.Options import options
-import socket
-socket.setdefaulttimeout(10)
-
DO_ACTIONS = 1
VERBOSE_LEVEL = 1
APPEND_TO_FILE = "append_to_file"
-DELETE = "delete"
+DELETE_FROM_MAILBOX = "delete"
KEEP_IN_MAILBOX = "keep in mailbox"
SPAM = "spam"
VIRUS = "virus"
@@ -108,7 +111,7 @@
def DELETE(mi, log):
"""Action: delete message from mailbox"""
- log.do_action(DELETE)
+ log.do_action(DELETE_FROM_MAILBOX)
if not DO_ACTIONS:
return
mi.mailbox.dele(mi.i)
Modified: trunk/spambayes/contrib/spamcounts.py
===================================================================
--- trunk/spambayes/contrib/spamcounts.py 2008-11-25 03:45:39 UTC (rev 3211)
+++ trunk/spambayes/contrib/spamcounts.py 2008-11-25 03:50:08 UTC (rev 3212)
@@ -19,13 +19,11 @@
import getopt
import re
import sets
-import os
-import shelve
import csv
-from spambayes.Options import options, get_pathname_option
+from spambayes.Options import options
from spambayes.tokenizer import tokenize
-from spambayes.storage import STATE_KEY, database_type, open_storage
+from spambayes.storage import database_type, open_storage
prog = sys.argv[0]
Modified: trunk/spambayes/contrib/tte.py
===================================================================
--- trunk/spambayes/contrib/tte.py 2008-11-25 03:45:39 UTC (rev 3211)
+++ trunk/spambayes/contrib/tte.py 2008-11-25 03:50:08 UTC (rev 3212)
@@ -100,7 +100,7 @@
def train(store, hambox, spambox, maxmsgs, maxrounds, tdict, reverse, verbose,
ratio):
- smisses = hmisses = round = 0
+ round = 0
ham_cutoff = Options.options["Categorization", "ham_cutoff"]
spam_cutoff = Options.options["Categorization", "spam_cutoff"]
@@ -114,19 +114,19 @@
hambone_ = list(reversed(hambone_))
spamcan_ = list(reversed(spamcan_))
- nspam,nham = len(spamcan_),len(hambone_)
+ nspam, nham = len(spamcan_), len(hambone_)
if ratio:
- rspam,rham = ratio
+ rspam, rham = ratio
# If the actual ratio of spam to ham in the database is better than
# what was asked for, use that better ratio.
if (rspam > rham) == (rspam * nham > rham * nspam):
- rspam,rham = nspam,nham
+ rspam, rham = nspam, nham
# define some indexing constants
ham = 0
spam = 1
name = ('ham','spam')
- misses = [0,0]
+ misses = [0, 0]
misclassified = lambda is_spam, score: (
is_spam and score < spam_cutoff or not is_spam and score > ham_cutoff)
@@ -140,9 +140,9 @@
hambone = iter(hambone_)
spamcan = iter(spamcan_)
- i = [0,0]
+ i = [0, 0]
msgs_processed = 0
- misses = [0,0]
+ misses = [0, 0]
training_sets = [hambone, spamcan]
while not maxmsgs or msgs_processed < maxmsgs:
@@ -153,7 +153,7 @@
try:
train_msg = training_sets[train_spam].next()
except StopIteration:
- break;
+ break
i[train_spam] += 1
msgs_processed += 1
@@ -164,7 +164,7 @@
score = store.spamprob(tokens)
selector = train_msg["message-id"] or train_msg["subject"]
- if misclassified(train_spam,score) and selector is not None:
+ if misclassified(train_spam, score) and selector is not None:
if verbose:
print >> sys.stderr, "\tmiss %s: %.6f %s" % (
name[train_spam], score, selector)
@@ -179,24 +179,25 @@
print "\rround: %2d, msgs: %4d, ham misses: %3d, spam misses: %3d, %.1fs" % \
(round, msgs_processed, misses[0], misses[1], seconds)
- training_sets = [hambone,spamcan]
+ training_sets = [hambone, spamcan]
# We count all untrained messages so the user knows what was skipped.
# We also tag them for saving so we don't lose messages which might have
# value in a future run
- for is_spam in ham,spam:
+ for is_spam in ham, spam:
nleft = 0
try:
while True:
msg = training_sets[is_spam].next()
score = store.spamprob(tokenize(msg))
- if misclassified(is_spam,score):
+ if misclassified(is_spam, score):
tdict[msg["message-id"]] = True
nleft += 1
except StopIteration:
- if nleft: print nleft, "untrained %ss" % name[is_spam]
+ if nleft:
+ print nleft, "untrained %ss" % name[is_spam]
def cull(mbox_name, cullext, designation, tdict):
print "writing new %s mbox..." % designation
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
_______________________________________________
Spambayes-checkins mailing list
[email protected]
http://mail.python.org/mailman/listinfo/spambayes-checkins