Update of /cvsroot/spambayes/spambayes/spambayes
In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv20242/spambayes
Modified Files:
Tag: CORESVR
CoreUI.py ProxyUI.py UserInterface.py
Log Message:
A couple more refactorings. More will be possible once I get rid of holding
state as a module-level global variable in the pop3 proxy.
Index: CoreUI.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/spambayes/Attic/CoreUI.py,v
retrieving revision 1.1.2.8
retrieving revision 1.1.2.9
diff -C2 -d -r1.1.2.8 -r1.1.2.9
*** CoreUI.py 8 Jun 2007 02:09:51 -0000 1.1.2.8
--- CoreUI.py 8 Jun 2007 12:08:46 -0000 1.1.2.9
***************
*** 54,58 ****
import sys
- import re
import cgi
import time
--- 54,57 ----
***************
*** 72,81 ****
from spambayes.compatsets import Set
- from email.Iterators import typed_subpart_iterator
-
import UserInterface
- from spambayes import tokenizer
from spambayes.Options import options, load_options, get_pathname_option, _
! from spambayes import i18n
from spambayes import storage
from spambayes import Stats
--- 71,78 ----
from spambayes.compatsets import Set
import UserInterface
from spambayes.Options import options, load_options, get_pathname_option, _
! ## no i18n yet...
! ##from spambayes import i18n
from spambayes import storage
from spambayes import Stats
***************
*** 161,165 ****
self.state = state
self.app_for_version = "SpamBayes Proxy"
- self.previous_sort = None
if not state.can_stop:
self.html._readonly = False
--- 158,161 ----
***************
*** 246,351 ****
return keys, date, prior, start, end
- def _sortMessages(self, messages, sort_order, reverse=False):
- """Sorts the message by the appropriate attribute. If this was the
- previous sort order, then reverse it."""
- if sort_order is None or sort_order == "received":
- # Default sorting, which is in reverse order of appearance.
- # This is complicated because the 'received' info is the key.
- messages.sort()
- if self.previous_sort == sort_order:
- messages.reverse()
- self.previous_sort = None
- else:
- self.previous_sort = 'received'
- return messages
- tmplist = [(getattr(x[1], sort_order), x) for x in messages]
- tmplist.sort()
- if reverse:
- tmplist.reverse()
- return [x for (key, x) in tmplist]
-
- def _appendMessages(self, table, keyedMessageInfo, label, sort_order,
- reverse=False):
- """Appends the rows of a table of messages to 'table'."""
- stripe = 0
-
- keyedMessageInfo = self._sortMessages(keyedMessageInfo, sort_order,
- reverse)
- nrows = options["html_ui", "rows_per_section"]
- for key, messageInfo in keyedMessageInfo[:nrows]:
- unused, unused, messageInfo.received = \
- self._getTimeRange(self._keyToTimestamp(key))
- row = self.html.reviewRow.clone()
- try:
- score = messageInfo.score
- except ValueError:
- score = None
- if label == _('Spam'):
- if score is not None \
- and score > options["html_ui", "spam_discard_level"]:
- r_att = getattr(row, 'discard')
- else:
- r_att = getattr(row, options["html_ui",
- "default_spam_action"])
- elif label == _('Ham'):
- if score is not None \
- and score < options["html_ui", "ham_discard_level"]:
- r_att = getattr(row, 'discard')
- else:
- r_att = getattr(row, options["html_ui",
- "default_ham_action"])
- else:
- r_att = getattr(row, options["html_ui",
- "default_unsure_action"])
- setattr(r_att, "checked", 1)
-
- row.optionalHeadersValues = '' # make way for real list
- for header in options["html_ui", "display_headers"]:
- header = header.lower()
- text = getattr(messageInfo, "%sHeader" % (header,))
- if header == "subject":
- # Subject is special, because it links to the body.
- # If the user doesn't display the subject, then there
- # is no link to the body.
- h = self.html.reviewRow.linkedHeaderValue.clone()
- h.text.title = messageInfo.bodySummary
- h.text.href = "view?key=%s&corpus=%s" % (key, label)
- else:
- h = self.html.reviewRow.headerValue.clone()
- h.text = text
- row.optionalHeadersValues += h
-
- # Apart from any message headers, we may also wish to display
- # the message score, and the time the message was received.
- if options["html_ui", "display_score"]:
- if isinstance(messageInfo.score, types.StringTypes):
- # Presumably either "?" or "Err".
- row.score_ = messageInfo.score
- else:
- row.score_ = "%.2f%%" % (messageInfo.score,)
- else:
- del row.score_
- if options["html_ui", "display_received_time"]:
- row.received_ = messageInfo.received
- else:
- del row.received_
-
- # Many characters can't go in the URL or they cause problems
- # (&, ;, ?, etc). So we use the hex values for them all.
- subj_list = []
- for c in messageInfo.subjectHeader:
- subj_list.append("%%%s" % (hex(ord(c))[2:],))
- subj = "".join(subj_list)
- row.classify.href = "showclues?key=%s&subject=%s" % (key, subj)
- row.tokens.href = ("showclues?key=%s&subject=%s&tokens=1" %
- (key, subj))
- setattr(row, 'class', ['stripe_on', 'stripe_off'][stripe]) # Grr!
- setattr(row, 'onMouseOut',
- ["this.className='stripe_on';",
- "this.className='stripe_off';"][stripe])
- row = str(row).replace('TYPE', label).replace('KEY', key)
- table += row
- stripe = stripe ^ 1
-
def onReview(self, **params):
"""Present a list of message for (re)training."""
--- 242,245 ----
***************
*** 619,631 ****
self._writePostamble(help_topic="review")
- def _contains(self, a, b, ignore_case=False):
- """Return true if substring b is part of string a."""
- assert isinstance(a, types.StringTypes)
- assert isinstance(b, types.StringTypes)
- if ignore_case:
- a = a.lower()
- b = b.lower()
- return a.find(b) >= 0
-
def onView(self, key, corpus):
"""View a message - linked from the Review page."""
--- 513,516 ----
***************
*** 688,755 ****
self.write(html)
- def _makeMessageInfo(self, message):
- """Given an email.Message, return an object with subjectHeader,
- bodySummary and other header (as needed) attributes. These objects
- are passed into appendMessages by onReview - passing email.Message
- objects directly uses too much memory.
- """
- # Remove notations before displaying - see:
- # [ 848365 ] Remove subject annotations from message review page
- message.delNotations()
- subjectHeader = message["Subject"] or "(none)"
- headers = {"subject" : subjectHeader}
- for header in options["html_ui", "display_headers"]:
- headers[header.lower()] = (message[header] or "(none)")
- score = message[options["Headers", "score_header_name"]]
- if score:
- # the score might have the log info at the end
- op = score.find('(')
- if op >= 0:
- score = score[:op]
- try:
- score = float(score) * 100
- except ValueError:
- # Hmm. The score header should only contain a floating
- # point number. What's going on here, then?
- score = "Err" # Let the user know something is wrong.
- else:
- # If the lookup fails, this means that the "include_score"
- # option isn't activated. We have the choice here to either
- # calculate it now, which is pretty inefficient, since we have
- # already done so, or to admit that we don't know what it is.
- # We'll go with the latter.
- score = "?"
- try:
- part = typed_subpart_iterator(message, 'text', 'plain').next()
- text = part.get_payload()
- except StopIteration:
- try:
- part = typed_subpart_iterator(message, 'text', 'html').next()
- text = part.get_payload()
- text, unused = tokenizer.crack_html_style(text)
- text, unused = tokenizer.crack_html_comment(text)
- text = tokenizer.html_re.sub(' ', text)
- text = _('(this message only has an HTML body)\n') + text
- except StopIteration:
- text = _('(this message has no text body)')
- if type(text) == type([]): # gotta be a 'right' way to do this
- text = _("(this message is a digest of %s messages)") % (len(text))
- elif text is None:
- text = _("(this message has no body)")
- else:
- text = text.replace('&nbsp;', ' ') # Else they'll be quoted
- text = re.sub(r'(\s)\s+', r'\1', text) # Eg. multiple blank lines
- text = text.strip()
-
- class _MessageInfo:
- pass
- messageInfo = _MessageInfo()
- for headerName, headerValue in headers.items():
- headerValue = self._trimHeader(headerValue, 45, True)
- setattr(messageInfo, "%sHeader" % (headerName,), headerValue)
- messageInfo.score = score
- messageInfo.bodySummary = self._trimHeader(text, 200)
- return messageInfo
-
def close_database(self):
self.state.close()
--- 573,576 ----
***************
*** 824,827 ****
--- 645,651 ----
self.is_test = False
+ self.spamCorpus = self.hamCorpus = self.unknownCorpus = None
+ self.spam_trainer = self.ham_trainer = None
+
self.init()
Index: ProxyUI.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/spambayes/ProxyUI.py,v
retrieving revision 1.64.2.2
retrieving revision 1.64.2.3
diff -C2 -d -r1.64.2.2 -r1.64.2.3
*** ProxyUI.py 8 Jun 2007 02:09:51 -0000 1.64.2.2
--- ProxyUI.py 8 Jun 2007 12:08:46 -0000 1.64.2.3
***************
*** 54,58 ****
True, False = 1, 0
- import re
import cgi
import time
--- 54,57 ----
***************
*** 72,81 ****
from spambayes.compatsets import Set
- import tokenizer
import UserInterface
from spambayes.Options import options, _
- from email.Iterators import typed_subpart_iterator
! global state
# These are the options that will be offered on the configuration page.
--- 71,78 ----
from spambayes.compatsets import Set
import UserInterface
from spambayes.Options import options, _
! state = None
# These are the options that will be offered on the configuration page.
***************
*** 174,178 ****
self.state_recreator = state_recreator # ugly
self.app_for_version = "SpamBayes Proxy"
- self.previous_sort = None
if not proxy_state.can_stop:
self.html._readonly = False
--- 171,174 ----
***************
*** 261,365 ****
return keys, date, prior, start, end
- def _sortMessages(self, messages, sort_order, reverse=False):
- """Sorts the message by the appropriate attribute. If this was the
- previous sort order, then reverse it."""
- if sort_order is None or sort_order == "received":
- # Default sorting, which is in reverse order of appearance.
- # This is complicated because the 'received' info is the key.
- messages.sort()
- if self.previous_sort == sort_order:
- messages.reverse()
- self.previous_sort = None
- else:
- self.previous_sort = 'received'
- return messages
- tmplist = [(getattr(x[1], sort_order), x) for x in messages]
- tmplist.sort()
- if reverse:
- tmplist.reverse()
- return [x for (key, x) in tmplist]
-
- def _appendMessages(self, table, keyedMessageInfo, label, sort_order,
- reverse=False):
- """Appends the rows of a table of messages to 'table'."""
- stripe = 0
-
- keyedMessageInfo = self._sortMessages(keyedMessageInfo, sort_order,
- reverse)
- nrows = options["html_ui", "rows_per_section"]
- for key, messageInfo in keyedMessageInfo[:nrows]:
- unused, unused, messageInfo.received = \
- self._getTimeRange(self._keyToTimestamp(key))
- row = self.html.reviewRow.clone()
- try:
- score = messageInfo.score
- except ValueError:
- score = None
- if label == _('Spam'):
- if score is not None \
- and score > options["html_ui", "spam_discard_level"]:
- r_att = getattr(row, 'discard')
- else:
- r_att = getattr(row, options["html_ui",
- "default_spam_action"])
- elif label == _('Ham'):
- if score is not None \
- and score < options["html_ui", "ham_discard_level"]:
- r_att = getattr(row, 'discard')
- else:
- r_att = getattr(row, options["html_ui",
- "default_ham_action"])
- else:
- r_att = getattr(row, options["html_ui",
- "default_unsure_action"])
- setattr(r_att, "checked", 1)
-
- row.optionalHeadersValues = '' # make way for real list
- for header in options["html_ui", "display_headers"]:
- header = header.lower()
- text = getattr(messageInfo, "%sHeader" % (header,))
- if header == "subject":
- # Subject is special, because it links to the body.
- # If the user doesn't display the subject, then there
- # is no link to the body.
- h = self.html.reviewRow.linkedHeaderValue.clone()
- h.text.title = messageInfo.bodySummary
- h.text.href = "view?key=%s&corpus=%s" % (key, label)
- else:
- h = self.html.reviewRow.headerValue.clone()
- h.text = text
- row.optionalHeadersValues += h
-
- # Apart from any message headers, we may also wish to display
- # the message score, and the time the message was received.
- if options["html_ui", "display_score"]:
- if isinstance(messageInfo.score, types.StringTypes):
- # Presumably either "?" or "Err".
- row.score_ = messageInfo.score
- else:
- row.score_ = "%.2f%%" % (messageInfo.score,)
- else:
- del row.score_
- if options["html_ui", "display_received_time"]:
- row.received_ = messageInfo.received
- else:
- del row.received_
-
- # Many characters can't go in the URL or they cause problems
- # (&, ;, ?, etc). So we use the hex values for them all.
- subj_list = []
- for c in messageInfo.subjectHeader:
- subj_list.append("%%%s" % (hex(ord(c))[2:],))
- subj = "".join(subj_list)
- row.classify.href="showclues?key=%s&subject=%s" % (key, subj)
- row.tokens.href="showclues?key=%s&subject=%s&tokens=1" % (key, subj)
- setattr(row, 'class', ['stripe_on', 'stripe_off'][stripe]) # Grr!
- setattr(row, 'onMouseOut',
- ["this.className='stripe_on';",
- "this.className='stripe_off';"][stripe])
- row = str(row).replace('TYPE', label).replace('KEY', key)
- table += row
- stripe = stripe ^ 1
-
def onReview(self, **params):
"""Present a list of message for (re)training."""
--- 257,260 ----
***************
*** 632,644 ****
self._writePostamble(help_topic="review")
- def _contains(self, a, b, ignore_case=False):
- """Return true if substring b is part of string a."""
- assert isinstance(a, types.StringTypes)
- assert isinstance(b, types.StringTypes)
- if ignore_case:
- a = a.lower()
- b = b.lower()
- return a.find(b) >= 0
-
def onView(self, key, corpus):
"""View a message - linked from the Review page."""
--- 527,530 ----
***************
*** 685,752 ****
self._writePostamble()
- def _makeMessageInfo(self, message):
- """Given an email.Message, return an object with subjectHeader,
- bodySummary and other header (as needed) attributes. These objects
- are passed into appendMessages by onReview - passing email.Message
- objects directly uses too much memory.
- """
- # Remove notations before displaying - see:
- # [ 848365 ] Remove subject annotations from message review page
- message.delNotations()
- subjectHeader = message["Subject"] or "(none)"
- headers = {"subject" : subjectHeader}
- for header in options["html_ui", "display_headers"]:
- headers[header.lower()] = (message[header] or "(none)")
- score = message[options["Headers", "score_header_name"]]
- if score:
- # the score might have the log info at the end
- op = score.find('(')
- if op >= 0:
- score = score[:op]
- try:
- score = float(score) * 100
- except ValueError:
- # Hmm. The score header should only contain a floating
- # point number. What's going on here, then?
- score = "Err" # Let the user know something is wrong.
- else:
- # If the lookup fails, this means that the "include_score"
- # option isn't activated. We have the choice here to either
- # calculate it now, which is pretty inefficient, since we have
- # already done so, or to admit that we don't know what it is.
- # We'll go with the latter.
- score = "?"
- try:
- part = typed_subpart_iterator(message, 'text', 'plain').next()
- text = part.get_payload()
- except StopIteration:
- try:
- part = typed_subpart_iterator(message, 'text', 'html').next()
- text = part.get_payload()
- text, unused = tokenizer.crack_html_style(text)
- text, unused = tokenizer.crack_html_comment(text)
- text = tokenizer.html_re.sub(' ', text)
- text = _('(this message only has an HTML body)\n') + text
- except StopIteration:
- text = _('(this message has no text body)')
- if type(text) == type([]): # gotta be a 'right' way to do this
- text = _("(this message is a digest of %s messages)") % (len(text))
- elif text is None:
- text = _("(this message has no body)")
- else:
- text = text.replace('&nbsp;', ' ') # Else they'll be quoted
- text = re.sub(r'(\s)\s+', r'\1', text) # Eg. multiple blank lines
- text = text.strip()
-
- class _MessageInfo:
- pass
- messageInfo = _MessageInfo()
- for headerName, headerValue in headers.items():
- headerValue = self._trimHeader(headerValue, 45, True)
- setattr(messageInfo, "%sHeader" % (headerName,), headerValue)
- messageInfo.score = score
- messageInfo.bodySummary = self._trimHeader(text, 200)
- return messageInfo
-
def close_database(self):
state.close()
--- 571,574 ----
Index: UserInterface.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/spambayes/UserInterface.py,v
retrieving revision 1.61.2.2
retrieving revision 1.61.2.3
diff -C2 -d -r1.61.2.2 -r1.61.2.3
*** UserInterface.py 8 Jun 2007 02:09:51 -0000 1.61.2.2
--- UserInterface.py 8 Jun 2007 12:08:46 -0000 1.61.2.3
***************
*** 80,83 ****
--- 80,84 ----
import types
import StringIO
+ from email.Iterators import typed_subpart_iterator
import oe_mailbox
***************
*** 277,280 ****
--- 278,282 ----
self.stats = stats
self.app_for_version = None # subclasses must fill this in
+ self.previous_sort = None
def onClassify(self, file, text, which):
***************
*** 1271,1273 ****
--- 1273,1448 ----
return time.mktime(start), time.mktime(end), date
+ def _sortMessages(self, messages, sort_order, reverse=False):
+ """Sorts the message by the appropriate attribute. If this was the
+ previous sort order, then reverse it."""
+ if sort_order is None or sort_order == "received":
+ # Default sorting, which is in reverse order of appearance.
+ # This is complicated because the 'received' info is the key.
+ messages.sort()
+ if self.previous_sort == sort_order:
+ messages.reverse()
+ self.previous_sort = None
+ else:
+ self.previous_sort = 'received'
+ return messages
+ tmplist = [(getattr(x[1], sort_order), x) for x in messages]
+ tmplist.sort()
+ if reverse:
+ tmplist.reverse()
+ return [x for (key, x) in tmplist]
+
+ def _appendMessages(self, table, keyedMessageInfo, label, sort_order,
+ reverse=False):
+ """Appends the rows of a table of messages to 'table'."""
+ stripe = 0
+
+ keyedMessageInfo = self._sortMessages(keyedMessageInfo, sort_order,
+ reverse)
+ nrows = options["html_ui", "rows_per_section"]
+ for key, messageInfo in keyedMessageInfo[:nrows]:
+ unused, unused, messageInfo.received = \
+ self._getTimeRange(self._keyToTimestamp(key))
+ row = self.html.reviewRow.clone()
+ try:
+ score = messageInfo.score
+ except ValueError:
+ score = None
+ if label == _('Spam'):
+ if score is not None \
+ and score > options["html_ui", "spam_discard_level"]:
+ r_att = getattr(row, 'discard')
+ else:
+ r_att = getattr(row, options["html_ui",
+ "default_spam_action"])
+ elif label == _('Ham'):
+ if score is not None \
+ and score < options["html_ui", "ham_discard_level"]:
+ r_att = getattr(row, 'discard')
+ else:
+ r_att = getattr(row, options["html_ui",
+ "default_ham_action"])
+ else:
+ r_att = getattr(row, options["html_ui",
+ "default_unsure_action"])
+ setattr(r_att, "checked", 1)
+
+ row.optionalHeadersValues = '' # make way for real list
+ for header in options["html_ui", "display_headers"]:
+ header = header.lower()
+ text = getattr(messageInfo, "%sHeader" % (header,))
+ if header == "subject":
+ # Subject is special, because it links to the body.
+ # If the user doesn't display the subject, then there
+ # is no link to the body.
+ h = self.html.reviewRow.linkedHeaderValue.clone()
+ h.text.title = messageInfo.bodySummary
+ h.text.href = "view?key=%s&corpus=%s" % (key, label)
+ else:
+ h = self.html.reviewRow.headerValue.clone()
+ h.text = text
+ row.optionalHeadersValues += h
+
+ # Apart from any message headers, we may also wish to display
+ # the message score, and the time the message was received.
+ if options["html_ui", "display_score"]:
+ if isinstance(messageInfo.score, types.StringTypes):
+ # Presumably either "?" or "Err".
+ row.score_ = messageInfo.score
+ else:
+ row.score_ = "%.2f%%" % (messageInfo.score,)
+ else:
+ del row.score_
+ if options["html_ui", "display_received_time"]:
+ row.received_ = messageInfo.received
+ else:
+ del row.received_
+ # Many characters can't go in the URL or they cause problems
+ # (&, ;, ?, etc). So we use the hex values for them all.
+ subj_list = []
+ for c in messageInfo.subjectHeader:
+ subj_list.append("%%%s" % (hex(ord(c))[2:],))
+ subj = "".join(subj_list)
+ row.classify.href = "showclues?key=%s&subject=%s" % (key, subj)
+ row.tokens.href = ("showclues?key=%s&subject=%s&tokens=1" %
+ (key, subj))
+ setattr(row, 'class', ['stripe_on', 'stripe_off'][stripe]) # Grr!
+ setattr(row, 'onMouseOut',
+ ["this.className='stripe_on';",
+ "this.className='stripe_off';"][stripe])
+ row = str(row).replace('TYPE', label).replace('KEY', key)
+ table += row
+ stripe = stripe ^ 1
+
+ def _contains(self, a, b, ignore_case=False):
+ """Return true if substring b is part of string a."""
+ assert isinstance(a, types.StringTypes)
+ assert isinstance(b, types.StringTypes)
+ if ignore_case:
+ a = a.lower()
+ b = b.lower()
+ return a.find(b) >= 0
+
+ def _makeMessageInfo(self, message):
+ """Given an email.Message, return an object with subjectHeader,
+ bodySummary and other header (as needed) attributes. These objects
+ are passed into appendMessages by onReview - passing email.Message
+ objects directly uses too much memory.
+ """
+ # Remove notations before displaying - see:
+ # [ 848365 ] Remove subject annotations from message review page
+ message.delNotations()
+ subjectHeader = message["Subject"] or "(none)"
+ headers = {"subject" : subjectHeader}
+ for header in options["html_ui", "display_headers"]:
+ headers[header.lower()] = (message[header] or "(none)")
+ score = message[options["Headers", "score_header_name"]]
+ if score:
+ # the score might have the log info at the end
+ op = score.find('(')
+ if op >= 0:
+ score = score[:op]
+ try:
+ score = float(score) * 100
+ except ValueError:
+ # Hmm. The score header should only contain a floating
+ # point number. What's going on here, then?
+ score = "Err" # Let the user know something is wrong.
+ else:
+ # If the lookup fails, this means that the "include_score"
+ # option isn't activated. We have the choice here to either
+ # calculate it now, which is pretty inefficient, since we have
+ # already done so, or to admit that we don't know what it is.
+ # We'll go with the latter.
+ score = "?"
+ try:
+ part = typed_subpart_iterator(message, 'text', 'plain').next()
+ text = part.get_payload()
+ except StopIteration:
+ try:
+ part = typed_subpart_iterator(message, 'text', 'html').next()
+ text = part.get_payload()
+ text, unused = tokenizer.crack_html_style(text)
+ text, unused = tokenizer.crack_html_comment(text)
+ text = tokenizer.html_re.sub(' ', text)
+ text = _('(this message only has an HTML body)\n') + text
+ except StopIteration:
+ text = _('(this message has no text body)')
+ if type(text) == type([]): # gotta be a 'right' way to do this
+ text = _("(this message is a digest of %s messages)") % (len(text))
+ elif text is None:
+ text = _("(this message has no body)")
+ else:
+ text = text.replace('&nbsp;', ' ') # Else they'll be quoted
+ text = re.sub(r'(\s)\s+', r'\1', text) # Eg. multiple blank lines
+ text = text.strip()
+
+ class _MessageInfo:
+ pass
+ messageInfo = _MessageInfo()
+ for headerName, headerValue in headers.items():
+ headerValue = self._trimHeader(headerValue, 45, True)
+ setattr(messageInfo, "%sHeader" % (headerName,), headerValue)
+ messageInfo.score = score
+ messageInfo.bodySummary = self._trimHeader(text, 200)
+ return messageInfo
_______________________________________________
Spambayes-checkins mailing list
[email protected]
http://mail.python.org/mailman/listinfo/spambayes-checkins