Update of /cvsroot/spambayes/spambayes/spambayes
In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv11482/spambayes

Modified Files:
        storage.py 
Log Message:
Improvements to ZODB and ZEO storage classes:

 * don't try storing to a read-only database;
 * make packing on close optional;
 * don't pack after closing;
 * optionally remove the pack backup file;
 * handle username, password, storage name, wait, and wait timeout for ZEO;
 * use persistent caches for ZEO.

Index: storage.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/spambayes/storage.py,v
retrieving revision 1.60
retrieving revision 1.61
diff -C2 -d -r1.60 -r1.61
*** storage.py  22 Apr 2006 04:41:54 -0000      1.60
--- storage.py  27 Mar 2007 10:57:53 -0000      1.61
***************
*** 67,70 ****
--- 67,71 ----
  import time
  import types
+ import tempfile
  from spambayes import classifier
  from spambayes.Options import options, get_pathname_option
***************
*** 676,679 ****
--- 677,681 ----
      except ImportError:
          Persistent = object
+ 
  class _PersistentClassifier(classifier.Classifier, Persistent):
      def __init__(self):
***************
*** 759,762 ****
--- 761,765 ----
              abort = ZODB.Transaction.get_transaction().abort
          from ZODB.POSException import ConflictError
+         from ZODB.POSException import ReadOnlyError
          from ZODB.POSException import TransactionFailedError
  
***************
*** 781,786 ****
                    self.db_name
              abort()
  
!     def close(self):
          # Ensure that the db is saved before closing.  Alternatively, we
          # could abort any waiting transaction.  We need to do *something*
--- 784,792 ----
                    self.db_name
              abort()
+         except ReadOnlyError:
+             print >> sys.stderr, "Can't store transaction to read-only db."
+             abort()
  
!     def close(self, pack=True, retain_backup=True):
          # Ensure that the db is saved before closing.  Alternatively, we
          # could abort any waiting transaction.  We need to do *something*
***************
*** 791,805 ****
              self.store()
  
-         # Do the closing.        
-         self.DB.close()
- 
          # We don't make any use of the 'undo' capabilities of the
          # FileStorage at the moment, so might as well pack the database
          # each time it is closed, to save as much disk space as possible.
          # Pack it up to where it was 'yesterday'.
!         # XXX What is the 'referencesf' parameter for pack()?  It doesn't
!         # XXX seem to do anything according to the source.
!         if self.mode != 'r' and hasattr(self.storage, "pack"):
!             self.storage.pack(time.time()-60*60*24, None)
          self.storage.close()
  
--- 797,809 ----
              self.store()
  
          # We don't make any use of the 'undo' capabilities of the
          # FileStorage at the moment, so might as well pack the database
          # each time it is closed, to save as much disk space as possible.
          # Pack it up to where it was 'yesterday'.
!         if pack and self.mode != 'r':
!             self.pack(time.time()-60*60*24, retain_backup)
! 
!         # Do the closing.        
!         self.DB.close()
          self.storage.close()
  
***************
*** 811,814 ****
--- 815,831 ----
              print >> sys.stderr, 'Closed', self.db_name, 'database'
  
+     def pack(self, t, retain_backup=True):
+         """Like FileStorage pack(), but optionally remove the .old
+         backup file that is created.  Often for our purposes we do
+         not care about being able to recover from this.  Also
+         ignore the referencesf parameter, which appears to not do
+         anything."""
+         if hasattr(self.storage, "pack"):
+             self.storage.pack(t, None)
+         if not retain_backup:
+             old_name = self.db_filename + ".old"
+             if os.path.exists(old_name):
+                 os.remove(old_name)
+ 
  
  class ZEOClassifier(ZODBClassifier):
***************
*** 818,828 ****
          self.port = None
          db_name = "SpamBayes"
          for info in source_info:
              if info.startswith("host"):
!                 self.host = info[5:]
              elif info.startswith("port"):
                  self.port = int(info[5:])
              elif info.startswith("dbname"):
                  db_name = info[7:]
          ZODBClassifier.__init__(self, db_name)
  
--- 835,865 ----
          self.port = None
          db_name = "SpamBayes"
+         self.username = ''
+         self.password = ''
+         self.storage_name = '1'
+         self.wait = None
+         self.wait_timeout = None
          for info in source_info:
              if info.startswith("host"):
!                 try:
!                     # ZEO only accepts strings, not unicode.
!                     self.host = str(info[5:])
!                 except UnicodeDecodeError, e:
!                     print >> sys.stderr, "Couldn't set host", \
!                           info[5:], str(e)
              elif info.startswith("port"):
                  self.port = int(info[5:])
              elif info.startswith("dbname"):
                  db_name = info[7:]
+             elif info.startswith("user"):
+                 self.username = info[5:]
+             elif info.startswith("pass"):
+                 self.password = info[5:]
+             elif info.startswith("storage_name"):
+                 self.storage_name = info[13:]
+             elif info.startswith("wait_timeout"):
+                 self.wait_timeout = int(info[13:])
+             elif info.startswith("wait"):
+                 self.wait = info[5:] == "True"
          ZODBClassifier.__init__(self, db_name)
  
***************
*** 833,837 ****
          else:
              addr = self.host
!         self.storage = ClientStorage(addr)
  
  
--- 870,908 ----
          else:
              addr = self.host
!         if options["globals", "verbose"]:
!             print >> sys.stderr, "Connecting to ZEO server", addr, \
!                   self.username, self.password
!         # Use persistent caches, with the cache in the temp directory.
!         # If the temp directory is cleared out, we lose the cache, but
!         # that doesn't really matter, and we should always be able to
!         # write to it.
!         try:
!             self.storage = ClientStorage(addr, name=self.db_name,
!                                          read_only=self.mode=='r',
!                                          username=self.username,
!                                          client=self.db_name,
!                                          wait=self.wait,
!                                          wait_timeout=self.wait_timeout,
!                                          storage=self.storage_name,
!                                          var=tempfile.gettempdir(),
!                                          password=self.password)
!         except ValueError:
!             # Probably bad cache; remove it and try without the cache.
!             try:
!                 os.remove(os.path.join(tempfile.gettempdir(),
!                                        self.db_name + \
!                                        self.storage_name + ".zec"))
!             except OSError:
!                 pass
!             self.storage = ClientStorage(addr, name=self.db_name,
!                                          read_only=self.mode=='r',
!                                          username=self.username,
!                                          wait=self.wait,
!                                          wait_timeout=self.wait_timeout,
!                                          storage=self.storage_name,
!                                          password=self.password)
! 
!     def is_connected(self):
!         return self.storage.is_connected()
  
  
***************
*** 840,849 ****
  NO_TRAINING_FLAG = 1
  
! class Trainer:
      '''Associates a Classifier object and one or more Corpora, \
      is an observer of the corpora'''
  
      def __init__(self, bayes, is_spam, updateprobs=NO_UPDATEPROBS):
!         '''Constructor(Classifier, is_spam(True|False), updprobs(True|False)'''
  
          self.bayes = bayes
--- 911,921 ----
  NO_TRAINING_FLAG = 1
  
! class Trainer(object):
      '''Associates a Classifier object and one or more Corpora, \
      is an observer of the corpora'''
  
      def __init__(self, bayes, is_spam, updateprobs=NO_UPDATEPROBS):
!         '''Constructor(Classifier, is_spam(True|False),
!         updateprobs(True|False)'''
  
          self.bayes = bayes
***************
*** 860,867 ****
  
          if options["globals", "verbose"]:
!             print >> sys.stderr, 'training with',message.key()
  
          self.bayes.learn(message.tokenize(), self.is_spam)
- #                         self.updateprobs)
          message.setId(message.key())
          message.RememberTrained(self.is_spam)
--- 932,938 ----
  
          if options["globals", "verbose"]:
!             print >> sys.stderr, 'training with ', message.key()
  
          self.bayes.learn(message.tokenize(), self.is_spam)
          message.setId(message.key())
          message.RememberTrained(self.is_spam)

_______________________________________________
Spambayes-checkins mailing list
[email protected]
http://mail.python.org/mailman/listinfo/spambayes-checkins

Reply via email to