Revision: 8095
Author:   russblau
Date:     2010-04-15 18:16:10 +0000 (Thu, 15 Apr 2010)

Log Message:
-----------
Revert to a single background thread for asynchronous saves, instead of a 
thread per request; this should mean less overhead and better performance.

Modified Paths:
--------------
    branches/rewrite/pywikibot/__init__.py
    branches/rewrite/pywikibot/config2.py
    branches/rewrite/pywikibot/page.py

Modified: branches/rewrite/pywikibot/__init__.py
===================================================================
--- branches/rewrite/pywikibot/__init__.py      2010-04-15 17:04:13 UTC (rev 8094)
+++ branches/rewrite/pywikibot/__init__.py      2010-04-15 18:16:10 UTC (rev 8095)
@@ -14,6 +14,8 @@
 import logging
 import re
 import sys
+import threading
+from Queue import Queue
 
 import config2 as config
 from bot import *
@@ -248,7 +250,6 @@
 
 # Throttle and thread handling
 
-threadpool = []   # add page-putting threads to this list as they are created
 stopped = False
 
 def stopme():
@@ -263,20 +264,70 @@
 
     if not stopped:
         pywikibot.debug(u"stopme() called", _logger)
-        count = sum(1 for thd in threadpool if thd.isAlive())
-        if count:
-            pywikibot.output(u"Waiting for about %(count)s pages to be saved."
-                              % locals())
-            for thd in threadpool:
-                if thd.isAlive():
-                    thd.join()
+        def remaining():
+            import datetime
+            remainingPages = page_put_queue.qsize() - 1
+                # -1 because we added a None element to stop the queue
+            remainingSeconds = datetime.timedelta(
+                    seconds=(remainingPages * config.put_throttle))
+            return (remainingPages, remainingSeconds)
+
+        page_put_queue.put((None, [], {}))
         stopped = True
+
+        if page_put_queue.qsize() > 1:
+            output(u'Waiting for %i pages to be put. Estimated time remaining: %s'
+                   % remaining())
+
+        while(_putthread.isAlive()):
+            try:
+                _putthread.join(1)
+            except KeyboardInterrupt:
+                answer = inputChoice(u"""\
+There are %i pages remaining in the queue. Estimated time remaining: %s
+Really exit?"""
+                                         % remaining(),
+                                     ['yes', 'no'], ['y', 'N'], 'N')
+                if answer == 'y':
+                    return
+
     # only need one drop() call because all throttles use the same global pid
     try:
-        _sites[_sites.keys()[0]].throttle.drop()
+        _sites.values()[0].throttle.drop()
         pywikibot.log(u"Dropped throttle(s).")
     except IndexError:
         pass
 
 import atexit
 atexit.register(stopme)
+
+# Create a separate thread for asynchronous page saves (and other requests)
+
+def async_manager():
+    """Daemon; take requests from the queue and execute them in background."""
+    while True:
+        (request, args, kwargs) = page_put_queue.get()
+        if request is None:
+            break
+        request(*args, **kwargs)
+
+def async_request(request, *args, **kwargs):
+    """Put a request on the queue, and start the daemon if necessary."""
+    if not _putthread.isAlive():
+        try:
+            page_put_queue.mutex.acquire()
+            try:
+                _putthread.start()
+            except (AssertionError, RuntimeError):
+                pass
+        finally:
+            page_put_queue.mutex.release()
+    page_put_queue.put((request, args, kwargs))
+
+# queue to hold pending requests
+page_put_queue = Queue(config.max_queue_size)
+# set up the background thread
+_putthread = threading.Thread(target=async_manager)
+# identification for debugging purposes
+_putthread.setName('Put-Thread')
+_putthread.setDaemon(True)

Modified: branches/rewrite/pywikibot/config2.py
===================================================================
--- branches/rewrite/pywikibot/config2.py       2010-04-15 17:04:13 UTC (rev 8094)
+++ branches/rewrite/pywikibot/config2.py       2010-04-15 18:16:10 UTC (rev 8095)
@@ -490,6 +490,12 @@
 # Configuration variable 'socks' is defined but unknown. Misspelled?proxy = None
 proxy = None
 
+# How many pages should be put to a queue in asynchronous mode.
+# If max_queue_size is <= 0, the queue size is infinite.
+# Increasing this value will increase memory usage but could speed up
+# processing. The higher this value, the smaller this effect becomes.
+max_queue_size = 64
+
 # End of configuration section
 # ============================
 # System-level and User-level changes.

Modified: branches/rewrite/pywikibot/page.py
===================================================================
--- branches/rewrite/pywikibot/page.py  2010-04-15 17:04:13 UTC (rev 8094)
+++ branches/rewrite/pywikibot/page.py  2010-04-15 18:16:10 UTC (rev 8095)
@@ -718,16 +718,12 @@
                 "Page %s not saved; editing restricted by {{bots}} template"
                 % self.title(asLink=True))
         if async:
-            thd = threading.Thread(
-                      target=self._save,
-                      args=(comment, minor, watch, unwatch, callback)
-                  )
-            pywikibot.threadpool.append(thd)
-            thd.start()
+            pywikibot.async_request(self._save, comment, minor, watch, unwatch,
+                                    async, callback)
         else:
-            self._save(comment, minor, watch, unwatch, callback)
+            self._save(comment, minor, watch, unwatch, async, callback)
 
-    def _save(self, comment, minor, watch, unwatch, callback):
+    def _save(self, comment, minor, watch, unwatch, async, callback):
         err = None
         link = self.title(asLink=True)
         try:
@@ -741,13 +737,14 @@
         except pywikibot.LockedPage, err:
             # re-raise the LockedPage exception so that calling program
             # can re-try if appropriate
-            if not callback:
+            if not callback and not async:
                 raise
         # TODO: other "expected" error types to catch?
         except pywikibot.Error, err:
-            pywikibot.log(u"Error saving page %s\n" % link, exc_info=True)
-            if not callback:
-                raise pywikibot.PageNotSaved(link)
+            pywikibot.log(u"Error saving page %s (%s)\n" % (link, err),
+                          exc_info=True)
+            if not callback and not async:
+                raise pywikibot.PageNotSaved("%s: %s" %(link, err))
         if callback:
             callback(self, err)
 



_______________________________________________
Pywikipedia-svn mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/pywikipedia-svn

Reply via email to