Revision: 8095
Author: russblau
Date: 2010-04-15 18:16:10 +0000 (Thu, 15 Apr 2010)
Log Message:
-----------
Revert to a single background thread for asynchronous saves, instead of a
thread per request; this should mean less overhead and better performance.
Modified Paths:
--------------
branches/rewrite/pywikibot/__init__.py
branches/rewrite/pywikibot/config2.py
branches/rewrite/pywikibot/page.py
Modified: branches/rewrite/pywikibot/__init__.py
===================================================================
--- branches/rewrite/pywikibot/__init__.py 2010-04-15 17:04:13 UTC (rev 8094)
+++ branches/rewrite/pywikibot/__init__.py 2010-04-15 18:16:10 UTC (rev 8095)
@@ -14,6 +14,8 @@
import logging
import re
import sys
+import threading
+from Queue import Queue
import config2 as config
from bot import *
@@ -248,7 +250,6 @@
# Throttle and thread handling
-threadpool = [] # add page-putting threads to this list as they are created
stopped = False
def stopme():
@@ -263,20 +264,70 @@
if not stopped:
pywikibot.debug(u"stopme() called", _logger)
- count = sum(1 for thd in threadpool if thd.isAlive())
- if count:
- pywikibot.output(u"Waiting for about %(count)s pages to be saved."
- % locals())
- for thd in threadpool:
- if thd.isAlive():
- thd.join()
+ def remaining():
+ import datetime
+ remainingPages = page_put_queue.qsize() - 1
+ # -1 because we added a None element to stop the queue
+ remainingSeconds = datetime.timedelta(
+ seconds=(remainingPages * config.put_throttle))
+ return (remainingPages, remainingSeconds)
+
+ page_put_queue.put((None, [], {}))
stopped = True
+
+ if page_put_queue.qsize() > 1:
+ output(u'Waiting for %i pages to be put. Estimated time remaining: %s'
+ % remaining())
+
+ while(_putthread.isAlive()):
+ try:
+ _putthread.join(1)
+ except KeyboardInterrupt:
+ answer = inputChoice(u"""\
+There are %i pages remaining in the queue. Estimated time remaining: %s
+Really exit?"""
+ % remaining(),
+ ['yes', 'no'], ['y', 'N'], 'N')
+ if answer == 'y':
+ return
+
# only need one drop() call because all throttles use the same global pid
try:
- _sites[_sites.keys()[0]].throttle.drop()
+ _sites.values()[0].throttle.drop()
pywikibot.log(u"Dropped throttle(s).")
except IndexError:
pass
import atexit
atexit.register(stopme)
+
+# Create a separate thread for asynchronous page saves (and other requests)
+
+def async_manager():
+ """Daemon; take requests from the queue and execute them in background."""
+ while True:
+ (request, args, kwargs) = page_put_queue.get()
+ if request is None:
+ break
+ request(*args, **kwargs)
+
+def async_request(request, *args, **kwargs):
+ """Put a request on the queue, and start the daemon if necessary."""
+ if not _putthread.isAlive():
+ try:
+ page_put_queue.mutex.acquire()
+ try:
+ _putthread.start()
+ except (AssertionError, RuntimeError):
+ pass
+ finally:
+ page_put_queue.mutex.release()
+ page_put_queue.put((request, args, kwargs))
+
+# queue to hold pending requests
+page_put_queue = Queue(config.max_queue_size)
+# set up the background thread
+_putthread = threading.Thread(target=async_manager)
+# identification for debugging purposes
+_putthread.setName('Put-Thread')
+_putthread.setDaemon(True)
Modified: branches/rewrite/pywikibot/config2.py
===================================================================
--- branches/rewrite/pywikibot/config2.py 2010-04-15 17:04:13 UTC (rev 8094)
+++ branches/rewrite/pywikibot/config2.py 2010-04-15 18:16:10 UTC (rev 8095)
@@ -490,6 +490,12 @@
# Configuration variable 'socks' is defined but unknown. Misspelled?proxy =
None
proxy = None
+# How many pages should be put to a queue in asynchronous mode.
+# If max_queue_size is <= 0, the queue size is infinite.
+# Increasing this value will increase memory usage but could speed up
+# processing. The higher this value, the smaller the additional benefit.
+max_queue_size = 64
+
# End of configuration section
# ============================
# System-level and User-level changes.
Modified: branches/rewrite/pywikibot/page.py
===================================================================
--- branches/rewrite/pywikibot/page.py 2010-04-15 17:04:13 UTC (rev 8094)
+++ branches/rewrite/pywikibot/page.py 2010-04-15 18:16:10 UTC (rev 8095)
@@ -718,16 +718,12 @@
"Page %s not saved; editing restricted by {{bots}} template"
% self.title(asLink=True))
if async:
- thd = threading.Thread(
- target=self._save,
- args=(comment, minor, watch, unwatch, callback)
- )
- pywikibot.threadpool.append(thd)
- thd.start()
+ pywikibot.async_request(self._save, comment, minor, watch, unwatch,
+ async, callback)
else:
- self._save(comment, minor, watch, unwatch, callback)
+ self._save(comment, minor, watch, unwatch, async, callback)
- def _save(self, comment, minor, watch, unwatch, callback):
+ def _save(self, comment, minor, watch, unwatch, async, callback):
err = None
link = self.title(asLink=True)
try:
@@ -741,13 +737,14 @@
except pywikibot.LockedPage, err:
# re-raise the LockedPage exception so that calling program
# can re-try if appropriate
- if not callback:
+ if not callback and not async:
raise
# TODO: other "expected" error types to catch?
except pywikibot.Error, err:
- pywikibot.log(u"Error saving page %s\n" % link, exc_info=True)
- if not callback:
- raise pywikibot.PageNotSaved(link)
+ pywikibot.log(u"Error saving page %s (%s)\n" % (link, err),
+ exc_info=True)
+ if not callback and not async:
+ raise pywikibot.PageNotSaved("%s: %s" %(link, err))
if callback:
callback(self, err)
_______________________________________________
Pywikipedia-svn mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/pywikipedia-svn