Revision: 6138
Author:   russblau
Date:     2008-12-11 20:50:17 +0000 (Thu, 11 Dec 2008)

Log Message:
-----------
Add touch.py as a demonstration of how to write a bot under the new framework, 
and clean up bugs found during testing.

Modified Paths:
--------------
    branches/rewrite/pywikibot/__init__.py
    branches/rewrite/pywikibot/bot.py
    branches/rewrite/pywikibot/pagegenerators.py
    branches/rewrite/pywikibot/throttle.py

Added Paths:
-----------
    branches/rewrite/pywikibot/scripts/touch.py

Modified: branches/rewrite/pywikibot/__init__.py
===================================================================
--- branches/rewrite/pywikibot/__init__.py      2008-12-10 19:03:37 UTC (rev 6137)
+++ branches/rewrite/pywikibot/__init__.py      2008-12-11 20:50:17 UTC (rev 6138)
@@ -159,23 +159,22 @@
 
     """
     global stopped
-    if stopped:
-        return
     logger = logging.getLogger("wiki")
-    
-    logger.debug("stopme() called")
-    count = sum(1 for thd in threadpool if thd.isAlive())
-    if count:
-        logger.info("Waiting for about %(count)s pages to be saved."
-                     % locals())
-        for thd in threadpool:
-            if thd.isAlive():
-                thd.join()
+
+    if not stopped:
+        logger.debug("stopme() called")
+        count = sum(1 for thd in threadpool if thd.isAlive())
+        if count:
+            logger.info("Waiting for about %(count)s pages to be saved."
+                         % locals())
+            for thd in threadpool:
+                if thd.isAlive():
+                    thd.join()
+        stopped = True
     # only need one drop() call because all throttles use the same global pid
     try:
         _sites[_sites.keys()[0]].throttle.drop()
         logger.info("Dropped throttle(s).")
-        stopped = True
     except IndexError:
         pass
 

Modified: branches/rewrite/pywikibot/bot.py
===================================================================
--- branches/rewrite/pywikibot/bot.py   2008-12-10 19:03:37 UTC (rev 6137)
+++ branches/rewrite/pywikibot/bot.py   2008-12-11 20:50:17 UTC (rev 6138)
@@ -14,6 +14,7 @@
 # scripts, instead of writing each one from scratch.
 
 
+import logging
 import os.path
 import sys
 import pywikibot
@@ -108,8 +109,9 @@
     return nonGlobalArgs
 
 
-def showHelp():
-    moduleName = calledModuleName()
+def showHelp(name=""):
+    # argument, if given, is ignored
+    module = calledModuleName()
     globalHelp =u'''\
 Global arguments available for all bots:
 
@@ -144,15 +146,14 @@
 -v                debugging.
 '''
     try:
-        exec('import %s as module' % moduleName)
+        exec('import %s as module' % module)
         helpText = module.__doc__.decode('utf-8')
         if hasattr(module, 'docuReplacements'):
             for key, value in module.docuReplacements.iteritems():
                 helpText = helpText.replace(key, value.strip('\n\r'))
         pywikibot.output(helpText)
     except:
-        pywikibot.output(u'Sorry, no help available for %s' % moduleName)
+        if module:
+            pywikibot.output(u'Sorry, no help available for %s' % module)
         logging.exception('showHelp:')
     pywikibot.output(globalHelp)
-
-

Modified: branches/rewrite/pywikibot/pagegenerators.py
===================================================================
--- branches/rewrite/pywikibot/pagegenerators.py        2008-12-10 19:03:37 UTC (rev 6137)
+++ branches/rewrite/pywikibot/pagegenerators.py        2008-12-11 20:50:17 UTC (rev 6138)
@@ -654,7 +654,7 @@
             yield page.toggleTalkPage()
 
 
-def PreloadingGenerator(self, generator, pageNumber=60, lookahead=10):
+def PreloadingGenerator(generator, pageNumber=60, lookahead=10):
     """Yield preloaded pages taken from another generator."""
 
     # pages may be on more than one site, for example if an interwiki
@@ -663,8 +663,8 @@
     # build a list of pages for each site found in the iterator
     for page in generator:
         sites.setdefault(page.site(), []).append(page)
-    return itertools.chain(site.preloadpages(sites[site], pageNumber)
-                           for site in sites)
+    return itertools.chain(*(site.preloadpages(sites[site], pageNumber)
+                             for site in sites))
 
 
 #TODO below

Added: branches/rewrite/pywikibot/scripts/touch.py
===================================================================
--- branches/rewrite/pywikibot/scripts/touch.py                         (rev 0)
+++ branches/rewrite/pywikibot/scripts/touch.py 2008-12-11 20:50:17 UTC (rev 6138)
@@ -0,0 +1,96 @@
+#!/usr/bin/python
+# -*- coding: utf-8  -*-
+
+"""This bot goes over multiple pages of a wiki, and edits them without
+changing. This is for example used to get category links in templates
+working.
+
+This script understands various command-line arguments:
+
+&params;
+
+-redir            specifies that the robot should touch redirect pages;
+                  otherwise, they will be skipped.
+
+All other parameters will be regarded as a page title; in this case, the bot
+will only touch a single page.
+"""
+
+__version__='$Id: touch.py,v 1.13 2006/03/01 14:07:06 russblau Exp $'
+
+import pywikibot
+from pywikibot import pagegenerators, catlib, config
+import sys
+
+docuReplacements = {'&params;': pagegenerators.parameterHelp}
+
+
+class TouchBot:
+    def __init__(self, generator, touch_redirects):
+        self.generator = generator
+        self.touch_redirects = touch_redirects
+
+    def run(self):
+        for page in self.generator:
+            try:
+                # get the page, and save it using the unmodified text.
+                # whether or not getting a redirect throws an exception
+                # depends on the variable self.touch_redirects.
+                text = page.get(get_redirect = self.touch_redirects)
+                page.save("Pywikibot touch script")
+            except pywikibot.NoPage:
+                print "Page %s does not exist?!" % page.aslink()
+            except pywikibot.IsRedirectPage:
+                print "Page %s is a redirect; skipping." % page.aslink()
+            except pywikibot.LockedPage:
+                print "Page %s is locked?!" % page.aslink()
+
+
+def main(*args):
+    global bot
+    # Disable cosmetic changes because we don't want to modify any page
+    # content, so that we don't flood the histories with minor changes.
+    config.cosmetic_changes = False
+    #page generator
+    gen = None
+    genFactory = pagegenerators.GeneratorFactory()
+    redirs = False
+    namespaces = []
+    # If the user chooses to work on a single page, this temporary array is
+    # used to read the words from the page title. The words will later be
+    # joined with spaces to retrieve the full title.
+    pageTitle = []
+    for arg in pywikibot.handleArgs(*args):
+        if arg == '-redir':
+            redirs = True
+        elif arg.startswith('-namespace:'):
+            try:
+                namespaces.append(int(arg[11:]))
+            except ValueError:
+                namespaces.append(arg[11:])
+        else:
+            generator = genFactory.handleArg(arg)
+            if generator:
+                gen = generator
+            else:
+                pageTitle.append(arg)
+
+    if pageTitle:
+        # work on a single page
+        page = pywikibot.Page(pywikibot.Link(' '.join(pageTitle)))
+        gen = iter([page])
+    if not gen:
+        pywikibot.showHelp()
+    else:
+        if namespaces:
+            gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
+        preloadingGen = pagegenerators.PreloadingGenerator(gen)
+        bot = TouchBot(preloadingGen, redirs)
+        bot.run()
+
+
+if __name__ == "__main__":
+    try:
+        main()
+    finally:
+        pywikibot.stopme()

Modified: branches/rewrite/pywikibot/throttle.py
===================================================================
--- branches/rewrite/pywikibot/throttle.py      2008-12-10 19:03:37 UTC (rev 6137)
+++ branches/rewrite/pywikibot/throttle.py      2008-12-11 20:50:17 UTC (rev 6138)
@@ -17,10 +17,12 @@
 import threading
 import time
 
-logger = logging.getLogger("wiki")
+logger = logging.getLogger("wiki.throttle")
 
-pid = False   # global process identifier
-              # Don't check for other processes unless this is set
+pid = False     # global process identifier
+                # when the first Throttle is instantiated, it will set this
+                # variable to a positive integer, which will apply to all
+                # throttle objects created by this process.
 
 
 class Throttle(object):
@@ -33,23 +35,25 @@
     rate of access.
 
     """
-    def __init__(self, site, mindelay=config.minthrottle,
-                       maxdelay=config.maxthrottle,
-                       writedelay=config.put_throttle,
-                       multiplydelay=True, verbosedelay=False):
+    def __init__(self, site, mindelay=None, maxdelay=None, writedelay=None,
+                 multiplydelay=True, verbosedelay=False):
         self.lock = threading.RLock()
         self.mysite = str(site)
         self.logfn = config.datafilepath('throttle.log')
         self.mindelay = mindelay
+        if self.mindelay is None:
+            self.mindelay = config.minthrottle
         self.maxdelay = maxdelay
+        if self.maxdelay is None:
+            self.maxdelay = config.maxthrottle
         self.writedelay = writedelay
         self.last_read = 0
         self.last_write = 0
         self.next_multiplicity = 1.0
         self.checkdelay = 300  # Check logfile again after this many seconds
-        self.dropdelay = 750   # Ignore processes that have not made
+        self.dropdelay = 600   # Ignore processes that have not made
                                # a check in this many seconds
-        self.releasepid = 1800 # Free the process id after this many seconds
+        self.releasepid = 1200 # Free the process id after this many seconds
         self.lastwait = 0.0
         self.delay = 0
         self.verbosedelay = verbosedelay
@@ -58,13 +62,16 @@
         self.setDelays()
 
     def checkMultiplicity(self):
+        """Count running processes for site and set process_multiplicity."""
         global pid
         self.lock.acquire()
+        mysite = self.mysite
         logger.debug("Checking multiplicity: pid = %(pid)s" % globals())
         try:
             processes = []
-            my_pid = 1
+            my_pid = pid or 1  # start at 1 if global pid not yet set
             count = 1
+            # open throttle.log
             try:
                 f = open(self.logfn, 'r')
             except IOError:
@@ -75,6 +82,7 @@
             else:
                 now = time.time()
                 for line in f.readlines():
+                    # parse line; format is "pid timestamp site"
                     try:
                         line = line.split(' ')
                         this_pid = int(line[0])
@@ -86,7 +94,7 @@
                     if now - ptime > self.releasepid:
                         continue    # process has expired, drop from file
                     if now - ptime <= self.dropdelay \
-                            and this_site == self.mysite \
+                            and this_site == mysite \
                             and this_pid != pid:
                         count += 1
                     if this_site != self.mysite or this_pid != pid:
@@ -94,14 +102,14 @@
                                           'time': ptime,
                                           'site': this_site})
                     if not pid and this_pid >= my_pid:
-                        my_pid = this_pid+1
+                        my_pid = this_pid+1 # next unused process id
 
             if not pid:
                 pid = my_pid
             self.checktime = time.time()
-            processes.append({'pid': my_pid,
+            processes.append({'pid': pid,
                               'time': self.checktime,
-                              'site': self.mysite})
+                              'site': mysite})
             f = open(self.logfn, 'w')
             processes.sort(key=lambda p:(p['pid'], p['site']))
             for p in processes:
@@ -110,7 +118,7 @@
             self.process_multiplicity = count
             if self.verbosedelay:
                 logger.info(
-u"Found %(count)s processes running, including the current process."
+u"Found %(count)s %(mysite)s processes running, including this one."
                     % locals())
         finally:
             self.lock.release()
@@ -119,10 +127,11 @@
         """Set the nominal delays in seconds. Defaults to config values."""
         self.lock.acquire()
         try:
+            maxdelay = self.maxdelay
             if delay is None:
                 delay = self.mindelay
             if writedelay is None:
-                writedelay = self.writedelay
+                writedelay = config.put_throttle
             if absolute:
                 self.maxdelay = delay
                 self.mindelay = delay
@@ -173,7 +182,8 @@
             return 0.0
 
     def drop(self):
-        """Remove me from the list of running bots processes."""
+        """Remove me from the list of running bot processes."""
+        # drop all throttles with this process's pid, regardless of site
         self.checktime = 0
         processes = []
         try:



_______________________________________________
Pywikipedia-l mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/pywikipedia-l

Reply via email to