Revision: 6159
Author:   russblau
Date:     2008-12-17 19:57:39 +0000 (Wed, 17 Dec 2008)

Log Message:
-----------
Improve output and logfile formatting.

Modified Paths:
--------------
    branches/rewrite/pywikibot/__init__.py
    branches/rewrite/pywikibot/bot.py
    branches/rewrite/pywikibot/comms/http.py
    branches/rewrite/pywikibot/config2.py
    branches/rewrite/pywikibot/data/api.py
    branches/rewrite/pywikibot/page.py
    branches/rewrite/pywikibot/pagegenerators.py
    branches/rewrite/pywikibot/scripts/touch.py
    branches/rewrite/pywikibot/throttle.py

Modified: branches/rewrite/pywikibot/__init__.py
===================================================================
--- branches/rewrite/pywikibot/__init__.py      2008-12-17 16:11:20 UTC (rev 6158)
+++ branches/rewrite/pywikibot/__init__.py      2008-12-17 19:57:39 UTC (rev 6159)
@@ -105,7 +105,13 @@
 # User interface functions (kept extremely simple for debugging)
 
 def output(text, toStdout=False):
-    print text.encode(config.console_encoding, "xmlcharrefreplace")
+    if toStdout:
+        level = STDOUT
+    else:
+        level = logging.INFO
+    logging.getLogger().log(level,
+                            text.encode(config.console_encoding,
+                                        "xmlcharrefreplace"))
 
 def input(prompt, password=False):
     if isinstance(prompt, unicode):
@@ -169,7 +175,7 @@
     # only need one drop() call because all throttles use the same global pid
     try:
         _sites[_sites.keys()[0]].throttle.drop()
-        logger.log("VERBOSE", "Dropped throttle(s).")
+        logger.log(pywikibot.VERBOSE, "Dropped throttle(s).")
     except IndexError:
         pass
 

Modified: branches/rewrite/pywikibot/bot.py
===================================================================
--- branches/rewrite/pywikibot/bot.py   2008-12-17 16:11:20 UTC (rev 6158)
+++ branches/rewrite/pywikibot/bot.py   2008-12-17 19:57:39 UTC (rev 6159)
@@ -21,6 +21,13 @@
 from pywikibot import config2 as config
 
 
+# logging levels
+
+STDOUT = 16
+VERBOSE = 18
+INPUT = 25
+
+
 def calledModuleName():
     """Return the name of the module calling this function.
 
@@ -35,6 +42,18 @@
     return os.path.basename(called)
 
 
+class LevelFilter(logging.Filter):
+    """Filter that only passes records at a specific level."""
+    def __init__(self, level=None):
+        self.level = level
+
+    def filter(self, record):
+        if self.level:
+            return record.levelno == self.level
+        else:
+            return True
+
+
 def _decodeArg(arg):
     if sys.platform=='win32':
         if config.console_encoding == 'cp850':
@@ -77,7 +96,7 @@
     for arg in args:
         arg = _decodeArg(arg)
         if arg == '-help':
-            showHelp(moduleName)
+            showHelp()
             sys.exit(0)
         elif arg.startswith('-family:'):
             config.family = arg[8:]
@@ -100,10 +119,8 @@
         elif arg == "-debug":
             if moduleName not in config.log:
                 config.log.append(moduleName)
-            config.log.debug_log = True
+            config.debug_log = True
         elif arg == '-verbose' or arg == "-v":
-            pywikibot.output(u'Pywikipediabot %s' % (version.getversion()))
-            pywikibot.output(u'Python %s' % (sys.version))
             config.verbose_output += 1
         elif arg == '-daemonize':
             import daemonize
@@ -118,38 +135,60 @@
 
     # initialize logging system for terminal-based bots
 
-    logging.addLevelName(18, "VERBOSE") # for messages to be displayed on 
-                                        # terminal at "verbose" setting
-                                        # use INFO for messages to be displayed
-                                        # even on non-verbose setting
-    logging.addLevelName(24, "STDOUT")  # for messages to be displayed to stdout
-    logging.addLevelName(26, "INPUT")   # for prompts requiring user response
+    logging.addLevelName(VERBOSE, "VERBOSE")
+        # for messages to be displayed on terminal at "verbose" setting
+        # use INFO for messages to be displayed even on non-verbose setting
+    logging.addLevelName(STDOUT, "STDOUT")
+        # for messages to be displayed to stdout
+    logging.addLevelName(INPUT, "INPUT")
+        # for prompts requiring user response
 
-    logging.basicConfig()               # initializes root logger
+    logging.basicConfig(format="%(message)s")  # initialize root logger
     root_logger = logging.getLogger()
+    default_handler = root_logger.handlers[0]
+    root_logger.setLevel(logging.DEBUG) # all records go to logger
+        # handlers filter separately by level
     if config.verbose_output:
-        root_logger.setLevel("VERBOSE")
+        default_handler.setLevel(VERBOSE)
     else:
-        root_logger.setLevel(logging.INFO)
-    if moduleName in config.log:
+        default_handler.setLevel(logging.INFO)
+    if moduleName in config.log or '*' in config.log:
         if config.logfilename:
             logfile = config.datafilepath(config.logfilename)
         else:
-            logfile = config.datafilepath("%s.log" % moduleName)
+            logfile = config.datafilepath("%s-bot.log" % moduleName)
         file_handler = logging.handlers.RotatingFileHandler(
                             filename=logfile, maxBytes=2 << 20, backupCount=5)
         if config.debug_log:
             file_handler.setLevel(logging.DEBUG)
         else:
-            file_handler.setLevel("VERBOSE")
-        logging.addHandler(file_handler)
+            file_handler.setLevel(VERBOSE)
+        form = logging.Formatter(
+                   fmt="%(asctime)s %(filename)-18s:%(lineno)-4d "
+                       "%(levelname)-8s %(message)s",
+                   datefmt="%Y-%m-%d %H:%M:%S"
+               )
+        file_handler.setFormatter(form)
+        root_logger.addHandler(file_handler)
 
+    output_handler = logging.StreamHandler(strm=sys.stdout)
+    output_handler.setLevel(STDOUT)
+    output_handler.addFilter(LevelFilter(STDOUT))
+    root_logger.addHandler(output_handler)
+
+    if config.verbose_output:
+        import re
+        ver = pywikibot.__version__ # probably can be improved on
+        m = re.search(r"\$Id: .* (\d+ \d+-\d+-\d+ \d+:\d+:\d+Z) .*\$", ver)
+        pywikibot.output(u'Pywikipediabot r%s' % m.group(1))
+        pywikibot.output(u'Python %s' % sys.version)
+
     return nonGlobalArgs
 
 
 def showHelp(name=""):
     # argument, if given, is ignored
-    module = calledModuleName()
+    modname = calledModuleName()
     globalHelp =u'''\
 Global arguments available for all bots:
 
@@ -170,28 +209,30 @@
 
 -help             Shows this help text.
 
--log              Enable the logfile. Logs will be stored in the logs
-                  subdirectory.
+-log              Enable the logfile, using the default filename
+                  '%s-bot.log'
 
--log:xyz          Enable the logfile, using xyz as the filename.
+-log:xyz          Enable the logfile, using 'xyz' as the filename.
 
 -nolog            Disable the logfile (if it is enabled by default).
 
+-debug            Enable the logfile and include extensive debugging data.
+
 -putthrottle:n    Set the minimum time (in seconds) the bot will wait between
 -pt:n             saving pages.
 
 -verbose          Have the bot provide additional output that may be useful in
 -v                debugging.
-'''
+''' % modname
     try:
-        exec('import %s as module' % module)
+        exec('import %s as module' % modname)
         helpText = module.__doc__.decode('utf-8')
         if hasattr(module, 'docuReplacements'):
             for key, value in module.docuReplacements.iteritems():
                 helpText = helpText.replace(key, value.strip('\n\r'))
         pywikibot.output(helpText)
     except:
-        if module:
-            pywikibot.output(u'Sorry, no help available for %s' % module)
+        if modname:
+            pywikibot.output(u'Sorry, no help available for %s' % modname)
         logging.exception('showHelp:')
     pywikibot.output(globalHelp)

Modified: branches/rewrite/pywikibot/comms/http.py
===================================================================
--- branches/rewrite/pywikibot/comms/http.py    2008-12-17 16:11:20 UTC (rev 6158)
+++ branches/rewrite/pywikibot/comms/http.py    2008-12-17 19:57:39 UTC (rev 6159)
@@ -27,7 +27,7 @@
 import logging
 import atexit
 
-from pywikibot import config
+from pywikibot import config, VERBOSE
 import cookielib
 import threadedhttp
 
@@ -57,7 +57,7 @@
 
 
 # Build up HttpProcessors
-logger.info('Starting %(numthreads)i threads...' % locals())
+logger.log(VERBOSE, 'Starting %(numthreads)i threads...', locals())
 for i in range(numthreads):
     proc = threadedhttp.HttpProcessor(http_queue, cookie_jar, connection_pool)
     proc.setDaemon(True)
@@ -68,7 +68,7 @@
 def _flush():
     for i in threads:
         http_queue.put(None)
-    logger.info('Waiting for threads to finish... ')
+    logger.log(VERBOSE, 'Waiting for threads to finish... ')
     for i in threads:
         i.join()
     logger.debug('All threads finished.')

Modified: branches/rewrite/pywikibot/config2.py
===================================================================
--- branches/rewrite/pywikibot/config2.py       2008-12-17 16:11:20 UTC (rev 6158)
+++ branches/rewrite/pywikibot/config2.py       2008-12-17 19:57:39 UTC (rev 6159)
@@ -214,10 +214,13 @@
 #     log = []
 # Per default, logging of interwiki.py is enabled because its logfiles can
 # be used to generate so-called warnfiles.
-# This setting can be overridden by the -log or -nolog command-line arguments.
 log = ['interwiki']
-logfilename = None # defaults to modulename.log
+# filename defaults to modulename-bot.log
+logfilename = None
+# set to 1 (or higher) to generate "informative" messages to terminal
 verbose_output = 0
+# if True, include a lot of debugging info in logfile
+# (overrides log setting above)
 debug_log = False
 
 ############## INTERWIKI SETTINGS ##############

Modified: branches/rewrite/pywikibot/data/api.py
===================================================================
--- branches/rewrite/pywikibot/data/api.py      2008-12-17 16:11:20 UTC (rev 6158)
+++ branches/rewrite/pywikibot/data/api.py      2008-12-17 19:57:39 UTC (rev 6159)
@@ -195,7 +195,7 @@
                     rawdata = http.request(self.site, uri)
             except Exception, e: #TODO: what exceptions can occur here?
                 logger.warning(traceback.format_exc())
-                print uri, params
+                logger.warning("%s, %s", uri, params)
                 self.wait()
                 continue
             if not isinstance(rawdata, unicode):
@@ -621,7 +621,7 @@
     from pywikibot import Site
     logger.setLevel(pywikibot.logging.DEBUG)
     mysite = Site("en", "wikipedia")
-    print "starting test...."
+    pywikibot.output("starting test....")
     def _test():
         import doctest
         doctest.testmod()

Modified: branches/rewrite/pywikibot/page.py
===================================================================
--- branches/rewrite/pywikibot/page.py  2008-12-17 16:11:20 UTC (rev 6158)
+++ branches/rewrite/pywikibot/page.py  2008-12-17 19:57:39 UTC (rev 6159)
@@ -37,8 +37,8 @@
 
     """
 
-    @deprecate_arg("insite", None)
-    @deprecate_arg("defaultNamespace", None)
+#    @deprecate_arg("insite", None)
+#    @deprecate_arg("defaultNamespace", None)
     def __init__(self, source, title=u"", ns=0):
         """Instantiate a Page object.
 

Modified: branches/rewrite/pywikibot/pagegenerators.py
===================================================================
--- branches/rewrite/pywikibot/pagegenerators.py        2008-12-17 16:11:20 UTC (rev 6158)
+++ branches/rewrite/pywikibot/pagegenerators.py        2008-12-17 19:57:39 UTC (rev 6159)
@@ -328,92 +328,6 @@
         return gen
 
 
-class ThreadedGenerator(threading.Thread):
-    """Look-ahead generator class.
-
-    Runs a generator in a separate thread and queues the results; can
-    be called like a regular generator.
-
-    Subclasses should override self.generator, _not_ self.run
-
-    Important: the generator thread will stop itself if the generator's
-    internal queue is exhausted; but, if the calling program does not use
-    all the generated values, it must call the generator's stop() method to
-    stop the background thread.  Example usage:
-
-    >>> gen = ThreadedGenerator(target=foo)
-    >>> try:
-    ...     for data in gen:
-    ...         do_work(data)
-    ... finally:
-    ...     gen.stop()
-
-    """ #NOT CURRENTLY USED: Intended for future development
-
-    def __init__(self, group=None, target=None, name="GeneratorThread",
-                 args=(), kwargs=None, qsize=65536):
-        """Constructor.  Takes same keyword arguments as threading.Thread.
-
-        target must be a generator function (or other callable that returns
-        an iterable object).
-
-        @param qsize: The size of the lookahead queue. The larger the qsize,
-        the more values will be computed in advance of use (which can eat
-        up memory and processor time).
-        @type qsize: int
-
-        """
-        if kwargs is None:
-            kwargs = {}
-        if target:
-            self.generator = target
-        if not hasattr(self, "generator"):
-            raise RuntimeError("No generator for ThreadedGenerator to run.")
-        self.args, self.kwargs = args, kwargs
-        threading.Thread.__init__(self, group=group, name=name)
-        self.queue = Queue.Queue(qsize)
-        self.finished = threading.Event()
-
-    def __iter__(self):
-        """Iterate results from the queue."""
-        if not self.isAlive() and not self.finished.isSet():
-            self.start()
-        # if there is an item in the queue, yield it, otherwise wait
-        while not self.finished.isSet():
-            try:
-                yield self.queue.get(True, 0.25)
-            except Queue.Empty:
-                pass
-            except KeyboardInterrupt:
-                self.stop()
-
-    def stop(self):
-        """Stop the background thread."""
-##        if not self.finished.isSet():
-##            pywikibot.output("DEBUG: signalling %s to stop." % self)
-        self.finished.set()
-
-    def run(self):
-        """Run the generator and store the results on the queue."""
-        self.__gen = self.generator(*self.args, **self.kwargs)
-        for result in self.__gen:
-            while True:
-                if self.finished.isSet():
-##                    pywikibot.output("DEBUG: %s received stop signal." % self)
-                    return
-                try:
-                    self.queue.put_nowait(result)
-                except Queue.Full:
-                    time.sleep(0.25)
-                    continue
-                break
-        # wait for queue to be emptied, then kill the thread
-        while not self.finished.isSet() and not self.queue.empty():
-            time.sleep(0.25)
-        self.stop()
-##        pywikibot.output("DEBUG: %s stopped because generator exhausted." % self)
-
-
 def AllpagesPageGenerator(start ='!', namespace=None, includeredirects=True,
                           site=None):
     """
@@ -761,6 +675,8 @@
     for page in site.search(query, number=number, namespaces = namespaces):
         yield page[0]
 
+# following classes just ported from version 1 without revision; not tested
+
 class YahooSearchPageGenerator:
     '''
     To use this generator, install pYsearch
@@ -785,7 +701,8 @@
     def __iter__(self):
         # restrict query to local site
         localQuery = '%s site:%s' % (self.query, self.site.hostname())
-        base = 'http://%s%s' % (self.site.hostname(), self.site.nice_get_address(''))
+        base = 'http://%s%s' % (self.site.hostname(),
+                                self.site.nice_get_address(''))
         for url in self.queryYahoo(localQuery):
             if url[:len(base)] == base:
                 title = url[len(base):]
@@ -826,7 +743,8 @@
         google.LICENSE_KEY = config.google_key
         offset = 0
         estimatedTotalResultsCount = None
-        while not estimatedTotalResultsCount or offset < estimatedTotalResultsCount:
+        while not estimatedTotalResultsCount \
+              or offset < estimatedTotalResultsCount:
             while (True):
                 # Google often yields 502 errors.
                 try:
@@ -887,7 +805,8 @@
     def __iter__(self):
         # restrict query to local site
         localQuery = '%s site:%s' % (self.query, self.site.hostname())
-        base = 'http://%s%s' % (self.site.hostname(), self.site.nice_get_address(''))
+        base = 'http://%s%s' % (self.site.hostname(),
+                                self.site.nice_get_address(''))
         for url in self.queryGoogle(localQuery):
             if url[:len(base)] == base:
                 title = url[len(base):]

Modified: branches/rewrite/pywikibot/scripts/touch.py
===================================================================
--- branches/rewrite/pywikibot/scripts/touch.py 2008-12-17 16:11:20 UTC (rev 6158)
+++ branches/rewrite/pywikibot/scripts/touch.py 2008-12-17 19:57:39 UTC (rev 6159)
@@ -39,11 +39,12 @@
                 text = page.get(get_redirect = self.touch_redirects)
                 page.save("Pywikibot touch script")
             except pywikibot.NoPage:
-                print "Page %s does not exist?!" % page.aslink()
+                pywikibot.output("Page %s does not exist?!" % page.aslink())
             except pywikibot.IsRedirectPage:
-                print "Page %s is a redirect; skipping." % page.aslink()
+                pywikibot.output("Page %s is a redirect; skipping."
+                                 % page.aslink())
             except pywikibot.LockedPage:
-                print "Page %s is locked?!" % page.aslink()
+                pywikibot.output("Page %s is locked?!" % page.aslink())
 
 
 def main(*args):

Modified: branches/rewrite/pywikibot/throttle.py
===================================================================
--- branches/rewrite/pywikibot/throttle.py      2008-12-17 16:11:20 UTC (rev 6158)
+++ branches/rewrite/pywikibot/throttle.py      2008-12-17 19:57:39 UTC (rev 6159)
@@ -39,7 +39,7 @@
                  multiplydelay=True, verbosedelay=False):
         self.lock = threading.RLock()
         self.mysite = str(site)
-        self.logfn = config.datafilepath('throttle.log')
+        self.ctrlfilename = config.datafilepath('throttle.ctrl')
         self.mindelay = mindelay
         if self.mindelay is None:
             self.mindelay = config.minthrottle
@@ -73,7 +73,7 @@
             count = 1
             # open throttle.log
             try:
-                f = open(self.logfn, 'r')
+                f = open(self.ctrlfilename, 'r')
             except IOError:
                 if not pid:
                     pass
@@ -110,7 +110,7 @@
             processes.append({'pid': pid,
                               'time': self.checktime,
                               'site': mysite})
-            f = open(self.logfn, 'w')
+            f = open(self.ctrlfilename, 'w')
             processes.sort(key=lambda p:(p['pid'], p['site']))
             for p in processes:
                 f.write("%(pid)s %(time)s %(site)s\n" % p)
@@ -187,7 +187,7 @@
         self.checktime = 0
         processes = []
         try:
-            f = open(self.logfn, 'r')
+            f = open(self.ctrlfilename, 'r')
         except IOError:
             return
         else:
@@ -206,15 +206,14 @@
                     processes.append({'pid': this_pid,
                                       'time': ptime,
                                       'site': this_site})
-        f = open(self.logfn, 'w')
+        f = open(self.ctrlfilename, 'w')
         processes.sort(key=lambda p:p['pid'])
         for p in processes:
             f.write("%(pid)s %(time)s %(site)s\n" % p)
         f.close()
 
     def __call__(self, requestsize=1, write=False):
-        """
-        Block the calling program if the throttle time has not expired.
+        """Block the calling program if the throttle time has not expired.
 
         Parameter requestsize is the number of Pages to be read/written;
         multiply delay time by an appropriate factor.
@@ -248,8 +247,7 @@
             self.lock.release()
 
     def lag(self, lagtime):
-        """
-        Seize the throttle lock due to server lag.
+        """Seize the throttle lock due to server lag.
 
         This will prevent any thread from accessing this site.
 



_______________________________________________
Pywikipedia-l mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/pywikipedia-l

Reply via email to