http://www.mediawiki.org/wiki/Special:Code/MediaWiki/82903

Revision: 82903
Author:   ariel
Date:     2011-02-27 17:53:18 +0000 (Sun, 27 Feb 2011)
Log Message:
-----------
log progress output and other messages from dump run to a per-wiki-and-date 
file if specified on command line

Modified Paths:
--------------
    branches/ariel/xmldumps-backup/WikiDump.py
    branches/ariel/xmldumps-backup/worker.py

Modified: branches/ariel/xmldumps-backup/WikiDump.py
===================================================================
--- branches/ariel/xmldumps-backup/WikiDump.py  2011-02-27 17:36:01 UTC (rev 
82902)
+++ branches/ariel/xmldumps-backup/WikiDump.py  2011-02-27 17:53:18 UTC (rev 
82903)
@@ -104,6 +104,7 @@
                        "index": "index.html",
                        "templatedir": home,
                        "perdumpindex": "index.html",
+                       "logfile": "dumplog.txt",
                        #"reporting": {
                        "adminmail": "root@localhost",
                        "mailfrom": "root@localhost",
@@ -179,7 +180,8 @@
                self.index = conf.get("output", "index")
                self.templateDir = conf.get("output", "templateDir")
                self.perDumpIndex = conf.get("output", "perdumpindex")
-               
+               self.logFile = conf.get("output", "logfile")
+
                self.adminMail = conf.get("reporting", "adminmail")
                self.mailFrom = conf.get("reporting", "mailfrom")
                self.smtpServer = conf.get("reporting", "smtpserver")

Modified: branches/ariel/xmldumps-backup/worker.py
===================================================================
--- branches/ariel/xmldumps-backup/worker.py    2011-02-27 17:36:01 UTC (rev 
82902)
+++ branches/ariel/xmldumps-backup/worker.py    2011-02-27 17:53:18 UTC (rev 
82903)
@@ -16,6 +16,8 @@
 import glob
 import WikiDump
 import CommandManagement
+import Queue
+import thread
 
 from os.path import dirname, exists, getsize, join, realpath
 from subprocess import Popen, PIPE
@@ -48,6 +50,43 @@
 def xmlEscape(text):
        return text.replace("&", "&amp;").replace("<", "&lt;").replace(">", 
"&gt;")
 
+class Logger(object):
+
+       def __init__(self, logFileName=None):
+               if (logFileName):
+                       self.logFile = open(logFileName, "a")
+               else:
+                       self.logFile = None
+               self.queue = Queue.Queue()
+               self.JobsDone = "JOBSDONE"
+
+       def logWrite(self, line=None):
+               if (self.logFile):
+                       self.logFile.write(line)
+                       self.logFile.flush()
+
+       def logClose(self):
+               if (self.logFile):
+                       self.logFile.close()
+
+       # return 1 if logging terminated, 0 otherwise
+       def doJobOnLogQueue(self):
+               line = self.queue.get()
+               if (line == self.JobsDone):
+                       self.logClose()
+                       return 1
+               else:
+                       self.logWrite(line)
+                       return 0
+
+       def addToLogQueue(self,line=None):
+               if (line):
+                       self.queue.put_nowait(line)
+
+       # set in order to have logging thread clean up and exit
+       def indicateJobsDone(self):
+               self.queue.put_nowait(self.JobsDone)
+
 # so if the pages/revsPerChunkAbstract/History are just one number it means
 # use that number for all the chunks, figure out yourself how many.
 # otherwise we get passed alist that says "here's now many for each chunk and 
it's this many chunks. 
@@ -554,7 +593,7 @@
                                      
 class Runner(object):
 
-       def __init__(self, wiki, date=None, checkpoint=None, prefetch=True, 
spawn=True, job=None, restart=False):
+       def __init__(self, wiki, date=None, checkpoint=None, prefetch=True, 
spawn=True, job=None, restart=False, loggingEnabled=False):
                self.wiki = wiki
                self.config = wiki.config
                self.dbName = wiki.dbName
@@ -562,6 +601,8 @@
                self.spawn = spawn
                self.chunkInfo = Chunk(wiki, self.dbName)
                self.restart = restart
+               self.loggingEnabled = loggingEnabled
+               self.log = None
 
                if date:
                        # Override, continuing a past dump?
@@ -577,10 +618,32 @@
 
                self.jobRequested = job
                self.dumpDir = DumpDir(self.wiki, self.dbName, self.date)
+
+               # this must come after the dumpdir setup so we know which 
directory we are in 
+               # for the log file.
+               if (loggingEnabled):
+                       self.logFileName = 
self.dumpDir.publicPath(config.logFile)
+                       self.makeDir(join(self.wiki.publicDir(), self.date))
+                       self.log = Logger(self.logFileName)
+                       thread.start_new_thread(self.logQueueReader,(self.log,))
+
                self.checksums = Checksummer(self.wiki, self.dumpDir)
+
                # some or all of these dumpItems will be marked to run
                self.dumpItemList = DumpItemList(self.wiki, self.prefetch, 
self.spawn, self.date, self.chunkInfo);
 
+       def logQueueReader(self,log):
+               if not log:
+                       return
+               done = False
+               while not done:
+                       done = log.doJobOnLogQueue()
+               
+       def logAndPrint(self, message):
+               if (self.log):
+                       self.log.addToLogQueue("%s\n" % message)
+               print message
+
        def passwordOption(self):
                """If you pass '-pfoo' mysql uses the password 'foo',
                but if you pass '-p' it prompts. Sigh."""
@@ -676,6 +739,7 @@
                        errorString = "Error from command(s): "
                        for cmd in problemCommands: 
                                errorString = errorString + "%s " % cmd
+                       self.logAndPrint(errorString)
                        raise BackupError(errorString)
                return 1
 
@@ -702,12 +766,14 @@
                output = proc.fromchild.read()
                retval = proc.wait()
                if retval:
+                       self.logAndPrint("Non-zero return code from '%s'" % 
command)
                        raise BackupError("Non-zero return code from '%s'" % 
command)
                else:
                        return output
 
        def debug(self, stuff):
-               print "%s: %s %s" % (prettyTime(), self.dbName, stuff)
+               self.logAndPrint("%s: %s %s" % (prettyTime(), self.dbName, 
stuff))
+#              print "%s: %s %s" % (prettyTime(), self.dbName, stuff)
 
        def makeDir(self, dir):
                if exists(dir):
@@ -775,9 +841,9 @@
                self.makeDir(join(self.wiki.privateDir(), self.date))
 
                if (self.restart):
-                       print "Preparing for restart from job %s of %s" % 
(self.jobRequested, self.dbName)
+                       self.logAndPrint("Preparing for restart from job %s of 
%s" % (self.jobRequested, self.dbName))
                elif (self.jobRequested):
-                       print "Preparing for job %s of %s" % 
(self.jobRequested, self.dbName)
+                       self.logAndPrint("Preparing for job %s of %s" % 
(self.jobRequested, self.dbName))
                else:
                        self.showRunnerState("Cleaning up old dumps for %s" % 
self.dbName)
                        self.cleanOldDumps()
@@ -901,7 +967,7 @@
                        # Short line for report extraction goes here
                        
self.wiki.writeStatus(self.reportDatabaseStatusSummary(items, done))
                except:
-                       print "Couldn't update status files. Continuing anyways"
+                       self.logAndPrint("Couldn't update status files. 
Continuing anyways")
 
        def updateStatusFiles(self, done=False):
                self.saveStatusSummaryAndDetail(self.dumpItemList.dumpItems, 
done)
@@ -1059,6 +1125,7 @@
                                self.debug("Removing old symlink %s" % link)
                                os.remove(link)
                        else:
+                               self.logAndPrint("What the hell dude, %s is not 
a symlink" % link)
                                raise BackupError("What the hell dude, %s is 
not a symlink" % link)
                relative = relativePath(real, dirname(link))
                if exists(real):
@@ -1148,6 +1215,8 @@
                """Receive a status line from a shellout and update the status 
files."""
                # pass through...
                if (line):
+                       if (runner.log):
+                               runner.log.addToLogQueue(line)
                        sys.stderr.write(line)
                self.progress = line.strip()
                runner.updateStatusFiles()
@@ -2083,7 +2152,7 @@
        if message:
                print message
        print "Usage: python worker.py [options] [wikidbname]"
-       print "Options: --configfile, --date, --checkpoint, --job, --force, 
--noprefetch, --nospawn, --restartfrom"
+       print "Options: --configfile, --date, --checkpoint, --job, --force, 
--noprefetch, --nospawn, --restartfrom, --log"
        print "--configfile:  Specify an alternative configuration file to 
read."
        print "               Default config file name: wikidump.conf"
        print "--date:        Rerun dump of a given date (probably unwise)"
@@ -2100,10 +2169,11 @@
        print "               (helpful if the previous files may have corrupt 
contents)"
        print "--nospawn:     Do not spawn a separate process in order to 
retrieve revision texts"
        print "--restartfrom: Do all jobs after the one specified via --job, 
including that one"
+       print "--log:         Log progress messages and other output to logfile 
in addition to"
+       print "               the usual console output"
 
        sys.exit(1)
 
-
 if __name__ == "__main__":
        try:
                date = None
@@ -2114,10 +2184,12 @@
                spawn = True
                restart = False
                jobRequested = None
+               enableLogging = False
+               log = None
 
                try:
                        (options, remainder) = getopt.gnu_getopt(sys.argv[1:], 
"",
-                                                                ['date=', 
'checkpoint=', 'job=', 'configfile=', 'force', 'noprefetch', 'nospawn', 
'restartfrom'])
+                                                                ['date=', 
'checkpoint=', 'job=', 'configfile=', 'force', 'noprefetch', 'nospawn', 
'restartfrom', 'log'])
                except:
                        usage("Unknown option specified")
 
@@ -2138,6 +2210,8 @@
                                jobRequested = val
                        elif opt == "--restartfrom":
                                restart = True
+                       elif opt == "--log":
+                               enableLogging = True
 
                if jobRequested and (len(remainder) == 0):
                        usage("--job option requires the name of a wikidb to be 
specified")
@@ -2164,7 +2238,7 @@
                        wiki = findAndLockNextWiki(config)
 
                if wiki:
-                       runner = Runner(wiki, date, checkpoint, prefetch, 
spawn, jobRequested, restart)
+                       runner = Runner(wiki, date, checkpoint, prefetch, 
spawn, jobRequested, restart, enableLogging)
                        if (restart):
                                print "Running %s, restarting from job %s..." % 
(wiki.dbName, jobRequested)
                        elif (jobRequested):


_______________________________________________
MediaWiki-CVS mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs

Reply via email to