http://www.mediawiki.org/wiki/Special:Code/MediaWiki/82903
Revision: 82903
Author: ariel
Date: 2011-02-27 17:53:18 +0000 (Sun, 27 Feb 2011)
Log Message:
-----------
log progress output and other messages from dump run to a per-wiki-and-date
file if specified on command line
Modified Paths:
--------------
branches/ariel/xmldumps-backup/WikiDump.py
branches/ariel/xmldumps-backup/worker.py
Modified: branches/ariel/xmldumps-backup/WikiDump.py
===================================================================
--- branches/ariel/xmldumps-backup/WikiDump.py 2011-02-27 17:36:01 UTC (rev
82902)
+++ branches/ariel/xmldumps-backup/WikiDump.py 2011-02-27 17:53:18 UTC (rev
82903)
@@ -104,6 +104,7 @@
"index": "index.html",
"templatedir": home,
"perdumpindex": "index.html",
+ "logfile": "dumplog.txt",
#"reporting": {
"adminmail": "root@localhost",
"mailfrom": "root@localhost",
@@ -179,7 +180,8 @@
self.index = conf.get("output", "index")
self.templateDir = conf.get("output", "templateDir")
self.perDumpIndex = conf.get("output", "perdumpindex")
-
+ self.logFile = conf.get("output", "logfile")
+
self.adminMail = conf.get("reporting", "adminmail")
self.mailFrom = conf.get("reporting", "mailfrom")
self.smtpServer = conf.get("reporting", "smtpserver")
Modified: branches/ariel/xmldumps-backup/worker.py
===================================================================
--- branches/ariel/xmldumps-backup/worker.py 2011-02-27 17:36:01 UTC (rev
82902)
+++ branches/ariel/xmldumps-backup/worker.py 2011-02-27 17:53:18 UTC (rev
82903)
@@ -16,6 +16,8 @@
import glob
import WikiDump
import CommandManagement
+import Queue
+import thread
from os.path import dirname, exists, getsize, join, realpath
from subprocess import Popen, PIPE
@@ -48,6 +50,43 @@
def xmlEscape(text):
return text.replace("&", "&amp;").replace("<", "&lt;").replace(">",
"&gt;")
+class Logger(object):
+
+ def __init__(self, logFileName=None):
+ if (logFileName):
+ self.logFile = open(logFileName, "a")
+ else:
+ self.logFile = None
+ self.queue = Queue.Queue()
+ self.JobsDone = "JOBSDONE"
+
+ def logWrite(self, line=None):
+ if (self.logFile):
+ self.logFile.write(line)
+ self.logFile.flush()
+
+ def logClose(self):
+ if (logfile):
+ self.logFile.close()
+
+ # return 1 if logging terminated, 0 otherwise
+ def doJobOnLogQueue(self):
+ line = self.queue.get()
+ if (line == self.JobsDone):
+ self.logClose()
+ return 1
+ else:
+ self.logWrite(line)
+ return 0
+
+ def addToLogQueue(self,line=None):
+ if (line):
+ self.queue.put_nowait(line)
+
+ # set in order to have logging thread clean up and exit
+ def indicateJobsDone(self):
+ self.queue.put_nowait(self.JobsDone)
+
# so if the pages/revsPerChunkAbstract/History are just one number it means
# use that number for all the chunks, figure out yourself how many.
# otherwise we get passed alist that says "here's now many for each chunk and
it's this many chunks.
@@ -554,7 +593,7 @@
class Runner(object):
- def __init__(self, wiki, date=None, checkpoint=None, prefetch=True,
spawn=True, job=None, restart=False):
+ def __init__(self, wiki, date=None, checkpoint=None, prefetch=True,
spawn=True, job=None, restart=False, loggingEnabled=False):
self.wiki = wiki
self.config = wiki.config
self.dbName = wiki.dbName
@@ -562,6 +601,8 @@
self.spawn = spawn
self.chunkInfo = Chunk(wiki, self.dbName)
self.restart = restart
+ self.loggingEnabled = loggingEnabled
+ self.log = None
if date:
# Override, continuing a past dump?
@@ -577,10 +618,32 @@
self.jobRequested = job
self.dumpDir = DumpDir(self.wiki, self.dbName, self.date)
+
+ # this must come after the dumpdir setup so we know which
directory we are in
+ # for the log file.
+ if (loggingEnabled):
+ self.logFileName =
self.dumpDir.publicPath(config.logFile)
+ self.makeDir(join(self.wiki.publicDir(), self.date))
+ self.log = Logger(self.logFileName)
+ thread.start_new_thread(self.logQueueReader,(self.log,))
+
self.checksums = Checksummer(self.wiki, self.dumpDir)
+
# some or all of these dumpItems will be marked to run
self.dumpItemList = DumpItemList(self.wiki, self.prefetch,
self.spawn, self.date, self.chunkInfo);
+ def logQueueReader(self,log):
+ if not log:
+ return
+ done = False
+ while not done:
+ done = log.doJobOnLogQueue()
+
+ def logAndPrint(self, message):
+ if (self.log):
+ self.log.addToLogQueue("%s\n" % message)
+ print message
+
def passwordOption(self):
"""If you pass '-pfoo' mysql uses the password 'foo',
but if you pass '-p' it prompts. Sigh."""
@@ -676,6 +739,7 @@
errorString = "Error from command(s): "
for cmd in problemCommands:
errorString = errorString + "%s " % cmd
+ self.logAndPrint(errorString)
raise BackupError(errorString)
return 1
@@ -702,12 +766,14 @@
output = proc.fromchild.read()
retval = proc.wait()
if retval:
+ self.logAndPrint("Non-zero return code from '%s'" %
command)
raise BackupError("Non-zero return code from '%s'" %
command)
else:
return output
def debug(self, stuff):
- print "%s: %s %s" % (prettyTime(), self.dbName, stuff)
+ self.logAndPrint("%s: %s %s" % (prettyTime(), self.dbName,
stuff))
+# print "%s: %s %s" % (prettyTime(), self.dbName, stuff)
def makeDir(self, dir):
if exists(dir):
@@ -775,9 +841,9 @@
self.makeDir(join(self.wiki.privateDir(), self.date))
if (self.restart):
- print "Preparing for restart from job %s of %s" %
(self.jobRequested, self.dbName)
+ self.logAndPrint("Preparing for restart from job %s of
%s" % (self.jobRequested, self.dbName))
elif (self.jobRequested):
- print "Preparing for job %s of %s" %
(self.jobRequested, self.dbName)
+ self.logAndPrint("Preparing for job %s of %s" %
(self.jobRequested, self.dbName))
else:
self.showRunnerState("Cleaning up old dumps for %s" %
self.dbName)
self.cleanOldDumps()
@@ -901,7 +967,7 @@
# Short line for report extraction goes here
self.wiki.writeStatus(self.reportDatabaseStatusSummary(items, done))
except:
- print "Couldn't update status files. Continuing anyways"
+ self.logAndPrint("Couldn't update status files.
Continuing anyways")
def updateStatusFiles(self, done=False):
self.saveStatusSummaryAndDetail(self.dumpItemList.dumpItems,
done)
@@ -1059,6 +1125,7 @@
self.debug("Removing old symlink %s" % link)
os.remove(link)
else:
+ self.logAndPrint("What the hell dude, %s is not
a symlink" % link)
raise BackupError("What the hell dude, %s is
not a symlink" % link)
relative = relativePath(real, dirname(link))
if exists(real):
@@ -1148,6 +1215,8 @@
"""Receive a status line from a shellout and update the status
files."""
# pass through...
if (line):
+ if (runner.log):
+ runner.log.addToLogQueue(line)
sys.stderr.write(line)
self.progress = line.strip()
runner.updateStatusFiles()
@@ -2083,7 +2152,7 @@
if message:
print message
print "Usage: python worker.py [options] [wikidbname]"
- print "Options: --configfile, --date, --checkpoint, --job, --force,
--noprefetch, --nospawn, --restartfrom"
+ print "Options: --configfile, --date, --checkpoint, --job, --force,
--noprefetch, --nospawn, --restartfrom, --log"
print "--configfile: Specify an alternative configuration file to
read."
print " Default config file name: wikidump.conf"
print "--date: Rerun dump of a given date (probably unwise)"
@@ -2100,10 +2169,11 @@
print " (helpful if the previous files may have corrupt
contents)"
print "--nospawn: Do not spawn a separate process in order to
retrieve revision texts"
print "--restartfrom: Do all jobs after the one specified via --job,
including that one"
+ print "--log: Log progress messages and other output to logfile
in addition to"
+ print " the usual console output"
sys.exit(1)
-
if __name__ == "__main__":
try:
date = None
@@ -2114,10 +2184,12 @@
spawn = True
restart = False
jobRequested = None
+ enableLogging = False
+ log = None
try:
(options, remainder) = getopt.gnu_getopt(sys.argv[1:],
"",
- ['date=',
'checkpoint=', 'job=', 'configfile=', 'force', 'noprefetch', 'nospawn',
'restartfrom'])
+ ['date=',
'checkpoint=', 'job=', 'configfile=', 'force', 'noprefetch', 'nospawn',
'restartfrom', 'log'])
except:
usage("Unknown option specified")
@@ -2138,6 +2210,8 @@
jobRequested = val
elif opt == "--restartfrom":
restart = True
+ elif opt == "--log":
+ enableLogging = True
if jobRequested and (len(remainder) == 0):
usage("--job option requires the name of a wikidb to be
specified")
@@ -2164,7 +2238,7 @@
wiki = findAndLockNextWiki(config)
if wiki:
- runner = Runner(wiki, date, checkpoint, prefetch,
spawn, jobRequested, restart)
+ runner = Runner(wiki, date, checkpoint, prefetch,
spawn, jobRequested, restart, enableLogging)
if (restart):
print "Running %s, restarting from job %s..." %
(wiki.dbName, jobRequested)
elif (jobRequested):
_______________________________________________
MediaWiki-CVS mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs