xmldumps-backup

ariel Mon, 29 Aug 2011 17:20:21 -0700

http://www.mediawiki.org/wiki/Special:Code/MediaWiki/95732


Revision: 95732
Author:   ariel
Date:     2011-08-30 00:20:10 +0000 (Tue, 30 Aug 2011)
Log Message:
-----------
first take at rerunning a checkpoint file

Modified Paths:
--------------
    branches/ariel/xmldumps-backup/WikiDump.py
    branches/ariel/xmldumps-backup/worker.py

Modified: branches/ariel/xmldumps-backup/WikiDump.py
===================================================================
--- branches/ariel/xmldumps-backup/WikiDump.py  2011-08-30 00:10:32 UTC (rev 
95731)
+++ branches/ariel/xmldumps-backup/WikiDump.py  2011-08-30 00:20:10 UTC (rev 
95732)
@@ -191,6 +191,7 @@
                        "cat": "/bin/cat",
                        "grep": "/bin/grep",
                        "checkforbz2footer": "/usr/local/bin/checkforbz2footer",
+                       "writeuptopageid": "/usr/local/bin/writeuptopageid",
                        #"cleanup": {
                        "keep": "3",
                        #"chunks": {
@@ -271,6 +272,7 @@
                self.cat = self.conf.get("tools", "cat")
                self.grep = self.conf.get("tools", "grep")
                self.checkforbz2footer = 
self.conf.get("tools","checkforbz2footer")
+               self.writeuptopageid = self.conf.get("tools","writeuptopageid")
 
                if not self.conf.has_section('cleanup'):
                        self.conf.add_section('cleanup')

Modified: branches/ariel/xmldumps-backup/worker.py
===================================================================
--- branches/ariel/xmldumps-backup/worker.py    2011-08-30 00:10:32 UTC (rev 
95731)
+++ branches/ariel/xmldumps-backup/worker.py    2011-08-30 00:20:10 UTC (rev 
95732)
@@ -485,12 +485,13 @@
                self._toBeRun = toBeRun
 
 class DumpItemList(object):
-       def __init__(self, wiki, prefetch, spawn, chunkToDo, singleJob, 
chunkInfo, runInfoFile, dumpDir):
+       def __init__(self, wiki, prefetch, spawn, chunkToDo, checkpointFile, 
singleJob, chunkInfo, runInfoFile, dumpDir):
                self.wiki = wiki
                self._hasFlaggedRevs = self.wiki.hasFlaggedRevs()
                self._prefetch = prefetch
                self._spawn = spawn
                self.chunkInfo = chunkInfo
+               self.checkpointFile = checkpointFile
                self._chunkToDo = chunkToDo
                self._singleJob = singleJob
                self._runInfoFile = runInfoFile
@@ -505,9 +506,21 @@
                            self._singleJob[-9:] == 'recombine' or 
                            self._singleJob == 'noop' or 
                            self._singleJob == 'xmlpagelogsdump' or
-                           self._singleJob == 'pagetitlesdump'):
+                           self._singleJob == 'pagetitlesdump' or
+                           self._singleJob.endswith('recombine')):
                                raise BackupError("You cannot specify a chunk 
with the job %s, exiting.\n" % self._singleJob)
 
+               if (self._singleJob and self.checkpointFile):
+                       if (self._singleJob[-5:] == 'table' or 
+                           self._singleJob[-9:] == 'recombine' or 
+                           self._singleJob == 'noop' or 
+                           self._singleJob == 'xmlpagelogsdump' or
+                           self._singleJob == 'pagetitlesdump' or
+                           self._singleJob == 'abstractsdump' or
+                           self._singleJob == 'xmlstubsdump' or
+                           self._singleJob.endswith('recombine')):
+                               raise BackupError("You cannot specify a 
checkpoint file with the job %s, exiting.\n" % self._singleJob)
+
                self.dumpItems = [PrivateTable("user", "usertable", "User 
account data."),
                        PrivateTable("watchlist", "watchlisttable", "Users' 
watchlist settings."),
                        PrivateTable("ipblocks", "ipblockstable", "Data for 
blocks of IP addresses, ranges, and users."),
@@ -555,7 +568,7 @@
                        XmlDump("articles",
                                "articlesdump",
                                "<big><b>Articles, templates, image 
descriptions, and primary meta-pages.</b></big>",
-                               "This contains current versions of article 
content, and is the archive most mirror sites will probably want.", 
self.findItemByName('xmlstubsdump'), self._prefetch, self._spawn, self.wiki, 
self._getChunkToDo("articlesdump"), self.chunkInfo.getPagesPerChunkHistory(), 
checkpoints))
+                               "This contains current versions of article 
content, and is the archive most mirror sites will probably want.", 
self.findItemByName('xmlstubsdump'), self._prefetch, self._spawn, self.wiki, 
self._getChunkToDo("articlesdump"), self.chunkInfo.getPagesPerChunkHistory(), 
checkpoints, self.checkpointFile))
                if (self.chunkInfo.chunksEnabled()):
                        
self.dumpItems.append(RecombineXmlDump("articlesdumprecombine", 
"<big><b>Recombine articles, templates, image descriptions, and primary 
meta-pages.</b></big>","This contains current versions of article content, and 
is the archive most mirror sites will probably want.",  
self.findItemByName('articlesdump')))
                
@@ -563,7 +576,7 @@
                        XmlDump("meta-current",
                                "metacurrentdump",
                                "All pages, current versions only.",
-                               "Discussion and user pages are included in this 
complete archive. Most mirrors won't want this extra material.", 
self.findItemByName('xmlstubsdump'), self._prefetch, self._spawn, self.wiki, 
self._getChunkToDo("metacurrentdump"), 
self.chunkInfo.getPagesPerChunkHistory(), checkpoints))
+                               "Discussion and user pages are included in this 
complete archive. Most mirrors won't want this extra material.", 
self.findItemByName('xmlstubsdump'), self._prefetch, self._spawn, self.wiki, 
self._getChunkToDo("metacurrentdump"), 
self.chunkInfo.getPagesPerChunkHistory(), checkpoints, self.checkpointFile))
                        
                if (self.chunkInfo.chunksEnabled()):
                        
self.dumpItems.append(RecombineXmlDump("metacurrentdumprecombine", "Recombine 
all pages, current versions only.","Discussion and user pages are included in 
this complete archive. Most mirrors won't want this extra material.", 
self.findItemByName('metacurrentdump')))
@@ -582,7 +595,7 @@
                                   "metahistorybz2dump",
                                   "All pages with complete page edit history 
(.bz2)",
                                   "These dumps can be *very* large, 
uncompressing up to 20 times the archive download size. " +
-                                  "Suitable for archival and statistical use, 
most mirror sites won't want or need this.", 
self.findItemByName('xmlstubsdump'), self._prefetch, self._spawn, self.wiki, 
self._getChunkToDo("metahistorybz2dump"), 
self.chunkInfo.getPagesPerChunkHistory(), checkpoints))
+                                  "Suitable for archival and statistical use, 
most mirror sites won't want or need this.", 
self.findItemByName('xmlstubsdump'), self._prefetch, self._spawn, self.wiki, 
self._getChunkToDo("metahistorybz2dump"), 
self.chunkInfo.getPagesPerChunkHistory(), checkpoints, self.checkpointFile))
                if (self.chunkInfo.chunksEnabled() and 
self.chunkInfo.recombineHistory()):
                        self.dumpItems.append(
                                RecombineXmlDump("metahistorybz2dumprecombine",
@@ -594,7 +607,7 @@
                                          "metahistory7zdump",
                                          "All pages with complete edit history 
(.7z)",
                                          "These dumps can be *very* large, 
uncompressing up to 100 times the archive download size. " +
-                                         "Suitable for archival and 
statistical use, most mirror sites won't want or need this.", 
self.findItemByName('metahistorybz2dump'), self.wiki, 
self._getChunkToDo("metahistory7zdump"), 
self.chunkInfo.getPagesPerChunkHistory()))
+                                         "Suitable for archival and 
statistical use, most mirror sites won't want or need this.", 
self.findItemByName('metahistorybz2dump'), self.wiki, 
self._getChunkToDo("metahistory7zdump"), 
self.chunkInfo.getPagesPerChunkHistory(), self.checkpointFile))
                if (self.chunkInfo.chunksEnabled() and 
self.chunkInfo.recombineHistory()):
                        self.dumpItems.append(
                                
RecombineXmlRecompressDump("metahistory7zdumprecombine",
@@ -1454,7 +1467,7 @@
                return os.path.join(self.wiki.publicDir(), self.wiki.date)
 
 class Runner(object):
-       def __init__(self, wiki, prefetch=True, spawn=True, job=None, 
restart=False, notice="", dryrun = False, loggingEnabled=False, chunkToDo = 
False):
+       def __init__(self, wiki, prefetch=True, spawn=True, job=None, 
restart=False, notice="", dryrun = False, loggingEnabled=False, chunkToDo = 
False, checkpointFile = None):
                self.wiki = wiki
                self.dbName = wiki.dbName
                self.prefetch = prefetch
@@ -1465,7 +1478,18 @@
                self.log = None
                self.dryrun = dryrun
                self._chunkToDo = chunkToDo
+               self.checkpointFile = None
 
+               if (checkpointFile):
+                       f = DumpFilename(self.wiki)
+                       f.newFromFilename(checkpointFile)
+                       # we should get chunk if any
+                       if not self._chunkToDo and f.chunkInt:
+                               self._chunkToDo = f.chunkInt
+                       elif self._chunkToDo and f.chunkInt and self._chunkToDo 
!= f.chunkInt:
+                               raise BackupError("specifed chunk to do does 
not match chunk of checkpoint file %s to redo", self.checkpointFile)
+                       self.checkpointFile = f
+
                self._loggingEnabled = loggingEnabled
                self._statusEnabled = True
                self._checksummerEnabled = True
@@ -1514,7 +1538,7 @@
                self.checksums = Checksummer(self.wiki, self.dumpDir, 
self._checksummerEnabled)
 
                # some or all of these dumpItems will be marked to run
-               self.dumpItemList = DumpItemList(self.wiki, self.prefetch, 
self.spawn, self._chunkToDo, self.jobRequested, self.chunkInfo, 
self.runInfoFile, self.dumpDir)
+               self.dumpItemList = DumpItemList(self.wiki, self.prefetch, 
self.spawn, self._chunkToDo, self.checkpointFile, self.jobRequested, 
self.chunkInfo, self.runInfoFile, self.dumpDir)
                self.status = Status(self.wiki, self.dumpDir, 
self.dumpItemList.dumpItems, self.checksums, self._statusEnabled, 
self.htmlNoticeFile, self.logAndPrint)
 
        def logQueueReader(self,log):
@@ -1662,11 +1686,11 @@
                                        item.start(self)
                                        self.status.updateStatusFiles()
                                        
self.runInfoFile.saveDumpRunInfoFile(self.dumpItemList.reportDumpRunInfo())
-                                       try:
-                                               item.dump(self)
-                                       except Exception, ex:
-                                               self.debug("*** exception! " + 
str(ex))
-                                               item.setStatus("failed")
+#                                      try:
+                                       item.dump(self)
+#                                      except Exception, ex:
+#                                              self.debug("*** exception! " + 
str(ex))
+#                                              item.setStatus("failed")
                                        if item.status() == "failed":
                                                self.runHandleFailure()
                                        else:
@@ -1696,11 +1720,11 @@
                                item.start(self)
                                self.status.updateStatusFiles()
                                
self.runInfoFile.saveDumpRunInfoFile(self.dumpItemList.reportDumpRunInfo())
-                               try:
-                                       item.dump(self)
-                               except Exception, ex:
-                                       self.debug("*** exception! " + str(ex))
-                                       item.setStatus("failed")
+#                              try:
+                               item.dump(self)
+#                              except Exception, ex:
+#                                      self.debug("*** exception! " + str(ex))
+#                                      item.setStatus("failed")
                                if item.status() == "failed":
                                        self.runHandleFailure()
                                else:
@@ -1873,6 +1897,8 @@
                        self._chunksEnabled = False
                if not hasattr(self, '_checkpointsEnabled'):
                        self._checkpointsEnabled = False
+               if not hasattr(self, 'checkpointFile'):
+                       self.checkpointFile = False
 
        def name(self):
                return self.runInfo.name()
@@ -1931,11 +1957,11 @@
                                      
        def dump(self, runner):
                """Attempt to run the operation, updating progress/status 
info."""
-               try:
-                       self.run(runner)
-               except Exception, ex:
-                       self.setStatus("failed")
-                       raise ex
+#              try:
+               self.run(runner)
+#              except Exception, ex:
+#                      self.setStatus("failed")
+#                      raise ex
                self.setStatus("done")
 
        def run(self, runner):
@@ -2026,6 +2052,10 @@
 
        def cleanupOldFiles(self, dumpDir, chunks = False):
                if (runner._cleanupOldFilesEnabled):
+                       if (self.checkpointFile):
+                               # we only rerun this one, so just remove this 
one
+                               if 
exists(dumpDir.filenamePublicPath(self.checkpointFile)):
+                                       
os.remove(dumpDir.filenamePublicPath(self.checkpointFile))
                        files = self.listOutputFilesForCleanup(dumpDir)
                        for f in files:
                                if exists(dumpDir.filenamePublicPath(f)):
@@ -2266,6 +2296,10 @@
                if (dumpNames == None):
                        dumpNames = [ self.dumpName ]
                files = []
+               if (self.checkpointFile):
+                       files.append(self.checkpointFile)
+                       return files
+
                if (self._checkpointsEnabled):
                        # we will pass list of chunks or chunkToDo, or False, 
depending on the job setup.
                        
files.extend(self.listCheckpointFilesPerChunkExisting(dumpDir, 
self.getChunkList(), dumpNames))
@@ -2285,6 +2319,10 @@
                if (dumpNames == None):
                        dumpNames = [ self.dumpName ]
                files = []
+               if (self.checkpointFile):
+                       files.append(self.checkpointFile)
+                       return files
+
                if (self._checkpointsEnabled):
                        # we will pass list of chunks or chunkToDo, or False, 
depending on the job setup.
                        
files.extend(self.listCheckpointFilesPerChunkExisting(dumpDir, 
self.getChunkList(), dumpNames))
@@ -2303,6 +2341,10 @@
                if (dumpNames == None):
                        dumpNames = [ self.dumpName ]
                files = []
+               if (self.checkpointFile):
+                       files.append(self.checkpointFile)
+                       return files
+
                if (self._checkpointsEnabled):
                        # we will pass list of chunks or chunkToDo, or False, 
depending on the job setup.
                        
files.extend(self.listTempFilesPerChunkExisting(dumpDir, self.getChunkList(), 
dumpNames))
@@ -2321,7 +2363,11 @@
                if (dumpNames == None):
                        dumpNames = [ self.dumpName ]
                files = []
-               if (self._checkpointsEnabled):
+               if (self.checkpointFile):
+                       files.append(self.checkpointFile)
+                       return files
+
+               if (self._checkpointsEnabled):
                        # we will pass list of chunks or chunkToDo, or False, 
depending on the job setup.
                        
files.extend(self.listCheckpointFilesPerChunkExisting(dumpDir, 
self.getChunkList(), dumpNames))
                        
files.extend(self.listTempFilesPerChunkExisting(dumpDir, self.getChunkList(), 
dumpNames))
@@ -2618,7 +2664,7 @@
 
 class XmlDump(Dump):
        """Primary XML dumps, one section at a time."""
-       def __init__(self, subset, name, desc, detail, itemForStubs,  prefetch, 
spawn, wiki, chunkToDo, chunks = False, checkpoints = False):
+       def __init__(self, subset, name, desc, detail, itemForStubs,  prefetch, 
spawn, wiki, chunkToDo, chunks = False, checkpoints = False, checkpointFile = 
None):
                self._subset = subset
                self._detail = detail
                self._desc = desc
@@ -2634,6 +2680,11 @@
                self.itemForStubs = itemForStubs
                if checkpoints:
                        self._checkpointsEnabled = True
+               self.checkpointFile = checkpointFile
+               if self.checkpointFile:
+                       # we don't checkpoint the checkpoint file.
+                       self._checkpointsEnabled = False
+
                Dump.__init__(self, name, desc)
 
        def getDumpNameBase(self):
@@ -2670,12 +2721,17 @@
                                        break
                        if len(inputFiles) > 1:
                                raise BackupError("Trouble finding stub files 
for xml dump run")
-               for f in inputFiles:
-                       # we should convert the input file to an output file I 
guess
-                       # we write regular files
-                       outputFile = DumpFilename(runner.wiki, f.date, 
f.dumpName, f.fileType, self.fileExt)
-                       series = self.buildCommand(runner, f)
+
+               if self.checkpointFile:
+                       series = self.buildCommand(runner, self.checkpointFile)
                        commands.append(series)
+               else:
+                       for f in inputFiles:
+                               # we should convert the input file to an output 
file I guess
+                               # we write regular files
+                               outputFile = DumpFilename(self.wiki, f.date, 
f.dumpName, f.fileType, self.fileExt)
+                               series = self.buildCommand(runner, f)
+                               commands.append(series)
 
                error = runner.runCommand(commands, 
callbackStderr=self.progressCallback, callbackStderrArg=runner)
                truncationError = self.checkForTruncatedFiles(runner)
@@ -2690,7 +2746,7 @@
                        else:
                                files = 
self.listRegularFilesPerChunkExisting(runner.dumpDir, self.getChunkList(), [ 
self.dumpName ])
                        for f in files:
-                               f = 
DumpFile(runner.wiki,runner.dumpDir.filenamePublicPath(f))
+                               f = 
DumpFile(self.wiki,runner.dumpDir.filenamePublicPath(f))
                                if (f.checkIfTruncated()):
                                        runner.logAndPrint("file %s is 
truncated, moving out of the way" % f.filename )
                                        f.rename( f.filename + ".truncated" )
@@ -2707,27 +2763,76 @@
                # do we need checkpoints? ummm
                xmlbz2 = runner.dumpDir.filenamePublicPath(f)
 
-               if (not exists( runner.wiki.config.bzip2 ) ):
-                       raise BackupError("bzip2 command %s not found" % 
runner.wiki.config.bzip2)
-               if runner.wiki.config.bzip2[-6:] == "dbzip2":
+               if (not exists( self.wiki.config.bzip2 ) ):
+                       raise BackupError("bzip2 command %s not found" % 
self.wiki.config.bzip2)
+               if self.wiki.config.bzip2[-6:] == "dbzip2":
                        bz2mode = "dbzip2"
                else:
                        bz2mode = "bzip2"
                return "--output=%s:%s" % (bz2mode, xmlbz2)
 
+       def writePartialStub(self, inputFile, outputFile, startPageID, 
endPageID):
+               if (not exists( self.wiki.config.writeuptopageid ) ):
+                       raise BackupError("writeuptopageid command %s not 
found" % self.wiki.config.writeuptopageid)
+               writeuptopageid = self.wiki.config.writeuptopageid
+
+               inputFilePath = runner.dumpDir.filenamePublicPath(inputFile)
+               outputFilePath = 
os.path.join(self.wiki.config.tempDir,outputFile.filename)
+               if inputFile.fileExt == "gz":
+                       command1 =  "%s -dc %s" % (self.wiki.config.gzip, 
inputFilePath )
+                       command2 = "%s > %s" % (self.wiki.config.gzip, 
outputFilePath )
+               elif inputFile.fileExt == '7z':
+                       command1 =  "%s e -si %s" % (self.wiki.config.sevenzip, 
inputFilePath )
+                       command2 =  "%s e -so %s" % (self.wiki.config.sevenzip, 
outputFilePath )
+               elif inputFile.fileExt == 'bz':
+                       command1 =  "%s -dc %s" % (self.wiki.config.bzip2, 
inputFilePath )
+                       command2 =  "%s > %s" % (self.wiki.config.bzip2, 
outputFilePath ) 
+               else:
+                       raise BackupError("unknown stub file extension %s" % 
inputFile.fileExt)
+               command = [ command1 + ( "| %s %s %s |" % 
(self.wiki.config.writeuptopageid, startPageID, endPageID) ) + command2 ]
+
+               pipeline = [ command ]
+               series = [ pipeline ]
+               error = runner.runCommand([ series ], shell = True)
+               if (error):
+                       raise BackupError("failed to write partial stub file 
%s" % outputFile.filename)
+
        def buildCommand(self, runner, f):
                """Build the command line for the dump, minus output and filter 
options"""
 
-               if (self._checkpointsEnabled):
+               if (self.checkpointFile):
+                       outputFile = f
+                       print "outputFile is ", outputFile.filename
+               elif (self._checkpointsEnabled):
                        # we write a temp file, it will be checkpointed every 
so often.
-                       outputFile = DumpFilename(runner.wiki, f.date, 
self.dumpName, f.fileType, self.fileExt, f.chunk, f.checkpoint, temp = True)
+                       outputFile = DumpFilename(self.wiki, f.date, 
self.dumpName, f.fileType, self.fileExt, f.chunk, f.checkpoint, temp = True)
                else:
                        # we write regular files
-                       outputFile = DumpFilename(runner.wiki, f.date, 
self.dumpName, f.fileType, self.fileExt, f.chunk, checkpoint = False, temp = 
False)
+                       outputFile = DumpFilename(self.wiki, f.date, 
self.dumpName, f.fileType, self.fileExt, f.chunk, checkpoint = False, temp = 
False)
 
                # Page and revision data pulled from this skeleton dump...
-               stubOption = "--stub=gzip:%s" % 
runner.dumpDir.filenamePublicPath(f)
+               # FIXME we need the stream wrappers for proper use of 
writeupto. this is a hack.
+               if (self.checkpointFile):
+                       # fixme I now have this code in a couple places, make 
it a function.
+                       if not self.dumpName.startswith(self.getDumpNameBase()):
+                               raise BackupError("dumpName %s of unknown form 
for this job" % self.dumpName)
+                       dumpName = self.dumpName[len(self.getDumpNameBase()):]
+                       stubDumpNames = self.itemForStubs.listDumpNames()
+                       for s in stubDumpNames:
+                               if s.endswith(dumpName):
+                                       stubDumpName = s
 
+                       stubInputFilename = 
self.checkpointFile.newFilename(stubDumpName, self.itemForStubs.getFileType(), 
self.itemForStubs.getFileExt(), self.checkpointFile.date, 
self.checkpointFile.chunk)
+                       stubInputFile = DumpFilename(self.wiki)
+                       stubInputFile.newFromFilename(stubInputFilename)
+                       stubOutputFilename = 
self.checkpointFile.newFilename(stubDumpName, self.itemForStubs.getFileType(), 
self.itemForStubs.getFileExt(), self.checkpointFile.date, 
self.checkpointFile.chunk, self.checkpointFile.checkpoint)
+                       stubOutputFile = DumpFilename(self.wiki)
+                       stubOutputFile.newFromFilename(stubOutputFilename)
+                       self.writePartialStub(stubInputFile, stubOutputFile, 
self.checkpointFile.firstPageID, str(int(self.checkpointFile.lastPageID) + 1))
+                       stubOption = "--stub=gzip:%s" % 
os.path.join(self.wiki.config.tempDir, stubOutputFile.filename)
+               else:
+                       stubOption = "--stub=gzip:%s" % 
runner.dumpDir.filenamePublicPath(f)
+
                # Try to pull text from the previous run; most stuff hasn't 
changed
                #Source=$OutputDir/pages_$section.xml.bz2
                sources = []
@@ -2753,21 +2858,21 @@
                        prefetch = ""
 
                if self._spawn:
-                       spawn = "--spawn=%s" % (runner.wiki.config.php)
+                       spawn = "--spawn=%s" % (self.wiki.config.php)
                else:
                        spawn = ""
 
-               if (not exists( runner.wiki.config.php ) ):
-                       raise BackupError("php command %s not found" % 
runner.wiki.config.php)
+               if (not exists( self.wiki.config.php ) ):
+                       raise BackupError("php command %s not found" % 
self.wiki.config.php)
 
-               if (self.wiki.config.checkpointTime):
-                       checkpointTime = "--maxtime=%s" % 
(runner.wiki.config.checkpointTime)
+               if (self._checkpointsEnabled):
+                       checkpointTime = "--maxtime=%s" % 
(self.wiki.config.checkpointTime)
                        checkpointFile = "--checkpointfile=%s" % 
outputFile.newFilename(outputFile.dumpName, outputFile.fileType, 
outputFile.fileExt, outputFile.date, outputFile.chunk, "p%sp%s", None)
                else:
                        checkpointTime = ""
                        checkpointFile = ""
-               dumpCommand = [ "%s" % runner.wiki.config.php,
-                               "-q", "%s/maintenance/dumpTextPass.php" % 
runner.wiki.config.wikiDir,
+               dumpCommand = [ "%s" % self.wiki.config.php,
+                               "-q", "%s/maintenance/dumpTextPass.php" % 
self.wiki.config.wikiDir,
                                "--wiki=%s" % runner.dbName,
                                "%s" % stubOption,
                                "%s" % prefetch,
@@ -2853,7 +2958,7 @@
                        startPageID = 1
                        endPageID = None
 
-               dumps = runner.wiki.dumpDirs()
+               dumps = self.wiki.dumpDirs()
                dumps.sort()
                dumps.reverse()
                for date in dumps:
@@ -2978,17 +3083,17 @@
        # 200 files, right?
        def buildCommand(self, runner, outputFiles):
                # FIXME need shell escape
-               if (not exists( runner.wiki.config.bzip2 ) ):
-                       raise BackupError("bzip2 command %s not found" % 
runner.wiki.config.bzip2)
-               if (not exists( runner.wiki.config.sevenzip ) ):
-                       raise BackupError("7zip command %s not found" % 
runner.wiki.config.sevenzip)
+               if (not exists( self.wiki.config.bzip2 ) ):
+                       raise BackupError("bzip2 command %s not found" % 
self.wiki.config.bzip2)
+               if (not exists( self.wiki.config.sevenzip ) ):
+                       raise BackupError("7zip command %s not found" % 
self.wiki.config.sevenzip)
 
                commandSeries = []
                for f in outputFiles:
-                       inputFile = DumpFilename(runner.wiki, None, f.dumpName, 
f.fileType, self.itemForRecompression.fileExt, f.chunk, f.checkpoint) 
+                       inputFile = DumpFilename(self.wiki, None, f.dumpName, 
f.fileType, self.itemForRecompression.fileExt, f.chunk, f.checkpoint) 
                        outfile = runner.dumpDir.filenamePublicPath(f)
                        infile = runner.dumpDir.filenamePublicPath(inputFile)
-                       commandPipe = [ [ "%s -dc %s | %s a -si %s"  % 
(runner.wiki.config.bzip2, infile, runner.wiki.config.sevenzip, outfile) ] ]
+                       commandPipe = [ [ "%s -dc %s | %s a -si %s"  % 
(self.wiki.config.bzip2, infile, self.wiki.config.sevenzip, outfile) ] ]
                        commandSeries.append(commandPipe)
                return(commandSeries)
 
@@ -3107,10 +3212,10 @@
                        if not len(inputFiles):
                                self.setStatus("failed")
                                raise BackupError("No input files for %s found" 
% self.name())
-                       if (not exists( runner.wiki.config.sevenzip ) ):
-                               raise BackupError("sevenzip command %s not 
found" % runner.wiki.config.sevenzip)
-                       compressionCommand = "%s a -si" % 
runner.wiki.config.sevenzip
-                       uncompressionCommand = [ "%s" % 
runner.wiki.config.sevenzip, "e", "-so" ] 
+                       if (not exists( self.wiki.config.sevenzip ) ):
+                               raise BackupError("sevenzip command %s not 
found" % self.wiki.config.sevenzip)
+                       compressionCommand = "%s a -si" % 
self.wiki.config.sevenzip
+                       uncompressionCommand = [ "%s" % 
self.wiki.config.sevenzip, "e", "-so" ] 
 
                        recombineCommandString = 
self.buildRecombineCommandString(runner, files, outputFile, compressionCommand, 
uncompressionCommand )
                        recombineCommand = [ recombineCommandString ]
@@ -3361,7 +3466,9 @@
        if message:
                print message
        print "Usage: python worker.py [options] [wikidbname]"
-       print "Options: --chunk, --configfile, --date, --job, --addnotice, 
--delnotice, --force, --noprefetch, --nospawn, --restartfrom, --log"
+       print "Options: --checkpoint, --chunk, --configfile, --date, --job, 
--addnotice, --delnotice, --force, --noprefetch, --nospawn, --restartfrom, 
--log"
+       print "--checkpoint:  Specify the name of the checkpoint file to rerun 
(requires --job,"
+       print "               depending on the file this may imply --chunk)"
        print "--chunk:       Specify the number of the chunk to rerun (use 
with a specific job"
        print "               to rerun, only if parallel jobs (chunks) are 
enabled)."
        print "--configfile:  Specify an alternative configuration file to 
read."
@@ -3407,10 +3514,11 @@
                htmlNotice = ""
                dryrun = False
                chunkToDo = False
+               checkpointFile = None
 
                try:
                        (options, remainder) = getopt.gnu_getopt(sys.argv[1:], 
"",
-                                                                ['date=', 
'job=', 'configfile=', 'addnotice=', 'delnotice', 'force', 'dryrun', 
'noprefetch', 'nospawn', 'restartfrom', 'log', 'chunk=' ])
+                                                                ['date=', 
'job=', 'configfile=', 'addnotice=', 'delnotice', 'force', 'dryrun', 
'noprefetch', 'nospawn', 'restartfrom', 'log', 'chunk=', 'checkpoint=' ])
                except:
                        usage("Unknown option specified")
 
@@ -3419,6 +3527,8 @@
                                date = val
                        elif opt == "--configfile":
                                configFile = val
+                       elif opt == '--checkpoint':
+                               checkpointFile = val
                        elif opt == '--chunk':
                                chunkToDo = int(val)
                        elif opt == "--force":
@@ -3452,6 +3562,10 @@
                        usage("--chunk option requires a specific job for which 
to rerun that chunk")
                if (chunkToDo and restart):
                        usage("--chunk option can be specified only for one 
specific job")
+               if checkpointFile and (len(remainder) == 0):
+                       usage("--checkpoint option requires the name of a 
wikidb to be specified")
+               if checkpointFile and not jobRequested:
+                       usage("--chekcpoint option requires --job and the job 
from which to restart")
 
                # allow alternate config file
                if (configFile):
@@ -3488,7 +3602,7 @@
                                date = TimeUtils.today()
                        wiki.setDate(date)
 
-                       runner = Runner(wiki, prefetch, spawn, jobRequested, 
restart, htmlNotice, dryrun, enableLogging, chunkToDo)
+                       runner = Runner(wiki, prefetch, spawn, jobRequested, 
restart, htmlNotice, dryrun, enableLogging, chunkToDo, checkpointFile)
                        if (restart):
                                print "Running %s, restarting from job %s..." % 
(wiki.dbName, jobRequested)
                        elif (jobRequested):


_______________________________________________
MediaWiki-CVS mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs

[MediaWiki-CVS] SVN: [95732] branches/ariel/xmldumps-backup

Reply via email to