ArielGlenn has submitted this change and it was merged.
Change subject: adds-changes: generate prev maxrevid if it's missing, small
code cleanups
......................................................................
adds-changes: generate prev maxrevid if it's missing, small code cleanups
* remove unneeded imports
* write out maxrevid for previous run if it's missing and
use that for current run boundary
* log class instead of 'if verbose' everywhere
* turn retrieval of current max revid and prev max revid into
dump steps like everything else
Change-Id: I7fc3a1768b1487e6ee67f9170936a6862033c7bf
---
M xmldumps-backup/incrementals/generateincrementals.py
1 file changed, 130 insertions(+), 108 deletions(-)
Approvals:
ArielGlenn: Looks good to me, approved
jenkins-bot: Verified
diff --git a/xmldumps-backup/incrementals/generateincrementals.py
b/xmldumps-backup/incrementals/generateincrementals.py
index 42761f2..ad2553a 100644
--- a/xmldumps-backup/incrementals/generateincrementals.py
+++ b/xmldumps-backup/incrementals/generateincrementals.py
@@ -3,23 +3,16 @@
# from the previous adds changes dump, dump stubs, dump history file
# based on stubs.
-import ConfigParser
import getopt
import os
-import re
import sys
-import WikiDump
-from WikiDump import FileUtils, TimeUtils, MiscUtils
-import subprocess
-import socket
import time
-import IncrDumpLib
-from IncrDumpLib import Lock, Config, RunSimpleCommand, MultiVersion
+from IncrDumpLib import Config, RunSimpleCommand, MultiVersion
from IncrDumpLib import DBServer, IncrementDir, IncrementDumpsError
-from IncrDumpLib import MaxRevIDFile, StatusFile, IndexFile, IncrDumpLockFile
+from IncrDumpLib import MaxRevIDFile, StatusFile, IndexFile
from IncrDumpLib import StubFile, RevsFile, MD5File, IncDumpDirs
-from IncrDumpLib import IncrDumpLock, MaxRevIDLock, StatusInfo
-from subprocess import Popen, PIPE
+from IncrDumpLib import IncrDumpLock, StatusInfo
+from WikiDump import FileUtils, TimeUtils
from os.path import exists
import hashlib
import traceback
@@ -27,26 +20,26 @@
class MaxRevID(object):
- def __init__(self, config, date, cutoff):
+ def __init__(self, config, date, cutoff, dryrun):
self._config = config
self.date = date
self.cutoff = cutoff
- self.maxID = 0
+ self.dryrun = dryrun
+ self.maxID = None
def getMaxRevID(self, wikiName):
query = ("select rev_id from revision where rev_timestamp < \"%s\" "
"order by rev_timestamp desc limit 1" % self.cutoff)
db = DBServer(self._config, wikiName)
- # get the result
- c = db.buildSqlCommand(query)
self.maxID = RunSimpleCommand.runWithOutput(db.buildSqlCommand(query),
shell=True)
def recordMaxRevID(self, wikiName):
self.getMaxRevID(wikiName)
- fileObj = MaxRevIDFile(self._config, self.date, wikiName)
- FileUtils.writeFileInPlace(fileObj.getPath(), self.maxID,
- self._config.fileperms)
+ if not self.dryrun:
+ fileObj = MaxRevIDFile(self._config, self.date, wikiName)
+ FileUtils.writeFileInPlace(fileObj.getPath(), self.maxID,
+ self._config.fileperms)
def readMaxRevIDFromFile(self, wikiName, date=None):
if date is None:
@@ -84,30 +77,28 @@
for w in self._config.allWikisList:
result = self.doOneWiki(w)
if result:
- if (self.verbose):
- print "result for wiki ", w, "is ", result
+ log(self.verbose, "result for wiki %s is %s"
+ % (w, result))
text = text + "<li>" + result + "</li>\n"
indexText = (self._config.readTemplate("incrs-index.html")
% {"items": text})
FileUtils.writeFileInPlace(self.indexFile.getPath(),
indexText, self._config.fileperms)
- def doOneWiki(self, w):
+ def doOneWiki(self, w, date=None):
if (w not in self._config.privateWikisList and
w not in self._config.closedWikisList):
- self.incrDumpsDirs = IncDumpDirs(self._config, w)
+ incrDumpsDirs = IncDumpDirs(self._config, w)
if not exists(self.incrDir.getIncDirNoDate(w)):
- if (self.verbose):
- print "No dump for wiki ", w
- next
- if date:
+ log(self.verbose, "No dump for wiki %s" % w)
+ return
+ if date is not None:
incrDate = date
else:
- incrDate = self.incrDumpsDirs.getLatestIncrDate(True)
+ incrDate = incrDumpsDirs.getLatestIncrDate(True)
if not incrDate:
- if (self.verbose):
- print "No dump for wiki ", w
- next
+ log(self.verbose, "No dump for wiki %s" % w)
+ return
otherRunsText = "other runs: %s" % Link.makeLink(w, w)
try:
@@ -123,8 +114,8 @@
try:
stub = StubFile(self._config, incrDate, w)
(stubDate, stubSize) = stub.getFileInfo()
- if verbose:
- print "stub for", w, stubDate, stubSize
+ log(self.verbose, "stub for %s %s %s"
+ % (w, safe(stubDate), safe(stubSize)))
if stubDate:
stubText = ("stubs: %s (size %s)"
% (Link.makeLink(
@@ -137,8 +128,8 @@
revs = RevsFile(self._config, incrDate, w)
(revsDate, revsSize) = revs.getFileInfo()
- if verbose:
- print "revs for", w, revsDate, revsSize
+ log(verbose, "revs for %s %s %s"
+ % (w, safe(revsDate), safe(revsSize)))
if revsDate:
revsText = ("revs: %s (size %s)"
% (Link.makeLink(os.path.join(
@@ -149,17 +140,15 @@
stat = StatusFile(self._config, incrDate, w)
statContents = FileUtils.readFile(stat.getPath())
- if verbose:
- print "status for", w, statContents
+ log(self.verbose, "status for %s %s" % (w, safe(statContents)))
if statContents:
statText = "(%s)" % (statContents)
else:
statText = None
except:
- if (self.verbose):
- print ("Error encountered, no information available"
- " for wiki %s" % w)
+ log(self.verbose, "Error encountered, no information available"
+ " for wiki %s" % w)
return ("<strong>%s</strong> Error encountered,"
" no information available | %s" % (w, otherRunsText))
@@ -177,8 +166,8 @@
except:
if (self.verbose):
traceback.print_exc(file=sys.stdout)
- print ("Error encountered formatting information"
- " for wiki %s" % w)
+ log(self.verbose, "Error encountered formatting information"
+ " for wiki %s" % w)
return ("Error encountered formatting information"
" for wiki %s" % w)
@@ -205,7 +194,8 @@
self.doIndexUpdate = doIndexUpdate
self.dryrun = dryrun
self.forcerun = forcerun
- self.maxRevIDObj = MaxRevID(self._config, self.date, cutoff)
+ self.maxRevIDObj = MaxRevID(self._config, self.date, cutoff,
+ self.dryrun)
self.statusInfo = StatusInfo(self._config, self.date, self.wikiName)
self.stubFile = StubFile(self._config, self.date, self.wikiName)
self.revsFile = RevsFile(self._config, self.date, self.wikiName)
@@ -218,80 +208,48 @@
self.wikiName not in self._config.closedWikisList):
if not exists(self.incrDir.getIncDir(self.wikiName)):
os.makedirs(self.incrDir.getIncDir(self.wikiName))
+
status = self.statusInfo.getStatus()
if status == "done" and not forcerun:
- if (self.verbose):
- print ("wiki %s skipped, adds/changes dump already"
- " complete" % self.wikiName)
+ log(self.verbose, "wiki %s skipped, adds/changes dump already"
+ " complete" % self.wikiName)
return retCodes.OK
+
if not dryrun:
lock = IncrDumpLock(self._config, self.date, self.wikiName)
if not lock.getLock():
- if (self.verbose):
- print ("wiki %s skipped, wiki is locked, another"
- " process should be doing the job"
- % self.wikiName)
+ log(self.verbose, "wiki %s skipped, wiki is locked,"
+ " another process should be doing the job"
+ % self.wikiName)
return retCodes.TODO
- if not dryrun:
- self.incrDumpsDirs.cleanupOldIncrDumps(self.date)
- try:
- if not self.maxRevIDObj.exists(self.wikiName):
- if self.verbose:
- print ("Wiki %s retrieving max revid from db."
- % self.wikiName)
- self.maxRevIDObj.recordMaxRevID(self.wikiName)
- except:
- if (self.verbose):
- print ("Wiki %s failed to get max revid."
- % self.wikiName)
- traceback.print_exc(file=sys.stdout)
+
+ self.incrDumpsDirs.cleanupOldIncrDumps(self.date)
+
+ log(self.verbose, "Doing run for wiki: %s" % self.wikiName)
try:
- maxRevID = self.maxRevIDObj.readMaxRevIDFromFile(self.wikiName)
- if (self.verbose):
- print "Doing run for wiki: ", self.wikiName
- if maxRevID:
- print "maxRevID is ", maxRevID
- else:
- print "no maxRevID found"
- # get the previous run with a max rev id file in it
- prevDate = self.incrDumpsDirs.getPrevIncrDate(self.date,
- revidok=True)
- if (self.verbose):
- if prevDate:
- print "prevDate is", prevDate
- else:
- print "no prevDate found"
- prevRevID = None
- if prevDate:
- prevRevID = self.maxRevIDObj.readMaxRevIDFromFile(
- self.wikiName, prevDate)
- if (self.verbose):
- if prevRevID:
- print "prevRevId is ", prevRevID
- else:
- print "no prevRevID found"
+ maxRevID = self.dumpMaxRevID()
+ if not maxRevID:
+ return retCodes.FAILED
+
+ prevRevID = self.getPrevRevID(maxRevID)
if not prevRevID:
- prevRevID = str(int(maxRevID) - 10)
- if int(prevRevID) < 1:
- prevRevID = str(1)
- else:
- # this incr will cover every revision from the last
- # incremental through the maxid we wrote out already.
- prevRevID = str(int(prevRevID) + 1)
+ return retCodes.FAILED
+
if doStubs:
- # end rev id is not included in dump
- maxRevID = str(int(maxRevID) + 1)
if not self.dumpStub(prevRevID, maxRevID):
return retCodes.FAILED
+
if doRevs:
if not self.dumpRevs():
return retCodes.FAILED
+
if not dryrun:
if not self.md5sums():
return retCodes.FAILED
self.statusInfo.setStatus("done")
lock.unlock()
+
if doIndexUpdate:
index = Index(config, date, verbose)
index.doAllWikis()
@@ -301,9 +259,60 @@
if not dryrun:
lock.unlock()
return retCodes.FAILED
- if (self.verbose):
- print "Success! Wiki", self.wikiName, "incremental dump complete."
+ log(self.verbose, "Success! Wiki %s incremental dump complete."
+ % self.wikiName)
return retCodes.OK
+
+ def dumpMaxRevID(self):
+ if not self.maxRevIDObj.exists(self.wikiName):
+ log(self.verbose, "Wiki %s retrieving max revid from db."
+ % self.wikiName)
+ self.maxRevIDObj.recordMaxRevID(self.wikiName)
+ maxRevID = self.maxRevIDObj.maxID
+ else:
+ maxRevID = self.maxRevIDObj.readMaxRevIDFromFile(
+ self.wikiName)
+
+ # end rev id is not included in dump
+ if maxRevID is not None:
+ maxRevID = str(int(maxRevID) + 1)
+
+ log(self.verbose, "maxRevID is %s" % safe(maxRevID))
+ return maxRevID
+
+ def getPrevRevID(self, maxRevID):
+ # get the previous rundate, with or without maxrevid file
+ # we can populate that file if need be
+ prevDate = self.incrDumpsDirs.getPrevIncrDate(self.date)
+ log(self.verbose, "prevDate is %s" % safe(prevDate))
+
+ prevRevID = None
+
+ if prevDate:
+ prevRevID = self.maxRevIDObj.readMaxRevIDFromFile(
+ self.wikiName, prevDate)
+
+ if prevRevID is None:
+ log(self.verbose, "Wiki %s retrieving prevRevId from db."
+ % self.wikiName)
+ prevRevIDObj = MaxRevID(self._config, prevDate,
+ cutoffFromDate(prevDate),
+ self.dryrun)
+ prevRevIDObj.recordMaxRevID(self.wikiName)
+ prevRevID = prevRevIDObj.maxID
+ else:
+ log(self.verbose, "Wiki %s no previous runs, using %s - 10 "
+ % (self.wikiName, maxRevID))
+ prevRevID = str(int(maxRevID) - 10)
+ if int(prevRevID) < 1:
+ prevRevID = str(1)
+
+ # this incr will cover every revision from the last
+ # incremental through the maxid we wrote out already.
+ if prevRevID is not None:
+ prevRevID = str(int(prevRevID) + 1)
+ log(self.verbose, "prevRevID is %s" % safe(prevRevID))
+ return prevRevID
def dumpStub(self, startRevID, endRevID):
scriptCommand = MultiVersion.MWScriptAsArray(self._config,
@@ -320,9 +329,8 @@
else:
error = RunSimpleCommand.runWithNoOutput(command, shell=False)
if (error):
- if (self.verbose):
- print ("error producing stub files for wiki"
- % self.wikiName)
+ log(self.verbose, "error producing stub files for wiki"
+ % self.wikiName)
return False
return True
@@ -341,9 +349,8 @@
else:
error = RunSimpleCommand.runWithNoOutput(command, shell=False)
if (error):
- if (self.verbose):
- print("error producing revision text files for wiki"
- % self.wikiName)
+ log(self.verbose, "error producing revision text files"
+ " for wiki" % self.wikiName)
return False
return True
@@ -362,7 +369,6 @@
try:
md5File = MD5File(self._config, self.date, self.wikiName)
text = ""
- summer = hashlib.md5()
files = []
if self.doStubs:
files.append(self.stubFile.getPath())
@@ -375,6 +381,18 @@
return True
except:
return False
+
+
+def log(verbose, message):
+ if verbose:
+ print message
+
+
+def safe(item):
+ if item is not None:
+ return item
+ else:
+ return "None"
class IncrDumpLoop(object):
@@ -416,6 +434,13 @@
raise IncrementDumpsError("Too many consecutive failures,"
"giving up")
time.sleep(300)
+
+
+def cutoffFromDate(date):
+ return time.strftime("%Y%m%d%H%M%S",
+ time.gmtime(calendar.timegm(time.strptime(
+ date + "235900UTC", "%Y%m%d%H%M%S%Z"))
+ - config.delay))
def usage(message=None):
@@ -495,10 +520,7 @@
cutoff = time.strftime("%Y%m%d%H%M%S",
time.gmtime(time.time() - config.delay))
else:
- cutoff = time.strftime("%Y%m%d%H%M%S",
- time.gmtime(calendar.timegm(time.strptime(
- date + "235900UTC", "%Y%m%d%H%M%S%Z"))
- - config.delay))
+ cutoff = cutoffFromDate(date)
if len(remainder) > 0:
dump = IncrDump(config, date, cutoff, remainder[0], doStubs,
--
To view, visit https://gerrit.wikimedia.org/r/119943
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I7fc3a1768b1487e6ee67f9170936a6862033c7bf
Gerrit-PatchSet: 1
Gerrit-Project: operations/dumps
Gerrit-Branch: ariel
Gerrit-Owner: ArielGlenn <[email protected]>
Gerrit-Reviewer: ArielGlenn <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits