ArielGlenn has submitted this change and it was merged.

Change subject: adds-changes: generate prev maxrevid if it's missing, small code cleanups
......................................................................


adds-changes: generate prev maxrevid if it's missing, small code cleanups

* remove unneeded imports
* write out maxrevid for previous run if it's missing and
  use that for current run boundary
* use a log() helper function instead of 'if verbose' checks everywhere
* turn retrieval of current max revid and prev max revid into
  dump steps like everything else

Change-Id: I7fc3a1768b1487e6ee67f9170936a6862033c7bf
---
M xmldumps-backup/incrementals/generateincrementals.py
1 file changed, 130 insertions(+), 108 deletions(-)

Approvals:
  ArielGlenn: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/xmldumps-backup/incrementals/generateincrementals.py b/xmldumps-backup/incrementals/generateincrementals.py
index 42761f2..ad2553a 100644
--- a/xmldumps-backup/incrementals/generateincrementals.py
+++ b/xmldumps-backup/incrementals/generateincrementals.py
@@ -3,23 +3,16 @@
 # from the previous adds changes dump, dump stubs, dump history file
 # based on stubs.
 
-import ConfigParser
 import getopt
 import os
-import re
 import sys
-import WikiDump
-from WikiDump import FileUtils, TimeUtils, MiscUtils
-import subprocess
-import socket
 import time
-import IncrDumpLib
-from IncrDumpLib import Lock, Config, RunSimpleCommand, MultiVersion
+from IncrDumpLib import Config, RunSimpleCommand, MultiVersion
 from IncrDumpLib import DBServer, IncrementDir, IncrementDumpsError
-from IncrDumpLib import MaxRevIDFile, StatusFile, IndexFile, IncrDumpLockFile
+from IncrDumpLib import MaxRevIDFile, StatusFile, IndexFile
 from IncrDumpLib import StubFile, RevsFile, MD5File, IncDumpDirs
-from IncrDumpLib import IncrDumpLock, MaxRevIDLock, StatusInfo
-from subprocess import Popen, PIPE
+from IncrDumpLib import IncrDumpLock, StatusInfo
+from WikiDump import FileUtils, TimeUtils
 from os.path import exists
 import hashlib
 import traceback
@@ -27,26 +20,26 @@
 
 
 class MaxRevID(object):
-    def __init__(self, config, date, cutoff):
+    def __init__(self, config, date, cutoff, dryrun):
         self._config = config
         self.date = date
         self.cutoff = cutoff
-        self.maxID = 0
+        self.dryrun = dryrun
+        self.maxID = None
 
     def getMaxRevID(self, wikiName):
         query = ("select rev_id from revision where rev_timestamp < \"%s\" "
                  "order by rev_timestamp desc limit 1" % self.cutoff)
         db = DBServer(self._config, wikiName)
-        # get the result
-        c = db.buildSqlCommand(query)
         self.maxID = RunSimpleCommand.runWithOutput(db.buildSqlCommand(query),
                                                     shell=True)
 
     def recordMaxRevID(self, wikiName):
         self.getMaxRevID(wikiName)
-        fileObj = MaxRevIDFile(self._config, self.date, wikiName)
-        FileUtils.writeFileInPlace(fileObj.getPath(), self.maxID,
-                                   self._config.fileperms)
+        if not self.dryrun:
+            fileObj = MaxRevIDFile(self._config, self.date, wikiName)
+            FileUtils.writeFileInPlace(fileObj.getPath(), self.maxID,
+                                       self._config.fileperms)
 
     def readMaxRevIDFromFile(self, wikiName, date=None):
         if date is None:
@@ -84,30 +77,28 @@
         for w in self._config.allWikisList:
             result = self.doOneWiki(w)
             if result:
-                if (self.verbose):
-                    print "result for wiki ", w, "is ", result
+                log(self.verbose, "result for wiki %s is %s"
+                    % (w, result))
                 text = text + "<li>" + result + "</li>\n"
         indexText = (self._config.readTemplate("incrs-index.html")
                      % {"items": text})
         FileUtils.writeFileInPlace(self.indexFile.getPath(),
                                    indexText, self._config.fileperms)
 
-    def doOneWiki(self, w):
+    def doOneWiki(self, w, date=None):
         if (w not in self._config.privateWikisList and
                 w not in self._config.closedWikisList):
-            self.incrDumpsDirs = IncDumpDirs(self._config, w)
+            incrDumpsDirs = IncDumpDirs(self._config, w)
             if not exists(self.incrDir.getIncDirNoDate(w)):
-                if (self.verbose):
-                    print "No dump for wiki ", w
-                    next
-            if date:
+                log(self.verbose, "No dump for wiki %s" % w)
+                return
+            if date is not None:
                 incrDate = date
             else:
-                incrDate = self.incrDumpsDirs.getLatestIncrDate(True)
+                incrDate = incrDumpsDirs.getLatestIncrDate(True)
             if not incrDate:
-                if (self.verbose):
-                    print "No dump for wiki ", w
-                    next
+                log(self.verbose, "No dump for wiki %s" % w)
+                return
 
             otherRunsText = "other runs: %s" % Link.makeLink(w, w)
             try:
@@ -123,8 +114,8 @@
             try:
                 stub = StubFile(self._config, incrDate, w)
                 (stubDate, stubSize) = stub.getFileInfo()
-                if verbose:
-                    print "stub for", w, stubDate, stubSize
+                log(self.verbose, "stub for %s %s %s"
+                    % (w, safe(stubDate), safe(stubSize)))
                 if stubDate:
                     stubText = ("stubs: %s (size %s)"
                                 % (Link.makeLink(
@@ -137,8 +128,8 @@
 
                 revs = RevsFile(self._config, incrDate, w)
                 (revsDate, revsSize) = revs.getFileInfo()
-                if verbose:
-                    print "revs for", w, revsDate, revsSize
+                log(verbose, "revs for %s %s %s"
+                    % (w, safe(revsDate), safe(revsSize)))
                 if revsDate:
                     revsText = ("revs: %s (size %s)"
                                 % (Link.makeLink(os.path.join(
@@ -149,17 +140,15 @@
 
                 stat = StatusFile(self._config, incrDate, w)
                 statContents = FileUtils.readFile(stat.getPath())
-                if verbose:
-                    print "status for", w, statContents
+                log(self.verbose, "status for %s %s" % (w, safe(statContents)))
                 if statContents:
                     statText = "(%s)" % (statContents)
                 else:
                     statText = None
 
             except:
-                if (self.verbose):
-                    print ("Error encountered, no information available"
-                           " for wiki %s" % w)
+                log(self.verbose, "Error encountered, no information available"
+                    " for wiki %s" % w)
                 return ("<strong>%s</strong> Error encountered,"
                         " no information available | %s" % (w, otherRunsText))
 
@@ -177,8 +166,8 @@
             except:
                 if (self.verbose):
                     traceback.print_exc(file=sys.stdout)
-                    print ("Error encountered formatting information"
-                           " for wiki %s" % w)
+                log(self.verbose, "Error encountered formatting information"
+                    " for wiki %s" % w)
                 return ("Error encountered formatting information"
                         " for wiki %s" % w)
 
@@ -205,7 +194,8 @@
         self.doIndexUpdate = doIndexUpdate
         self.dryrun = dryrun
         self.forcerun = forcerun
-        self.maxRevIDObj = MaxRevID(self._config, self.date, cutoff)
+        self.maxRevIDObj = MaxRevID(self._config, self.date, cutoff,
+                                    self.dryrun)
         self.statusInfo = StatusInfo(self._config, self.date, self.wikiName)
         self.stubFile = StubFile(self._config, self.date, self.wikiName)
         self.revsFile = RevsFile(self._config, self.date, self.wikiName)
@@ -218,80 +208,48 @@
                 self.wikiName not in self._config.closedWikisList):
             if not exists(self.incrDir.getIncDir(self.wikiName)):
                 os.makedirs(self.incrDir.getIncDir(self.wikiName))
+
             status = self.statusInfo.getStatus()
             if status == "done" and not forcerun:
-                if (self.verbose):
-                    print ("wiki %s skipped, adds/changes dump already"
-                           " complete" % self.wikiName)
+                log(self.verbose, "wiki %s skipped, adds/changes dump already"
+                    " complete" % self.wikiName)
                 return retCodes.OK
+
             if not dryrun:
                 lock = IncrDumpLock(self._config, self.date, self.wikiName)
                 if not lock.getLock():
-                    if (self.verbose):
-                        print ("wiki %s skipped, wiki is locked, another"
-                               " process should be doing the job"
-                               % self.wikiName)
+                    log(self.verbose, "wiki %s skipped, wiki is locked,"
+                        " another process should be doing the job"
+                        % self.wikiName)
                     return retCodes.TODO
-                if not dryrun:
-                    self.incrDumpsDirs.cleanupOldIncrDumps(self.date)
-                    try:
-                        if not self.maxRevIDObj.exists(self.wikiName):
-                            if self.verbose:
-                                print ("Wiki %s retrieving max revid from db."
-                                       % self.wikiName)
-                            self.maxRevIDObj.recordMaxRevID(self.wikiName)
-                    except:
-                        if (self.verbose):
-                            print ("Wiki %s failed to get max revid."
-                                   % self.wikiName)
-                            traceback.print_exc(file=sys.stdout)
+
+                self.incrDumpsDirs.cleanupOldIncrDumps(self.date)
+
+            log(self.verbose, "Doing run for wiki: %s" % self.wikiName)
 
             try:
-                maxRevID = self.maxRevIDObj.readMaxRevIDFromFile(self.wikiName)
-                if (self.verbose):
-                    print "Doing run for wiki: ", self.wikiName
-                    if maxRevID:
-                        print "maxRevID is ", maxRevID
-                    else:
-                        print "no maxRevID found"
-                # get the previous run with a max rev id file in it
-                prevDate = self.incrDumpsDirs.getPrevIncrDate(self.date,
-                                                              revidok=True)
-                if (self.verbose):
-                    if prevDate:
-                        print "prevDate is", prevDate
-                    else:
-                        print "no prevDate found"
-                prevRevID = None
-                if prevDate:
-                    prevRevID = self.maxRevIDObj.readMaxRevIDFromFile(
-                        self.wikiName, prevDate)
-                    if (self.verbose):
-                        if prevRevID:
-                            print "prevRevId is ", prevRevID
-                        else:
-                            print "no prevRevID found"
+                maxRevID = self.dumpMaxRevID()
+                if not maxRevID:
+                    return retCodes.FAILED
+
+                prevRevID = self.getPrevRevID(maxRevID)
                 if not prevRevID:
-                    prevRevID = str(int(maxRevID) - 10)
-                    if int(prevRevID) < 1:
-                        prevRevID = str(1)
-                else:
-                    # this incr will cover every revision from the last
-                    # incremental through the maxid we wrote out already.
-                    prevRevID = str(int(prevRevID) + 1)
+                    return retCodes.FAILED
+
                 if doStubs:
-                    # end rev id is not included in dump
-                    maxRevID = str(int(maxRevID) + 1)
                     if not self.dumpStub(prevRevID, maxRevID):
                         return retCodes.FAILED
+
                 if doRevs:
                     if not self.dumpRevs():
                         return retCodes.FAILED
+
                 if not dryrun:
                     if not self.md5sums():
                         return retCodes.FAILED
                     self.statusInfo.setStatus("done")
                     lock.unlock()
+
                 if doIndexUpdate:
                     index = Index(config, date, verbose)
                     index.doAllWikis()
@@ -301,9 +259,60 @@
                 if not dryrun:
                     lock.unlock()
                 return retCodes.FAILED
-        if (self.verbose):
-            print "Success!  Wiki", self.wikiName, "incremental dump complete."
+        log(self.verbose, "Success!  Wiki %s incremental dump complete."
+            % self.wikiName)
         return retCodes.OK
+
+    def dumpMaxRevID(self):
+        if not self.maxRevIDObj.exists(self.wikiName):
+            log(self.verbose, "Wiki %s retrieving max revid from db."
+                % self.wikiName)
+            self.maxRevIDObj.recordMaxRevID(self.wikiName)
+            maxRevID = self.maxRevIDObj.maxID
+        else:
+            maxRevID = self.maxRevIDObj.readMaxRevIDFromFile(
+                self.wikiName)
+
+        # end rev id is not included in dump
+        if maxRevID is not None:
+            maxRevID = str(int(maxRevID) + 1)
+
+        log(self.verbose, "maxRevID is %s" % safe(maxRevID))
+        return maxRevID
+
+    def getPrevRevID(self, maxRevID):
+        # get the previous rundate, with or without maxrevid file
+        # we can populate that file if need be
+        prevDate = self.incrDumpsDirs.getPrevIncrDate(self.date)
+        log(self.verbose, "prevDate is %s" % safe(prevDate))
+
+        prevRevID = None
+
+        if prevDate:
+            prevRevID = self.maxRevIDObj.readMaxRevIDFromFile(
+                self.wikiName, prevDate)
+
+            if prevRevID is None:
+                log(self.verbose, "Wiki %s retrieving prevRevId from db."
+                    % self.wikiName)
+                prevRevIDObj = MaxRevID(self._config, prevDate,
+                                        cutoffFromDate(prevDate),
+                                        self.dryrun)
+                prevRevIDObj.recordMaxRevID(self.wikiName)
+                prevRevID = prevRevIDObj.maxID
+        else:
+            log(self.verbose, "Wiki %s no previous runs, using %s - 10 "
+                % (self.wikiName, maxRevID))
+            prevRevID = str(int(maxRevID) - 10)
+            if int(prevRevID) < 1:
+                prevRevID = str(1)
+
+        # this incr will cover every revision from the last
+        # incremental through the maxid we wrote out already.
+        if prevRevID is not None:
+            prevRevID = str(int(prevRevID) + 1)
+        log(self.verbose, "prevRevID is %s" % safe(prevRevID))
+        return prevRevID
 
     def dumpStub(self, startRevID, endRevID):
         scriptCommand = MultiVersion.MWScriptAsArray(self._config,
@@ -320,9 +329,8 @@
         else:
             error = RunSimpleCommand.runWithNoOutput(command, shell=False)
             if (error):
-                if (self.verbose):
-                    print ("error producing stub files for wiki"
-                           % self.wikiName)
+                log(self.verbose, "error producing stub files for wiki"
+                    % self.wikiName)
                 return False
         return True
 
@@ -341,9 +349,8 @@
         else:
             error = RunSimpleCommand.runWithNoOutput(command, shell=False)
             if (error):
-                if (self.verbose):
-                    print("error producing revision text files for wiki"
-                          % self.wikiName)
+                log(self.verbose, "error producing revision text files"
+                    " for wiki" % self.wikiName)
                 return False
         return True
 
@@ -362,7 +369,6 @@
         try:
             md5File = MD5File(self._config, self.date, self.wikiName)
             text = ""
-            summer = hashlib.md5()
             files = []
             if self.doStubs:
                 files.append(self.stubFile.getPath())
@@ -375,6 +381,18 @@
             return True
         except:
             return False
+
+
+def log(verbose, message):
+    if verbose:
+        print message
+
+
+def safe(item):
+    if item is not None:
+        return item
+    else:
+        return "None"
 
 
 class IncrDumpLoop(object):
@@ -416,6 +434,13 @@
                 raise IncrementDumpsError("Too many consecutive failures,"
                                           "giving up")
             time.sleep(300)
+
+
+def cutoffFromDate(date):
+    return time.strftime("%Y%m%d%H%M%S",
+                         time.gmtime(calendar.timegm(time.strptime(
+                             date + "235900UTC", "%Y%m%d%H%M%S%Z"))
+                             - config.delay))
 
 
 def usage(message=None):
@@ -495,10 +520,7 @@
         cutoff = time.strftime("%Y%m%d%H%M%S",
                                time.gmtime(time.time() - config.delay))
     else:
-        cutoff = time.strftime("%Y%m%d%H%M%S",
-                               time.gmtime(calendar.timegm(time.strptime(
-                                   date + "235900UTC", "%Y%m%d%H%M%S%Z"))
-                                   - config.delay))
+        cutoff = cutoffFromDate(date)
 
     if len(remainder) > 0:
         dump = IncrDump(config, date, cutoff, remainder[0], doStubs,

-- 
To view, visit https://gerrit.wikimedia.org/r/119943
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I7fc3a1768b1487e6ee67f9170936a6862033c7bf
Gerrit-PatchSet: 1
Gerrit-Project: operations/dumps
Gerrit-Branch: ariel
Gerrit-Owner: ArielGlenn <[email protected]>
Gerrit-Reviewer: ArielGlenn <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to