ArielGlenn has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/242464

Change subject: utils.py: pylint, fix many camelcase names. worker.py, fix indent issue
......................................................................

utils.py: pylint, fix many camelcase names. worker.py, fix indent issue
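
The renames follow the pylint/PEP 8 snake_case convention; a few
representative examples taken from this change:

    toBeRun()              -> to_run()
    saveDumpRunInfoFile()  -> save_dump_runinfo_file()
    self.totalPages        -> self.total_pages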

Change-Id: I9c5104fa802d4bf1b85c4c61bd6f6419fa7e04f1
---
M xmldumps-backup/dumps/jobs.py
M xmldumps-backup/dumps/utils.py
M xmldumps-backup/worker.py
M xmldumps-backup/xmlstreams.py
4 files changed, 220 insertions(+), 220 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/operations/dumps refs/changes/64/242464/1

diff --git a/xmldumps-backup/dumps/jobs.py b/xmldumps-backup/dumps/jobs.py
index 48c5e5a..75f599c 100644
--- a/xmldumps-backup/dumps/jobs.py
+++ b/xmldumps-backup/dumps/jobs.py
@@ -59,18 +59,18 @@
     def updated(self):
         return self.runInfo.updated()
 
-    def toBeRun(self):
-        return self.runInfo.toBeRun()
+    def to_run(self):
+        return self.runInfo.to_run()
 
     def setName(self, name):
         self.runInfo.setName(name)
 
-    def setToBeRun(self, toBeRun):
-        self.runInfo.setToBeRun(toBeRun)
+    def set_to_run(self, to_run):
+        self.runInfo.set_to_run(to_run)
 
     def setSkipped(self):
         self.setStatus("skipped")
-        self.setToBeRun(False)
+        self.set_to_run(False)
 
     # sometimes this will be called to fill in data from an old
     # dump run; in those cases we don't want to clobber the timestamp
@@ -189,7 +189,7 @@
             sys.stderr.write(line)
         self.progress = line.strip()
         runner.status.update_status_files()
-        runner.runInfoFile.saveDumpRunInfoFile(runner.dumpItemList.report_dump_runinfo())
+        runner.runInfoFile.save_dump_runinfo_file(runner.dumpItemList.report_dump_runinfo())
 
     def timeToWait(self):
         # we use wait this many secs for a command to complete that
@@ -650,7 +650,7 @@
         """Dump a table from the current DB with mysqldump, save to a gzipped 
sql file."""
         if not exists(runner.wiki.config.gzip):
             raise BackupError("gzip command %s not found" % 
runner.wiki.config.gzip)
-        commands = runner.dbServerInfo.buildSqlDumpCommand(table, 
runner.wiki.config.gzip)
+        commands = runner.dbServerInfo.build_sqldump_command(table, 
runner.wiki.config.gzip)
         return runner.save_command(commands, outfile)
 
 class PrivateTable(PublicTable):
@@ -1228,7 +1228,7 @@
                 continue
 
             # see if this job from that date was successful
-            if not runner.runInfoFile.statusOfOldDumpIsDone(runner, date, self.name(), self._desc):
+            if not runner.runInfoFile.status_of_old_dump_is_done(runner, date, self.name(), self._desc):
                 runner.debug("skipping incomplete or failed dump for prefetch date %s" % date)
                 continue
 
diff --git a/xmldumps-backup/dumps/utils.py b/xmldumps-backup/dumps/utils.py
index 02d020e..0bfbc40 100644
--- a/xmldumps-backup/dumps/utils.py
+++ b/xmldumps-backup/dumps/utils.py
@@ -12,21 +12,21 @@
 from dumps.exceptions import BackupError
 
 class MultiVersion(object):
-    def MWScriptAsString(config, maintenanceScript):
-        return " ".join(MultiVersion.MWScriptAsArray(config, 
maintenanceScript))
+    def MWScriptAsString(config, maintenance_script):
+        return " ".join(MultiVersion.MWScriptAsArray(config, 
maintenance_script))
 
-    def MWScriptAsArray(config, maintenanceScript):
-        MWScriptLocation = os.path.join(config.wikiDir, "multiversion", "MWScript.php")
-        if exists(MWScriptLocation):
-            return [MWScriptLocation, maintenanceScript]
+    def MWScriptAsArray(config, maintenance_script):
+        mw_script_location = os.path.join(config.wikiDir, "multiversion", "MWScript.php")
+        if exists(mw_script_location):
+            return [mw_script_location, maintenance_script]
         else:
-            return ["%s/maintenance/%s" % (config.wikiDir, maintenanceScript)]
+            return ["%s/maintenance/%s" % (config.wikiDir, maintenance_script)]
 
-    def MWVersion(config, dbName):
-        getVersionLocation = os.path.join(config.wikiDir, "multiversion", "getMWVersion")
-        if exists(getVersionLocation):
+    def MWVersion(config, db_name):
+        get_version_location = os.path.join(config.wikiDir, "multiversion", "getMWVersion")
+        if exists(get_version_location):
             # run the command for the wiki and get the version
-            command =  getVersionLocation + " " +  dbName
+            command =  get_version_location + " " +  db_name
             version = RunSimpleCommand.runAndReturn(command)
             if version:
                 version = version.rstrip()
@@ -38,57 +38,57 @@
     MWVersion = staticmethod(MWVersion)
 
 class DbServerInfo(object):
-    def __init__(self, wiki, dbName, errorCallback=None):
+    def __init__(self, wiki, db_name, error_callback=None):
         self.wiki = wiki
-        self.dbName = dbName
-        self.errorCallback = errorCallback
-        self.dBTablePrefix = None
-        self.getDefaultServerAndDBprefix()
+        self.dbName = db_name
+        self.errorCallback = error_callback
+        self.db_table_prefix = None
+        self.get_db_server_and_prefix()
 
-    def getDefaultServerAndDBprefix(self):
+    def get_db_server_and_prefix(self):
         """Get the name of a slave server for our cluster; also get
         the prefix for all tables for the specific wiki ($wgDBprefix)"""
         if not exists(self.wiki.config.php):
             raise BackupError("php command %s not found" % 
self.wiki.config.php)
-        commandList = MultiVersion.MWScriptAsArray(self.wiki.config, "getSlaveServer.php")
-        phpCommand = MiscUtils.shellEscape(self.wiki.config.php)
-        dbName = MiscUtils.shellEscape(self.dbName)
-        for i in range(0, len(commandList)):
-            commandList[i] = MiscUtils.shellEscape(commandList[i])
-        command = " ".join(commandList)
-        command = "%s -q %s --wiki=%s --group=dump --globals" % (phpCommand, command, dbName)
+        command_list = MultiVersion.MWScriptAsArray(self.wiki.config, "getSlaveServer.php")
+        php_command = MiscUtils.shellEscape(self.wiki.config.php)
+        db_name = MiscUtils.shellEscape(self.dbName)
+        for i in range(0, len(command_list)):
+            command_list[i] = MiscUtils.shellEscape(command_list[i])
+        command = " ".join(command_list)
+        command = "%s -q %s --wiki=%s --group=dump --globals" % (php_command, command, db_name)
         results = RunSimpleCommand.runAndReturn(command, self.errorCallback).strip()
         if not results:
             raise BackupError("Failed to get database connection information 
for %s, bailing." % self.wiki.config.php)
         # first line is the server, the second is an array of the globals, we 
need the db table prefix out of those
         lines = results.splitlines()
         self.dbServer = lines[0]
-        self.dbPort = None
+        self.db_port = None
         if ':' in self.dbServer:
-            self.dbServer, _, self.dbPort = self.dbServer.rpartition(':')
+            self.dbServer, _, self.db_port = self.dbServer.rpartition(':')
 
         #       [wgDBprefix] =>
-        wgdbprefixPattern = re.compile("\s+\[wgDBprefix\]\s+=>\s+(?P<prefix>.*)$")
-        for l in lines:
-            match = wgdbprefixPattern.match(l)
+        wgdb_prefix_pattern = re.compile("\s+\[wgDBprefix\]\s+=>\s+(?P<prefix>.*)$")
+        for line in lines:
+            match = wgdb_prefix_pattern.match(line)
             if match:
-                self.dBTablePrefix = match.group('prefix').strip()
-        if self.dBTablePrefix == None:
+                self.db_table_prefix = match.group('prefix').strip()
+        if self.db_table_prefix == None:
             # if we didn't see this in the globals list, something is broken.
             raise BackupError("Failed to get database table prefix for %s, 
bailing." % self.wiki.config.php)
 
-    def mysqlStandardParameters(self):
+    def mysql_standard_parameters(self):
         host = self.dbServer
-        if self.dbPort and self.dbServer.strip() == "localhost":
+        if self.db_port and self.dbServer.strip() == "localhost":
             # MySQL tools ignore port settings for host "localhost" and instead use IPC sockets,
             # so we rewrite the localhost to it's ip address
             host = socket.gethostbyname(self.dbServer);
 
         params = ["-h", "%s" % host] # Host
-        if self.dbPort:
-            params += ["--port", "%s" % self.dbPort] # Port
+        if self.db_port:
+            params += ["--port", "%s" % self.db_port] # Port
         params += ["-u", "%s" % self.wiki.config.dbUser] # Username
-        params += ["%s" % self.passwordOption()] # Password
+        params += ["%s" % self.password_option()] # Password
         return params
 
     def buildSqlCommand(self, query, pipeto=None):
@@ -96,38 +96,38 @@
         if not exists(self.wiki.config.mysql):
             raise BackupError("mysql command %s not found" % 
self.wiki.config.mysql)
         command = [["/bin/echo", "%s" % query],
-                ["%s" % self.wiki.config.mysql] + 
self.mysqlStandardParameters() + [
-                  "%s" % self.dbName,
-                  "-r"]]
+                   ["%s" % self.wiki.config.mysql] + 
self.mysql_standard_parameters() + [
+                       "%s" % self.dbName,
+                       "-r"]]
         if pipeto:
             command.append([pipeto])
         return command
 
-    def buildSqlDumpCommand(self, table, pipeto=None):
+    def build_sqldump_command(self, table, pipeto=None):
         """Put together a command to dump a table from the current DB with 
mysqldump
         and save to a gzipped sql file."""
         if not exists(self.wiki.config.mysqldump):
             raise BackupError("mysqldump command %s not found" % 
self.wiki.config.mysqldump)
-        command = [["%s" % self.wiki.config.mysqldump] + 
self.mysqlStandardParameters() + [
-                   "--opt", "--quick",
-                   "--skip-add-locks", "--skip-lock-tables",
-                   "%s" % self.dbName,
-                   "%s" % self.dBTablePrefix + table]]
+        command = [["%s" % self.wiki.config.mysqldump] + 
self.mysql_standard_parameters() + [
+            "--opt", "--quick",
+            "--skip-add-locks", "--skip-lock-tables",
+            "%s" % self.dbName,
+            "%s" % self.db_table_prefix + table]]
         if pipeto:
             command.append([pipeto])
         return command
 
-    def runSqlAndGetOutput(self, query):
+    def run_sql_and_get_output(self, query):
         command = self.buildSqlCommand(query)
-        p = CommandPipeline(command, quiet=True)
-        p.runPipelineAndGetOutput()
+        proc = CommandPipeline(command, quiet=True)
+        proc.runPipelineAndGetOutput()
         # fixme best to put the return code someplace along with any errors....
-        if p.exitedSuccessfully() and (p.output()):
-            return p.output()
+        if proc.exitedSuccessfully() and (proc.output()):
+            return proc.output()
         else:
             return None
 
-    def passwordOption(self):
+    def password_option(self):
         """If you pass '-pfoo' mysql uses the password 'foo',
         but if you pass '-p' it prompts. Sigh."""
         if self.wiki.config.dbPassword == "":
@@ -136,7 +136,7 @@
             return "-p" + self.wiki.config.dbPassword
 
 class RunSimpleCommand(object):
-    def runAndReturn(command, logCallback=None):
+    def runAndReturn(command, log_callback=None):
         """Run a command and return the output as a string.
         Raises BackupError on non-zero return code."""
         retval = 1
@@ -146,16 +146,16 @@
         output, error = proc.communicate()
         retval = proc.returncode
         while retval and retries < maxretries:
-            if logCallback:
-                logCallback("Non-zero return code from '%s'" % command)
+            if log_callback:
+                log_callback("Non-zero return code from '%s'" % command)
             time.sleep(5)
             proc = Popen(command, bufsize=64, shell=True, stdout=PIPE, stderr=PIPE)
             output, error = proc.communicate()
             retval = proc.returncode
             retries = retries + 1
         if retval:
-            if logCallback:
-                logCallback("Non-zero return code from '%s'" % command)
+            if log_callback:
+                log_callback("Non-zero return code from '%s'" % command)
             raise BackupError("Non-zero return code from '%s'" % command)
         else:
             return output
@@ -163,53 +163,53 @@
     runAndReturn = staticmethod(runAndReturn)
 
 class PageAndEditStats(object):
-    def __init__(self, wiki, dbName, errorCallback=None):
-        self.totalPages = None
-        self.totalEdits = None
+    def __init__(self, wiki, db_name, error_callback=None):
+        self.total_pages = None
+        self.total_edits = None
         self.wiki = wiki
-        self.dbName = dbName
-        self.dbServerInfo = DbServerInfo(wiki, dbName, errorCallback)
-        self.getStatistics(self.wiki.config, dbName)
+        self.dbName = db_name
+        self.dbServerInfo = DbServerInfo(wiki, db_name, error_callback)
+        self.get_statistics(self.wiki.config, db_name)
 
-    def getStatistics(self, dbName, ignore):
+    def get_statistics(self, db_name, ignore):
         """Get statistics for the wiki"""
 
-        query = "select MAX(page_id) from %spage;" % 
self.dbServerInfo.dBTablePrefix
+        query = "select MAX(page_id) from %spage;" % 
self.dbServerInfo.db_table_prefix
         results = None
         retries = 0
         maxretries = 5
-        results = self.dbServerInfo.runSqlAndGetOutput(query)
+        results = self.dbServerInfo.run_sql_and_get_output(query)
         while results == None and retries < maxretries:
             retries = retries + 1
             time.sleep(5)
-            results = self.dbServerInfo.runSqlAndGetOutput(query)
+            results = self.dbServerInfo.run_sql_and_get_output(query)
         if not results:
             return 1
 
         lines = results.splitlines()
         if lines and lines[1]:
-            self.totalPages = int(lines[1])
-        query = "select MAX(rev_id) from %srevision;" % 
self.dbServerInfo.dBTablePrefix
+            self.total_pages = int(lines[1])
+        query = "select MAX(rev_id) from %srevision;" % 
self.dbServerInfo.db_table_prefix
         retries = 0
         results = None
-        results = self.dbServerInfo.runSqlAndGetOutput(query)
+        results = self.dbServerInfo.run_sql_and_get_output(query)
         while results == None and retries < maxretries:
             retries = retries + 1
             time.sleep(5)
-            results = self.dbServerInfo.runSqlAndGetOutput(query)
+            results = self.dbServerInfo.run_sql_and_get_output(query)
         if not results:
             return 1
 
         lines = results.splitlines()
         if lines and lines[1]:
-            self.totalEdits = int(lines[1])
+            self.total_edits = int(lines[1])
         return 0
 
-    def getTotalPages(self):
-        return self.totalPages
+    def get_total_pages(self):
+        return self.total_pages
 
-    def getTotalEdits(self):
-        return self.totalEdits
+    def get_total_edits(self):
+        return self.total_edits
 
 
 class RunInfoFile(object):
@@ -218,20 +218,20 @@
         self._enabled = enabled
         self.verbose = verbose
 
-    def saveDumpRunInfoFile(self, text):
+    def save_dump_runinfo_file(self, text):
         """Write out a simple text file with the status for this wiki's 
dump."""
         if self._enabled:
             try:
-                self._writeDumpRunInfoFile(text)
+                self._write_dump_runinfo_file(text)
             except:
                 if self.verbose:
                     exc_type, exc_value, exc_traceback = sys.exc_info()
                     sys.stderr.write(repr(traceback.format_exception(exc_type, exc_value, exc_traceback)))
                 sys.stderr.write("Couldn't save dump run info file. Continuing anyways\n")
 
-    def statusOfOldDumpIsDone(self, runner, date, jobName, jobDesc):
-        oldDumpRunInfoFilename=self._getDumpRunInfoFileName(date)
-        status = self._getStatusForJobFromRunInfoFile(oldDumpRunInfoFilename, jobName)
+    def status_of_old_dump_is_done(self, runner, date, job_name, job_desc):
+        old_dump_runinfo_filename=self._get_dump_runinfo_filename(date)
+        status = self._get_job_status_from_runinfo(old_dump_runinfo_filename, job_name)
         if status == "done":
             return 1
         elif not status == None:
@@ -239,26 +239,26 @@
             return 0
 
         # ok, there was no info there to be had, try the index file. yuck.
-        indexFilename = os.path.join(runner.wiki.publicDir(), date, runner.wiki.config.perDumpIndex)
-        status = self._getStatusForJobFromIndexFile(indexFilename, jobDesc)
+        index_filename = os.path.join(runner.wiki.publicDir(), date, runner.wiki.config.perDumpIndex)
+        status = self._get_job_status_from_html(index_filename, job_desc)
         if status == "done":
             return 1
         else:
             return 0
 
-    def getOldRunInfoFromFile(self):
+    def get_old_runinfo_from_file(self):
         # read the dump run info file in, if there is one, and get info about which dumps
         # have already been run and whether they were successful
-        dumpRunInfoFileName = self._getDumpRunInfoFileName()
+        dump_runinfo_filename = self._get_dump_runinfo_filename()
         results = []
 
-        if not os.path.exists(dumpRunInfoFileName):
+        if not os.path.exists(dump_runinfo_filename):
             return False
 
         try:
-            infile = open(dumpRunInfoFileName, "r")
+            infile = open(dump_runinfo_filename, "r")
             for line in infile:
-                results.append(self._getOldRunInfoFromLine(line))
+                results.append(self._get_old_runinfo_from_line(line))
             infile.close
             return results
         except:
@@ -270,7 +270,7 @@
     #
     # functions internal to the class
     #
-    def _getDumpRunInfoFileName(self, date=None):
+    def _get_dump_runinfo_filename(self, date=None):
         # sometimes need to get this info for an older run to check status of a file for
         # possible prefetch
         if date:
@@ -278,51 +278,51 @@
         else:
             return os.path.join(self.wiki.publicDir(), self.wiki.date, "dumpruninfo.txt")
 
-    def _getDumpRunInfoDirName(self, date=None):
+    def _get_dump_runinfo_dirname(self, date=None):
         if date:
             return os.path.join(self.wiki.publicDir(), date)
         else:
             return os.path.join(self.wiki.publicDir(), self.wiki.date)
 
     # format: name:%; updated:%; status:%
-    def _getOldRunInfoFromLine(self, line):
+    def _get_old_runinfo_from_line(self, line):
         # get rid of leading/trailing/blanks
         line = line.strip(" ")
         line = line.replace("\n", "")
         fields = line.split(';', 2)
-        dumpRunInfo = RunInfo()
+        dump_runinfo = RunInfo()
         for field in fields:
             field = field.strip(" ")
-            (fieldName, separator, fieldValue)  = field.partition(':')
-            if fieldName == "name":
-                dumpRunInfo.setName(fieldValue)
-            elif fieldName == "status":
-                dumpRunInfo.setStatus(fieldValue, False)
-            elif fieldName == "updated":
-                dumpRunInfo.setUpdated(fieldValue)
-        return dumpRunInfo
+            (fieldname, separator, field_value)  = field.partition(':')
+            if fieldname == "name":
+                dump_runinfo.setName(field_value)
+            elif fieldname == "status":
+                dump_runinfo.setStatus(field_value, False)
+            elif fieldname == "updated":
+                dump_runinfo.setUpdated(field_value)
+        return dump_runinfo
 
-    def _writeDumpRunInfoFile(self, text):
-        directory = self._getDumpRunInfoDirName()
-        dumpRunInfoFilename = self._getDumpRunInfoFileName()
+    def _write_dump_runinfo_file(self, text):
+        directory = self._get_dump_runinfo_dirname()
+        dump_runinfo_filename = self._get_dump_runinfo_filename()
 #        FileUtils.writeFile(directory, dumpRunInfoFilename, text, self.wiki.config.fileperms)
-        FileUtils.writeFileInPlace(dumpRunInfoFilename, text, self.wiki.config.fileperms)
+        FileUtils.writeFileInPlace(dump_runinfo_filename, text, self.wiki.config.fileperms)
 
     # format: name:%; updated:%; status:%
-    def _getStatusForJobFromRunInfoFileLine(self, line, jobName):
+    def _get_job_status_from_runinfo_line(self, line, job_name):
         # get rid of leading/trailing/embedded blanks
         line = line.replace(" ", "")
         line = line.replace("\n", "")
         fields = line.split(';', 2)
         for field in fields:
-            (fieldName, separator, fieldValue)  = field.partition(':')
-            if fieldName == "name":
-                if not fieldValue == jobName:
+            (fieldname, separator, field_value)  = field.partition(':')
+            if fieldname == "name":
+                if not field_value == job_name:
                     return None
-            elif fieldName == "status":
-                return fieldValue
+            elif fieldname == "status":
+                return field_value
 
-    def _getStatusForJobFromRunInfoFile(self, filename, jobName=""):
+    def _get_job_status_from_runinfo(self, filename, job_name=""):
         # read the dump run info file in, if there is one, and find out whether
         # a particular job (one step only, not a multiple piece job) has been
         # already run and whether it was successful (use to examine status
@@ -330,7 +330,7 @@
         try:
             infile = open(filename, "r")
             for line in infile:
-                result = self._getStatusForJobFromRunInfoFileLine(line, jobName)
+                result = self._get_job_status_from_runinfo_line(line, job_name)
                 if not result == None:
                     return result
             infile.close
@@ -342,7 +342,7 @@
             return None
 
     # find desc in there, look for "class='done'"
-    def _getStatusForJobFromIndexFileLine(self, line, desc):
+    def _get_job_status_from_html_line(self, line, desc):
         if not ">"+desc+"<" in line:
             return None
         if "<li class='done'>" in line:
@@ -350,7 +350,7 @@
         else:
             return "other"
 
-    def _getStatusForJobFromIndexFile(self, filename, desc):
+    def _get_job_status_from_html(self, filename, desc):
         # read the index file in, if there is one, and find out whether
         # a particular job (one step only, not a multiple piece job) has been
         # already run and whether it was successful (use to examine status
@@ -358,7 +358,7 @@
         try:
             infile = open(filename, "r")
             for line in infile:
-                result = self._getStatusForJobFromIndexFileLine(line, desc)
+                result = self._get_job_status_from_html_line(line, desc)
                 if not result == None:
                     return result
             infile.close
@@ -371,11 +371,11 @@
 
 
 class RunInfo(object):
-    def __init__(self, name="", status="", updated="", toBeRun=False):
+    def __init__(self, name="", status="", updated="", to_run=False):
         self._name = name
         self._status = status
         self._updated = updated
-        self._toBeRun = toBeRun
+        self._to_run = to_run
 
     def name(self):
         return self._name
@@ -386,20 +386,20 @@
     def updated(self):
         return self._updated
 
-    def toBeRun(self):
-        return self._toBeRun
+    def to_run(self):
+        return self._to_run
 
     def setName(self, name):
         self._name = name
 
-    def setStatus(self, status, setUpdated=True):
+    def setStatus(self, status, set_updated=True):
         self._status = status
 
     def setUpdated(self, updated):
         self._updated = updated
 
-    def setToBeRun(self, toBeRun):
-        self._toBeRun = toBeRun
+    def set_to_run(self, to_run):
+        self._to_run = to_run
 
 
 # so if the pages/revsPerChunkAbstract/History are just one number it means
@@ -407,59 +407,59 @@
 # otherwise we get passed alist that says "here's now many for each chunk and it's this many chunks.
 # extra pages/revs go in the last chunk, stuck on the end. too bad. :-P
 class Chunk(object,):
-    def __init__(self, wiki, dbName, errorCallback=None):
+    def __init__(self, wiki, db_name, error_callback=None):
 
-        self._dbName = dbName
+        self._dbName = db_name
         self.wiki = wiki
         self._chunks_enabled = self.wiki.config.chunksEnabled
         if self._chunks_enabled:
-            self.Stats = PageAndEditStats(self.wiki, dbName, errorCallback)
-            if not self.Stats.totalEdits or not self.Stats.totalPages:
+            self.Stats = PageAndEditStats(self.wiki, self._dbName, error_callback)
+            if not self.Stats.total_edits or not self.Stats.total_pages:
                 raise BackupError("Failed to get DB stats, exiting")
             if self.wiki.config.chunksForAbstract:
                 # we add 200 padding to cover new pages that may be added
-                pagesPerChunk = self.Stats.totalPages/int(self.wiki.config.chunksForAbstract) + 200
-                self._pagesPerChunkAbstract = [pagesPerChunk for i in range(0, int(self.wiki.config.chunksForAbstract))]
+                pagesPerChunk = self.Stats.total_pages/int(self.wiki.config.chunksForAbstract) + 200
+                self._pages_per_chunk_abstract = [pagesPerChunk for i in range(0, int(self.wiki.config.chunksForAbstract))]
             else:
-                self._pagesPerChunkAbstract = self.convertCommaSepLineToNumbers(self.wiki.config.pagesPerChunkAbstract)
+                self._pages_per_chunk_abstract = self.convert_comma_sep(self.wiki.config.pagesPerChunkAbstract)
 
-            self._pagesPerChunkHistory = self.convertCommaSepLineToNumbers(self.wiki.config.pagesPerChunkHistory)
-            self._revsPerChunkHistory = self.convertCommaSepLineToNumbers(self.wiki.config.revsPerChunkHistory)
-            self._recombineHistory = self.wiki.config.recombineHistory
+            self._pages_per_chunk_history = self.convert_comma_sep(self.wiki.config.pagesPerChunkHistory)
+            self._revs_per_chunk_history = self.convert_comma_sep(self.wiki.config.revsPerChunkHistory)
+            self._recombine_history = self.wiki.config.recombineHistory
         else:
-            self._pagesPerChunkHistory = False
-            self._revsPerChunkHistory = False
-            self._pagesPerChunkAbstract = False
-            self._recombineHistory = False
+            self._pages_per_chunk_history = False
+            self._revs_per_chunk_history = False
+            self._pages_per_chunk_abstract = False
+            self._recombine_history = False
         if self._chunks_enabled:
-            if self._revsPerChunkHistory:
-                if len(self._revsPerChunkHistory) == 1:
-                    self._numChunksHistory = self.getNumberOfChunksForXMLDumps(self.Stats.totalEdits, self._pagesPerChunkHistory[0])
-                    self._revsPerChunkHistory = [self._revsPerChunkHistory[0] for i in range(self._numChunksHistory)]
+            if self._revs_per_chunk_history:
+                if len(self._revs_per_chunk_history) == 1:
+                    self._num_chunks_history = self.getNumberOfChunksForXMLDumps(self.Stats.total_edits, self._pages_per_chunk_history[0])
+                    self._revs_per_chunk_history = [self._revs_per_chunk_history[0] for i in range(self._num_chunks_history)]
                 else:
-                    self._numChunksHistory = len(self._revsPerChunkHistory)
+                    self._num_chunks_history = len(self._revs_per_chunk_history)
                 # here we should generate the number of pages per chunk based on number of revs.
                 # ...next code update! FIXME
-                # self._pagesPerChunkHistory = ....
-            elif self._pagesPerChunkHistory:
-                if len(self._pagesPerChunkHistory) == 1:
-                    self._numChunksHistory = self.getNumberOfChunksForXMLDumps(self.Stats.totalPages, self._pagesPerChunkHistory[0])
-                    self._pagesPerChunkHistory = [self._pagesPerChunkHistory[0] for i in range(self._numChunksHistory)]
+                # self._pages_per_chunk_history = ....
+            elif self._pages_per_chunk_history:
+                if len(self._pages_per_chunk_history) == 1:
+                    self._num_chunks_history = self.getNumberOfChunksForXMLDumps(self.Stats.total_pages, self._pages_per_chunk_history[0])
+                    self._pages_per_chunk_history = [self._pages_per_chunk_history[0] for i in range(self._num_chunks_history)]
                 else:
-                    self._numChunksHistory = len(self._pagesPerChunkHistory)
+                    self._num_chunks_history = len(self._pages_per_chunk_history)
             else:
-                self._numChunksHistory = 0
+                self._num_chunks_history = 0
 
-            if self._pagesPerChunkAbstract:
-                if len(self._pagesPerChunkAbstract) == 1:
-                    self._numChunksAbstract = self.getNumberOfChunksForXMLDumps(self.Stats.totalPages, self._pagesPerChunkAbstract[0])
-                    self._pagesPerChunkAbstract = [self._pagesPerChunkAbstract[0] for i in range(self._numChunksAbstract)]
+            if self._pages_per_chunk_abstract:
+                if len(self._pages_per_chunk_abstract) == 1:
+                    self._num_chunks_abstract = self.getNumberOfChunksForXMLDumps(self.Stats.total_pages, self._pages_per_chunk_abstract[0])
+                    self._pages_per_chunk_abstract = [self._pages_per_chunk_abstract[0] for i in range(self._num_chunks_abstract)]
                 else:
-                    self._numChunksAbstract = len(self._pagesPerChunkAbstract)
+                    self._num_chunks_abstract = len(self._pages_per_chunk_abstract)
             else:
-                self._numChunksAbstract = 0
+                self._num_chunks_abstract = 0
 
-    def convertCommaSepLineToNumbers(self, line):
+    def convert_comma_sep(self, line):
         if line == "":
             return False
         result = line.split(',')
@@ -469,33 +469,33 @@
             numbers.append(int(field))
         return numbers
 
-    def getPagesPerChunkAbstract(self):
-        return self._pagesPerChunkAbstract
+    def get_pages_per_chunk_abstract(self):
+        return self._pages_per_chunk_abstract
 
-    def getNumChunksAbstract(self):
-        return self._numChunksAbstract
+    def get_num_chunks_abstract(self):
+        return self._num_chunks_abstract
 
     def getPagesPerChunkHistory(self):
-        return self._pagesPerChunkHistory
+        return self._pages_per_chunk_history
 
-    def getNumChunksHistory(self):
-        return self._numChunksHistory
+    def get_num_chunks_history(self):
+        return self._num_chunks_history
 
     def chunksEnabled(self):
         return self._chunks_enabled
 
     def recombineHistory(self):
-        return self._recombineHistory
+        return self._recombine_history
 
     # args: total (pages or revs), and the number of (pages or revs) per chunk.
-    def getNumberOfChunksForXMLDumps(self, total, perChunk):
+    def getNumberOfChunksForXMLDumps(self, total, per_chunk):
         if not total:
             # default: no chunking.
             return 0
         else:
-            chunks = int(total/perChunk)
+            chunks = int(total/per_chunk)
             # more smaller chunks are better, we want speed
-            if (total - (chunks * perChunk)) > 0:
+            if (total - (chunks * per_chunk)) > 0:
                 chunks = chunks + 1
             if chunks == 1:
                 return 0
diff --git a/xmldumps-backup/worker.py b/xmldumps-backup/worker.py
index de1f5ef..6a52fb6 100644
--- a/xmldumps-backup/worker.py
+++ b/xmldumps-backup/worker.py
@@ -97,41 +97,41 @@
                 raise BackupError("You cannot specify a checkpoint file with 
the job %s, exiting.\n" % self._single_job)
 
         self.dumpItems = [PrivateTable("user", "usertable", "User account 
data."),
-            PrivateTable("watchlist", "watchlisttable", "Users' watchlist 
settings."),
-            PrivateTable("ipblocks", "ipblockstable", "Data for blocks of IP 
addresses, ranges, and users."),
-            PrivateTable("archive", "archivetable", "Deleted page and revision 
data."),
-            #PrivateTable("updates", "updatestable", "Update dataset for OAI 
updater system."),
-            PrivateTable("logging", "loggingtable", "Data for various events 
(deletions, uploads, etc)."),
-            PrivateTable("oldimage", "oldimagetable", "Metadata on prior 
versions of uploaded images."),
-            #PrivateTable("filearchive", "filearchivetable", "Deleted image 
data"),
+                          PrivateTable("watchlist", "watchlisttable", "Users' 
watchlist settings."),
+                          PrivateTable("ipblocks", "ipblockstable", "Data for 
blocks of IP addresses, ranges, and users."),
+                          PrivateTable("archive", "archivetable", "Deleted 
page and revision data."),
+                          #PrivateTable("updates", "updatestable", "Update 
dataset for OAI updater system."),
+                          PrivateTable("logging", "loggingtable", "Data for 
various events (deletions, uploads, etc)."),
+                          PrivateTable("oldimage", "oldimagetable", "Metadata 
on prior versions of uploaded images."),
+                          #PrivateTable("filearchive", "filearchivetable", 
"Deleted image data"),
 
-            PublicTable("site_stats", "sitestatstable", "A few statistics such 
as the page count."),
-            PublicTable("image", "imagetable", "Metadata on current versions 
of uploaded media/files."),
-            #PublicTable("oldimage", "oldimagetable", "Metadata on prior 
versions of uploaded media/files."),
-            PublicTable("pagelinks", "pagelinkstable", "Wiki page-to-page link 
records."),
-            PublicTable("categorylinks", "categorylinkstable", "Wiki category 
membership link records."),
-            PublicTable("imagelinks", "imagelinkstable", "Wiki media/files 
usage records."),
-            PublicTable("templatelinks", "templatelinkstable", "Wiki template 
inclusion link records."),
-            PublicTable("externallinks", "externallinkstable", "Wiki external 
URL link records."),
-            PublicTable("langlinks", "langlinkstable", "Wiki interlanguage 
link records."),
-            #PublicTable("interwiki", "interwikitable", "Set of defined 
interwiki prefixes and links for this wiki."),
-            PublicTable("user_groups", "usergroupstable", "User group 
assignments."),
-            PublicTable("category", "categorytable", "Category information."),
+                          PublicTable("site_stats", "sitestatstable", "A few 
statistics such as the page count."),
+                          PublicTable("image", "imagetable", "Metadata on 
current versions of uploaded media/files."),
+                          #PublicTable("oldimage", "oldimagetable", "Metadata 
on prior versions of uploaded media/files."),
+                          PublicTable("pagelinks", "pagelinkstable", "Wiki 
page-to-page link records."),
+                          PublicTable("categorylinks", "categorylinkstable", 
"Wiki category membership link records."),
+                          PublicTable("imagelinks", "imagelinkstable", "Wiki 
media/files usage records."),
+                          PublicTable("templatelinks", "templatelinkstable", 
"Wiki template inclusion link records."),
+                          PublicTable("externallinks", "externallinkstable", 
"Wiki external URL link records."),
+                          PublicTable("langlinks", "langlinkstable", "Wiki 
interlanguage link records."),
+                          #PublicTable("interwiki", "interwikitable", "Set of 
defined interwiki prefixes and links for this wiki."),
+                          PublicTable("user_groups", "usergroupstable", "User 
group assignments."),
+                          PublicTable("category", "categorytable", "Category 
information."),
 
-            PublicTable("page", "pagetable", "Base per-page data (id, title, 
old restrictions, etc)."),
-            PublicTable("page_restrictions", "pagerestrictionstable", "Newer 
per-page restrictions table."),
-            PublicTable("page_props", "pagepropstable", "Name/value pairs for 
pages."),
-            PublicTable("protected_titles", "protectedtitlestable", 
"Nonexistent pages that have been protected."),
-            #PublicTable("revision", #revisiontable", "Base per-revision data 
(does not include text)."), // safe?
-            #PrivateTable("text", "texttable", "Text blob storage. May be 
compressed, etc."), // ?
-            PublicTable("redirect", "redirecttable", "Redirect list"),
-            PublicTable("iwlinks", "iwlinkstable", "Interwiki link tracking 
records"),
-            PublicTable("geo_tags", "geotagstable", "List of pages' 
geographical coordinates"),
+                          PublicTable("page", "pagetable", "Base per-page data 
(id, title, old restrictions, etc)."),
+                          PublicTable("page_restrictions", 
"pagerestrictionstable", "Newer per-page restrictions table."),
+                          PublicTable("page_props", "pagepropstable", 
"Name/value pairs for pages."),
+                          PublicTable("protected_titles", 
"protectedtitlestable", "Nonexistent pages that have been protected."),
+                          #PublicTable("revision", #revisiontable", "Base 
per-revision data (does not include text)."), // safe?
+                          #PrivateTable("text", "texttable", "Text blob 
storage. May be compressed, etc."), // ?
+                          PublicTable("redirect", "redirecttable", "Redirect 
list"),
+                          PublicTable("iwlinks", "iwlinkstable", "Interwiki 
link tracking records"),
+                          PublicTable("geo_tags", "geotagstable", "List of 
pages' geographical coordinates"),
 
-            TitleDump("pagetitlesdump", "List of page titles in main 
namespace"),
-            AllTitleDump("allpagetitlesdump", "List of all page titles"),
+                          TitleDump("pagetitlesdump", "List of page titles in 
main namespace"),
+                          AllTitleDump("allpagetitlesdump", "List of all page 
titles"),
 
-            AbstractDump("abstractsdump", "Extracted page abstracts for 
Yahoo", self._getChunkToDo("abstractsdump"), self.wiki.dbName, 
self.chunkInfo.getPagesPerChunkAbstract())]
+                          AbstractDump("abstractsdump", "Extracted page 
abstracts for Yahoo", self._get_chunk_to_do("abstractsdump"), self.wiki.dbName, 
self.chunkInfo.get_pages_per_chunk_abstract())]
 
         if self.chunkInfo.chunksEnabled():
             self.dumpItems.append(RecombineAbstractDump("abstractsdumprecombine", "Recombine extracted page abstracts for Yahoo", self.find_item_by_name('abstractsdump')))
@@ -223,7 +223,7 @@
                                    "index of page titles/ids and offsets into 
the file.  Useful for offline readers, or for parallel processing of pages.",
                                    
self.find_item_by_name(input_for_multistream), self.wiki, None))
 
-        results = self._runinfo_file.getOldRunInfoFromFile()
+        results = self._runinfo_file.get_old_runinfo_from_file()
         if results:
             for runinfo_obj in results:
                 self._set_dump_item_runinfo(runinfo_obj)
@@ -261,7 +261,7 @@
                     if item.name in self.skip_jobs:
                         item.setSkipped()
                     elif not skipgood or item.status() != "done":
-                        item.setToBeRun(True)
+                        item.set_to_run(True)
             return True
         else:
             for item in self.dumpItems:
@@ -269,7 +269,7 @@
                     if item.name in self.skip_jobs:
                         item.setSkipped()
                     elif not skipgood or item.status() != "done":
-                        item.setToBeRun(True)
+                        item.set_to_run(True)
                     return True
         if job == "noop" or job == "latestlinks" or job == "createdirs":
             return True
@@ -287,12 +287,12 @@
         i = 0;
         for item in self.dumpItems:
             i = i + 1;
-            if item.toBeRun():
+            if item.to_run():
                 for j in range(i, len(self.dumpItems)):
                     if item.name in self.skip_jobs:
                         item.setSkipped()
                     elif not skipgood or item.status() != "done":
-                        self.dumpItems[j].setToBeRun(True)
+                        self.dumpItems[j].set_to_run(True)
                 break
 
     def mark_all_jobs_to_run(self, skipgood=False):
@@ -301,7 +301,7 @@
             if item.name() in self.skip_jobs:
                 item.setSkipped()
             elif not skipgood or item.status() != "done":
-                item.setToBeRun(True)
+                item.set_to_run(True)
 
     def find_item_by_name(self, name):
         for item in self.dumpItems:
@@ -323,7 +323,7 @@
             if item.name() == runInfo.name():
                 item.setStatus(runInfo.status(), False)
                 item.setUpdated(runInfo.updated())
-                item.setToBeRun(runInfo.toBeRun())
+                item.set_to_run(runInfo.to_run())
                 return True
         return False
 
@@ -596,10 +596,10 @@
 
         for item in self.dumpItemList.dumpItems:
             Maintenance.exit_if_in_maintenance_mode("In maintenance mode, 
exiting dump of %s at step %s" % (self.dbName, item.name()))
-            if item.toBeRun():
+            if item.to_run():
                 item.start(self)
                 self.status.update_status_files()
-                self.runInfoFile.saveDumpRunInfoFile(self.dumpItemList.report_dump_runinfo())
+                self.runInfoFile.save_dump_runinfo_file(self.dumpItemList.report_dump_runinfo())
                 try:
                     item.dump(self)
                 except Exception, ex:
@@ -643,7 +643,7 @@
             # previously in "waiting" are still in status "waiting"
             self.status.update_status_files("partialdone")
 
-        self.runInfoFile.saveDumpRunInfoFile(self.dumpItemList.report_dump_runinfo())
+        self.runInfoFile.save_dump_runinfo_file(self.dumpItemList.report_dump_runinfo())
 
         # if any job succeeds we might as well make the sym link
         if self.status.fail_count < 1:
@@ -718,7 +718,7 @@
         self.sym_links.cleanup_symlinks()
 
         for item in self.dumpItemList.dumpItems:
-            if item.toBeRun():
+            if item.to_run():
                 dump_names = item.listDumpNames()
                 if type(dump_names).__name__!='list':
                     dump_names = [dump_names]
@@ -789,7 +789,7 @@
         # failed to get the run's info so let's call it 'didn't run'
         return False
 
-    results = dumpItemList._runinfo_file.getOldRunInfoFromFile()
+    results = dumpItemList._runinfo_file.get_old_runinfo_from_file()
     if results:
         for runinfo_obj in results:
             dumpItemList._set_dump_item_runinfo(runinfo_obj)
@@ -806,7 +806,7 @@
         # see if there are any to run. no? then return True (all job(s) done)
         # otherwise return False (still some to do)
         for item in dumpItemList.dumpItems:
-            if item.toBeRun():
+            if item.to_run():
                 return False
         return True
     else:
diff --git a/xmldumps-backup/xmlstreams.py b/xmldumps-backup/xmlstreams.py
index 72adbbd..54b1caf 100644
--- a/xmldumps-backup/xmlstreams.py
+++ b/xmldumps-backup/xmlstreams.py
@@ -150,12 +150,12 @@
     wiki = WikiDump.Wiki(wikiconf, wikidb)
 
     db_info = worker.DbServerInfo(wiki, wikidb)
-    query = "select MAX(%s) from %s%s;" % (id_field, db_info.dBTablePrefix, 
table)
+    query = "select MAX(%s) from %s%s;" % (id_field, db_info.db_table_prefix, 
table)
     results = None
     retries = 0
     maxretries = 5
     end = 0
-    results = db_info.runSqlAndGetOutput(query)
+    results = db_info.run_sql_and_get_output(query)
     if results:
         lines = results.splitlines()
         if lines and lines[1]:
@@ -167,7 +167,7 @@
     while results is None and retries < maxretries:
         retries = retries + 1
         time.sleep(5)
-        results = db_info.runSqlAndGetOutput(query)
+        results = db_info.run_sql_and_get_output(query)
         if not results:
             continue
         lines = results.splitlines()

-- 
To view, visit https://gerrit.wikimedia.org/r/242464
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I9c5104fa802d4bf1b85c4c61bd6f6419fa7e04f1
Gerrit-PatchSet: 1
Gerrit-Project: operations/dumps
Gerrit-Branch: ariel
Gerrit-Owner: ArielGlenn <ar...@wikimedia.org>
