ArielGlenn has uploaded a new change for review. https://gerrit.wikimedia.org/r/249074
Change subject: dumps: fix up incrementals scripts to use changed WikiDump names ...................................................................... dumps: fix up incrementals scripts to use changed WikiDump names since the WikiDump names are no longer camelcase, fix those references in the incrementals scripts; also general pep8 and pylint to clean them up some Change-Id: If463318a055962097b2634f9dd77c90501105aea --- M xmldumps-backup/incrementals/IncrDumpLib.py M xmldumps-backup/incrementals/generateincrementals.py 2 files changed, 453 insertions(+), 433 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/operations/dumps refs/changes/74/249074/1 diff --git a/xmldumps-backup/incrementals/IncrDumpLib.py b/xmldumps-backup/incrementals/IncrDumpLib.py index 76bc1b3..3f31128 100644 --- a/xmldumps-backup/incrementals/IncrDumpLib.py +++ b/xmldumps-backup/incrementals/IncrDumpLib.py @@ -4,182 +4,197 @@ import re import ConfigParser import dumps.WikiDump -from dumps.WikiDump import FileUtils, TimeUtils, MiscUtils +from dumps.WikiDump import FileUtils, MiscUtils +from dumps.exceptions import BackupError from os.path import exists import socket -import subprocess from subprocess import Popen, PIPE import shutil import time + class ContentFile(object): - def __init__(self, config, date, wikiName): + def __init__(self, config, date, wikiname): self._config = config self.date = date - self.incrDir = IncrementDir(self._config, date) - self.wikiName = wikiName + self.incr_dir = IncrementDir(self._config, date) + self.wikiname = wikiname # override this. 
- def getFileName(self): + def get_filename(self): return "content.txt" - def getPath(self): - return os.path.join(self.incrDir.getIncDir(self.wikiName),self.getFileName()) + def get_path(self): + return os.path.join(self.incr_dir.get_incdir(self.wikiname), self.get_filename()) - def getFileInfo(self): - return FileUtils.file_info(self.getPath()) - + def get_fileinfo(self): + return FileUtils.file_info(self.get_path()) + + class MaxRevIDFile(ContentFile): - def getFileName(self): + def get_filename(self): return "maxrevid.txt" + class StubFile(ContentFile): - def getFileName(self): - return "%s-%s-stubs-meta-hist-incr.xml.gz" % ( self.wikiName, self.date ) + def get_filename(self): + return "%s-%s-stubs-meta-hist-incr.xml.gz" % (self.wikiname, self.date) + class RevsFile(ContentFile): - def getFileName(self): - return "%s-%s-pages-meta-hist-incr.xml.bz2" % ( self.wikiName, self.date ) + def get_filename(self): + return "%s-%s-pages-meta-hist-incr.xml.bz2" % (self.wikiname, self.date) + class StatusFile(ContentFile): - def getFileName(self): + def get_filename(self): return "status.txt" - def getPath(self, date = None): - return os.path.join(self.incrDir.getIncDir(self.wikiName, date),self.getFileName()) + def get_path(self, date=None): + return os.path.join(self.incr_dir.get_incdir(self.wikiname, date), self.get_filename()) + class LockFile(ContentFile): - def getFileName(self): - return "%s-%s.lock" % ( self.wikiName, self.date ) + def get_filename(self): + return "%s-%s.lock" % (self.wikiname, self.date) - def getPath(self): - return os.path.join(self.incrDir.getIncDirNoDate(self.wikiName),self.getFileName()) + def get_path(self): + return os.path.join(self.incr_dir.get_incdir_no_date(self.wikiname), self.get_filename()) -class MaxRevIDLockFile(LockFile): - def getFileName(self): - return "%s-%s-maxrevid.lock" % ( self.wikiName, self.date ) - -class IncrDumpLockFile(LockFile): - def getFileName(self): - return "%s-%s-incrdump.lock" % ( self.wikiName, self.date ) 
+ +class MaxRevIDLockFile(LockFile): + def get_filename(self): + return "%s-%s-maxrevid.lock" % (self.wikiname, self.date) + + +class IncrDumpLockFile(LockFile): + def get_filename(self): + return "%s-%s-incrdump.lock" % (self.wikiname, self.date) + class MD5File(ContentFile): - def getFileName(self): - return "%s-%s-md5sums.txt" % ( self.wikiName, self.date ) + def get_filename(self): + return "%s-%s-md5sums.txt" % (self.wikiname, self.date) + class IndexFile(ContentFile): def __init__(self, config): self._config = config - self.incrDir = IncrementDir(self._config) + self.incr_dir = IncrementDir(self._config) - def getFileName(self): + def get_filename(self): return "index.html" - def getPath(self): - return os.path.join(self.incrDir.getIncDirBase(),self.getFileName()) + def get_path(self): + return os.path.join(self.incr_dir.get_incdir_base(), self.get_filename()) + class StatusInfo(object): - def __init__(self, config, date, wikiName): + def __init__(self, config, date, wikiname): self._config = config self.date = date - self.wikiName = wikiName - self.statusFile = StatusFile(self._config, self.date, self.wikiName) + self.wikiname = wikiname + self.status_file = StatusFile(self._config, self.date, self.wikiname) - def getStatus(self, date = None): + def get_status(self, date=None): status = "" - if exists(self.statusFile.getPath(date)): - status = FileUtils.read_file(self.statusFile.getPath(date)).rstrip() - return(status) + if exists(self.status_file.get_path(date)): + status = FileUtils.read_file(self.status_file.get_path(date)).rstrip() + return status - def setStatus(self, status): - FileUtils.write_file_in_place(self.statusFile.getPath(),status, self._config.fileperms) + def set_status(self, status): + FileUtils.write_file_in_place(self.status_file.get_path(), status, self._config.fileperms) + class Lock(object): - def __init__(self, config, date, wikiName): + def __init__(self, config, date, wikiname): self._config = config self.date = date - self.wikiName 
= wikiName - self.lockFile = LockFile(self._config, self.date, self.wikiName) + self.wikiname = wikiname + self.lockfile = LockFile(self._config, self.date, self.wikiname) - def isLocked(self): - return exists(self.lockFile.getPath()) + def is_locked(self): + return exists(self.lockfile.get_path()) - def getLock(self): + def get_lock(self): try: - if not exists(self._config.incrementalsDir): - os.makedirs(self._config.incrementalsDir) - f = FileUtils.atomic_create(self.lockFile.getPath(), "w") - f.write("%s %d" % (socket.getfqdn(), os.getpid())) - f.close() + if not exists(self._config.incrementals_dir): + os.makedirs(self._config.incrementals_dir) + fhandle = FileUtils.atomic_create(self.lockfile.get_path(), "w") + fhandle.write("%s %d" % (socket.getfqdn(), os.getpid())) + fhandle.close() return True except: return False - def isStaleLock(self): - if not self.isLocked(): + def is_stale_lock(self): + if not self.is_locked(): return False try: - timestamp = os.stat(self.lockFile.getPath()).st_mtime + timestamp = os.stat(self.lockfile.get_path()).st_mtime except: return False - if (time.time() - timestamp) > self._config.staleInterval: + if (time.time() - timestamp) > self._config.stale_interval: return True else: return False - - def unlock(self): - os.remove(self.lockFile.getPath()) - def getLockInfo(self): + def unlock(self): + os.remove(self.lockfile.get_path()) + + def get_lockinfo(self): try: - timestamp = os.stat(self.lockFile.getPath()).st_mtime - return time.strftime("%Y-%m-%d %H:%M:%S",timestamp) + timestamp = os.stat(self.lockfile.get_path()).st_mtime + return time.strftime("%Y-%m-%d %H:%M:%S", timestamp) except: return None + class IncrDumpLock(Lock): - def __init__(self, config, date, wikiName): + def __init__(self, config, date, wikiname): self._config = config self.date = date - self.wikiName = wikiName - self.lockFile = IncrDumpLockFile(self._config, self.date, self.wikiName) + self.wikiname = wikiname + self.lockfile = IncrDumpLockFile(self._config, 
self.date, self.wikiname) + class MaxRevIDLock(Lock): - def __init__(self,config, date, wikiName): + def __init__(self, config, date, wikiname): self._config = config self.date = date - self.wikiName = wikiName - self.lockFile = MaxRevIDLockFile(self._config, self.date, self.wikiName) + self.wikiname = wikiname + self.lockfile = MaxRevIDLockFile(self._config, self.date, self.wikiname) + class Config(dumps.WikiDump.Config): - def __init__(self, configFile=False): - self.projectName = False + def __init__(self, config_file=None): + self.project_name = False home = os.path.dirname(sys.argv[0]) - if (not configFile): - configFile = "dumpincr.conf" + if config_file is None: + config_file = "dumpincr.conf" self.files = [ - os.path.join(home,configFile), + os.path.join(home, config_file), "/etc/dumpincrementals.conf", os.path.join(os.getenv("HOME"), ".dumpincr.conf")] defaults = { - #"wiki": { + # "wiki": { "allwikislist": "", "privatewikislist": "", "closedwikislist": "", "skipwikislist": "", - #"output": { + # "output": { "incrementalsdir": "/dumps/public/incr", "templatedir": home, - "temp":"/dumps/temp", + "temp": "/dumps/temp", "webroot": "http://localhost/dumps/incr", "fileperms": "0640", "delay": "43200", "maxrevidstaleinterval": "3600", - #"database": { + # "database": { # moved defaults to get_db_user_and_password - #"tools": { - "mediawiki" : "", + # "tools": { + "mediawiki": "", "php": "/bin/php", "gzip": "/usr/bin/gzip", "bzip2": "/usr/bin/bzip2", @@ -187,7 +202,7 @@ "checkforbz2footer": "/usr/local/bin/checkforbz2footer", "writeuptopageid": "/usr/local/bin/writeuptopageid", "multiversion": "", - #"cleanup": { + # "cleanup": { "keep": "3", } @@ -198,35 +213,35 @@ print "The mandatory configuration section 'wiki' was not defined." raise ConfigParser.NoSectionError('wiki') - if not self.conf.has_option("wiki","mediawiki"): + if not self.conf.has_option("wiki", "mediawiki"): print "The mandatory setting 'mediawiki' in the section 'wiki' was not defined." 
- raise ConfigParser.NoOptionError('wiki','mediawiki') + raise ConfigParser.NoOptionError('wiki', 'mediawiki') self.db_user = None self.db_password = None - self.parseConfFile() + self.parse_conffile() - def parseConfFile(self): + def parse_conffile(self): self.mediawiki = self.conf.get("wiki", "mediawiki") self.wiki_dir = self.mediawiki - self.allWikisList = MiscUtils.db_list(self.conf.get("wiki", "allwikislist")) - self.privateWikisList = MiscUtils.db_list(self.conf.get("wiki", "privatewikislist")) - self.closedWikisList = MiscUtils.db_list(self.conf.get("wiki", "closedwikislist")) - self.skipWikisList = MiscUtils.db_list(self.conf.get("wiki", "skipwikislist")) + self.all_wikis_list = MiscUtils.db_list(self.conf.get("wiki", "allwikislist")) + self.private_wikis_list = MiscUtils.db_list(self.conf.get("wiki", "privatewikislist")) + self.closed_wikis_list = MiscUtils.db_list(self.conf.get("wiki", "closedwikislist")) + self.skip_wikis_list = MiscUtils.db_list(self.conf.get("wiki", "skipwikislist")) if not self.conf.has_section('output'): self.conf.add_section('output') - self.incrementalsDir = self.conf.get("output", "incrementalsdir") - self.tempDir = self.conf.get("output", "temp") - self.templateDir = self.conf.get("output", "templatedir") - self.webRoot = self.conf.get("output", "webroot") + self.incrementals_dir = self.conf.get("output", "incrementalsdir") + self.temp_dir = self.conf.get("output", "temp") + self.template_dir = self.conf.get("output", "templatedir") + self.webroot = self.conf.get("output", "webroot") self.fileperms = self.conf.get("output", "fileperms") - self.fileperms = int(self.fileperms,0) + self.fileperms = int(self.fileperms, 0) self.delay = self.conf.get("output", "delay") - self.delay = int(self.delay,0) - self.staleInterval = self.conf.get("output", "maxrevidstaleinterval") - self.staleInterval = int(self.staleInterval,0) + self.delay = int(self.delay, 0) + self.stale_interval = self.conf.get("output", "maxrevidstaleinterval") + 
self.stale_interval = int(self.stale_interval, 0) if not self.conf.has_section('tools'): self.conf.add_section('tools') @@ -234,15 +249,15 @@ self.gzip = self.conf.get("tools", "gzip") self.bzip2 = self.conf.get("tools", "bzip2") self.mysql = self.conf.get("tools", "mysql") - self.checkforbz2footer = self.conf.get("tools","checkforbz2footer") - self.writeuptopageid = self.conf.get("tools","writeuptopageid") - self.multiversion = self.conf.get("tools","multiversion") + self.checkforbz2footer = self.conf.get("tools", "checkforbz2footer") + self.writeuptopageid = self.conf.get("tools", "writeuptopageid") + self.multiversion = self.conf.get("tools", "multiversion") if not self.conf.has_section('cleanup'): self.conf.add_section('cleanup') self.keep = self.conf.getint("cleanup", "keep") - self.wikiDir = self.mediawiki # the parent class methods want this + self.wiki_dir = self.mediawiki # the parent class methods want this self.db_user = None self.db_password = None if not self.conf.has_section('database'): @@ -251,184 +266,193 @@ self.db_user = self.conf.get("database", "user") if self.conf.has_option('database', 'password'): self.db_password = self.conf.get("database", "password") - self.get_db_user_and_password() # get from MW adminsettings file if not set in conf file + self.get_db_user_and_password() # get from MW adminsettings file if not set in conf file - def readTemplate(self, name): - template = os.path.join(self.templateDir, name) + def read_template(self, name): + template = os.path.join(self.template_dir, name) return FileUtils.read_file(template) + class RunSimpleCommand(object): - def runWithOutput(command, maxtries = 3, shell=False): + def run_with_output(command, maxtries=3, shell=False): """Run a command and return the output as a string. 
Raises IncrementDumpsError on non-zero return code.""" success = False tries = 0 - while (not success and tries < maxtries): - proc = Popen(command, shell = shell, stdout = PIPE, stderr = PIPE) + while not success and tries < maxtries: + proc = Popen(command, shell=shell, stdout=PIPE, stderr=PIPE) output, error = proc.communicate() if not proc.returncode: success = True tries = tries + 1 if not success: - if type(command).__name__=='list': - commandString = " ".join(command) + if type(command).__name__ == 'list': + command_string = " ".join(command) else: - commandString = command + command_string = command if proc: - raise IncrementDumpsError("command '" + commandString + ( "' failed with return code %s " % proc.returncode ) + " and error '" + error + "'") + raise IncrementDumpsError("command '" + command_string + + ("' failed with return code %s " % proc.returncode) + + " and error '" + error + "'") else: - raise IncrementDumpsError("command '" + commandString + ( "' failed" ) + " and error '" + error + "'") + raise IncrementDumpsError("command '" + command_string + + ("' failed") + " and error '" + error + "'") return output - def runWithNoOutput(command, maxtries = 3, shell=False): + def run_with_no_output(command, maxtries=3, shell=False): """Run a command, expecting no output. 
Raises IncrementDumpsError on non-zero return code.""" success = False tries = 0 - while ((not success) and tries < maxtries): - proc = Popen(command, shell = shell, stderr = PIPE) + while (not success) and tries < maxtries: + proc = Popen(command, shell=shell, stderr=PIPE) # output will be None, we can ignore it - output, error = proc.communicate() + output_unused, error = proc.communicate() if not proc.returncode: success = True tries = tries + 1 if not success: - if type(command).__name__=='list': - commandString = " ".join(command) + if type(command).__name__ == 'list': + command_string = " ".join(command) else: - commandString = command - raise IncrementDumpsError("command '" + commandString + ( "' failed with return code %s " % proc.returncode ) + " and error '" + error + "'") - - runWithOutput = staticmethod(runWithOutput) - runWithNoOutput = staticmethod(runWithNoOutput) + command_string = command + raise IncrementDumpsError("command '" + command_string + + ("' failed with return code %s " % + proc.returncode) + " and error '" + error + "'") + + run_with_output = staticmethod(run_with_output) + run_with_no_output = staticmethod(run_with_no_output) + class MultiVersion(object): - def MWScriptAsString(config, maintenanceScript): - return(" ".join(MultiVersion.MWScriptAsArray(config, maintenanceScript))) + def mwscript_as_string(config, maintenance_script): + return " ".join(MultiVersion.mwscript_as_array(config, maintenance_script)) - def MWScriptAsArray(config, maintenanceScript): + def mwscript_as_array(config, maintenance_script): if config.multiversion != "": if exists(config.multiversion): - return [ config.multiversion, maintenanceScript ] - return [ "%s/maintenance/%s" % (config.mediawiki, maintenanceScript) ] + return [config.multiversion, maintenance_script] + return ["%s/maintenance/%s" % (config.mediawiki, maintenance_script)] - MWScriptAsString = staticmethod(MWScriptAsString) - MWScriptAsArray = staticmethod(MWScriptAsArray) + mwscript_as_string 
= staticmethod(mwscript_as_string) + mwscript_as_array = staticmethod(mwscript_as_array) + class DBServer(object): - def __init__(self, config, wikiName): + def __init__(self, config, wikiname): self.config = config - self.wikiName = wikiName - self.dbServer = self.defaultServer() + self.wikiname = wikiname + self.db_server = self.default_server() - def defaultServer(self): - if (not exists( self.config.php ) ): + def default_server(self): + if not exists(self.config.php): raise BackupError("php command %s not found" % self.config.php) - commandList = MultiVersion.MWScriptAsArray(self.config, "getSlaveServer.php") - command = [ self.config.php, "-q" ] - command.extend(commandList) - command.extend( [ "--wiki=%s" % self.wikiName, "--group=dump" ]) - return RunSimpleCommand.runWithOutput(command, shell=False).rstrip() + command_list = MultiVersion.mwscript_as_array(self.config, "getSlaveServer.php") + command = [self.config.php, "-q"] + command.extend(command_list) + command.extend(["--wiki=%s" % self.wikiname, "--group=dump"]) + return RunSimpleCommand.run_with_output(command, shell=False).rstrip() - def buildSqlCommand(self, query): + def build_sql_command(self, query): """Put together a command to execute an sql query to the server for this DB.""" - if (not exists( self.config.mysql ) ): + if not exists(self.config.mysql): raise BackupError("mysql command %s not found" % self.config.mysql) - command = "/bin/echo '%s' | %s -h %s -u %s " % ( query, self.config.mysql, self.dbServer, self.config.db_user ) + command = ("/bin/echo '%s' | %s -h %s -u %s " % + (query, self.config.mysql, self.db_server, self.config.db_user)) if self.config.db_password != "": command = command + "-p" + self.config.db_password - command = command + " -r --silent " + self.wikiName + command = command + " -r --silent " + self.wikiname return command + class IncrementDumpsError(Exception): pass + class IncrementDir(object): - def __init__(self, config, date = None): + def __init__(self, config, 
date=None): self._config = config self.date = date - def getIncDirBase(self): - return self._config.incrementalsDir + def get_incdir_base(self): + return self._config.incrementals_dir - def getIncDirNoDate(self, wikiName): - return os.path.join(self.getIncDirBase(), wikiName) + def get_incdir_no_date(self, wikiname): + return os.path.join(self.get_incdir_base(), wikiname) - def getIncDir(self, wikiName, date = None): - if (date == None): - return os.path.join(self.getIncDirBase(), wikiName, self.date) + def get_incdir(self, wikiname, date=None): + if date is None: + return os.path.join(self.get_incdir_base(), wikiname, self.date) else: - return os.path.join(self.getIncDirBase(), wikiName, date) + return os.path.join(self.get_incdir_base(), wikiname, date) -class IncrementDumpsError(Exception): - pass class IncDumpDirs(object): - def __init__(self, config, wikiName): + def __init__(self, config, wikiname): self._config = config - self.wikiName = wikiName - self.incrDir = IncrementDir(self._config) + self.wikiname = wikiname + self.incr_dir = IncrementDir(self._config) - def getIncDumpDirs(self): - base = self.incrDir.getIncDirNoDate(self.wikiName) + def get_inc_dumpdirs(self): + base = self.incr_dir.get_incdir_no_date(self.wikiname) digits = re.compile(r"^\d{4}\d{2}\d{2}$") dates = [] try: - for dir in os.listdir(base): - if digits.match(dir): - dates.append(dir) + for dirname in os.listdir(base): + if digits.match(dirname): + dates.append(dirname) except OSError: return [] dates.sort() return dates - def cleanupOldIncrDumps(self, date): - old = self.getIncDumpDirs() + def cleanup_old_incrdumps(self, date): + old = self.get_inc_dumpdirs() if old: if old[-1] == date: old = old[:-1] if self._config.keep > 0: old = old[:-(self._config.keep)] for dump in old: - toRemove = os.path.join(self.incrDir.getIncDirNoDate(self.wikiName), dump) - shutil.rmtree("%s" % toRemove) + to_remove = os.path.join(self.incr_dir.get_incdir_no_date(self.wikiname), dump) + shutil.rmtree("%s" % 
to_remove) - def getPrevIncrDate(self, date, ok = False, revidok = False): + def get_prev_incrdate(self, date, dumpok=False, revidok=False): # find the most recent incr dump before the # specified date - # if "ok" is True, find most recent dump that completed successfully + # if "dumpok" is True, find most recent dump that completed successfully # if "revidok" is True, find most recent dump that has a populated maxrevid.txt file previous = None - old = self.getIncDumpDirs() + old = self.get_inc_dumpdirs() if old: for dump in old: if dump == date: return previous else: - if ok: - statusInfo = StatusInfo(self._config, dump, self.wikiName) - if statusInfo.getStatus(dump) == "done": + if dumpok: + status_info = StatusInfo(self._config, dump, self.wikiname) + if status_info.get_status(dump) == "done": previous = dump elif revidok: - maxRevIDFile = MaxRevIDFile(self._config, dump, self.wikiName) - if exists(maxRevIDFile.getPath()): - revid = FileUtils.read_file(maxRevIDFile.getPath().rstrip()) + max_revid_file = MaxRevIDFile(self._config, dump, self.wikiname) + if exists(max_revid_file.get_path()): + revid = FileUtils.read_file(max_revid_file.get_path().rstrip()) if int(revid) > 0: previous = dump else: previous = dump return previous - def getLatestIncrDate(self, ok = False): - # find the most recent incr dump - dirs = self.getIncDumpDirs() + def get_latest_incr_date(self, dumpok=False): + # find the most recent incr dump + dirs = self.get_inc_dumpdirs() if dirs: - if ok: + if dumpok: for dump in reversed(dirs): - statusInfo = StatusInfo(self._config, dump, self.wikiName) - if statusInfo.getStatus(dump) == "done": + status_info = StatusInfo(self._config, dump, self.wikiname) + if status_info.get_status(dump) == "done": return dump else: - return(dirs[-1]) + return dirs[-1] else: - return(None) + return None diff --git a/xmldumps-backup/incrementals/generateincrementals.py b/xmldumps-backup/incrementals/generateincrementals.py index bc683aa..41d2422 100644 --- 
a/xmldumps-backup/incrementals/generateincrementals.py +++ b/xmldumps-backup/incrementals/generateincrementals.py @@ -25,358 +25,356 @@ self.date = date self.cutoff = cutoff self.dryrun = dryrun - self.maxID = None + self.max_id = None - def getMaxRevID(self, wikiName): + def get_max_revid(self, wikiname): query = ("select rev_id from revision where rev_timestamp < \"%s\" " "order by rev_timestamp desc limit 1" % self.cutoff) - db = DBServer(self._config, wikiName) - self.maxID = RunSimpleCommand.runWithOutput(db.buildSqlCommand(query), - shell=True) + db_info = DBServer(self._config, wikiname) + self.max_id = RunSimpleCommand.run_with_output(db_info.build_sql_command(query), + shell=True) - def recordMaxRevID(self, wikiName): - self.getMaxRevID(wikiName) + def record_max_revid(self, wikiname): + self.get_max_revid(wikiname) if not self.dryrun: - fileObj = MaxRevIDFile(self._config, self.date, wikiName) - FileUtils.write_file_in_place(fileObj.getPath(), self.maxID, + file_obj = MaxRevIDFile(self._config, self.date, wikiname) + FileUtils.write_file_in_place(file_obj.get_path(), self.max_id, self._config.fileperms) - def readMaxRevIDFromFile(self, wikiName, date=None): + def read_max_revid_from_file(self, wikiname, date=None): if date is None: date = self.date try: - fileObj = MaxRevIDFile(self._config, date, wikiName) - return FileUtils.read_file(fileObj.getPath().rstrip()) + file_obj = MaxRevIDFile(self._config, date, wikiname) + return FileUtils.read_file(file_obj.get_path().rstrip()) except: return None - def exists(self, wikiName, date=None): + def exists(self, wikiname, date=None): if date is None: date = self.date - return exists(MaxRevIDFile(self._config, date, wikiName).getPath()) + return exists(MaxRevIDFile(self._config, date, wikiname).get_path()) class Link(object): - def makeLink(path, linkText): - return('<a href = "' + path + '">' + linkText + "</a>") + def make_link(path, link_text): + return '<a href = "' + path + '">' + link_text + "</a>" - 
makeLink = staticmethod(makeLink) + make_link = staticmethod(make_link) class Index(object): def __init__(self, config, date, verbose): self._config = config self.date = date - self.indexFile = IndexFile(self._config) - self.incrDir = IncrementDir(self._config) + self.indexfile = IndexFile(self._config) + self.incrdir = IncrementDir(self._config) self.verbose = verbose - def doAllWikis(self): + def do_all_wikis(self): text = "" - for w in self._config.allWikisList: - result = self.doOneWiki(w) + for wiki in self._config.all_wikis_list: + result = self.do_one_wiki(wiki) if result: log(self.verbose, "result for wiki %s is %s" - % (w, result)) + % (wiki, result)) text = text + "<li>" + result + "</li>\n" - indexText = (self._config.readTemplate("incrs-index.html") - % {"items": text}) - FileUtils.write_file_in_place(self.indexFile.getPath(), - indexText, self._config.fileperms) + index_text = (self._config.readTemplate("incrs-index.html") + % {"items": text}) + FileUtils.write_file_in_place(self.indexfile.get_path(), + index_text, self._config.fileperms) - def doOneWiki(self, w, date=None): - if (w not in self._config.privateWikisList and - w not in self._config.closedWikisList and - w not in self._config.skipWikisList): - incrDumpsDirs = IncDumpDirs(self._config, w) - if not exists(self.incrDir.getIncDirNoDate(w)): - log(self.verbose, "No dump for wiki %s" % w) + def do_one_wiki(self, wiki, date=None): + if (wiki not in self._config.private_wikis_list and + wiki not in self._config.closed_wikis_list and + wiki not in self._config.skip_wikis_list): + incr_dumps_dirs = IncDumpDirs(self._config, wiki) + if not exists(self.incrdir.get_incdir_no_date(wiki)): + log(self.verbose, "No dump for wiki %s" % wiki) return if date is not None: - incrDate = date + incr_date = date else: - incrDate = incrDumpsDirs.getLatestIncrDate(True) - if not incrDate: - log(self.verbose, "No dump for wiki %s" % w) + incr_date = incr_dumps_dirs.get_latest_incr_date(True) + if not incr_date: + 
log(self.verbose, "No dump for wiki %s" % wiki) return - otherRunsText = "other runs: %s" % Link.makeLink(w, w) + other_runs_text = "other runs: %s" % Link.make_link(wiki, wiki) try: - lock = IncrDumpLock(self._config, incrDate, w) - lockDate = lock.getLockInfo() + lock = IncrDumpLock(self._config, incr_date, wiki) + lock_date = lock.get_lockinfo() except: - lockDate = None - if lockDate is not None: - lockText = "run started on %s." % lockDate + lock_date = None + if lock_date is not None: + lock_text = "run started on %s." % lock_date else: - lockText = None + lock_text = None try: - stub = StubFile(self._config, incrDate, w) - (stubDate, stubSize) = stub.getFileInfo() + stub = StubFile(self._config, incr_date, wiki) + (stub_date, stub_size) = stub.get_fileinfo() log(self.verbose, "stub for %s %s %s" - % (w, safe(stubDate), safe(stubSize))) - if stubDate: - stubText = ("stubs: %s (size %s)" - % (Link.makeLink( - os.path.join( - w, incrDate, - stub.getFileName()), - stubDate), stubSize)) + % (wiki, safe(stub_date), safe(stub_size))) + if stub_date: + stub_text = ("stubs: %s (size %s)" + % (Link.make_link( + os.path.join( + wiki, incr_date, + stub.get_filename()), + stub_date), stub_size)) else: - stubText = None + stub_text = None - revs = RevsFile(self._config, incrDate, w) - (revsDate, revsSize) = revs.getFileInfo() + revs = RevsFile(self._config, incr_date, wiki) + (revs_date, revs_size) = revs.get_fileinfo() log(self.verbose, "revs for %s %s %s" - % (w, safe(revsDate), safe(revsSize))) - if revsDate: - revsText = ("revs: %s (size %s)" - % (Link.makeLink(os.path.join( - w, incrDate, revs.getFileName()), - revsDate), revsSize)) + % (wiki, safe(revs_date), safe(revs_size))) + if revs_date: + revs_text = ("revs: %s (size %s)" + % (Link.make_link(os.path.join( + wiki, incr_date, revs.get_filename()), + revs_date), revs_size)) else: - revsText = None + revs_text = None - stat = StatusFile(self._config, incrDate, w) - statContents = 
FileUtils.read_file(stat.getPath()) - log(self.verbose, "status for %s %s" % (w, safe(statContents))) - if statContents: - statText = "(%s)" % (statContents) + stat = StatusFile(self._config, incr_date, wiki) + stat_contents = FileUtils.read_file(stat.get_path()) + log(self.verbose, "status for %s %s" % (wiki, safe(stat_contents))) + if stat_contents: + stat_text = "(%s)" % (stat_contents) else: - statText = None + stat_text = None except: log(self.verbose, "Error encountered, no information available" - " for wiki %s" % w) + " for wiki %s" % wiki) return ("<strong>%s</strong> Error encountered," - " no information available | %s" % (w, otherRunsText)) + " no information available | %s" % (wiki, other_runs_text)) try: - wikinameText = "<strong>%s</strong>" % w + wikiname_text = "<strong>%s</strong>" % wiki - wikiInfo = (" ".join(filter(None, - [wikinameText, - lockText, statText])) - + "<br />") - wikiInfo = (wikiInfo + " " + - " | ".join(filter(None, - [stubText, revsText, - otherRunsText]))) + wiki_info = (" ".join(filter(None, + [wikiname_text, + lock_text, stat_text])) + + "<br />") + wiki_info = (wiki_info + " " + + " | ".join(filter(None, + [stub_text, revs_text, + other_runs_text]))) except: - if (self.verbose): + if self.verbose: traceback.print_exc(file=sys.stdout) log(self.verbose, "Error encountered formatting information" - " for wiki %s" % w) + " for wiki %s" % wiki) return ("Error encountered formatting information" - " for wiki %s" % w) + " for wiki %s" % wiki) - return wikiInfo + return wiki_info class DumpResults(object): - def __init__(self): - self.TODO = 1 - self.FAILED = -1 - self.OK = 0 + TODO = 1 + FAILED = -1 + GOOD = 0 class IncrDump(object): - def __init__(self, config, date, cutoff, wikiName, doStubs, - doRevs, doIndexUpdate, dryrun, verbose, forcerun): + def __init__(self, config, date, cutoff, wikiname, do_stubs, + do_revs, do_index_update, dryrun, verbose, forcerun): self._config = config self.date = date self.cutoff = cutoff - 
self.wikiName = wikiName - self.incrDir = IncrementDir(self._config, self.date) - self.doStubs = doStubs - self.doRevs = doRevs - self.doIndexUpdate = doIndexUpdate + self.wikiname = wikiname + self.incrdir = IncrementDir(self._config, self.date) + self.do_stubs = do_stubs + self.do_revs = do_revs + self.do_index_update = do_index_update self.dryrun = dryrun self.forcerun = forcerun - self.maxRevIDObj = MaxRevID(self._config, self.date, cutoff, - self.dryrun) - self.statusInfo = StatusInfo(self._config, self.date, self.wikiName) - self.stubFile = StubFile(self._config, self.date, self.wikiName) - self.revsFile = RevsFile(self._config, self.date, self.wikiName) - self.incrDumpsDirs = IncDumpDirs(self._config, self.wikiName) + self.max_revid_obj = MaxRevID(self._config, self.date, cutoff, + self.dryrun) + self.status_info = StatusInfo(self._config, self.date, self.wikiname) + self.stubfile = StubFile(self._config, self.date, self.wikiname) + self.revsfile = RevsFile(self._config, self.date, self.wikiname) + self.incr_dumps_dirs = IncDumpDirs(self._config, self.wikiname) self.verbose = verbose - def doOneWiki(self): - retCodes = DumpResults() - if (self.wikiName not in self._config.privateWikisList and - self.wikiName not in self._config.closedWikisList): - if not exists(self.incrDir.getIncDir(self.wikiName)): - os.makedirs(self.incrDir.getIncDir(self.wikiName)) + def do_one_wiki(self): + if (self.wikiname not in self._config.private_wikis_list and + self.wikiname not in self._config.closed_wikis_list): + if not exists(self.incrdir.get_incdir(self.wikiname)): + os.makedirs(self.incrdir.get_incdir(self.wikiname)) - status = self.statusInfo.getStatus() + status = self.status_info.get_status() if status == "done" and not self.forcerun: log(self.verbose, "wiki %s skipped, adds/changes dump already" - " complete" % self.wikiName) - return retCodes.OK + " complete" % self.wikiname) + return DumpResults.GOOD if not self.dryrun: - lock = IncrDumpLock(self._config, self.date, 
self.wikiName) - if not lock.getLock(): + lock = IncrDumpLock(self._config, self.date, self.wikiname) + if not lock.get_lock(): log(self.verbose, "wiki %s skipped, wiki is locked," " another process should be doing the job" - % self.wikiName) - return retCodes.TODO + % self.wikiname) + return DumpResults.TODO - self.incrDumpsDirs.cleanupOldIncrDumps(self.date) + self.incr_dumps_dirs.cleanup_old_incrdumps(self.date) - log(self.verbose, "Doing run for wiki: %s" % self.wikiName) + log(self.verbose, "Doing run for wiki: %s" % self.wikiname) try: - maxRevID = self.dumpMaxRevID() - if not maxRevID: - return retCodes.FAILED + max_revid = self.dump_max_revid() + if not max_revid: + return DumpResults.FAILED - prevRevID = self.getPrevRevID(maxRevID) - if not prevRevID: - return retCodes.FAILED + prev_revid = self.get_prev_revid(max_revid) + if not prev_revid: + return DumpResults.FAILED - if self.doStubs: - if not self.dumpStub(prevRevID, maxRevID): - return retCodes.FAILED + if self.do_stubs: + if not self.dump_stub(prev_revid, max_revid): + return DumpResults.FAILED - if self.doRevs: - if not self.dumpRevs(): - return retCodes.FAILED + if self.do_revs: + if not self.dump_revs(): + return DumpResults.FAILED if not self.dryrun: if not self.md5sums(): - return retCodes.FAILED - self.statusInfo.setStatus("done") + return DumpResults.FAILED + self.status_info.set_status("done") lock.unlock() - if self.doIndexUpdate: + if self.do_index_update: index = Index(self._config, self.date, self.verbose) - index.doAllWikis() + index.do_all_wikis() except: - if (self.verbose): + if self.verbose: traceback.print_exc(file=sys.stdout) if not self.dryrun: lock.unlock() - return retCodes.FAILED + return DumpResults.FAILED log(self.verbose, "Success! Wiki %s incremental dump complete." 
- % self.wikiName) - return retCodes.OK + % self.wikiname) + return DumpResults.GOOD - def dumpMaxRevID(self): - if not self.maxRevIDObj.exists(self.wikiName): + def dump_max_revid(self): + if not self.max_revid_obj.exists(self.wikiname): log(self.verbose, "Wiki %s retrieving max revid from db." - % self.wikiName) - self.maxRevIDObj.recordMaxRevID(self.wikiName) - maxRevID = self.maxRevIDObj.maxID + % self.wikiname) + self.max_revid_obj.record_max_revid(self.wikiname) + max_revid = self.max_revid_obj.max_id else: - maxRevID = self.maxRevIDObj.readMaxRevIDFromFile( - self.wikiName) + max_revid = self.max_revid_obj.read_max_revid_from_file( + self.wikiname) # end rev id is not included in dump - if maxRevID is not None: - maxRevID = str(int(maxRevID) + 1) + if max_revid is not None: + max_revid = str(int(max_revid) + 1) - log(self.verbose, "maxRevID is %s" % safe(maxRevID)) - return maxRevID + log(self.verbose, "max_revid is %s" % safe(max_revid)) + return max_revid - def getPrevRevID(self, maxRevID): + def get_prev_revid(self, max_revid): # get the previous rundate, with or without maxrevid file # we can populate that file if need be - prevDate = self.incrDumpsDirs.getPrevIncrDate(self.date) - log(self.verbose, "prevDate is %s" % safe(prevDate)) + prev_date = self.incr_dumps_dirs.get_prev_incrdate(self.date) + log(self.verbose, "prev_date is %s" % safe(prev_date)) - prevRevID = None + prev_revid = None - if prevDate: - prevRevID = self.maxRevIDObj.readMaxRevIDFromFile( - self.wikiName, prevDate) + if prev_date: + prev_revid = self.max_revid_obj.read_max_revid_from_file( + self.wikiname, prev_date) - if prevRevID is None: + if prev_revid is None: log(self.verbose, "Wiki %s retrieving prevRevId from db." 
- % self.wikiName) - prevRevIDObj = MaxRevID(self._config, prevDate, - cutoffFromDate(prevDate, self._config), - self.dryrun) - prevRevIDObj.recordMaxRevID(self.wikiName) - prevRevID = prevRevIDObj.maxID + % self.wikiname) + prev_revid_obj = MaxRevID(self._config, prev_date, + cutoff_from_date(prev_date, self._config), + self.dryrun) + prev_revid_obj.record_max_revid(self.wikiname) + prev_revid = prev_revid_obj.max_id else: log(self.verbose, "Wiki %s no previous runs, using %s - 10 " - % (self.wikiName, maxRevID)) - prevRevID = str(int(maxRevID) - 10) - if int(prevRevID) < 1: - prevRevID = str(1) + % (self.wikiname, max_revid)) + prev_revid = str(int(max_revid) - 10) + if int(prev_revid) < 1: + prev_revid = str(1) # this incr will cover every revision from the last # incremental through the maxid we wrote out already. - if prevRevID is not None: - prevRevID = str(int(prevRevID) + 1) - log(self.verbose, "prevRevID is %s" % safe(prevRevID)) - return prevRevID + if prev_revid is not None: + prev_revid = str(int(prev_revid) + 1) + log(self.verbose, "prev_revid is %s" % safe(prev_revid)) + return prev_revid - def dumpStub(self, startRevID, endRevID): - scriptCommand = MultiVersion.MWScriptAsArray(self._config, - "dumpBackup.php") + def dump_stub(self, start_revid, end_revid): + script_command = MultiVersion.mwscript_as_array(self._config, + "dumpBackup.php") command = ["%s" % self._config.php, "-q"] - command.extend(scriptCommand) - command.extend(["--wiki=%s" % self.wikiName, "--stub", "--quiet", - "--output=gzip:%s" % self.stubFile.getPath(), - "--revrange", "--revstart=%s" % startRevID, - "--revend=%s" % endRevID]) + command.extend(script_command) + command.extend(["--wiki=%s" % self.wikiname, "--stub", "--quiet", + "--output=gzip:%s" % self.stubfile.get_path(), + "--revrange", "--revstart=%s" % start_revid, + "--revend=%s" % end_revid]) if self.dryrun: print "would run command for stubs dump:", command else: - error = RunSimpleCommand.runWithNoOutput(command, 
shell=False) - if (error): + error = RunSimpleCommand.run_with_no_output(command, shell=False) + if error: log(self.verbose, "error producing stub files for wiki" - % self.wikiName) + % self.wikiname) return False return True - def dumpRevs(self): - scriptCommand = MultiVersion.MWScriptAsArray(self._config, - "dumpTextPass.php") + def dump_revs(self): + script_command = MultiVersion.mwscript_as_array(self._config, + "dumpTextPass.php") command = ["%s" % self._config.php, "-q"] - command.extend(scriptCommand) - command.extend(["--wiki=%s" % self.wikiName, - "--stub=gzip:%s" % self.stubFile.getPath(), + command.extend(script_command) + command.extend(["--wiki=%s" % self.wikiname, + "--stub=gzip:%s" % self.stubfile.get_path(), "--quiet", "--spawn=%s" % self._config.php, - "--output=bzip2:%s" % self.revsFile.getPath()]) + "--output=bzip2:%s" % self.revsfile.get_path()]) if self.dryrun: print "would run command for revs dump:", command else: - error = RunSimpleCommand.runWithNoOutput(command, shell=False) - if (error): + error = RunSimpleCommand.run_with_no_output(command, shell=False) + if error: log(self.verbose, "error producing revision text files" - " for wiki" % self.wikiName) + " for wiki" % self.wikiname) return False return True - def md5sumOneFile(self, filename): + def md5sum_one_file(self, filename): summer = hashlib.md5() infile = file(filename, "rb") bufsize = 4192 * 32 - buffer = infile.read(bufsize) - while buffer: - summer.update(buffer) - buffer = infile.read(bufsize) + buff = infile.read(bufsize) + while buff: + summer.update(buff) + buff = infile.read(bufsize) infile.close() return summer.hexdigest() def md5sums(self): try: - md5File = MD5File(self._config, self.date, self.wikiName) + md5file = MD5File(self._config, self.date, self.wikiname) text = "" files = [] - if self.doStubs: - files.append(self.stubFile.getPath()) - if self.doRevs: - files.append(self.revsFile.getPath()) - for f in files: - text = text + "%s\n" % self.md5sumOneFile(f) - 
FileUtils.write_file_in_place(md5File.getPath(), + if self.do_stubs: + files.append(self.stubfile.get_path()) + if self.do_revs: + files.append(self.revsfile.get_path()) + for fname in files: + text = text + "%s\n" % self.md5sum_one_file(fname) + FileUtils.write_file_in_place(md5file.get_path(), text, self._config.fileperms) return True except: @@ -396,51 +394,50 @@ class IncrDumpLoop(object): - def __init__(self, config, date, cutoff, doStubs, doRevs, - doIndexUpdate, dryrun, verbose, forcerun): + def __init__(self, config, date, cutoff, do_stubs, do_revs, + do_index_update, dryrun, verbose, forcerun): self._config = config self.date = date self.cutoff = cutoff - self.doStubs = doStubs - self.doRevs = doRevs - self.doIndexUpdate = doIndexUpdate + self.do_stubs = do_stubs + self.do_revs = do_revs + self.do_index_update = do_index_update self.dryrun = dryrun self.verbose = verbose self.forcerun = forcerun - def doRunOnAllWikis(self): - retCodes = DumpResults() + def do_run_on_all_wikis(self): failures = 0 todos = 0 - for w in self._config.allWikisList: - dump = IncrDump(self._config, self.date, self.cutoff, w, - self.doStubs, self.doRevs, self.doIndexUpdate, + for wiki in self._config.all_wikis_list: + dump = IncrDump(self._config, self.date, self.cutoff, wiki, + self.do_stubs, self.do_revs, self.do_index_update, self.dryrun, self.verbose, self.forcerun) - result = dump.doOneWiki() - if result == retCodes.FAILED: + result = dump.do_one_wiki() + if result == DumpResults.FAILED: failures = failures + 1 - elif result == retCodes.TODO: + elif result == DumpResults.TODO: todos = todos + 1 return (failures, todos) - def doAllWikisTilDone(self, numFails): + def do_all_wikis_til_done(self, num_fails): fails = 0 while 1: - (failures, todos) = self.doRunOnAllWikis() + (failures, todos) = self.do_run_on_all_wikis() if not failures and not todos: break fails = fails + 1 - if fails > numFails: + if fails > num_fails: raise IncrementDumpsError("Too many consecutive failures," 
"giving up") time.sleep(300) -def cutoffFromDate(date, config): +def cutoff_from_date(date, config): return time.strftime("%Y%m%d%H%M%S", time.gmtime(calendar.timegm(time.strptime( date + "235900UTC", "%Y%m%d%H%M%S%Z")) - - config.delay)) + - config.delay)) def usage(message=None): @@ -466,13 +463,13 @@ sys.stderr.write(usage_message) sys.exit(1) + def main(): - configFile = False - result = False + config_file = False date = None - doStubs = True - doRevs = True - doIndexUpdate = True + do_stubs = True + do_revs = True + do_index_update = True dryrun = False verbose = False forcerun = False @@ -489,16 +486,16 @@ if opt == "--date": date = val elif opt == "--configfile": - configFile = val + config_file = val elif opt == "--stubsonly": - doRevs = False - doIndexUpdate = False + do_revs = False + do_index_update = False elif opt == "--revsonly": - doStubs = False - doIndexUpdate = False + do_stubs = False + do_index_update = False elif opt == "--indexonly": - doStubs = False - doRevs = False + do_stubs = False + do_revs = False elif opt == "--dryrun": dryrun = True elif opt == "--verbose": @@ -506,12 +503,12 @@ elif opt == "--forcerun": forcerun = True - if not doRevs and not doStubs and not doIndexUpdate: + if not do_revs and not do_stubs and not do_index_update: usage("You may not specify more than one of stubsonly," "revsonly and indexonly together.") - if (configFile): - config = Config(configFile) + if config_file: + config = Config(config_file) else: config = Config() @@ -520,18 +517,17 @@ cutoff = time.strftime("%Y%m%d%H%M%S", time.gmtime(time.time() - config.delay)) else: - cutoff = cutoffFromDate(date, config) + cutoff = cutoff_from_date(date, config) if len(remainder) > 0: - dump = IncrDump(config, date, cutoff, remainder[0], doStubs, - doRevs, doIndexUpdate, dryrun, verbose, forcerun) - dump.doOneWiki() + dump = IncrDump(config, date, cutoff, remainder[0], do_stubs, + do_revs, do_index_update, dryrun, verbose, forcerun) + dump.do_one_wiki() else: - dump = 
IncrDumpLoop(config, date, cutoff, doStubs, doRevs, - doIndexUpdate, dryrun, verbose, forcerun) - dump.doAllWikisTilDone(3) + dump = IncrDumpLoop(config, date, cutoff, do_stubs, do_revs, + do_index_update, dryrun, verbose, forcerun) + dump.do_all_wikis_til_done(3) if __name__ == "__main__": main() - -- To view, visit https://gerrit.wikimedia.org/r/249074 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: If463318a055962097b2634f9dd77c90501105aea Gerrit-PatchSet: 1 Gerrit-Project: operations/dumps Gerrit-Branch: ariel Gerrit-Owner: ArielGlenn <ar...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits