ArielGlenn has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/401051 )

Change subject: pep8 for rsyncmedia script
......................................................................

pep8 for rsyncmedia script

Change-Id: I85ca822d84ef2466218d6ba830e01178922ad931
---
M xmldumps-backup/unused/rsyncmedia.py
1 file changed, 174 insertions(+), 153 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/operations/dumps refs/changes/51/401051/1

diff --git a/xmldumps-backup/unused/rsyncmedia.py b/xmldumps-backup/unused/rsyncmedia.py
index 36c608a..b95fe9d 100644
--- a/xmldumps-backup/unused/rsyncmedia.py
+++ b/xmldumps-backup/unused/rsyncmedia.py
@@ -1,50 +1,56 @@
-import os, sys, getopt, subprocess
+import os
+import sys
+import getopt
 from subprocess import Popen, PIPE
 
+
+def make_path(dir_list):
+    dirs = filter(None, dir_list)
+    if len(dirs) == 0:
+        return None
+    elif len(dirs) == 1:
+        # this is ok even with 'None'
+        return dirs[0]
+    else:
+        return os.path.join(*dirs)
+
+
 class Rsyncer(object):
-    def __init__(self, rsyncHost, remoteBaseDir, outputDir, verbose, dryrun):
-        self.rsyncHost = rsyncHost
-        self.remoteBaseDir = remoteBaseDir
-        self.outputDir = outputDir
+    def __init__(self, rsync_host, remote_base_dir, output_dir, verbose, 
dryrun):
+        self.rsync_host = rsync_host
+        self.remote_base_dir = remote_base_dir
+        self.output_dir = output_dir
         self.verbose = verbose
         self.dryrun = dryrun
-        self.dirList = []
+        self.dir_list = []
 
-    def makePath(self, dirList):
-        dirs = filter(None, dirList)
-        if len(dirs) == 0:
-            return None
-        elif len(dirs) == 1:
-            # this is ok even with 'None'
-            return dirs[0]
-        else:
-            return os.path.join(*dirs)
+    def do_rsync(self, files_to_do, get_dir_list=False):
 
-    def doRsync(self, filesToDo, localPath, getDirList = False):
-        localdir = self.makePath([ self.outputDir, localPath ])
-
-        command = [ "rsync", "-rltDp" ]
-        if getDirList:
-            if filesToDo:
-                filesToDoList = filesToDo.split('\n')
-                if len(filesToDoList) > 1:
-                    sys.stderr.write("refusing to generate wanted dir list for 
multiple toplevel dirs %s\n" % filesToDo)
+        command = ["rsync", "-rltDp"]
+        if get_dir_list:
+            if files_to_do:
+                files_to_do_list = files_to_do.split('\n')
+                if len(files_to_do_list) > 1:
+                    sys.stderr.write("refusing to generate wanted "
+                                     "dir list for multiple toplevel dirs %s\n"
+                                     % files_to_do)
                     return
-                # we want the first level of hash dirs (to see what exists, so 
we can request only those)
+                # we want the first level of hash dirs (to see what
+                # exists, so we can request only those)
                 # but we don't want anything below that.
-                excludeLevels = 3 + filesToDoList[0].count('/')
-                excludeString = "/*" * excludeLevels
-                command.extend([ "-f", "- "+ excludeString ])
-            command.extend([ "--list-only" ])
-            dryrunSaved = self.dryrun
+                exclude_levels = 3 + files_to_do_list[0].count('/')
+                exclude_string = "/*" * exclude_levels
+                command.extend(["-f", "- " + exclude_string])
+            command.extend(["--list-only"])
+            dryrun_saved = self.dryrun
             self.dryrun = False  # we don't actually change anything with 
--list-only so run it
-        if filesToDo:
-            command.extend([ "--files-from", "-" ])
-        if rsyncHost:
-            command.extend([self.rsyncHost + "::" + self.remoteBaseDir, 
self.outputDir ])
+        if files_to_do:
+            command.extend(["--files-from", "-"])
+        if self.rsync_host:
+            command.extend([self.rsync_host + "::" + self.remote_base_dir, 
self.output_dir])
         else:
             # "remote" dir is accessible as a local filesystem
-            command.extend([ self.remoteBaseDir, self.outputDir ])
+            command.extend([self.remote_base_dir, self.output_dir])
 
         # 23 = Partial transfer due to error
         # 24 = Partial transfer due to vanished source files
@@ -54,35 +60,37 @@
         # some of each type of error on every single run, log things
         # but don't bail
 
-        if (getDirList):
-            result, output = self.dirList = self.doCommand(command, filesToDo, 
[23, 24], displayOutput = False)
+        if get_dir_list:
+            result_unused, output = self.dir_list = self.do_command(
+                command, files_to_do, [23, 24], display_output=False)
         else:
-            result, output = self.doCommand(command, filesToDo, [23, 24])
-        if getDirList:
-            self.dryrun = dryrunSaved
+            result_unused, output = self.do_command(command, files_to_do, [23, 
24])
+        if get_dir_list:
+            self.dryrun = dryrun_saved
         return output
 
-    def doCommand(self, command, inputToCommand, returnCodesAllowed, 
displayOutput = True):
+    def do_command(self, command, input_to_command, return_codes_allowed, 
display_output=True):
         output = None
-        commandString = " ".join(command)
+        command_string = " ".join(command)
         if self.dryrun:
             sys.stderr.write("would run commmand: ")
         elif self.verbose:
             sys.stderr.write("about to run command: ")
         if self.dryrun or self.verbose:
-            sys.stderr.write(commandString)
-            if inputToCommand:
-                sys.stderr.write("\nwith input: %s" % inputToCommand)
+            sys.stderr.write(command_string)
+            if input_to_command:
+                sys.stderr.write("\nwith input: %s" % input_to_command)
             sys.stderr.write("\n")
         if self.dryrun:
             return 0, output
 
         try:
             error = None
-            proc = Popen(command, stdin = PIPE, stdout = PIPE, stderr = PIPE)
-            output, error = proc.communicate(inputToCommand)
-            if proc.returncode and proc.returncode not in returnCodesAllowed:
-                sys.stderr.write("command '%s failed with return code %s and 
error %s\n" % ( command, proc.returncode,  error ))
+            proc = Popen(command, stdin=PIPE, stdout=PIPE, stderr=PIPE)
+            output, error = proc.communicate(input_to_command)
+            if proc.returncode and proc.returncode not in return_codes_allowed:
+                sys.stderr.write("command '%s failed with return code %s "
+                                 "and error %s\n" % (command, proc.returncode, 
error))
                 # we don't bail here, let the caller decide what to do about 
it"
         except:
             sys.stderr.write("command %s failed\n" % command)
@@ -90,7 +98,7 @@
                 sys.stderr.write("%s\n" % error)
             # the problem is probably serious enough that we should refuse to 
do further processing
             raise
-        if output and displayOutput:
+        if output and display_output:
             print output
         if error:
             if error:
@@ -105,154 +113,163 @@
         self.wtype = wtype
         self.wikidir = wikidir
 
-    def doRsync(self):
+    def do_rsync(self):
 
         if self.wtype == "huge":
             # do all 256 shards separately
-            self.doHugeRsync()
+            self.do_huge_rsync()
 
         elif self.wtype == "big":
             # do the top 16 shards separately
-            self.doBigRsync()
+            self.do_big_rsync()
         else:
             # do the whole thing at once
-            self.doNormalRsync()
+            self.do_normal_rsync()
 
-    def getFilesFrom(self, hashdir = None, subdir = None):
+    def get_files_from(self, hashdir=None, subdir=None):
         """get list of directories for rsync that will
         be fed to the "--files-from -" option"""
-        return self.rsyncer.makePath([ self.wikidir, hashdir, subdir ])
+        return make_path([self.wikidir, hashdir, subdir])
 
-    def getLocalPath(self, hashdir = None, subdir = None):
-        """get the local output path for.."""
-        return self.rsyncer.makePath([ self.wikidir, hashdir, subdir ])
-
-    def doHugeRsync(self):
+    def do_huge_rsync(self):
         if self.rsyncer.verbose or self.rsyncer.dryrun:
             sys.stderr.write("doing 256 separate shards for wiki %s\n" % 
self.wiki)
 
-        dirs = 
["0","1","2","3","4","5","6","7","8","9","a","b","c","d","e","f"]
-        subdirs = 
["0","1","2","3","4","5","6","7","8","9","a","b","c","d","e","f"]
-        for d in dirs:
-            for s in subdirs:
-                filesFrom = self.getFilesFrom(d, d+s)
-                localPath = self.getLocalPath(d, d+s)
-                self.rsyncer.doRsync(filesFrom, localPath)
+        dirs = ["0", "1", "2", "3", "4", "5", "6", "7", "8",
+                "9", "a", "b", "c", "d", "e", "f"]
+        subdirs = ["0", "1", "2", "3", "4", "5", "6", "7", "8",
+                   "9", "a", "b", "c", "d", "e", "f"]
+        for dname in dirs:
+            for subdname in subdirs:
+                files_from = self.get_files_from(dname, dname+subdname)
+                self.rsyncer.do_rsync(files_from)
         # now get the archive dir
-        for d in dirs:
-            filesFrom = self.getFilesFrom("archive", d)
-            localPath = self.getLocalPath("archive", d)
-            self.rsyncer.doRsync(filesFrom, localPath)
+        for dname in dirs:
+            files_from = self.get_files_from("archive", dname)
+            self.rsyncer.do_rsync(files_from)
 
-    def doBigRsync(self):
+    def do_big_rsync(self):
         if self.rsyncer.verbose or self.rsyncer.dryrun:
             sys.stderr.write("doing 16 separate shards for wiki %s\n" % 
self.wiki)
 
-        dirs = [ 
"0","1","2","3","4","5","6","7","8","9","a","b","c","d","e","f","archive"]
-        for d in dirs:
-            filesFrom = self.getFilesFrom(d)
-            localPath = self.getLocalPath(d)
-            self.rsyncer.doRsync(filesFrom, localPath)
+        dirs = ["0", "1", "2", "3", "4", "5", "6", "7", "8",
+                "9", "a", "b", "c", "d", "e", "f", "archive"]
+        for dname in dirs:
+            files_from = self.get_files_from(dname)
+            self.rsyncer.do_rsync(files_from)
 
-    def doNormalRsync(self):
-        # for anything not big or huge, get list of media dirs that the wiki 
has, this will be the list of dirs we want
+    def do_normal_rsync(self):
+        # for anything not big or huge, get list of media dirs that
+        # the wiki has, this will be the list of dirs we want
         if self.rsyncer.verbose or self.rsyncer.dryrun:
             sys.stderr.write("retrieving dir list for wiki %s\n" % self.wiki)
-        localPath = self.getLocalPath()
-        dirsFound = self.rsyncer.doRsync(self.wikidir, localPath, getDirList = 
True)
+        dirs_found = self.rsyncer.do_rsync(
+            self.wikidir, get_dir_list=True)
 
         # explicitly list the 17 dirs we want
-        dirsWanted = [ self.rsyncer.makePath([self.wikidir, d]) for d in 
["0","1","2","3","4","5","6","7","8","9","a","b","c","d","e","f","archive"] ]
-        # filter out the ones not in dirList, keeps rsync from whining about 
nonexistent dirs
+        dirs_wanted = [make_path([self.wikidir, d])
+                       for d in ["0", "1", "2", "3", "4", "5", "6", "7", "8",
+                                 "9", "a", "b", "c", "d", "e", "f", "archive"]]
+        # filter out the ones not in dirList, keeps rsync from
+        # whining about nonexistent dirs
 
         # format of the returned lines is
         # drwxrwxr-x        4096 2012/04/06 10:45:34 blahblah/8
-        filesFrom = [ f.rsplit(None,1)[1] for f in dirsFound.split('\n') if 
'/' in f ]
-        filesFrom = "\n".join([ f for f in filesFrom if f in dirsWanted ])
-        if filesFrom:
+        files_from = [f.rsplit(None, 1)[1] for f in dirs_found.split('\n') if 
'/' in f]
+        files_from = "\n".join([f for f in files_from if f in dirs_wanted])
+        if files_from:
             if self.rsyncer.verbose or self.rsyncer.dryrun:
                 sys.stderr.write("doing 1 shard for wiki %s\n" % self.wiki)
-            self.rsyncer.doRsync(filesFrom, localPath)
+            self.rsyncer.do_rsync(files_from)
         else:
             if self.rsyncer.verbose or self.rsyncer.dryrun:
                 sys.stderr.write("skipping wiki %s, no dirs to sync\n" % 
self.wiki)
 
 
-def usage(message = None):
+def usage(message=None):
     if message:
         sys.stderr.write("%s\n" % message)
-        sys.stderr.write("Usage: python rsyncmedia.py [--remotehost hostname] 
--remotedir dirname\n")
-        sys.stderr.write("                      --localdir dirname --wikilist 
filename\n")
-        sys.stderr.write("                      [--big wiki1,wiki2,...] 
[--huge wiki3,wiki4,...]\n")
-        sys.stderr.write("                      [--verbose] [--dryrun]\n")
-        sys.stderr.write("\n")
-        sys.stderr.write("This script rsyncs media from a primary media host. 
getting only media\n")
-        sys.stderr.write("publically available (no deleted images, no data 
from private wikis)\n")
-        sys.stderr.write("and skipping thumbs, math, timeline, temp, old and 
misc other directories\n")
-        sys.stderr.write("that may have been created over time.\n")
-        sys.stderr.write("\n")
-        sys.stderr.write("--remotehost:    hostname of the remote host form 
which we are rsyncing.\n")
-        sys.stderr.write("                 if this option is ommited, the 
remotedir option is assumed\n")
-        sys.stderr.write("                 to refer to a local filesystem (for 
example nfs-mounted)\n")
-        sys.stderr.write("--remotedir:     path to point in remote directory 
in which media for the\n")
-        sys.stderr.write("                 wiki(s) are stored; this path is 
relative to the rsync root.\n")
-        sys.stderr.write("--localdir:      path to root of local directory 
tree in which media for\n")
-        sys.stderr.write("                 the wiki(s) will be copied.\n")
-        sys.stderr.write("--wikilist       filename which contains names of 
the wiki databases and their\n")
-        sys.stderr.write("                 corresponding media upload 
directories,  one wiki per line,\n")
-        sys.stderr.write("                 line, to be rsynced. The wikiname 
and the directory should be\n")
-        sys.stderr.write("                 separated by a tab character.  If 
'-' is given as the name\n")
-        sys.stderr.write("                 wiki db names and directories will 
be read from stdin.\n")
-        sys.stderr.write("--big            comma-separated list of wiki db 
names which have enough media\n")
-        sys.stderr.write("                 that we should rsync them in 16 
batches, one per subdir\n")
-        sys.stderr.write("                 instead of all at once.\n")
-        sys.stderr.write("--huge           comma-separated list of wiki db 
names which have enough media\n")
-        sys.stderr.write("                 that we should rsync them in 256 
batches, one per 2nd level\n")
-        sys.stderr.write("                 subdir instead of all at once.\n")
-        sys.stderr.write("--verbose:       print lots of status messages.\n")
-        sys.stderr.write("--dryrun:        don't do the rsync, print what 
would be done.\n")
-        sys.stderr.write("wiki             name of wikidb for rsync; if 
specified, this will override\n")
-        sys.stderr.write("                 any file given for 'wikilist'.\n")
-        sys.exit(1)
+    usage_message = """
+Usage: python rsyncmedia.py [--remotehost hostname] --remotedir dirname
+                      --localdir dirname --wikilist filename
+                      [--big wiki1,wiki2,...] [--huge wiki3,wiki4,...]
+                      [--verbose] [--dryrun]
 
-def getCommaSepList(text):
+This script rsyncs media from a primary media host. getting only media
+publically available (no deleted images, no data from private wikis)
+and skipping thumbs, math, timeline, temp, old and misc other directories
+that may have been created over time.
+
+--remotehost:    hostname of the remote host form which we are rsyncing.
+                 if this option is ommited, the remotedir option is assumed
+                 to refer to a local filesystem (for example nfs-mounted)
+--remotedir:     path to point in remote directory in which media for the
+                 wiki(s) are stored; this path is relative to the rsync root.
+--localdir:      path to root of local directory tree in which media for
+                 the wiki(s) will be copied.
+--wikilist       filename which contains names of the wiki databases and their
+                 corresponding media upload directories,  one wiki per line,
+                 line, to be rsynced. The wikiname and the directory should be
+                 separated by a tab character.  If '-' is given as the name
+                 wiki db names and directories will be read from stdin.
+--big            comma-separated list of wiki db names which have enough media
+                 that we should rsync them in 16 batches, one per subdir
+                 instead of all at once.
+--huge           comma-separated list of wiki db names which have enough media
+                 that we should rsync them in 256 batches, one per 2nd level
+                 subdir instead of all at once.
+--verbose:       print lots of status messages.
+--dryrun:        don't do the rsync, print what would be done.
+
+wiki             name of wikidb for rsync; if specified, this will override
+                 any file given for 'wikilist'.
+"""
+    sys.stderr.write(usage_message)
+    sys.exit(1)
+
+
+def get_comma_sep_list(text):
     if text:
         if ',' in text:
             result = text.split(',')
         else:
-            result = [ text ]
+            result = [text]
     else:
         result = []
     return result
 
-if __name__ == "__main__":
-    remoteDir = None
-    rsyncHost = None
-    localDir = None
+
+def do_main():
+    remote_dir = None
+    rsync_host = None
+    local_dir = None
     big = None
     huge = None
-    wikiListFile = None
+    wiki_list_file = None
     verbose = False
     dryrun = False
 
     try:
-        (options, remainder) = getopt.gnu_getopt(sys.argv[1:], "", [ "big=", 
"huge=", "localdir=", "remotedir=", "remotehost=", "localdir=", "wikilist=", 
"verbose", "dryrun" ])
-    except:
+        (options, remainder) = getopt.gnu_getopt(
+            sys.argv[1:], "", ["big=", "huge=", "localdir=", "remotedir=",
+                               "remotehost=", "localdir=", "wikilist=",
+                               "verbose", "dryrun"])
+    except Exception:
         usage("Unknown option specified")
 
     for (opt, val) in options:
         if opt == "--remotedir":
-            remoteDir = val
+            remote_dir = val
         elif opt == "--remotehost":
-            rsyncHost = val
+            rsync_host = val
         elif opt == "--localdir":
-            localDir = val
+            local_dir = val
         elif opt == "--big":
             big = val
         elif opt == "--huge":
             huge = val
         elif opt == "--wikilist":
-            wikiListFile = val
+            wiki_list_file = val
         elif opt == "--verbose":
             verbose = True
         elif opt == "--dryrun":
@@ -261,42 +278,46 @@
     if len(remainder) > 0:
         usage("Unknown option specified")
 
-    if not remoteDir or not localDir or not wikiListFile:
+    if not remote_dir or not local_dir or not wiki_list_file:
         usage("One or more mandatory options missing")
 
-    if wikiListFile == "-":
-        fd = sys.stdin
+    if wiki_list_file == "-":
+        fdesc = sys.stdin
     else:
-        fd = open(wikiListFile ,"r")
-    wikiList = [ line.strip() for line in fd ]
+        fdesc = open(wiki_list_file, "r")
+    wiki_list = [line.strip() for line in fdesc]
 
-    if fd != sys.stdin:
-        fd.close()
+    if fdesc != sys.stdin:
+        fdesc.close()
 
     # eg enwiki
-    bigWikis = getCommaSepList(big)
+    big_wikis = get_comma_sep_list(big)
     # eg commonswiki
-    hugeWikis = getCommaSepList(huge)
+    huge_wikis = get_comma_sep_list(huge)
 
-    rsyncer = Rsyncer(rsyncHost, remoteDir, localDir, verbose, dryrun)
+    rsyncer = Rsyncer(rsync_host, remote_dir, local_dir, verbose, dryrun)
 
-    for winfo in wikiList:
+    for winfo in wiki_list:
         # first skip blank lines and comments
         if not winfo or winfo[0] == '#':
             continue
-        if not '\t' in winfo:
-            sys.stderr.write("unexpected line with no tab in wikilist: %s\n") 
% winfo
+        if '\t' not in winfo:
+            sys.stderr.write("unexpected line with no tab in wikilist: %s\n" % 
winfo)
             continue
 
         # expect <wikiname>\t<directory>
-        w, wikidir = winfo.split('\t', 1)
+        wikiname, wikidir = winfo.split('\t', 1)
 
-        if w in hugeWikis:
+        if wikiname in huge_wikis:
             wtype = "huge"
-        elif w in bigWikis:
+        elif wikiname in big_wikis:
             wtype = "big"
         else:
             wtype = "normal"
 
-        rp = RsyncProject(rsyncer, w, wtype, wikidir)
-        rp.doRsync()
+        rsync_proj = RsyncProject(rsyncer, wikiname, wtype, wikidir)
+        rsync_proj.do_rsync()
+
+
+if __name__ == "__main__":
+    do_main()

-- 
To view, visit https://gerrit.wikimedia.org/r/401051
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I85ca822d84ef2466218d6ba830e01178922ad931
Gerrit-PatchSet: 1
Gerrit-Project: operations/dumps
Gerrit-Branch: ariel
Gerrit-Owner: ArielGlenn <ar...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to