ArielGlenn has submitted this change and it was merged. ( 
https://gerrit.wikimedia.org/r/387022 )

Change subject: Permit overrides section in dump config files and more per proj 
settings
......................................................................


Permit overrides section in dump config files and more per proj settings

This will permit us to use one large config file with project-specific
settings for all wikis and override sections for groups of wikis,
something we want for an upcoming move of xml/sql dumps to a different
server.

Bug: T178893
Change-Id: I5665d9fe2b47e697f9d5294e6f81fec6518ff009
---
M xmldumps-backup/dumpadmin.py
M xmldumps-backup/dumps/WikiDump.py
M xmldumps-backup/dumps/xmljobs.py
M xmldumps-backup/getconfigvals.py
M xmldumps-backup/xmlabstracts.py
M xmldumps-backup/xmllogs.py
M xmldumps-backup/xmlstubs.py
7 files changed, 221 insertions(+), 100 deletions(-)

Approvals:
  ArielGlenn: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/xmldumps-backup/dumpadmin.py b/xmldumps-backup/dumpadmin.py
index 3876a96..21a7773 100644
--- a/xmldumps-backup/dumpadmin.py
+++ b/xmldumps-backup/dumpadmin.py
@@ -167,6 +167,8 @@
         '''
         do all actions specified at instantiation time
         '''
+        # check if these are actually needed now
+        self.conf.parse_conffile_overrideables()
         self.conf.parse_conffile_globally()
         self.do_global_actions()
         self.undo_global_actions()
diff --git a/xmldumps-backup/dumps/WikiDump.py b/xmldumps-backup/dumps/WikiDump.py
index 09d3b6a..4b0edc2 100644
--- a/xmldumps-backup/dumps/WikiDump.py
+++ b/xmldumps-backup/dumps/WikiDump.py
@@ -14,12 +14,16 @@
 
 
 class Config(object):
-    def __init__(self, config_file=False):
-        self.project_name = False
+    def __init__(self, config_file=None):
+        self.project_name = None
         self.db_user = None
         self.db_password = None
+        self.override_section = None
 
         home = os.path.dirname(sys.argv[0])
+        if config_file and ':' in config_file:
+            config_file, self.override_section = config_file.split(':')
+
         if not config_file:
             config_file = "wikidump.conf"
         self.files = [
@@ -43,6 +47,7 @@
             print "The mandatory setting 'dir' in the section 'wiki' was not defined."
             raise ConfigParser.NoOptionError('wiki', 'dir')
 
+        self.parse_conffile_overrideables()
         self.parse_conffile_globally()
         self.parse_conffile_per_project()
         # get from MW adminsettings file if not set in conf file
@@ -100,46 +105,113 @@
             db_password = default_dbpassword
         return db_user, db_password
 
-    def parse_conffile_globally(self):
-        self.db_list = MiscUtils.db_list(self.conf.get("wiki", "dblist"))
+    def get_opt_from_sections(self, sections_to_check, item_name, is_int):
+        """
+        for each section name in sections_to_check:
+            if the section isn't None and it exists in the config file,
+            and the config setting is in that section, return the value
+            otherwise move on to the next section in list
+        returns int value if is_int is true, string otherwise, or
+        None if the setting can't be found at all, not even a default
+        """
+        for section in sections_to_check:
+            if section is None or not section:
+                continue
+            if not self.conf.has_section(section):
+                continue
+            if not self.conf.has_option(section, item_name):
+                continue
+            if is_int:
+                return self.conf.getint(section, item_name)
+            else:
+                return self.conf.get(section, item_name)
+        return None
+
+    def get_opt_in_overrides_or_default(self, section_name, item_name, is_int):
+        """
+        look for option in the override section, if one was
+        provided. if not provided or not found, look for it
+        in the global (usual) section.
+        """
+        return self.get_opt_from_sections(
+            [self.override_section, section_name],
+            item_name, is_int)
+
+    def get_opt_for_proj_or_default(self, section_name, item_name, is_int):
+        """
+        look for option in the project name section, if one was
+        provided. if not provided or not found, look for it
+        in the overrides section, if there is one. if there
+        was no overrides section provided, or there is no
+        such section in the config file, or the setting isn't
+        in that section either, look for it in the global (usual)
+        section.
+        """
+        return self.get_opt_from_sections(
+            [self.project_name, self.override_section, section_name],
+            item_name, is_int)
+
+    def get_skipdbs(self, filenames):
+        """
+        permit comma-separated list of files so that eg some script
+        can skip all private and/or closed wikis in addition to some
+        other exclusion list
+        """
+        if ',' in filenames:
+            skipfiles = filenames.split(',')
+        else:
+            skipfiles = [filenames]
+        skip_db_list = []
+        for skipfile in skipfiles:
+            skip_db_list.extend(MiscUtils.db_list(skipfile))
+        return list(set(skip_db_list))
+
+    def parse_conffile_overrideables(self):
+        """
+        globals like entries in 'wiki' or 'output' that can
+        be overridden by a specific named section
+        """
+        self.db_list = MiscUtils.db_list(self.get_opt_in_overrides_or_default(
+            "wiki", "dblist", 0))
 
         # permit comma-separated list of files so that eg some script
         # can skip all private and/or closed wikis in addition to some
         # other exclusion list
-        to_skip = self.conf.get("wiki", "skipdblist")
-        if ',' in to_skip:
-            skipfiles = to_skip.split(',')
-        else:
-            skipfiles = [to_skip]
-        self.skip_db_list = []
-        for skipfile in skipfiles:
-            self.skip_db_list.extend(MiscUtils.db_list(skipfile))
-        self.skip_db_list = list(set(self.skip_db_list))
+        to_skip = self.get_opt_in_overrides_or_default("wiki", "skipdblist", 0)
+        self.skip_db_list = self.get_skipdbs(to_skip)
 
-        self.private_list = MiscUtils.db_list(self.conf.get("wiki", "privatelist"))
-        self.closed_list = MiscUtils.db_list(self.conf.get("wiki", "closedlist"))
-        self.flow_list = MiscUtils.db_list(self.conf.get("wiki", "flowlist"))
-        self.tablejobs = self.conf.get("wiki", "tablejobs")
-        self.apijobs = self.conf.get("wiki", "apijobs")
+        self.private_list = MiscUtils.db_list(self.get_opt_in_overrides_or_default(
+            "wiki", "privatelist", 0))
+        self.closed_list = MiscUtils.db_list(self.get_opt_in_overrides_or_default(
+            "wiki", "closedlist", 0))
+        self.flow_list = MiscUtils.db_list(self.get_opt_in_overrides_or_default(
+            "wiki", "flowlist", 0))
+        self.tablejobs = self.get_opt_in_overrides_or_default(
+            "wiki", "tablejobs", 0)
+        self.apijobs = self.get_opt_in_overrides_or_default(
+            "wiki", "apijobs", 0)
 
         self.db_list = list(set(self.db_list) - set(self.skip_db_list))
+
+        if not self.conf.has_section('output'):
+            self.conf.add_section('output')
+        self.public_dir = self.get_opt_in_overrides_or_default("output", "public", 0)
+        self.private_dir = self.get_opt_in_overrides_or_default("output", "private", 0)
+        self.temp_dir = self.get_opt_in_overrides_or_default("output", "temp", 0)
+        self.web_root = self.get_opt_in_overrides_or_default("output", "webroot", 0)
+        self.index = self.get_opt_in_overrides_or_default("output", "index", 0)
+        self.template_dir = self.get_opt_in_overrides_or_default("output", "templatedir", 0)
+        self.perdump_index = self.get_opt_in_overrides_or_default("output", "perdumpindex", 0)
+        self.log_file = self.get_opt_in_overrides_or_default("output", "logfile", 0)
+        self.fileperms = self.get_opt_in_overrides_or_default("output", "fileperms", 0)
+        self.fileperms = int(self.fileperms, 0)
+
+    def parse_conffile_globally(self):
 
         if not self.conf.has_section('database'):
             self.conf.add_section('database')
         self.max_allowed_packet = self.conf.get("database", "max_allowed_packet")
 
-        if not self.conf.has_section('output'):
-            self.conf.add_section('output')
-        self.public_dir = self.conf.get("output", "public")
-        self.private_dir = self.conf.get("output", "private")
-        self.temp_dir = self.conf.get("output", "temp")
-        self.web_root = self.conf.get("output", "webroot")
-        self.index = self.conf.get("output", "index")
-        self.template_dir = self.conf.get("output", "templatedir")
-        self.perdump_index = self.conf.get("output", "perdumpindex")
-        self.log_file = self.conf.get("output", "logfile")
-        self.fileperms = self.conf.get("output", "fileperms")
-        self.fileperms = int(self.fileperms, 0)
         if not self.conf.has_section('reporting'):
             self.conf.add_section('reporting')
         self.admin_mail = self.conf.get("reporting", "adminmail")
@@ -164,15 +236,11 @@
         self.writeuptopageid = self.conf.get("tools", "writeuptopageid")
         self.recompressxml = self.conf.get("tools", "recompressxml")
 
-        if not self.conf.has_section('cleanup'):
-            self.conf.add_section('cleanup')
-        self.keep = self.conf.getint("cleanup", "keep")
-
         if not self.conf.has_section('query'):
             self.conf.add_section('query')
         self.queryfile = self.conf.get("query", "queryfile")
 
-    def parse_conffile_per_project(self, project_name=False):
+    def parse_conffile_per_project(self, project_name=None):
         if project_name:
             self.project_name = project_name
 
@@ -189,6 +257,10 @@
             "database", "max_allowed_packet", 0)
         if max_allowed_packet:
             self.max_allowed_packet = max_allowed_packet
+
+        if not self.conf.has_section('cleanup'):
+            self.conf.add_section('cleanup')
+        self.keep = self.conf.getint("cleanup", "keep")
 
         if not self.conf.has_section('chunks'):
             self.conf.add_section('chunks')
@@ -221,7 +293,6 @@
             self.conf.add_section('otherformats')
         self.multistream_enabled = self.get_opt_for_proj_or_default(
             'otherformats', 'multistream', 1)
-
         if not self.conf.has_section('stubs'):
             self.conf.add_section('stubs')
         self.stubs_orderrevs = self.get_opt_for_proj_or_default(
@@ -234,23 +305,6 @@
         if not self.conf.has_section('wiki'):
             self.conf.add_section('wiki')
         self.wiki_dir = self.get_opt_for_proj_or_default("wiki", "dir", 0)
-
-    def get_opt_for_proj_or_default(self, section_name, item_name, is_int):
-        # look for option in per project sections
-        if self.conf.has_section(self.project_name):
-            if self.conf.has_option(self.project_name, item_name):
-                if is_int:
-                    return self.conf.getint(self.project_name, item_name)
-                else:
-                    return self.conf.get(self.project_name, item_name)
-
-        # look for option in global sections
-        if self.conf.has_section(section_name):
-            if self.conf.has_option(section_name, item_name):
-                if is_int:
-                    return self.conf.getint(section_name, item_name)
-                else:
-                    return self.conf.get(section_name, item_name)
 
     def db_latest_status(self):
         '''
diff --git a/xmldumps-backup/dumps/xmljobs.py b/xmldumps-backup/dumps/xmljobs.py
index b1de960..8a999ec 100644
--- a/xmldumps-backup/dumps/xmljobs.py
+++ b/xmldumps-backup/dumps/xmljobs.py
@@ -129,7 +129,10 @@
             current_filepath = 
runner.dump_dir.filename_public_path(current_dfname)
 #        script_command = MultiVersion.mw_script_as_array(runner.wiki.config, 
"dumpBackup.php")
 
-        command = ["/usr/bin/python", "xmlstubs.py", "--config", runner.wiki.config.files[0],
+        config_file_arg = runner.wiki.config.files[0]
+        if runner.wiki.config.override_section:
+            config_file_arg = config_file_arg + ":" + runner.wiki.config.override_section
+        command = ["/usr/bin/python", "xmlstubs.py", "--config", config_file_arg,
                    "--wiki", runner.db_name,
                    "--articles", self.get_inprogress_name(articles_filepath),
                    "--history", self.get_inprogress_name(history_filepath),
@@ -219,8 +222,11 @@
         else:
             logging_path = runner.dump_dir.filename_public_path(output_dfname)
 
+        config_file_arg = runner.wiki.config.files[0]
+        if runner.wiki.config.override_section:
+            config_file_arg = config_file_arg + ":" + 
runner.wiki.config.override_section
         command = ["/usr/bin/python", "xmllogs.py", "--config",
-                   runner.wiki.config.files[0], "--wiki", runner.db_name,
+                   config_file_arg, "--wiki", runner.db_name,
                    "--outfile", self.get_inprogress_name(logging_path)]
 
         pipeline = [command]
@@ -279,8 +285,11 @@
         args:
             Runner, DumpFilename for output without any language variant
         """
+        config_file_arg = runner.wiki.config.files[0]
+        if runner.wiki.config.override_section:
+            config_file_arg = config_file_arg + ":" + 
runner.wiki.config.override_section
         command = ["/usr/bin/python", "xmlabstracts.py", "--config",
-                   runner.wiki.config.files[0], "--wiki", self.db_name]
+                   config_file_arg, "--wiki", self.db_name]
 
         output_paths = []
         variants = []
diff --git a/xmldumps-backup/getconfigvals.py b/xmldumps-backup/getconfigvals.py
index 56ae325..3e222f6 100644
--- a/xmldumps-backup/getconfigvals.py
+++ b/xmldumps-backup/getconfigvals.py
@@ -18,22 +18,30 @@
     return sections
 
 
-def getconfs(configfile, args, outformat):
+def get_setting_from_overrides(conf, overrides, setting):
     '''
-    given a configfile path and a string
-    section1:name1,name2...;section2:name1,name2...
-    print a json representation of a dict with
-    the setting names and values per section
+    look for a setting in the overrides section if
+    the section and the setting in that section exist;
+    return it if so, return None otherwise
     '''
-    conf = ConfigParser.SafeConfigParser()
-    conf.read(configfile)
-    confs = {}
-    sections = get_sections_settingnames(args)
-    for section in sections:
-        confs[section] = {}
-        for setting in sections[section]:
-            if conf.has_option(section, setting):
-                confs[section][setting] = conf.get(section, setting)
+    if not overrides:
+        return None
+    for secname in overrides:
+        if not secname:
+            continue
+        if not conf.has_section(overrides):
+            continue
+        if not conf.has_option(overrides, setting):
+            continue
+        return conf.get(overrides, setting)
+    return None
+
+
+def display(confs, outformat):
+    '''
+    given a dict of conf settings and values,
+    display them in the requested format
+    '''
     if outformat == "json":
         print json.dumps(confs)
     elif outformat == "txt":
@@ -51,6 +59,31 @@
                 print "%s %s" % (item, confs[section][item])
 
 
+def getconfs(configfile, overrides, args, outformat):
+    '''
+    given a configfile path and a string
+    section1:name1,name2...;section2:name1,name2...
+    print a json representation of a dict with
+    the setting names and values per section
+    if the overrides argument is supplied, arguments
+    in this list of sections will override the values
+    in the specific section requested.
+    '''
+    conf = ConfigParser.SafeConfigParser()
+    conf.read(configfile)
+    confs = {}
+    sections = get_sections_settingnames(args)
+    for section in sections:
+        confs[section] = {}
+        for setting in sections[section]:
+            result = get_setting_from_overrides(conf, overrides, setting)
+            if result:
+                confs[section][setting] = result
+            elif conf.has_option(section, setting):
+                confs[section][setting] = conf.get(section, setting)
+    display(confs, outformat)
+
+
 def usage(message=None):
     '''
     display a helpful usage message with
@@ -61,7 +94,7 @@
         sys.stderr.write(message)
         sys.stderr.write("\n")
     usage_message = """
-Usage: getconfigvals.py --configfile path
+Usage: getconfigvals.py --configfile path[:override_sec1[,override_sec2...]]
            --args section:name[,name...][;section:name[,name...]]
            [--help]
 
@@ -69,10 +102,17 @@
 in ConfigParser format
 
 Note that this script does not load any defaults for config values.
+It also cannot deal with per-wiki config value settings, unless you
+explicitly set up the config file with a section for the wiki and
+pass that in as an override section (see --configfile below).
 
 Options:
 
-  --configfile (-c):  path to config fiel
+  --configfile (-c):  path to config file
+                      you may tack on colon ':' and a comma-separated list of
+                      sections in which to look first for values, for example
+                      the wiki project name, or a section 'bigwikis' that might
+                      have values that override the regular ones.
   --args       (-a):  names of args for which to check the config file;
                       config file section names must be specified
                       along with the arg names
@@ -86,26 +126,19 @@
                       If an item is missing it is silently ignored.
   --help       (-h):  display this usage message
 
-Example:  getconfig.py --configfile confs/wikidump.conf --args 
'tools:php,mysqldump,gzip'
+Examples: getconfig.py --configfile confs/wikidump.conf \
+                 --args 'tools:php,mysqldump,gzip'
+          getconfig.py --configfile confs/wikidump.conf:enwiki,hugewikis \
+                 --args 'tools:php,mysqldump,gzip'
 """
     sys.stderr.write(usage_message)
     sys.exit(1)
 
 
-def main():
-    'main entry point, does all the work'
-
-    configfile = None
-    args = None
-    outformat = "json"
-
-    try:
-        (options, remainder) = getopt.gnu_getopt(
-            sys.argv[1:], "c:a:f:h", ["configfile=", "args=", "format=", 
"help"])
-
-    except getopt.GetoptError as err:
-        usage("Unknown option specified: " + str(err))
-
+def get_args(options):
+    '''
+    get and return the args passed on command line
+    '''
     for (opt, val) in options:
         if opt in ["-c", "--configfile"]:
             configfile = val
@@ -117,6 +150,25 @@
             usage('Help for this script\n')
         else:
             usage("Unknown option specified: <%s>" % opt)
+    return (configfile, args, outformat)
+
+
+def main():
+    'main entry point, does all the work'
+
+    configfile = None
+    args = None
+    outformat = "json"
+    overrides = None
+
+    try:
+        (options, remainder) = getopt.gnu_getopt(
+            sys.argv[1:], "c:a:f:h", ["configfile=", "args=", "format=", 
"help"])
+
+    except getopt.GetoptError as err:
+        usage("Unknown option specified: " + str(err))
+
+    (configfile, args, outformat) = get_args(options)
 
     if len(remainder) > 0:
         usage("Unknown option(s) specified: <%s>" % remainder[0])
@@ -128,10 +180,17 @@
     if outformat not in ["txt", "json", "pairs", "values"]:
         usage("Unknown format type %s" % outformat)
 
+    if ':' in configfile:
+        configfile, overrides = configfile.split(':', 1)
+    if not overrides:
+        overrides = []
+    elif ',' in overrides:
+        overrides = overrides.split(',')
+
     if not os.path.exists(configfile):
         usage("no such file found: " + configfile)
 
-    getconfs(configfile, args, outformat)
+    getconfs(configfile, overrides, args, outformat)
 
 
 if __name__ == '__main__':
diff --git a/xmldumps-backup/xmlabstracts.py b/xmldumps-backup/xmlabstracts.py
index 7f378d7..28abd31 100644
--- a/xmldumps-backup/xmlabstracts.py
+++ b/xmldumps-backup/xmlabstracts.py
@@ -76,7 +76,7 @@
     usage_message = """
 Usage: xmlabstracts.py --wiki wikidbname --outfile path
     [--start number] [--end number]
-    [--config path]
+    [--config path[:overrides_section]]
 
 Options:
 
@@ -91,6 +91,8 @@
   --end (-e):          ending page id to dump, exclusive of this page 
(default: dump all)
 
   --config (-C):       path to wikidump configfile (default: "wikidump.conf" 
in current dir)
+                       if followed by : and a name, this section name in the 
config file
+                       will be used to override config settings in default 
sections
   --dryrun (-d):       display the commands that would be run to produce the 
output but
                        don't actually run them
 """
@@ -158,9 +160,6 @@
             usage("value for --end must be a number")
         else:
             end = int(end) - 1
-
-    if not os.path.exists(configfile):
-        usage("no such file found: " + configfile)
 
     output_files = output_files.split(",")
     variants = variants.split(",")
diff --git a/xmldumps-backup/xmllogs.py b/xmldumps-backup/xmllogs.py
index 9c3fde6..5655e8a 100644
--- a/xmldumps-backup/xmllogs.py
+++ b/xmldumps-backup/xmllogs.py
@@ -55,7 +55,7 @@
     usage_message = """
 Usage: xmllogs.py --wiki wikidbname --outfile path
     [--start number] [--end number]
-    [--config path]
+    [--config path[:overrides]]
 
 Options:
 
@@ -66,6 +66,8 @@
   --end (-e):          ending log id to dump, exclusive of this entry 
(default: dump all)
 
   --config (-C):       path to wikidump configfile (default: "wikidump.conf" 
in current dir)
+                       if followed by : and a name, this section name in the 
config file
+                       will be used to override config settings in default 
sections
   --dryrun (-d):       display the commands that would be run to produce the 
output but
                        don't actually run them
 """
@@ -128,9 +130,6 @@
             usage("value for --end must be a number")
         else:
             end = int(end) - 1
-
-    if not os.path.exists(configfile):
-        usage("no such file found: " + configfile)
 
     wikiconf = Config(configfile)
     wikiconf.parse_conffile_per_project(wiki)
diff --git a/xmldumps-backup/xmlstubs.py b/xmldumps-backup/xmlstubs.py
index 47be3a7..3585203 100644
--- a/xmldumps-backup/xmlstubs.py
+++ b/xmldumps-backup/xmlstubs.py
@@ -149,7 +149,7 @@
     usage_message = """
 Usage: xmlstubs.py --wiki wikidbname --articles path --current path
     --history path [--start number] [--end number]
-    [--config path]
+    [--config path[:overrides_section]]
 
 Options:
 
@@ -162,6 +162,8 @@
   --end (-e):          ending page to dump, exclusive of this page (default: 
dump all)
 
   --config (-C):       path to wikidump configfile (default: "wikidump.conf" 
in current dir)
+                       if followed by : and a name, this section name in the 
config file
+                       will be used to override config settings in default 
sections
   --dryrun (-d):       display the commands that would be run to produce the 
output but
                        don't actually run them
 """
@@ -234,9 +236,6 @@
             usage("value for --end must be a number")
         else:
             end = int(end) - 1
-
-    if not os.path.exists(configfile):
-        usage("no such file found: " + configfile)
 
     wikiconf = Config(configfile)
     wikiconf.parse_conffile_per_project(wiki)

-- 
To view, visit https://gerrit.wikimedia.org/r/387022
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I5665d9fe2b47e697f9d5294e6f81fec6518ff009
Gerrit-PatchSet: 2
Gerrit-Project: operations/dumps
Gerrit-Branch: master
Gerrit-Owner: ArielGlenn <ar...@wikimedia.org>
Gerrit-Reviewer: ArielGlenn <ar...@wikimedia.org>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to