ArielGlenn has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/387022 )
Change subject: Permit overrides section in dump config files and more per proj settings ...................................................................... Permit overrides section in dump config files and more per proj settings This will permit us to use one large config file with project-specific settings for all wikis and override sections for groups of wikis, something we want for an upcoming move of xml/sql dumps to a different server. Bug: T178893 Change-Id: I5665d9fe2b47e697f9d5294e6f81fec6518ff009 --- M xmldumps-backup/dumpadmin.py M xmldumps-backup/dumps/WikiDump.py M xmldumps-backup/dumps/xmljobs.py M xmldumps-backup/getconfigvals.py M xmldumps-backup/xmlabstracts.py M xmldumps-backup/xmllogs.py M xmldumps-backup/xmlstubs.py 7 files changed, 221 insertions(+), 100 deletions(-) Approvals: ArielGlenn: Looks good to me, approved jenkins-bot: Verified diff --git a/xmldumps-backup/dumpadmin.py b/xmldumps-backup/dumpadmin.py index 3876a96..21a7773 100644 --- a/xmldumps-backup/dumpadmin.py +++ b/xmldumps-backup/dumpadmin.py @@ -167,6 +167,8 @@ ''' do all actions specified at instantiation time ''' + # check if these are actually needed now + self.conf.parse_conffile_overrideables() self.conf.parse_conffile_globally() self.do_global_actions() self.undo_global_actions() diff --git a/xmldumps-backup/dumps/WikiDump.py b/xmldumps-backup/dumps/WikiDump.py index 09d3b6a..4b0edc2 100644 --- a/xmldumps-backup/dumps/WikiDump.py +++ b/xmldumps-backup/dumps/WikiDump.py @@ -14,12 +14,16 @@ class Config(object): - def __init__(self, config_file=False): - self.project_name = False + def __init__(self, config_file=None): + self.project_name = None self.db_user = None self.db_password = None + self.override_section = None home = os.path.dirname(sys.argv[0]) + if config_file and ':' in config_file: + config_file, self.override_section = config_file.split(':') + if not config_file: config_file = "wikidump.conf" self.files = [ @@ -43,6 +47,7 @@ print "The mandatory setting 'dir' in the section 'wiki' was not defined." raise ConfigParser.NoOptionError('wiki', 'dir') + self.parse_conffile_overrideables() self.parse_conffile_globally() self.parse_conffile_per_project() # get from MW adminsettings file if not set in conf file @@ -100,46 +105,113 @@ db_password = default_dbpassword return db_user, db_password - def parse_conffile_globally(self): - self.db_list = MiscUtils.db_list(self.conf.get("wiki", "dblist")) + def get_opt_from_sections(self, sections_to_check, item_name, is_int): + """ + for each section name in sections_to_check: + if the section isn't None and it exists in the config file, + and the config setting is in that section, return the value + otherwise move on to the next section in list + returns int value if is_int is false, string otherwise, or + None if the setting can't be found at all, not even a default + """ + for section in sections_to_check: + if section is None or not section: + continue + if not self.conf.has_section(section): + continue + if not self.conf.has_option(section, item_name): + continue + if is_int: + return self.conf.getint(section, item_name) + else: + return self.conf.get(section, item_name) + return None + + def get_opt_in_overrides_or_default(self, section_name, item_name, is_int): + """ + look for option in the override section, if one was + provided. if not provided or not found, look for it + in the global (usual) section. + """ + return self.get_opt_from_sections( + [self.override_section, section_name], + item_name, is_int) + + def get_opt_for_proj_or_default(self, section_name, item_name, is_int): + """ + look for option in the project name section, if one was + provided. if not provided or not found, look for it + in the overrides section, if there is one. if there + was no overrides section provided, or there is no + such section in the config file, or the setting isn't + in that section either, look for it in the global (usual) + section. + """ + return self.get_opt_from_sections( + [self.project_name, self.override_section, section_name], + item_name, is_int) + + def get_skipdbs(self, filenames): + """ + permit comma-separated list of files so that eg some script + can skip all private and/or closed wikis in addition to some + other exclusion list + """ + if ',' in filenames: + skipfiles = filenames.split(',') + else: + skipfiles = [filenames] + skip_db_list = [] + for skipfile in skipfiles: + skip_db_list.extend(MiscUtils.db_list(skipfile)) + return list(set(skip_db_list)) + + def parse_conffile_overrideables(self): + """ + globals like entries in 'wiki' or 'output' that can + be overriden by a specific named section + """ + self.db_list = MiscUtils.db_list(self.get_opt_in_overrides_or_default( + "wiki", "dblist", 0)) # permit comma-separated list of files so that eg some script # can skip all private and/or closed wikis in addition to some # other exclusion list - to_skip = self.conf.get("wiki", "skipdblist") - if ',' in to_skip: - skipfiles = to_skip.split(',') - else: - skipfiles = [to_skip] - self.skip_db_list = [] - for skipfile in skipfiles: - self.skip_db_list.extend(MiscUtils.db_list(skipfile)) - self.skip_db_list = list(set(self.skip_db_list)) + to_skip = self.get_opt_in_overrides_or_default("wiki", "skipdblist", 0) + self.skip_db_list = self.get_skipdbs(to_skip) - self.private_list = MiscUtils.db_list(self.conf.get("wiki", "privatelist")) - self.closed_list = MiscUtils.db_list(self.conf.get("wiki", "closedlist")) - self.flow_list = MiscUtils.db_list(self.conf.get("wiki", "flowlist")) - self.tablejobs = self.conf.get("wiki", "tablejobs") - self.apijobs = self.conf.get("wiki", "apijobs") + self.private_list = MiscUtils.db_list(self.get_opt_in_overrides_or_default( + "wiki", "privatelist", 0)) + self.closed_list = MiscUtils.db_list(self.get_opt_in_overrides_or_default( + "wiki", "closedlist", 0)) + self.flow_list = MiscUtils.db_list(self.get_opt_in_overrides_or_default( + "wiki", "flowlist", 0)) + self.tablejobs = self.get_opt_in_overrides_or_default( + "wiki", "tablejobs", 0) + self.apijobs = self.get_opt_in_overrides_or_default( + "wiki", "apijobs", 0) self.db_list = list(set(self.db_list) - set(self.skip_db_list)) + + if not self.conf.has_section('output'): + self.conf.add_section('output') + self.public_dir = self.get_opt_in_overrides_or_default("output", "public", 0) + self.private_dir = self.get_opt_in_overrides_or_default("output", "private", 0) + self.temp_dir = self.get_opt_in_overrides_or_default("output", "temp", 0) + self.web_root = self.get_opt_in_overrides_or_default("output", "webroot", 0) + self.index = self.get_opt_in_overrides_or_default("output", "index", 0) + self.template_dir = self.get_opt_in_overrides_or_default("output", "templatedir", 0) + self.perdump_index = self.get_opt_in_overrides_or_default("output", "perdumpindex", 0) + self.log_file = self.get_opt_in_overrides_or_default("output", "logfile", 0) + self.fileperms = self.get_opt_in_overrides_or_default("output", "fileperms", 0) + self.fileperms = int(self.fileperms, 0) + + def parse_conffile_globally(self): if not self.conf.has_section('database'): self.conf.add_section('database') self.max_allowed_packet = self.conf.get("database", "max_allowed_packet") - if not self.conf.has_section('output'): - self.conf.add_section('output') - self.public_dir = self.conf.get("output", "public") - self.private_dir = self.conf.get("output", "private") - self.temp_dir = self.conf.get("output", "temp") - self.web_root = self.conf.get("output", "webroot") - self.index = self.conf.get("output", "index") - self.template_dir = self.conf.get("output", "templatedir") - self.perdump_index = self.conf.get("output", "perdumpindex") - self.log_file = self.conf.get("output", "logfile") - self.fileperms = self.conf.get("output", "fileperms") - self.fileperms = int(self.fileperms, 0) if not self.conf.has_section('reporting'): self.conf.add_section('reporting') self.admin_mail = self.conf.get("reporting", "adminmail") @@ -164,15 +236,11 @@ self.writeuptopageid = self.conf.get("tools", "writeuptopageid") self.recompressxml = self.conf.get("tools", "recompressxml") - if not self.conf.has_section('cleanup'): - self.conf.add_section('cleanup') - self.keep = self.conf.getint("cleanup", "keep") - if not self.conf.has_section('query'): self.conf.add_section('query') self.queryfile = self.conf.get("query", "queryfile") - def parse_conffile_per_project(self, project_name=False): + def parse_conffile_per_project(self, project_name=None): if project_name: self.project_name = project_name @@ -189,6 +257,10 @@ "database", "max_allowed_packet", 0) if max_allowed_packet: self.max_allowed_packet = max_allowed_packet + + if not self.conf.has_section('cleanup'): + self.conf.add_section('cleanup') + self.keep = self.conf.getint("cleanup", "keep") if not self.conf.has_section('chunks'): self.conf.add_section('chunks') @@ -221,7 +293,6 @@ self.conf.add_section('otherformats') self.multistream_enabled = self.get_opt_for_proj_or_default( 'otherformats', 'multistream', 1) - if not self.conf.has_section('stubs'): self.conf.add_section('stubs') self.stubs_orderrevs = self.get_opt_for_proj_or_default( @@ -234,23 +305,6 @@ if not self.conf.has_section('wiki'): self.conf.add_section('wiki') self.wiki_dir = self.get_opt_for_proj_or_default("wiki", "dir", 0) - - def get_opt_for_proj_or_default(self, section_name, item_name, is_int): - # look for option in per project sections - if self.conf.has_section(self.project_name): - if self.conf.has_option(self.project_name, item_name): - if is_int: - return self.conf.getint(self.project_name, item_name) - else: - return self.conf.get(self.project_name, item_name) - - # look for option in global sections - if self.conf.has_section(section_name): - if self.conf.has_option(section_name, item_name): - if is_int: - return self.conf.getint(section_name, item_name) - else: - return self.conf.get(section_name, item_name) def db_latest_status(self): ''' diff --git a/xmldumps-backup/dumps/xmljobs.py b/xmldumps-backup/dumps/xmljobs.py index b1de960..8a999ec 100644 --- a/xmldumps-backup/dumps/xmljobs.py +++ b/xmldumps-backup/dumps/xmljobs.py @@ -129,7 +129,10 @@ current_filepath = runner.dump_dir.filename_public_path(current_dfname) # script_command = MultiVersion.mw_script_as_array(runner.wiki.config, "dumpBackup.php") - command = ["/usr/bin/python", "xmlstubs.py", "--config", runner.wiki.config.files[0], + config_file_arg = runner.wiki.config.files[0] + if runner.wiki.config.override_section: + config_file_arg = config_file_arg + ":" + runner.wiki.config.override_section + command = ["/usr/bin/python", "xmlstubs.py", "--config", config_file_arg, "--wiki", runner.db_name, "--articles", self.get_inprogress_name(articles_filepath), "--history", self.get_inprogress_name(history_filepath), @@ -219,8 +222,11 @@ else: logging_path = runner.dump_dir.filename_public_path(output_dfname) + config_file_arg = runner.wiki.config.files[0] + if runner.wiki.config.override_section: + config_file_arg = config_file_arg + ":" + runner.wiki.config.override_section command = ["/usr/bin/python", "xmllogs.py", "--config", - runner.wiki.config.files[0], "--wiki", runner.db_name, + config_file_arg, "--wiki", runner.db_name, "--outfile", self.get_inprogress_name(logging_path)] pipeline = [command] @@ -279,8 +285,11 @@ args: Runner, DumpFilename for output without any language variant """ + config_file_arg = runner.wiki.config.files[0] + if runner.wiki.config.override_section: + config_file_arg = config_file_arg + ":" + runner.wiki.config.override_section command = ["/usr/bin/python", "xmlabstracts.py", "--config", - runner.wiki.config.files[0], "--wiki", self.db_name] + config_file_arg, "--wiki", self.db_name] output_paths = [] variants = [] diff --git a/xmldumps-backup/getconfigvals.py b/xmldumps-backup/getconfigvals.py index 56ae325..3e222f6 100644 --- a/xmldumps-backup/getconfigvals.py +++ b/xmldumps-backup/getconfigvals.py @@ -18,22 +18,30 @@ return sections -def getconfs(configfile, args, outformat): +def get_setting_from_overrides(conf, overrides, setting): ''' - given a configfile path and a string - section1:name1,name2...;section2:name1,name2... - print a json representation of a dict with - the setting names and values per section + look for a setting in the overrides section if + the section and the setting in that section exist; + return it if so, return None otherwise ''' - conf = ConfigParser.SafeConfigParser() - conf.read(configfile) - confs = {} - sections = get_sections_settingnames(args) - for section in sections: - confs[section] = {} - for setting in sections[section]: - if conf.has_option(section, setting): - confs[section][setting] = conf.get(section, setting) + if not overrides: + return None + for secname in overrides: + if not secname: + continue + if not conf.has_section(overrides): + continue + if not conf.has_option(overrides, setting): + continue + return conf.get(overrides, setting) + return None + + +def display(confs, outformat): + ''' + given a dict of conf settings and values, + display them in the requested format + ''' if outformat == "json": print json.dumps(confs) elif outformat == "txt": @@ -51,6 +59,31 @@ print "%s %s" % (item, confs[section][item]) +def getconfs(configfile, overrides, args, outformat): + ''' + given a configfile path and a string + section1:name1,name2...;section2:name1,name2... + print a json representation of a dict with + the setting names and values per section + if the overrides argument is supplied, arguments + in this list of sections will override the values + in the specific section requested. + ''' + conf = ConfigParser.SafeConfigParser() + conf.read(configfile) + confs = {} + sections = get_sections_settingnames(args) + for section in sections: + confs[section] = {} + for setting in sections[section]: + result = get_setting_from_overrides(conf, overrides, setting) + if result: + confs[section][setting] = result + elif conf.has_option(section, setting): + confs[section][setting] = conf.get(section, setting) + display(confs, outformat) + + def usage(message=None): ''' display a helpful usage message with @@ -61,7 +94,7 @@ sys.stderr.write(message) sys.stderr.write("\n") usage_message = """ -Usage: getconfigvals.py --configfile path +Usage: getconfigvals.py --configfile path[:override_sec1[,override_sec2...]] --args section:name[,name...][;section:name[,name...]] [--help] @@ -69,10 +102,17 @@ in ConfigParser format Note that this script does not load any defaults for config values. +It also cannot deal with per-wiki config value settings, unless you +explicitly set up the config file with a section for the wiki and +pass that in as an override section (see --configfile below). Options: - --configfile (-c): path to config fiel + --configfile (-c): path to config file + you may tack on colon ':' and a comma-separated list of + sections in which to look first for values, for example + the wiki project name, or a section 'bigwikis' that might + have values that override the regular ones. --args (-a): names of args for which to check the config file; config file section names must be specified along with the arg names @@ -86,26 +126,19 @@ If an item is missing it is silently ignored. --help (-h): display this usage message -Example: getconfig.py --configfile confs/wikidump.conf --args 'tools:php,mysqldump,gzip' +Examples: getconfig.py --configfile confs/wikidump.conf \ + --args 'tools:php,mysqldump,gzip' + getconfig.py --configfile confs/wikidump.conf:enwiki,hugewikis \ + --args 'tools:php,mysqldump,gzip' """ sys.stderr.write(usage_message) sys.exit(1) -def main(): - 'main entry point, does all the work' - - configfile = None - args = None - outformat = "json" - - try: - (options, remainder) = getopt.gnu_getopt( - sys.argv[1:], "c:a:f:h", ["configfile=", "args=", "format=", "help"]) - - except getopt.GetoptError as err: - usage("Unknown option specified: " + str(err)) - +def get_args(options): + ''' + get and return the args passed on command line + ''' for (opt, val) in options: if opt in ["-c", "--configfile"]: configfile = val @@ -117,6 +150,25 @@ usage('Help for this script\n') else: usage("Unknown option specified: <%s>" % opt) + return (configfile, args, outformat) + + +def main(): + 'main entry point, does all the work' + + configfile = None + args = None + outformat = "json" + overrides = None + + try: + (options, remainder) = getopt.gnu_getopt( + sys.argv[1:], "c:a:f:h", ["configfile=", "args=", "format=", "help"]) + + except getopt.GetoptError as err: + usage("Unknown option specified: " + str(err)) + + (configfile, args, outformat) = get_args(options) if len(remainder) > 0: usage("Unknown option(s) specified: <%s>" % remainder[0]) @@ -128,10 +180,17 @@ if outformat not in ["txt", "json", "pairs", "values"]: usage("Unknown format type %s" % outformat) + if ':' in configfile: + configfile, overrides = configfile.split(':', 1) + if not overrides: + overrides = [] + elif ',' in overrides: + overrides = overrides.split(',') + if not os.path.exists(configfile): usage("no such file found: " + configfile) - getconfs(configfile, args, outformat) + getconfs(configfile, overrides, args, outformat) if __name__ == '__main__': diff --git a/xmldumps-backup/xmlabstracts.py b/xmldumps-backup/xmlabstracts.py index 7f378d7..28abd31 100644 --- a/xmldumps-backup/xmlabstracts.py +++ b/xmldumps-backup/xmlabstracts.py @@ -76,7 +76,7 @@ usage_message = """ Usage: xmlabstracts.py --wiki wikidbname --outfile path [--start number] [--end number] - [--config path] + [--config path[:overrides_section]] Options: @@ -91,6 +91,8 @@ --end (-e): ending page id to dump, exclusive of this page (default: dump all) --config (-C): path to wikidump configfile (default: "wikidump.conf" in current dir) + if followed by : and a name, this section name in the config file + will be used to override config settings in default sections --dryrun (-d): display the commands that would be run to produce the output but don't actually run them """ @@ -158,9 +160,6 @@ usage("value for --end must be a number") else: end = int(end) - 1 - - if not os.path.exists(configfile): - usage("no such file found: " + configfile) output_files = output_files.split(",") variants = variants.split(",") diff --git a/xmldumps-backup/xmllogs.py b/xmldumps-backup/xmllogs.py index 9c3fde6..5655e8a 100644 --- a/xmldumps-backup/xmllogs.py +++ b/xmldumps-backup/xmllogs.py @@ -55,7 +55,7 @@ usage_message = """ Usage: xmllogs.py --wiki wikidbname --outfile path [--start number] [--end number] - [--config path] + [--config path[:overrides]] Options: @@ -66,6 +66,8 @@ --end (-e): ending log id to dump, exclusive of this entry (default: dump all) --config (-C): path to wikidump configfile (default: "wikidump.conf" in current dir) + if followed by : and a name, this section name in the config file + will be used to override config settings in default sections --dryrun (-d): display the commands that would be run to produce the output but don't actually run them """ @@ -128,9 +130,6 @@ usage("value for --end must be a number") else: end = int(end) - 1 - - if not os.path.exists(configfile): - usage("no such file found: " + configfile) wikiconf = Config(configfile) wikiconf.parse_conffile_per_project(wiki) diff --git a/xmldumps-backup/xmlstubs.py b/xmldumps-backup/xmlstubs.py index 47be3a7..3585203 100644 --- a/xmldumps-backup/xmlstubs.py +++ b/xmldumps-backup/xmlstubs.py @@ -149,7 +149,7 @@ usage_message = """ Usage: xmlstubs.py --wiki wikidbname --articles path --current path --history path [--start number] [--end number] - [--config path] + [--config path[:overrides_section]] Options: @@ -162,6 +162,8 @@ --end (-e): ending page to dump, exclusive of this page (default: dump all) --config (-C): path to wikidump configfile (default: "wikidump.conf" in current dir) + if followed by : and a name, this section name in the config file + will be used to override config settings in default sections --dryrun (-d): display the commands that would be run to produce the output but don't actually run them """ @@ -234,9 +236,6 @@ usage("value for --end must be a number") else: end = int(end) - 1 - - if not os.path.exists(configfile): - usage("no such file found: " + configfile) wikiconf = Config(configfile) wikiconf.parse_conffile_per_project(wiki) -- To view, visit https://gerrit.wikimedia.org/r/387022 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I5665d9fe2b47e697f9d5294e6f81fec6518ff009 Gerrit-PatchSet: 2 Gerrit-Project: operations/dumps Gerrit-Branch: master Gerrit-Owner: ArielGlenn <ar...@wikimedia.org> Gerrit-Reviewer: ArielGlenn <ar...@wikimedia.org> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits