Rush has submitted this change and it was merged. Change subject: labsdb: maintain-views laundry list of changes ......................................................................
labsdb: maintain-views laundry list of changes * Convert config file handling to yaml * Add packages needed for maintain-views to puppet * Change behavior from default to modifying all dbs to default to requiring an explicit flag * move view definer to attribute * separate dry-run and debug options * add user_exists method to verify definer * add doc strings to methods for class SchemaOperations * check_customview_source method return a consistent type * verify source database exists before creating _p variant * make natively single tenant rather than expecting slices * move do_dbhost function logic into main (as we are expecting single tenant) * separate metadata assignment and sensitive DB exclusion * qualify password module lookup for toplevel Change-Id: I95275471471ce2ba8b63ed0b309a126c5a8ab051 --- M modules/role/files/labsdb/maintain-views.py M modules/role/manifests/labsdb/views.pp D modules/role/templates/labsdb/maintain-views.json A modules/role/templates/labsdb/maintain-views.yaml 4 files changed, 526 insertions(+), 292 deletions(-) Approvals: Rush: Looks good to me, approved Madhuvishy: Looks good to me, but someone else must approve jenkins-bot: Verified diff --git a/modules/role/files/labsdb/maintain-views.py b/modules/role/files/labsdb/maintain-views.py index eefaa6f..b1cb9d0 100644 --- a/modules/role/files/labsdb/maintain-views.py +++ b/modules/role/files/labsdb/maintain-views.py @@ -18,21 +18,18 @@ # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. # -# # This script maintains the databases containing sanitized views to # the replicated databases (in the form <db>_p for every <db>) -# -# By default, it processes every database but it accepts a list of -# databases to process # # Information on available and operational databases is sourced from # a checkout of mediawiki-config. # import argparse -import json import logging import re +import sys +import yaml import pymysql @@ -45,17 +42,33 @@ self.db_p = db + '_p' self.db_size = db_size self.cursor = cursor + self.definer = 'viewmaster' def write_execute(self, query): - if self.dry_run: - logging.info("DRY RUN: Would execute: {}".format(query)) - else: + """ Do operation or simulate + :param query: str + """ + logging.debug("SQL: {}".format(query)) + if not self.dry_run: self.cursor.execute(query) - def table_exists(self, table, database): + def user_exists(self, name): + """ Check if a user exists + :param name: str """ - Determine whether a table of the given name exists in the database of + self.cursor.execute(""" + SELECT 1 + FROM `mysql`.`user` + WHERE `user`=%s; + """, args=(name)) + return bool(self.cursor.rowcount) + + def table_exists(self, table, database): + """ Determine whether a table of the given name exists in the database of the given name. + :param table: str + :param database: str + :returns: bool """ self.cursor.execute(""" SELECT `table_name` @@ -64,33 +77,21 @@ """, args=(table, database)) return bool(self.cursor.rowcount) - def execute(self, fullviews, customviews): + def database_exists(self, database): + """ Verify if a DB exists + :param database: str + :return: bool """ - Begin checking/creating views for this schema. - """ - self.cursor.execute(""" SELECT `schema_name` FROM `information_schema`.`schemata` WHERE `schema_name`=%s - """, args=(self.db_p,)) - if not self.cursor.rowcount: - # Can't use pymysql to build this - self.write_execute( - "CREATE DATABASE `{}`;".format(self.db_p) - ) - - logging.info("Full views for {}:".format(self.db)) - for view in fullviews: - self.do_fullview(view) - - logging.info("Custom views for {}:".format(self.db)) - for view_name, view_details in customviews.items(): - self.do_customview(view_name, view_details) + """, args=(database,)) + return bool(self.cursor.rowcount) def do_fullview(self, view): - """ - Check whether the source table exists, and if so, create the view. + """ Check whether the source table exists, and if so, create the view. + :param view: str """ if self.table_exists(view, self.db): # If it does, create or replace the view for it. @@ -101,10 +102,10 @@ # Can't use pymysql to build this self.write_execute(""" CREATE OR REPLACE - DEFINER=viewmaster - VIEW `{0}`.`{1}` - AS SELECT * FROM `{2}`.`{1}`; - """.format(self.db_p, view, self.db)) + DEFINER={0} + VIEW `{1}`.`{2}` + AS SELECT * FROM `{3}`.`{2}`; + """.format(self.definer, self.db_p, view, self.db)) else: # Some views only exist in CentralAuth, some only in MediaWiki, # etc. @@ -115,9 +116,10 @@ ) def check_customview_source(self, view_name, source): - """ - Check whether a custom view's particular source exists. If it does, - return the source database and table names. If not, return False. + """ Check whether a custom view's particular source exists + :param view_name: str + :param source: str + :return: tuple """ match = re.match(r"^(?:(.*)\.)?([^.]+)$", source) if not match: @@ -142,20 +144,20 @@ " view {}") .format(source_table, source_db, view_name) ) - return False + return () def do_customview(self, view_name, view_details): - """ - Process a custom view's sources, and if they're all present, and the - view's limit is not bigger than the database size, call - create_customview. + """ Process a custom view's sources, and if they're all present, and the + view's limit is not bigger than the database size, call create_customview. + :param view_name: str + :param view_details: str """ if ("limit" in view_details and self.db_size is not None and view_details["limit"] > self.db_size): # Ignore custom views which have a limit number greater # than the size ID set by the read_list calls for # size.dblist above. - logging.debug("Too big for this database") + logging.warning("Too big for this database") return sources = view_details["source"] @@ -166,13 +168,12 @@ for source in sources: result = self.check_customview_source(view_name, source) - if result: - source_db, source_table = result - sources_checked.append( - "`{}`.`{}`".format(source_db, source_table) - ) - else: + if not result: break + source_db, source_table = result + sources_checked.append( + "`{}`.`{}`".format(source_db, source_table) + ) if len(sources) == len(sources_checked): @@ -202,16 +203,20 @@ ) def create_customview(self, view_name, view_details, sources): + """ Creates or replaces a custom view from its sources. + :param view_name: str + :param view_details: dict + :param sources: list """ - Creates or replaces a custom view from it's sources. - """ + query = """ CREATE OR REPLACE - DEFINER=viewmaster + DEFINER={} VIEW `{}`.`{}` AS {} FROM {} """.format( + self.definer, self.db_p, view_name, view_details["view"], @@ -229,53 +234,54 @@ # Can't use pymysql to build this self.write_execute(query) + def execute(self, fullviews, customviews): + """ Begin checking/creating views for this schema. + :param fullviews: list + :param customviews: dict + """ -def do_dbhost(dry_run, replace_all, dbhost, dbport, config, dbs, customviews): - """ - Handle setting up a connection to a dbhost, then go through every database - to start the process of creating views there. - """ - dbh = pymysql.connect( - host=dbhost, - port=dbport, - user=config["mysql_user"], - passwd=config["mysql_password"], - charset="utf8" - ) - with dbh.cursor() as cursor: - logging.info("Connected to {}:{}...".format(dbhost, dbport)) - cursor.execute("SET NAMES 'utf8';") - for db, db_info in dbs.items(): - skip = False - for bad_flag in ["deleted", "private"]: - if db_info.get(bad_flag, False): - logging.debug( - "Skipping database {} because it's marked as {}." - .format(db, bad_flag) - ) - skip = True + if not self.database_exists(self.db): + logging.warning('DB {} does not exist to create views'.format(self.db)) + return - if skip: - continue + if not self.database_exists(self.db_p): + # Can't use pymysql to build this + self.write_execute( + "CREATE DATABASE `{}`;".format(self.db_p) + ) - db_size = None - if "size" in db_info: - db_size = db_info["size"] + logging.info("Full views for {}:".format(self.db)) + for view in fullviews: + self.do_fullview(view) - ops = SchemaOperations(dry_run, replace_all, db, db_size, cursor) - ops.execute(config["fullviews"], customviews) + logging.info("Custom views for {}:".format(self.db)) + for view_name, view_details in customviews.items(): + self.do_customview(view_name, view_details) + if __name__ == "__main__": - logging.getLogger().setLevel(logging.INFO) argparser = argparse.ArgumentParser( "maintain-views", - description="Maintain labs sanitized views of replica databases" + description="Maintain labsdb sanitized views of replica databases" + ) + + group = argparser.add_mutually_exclusive_group(required=True) + group.add_argument( + '--databases', + help=("Specify database(s) to work on, instead of all. Multiple" + " values can be given space-separated."), + nargs="+" + ) + group.add_argument( + '--all-databases', + help='Flag to run through all possible databases', + action='store_true', ) argparser.add_argument( "--config-location", help="Path to find the configuration file", - default="/etc/maintain-views.json" + default="/etc/maintain-views.yaml" ) argparser.add_argument( "--dry-run", @@ -290,20 +296,41 @@ action="store_true" ) argparser.add_argument( - "--databases", - help=("Specify database(s) to work on, instead of all. Multiple" - " values can be given space-separated."), - nargs="+" - ) - argparser.add_argument( "--mediawiki-config", help=("Specify path to mediawiki-config checkout" " values can be given space-separated."), default="/usr/local/lib/mediawiki-config" ) + argparser.add_argument( + '--debug', + help='Turn on debug logging', + action='store_true' + ) + args = argparser.parse_args() - with open(args.config_location) as f: - config = json.load(f) + + with open(args.config_location, 'r') as stream: + try: + config = yaml.load(stream) + except yaml.YAMLError as exc: + logging.critical(exc) + sys.exit(1) + + dbs_metadata = config['metadata'] + sensitive_db_lists = config['sensitive_db_lists'] + + logging.basicConfig( + format='%(asctime)s %(levelname)s %(message)s', + level=logging.DEBUG if args.debug else logging.INFO, + ) + + dbh = pymysql.connect( + host=config['host'], + port=config['port'], + user=config["mysql_user"], + passwd=config["mysql_password"], + charset="utf8" + ) # Hacks safelog = ("log_type IN ('" + @@ -316,51 +343,50 @@ customviews["logging_userindex"]["where"] = ("(log_deleted&4)=0 and " + safelog) - with open("{}/dblists/all.dblist".format(args.mediawiki_config)) as f: + # This will include private and deleted dbs at this stage + all_dbs_file = "{}/dblists/all.dblist".format(args.mediawiki_config) + with open(all_dbs_file) as f: all_available_dbs = f.read().splitlines() + all_available_dbs.extend(config['add_to_all_dbs']) - all_available_dbs.append("centralauth") + # argparse will ensure we are declaring explicitly + dbs = all_available_dbs if args.databases: - dbs = {} - for db in args.databases: - if db in all_available_dbs: - dbs[db] = {} - else: - logging.info( - "Ignoring database {} which doesn't appear to exist." - .format(db) - ) + dbs = [db for db in args.databases if db in all_available_dbs] - if not len(dbs.keys()): - logging.critical("No databases specified exist.") - else: - dbs = {db: {} for db in all_available_dbs} + # purge all sensitive dbs so they are never attempted + allowed_dbs = dbs + for list in sensitive_db_lists: + path = "{}/dblists/{}.dblist".format(args.mediawiki_config, list) + with open(path) as file: + pdbs = [db for db in file.read().splitlines()] + allowed_dbs = [x for x in allowed_dbs if x not in pdbs] - def read_list(fname, prop, val): - with open("{}/dblists/{}.dblist".format(args.mediawiki_config, fname)) as f: - for db in f.read().splitlines(): - if db in dbs: - dbs[db][prop] = val + logging.debug("Removing {} dbs as sensitive".format(len(dbs) - len(allowed_dbs))) + if not allowed_dbs: + logging.error('None of the specified dbs are allowed') + sys.exit(1) - # Reads various .dblist files to store information about specific - # databases. The first line, for example, reads deleted.dblist, and for - # each listed database name sets dbs[db_name]["deleted"] = True - read_list("deleted", "deleted", True) - read_list("private", "private", True) - read_list("small", "size", 1) - read_list("medium", "size", 2) - read_list("large", "size", 3) + # assign all metadata from lists + dbs_with_metadata = {x: {} for x in allowed_dbs} + for list, meta in dbs_metadata.items(): + path = "{}/dblists/{}.dblist".format(args.mediawiki_config, list) + with open(path) as file: + mdbs = [db for db in file.read().splitlines()] + for db in mdbs: + if db in dbs_with_metadata: + dbs_with_metadata[db].update(meta) - logging.info("Got all necessary info, starting to connect to slices") - for dbhost, dbport in config["slices"]: - do_dbhost( - args.dry_run, - args.replace_all, - dbhost, - dbport, - config, - dbs, - customviews - ) + with dbh.cursor() as cursor: + cursor.execute("SET NAMES 'utf8';") + for db, db_info in dbs_with_metadata.items(): + ops = SchemaOperations(args.dry_run, + args.replace_all, + db, + db_info.get('size', None), + cursor) + if not ops.user_exists(ops.definer): + logging.critical("Definer has not been created") + sys.exit(1) - logging.info("All done.") + ops.execute(config["fullviews"], customviews) diff --git a/modules/role/manifests/labsdb/views.pp b/modules/role/manifests/labsdb/views.pp index c05646c..64eb17d 100644 --- a/modules/role/manifests/labsdb/views.pp +++ b/modules/role/manifests/labsdb/views.pp @@ -1,17 +1,23 @@ class role::labsdb::views { + package { + ['python3-yaml', 'python3-pymysql']: + ensure => present, + before => File['/usr/local/sbin/maintain-views'], + } + git::clone { 'operations/mediawiki-config': ensure => 'latest', directory => '/usr/local/lib/mediawiki-config', - before => File['/usr/local/sbin/maintain-views'], recurse_submodules => true, + before => File['/usr/local/sbin/maintain-views'], } - $view_user = $passwords::mysql::maintain_views::user - $view_pass = $passwords::mysql::maintain_views::password - file { '/etc/maintain-views.json': + $view_user = $::passwords::mysql::maintain_views::user + $view_pass = $::passwords::mysql::maintain_views::password + file { '/etc/maintain-views.yaml': ensure => file, - content => template('role/labsdb/maintain-views.json'), + content => template('role/labsdb/maintain-views.yaml'), owner => 'root', group => 'root', mode => '0444', diff --git a/modules/role/templates/labsdb/maintain-views.json b/modules/role/templates/labsdb/maintain-views.json deleted file mode 100644 index a4881c9..0000000 --- a/modules/role/templates/labsdb/maintain-views.json +++ /dev/null @@ -1,153 +0,0 @@ -{ - "slices": [ - ["<%= @fqdn %>", 3306] - ], - "mysql_user": "<%= @view_user %>", - "mysql_password": "<%= @view_pass %>", - "fullviews": [ - "abuse_filter_action", "aft_article_answer", "aft_article_answer_text", - "aft_article_feedback_properties", "aft_article_feedback_ratings_rollup", - "aft_article_feedback_select_rollup", "aft_article_field", "aft_article_field_group", - "aft_article_field_option", "aft_article_filter_count", "aft_article_revision_feedback_ratings_rollup", - "aft_article_revision_feedback_select_rollup", "article_assessment", "article_assessment_pages", - "article_assessment_ratings", "article_feedback", "article_feedback_pages", - "article_feedback_properties", "article_feedback_ratings", "article_feedback_revisions", - "article_feedback_stats", "article_feedback_stats_types", "category", "categorylinks", "change_tag", - "ep_articles", "ep_cas", "ep_courses", "ep_events", "ep_instructors", "ep_oas", "ep_orgs", - "ep_revisions", "ep_students", "ep_users_per_course", "externallinks", "flaggedimages", - "flaggedpage_config", "flaggedpage_pending", "flaggedpages", "flaggedrevs", "flaggedrevs_promote", - "flaggedrevs_statistics", "flaggedrevs_stats", "flaggedrevs_stats2", "flaggedrevs_tracking", - "flaggedtemplates", "geo_killlist", "geo_tags", "geo_updates", "globalimagelinks", "global_block_whitelist", - "hashs", "hitcounter", "image", "imagelinks", "imagelinks_old", "interwiki", "iwlinks", - "l10n_cache", "langlinks", "links", "localisation", "localisation_file_hash", - "math", "module_deps", "msg_resource_links", "namespaces", - "page", "page_broken", "pagelinks", "page_props", "page_restrictions", "pagetriage_log", - "pagetriage_page", "pagetriage_page_tags", "pagetriage_tags", "pif_edits", "povwatch_log", - "povwatch_subscribers", "protected_titles", "pr_index", "redirect", "renameuser_status", "site_identifiers", - "sites", "site_stats", "tag_summary", "templatelinks", "transcode", "updatelog", "updates", - "user_former_groups", "user_groups", "valid_tag", "wikilove_image_log", - "wikilove_log", "global_group_permissions", "global_group_restrictions", "global_user_groups", - "globalblocks", "localuser", "wikiset", "wb_changes", "wb_changes_dispatch", "wb_changes_subscription", - "wb_entity_per_page", "wb_id_counters", "wb_items_per_site", "wb_property_info", "wb_terms", - "wbc_entity_usage", "wbs_propertypairs", "watchlist_count" - ], - "logging_whitelist": [ - "gblrights", "globalauth", "close", "lock", "gblblock", "managetags", "gather", "campus", - "institution", "instructor", "online", "merge", "massmessage", "course", "stable", "import", - "abusefilter", "student", "eparticle", "rights", "pagetriage-deletion", "protect", - "pagetriage-curation", "thanks", "renameuser", "review", "block", "upload", "move", - "delete", "patrol", "newusers" - ], - "customviews": { - "abuse_filter": { - "source": "abuse_filter", - "view": "select af_id, if(af_hidden,null,af_pattern) as af_pattern, af_user, af_user_text, af_timestamp, af_enabled, if(af_hidden,null,af_comments) as af_comments, af_public_comments, af_hidden, af_hit_count, af_throttled, af_deleted, af_actions, af_global, af_group" - }, - "abuse_filter_log": { - "source": "abuse_filter_log", - "view": "select afl_id, afl_filter, afl_user, afl_user_text, NULL as afl_ip, afl_action, afl_actions, afl_var_dump, afl_timestamp, afl_namespace, afl_title, afl_wiki, afl_deleted, afl_patrolled_by, afl_rev_id, afl_log_id" - }, - "aft_article_feedback": { - "source": "aft_article_feedback", - "view": "select af_id, af_page_id, af_user_id, NULL as af_user_ip, af_user_anon_token, af_revision_id, af_cta_id, af_link_id, af_created, af_abuse_count, af_helpful_count, af_unhelpful_count, af_oversight_count, af_is_deleted, af_is_hidden, af_net_helpfulness, af_has_comment, af_is_unhidden, af_is_undeleted, af_is_declined, af_activity_count, af_form_id, af_experiment, af_suppress_count, af_last_status, af_last_status_user_id, af_last_status_timestamp, af_is_autohide, af_is_unrequested, af_is_featured, af_is_unfeatured, af_is_resolved, af_is_unresolved, af_relevance_score, af_relevance_sort, af_last_status_notes" - }, - "archive": { - "source": "archive", - "view": "select ar_id, ar_namespace, ar_title, NULL as ar_text, NULL as ar_comment, if(ar_deleted&4,null,ar_user) as ar_user, if(ar_deleted&4,null,ar_user_text) as ar_user_text, ar_timestamp, ar_minor_edit, ar_flags, ar_rev_id, if(ar_deleted&1,null,ar_text_id) as ar_text_id, ar_deleted, if(ar_deleted&1,null,ar_len) as ar_len, ar_page_id, ar_parent_id, if(ar_deleted&1,null,ar_sha1) as ar_sha1" - }, - "archive_userindex": { - "source": "archive", - "view": "select ar_id, ar_namespace, ar_title, NULL as ar_text, NULL as ar_comment, ar_user, ar_user_text, ar_timestamp, ar_minor_edit, ar_flags, ar_rev_id, if(ar_deleted&1,null,ar_text_id) as ar_text_id, ar_deleted, if(ar_deleted&1,null,ar_len) as ar_len, ar_page_id, ar_parent_id, if(ar_deleted&1,null,ar_sha1) as ar_sha1", - "where": "(ar_deleted&4)=0" - }, - "filearchive": { - "source": "filearchive", - "view": "select fa_id, fa_name, fa_archive_name, fa_storage_group, NULL as fa_storage_key, fa_deleted_user, fa_deleted_timestamp, fa_deleted_reason, if(fa_deleted&1,null,fa_size) as fa_size, if(fa_deleted&1,null,fa_width) as fa_width, if(fa_deleted&1,null,fa_height) as fa_height, if(fa_deleted&1,null,fa_metadata) as fa_metadata, if(fa_deleted&1,null,fa_bits) as fa_bits, if(fa_deleted&1,null,fa_media_type) as fa_media_type, if(fa_deleted&1,null,fa_major_mime) as fa_major_mime, if(fa_deleted&1,null,fa_minor_mime) as fa_minor_mime, if(fa_deleted&2,null,fa_description) as fa_description, if(fa_deleted&4,null,fa_user)as fa_user, if(fa_deleted&4,null,fa_user_text) as fa_user_text, fa_timestamp, fa_deleted, if(fa_deleted&1,null,fa_sha1) as fa_sha1" - }, - "filearchive_userindex": { - "source": "filearchive", - "view": "select fa_id, fa_name, fa_archive_name, fa_storage_group, NULL as fa_storage_key, fa_deleted_user, fa_deleted_timestamp, fa_deleted_reason, if(fa_deleted&1,null,fa_size) as fa_size, if(fa_deleted&1,null,fa_width) as fa_width, if(fa_deleted&1,null,fa_height) as fa_height, if(fa_deleted&1,null,fa_metadata) as fa_metadata, if(fa_deleted&1,null,fa_bits) as fa_bits, if(fa_deleted&1,null,fa_media_type) as fa_media_type, if(fa_deleted&1,null,fa_major_mime) as fa_major_mime, if(fa_deleted&1,null,fa_minor_mime) as fa_minor_mime, if(fa_deleted&2,null,fa_description) as fa_description, fa_user, fa_user_text, fa_timestamp, fa_deleted, if(fa_deleted&1,null,fa_sha1) as fa_sha1", - "where": "(fa_deleted&4)=0" - }, - "globaluser": { - "source": "globaluser", - "view": "select gu_id, gu_name, gu_enabled, gu_enabled_method, gu_home_db, NULL as gu_email, NULL as gu_email_authenticated, NULL as gu_salt, NULL as gu_password, gu_locked, gu_hidden, gu_registration, NULL as gu_password_reset_key, NULL as gu_password_reset_expiration, NULL as gu_auth_token", - "where": "gu_hidden=''" - }, - "ipblocks": { - "source": "ipblocks", - "view": "select ipb_id, if(ipb_auto<>0,null,ipb_address) as ipb_address, ipb_user, ipb_by, ipb_reason, ipb_timestamp, ipb_auto, ipb_anon_only, ipb_create_account, ipb_expiry, if(ipb_auto<>0,null,ipb_range_start) as ipb_range_start, if(ipb_auto<>0,null,ipb_range_end) as ipb_range_end, ipb_enable_autoblock, ipb_deleted, ipb_block_email, ipb_by_text, ipb_allow_usertalk, ipb_parent_block_id", - "where": "ipb_deleted=0" - }, - "ipblocks_ipindex": { - "source": "ipblocks", - "view": "select ipb_id, ipb_address, ipb_user, ipb_by, ipb_reason, ipb_timestamp, ipb_auto, ipb_anon_only, ipb_create_account, ipb_expiry, ipb_range_start, ipb_range_end, ipb_enable_autoblock, ipb_deleted, ipb_block_email, ipb_by_text, ipb_allow_usertalk, ipb_parent_block_id", - "where": "ipb_deleted=0 and ipb_auto=0" - }, - "logging": { - "source": "logging", - "view": "select log_id, log_type, if(log_deleted&1,null,log_action) as log_action, log_timestamp, if(log_deleted&4,null,log_user) as log_user, if(log_deleted&1,null,log_namespace) as log_namespace, if(log_deleted&1,null,log_title) as log_title, if(log_deleted&2,null,log_comment) as log_comment, if(log_deleted,null,log_params) as log_params, log_deleted, if(log_deleted&4,null,log_user_text) as log_user_text, if(log_deleted&1,null,log_page) as log_page", - "where": "ANYTHING YOU WRITE HERE WILL BE OVERRIDDEN BY THE SCRIPT TO PULL FROM THE `logging_whitelist` CONFIG" - }, - "logging_logindex": { - "source": "logging", - "view": "select log_id, log_type, log_action, log_timestamp, if(log_deleted&4,null,log_user) as log_user, log_namespace, log_title, if(log_deleted&2,null,log_comment) as log_comment, if(log_deleted,null,log_params) as log_params, log_deleted, if(log_deleted&4,null,log_user_text) as log_user_text, log_page", - "where": "ANYTHING YOU WRITE HERE WILL BE OVERRIDDEN BY THE SCRIPT TO PULL FROM THE `logging_whitelist` CONFIG" - }, - "logging_userindex": { - "source": "logging", - "view": "select log_id, log_type, if(log_deleted&1,null,log_action) as log_action, log_timestamp, log_user, if(log_deleted&1,null,log_namespace) as log_namespace, if(log_deleted&1,null,log_title) as log_title, if(log_deleted&2,null,log_comment) as log_comment, if(log_deleted,null,log_params) as log_params, log_deleted, log_user_text as log_user_text, if(log_deleted&1,null,log_page) as log_page", - "where": "ANYTHING YOU WRITE HERE WILL BE OVERRIDDEN BY THE SCRIPT TO PULL FROM THE `logging_whitelist` CONFIG" - }, - "mark_as_helpful": { - "source": "mark_as_helpful", - "view": "select mah_id, mah_type, mah_item, mah_user_id, mah_user_editcount, mah_namespace, mah_title, mah_timestamp, NULL as mah_system_type, NULL as mah_user_agent, NULL as mah_locale" - }, - "oldimage": { - "source": "oldimage", - "view": "select oi_name, oi_archive_name, oi_size, oi_width, oi_height, oi_bits, if(oi_deleted&2,null,oi_description) as oi_description, if(oi_deleted&4,null,oi_user) as oi_user, if(oi_deleted&4,null,oi_user_text) as oi_user_text, oi_timestamp, oi_metadata, oi_media_type, oi_major_mime, oi_minor_mime, oi_deleted, oi_sha1" - }, - "oldimage_userindex": { - "source": "oldimage", - "view": "select oi_name, oi_archive_name, oi_size, oi_width, oi_height, oi_bits, if(oi_deleted&2,null,oi_description) as oi_description, oi_user, oi_user_text, oi_timestamp, oi_metadata, oi_media_type, oi_major_mime, oi_minor_mime, oi_deleted, oi_sha1", - "where": "(oi_deleted&4)=0" - }, - "recentchanges": { - "source": "recentchanges", - "view": "select rc_id, rc_timestamp, if(rc_deleted&4,null,rc_user) as rc_user, if(rc_deleted&4,null,rc_user_text) as rc_user_text, rc_namespace, rc_title, if(rc_deleted&2,null,rc_comment) as rc_comment, rc_minor, rc_bot, rc_new, rc_cur_id, rc_this_oldid, rc_last_oldid, rc_type, rc_source, rc_patrolled, null as rc_ip, rc_old_len, rc_new_len, rc_deleted, rc_logid, rc_log_type, rc_log_action, rc_params" - }, - "recentchanges_userindex": { - "source": "recentchanges", - "view": "select rc_id, rc_timestamp, rc_user, rc_user_text, rc_namespace, rc_title, if(rc_deleted&2,null,rc_comment) as rc_comment, rc_minor, rc_bot, rc_new, rc_cur_id, rc_this_oldid, rc_last_oldid, rc_type, rc_source, rc_patrolled, null as rc_ip, rc_old_len, rc_new_len, rc_deleted, rc_logid, rc_log_type, rc_log_action, rc_params", - "where": "(rc_deleted&4)=0" - }, - "revision": { - "source": "revision", - "view": "select rev_id, rev_page, if(rev_deleted&1,null,rev_text_id) as rev_text_id, if(rev_deleted&2,null,rev_comment) as rev_comment, if(rev_deleted&4,null,rev_user) as rev_user, if(rev_deleted&4,null,rev_user_text) as rev_user_text, rev_timestamp, rev_minor_edit, rev_deleted, if(rev_deleted&1,null,rev_len) as rev_len, rev_parent_id, if(rev_deleted&1,null,rev_sha1) as rev_sha1, rev_content_model, rev_content_format" - }, - "revision_userindex": { - "source": "revision", - "view": "select rev_id, rev_page, if(rev_deleted&1,null,rev_text_id) as rev_text_id, if(rev_deleted&2,null,rev_comment) as rev_comment, rev_user, rev_user_text, rev_timestamp, rev_minor_edit, rev_deleted, if(rev_deleted&1,null,rev_len) as rev_len, rev_parent_id, if(rev_deleted&1,null,rev_sha1) as rev_sha1, rev_content_model, rev_content_format", - "where": "(rev_deleted&4)=0" - }, - "user": { - "source": "user", - "view": "select user_id, user_name, user_real_name, NULL as user_password, NULL as user_newpassword, NULL as user_email, NULL as user_options, NULL as user_touched, NULL as user_token, NULL as user_email_authenticated, NULL as user_email_token, NULL as user_email_token_expires, user_registration, NULL as user_newpass_time, user_editcount, NULL as user_password_expires" - }, - "user_old": { - "source": "user_old", - "view": "select user_id, user_name, NULL as user_password, NULL as user_newpassword, NULL as user_email, NULL as user_options, NULL as user_newtalk, NULL as user_touched, user_real_name, NULL as user_token" - }, - "user_properties": { - "source": "user_properties", - "view": "select up_user, up_property, up_value", - "where": "up_property in ( 'disablemail', 'fancysig', 'gender', 'language', 'nickname', 'skin', 'timecorrection', 'variant' )" - }, - "user_properties_anon": { - "limit": 2, - "source": [ "user_properties", "user", "meta_p.properties_anon_whitelist" ], - "view": "select cast(extract(year_month from user_touched)*100+1 as date) upa_touched, up_property, up_value", - "where": "user_id=up_user and up_property like pw_property" - } - } -} diff --git a/modules/role/templates/labsdb/maintain-views.yaml b/modules/role/templates/labsdb/maintain-views.yaml new file mode 100644 index 0000000..6106ee0 --- /dev/null +++ b/modules/role/templates/labsdb/maintain-views.yaml @@ -0,0 +1,355 @@ +host: "<%= @fqdn %>" +port: 3306 +mysql_user: "<%= @view_user %>" +mysql_password: "<%= @view_pass %>" + +add_to_all_dbs: + - centralauth + +# These lists will be excluded entirely +sensitive_db_lists: + - private + - deleted + +# by list name for internal processing +metadata: + small: + size: 1 + medium: + size: 2 + large: + size: 3 + +# Tables we 1:1 to a view +fullviews: + - abuse_filter_action + - aft_article_answer + - aft_article_answer_text + - aft_article_feedback_properties + - aft_article_feedback_ratings_rollup + - aft_article_feedback_select_rollup + - aft_article_field + - aft_article_field_group + - aft_article_field_option + - aft_article_filter_count + - aft_article_revision_feedback_ratings_rollup + - aft_article_revision_feedback_select_rollup + - article_assessment + - article_assessment_pages + - article_assessment_ratings + - article_feedback + - article_feedback_pages + - article_feedback_properties + - article_feedback_ratings + - article_feedback_revisions + - article_feedback_stats + - article_feedback_stats_types + - category + - categorylinks + - change_tag + - ep_articles + - ep_cas + - ep_courses + - ep_events + - ep_instructors + - ep_oas + - ep_orgs + - ep_revisions + - ep_students + - ep_users_per_course + - externallinks + - flaggedimages + - flaggedpage_config + - flaggedpage_pending + - flaggedpages + - flaggedrevs + - flaggedrevs_promote + - flaggedrevs_statistics + - flaggedrevs_stats + - flaggedrevs_stats2 + - flaggedrevs_tracking + - flaggedtemplates + - geo_killlist + - geo_tags + - geo_updates + - globalimagelinks + - global_block_whitelist + - hashs + - hitcounter + - image + - imagelinks + - imagelinks_old + - interwiki + - iwlinks + - l10n_cache + - langlinks + - links + - localisation + - localisation_file_hash + - math + - module_deps + - msg_resource_links + - namespaces + - page + - page_broken + - pagelinks + - page_props + - page_restrictions + - pagetriage_log + - pagetriage_page + - pagetriage_page_tags + - pagetriage_tags + - pif_edits + - povwatch_log + - povwatch_subscribers + - protected_titles + - pr_index + - redirect + - renameuser_status + - site_identifiers + - sites + - site_stats + - tag_summary + - templatelinks + - transcode + - updatelog + - updates + - user_former_groups + - user_groups + - valid_tag + - wikilove_image_log + - wikilove_log + - global_group_permissions + - global_group_restrictions + - global_user_groups + - globalblocks + - localuser + - wikiset + - wb_changes + - wb_changes_dispatch + - wb_changes_subscription + - wb_entity_per_page + - wb_id_counters + - wb_items_per_site + - wb_property_info + - wb_terms + - wbc_entity_usage + - wbs_propertypairs + - watchlist_count + +logging_whitelist: + - gblrights + - globalauth + - close + - lock + - gblblock + - managetags + - gather + - campus + - institution + - instructor + - online + - merge + - massmessage + - course + - stable + - import + - abusefilter + - student + - eparticle + - rights + - pagetriage-deletion + - protect + - pagetriage-curation + - thanks + - renameuser + - review + - block + - upload + - move + - delete + - patrol + - newusers + +customviews: + abuse_filter: + source: abuse_filter + view: > + select af_id, if(af_hidden,null,af_pattern) as af_pattern, + af_user, af_user_text, af_timestamp, af_enabled, + if(af_hidden,null,af_comments) as af_comments, af_public_comments, + af_hidden, af_hit_count, af_throttled, af_deleted, af_actions, af_global, af_group + abuse_filter_log: + source: abuse_filter_log + view: > + select afl_id, afl_filter, afl_user, afl_user_text, NULL as + afl_ip, afl_action, afl_actions, afl_var_dump, afl_timestamp, afl_namespace, + afl_title, afl_wiki, afl_deleted, afl_patrolled_by, afl_rev_id, afl_log_id + aft_article_feedback: + source: aft_article_feedback + view: > + select af_id, af_page_id, af_user_id, NULL as af_user_ip, af_user_anon_token, + af_revision_id, af_cta_id, af_link_id, af_created, af_abuse_count, af_helpful_count, + af_unhelpful_count, af_oversight_count, af_is_deleted, af_is_hidden, + af_net_helpfulness, af_has_comment, af_is_unhidden, af_is_undeleted, af_is_declined, + af_activity_count, af_form_id, af_experiment, af_suppress_count, af_last_status, + af_last_status_user_id, af_last_status_timestamp, af_is_autohide, af_is_unrequested, + af_is_featured, af_is_unfeatured, af_is_resolved, af_is_unresolved, af_relevance_score, + af_relevance_sort, af_last_status_notes + archive: + source: archive + view: > + select ar_id, ar_namespace, ar_title, NULL as ar_text, NULL as ar_comment, + if(ar_deleted&4,null,ar_user) as ar_user, if(ar_deleted&4,null,ar_user_text) as + ar_user_text, ar_timestamp, ar_minor_edit, ar_flags, ar_rev_id, if(ar_deleted&1,null,ar_text_id) + as ar_text_id, ar_deleted, if(ar_deleted&1,null,ar_len) as ar_len, ar_page_id, ar_parent_id, + if(ar_deleted&1,null,ar_sha1) as ar_sha1 + archive_userindex: + source: archive + view: > + select ar_id, ar_namespace, ar_title, NULL as ar_text, NULL as ar_comment, ar_user, + ar_user_text, ar_timestamp, ar_minor_edit, ar_flags, ar_rev_id, if(ar_deleted&1,null,ar_text_id) + as ar_text_id, ar_deleted, if(ar_deleted&1,null,ar_len) as ar_len, ar_page_id, ar_parent_id, + if(ar_deleted&1,null,ar_sha1) as ar_sha1 + where: (ar_deleted&4)=0 + filearchive: + source: filearchive + view: > + select fa_id, fa_name, fa_archive_name, fa_storage_group, NULL as fa_storage_key, + fa_deleted_user, fa_deleted_timestamp, fa_deleted_reason, if(fa_deleted&1,null,fa_size) + as fa_size, if(fa_deleted&1,null,fa_width) as fa_width, if(fa_deleted&1,null,fa_height) + as fa_height, if(fa_deleted&1,null,fa_metadata) as fa_metadata, if(fa_deleted&1,null,fa_bits) + as fa_bits, if(fa_deleted&1,null,fa_media_type) as fa_media_type, if(fa_deleted&1,null,fa_major_mime) + as fa_major_mime, if(fa_deleted&1,null,fa_minor_mime) as fa_minor_mime, + if(fa_deleted&2,null,fa_description) as fa_description, if(fa_deleted&4,null,fa_user)as fa_user, + if(fa_deleted&4,null,fa_user_text) as fa_user_text, fa_timestamp, fa_deleted, + if(fa_deleted&1,null,fa_sha1) as fa_sha1 + filearchive_userindex: + source: filearchive + view: > + select fa_id, fa_name, fa_archive_name, fa_storage_group, NULL as fa_storage_key, + fa_deleted_user, fa_deleted_timestamp, fa_deleted_reason, if(fa_deleted&1,null,fa_size) + as fa_size, if(fa_deleted&1,null,fa_width) as fa_width, if(fa_deleted&1,null,fa_height) + as fa_height, if(fa_deleted&1,null,fa_metadata) as fa_metadata, if(fa_deleted&1,null,fa_bits) + as fa_bits, if(fa_deleted&1,null,fa_media_type) as fa_media_type, if(fa_deleted&1,null,fa_major_mime) + as fa_major_mime, if(fa_deleted&1,null,fa_minor_mime) as fa_minor_mime, if(fa_deleted&2,null,fa_description) as + fa_description, fa_user, fa_user_text, fa_timestamp, fa_deleted, if(fa_deleted&1,null,fa_sha1) as fa_sha1 + where: (fa_deleted&4)=0 + globaluser: + source: globaluser + view: > + select gu_id, gu_name, gu_enabled, gu_enabled_method, gu_home_db, NULL as gu_email, + NULL as gu_email_authenticated, NULL as gu_salt, NULL as gu_password, gu_locked, + gu_hidden, gu_registration, NULL as gu_password_reset_key, NULL as gu_password_reset_expiration, + NULL as gu_auth_token + where: gu_hidden='' + ipblocks: + source: ipblocks + view: > + select ipb_id, if(ipb_auto<>0,null,ipb_address) as ipb_address, ipb_user, ipb_by, ipb_reason, + ipb_timestamp, ipb_auto, ipb_anon_only, ipb_create_account, ipb_expiry, if(ipb_auto<>0,null,ipb_range_start) + as ipb_range_start, if(ipb_auto<>0,null,ipb_range_end) as ipb_range_end, ipb_enable_autoblock, + ipb_deleted, ipb_block_email, ipb_by_text, ipb_allow_usertalk, ipb_parent_block_id + where: ipb_deleted=0 + ipblocks_ipindex: + source: ipblocks + view: > + select ipb_id, ipb_address, ipb_user, ipb_by, ipb_reason, ipb_timestamp, ipb_auto, ipb_anon_only, + ipb_create_account, ipb_expiry, ipb_range_start, ipb_range_end, ipb_enable_autoblock, ipb_deleted, + ipb_block_email, ipb_by_text, ipb_allow_usertalk, ipb_parent_block_id + where: ipb_deleted=0 and ipb_auto=0 + logging: + source: logging + view: > + select log_id, log_type, if(log_deleted&1,null,log_action) as log_action, log_timestamp, + if(log_deleted&4,null,log_user) as log_user, if(log_deleted&1,null,log_namespace) as log_namespace, + if(log_deleted&1,null,log_title) as log_title, if(log_deleted&2,null,log_comment) as log_comment, + if(log_deleted,null,log_params) as log_params, log_deleted, if(log_deleted&4,null,log_user_text) + as log_user_text, if(log_deleted&1,null,log_page) as log_page + where: ANYTHING YOU WRITE HERE WILL BE OVERRIDDEN BY THE SCRIPT TO PULL FROM THE `logging_whitelist` CONFIG + logging_logindex: + source: logging + view: > + select log_id, log_type, log_action, log_timestamp, if(log_deleted&4,null,log_user) as log_user, + log_namespace, log_title, if(log_deleted&2,null,log_comment) as log_comment, if(log_deleted,null,log_params) + as log_params, log_deleted, if(log_deleted&4,null,log_user_text) as log_user_text, log_page + where: ANYTHING YOU WRITE HERE WILL BE OVERRIDDEN BY THE SCRIPT TO PULL FROM THE `logging_whitelist` CONFIG + logging_userindex: + source: logging + view: > + select log_id, log_type, if(log_deleted&1,null,log_action) as log_action, log_timestamp, log_user, + if(log_deleted&1,null,log_namespace) as log_namespace, if(log_deleted&1,null,log_title) as log_title, + if(log_deleted&2,null,log_comment) as log_comment, if(log_deleted,null,log_params) as log_params, + log_deleted, log_user_text as log_user_text, if(log_deleted&1,null,log_page) as log_page + where: ANYTHING YOU WRITE HERE WILL BE OVERRIDDEN BY THE SCRIPT TO PULL FROM THE `logging_whitelist` CONFIG + mark_as_helpful: + source: mark_as_helpful + view: > + select mah_id, mah_type, mah_item, mah_user_id, mah_user_editcount, mah_namespace, mah_title, + mah_timestamp, NULL as mah_system_type, NULL as mah_user_agent, NULL as mah_locale + oldimage: + source: oldimage + view: > + select oi_name, oi_archive_name, oi_size, oi_width, oi_height, oi_bits, + if(oi_deleted&2,null,oi_description) as oi_description, if(oi_deleted&4,null,oi_user) as oi_user, + if(oi_deleted&4,null,oi_user_text) as oi_user_text, oi_timestamp, oi_metadata, oi_media_type, + oi_major_mime, oi_minor_mime, oi_deleted, oi_sha1 + oldimage_userindex: + source: oldimage + view: > + select oi_name, oi_archive_name, oi_size, oi_width, oi_height, oi_bits, if(oi_deleted&2,null,oi_description) + as oi_description, oi_user, oi_user_text, oi_timestamp, oi_metadata, oi_media_type, oi_major_mime, + oi_minor_mime, oi_deleted, oi_sha1 + where: (oi_deleted&4)=0 + recentchanges: + source: recentchanges + view: > + select rc_id, rc_timestamp, if(rc_deleted&4,null,rc_user) as rc_user, if(rc_deleted&4,null,rc_user_text) + as rc_user_text, rc_namespace, rc_title, if(rc_deleted&2,null,rc_comment) as rc_comment, rc_minor, + rc_bot, rc_new, rc_cur_id, rc_this_oldid, rc_last_oldid, rc_type, rc_source, rc_patrolled, + null as rc_ip, rc_old_len, rc_new_len, rc_deleted, rc_logid, rc_log_type, rc_log_action, rc_params + recentchanges_userindex: + source: recentchanges + view: > + select rc_id, rc_timestamp, rc_user, rc_user_text, rc_namespace, rc_title, if(rc_deleted&2,null,rc_comment) + as rc_comment, rc_minor, rc_bot, rc_new, rc_cur_id, rc_this_oldid, rc_last_oldid, rc_type, rc_source, + rc_patrolled, null as rc_ip, rc_old_len, rc_new_len, rc_deleted, rc_logid, rc_log_type, rc_log_action, rc_params + where: (rc_deleted&4)=0 + revision: + source: revision + view: > + select rev_id, rev_page, if(rev_deleted&1,null,rev_text_id) as rev_text_id, if(rev_deleted&2,null,rev_comment) + as rev_comment, if(rev_deleted&4,null,rev_user) as rev_user, if(rev_deleted&4,null,rev_user_text) as rev_user_text, + rev_timestamp, rev_minor_edit, rev_deleted, if(rev_deleted&1,null,rev_len) as rev_len, rev_parent_id, + if(rev_deleted&1,null,rev_sha1) as rev_sha1, rev_content_model, rev_content_format + revision_userindex: + source: revision + view: > + select rev_id, rev_page, if(rev_deleted&1,null,rev_text_id) as rev_text_id, if(rev_deleted&2,null,rev_comment) + as rev_comment, rev_user, rev_user_text, rev_timestamp, rev_minor_edit, rev_deleted, + if(rev_deleted&1,null,rev_len) as rev_len, rev_parent_id, if(rev_deleted&1,null,rev_sha1) as rev_sha1, + rev_content_model, rev_content_format + where: (rev_deleted&4)=0 + user: + source: user + view: > + select user_id, user_name, user_real_name, NULL as user_password, NULL as user_newpassword, + NULL as user_email, NULL as user_options, NULL as user_touched, NULL as user_token, + NULL as user_email_authenticated, NULL as user_email_token, NULL as user_email_token_expires, + user_registration, NULL as user_newpass_time, user_editcount, NULL as user_password_expires + user_old: + source: user_old + view: > + select user_id, user_name, NULL as user_password, NULL as user_newpassword, NULL as user_email, + NULL as user_options, NULL as user_newtalk, NULL as user_touched, user_real_name, NULL as user_token + user_properties: + source: user_properties + view: select up_user, up_property, up_value + where: > + up_property in ( 'disablemail', 'fancysig', 'gender', 'language', + 'nickname', 'skin', 'timecorrection', 'variant' ) + user_properties_anon: + limit: 2 + source: ["user_properties", "user", "meta_p.properties_anon_whitelist" ] + view: select cast(extract(year_month from user_touched)*100+1 as date) upa_touched, up_property, up_value + where: user_id=up_user and up_property like pw_property -- To view, visit https://gerrit.wikimedia.org/r/315534 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I95275471471ce2ba8b63ed0b309a126c5a8ab051 Gerrit-PatchSet: 4 Gerrit-Project: operations/puppet Gerrit-Branch: production Gerrit-Owner: Rush <r...@wikimedia.org> Gerrit-Reviewer: Madhuvishy <mviswanat...@wikimedia.org> Gerrit-Reviewer: Rush <r...@wikimedia.org> Gerrit-Reviewer: Volans <rcocci...@wikimedia.org> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits