Rush has submitted this change and it was merged.

Change subject: Add python version of maintain-replicas script
......................................................................


Add python version of maintain-replicas script

This does not include functionality that was split into maintain-meta_p.py last
year.

This (so far) deals with the following issues:
* It was written in Perl.
* Configuration was part of the script.
* It tried to parse ~/.my.cnf instead of getting that data from it's own
  configuration.
* Configuration file couldn't be specified on the CLI
* It is now commented more.
* It allows specifying one or multiple databases to work on, instead of just
  running across all.dblist.
* It does much less work done in file scope.
* It has a "dry run" mode.
* It is more verbose about what it's not doing.
* It waits for confirmation before replacing existing views, by default.
* It clones mediawiki-config for you if it doesn't already exist.

Bug: T138450
Change-Id: Icc51fc9a23f2fcf4c19c7adc092662d7122471e1
---
A maintain-replicas/config.json
A maintain-replicas/maintain-replicas.py
M tox.ini
3 files changed, 530 insertions(+), 1 deletion(-)

Approvals:
  Rush: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/maintain-replicas/config.json b/maintain-replicas/config.json
new file mode 100644
index 0000000..6d0ec78
--- /dev/null
+++ b/maintain-replicas/config.json
@@ -0,0 +1,156 @@
+{
+    "slices": [
+        ["labsdb1001.eqiad.wmnet", 3306],
+        ["labsdb1002.eqiad.wmnet", 3306],
+        ["labsdb1003.eqiad.wmnet", 3306]
+    ],
+    "mysql_user": "root",
+    "mysql_password": "password",
+    "fullviews": [
+        "abuse_filter_action", "abuse_filter_history",
+        "aft_article_answer", "aft_article_answer_text",
+        "aft_article_feedback_properties", 
"aft_article_feedback_ratings_rollup",
+        "aft_article_feedback_select_rollup", "aft_article_field", 
"aft_article_field_group",
+        "aft_article_field_option", "aft_article_filter_count", 
"aft_article_revision_feedback_ratings_rollup",
+        "aft_article_revision_feedback_select_rollup", "article_assessment", 
"article_assessment_pages",
+        "article_assessment_ratings", "article_feedback", 
"article_feedback_pages",
+        "article_feedback_properties", "article_feedback_ratings", 
"article_feedback_revisions",
+        "article_feedback_stats", "article_feedback_stats_types", "category", 
"categorylinks", "change_tag",
+        "ep_articles", "ep_cas", "ep_courses", "ep_events", "ep_instructors", 
"ep_oas", "ep_orgs",
+        "ep_revisions", "ep_students", "ep_users_per_course", "externallinks", 
"flaggedimages",
+        "flaggedpage_config", "flaggedpage_pending", "flaggedpages", 
"flaggedrevs", "flaggedrevs_promote",
+        "flaggedrevs_statistics", "flaggedrevs_stats", "flaggedrevs_stats2", 
"flaggedrevs_tracking",
+        "flaggedtemplates", "geo_killlist", "geo_tags", "geo_updates", 
"globalimagelinks", "global_block_whitelist",
+        "hashs", "hitcounter", "image", "imagelinks", "imagelinks_old", 
"interwiki", "iwlinks",
+        "l10n_cache", "langlinks", "links", "localisation", 
"localisation_file_hash",
+        "math", "module_deps", "msg_resource_links", "namespaces",
+        "page", "page_broken", "pagelinks", "page_props", "page_restrictions", 
"pagetriage_log",
+        "pagetriage_page", "pagetriage_page_tags", "pagetriage_tags", 
"pif_edits", "povwatch_log",
+        "povwatch_subscribers", "protected_titles", "pr_index", "redirect", 
"renameuser_status", "site_identifiers",
+        "sites", "site_stats", "tag_summary", "templatelinks", "transcode", 
"updatelog", "updates",
+        "user_former_groups", "user_groups", "valid_tag", "wikilove_image_log",
+        "wikilove_log", "global_group_permissions", 
"global_group_restrictions", "global_user_groups",
+        "globalblocks", "localuser", "wikiset", "wb_changes", 
"wb_changes_dispatch", "wb_changes_subscription",
+        "wb_entity_per_page", "wb_id_counters", "wb_items_per_site", 
"wb_property_info", "wb_terms",
+        "wbc_entity_usage", "wbs_propertypairs", "watchlist_count"
+    ],
+    "logging_whitelist": [
+        "gblrights", "globalauth", "close", "lock", "gblblock", "managetags", 
"gather", "campus",
+        "institution", "instructor", "online", "merge", "massmessage", 
"course", "stable", "import",
+        "abusefilter", "student", "eparticle", "rights", 
"pagetriage-deletion", "protect",
+        "pagetriage-curation", "thanks", "renameuser", "review", "block", 
"upload", "move",
+        "delete", "patrol", "newusers"
+    ],
+    "customviews": {
+        "abuse_filter": {
+            "source": "abuse_filter",
+            "view": "select af_id, if(af_hidden,null,af_pattern) as 
af_pattern, af_user, af_user_text, af_timestamp, af_enabled, 
if(af_hidden,null,af_comments) as af_comments, af_public_comments, af_hidden, 
af_hit_count, af_throttled, af_deleted, af_actions, af_global, af_group"
+        },
+        "abuse_filter_log": {
+            "source": "abuse_filter_log",
+            "view": "select afl_id, afl_filter, afl_user, afl_user_text, NULL 
as afl_ip, afl_action, afl_actions, afl_var_dump, afl_timestamp, afl_namespace, 
afl_title, afl_wiki, afl_deleted, afl_patrolled_by, afl_rev_id, afl_log_id"
+        },
+        "aft_article_feedback": {
+            "source": "aft_article_feedback",
+            "view": "select af_id, af_page_id, af_user_id, NULL as af_user_ip, 
af_user_anon_token, af_revision_id, af_cta_id, af_link_id, af_created, 
af_abuse_count, af_helpful_count, af_unhelpful_count, af_oversight_count, 
af_is_deleted, af_is_hidden, af_net_helpfulness, af_has_comment, 
af_is_unhidden, af_is_undeleted, af_is_declined, af_activity_count, af_form_id, 
af_experiment, af_suppress_count, af_last_status, af_last_status_user_id, 
af_last_status_timestamp, af_is_autohide, af_is_unrequested, af_is_featured, 
af_is_unfeatured, af_is_resolved, af_is_unresolved, af_relevance_score, 
af_relevance_sort, af_last_status_notes"
+        },
+        "archive": {
+            "source": "archive",
+            "view": "select ar_id, ar_namespace, ar_title, NULL as ar_text, 
NULL as ar_comment, if(ar_deleted&4,null,ar_user) as ar_user, 
if(ar_deleted&4,null,ar_user_text) as ar_user_text, ar_timestamp, 
ar_minor_edit, ar_flags, ar_rev_id, if(ar_deleted&1,null,ar_text_id) as 
ar_text_id, ar_deleted, if(ar_deleted&1,null,ar_len) as ar_len, ar_page_id, 
ar_parent_id, if(ar_deleted&1,null,ar_sha1) as ar_sha1"
+        },
+        "archive_userindex": {
+            "source": "archive",
+            "view": "select ar_id, ar_namespace, ar_title, NULL as ar_text, 
NULL as ar_comment, ar_user, ar_user_text, ar_timestamp, ar_minor_edit, 
ar_flags, ar_rev_id, if(ar_deleted&1,null,ar_text_id) as ar_text_id, 
ar_deleted, if(ar_deleted&1,null,ar_len) as ar_len, ar_page_id, ar_parent_id, 
if(ar_deleted&1,null,ar_sha1) as ar_sha1",
+            "where": "(ar_deleted&4)=0"
+        },
+        "filearchive": {
+            "source": "filearchive",
+            "view": "select fa_id, fa_name, fa_archive_name, fa_storage_group, 
NULL as fa_storage_key, fa_deleted_user, fa_deleted_timestamp, 
fa_deleted_reason, if(fa_deleted&1,null,fa_size) as fa_size, 
if(fa_deleted&1,null,fa_width) as fa_width, if(fa_deleted&1,null,fa_height) as 
fa_height, if(fa_deleted&1,null,fa_metadata) as fa_metadata, 
if(fa_deleted&1,null,fa_bits) as fa_bits, if(fa_deleted&1,null,fa_media_type) 
as fa_media_type, if(fa_deleted&1,null,fa_major_mime) as fa_major_mime, 
if(fa_deleted&1,null,fa_minor_mime) as fa_minor_mime, 
if(fa_deleted&2,null,fa_description) as fa_description, 
if(fa_deleted&4,null,fa_user)as fa_user, if(fa_deleted&4,null,fa_user_text) as 
fa_user_text, fa_timestamp, fa_deleted, if(fa_deleted&1,null,fa_sha1) as 
fa_sha1"
+        },
+        "filearchive_userindex": {
+            "source": "filearchive",
+            "view": "select fa_id, fa_name, fa_archive_name, fa_storage_group, 
NULL as fa_storage_key, fa_deleted_user, fa_deleted_timestamp, 
fa_deleted_reason, if(fa_deleted&1,null,fa_size) as fa_size, 
if(fa_deleted&1,null,fa_width) as fa_width, if(fa_deleted&1,null,fa_height) as 
fa_height, if(fa_deleted&1,null,fa_metadata) as fa_metadata, 
if(fa_deleted&1,null,fa_bits) as fa_bits, if(fa_deleted&1,null,fa_media_type) 
as fa_media_type, if(fa_deleted&1,null,fa_major_mime) as fa_major_mime, 
if(fa_deleted&1,null,fa_minor_mime) as fa_minor_mime, 
if(fa_deleted&2,null,fa_description) as fa_description, fa_user, fa_user_text, 
fa_timestamp, fa_deleted, if(fa_deleted&1,null,fa_sha1) as fa_sha1",
+            "where": "(fa_deleted&4)=0"
+        },
+        "globaluser": {
+            "source": "globaluser",
+            "view": "select gu_id, gu_name, gu_enabled, gu_enabled_method, 
gu_home_db, NULL as gu_email, NULL as gu_email_authenticated, NULL as gu_salt, 
NULL as gu_password, gu_locked, gu_hidden, gu_registration, NULL as 
gu_password_reset_key, NULL as gu_password_reset_expiration, NULL as 
gu_auth_token",
+            "where": "gu_hidden=''"
+        },
+        "ipblocks": {
+            "source": "ipblocks",
+            "view": "select ipb_id, if(ipb_auto<>0,null,ipb_address) as 
ipb_address, ipb_user, ipb_by, ipb_reason, ipb_timestamp, ipb_auto, 
ipb_anon_only, ipb_create_account, ipb_expiry, 
if(ipb_auto<>0,null,ipb_range_start) as ipb_range_start, 
if(ipb_auto<>0,null,ipb_range_end) as ipb_range_end, ipb_enable_autoblock, 
ipb_deleted, ipb_block_email, ipb_by_text, ipb_allow_usertalk, 
ipb_parent_block_id",
+            "where": "ipb_deleted=0"
+        },
+        "ipblocks_ipindex": {
+            "source": "ipblocks",
+            "view": "select ipb_id, ipb_address, ipb_user, ipb_by, ipb_reason, 
ipb_timestamp, ipb_auto, ipb_anon_only, ipb_create_account, ipb_expiry, 
ipb_range_start, ipb_range_end, ipb_enable_autoblock, ipb_deleted, 
ipb_block_email, ipb_by_text, ipb_allow_usertalk, ipb_parent_block_id",
+            "where": "ipb_deleted=0 and ipb_auto=0"
+        },
+        "logging": {
+            "source": "logging",
+            "view": "select log_id, log_type, 
if(log_deleted&1,null,log_action) as log_action, log_timestamp, 
if(log_deleted&4,null,log_user) as log_user, 
if(log_deleted&1,null,log_namespace) as log_namespace, 
if(log_deleted&1,null,log_title) as log_title, 
if(log_deleted&2,null,log_comment) as log_comment, 
if(log_deleted,null,log_params) as log_params, log_deleted, 
if(log_deleted&4,null,log_user_text) as log_user_text, 
if(log_deleted&1,null,log_page) as log_page",
+            "where": "ANYTHING YOU WRITE HERE WILL BE OVERRIDDEN BY THE SCRIPT 
TO PULL FROM THE `logging_whitelist` CONFIG"
+        },
+        "logging_logindex": {
+            "source": "logging",
+            "view": "select log_id, log_type, log_action, log_timestamp, 
if(log_deleted&4,null,log_user) as log_user, log_namespace, log_title, 
if(log_deleted&2,null,log_comment) as log_comment, 
if(log_deleted,null,log_params) as log_params, log_deleted, 
if(log_deleted&4,null,log_user_text) as log_user_text, log_page",
+            "where": "ANYTHING YOU WRITE HERE WILL BE OVERRIDDEN BY THE SCRIPT 
TO PULL FROM THE `logging_whitelist` CONFIG"
+        },
+        "logging_userindex": {
+            "source": "logging",
+            "view": "select log_id, log_type, 
if(log_deleted&1,null,log_action) as log_action, log_timestamp, log_user, 
if(log_deleted&1,null,log_namespace) as log_namespace, 
if(log_deleted&1,null,log_title) as log_title, 
if(log_deleted&2,null,log_comment) as log_comment, 
if(log_deleted,null,log_params) as log_params, log_deleted, log_user_text as 
log_user_text, if(log_deleted&1,null,log_page) as log_page",
+            "where": "ANYTHING YOU WRITE HERE WILL BE OVERRIDDEN BY THE SCRIPT 
TO PULL FROM THE `logging_whitelist` CONFIG"
+        },
+        "mark_as_helpful": {
+            "source": "mark_as_helpful",
+            "view": "select mah_id, mah_type, mah_item, mah_user_id, 
mah_user_editcount, mah_namespace, mah_title, mah_timestamp, NULL as 
mah_system_type, NULL as mah_user_agent, NULL as mah_locale"
+        },
+        "oldimage": {
+            "source": "oldimage",
+            "view": "select oi_name, oi_archive_name, oi_size, oi_width, 
oi_height, oi_bits, if(oi_deleted&2,null,oi_description) as oi_description, 
if(oi_deleted&4,null,oi_user) as oi_user, if(oi_deleted&4,null,oi_user_text) as 
oi_user_text, oi_timestamp, oi_metadata, oi_media_type, oi_major_mime, 
oi_minor_mime, oi_deleted, oi_sha1"
+        },
+        "oldimage_userindex": {
+            "source": "oldimage",
+            "view": "select oi_name, oi_archive_name, oi_size, oi_width, 
oi_height, oi_bits, if(oi_deleted&2,null,oi_description) as oi_description, 
oi_user, oi_user_text, oi_timestamp, oi_metadata, oi_media_type, oi_major_mime, 
oi_minor_mime, oi_deleted, oi_sha1",
+            "where": "(oi_deleted&4)=0"
+        },
+        "recentchanges": {
+            "source": "recentchanges",
+            "view": "select rc_id, rc_timestamp, if(rc_deleted&4,null,rc_user) 
as rc_user, if(rc_deleted&4,null,rc_user_text) as rc_user_text, rc_namespace, 
rc_title, if(rc_deleted&2,null,rc_comment) as rc_comment, rc_minor, rc_bot, 
rc_new, rc_cur_id, rc_this_oldid, rc_last_oldid, rc_type, rc_source, 
rc_patrolled, null as rc_ip, rc_old_len, rc_new_len, rc_deleted, rc_logid, 
rc_log_type, rc_log_action, rc_params"
+        },
+        "recentchanges_userindex": {
+            "source": "recentchanges",
+            "view": "select rc_id, rc_timestamp, rc_user, rc_user_text, 
rc_namespace, rc_title, if(rc_deleted&2,null,rc_comment) as rc_comment, 
rc_minor, rc_bot, rc_new, rc_cur_id, rc_this_oldid, rc_last_oldid, rc_type, 
rc_source, rc_patrolled, null as rc_ip, rc_old_len, rc_new_len, rc_deleted, 
rc_logid, rc_log_type, rc_log_action, rc_params",
+            "where": "(rc_deleted&4)=0"
+        },
+        "revision": {
+            "source": "revision",
+            "view": "select rev_id, rev_page, 
if(rev_deleted&1,null,rev_text_id) as rev_text_id, 
if(rev_deleted&2,null,rev_comment) as rev_comment, 
if(rev_deleted&4,null,rev_user) as rev_user, 
if(rev_deleted&4,null,rev_user_text) as rev_user_text, rev_timestamp, 
rev_minor_edit, rev_deleted, if(rev_deleted&1,null,rev_len) as rev_len, 
rev_parent_id, if(rev_deleted&1,null,rev_sha1) as rev_sha1, rev_content_model, 
rev_content_format"
+        },
+        "revision_userindex": {
+            "source": "revision",
+            "view": "select rev_id, rev_page, 
if(rev_deleted&1,null,rev_text_id) as rev_text_id, 
if(rev_deleted&2,null,rev_comment) as rev_comment, rev_user, rev_user_text, 
rev_timestamp, rev_minor_edit, rev_deleted, if(rev_deleted&1,null,rev_len) as 
rev_len, rev_parent_id, if(rev_deleted&1,null,rev_sha1) as rev_sha1, 
rev_content_model, rev_content_format",
+            "where": "(rev_deleted&4)=0"
+        },
+        "user": {
+            "source": "user",
+            "view": "select user_id, user_name, user_real_name, NULL as 
user_password, NULL as user_newpassword, NULL as user_email, NULL as 
user_options, NULL as user_touched, NULL as user_token, NULL as 
user_email_authenticated, NULL as user_email_token, NULL as 
user_email_token_expires, user_registration, NULL as user_newpass_time, 
user_editcount, NULL as user_password_expires"
+        },
+        "user_old": {
+            "source": "user_old",
+            "view": "select user_id, user_name, NULL as user_password, NULL as 
user_newpassword, NULL as user_email, NULL as user_options, NULL as 
user_newtalk, NULL as user_touched, user_real_name, NULL as user_token"
+        },
+        "user_properties": {
+            "source": "user_properties",
+            "view": "select up_user, up_property, up_value",
+            "where": "up_property in ( 'disablemail', 'fancysig', 'gender', 
'language', 'nickname', 'skin', 'timecorrection', 'variant' )"
+        },
+        "user_properties_anon": {
+            "limit": 2,
+            "source": [ "user_properties", "user", 
"meta_p.properties_anon_whitelist" ],
+            "view": "select cast(extract(year_month from user_touched)*100+1 
as date) upa_touched, up_property, up_value",
+            "where": "user_id=up_user and up_property like pw_property"
+        }
+    }
+}
diff --git a/maintain-replicas/maintain-replicas.py 
b/maintain-replicas/maintain-replicas.py
new file mode 100644
index 0000000..3774d32
--- /dev/null
+++ b/maintain-replicas/maintain-replicas.py
@@ -0,0 +1,373 @@
+#! /usr/bin/python3
+# -*- coding: utf-8 -*-
+
+#  Based on work by Marc-André Pelletier, ported to Python by Alex Monk
+#  Copyright © 2016 Alex Monk <[email protected]>
+#  Copyright © 2015 Alex Monk <[email protected]>
+#  Copyright © 2013 Marc-André Pelletier <[email protected]>
+#
+#  Permission to use, copy, modify, and/or distribute this software for any
+#  purpose with or without fee is hereby granted, provided that the above
+#  copyright notice and this permission notice appear in all copies.
+#
+#  THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+#  WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+#  MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+#  ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+#  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+#  ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+#  OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+#
+#
+#  This script maintains the databases containing sanitized views to
+#  the replicated databases (in the form <db>_p for every <db>)
+#
+#  By default, it processes every database but it accepts a list of
+#  databases to process
+#
+#  The script uses a clone of operations/mediawiki-config in a directory called
+#  mediawiki-config, from which it will get it's database list information. It
+#  will clone the repository from https://gerrit.wikimedia.org if necessary.
+#
+
+import argparse
+import json
+import logging
+import os
+import re
+import subprocess
+
+import pymysql
+
+
+class SchemaOperations():
+    def __init__(self, dry_run, replace_all, db, db_size, cursor):
+        self.dry_run = dry_run
+        self.replace_all = replace_all
+        self.db = db
+        self.db_p = db + '_p'
+        self.db_size = db_size
+        self.cursor = cursor
+
+    def write_execute(self, query):
+        if self.dry_run:
+            logging.info("DRY RUN: Would execute: {}".format(query))
+        else:
+            self.cursor.execute(query)
+
+    def table_exists(self, table, database):
+        """
+        Determine whether a table of the given name exists in the database of
+        the given name.
+        """
+        self.cursor.execute("""
+            SELECT `table_name`
+            FROM `information_schema`.`tables`
+            WHERE `table_name`=%s AND `table_schema`=%s;
+        """, args=(table, database))
+        return bool(self.cursor.rowcount)
+
+    def execute(self, fullviews, customviews):
+        """
+        Begin checking/creating views for this schema.
+        """
+
+        self.cursor.execute("""
+            SELECT `schema_name`
+            FROM `information_schema`.`schemata`
+            WHERE `schema_name`=%s
+        """, args=(self.db_p,))
+        if not self.cursor.rowcount:
+            # Can't use pymysql to build this
+            self.write_execute(
+                "CREATE DATABASE `{}`;".format(self.db_p)
+            )
+
+        logging.info("Full views for {}:".format(self.db))
+        for view in fullviews:
+            self.do_fullview(view)
+
+        logging.info("Custom views for {}:".format(self.db))
+        for view_name, view_details in customviews.items():
+            self.do_customview(view_name, view_details)
+
+    def do_fullview(self, view):
+        """
+        Check whether the source table exists, and if so, create the view.
+        """
+        if self.table_exists(view, self.db):
+            # If it does, create or replace the view for it.
+            logging.info("[{}] ".format(view))
+            if (self.replace_all or not self.table_exists(view, self.db_p) or
+                    input('View already exists. Replace? [y/N] ').lower()
+                    in ['y', 'yes']):
+                # Can't use pymysql to build this
+                self.write_execute("""
+                    CREATE OR REPLACE
+                    DEFINER=viewmaster
+                    VIEW `{0}`.`{1}`
+                    AS SELECT * FROM `{2}`.`{1}`;
+                """.format(self.db_p, view, self.db))
+        else:
+            # Some views only exist in CentralAuth, some only in MediaWiki,
+            # etc.
+            logging.debug(
+                ("Skipping full view {} on database {} as the table does not"
+                    " seem to exist.")
+                .format(view, self.db)
+            )
+
+    def check_customview_source(self, view_name, source):
+        """
+        Check whether a custom view's particular source exists. If it does,
+        return the source database and table names. If not, return False.
+        """
+        match = re.match(r"^(?:(.*)\.)?([^.]+)$", source)
+        if not match:
+            raise Exception(
+                "Custom view source does not look valid! Source: {}, view: {}"
+                .format(source, view_name)
+            )
+
+        # Effectively separate a db.table source into it's parts
+        source_db, source_table = match.groups()
+
+        # If there was no DB part, assume the current DB name
+        if source_db is None:
+            source_db = self.db
+
+        if self.table_exists(source_table, source_db):
+            # If it does, take this source into account.
+            return source_db, source_table
+        else:
+            logging.debug(
+                ("Failed to find table {} in database {} as a source for"
+                    " view {}")
+                .format(source_table, source_db, view_name)
+            )
+            return False
+
+    def do_customview(self, view_name, view_details):
+        """
+        Process a custom view's sources, and if they're all present, and the
+        view's limit is not bigger than the database size, call
+        create_customview.
+        """
+        if ("limit" in view_details and self.db_size is not None and
+                view_details["limit"] > self.db_size):
+            # Ignore custom views which have a limit number greater
+            # than the size ID set by the read_list calls for
+            # size.dblist above.
+            logging.debug("Too big for this database")
+            return
+
+        sources = view_details["source"]
+        if sources.__class__ is str:
+            sources = [sources]
+
+        sources_checked = []
+
+        for source in sources:
+            result = self.check_customview_source(view_name, source)
+            if result:
+                source_db, source_table = result
+                sources_checked.append(
+                    "`{}`.`{}`".format(source_db, source_table)
+                )
+            else:
+                break
+
+        if len(sources) == len(sources_checked):
+
+            if (self.replace_all or
+                    not self.table_exists(view_name, self.db_p) or
+                    input('View already exists. Replace? [y/N] ').lower()
+                    in ['y', 'yes']):
+                logging.info("[{}] ".format(view_name))
+                self.create_customview(
+                    view_name,
+                    view_details,
+                    sources_checked
+                )
+        else:
+            # If any source was not found, ignore this view.
+            # Some views only exist in CentralAuth, some only in MediaWiki,
+            # etc.
+            logging.debug(
+                ("Skipping custom view {} on database {} as not all sources"
+                    " were verified.\nSources: {}\nVerified sources: {}")
+                .format(
+                    view_name,
+                    self.db,
+                    str(view_details["source"]),
+                    str(sources_checked)
+                )
+            )
+
+    def create_customview(self, view_name, view_details, sources):
+        """
+        Creates or replaces a custom view from it's sources.
+        """
+        query = """
+            CREATE OR REPLACE
+            DEFINER=viewmaster
+            VIEW `{}`.`{}`
+            AS {}
+            FROM {}
+        """.format(
+            self.db_p,
+            view_name,
+            view_details["view"],
+            ",".join(sources)
+        )
+
+        if "where" in view_details:
+            query += " WHERE {}\n".format(view_details["where"])
+        if "group" in view_details:
+            # Technically nothing is using this at the moment...
+            query += " GROUP BY {}\n".format(view_details["group"])
+
+        query += ";"
+
+        # Can't use pymysql to build this
+        self.write_execute(query)
+
+
+def do_dbhost(dry_run, replace_all, dbhost, dbport, config, dbs, customviews):
+    """
+    Handle setting up a connection to a dbhost, then go through every database
+    to start the process of creating views there.
+    """
+    dbh = pymysql.connect(
+        host=dbhost,
+        port=dbport,
+        user=config["mysql_user"],
+        passwd=config["mysql_password"],
+        charset="utf8"
+    )
+    with dbh.cursor() as cursor:
+        logging.info("Connected to {}:{}...".format(dbhost, dbport))
+        cursor.execute("SET NAMES 'utf8';")
+        for db, db_info in dbs.items():
+            skip = False
+            for bad_flag in ["deleted", "private"]:
+                if db_info.get(bad_flag, False):
+                    logging.debug(
+                        "Skipping database {} because it's marked as {}."
+                        .format(db, bad_flag)
+                    )
+                    skip = True
+
+            if skip:
+                continue
+
+            db_size = None
+            if "size" in db_info:
+                db_size = db_info["size"]
+
+            ops = SchemaOperations(dry_run, replace_all, db, db_size, cursor)
+            ops.execute(config["fullviews"], customviews)
+
+if __name__ == "__main__":
+    logging.getLogger().setLevel(logging.INFO)
+
+    argparser = argparse.ArgumentParser(
+        "maintain-replicas",
+        description="Maintain labs sanitized views of replica databases"
+    )
+    argparser.add_argument(
+        "--config-location",
+        help="Path to find the configuration file",
+        default="config.json"
+    )
+    argparser.add_argument(
+        "--dry-run",
+        help=("Give this parameter if you don't want the script to actually"
+              " make changes."),
+        action="store_true"
+    )
+    argparser.add_argument(
+        "--replace-all",
+        help=("Give this parameter if you don't want the script to prompt"
+              " before replacing views."),
+        action="store_true"
+    )
+    argparser.add_argument(
+        "--databases",
+        help=("Specify database(s) to work on, instead of all. Multiple"
+              " values can be given space-separated."),
+        nargs="+"
+    )
+    args = argparser.parse_args()
+    with open(args.config_location) as f:
+        config = json.load(f)
+
+    # Hacks
+    safelog = ("log_type IN ('" +
+               "', '".join(config["logging_whitelist"]) +
+               "')")
+    customviews = config["customviews"]
+    customviews["logging"]["where"] = safelog
+    customviews["logging_logindex"]["where"] = ("(log_deleted&1)=0 and " +
+                                                safelog)
+    customviews["logging_userindex"]["where"] = ("(log_deleted&4)=0 and " +
+                                                 safelog)
+
+    if os.path.isdir('mediawiki-config'):
+        logging.info("Running git pull in mediawiki-config")
+        subprocess.call(["git", "pull"], cwd="mediawiki-config")
+    else:
+        logging.info("Running git clone for mediawiki-config")
+        subprocess.call([
+            "git",
+            "clone",
+            "https://gerrit.wikimedia.org/r/p/operations/mediawiki-config.git";
+        ])
+
+    with open("mediawiki-config/dblists/all.dblist") as f:
+        all_available_dbs = f.read().splitlines()
+    all_available_dbs.append("centralauth")
+    if args.databases:
+        dbs = {}
+        for db in args.databases:
+            if db in all_available_dbs:
+                dbs[db] = {}
+            else:
+                logging.info(
+                    "Ignoring database {} which doesn't appear to exist."
+                    .format(db)
+                )
+
+        if not len(dbs.keys()):
+            logging.critical("No databases specified exist.")
+    else:
+        dbs = {db: {} for db in all_available_dbs}
+
+    def read_list(fname, prop, val):
+        with open("mediawiki-config/dblists/{}.dblist".format(fname)) as f:
+            for db in f.read().splitlines():
+                if db in dbs:
+                    dbs[db][prop] = val
+
+    # Reads various .dblist files to store information about specific
+    # databases. The first line, for example, reads deleted.dblist, and for
+    # each listed database name sets dbs[db_name]["deleted"] = True
+    read_list("deleted", "deleted", True)
+    read_list("private", "private", True)
+    read_list("small", "size", 1)
+    read_list("medium", "size", 2)
+    read_list("large", "size", 3)
+
+    logging.info("Got all necessary info, starting to connect to slices")
+    for dbhost, dbport in config["slices"]:
+        do_dbhost(
+            args.dry_run,
+            args.replace_all,
+            dbhost,
+            dbport,
+            config,
+            dbs,
+            customviews
+        )
+
+    logging.info("All done.")
diff --git a/tox.ini b/tox.ini
index 3299fea..02bfcd5 100644
--- a/tox.ini
+++ b/tox.ini
@@ -42,7 +42,7 @@
     compare-puppet-catalogs/*,
     fwconfigtool/*,
     geturls/*,
-    maintain-replicas/*,
+    maintain-replicas/maintain-meta_p.py,
     swift-synctool/*,
     swiftcleaner/*,
     swiftrepl/*,

-- 
To view, visit https://gerrit.wikimedia.org/r/295607
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Icc51fc9a23f2fcf4c19c7adc092662d7122471e1
Gerrit-PatchSet: 31
Gerrit-Project: operations/software
Gerrit-Branch: master
Gerrit-Owner: Alex Monk <[email protected]>
Gerrit-Reviewer: Alex Monk <[email protected]>
Gerrit-Reviewer: Andrew Bogott <[email protected]>
Gerrit-Reviewer: ArielGlenn <[email protected]>
Gerrit-Reviewer: Chasemp <[email protected]>
Gerrit-Reviewer: Coren <[email protected]>
Gerrit-Reviewer: Halfak <[email protected]>
Gerrit-Reviewer: Ladsgroup <[email protected]>
Gerrit-Reviewer: Ori.livneh <[email protected]>
Gerrit-Reviewer: Rush <[email protected]>
Gerrit-Reviewer: Volans <[email protected]>
Gerrit-Reviewer: Yuvipanda <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to